Haplotagging (Long-read RNA)

Phase variants in long-read RNA sequencing data

tools

The following tools run by default (methods: "all"):

flair-longshot, longcallr

usage

nexus run --nf-workflow haplotagging_long-read-rna.nf -params-file params.yaml

Note

Nextflow config files are available here. Use the config file that matches your installed nexus version (e.g. nexus_v0.2.0_nextflow_slurm.config).

parameters

Download params.yaml

# =============================================================================
# params.yaml — haplotagging_long-read-rna
#
# Usage:
#   nexus run --nf-workflow haplotagging_long-read-rna.nf -params-file params.yaml
#
# methods selects which haplotagging tools to run (CSV list or "all"):
#   {flair-longshot, longcallr}
# =============================================================================


# ---- required: input/output paths -------------------------------------------

# Sample sheet (TSV). The columns needed depend on the selected methods:
#   flair-longshot  → sample_id, fastq_file
#   longcallr       → sample_id, bam_file, bam_bai_file
# Running both methods requires all four columns in the same file.
samples_tsv_file: ''

# Destination directory; output files are copied here.
output_dir: ''

# Reference genome in FASTA format (.fa or .fa.gz).
reference_genome_fasta_file: ''


# ---- optional ---------------------------------------------------------------

# Haplotagging tools to run: a comma-separated list drawn from
# {flair-longshot, longcallr}, or "all" to run every tool.
methods: 'all'

# How haplotagged reads are published:
#   "bam"   →  the full phased BAM(s) (default).
#   "tsv"   →  only a haplotag TSV per method (~1000× smaller).
#   "both"  →  BAM and TSV together.
# Phased VCFs are published regardless of this setting.
haplotag_output: 'bam'


# =============================================================================
# Tool-specific settings (only consumed when the corresponding tool runs).
# =============================================================================

# FLAIR + Longshot — reads its input from the 'fastq_file' column of
# samples_tsv_file. Both settings default to an empty string.
flair_longshot:
  flair_align_extra_args: ''
  longshot_extra_args: ''

# LongcallR — reads its input from the 'bam_file' and 'bam_bai_file' columns
# of samples_tsv_file. Whenever longcallr runs, both preset and
# reference_genes_gtf_file are REQUIRED.
longcallr:
  # Accepted presets: hifi-isoseq | hifi-masseq | ont-cdna | ont-drna
  preset: 'hifi-isoseq'
  reference_genes_gtf_file: ''
  extra_args: ''