Variant Calling (Short-read DNA Somatic)

Identify somatic DNA variants in short-read DNA sequencing BAM files.

tools

The following tools run by default (methods: "all"):

clairs, deepsomatic, delly2, dysgu, gridss2, lumpy, manta, mutect2, octopus, sequenza, strelka2, svaba

usage

nexus run --nf-workflow variant_calling_short-read-dna-somatic.nf -params-file params.yaml
Note

Nextflow config files are available here. Use the config file that matches your installed nexus version (e.g. nexus_v0.2.0_nextflow_slurm.config).

parameters

Download params.yaml

# =============================================================================
# params.yaml — variant_calling_short-read-dna-somatic
#
# Usage:
#   nextflow run variant_calling_short-read-dna-somatic.nf -params-file params.yaml
#
# Fill in the required fields below. Optional fields can be left at their
# defaults or removed entirely.
# =============================================================================


# -----------------------------------------------------------------------------
# Required
# -----------------------------------------------------------------------------

# Path to the samples TSV file. Expected columns:
#   sample_id
#   tumor_bam_file                  (indel-realigned)
#   tumor_bam_bai_file              (indel-realigned)
#   normal_bam_file                 (indel-realigned)
#   normal_bam_bai_file             (indel-realigned)
#   tumor_bam_file_no_realign       (not indel-realigned; used by Manta and GRIDSS2)
#   tumor_bam_bai_file_no_realign   (not indel-realigned; used by Manta and GRIDSS2)
#   normal_bam_file_no_realign      (not indel-realigned; used by Manta and GRIDSS2)
#   normal_bam_bai_file_no_realign  (not indel-realigned; used by Manta and GRIDSS2)
#   normal_sample_id
samples_tsv_file: ''

# Destination directory; output files are copied here.
output_dir: ''

# Path to the reference genome FASTA file.
reference_genome_fasta_file: ''


# -----------------------------------------------------------------------------
# Optional — general
# -----------------------------------------------------------------------------

# Which methods to run: either 'all' or a comma-separated list.
# Recognized values:
#   all, clairs, deepsomatic, delly2, dysgu, gridss2,
#   lumpy, manta, mutect2, octopus, sequenza, strelka2, svaba
methods: 'all'


# -----------------------------------------------------------------------------
# ClairS
# Optional arguments.
# -----------------------------------------------------------------------------
clairs:
  # Additional command-line arguments forwarded verbatim to ClairS.
  extra_args: '--platform ilmn'


# -----------------------------------------------------------------------------
# DeepSomatic
# Required when methods includes 'deepsomatic' or 'all':
#   input_path, output_path
# Optional otherwise.
# -----------------------------------------------------------------------------
deepsomatic:
  # Both paths must be set whenever DeepSomatic is run.
  input_path: ''
  output_path: ''

  # Container engine to use: 'singularity' or 'docker'.
  containerization: 'singularity'

  # Value supplied to DeepSomatic's --model_type option.
  model_type: 'WGS'

  # Location of the run_deepsomatic binary.
  bin_path: 'run_deepsomatic'

  # DeepSomatic version (kept as a quoted string so it is never parsed as a number).
  bin_version: '1.9.0'


# -----------------------------------------------------------------------------
# Delly2
# Required when methods includes 'delly2' or 'all':
#   exclude_tsv_file
# Optional otherwise.
# -----------------------------------------------------------------------------
delly2:
  # Must be set whenever Delly2 is run.
  exclude_tsv_file: ''

  # Additional command-line arguments forwarded verbatim to delly2 call.
  call_extra_args: '--map-qual 20'


# -----------------------------------------------------------------------------
# Dysgu
# Optional arguments.
# -----------------------------------------------------------------------------
dysgu:
  # Additional command-line arguments forwarded verbatim to dysgu run.
  run_extra_args: '--mode pe --min-support 3 --min-size 30 --mq 20'

  # Additional command-line arguments forwarded verbatim to dysgu filter.
  filter_extra_args: '--support-fraction 0.05 --min-mapq 20 --pass-prob 0.2'


# -----------------------------------------------------------------------------
# GRIDSS2
# Optional arguments.
# -----------------------------------------------------------------------------
gridss2:
  # Additional command-line arguments forwarded verbatim to gridss.
  extra_args: ''

  # Additional command-line arguments forwarded verbatim to gridss_somatic_filter.
  somatic_filter_extra_args: '--ref BSgenome.Hsapiens.UCSC.hg38'


# -----------------------------------------------------------------------------
# Manta
# Optional arguments.
# -----------------------------------------------------------------------------
manta:
  # Additional command-line arguments forwarded verbatim to configManta.py.
  config_extra_args: ''

  # Additional command-line arguments forwarded verbatim to runWorkflow.py.
  run_extra_args: ''


# -----------------------------------------------------------------------------
# Mutect2
# Optional arguments.
# -----------------------------------------------------------------------------
mutect2:
  # VCF inputs for the Mutect2 steps. Judging by the key names these are a
  # germline resource, a panel of normals, and the variant sites used by
  # GetPileupSummaries -- confirm exact usage against the workflow.
  germline_resource_vcf_file: ''
  panel_of_normals_vcf_file: ''
  getpileupsummaries_variant_vcf_file: ''

  # Additional command-line arguments forwarded verbatim to GATK4 Mutect2.
  extra_args: ''

  # Additional command-line arguments forwarded verbatim to GATK4 GetPileupSummaries.
  getpileupsummaries_extra_args: ''

  # Comma-separated chromosome list (used to parallelize compute).
  chromosomes: 'chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY'


# -----------------------------------------------------------------------------
# Octopus
# Required when methods includes 'octopus' or 'all':
#   regions_txt_file
# Optional otherwise.
# -----------------------------------------------------------------------------
octopus:
  # Must be set whenever Octopus is run.
  regions_txt_file: ''

  # Additional command-line arguments forwarded verbatim to octopus.
  extra_args: '--min-mapping-quality 20 --min-supporting-reads 3'


# -----------------------------------------------------------------------------
# Sequenza
# Required when methods includes 'sequenza' or 'all':
#   assembly
# Optional otherwise.
# -----------------------------------------------------------------------------
sequenza:
  # Required when running Sequenza
  assembly: ""

  # Extra CLI arguments passed directly to sequenza-utils gc_wiggle.
  sequenzautils_gcwiggle: "-w 50"

  # Extra CLI arguments passed directly to sequenza-utils bam2seqz.
  sequenzautils_bam2seqz: "-N 20 --qformat sanger"

  # Extra CLI arguments passed directly to sequenza-utils seqz_binning.
  sequenzautils_seqzbinning: "--window 50"

  # Chromosomes (space-separated; for compute parallelization purposes).
  # Every entry must carry the 'chr' prefix used by the other entries
  # (the previous default misspelled chr22 as 'ch22').
  chromosomes: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"


# -----------------------------------------------------------------------------
# Strelka2
# Optional arguments.
# -----------------------------------------------------------------------------
strelka2:
  # Additional command-line arguments forwarded verbatim to
  # configureStrelkaSomaticWorkflow.py.
  extra_args: ''


# -----------------------------------------------------------------------------
# Svaba
# Optional arguments.
# -----------------------------------------------------------------------------
svaba:
  # Additional command-line arguments forwarded verbatim to svaba run.
  extra_args: '--hp --read-tracking'