1# Workflow for running the GATK CNV pipeline on a matched pair. Supports both WGS and WES. 2# 3# Notes: 4# 5# - The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the 6# GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). 7# These intervals will be padded on both sides by the amount specified by padding (default 250) 8# and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning, 9# e.g., for WES). For WGS, the intervals should simply cover the autosomal chromosomes (sex chromosomes may be 10# included, but care should be taken to 1) avoid creating panels of mixed sex, and 2) denoise case samples only 11# with panels containing only individuals of the same sex as the case samples). 12# 13# - Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals 14# argument, which accepts formats compatible with the GATK -XL argument 15# (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists). 16# This may be useful for excluding centromeric regions, etc. from analysis. Alternatively, these regions may 17# be manually filtered from the final callset. 18# 19# A reasonable blacklist for excluded intervals (-XL) can be found at: 20# hg19: gs://gatk-best-practices/somatic-b37/CNV_and_centromere_blacklist.hg19.list 21# hg38: gs://gatk-best-practices/somatic-hg38/CNV_and_centromere_blacklist.hg38liftover.list (untested) 22# 23# - The sites file (common_sites) should be a Picard or GATK-style interval list. This is a list of sites 24# of known variation at which allelic counts will be collected for use in modeling minor-allele fractions. 25# 26# - If you opt to run FuncotateSegments (i.e. set `is_run_funcotator` to `true`), then please also ensure that you have 27# the correct value for `funcotator_ref_version`. Treat `funcotator_ref_version` as required if 28# `is_run_funcotator` is `true`. Valid values for `funcotator_ref_version` are `hg38` and `hg19`. 29# The latter includes GRCh37. 30# 31# 32# - Example invocation: 33# 34# java -jar cromwell.jar run cnv_somatic_pair_workflow.wdl -i my_parameters.json 35# 36############# 37 38version 1.0 39 40import "../cnv_common_tasks.wdl" as CNVTasks 41import "cnv_somatic_oncotator_workflow.wdl" as CNVOncotator 42import "cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments 43 44workflow CNVSomaticPairWorkflow { 45 46 input { 47 ################################## 48 #### required basic arguments #### 49 ################################## 50 File common_sites 51 File intervals 52 File? blacklist_intervals 53 File tumor_bam 54 File tumor_bam_idx 55 File? normal_bam 56 File? normal_bam_idx 57 File read_count_pon 58 File ref_fasta_dict 59 File ref_fasta_fai 60 File ref_fasta 61 String gatk_docker 62 63 ################################## 64 #### optional basic arguments #### 65 ################################## 66 # For running oncotator 67 Boolean? is_run_oncotator 68 # For running funcotator 69 Boolean? is_run_funcotator 70 71 File? gatk4_jar_override 72 Int? preemptible_attempts 73 # Use as a last resort to increase the disk given to every task in case of ill behaving data 74 Int? emergency_extra_disk 75 76 # Required if BAM/CRAM is in a requester pays bucket 77 String? gcs_project_for_requester_pays 78 79 #################################################### 80 #### optional arguments for PreprocessIntervals #### 81 #################################################### 82 Int? padding 83 Int? bin_length 84 Int? mem_gb_for_preprocess_intervals 85 86 ############################################## 87 #### optional arguments for CollectCounts #### 88 ############################################## 89 String? collect_counts_format 90 Int? mem_gb_for_collect_counts 91 92 ##################################################### 93 #### optional arguments for CollectAllelicCounts #### 94 ##################################################### 95 String? minimum_base_quality 96 Int? mem_gb_for_collect_allelic_counts 97 98 ################################################## 99 #### optional arguments for DenoiseReadCounts #### 100 ################################################## 101 Int? number_of_eigensamples 102 Int? mem_gb_for_denoise_read_counts 103 104 ############################################## 105 #### optional arguments for ModelSegments #### 106 ############################################## 107 Int? max_num_segments_per_chromosome 108 Int? min_total_allele_count 109 Int? min_total_allele_count_normal 110 Float? genotyping_homozygous_log_ratio_threshold 111 Float? genotyping_base_error_rate 112 Float? kernel_variance_copy_ratio 113 Float? kernel_variance_allele_fraction 114 Float? kernel_scaling_allele_fraction 115 Int? kernel_approximation_dimension 116 Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] 117 Float? num_changepoints_penalty_factor 118 Float? minor_allele_fraction_prior_alpha 119 Int? num_samples_copy_ratio 120 Int? num_burn_in_copy_ratio 121 Int? num_samples_allele_fraction 122 Int? num_burn_in_allele_fraction 123 Float? smoothing_threshold_copy_ratio 124 Float? smoothing_threshold_allele_fraction 125 Int? max_num_smoothing_iterations 126 Int? num_smoothing_iterations_per_fit 127 Int? mem_gb_for_model_segments 128 129 ###################################################### 130 #### optional arguments for CallCopyRatioSegments #### 131 ###################################################### 132 Float? neutral_segment_copy_ratio_lower_bound 133 Float? neutral_segment_copy_ratio_upper_bound 134 Float? outlier_neutral_segment_copy_ratio_z_score_threshold 135 Float? calling_copy_ratio_z_score_threshold 136 Int? mem_gb_for_call_copy_ratio_segments 137 138 ######################################### 139 #### optional arguments for plotting #### 140 ######################################### 141 Int? minimum_contig_length 142 # If maximum_copy_ratio = Infinity, the maximum copy ratio will be automatically determined 143 String? maximum_copy_ratio 144 Float? point_size_copy_ratio 145 Float? point_size_allele_fraction 146 Int? mem_gb_for_plotting 147 148 ########################################## 149 #### optional arguments for Oncotator #### 150 ########################################## 151 String? additional_args_for_oncotator 152 String? oncotator_docker 153 Int? mem_gb_for_oncotator 154 Int? boot_disk_space_gb_for_oncotator 155 156 ################################################## 157 #### optional arguments for FuncotateSegments #### 158 ################################################## 159 String? additional_args_for_funcotator 160 String? funcotator_ref_version 161 Int? mem_gb_for_funcotator 162 File? funcotator_transcript_selection_list 163 File? funcotator_data_sources_tar_gz 164 String? funcotator_transcript_selection_mode 165 Array[String]? funcotator_annotation_defaults 166 Array[String]? funcotator_annotation_overrides 167 Array[String]? funcotator_excluded_fields 168 Boolean? funcotator_is_removing_untared_datasources 169 Int? funcotator_disk_space_gb 170 Boolean? funcotator_use_ssd 171 Int? funcotator_cpu 172 } 173 174 Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_dict, "GB") + size(ref_fasta_fai, "GB")) 175 Int read_count_pon_size = ceil(size(read_count_pon, "GB")) 176 Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bam_idx, "GB")) 177 Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) else 0 178 179 Int gatk4_override_size = if defined(gatk4_jar_override) then ceil(size(gatk4_jar_override, "GB")) else 0 180 # This is added to every task as padding, should increase if systematically you need more disk for every call 181 Int disk_pad = 20 + ceil(size(intervals, "GB")) + ceil(size(common_sites, "GB")) + gatk4_override_size + select_first([emergency_extra_disk, 0]) 182 183 File final_normal_bam = select_first([normal_bam, "null"]) 184 File final_normal_bam_idx = select_first([normal_bam_idx, "null"]) 185 186 Int preprocess_intervals_disk = ref_size + disk_pad 187 call CNVTasks.PreprocessIntervals { 188 input: 189 intervals = intervals, 190 blacklist_intervals = blacklist_intervals, 191 ref_fasta = ref_fasta, 192 ref_fasta_fai = ref_fasta_fai, 193 ref_fasta_dict = ref_fasta_dict, 194 padding = padding, 195 bin_length = bin_length, 196 gatk4_jar_override = gatk4_jar_override, 197 gatk_docker = gatk_docker, 198 mem_gb = mem_gb_for_preprocess_intervals, 199 disk_space_gb = preprocess_intervals_disk, 200 preemptible_attempts = preemptible_attempts 201 } 202 203 Int collect_counts_tumor_disk = tumor_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad 204 call CNVTasks.CollectCounts as CollectCountsTumor { 205 input: 206 intervals = PreprocessIntervals.preprocessed_intervals, 207 bam = tumor_bam, 208 bam_idx = tumor_bam_idx, 209 ref_fasta = ref_fasta, 210 ref_fasta_fai = ref_fasta_fai, 211 ref_fasta_dict = ref_fasta_dict, 212 format = collect_counts_format, 213 enable_indexing = false, 214 gatk4_jar_override = gatk4_jar_override, 215 gatk_docker = gatk_docker, 216 mem_gb = mem_gb_for_collect_counts, 217 disk_space_gb = collect_counts_tumor_disk, 218 preemptible_attempts = preemptible_attempts, 219 gcs_project_for_requester_pays = gcs_project_for_requester_pays 220 } 221 222 Int collect_allelic_counts_tumor_disk = tumor_bam_size + ref_size + disk_pad 223 call CNVTasks.CollectAllelicCounts as CollectAllelicCountsTumor { 224 input: 225 common_sites = common_sites, 226 bam = tumor_bam, 227 bam_idx = tumor_bam_idx, 228 ref_fasta = ref_fasta, 229 ref_fasta_dict = ref_fasta_dict, 230 ref_fasta_fai = ref_fasta_fai, 231 minimum_base_quality = minimum_base_quality, 232 gatk4_jar_override = gatk4_jar_override, 233 gatk_docker = gatk_docker, 234 mem_gb = mem_gb_for_collect_allelic_counts, 235 disk_space_gb = collect_allelic_counts_tumor_disk, 236 preemptible_attempts = preemptible_attempts, 237 gcs_project_for_requester_pays = gcs_project_for_requester_pays 238 } 239 240 Int denoise_read_counts_tumor_disk = read_count_pon_size + ceil(size(CollectCountsTumor.counts, "GB")) + disk_pad 241 call DenoiseReadCounts as DenoiseReadCountsTumor { 242 input: 243 entity_id = CollectCountsTumor.entity_id, 244 read_counts = CollectCountsTumor.counts, 245 read_count_pon = read_count_pon, 246 number_of_eigensamples = number_of_eigensamples, 247 gatk4_jar_override = gatk4_jar_override, 248 gatk_docker = gatk_docker, 249 mem_gb = mem_gb_for_denoise_read_counts, 250 disk_space_gb = denoise_read_counts_tumor_disk, 251 preemptible_attempts = preemptible_attempts 252 } 253 254 Int model_segments_normal_portion = if defined(normal_bam) then ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) else 0 255 Int model_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsTumor.allelic_counts, "GB")) + model_segments_normal_portion + disk_pad 256 call ModelSegments as ModelSegmentsTumor { 257 input: 258 entity_id = CollectCountsTumor.entity_id, 259 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios, 260 allelic_counts = CollectAllelicCountsTumor.allelic_counts, 261 normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts, 262 max_num_segments_per_chromosome = max_num_segments_per_chromosome, 263 min_total_allele_count = min_total_allele_count, 264 min_total_allele_count_normal = min_total_allele_count_normal, 265 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold, 266 genotyping_base_error_rate = genotyping_base_error_rate, 267 kernel_variance_copy_ratio = kernel_variance_copy_ratio, 268 kernel_variance_allele_fraction = kernel_variance_allele_fraction, 269 kernel_scaling_allele_fraction = kernel_scaling_allele_fraction, 270 kernel_approximation_dimension = kernel_approximation_dimension, 271 window_sizes = window_sizes, 272 num_changepoints_penalty_factor = num_changepoints_penalty_factor, 273 minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha, 274 num_samples_copy_ratio = num_samples_copy_ratio, 275 num_burn_in_copy_ratio = num_burn_in_copy_ratio, 276 num_samples_allele_fraction = num_samples_allele_fraction, 277 num_burn_in_allele_fraction = num_burn_in_allele_fraction, 278 smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio, 279 smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction, 280 max_num_smoothing_iterations = max_num_smoothing_iterations, 281 num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit, 282 gatk4_jar_override = gatk4_jar_override, 283 gatk_docker = gatk_docker, 284 mem_gb = mem_gb_for_model_segments, 285 disk_space_gb = model_segments_tumor_disk, 286 preemptible_attempts = preemptible_attempts 287 } 288 289 Int copy_ratio_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.copy_ratio_only_segments, "GB")) + disk_pad 290 call CallCopyRatioSegments as CallCopyRatioSegmentsTumor { 291 input: 292 entity_id = CollectCountsTumor.entity_id, 293 copy_ratio_segments = ModelSegmentsTumor.copy_ratio_only_segments, 294 neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound, 295 neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound, 296 outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold, 297 calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold, 298 gatk4_jar_override = gatk4_jar_override, 299 gatk_docker = gatk_docker, 300 mem_gb = mem_gb_for_call_copy_ratio_segments, 301 disk_space_gb = copy_ratio_segments_tumor_disk, 302 preemptible_attempts = preemptible_attempts 303 } 304 305 # The F=files from other tasks are small enough to just combine into one disk variable and pass to the tumor plotting tasks 306 Int plot_tumor_disk = ref_size + ceil(size(DenoiseReadCountsTumor.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsTumor.modeled_segments, "GB")) + disk_pad 307 call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosTumor { 308 input: 309 entity_id = CollectCountsTumor.entity_id, 310 standardized_copy_ratios = DenoiseReadCountsTumor.standardized_copy_ratios, 311 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios, 312 ref_fasta_dict = ref_fasta_dict, 313 minimum_contig_length = minimum_contig_length, 314 maximum_copy_ratio = maximum_copy_ratio, 315 point_size_copy_ratio = point_size_copy_ratio, 316 gatk4_jar_override = gatk4_jar_override, 317 gatk_docker = gatk_docker, 318 mem_gb = mem_gb_for_plotting, 319 disk_space_gb = plot_tumor_disk, 320 preemptible_attempts = preemptible_attempts 321 } 322 323 call PlotModeledSegments as PlotModeledSegmentsTumor { 324 input: 325 entity_id = CollectCountsTumor.entity_id, 326 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios, 327 het_allelic_counts = ModelSegmentsTumor.het_allelic_counts, 328 modeled_segments = ModelSegmentsTumor.modeled_segments, 329 ref_fasta_dict = ref_fasta_dict, 330 minimum_contig_length = minimum_contig_length, 331 maximum_copy_ratio = maximum_copy_ratio, 332 point_size_copy_ratio = point_size_copy_ratio, 333 point_size_allele_fraction = point_size_allele_fraction, 334 gatk4_jar_override = gatk4_jar_override, 335 gatk_docker = gatk_docker, 336 mem_gb = mem_gb_for_plotting, 337 disk_space_gb = plot_tumor_disk, 338 preemptible_attempts = preemptible_attempts 339 } 340 341 Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad 342 if (defined(normal_bam)) { 343 call CNVTasks.CollectCounts as CollectCountsNormal { 344 input: 345 intervals = PreprocessIntervals.preprocessed_intervals, 346 bam = final_normal_bam, 347 bam_idx = final_normal_bam_idx, 348 ref_fasta = ref_fasta, 349 ref_fasta_fai = ref_fasta_fai, 350 ref_fasta_dict = ref_fasta_dict, 351 format = collect_counts_format, 352 enable_indexing = false, 353 gatk4_jar_override = gatk4_jar_override, 354 gatk_docker = gatk_docker, 355 mem_gb = mem_gb_for_collect_counts, 356 disk_space_gb = collect_counts_normal_disk, 357 preemptible_attempts = preemptible_attempts, 358 gcs_project_for_requester_pays = gcs_project_for_requester_pays 359 } 360 361 Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad 362 call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal { 363 input: 364 common_sites = common_sites, 365 bam = final_normal_bam, 366 bam_idx = final_normal_bam_idx, 367 ref_fasta = ref_fasta, 368 ref_fasta_dict = ref_fasta_dict, 369 ref_fasta_fai = ref_fasta_fai, 370 minimum_base_quality = minimum_base_quality, 371 gatk4_jar_override = gatk4_jar_override, 372 gatk_docker = gatk_docker, 373 mem_gb = mem_gb_for_collect_allelic_counts, 374 disk_space_gb = collect_allelic_counts_normal_disk, 375 preemptible_attempts = preemptible_attempts, 376 gcs_project_for_requester_pays = gcs_project_for_requester_pays 377 } 378 379 Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad 380 call DenoiseReadCounts as DenoiseReadCountsNormal { 381 input: 382 entity_id = CollectCountsNormal.entity_id, 383 read_counts = CollectCountsNormal.counts, 384 read_count_pon = read_count_pon, 385 number_of_eigensamples = number_of_eigensamples, 386 gatk4_jar_override = gatk4_jar_override, 387 gatk_docker = gatk_docker, 388 mem_gb = mem_gb_for_denoise_read_counts, 389 disk_space_gb = denoise_read_counts_normal_disk, 390 preemptible_attempts = preemptible_attempts 391 } 392 393 Int model_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) + disk_pad 394 call ModelSegments as ModelSegmentsNormal { 395 input: 396 entity_id = CollectCountsNormal.entity_id, 397 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, 398 allelic_counts = CollectAllelicCountsNormal.allelic_counts, 399 max_num_segments_per_chromosome = max_num_segments_per_chromosome, 400 min_total_allele_count = min_total_allele_count_normal, 401 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold, 402 genotyping_base_error_rate = genotyping_base_error_rate, 403 kernel_variance_copy_ratio = kernel_variance_copy_ratio, 404 kernel_variance_allele_fraction = kernel_variance_allele_fraction, 405 kernel_scaling_allele_fraction = kernel_scaling_allele_fraction, 406 kernel_approximation_dimension = kernel_approximation_dimension, 407 window_sizes = window_sizes, 408 num_changepoints_penalty_factor = num_changepoints_penalty_factor, 409 minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha, 410 num_samples_copy_ratio = num_samples_copy_ratio, 411 num_burn_in_copy_ratio = num_burn_in_copy_ratio, 412 num_samples_allele_fraction = num_samples_allele_fraction, 413 num_burn_in_allele_fraction = num_burn_in_allele_fraction, 414 smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio, 415 smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction, 416 max_num_smoothing_iterations = max_num_smoothing_iterations, 417 num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit, 418 gatk4_jar_override = gatk4_jar_override, 419 gatk_docker = gatk_docker, 420 mem_gb = mem_gb_for_model_segments, 421 disk_space_gb = model_segments_normal_disk, 422 preemptible_attempts = preemptible_attempts 423 } 424 425 Int copy_ratio_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.copy_ratio_only_segments, "GB")) + disk_pad 426 call CallCopyRatioSegments as CallCopyRatioSegmentsNormal { 427 input: 428 entity_id = CollectCountsNormal.entity_id, 429 copy_ratio_segments = ModelSegmentsNormal.copy_ratio_only_segments, 430 neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound, 431 neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound, 432 outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold, 433 calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold, 434 gatk4_jar_override = gatk4_jar_override, 435 gatk_docker = gatk_docker, 436 mem_gb = mem_gb_for_call_copy_ratio_segments, 437 disk_space_gb = copy_ratio_segments_normal_disk, 438 preemptible_attempts = preemptible_attempts 439 } 440 441 # The files from other tasks are small enough to just combine into one disk variable and pass to the normal plotting tasks 442 Int plot_normal_disk = ref_size + ceil(size(DenoiseReadCountsNormal.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsNormal.modeled_segments, "GB")) + disk_pad 443 call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosNormal { 444 input: 445 entity_id = CollectCountsNormal.entity_id, 446 standardized_copy_ratios = DenoiseReadCountsNormal.standardized_copy_ratios, 447 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, 448 ref_fasta_dict = ref_fasta_dict, 449 minimum_contig_length = minimum_contig_length, 450 maximum_copy_ratio = maximum_copy_ratio, 451 point_size_copy_ratio = point_size_copy_ratio, 452 gatk4_jar_override = gatk4_jar_override, 453 gatk_docker = gatk_docker, 454 mem_gb = mem_gb_for_plotting, 455 disk_space_gb = plot_normal_disk, 456 preemptible_attempts = preemptible_attempts 457 } 458 459 call PlotModeledSegments as PlotModeledSegmentsNormal { 460 input: 461 entity_id = CollectCountsNormal.entity_id, 462 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, 463 het_allelic_counts = ModelSegmentsNormal.het_allelic_counts, 464 modeled_segments = ModelSegmentsNormal.modeled_segments, 465 ref_fasta_dict = ref_fasta_dict, 466 minimum_contig_length = minimum_contig_length, 467 maximum_copy_ratio = maximum_copy_ratio, 468 point_size_copy_ratio = point_size_copy_ratio, 469 point_size_allele_fraction = point_size_allele_fraction, 470 gatk4_jar_override = gatk4_jar_override, 471 gatk_docker = gatk_docker, 472 mem_gb = mem_gb_for_plotting, 473 disk_space_gb = plot_normal_disk, 474 preemptible_attempts = preemptible_attempts 475 } 476 } 477 478 if (select_first([is_run_oncotator, false])) { 479 call CNVOncotator.CNVOncotatorWorkflow as CNVOncotatorWorkflow { 480 input: 481 called_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments, 482 additional_args = additional_args_for_oncotator, 483 oncotator_docker = oncotator_docker, 484 mem_gb_for_oncotator = mem_gb_for_oncotator, 485 boot_disk_space_gb_for_oncotator = boot_disk_space_gb_for_oncotator, 486 preemptible_attempts = preemptible_attempts 487 } 488 } 489 if (select_first([is_run_funcotator, false])) { 490 call CNVFuncotateSegments.CNVFuncotateSegmentsWorkflow as CNVFuncotateSegmentsWorkflow { 491 input: 492 input_seg_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments, 493 funcotator_ref_version = select_first([funcotator_ref_version, "hg19"]), 494 extra_args = additional_args_for_funcotator, 495 ref_fasta = ref_fasta, 496 ref_fasta_fai = ref_fasta_fai, 497 ref_fasta_dict = ref_fasta_dict, 498 transcript_selection_list = funcotator_transcript_selection_list, 499 funcotator_data_sources_tar_gz = funcotator_data_sources_tar_gz, 500 gatk4_jar_override = gatk4_jar_override, 501 gatk_docker = gatk_docker, 502 mem_gb = mem_gb_for_funcotator, 503 preemptible_attempts = preemptible_attempts, 504 transcript_selection_mode = funcotator_transcript_selection_mode, 505 annotation_defaults = funcotator_annotation_defaults, 506 annotation_overrides = funcotator_annotation_overrides, 507 funcotator_excluded_fields = funcotator_excluded_fields, 508 is_removing_untared_datasources = funcotator_is_removing_untared_datasources, 509 disk_space_gb = funcotator_disk_space_gb, 510 use_ssd = funcotator_use_ssd, 511 cpu = funcotator_cpu 512 } 513 } 514 515 output { 516 File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals 517 518 File read_counts_entity_id_tumor = CollectCountsTumor.entity_id 519 File read_counts_tumor = CollectCountsTumor.counts 520 File allelic_counts_entity_id_tumor = CollectAllelicCountsTumor.entity_id 521 File allelic_counts_tumor = CollectAllelicCountsTumor.allelic_counts 522 File denoised_copy_ratios_tumor = DenoiseReadCountsTumor.denoised_copy_ratios 523 File standardized_copy_ratios_tumor = DenoiseReadCountsTumor.standardized_copy_ratios 524 File het_allelic_counts_tumor = ModelSegmentsTumor.het_allelic_counts 525 File normal_het_allelic_counts_tumor = ModelSegmentsTumor.normal_het_allelic_counts 526 File copy_ratio_only_segments_tumor = ModelSegmentsTumor.copy_ratio_only_segments 527 File copy_ratio_legacy_segments_tumor = ModelSegmentsTumor.copy_ratio_legacy_segments 528 File allele_fraction_legacy_segments_tumor = ModelSegmentsTumor.allele_fraction_legacy_segments 529 File modeled_segments_begin_tumor = ModelSegmentsTumor.modeled_segments_begin 530 File copy_ratio_parameters_begin_tumor = ModelSegmentsTumor.copy_ratio_parameters_begin 531 File allele_fraction_parameters_begin_tumor = ModelSegmentsTumor.allele_fraction_parameters_begin 532 File modeled_segments_tumor = ModelSegmentsTumor.modeled_segments 533 File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters 534 File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters 535 File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments 536 File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments 537 File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot 538 File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD 539 Float standardized_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD_value 540 File denoised_MAD_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD 541 Float denoised_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD_value 542 File delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD 543 Float delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD_value 544 File scaled_delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD 545 Float scaled_delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD_value 546 File modeled_segments_plot_tumor = PlotModeledSegmentsTumor.modeled_segments_plot 547 548 File? read_counts_entity_id_normal = CollectCountsNormal.entity_id 549 File? read_counts_normal = CollectCountsNormal.counts 550 File? allelic_counts_entity_id_normal = CollectAllelicCountsNormal.entity_id 551 File? allelic_counts_normal = CollectAllelicCountsNormal.allelic_counts 552 File? denoised_copy_ratios_normal = DenoiseReadCountsNormal.denoised_copy_ratios 553 File? standardized_copy_ratios_normal = DenoiseReadCountsNormal.standardized_copy_ratios 554 File? het_allelic_counts_normal = ModelSegmentsNormal.het_allelic_counts 555 File? normal_het_allelic_counts_normal = ModelSegmentsNormal.normal_het_allelic_counts 556 File? copy_ratio_only_segments_normal = ModelSegmentsNormal.copy_ratio_only_segments 557 File? copy_ratio_legacy_segments_normal = ModelSegmentsNormal.copy_ratio_legacy_segments 558 File? allele_fraction_legacy_segments_normal = ModelSegmentsNormal.allele_fraction_legacy_segments 559 File? modeled_segments_begin_normal = ModelSegmentsNormal.modeled_segments_begin 560 File? copy_ratio_parameters_begin_normal = ModelSegmentsNormal.copy_ratio_parameters_begin 561 File? allele_fraction_parameters_begin_normal = ModelSegmentsNormal.allele_fraction_parameters_begin 562 File? modeled_segments_normal = ModelSegmentsNormal.modeled_segments 563 File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters 564 File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters 565 File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments 566 File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments 567 File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot 568 File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD 569 Float? standardized_MAD_value_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD_value 570 File? denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD 571 Float? denoised_MAD_value_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD_value 572 File? delta_MAD_normal = PlotDenoisedCopyRatiosNormal.delta_MAD 573 Float? delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.delta_MAD_value 574 File? scaled_delta_MAD_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD 575 Float? scaled_delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD_value 576 File? modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot 577 578 File oncotated_called_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_file, "null"]) 579 File oncotated_called_gene_list_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_gene_list_file, "null"]) 580 File funcotated_called_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_seg_simple_tsv, "null"]) 581 File funcotated_called_gene_list_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_gene_list_tsv, "null"]) 582 } 583} 584 585task DenoiseReadCounts { 586 input { 587 String entity_id 588 File read_counts 589 File read_count_pon 590 Int? number_of_eigensamples #use all eigensamples in panel by default 591 File? gatk4_jar_override 592 593 # Runtime parameters 594 String gatk_docker 595 Int? mem_gb 596 Int? disk_space_gb 597 Boolean use_ssd = false 598 Int? cpu 599 Int? preemptible_attempts 600 } 601 602 Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 603 Int command_mem_mb = machine_mem_mb - 1000 604 605 command <<< 606 set -e 607 export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} 608 609 gatk --java-options "-Xmx~{command_mem_mb}m" DenoiseReadCounts \ 610 --input ~{read_counts} \ 611 --count-panel-of-normals ~{read_count_pon} \ 612 ~{"--number-of-eigensamples " + number_of_eigensamples} \ 613 --standardized-copy-ratios ~{entity_id}.standardizedCR.tsv \ 614 --denoised-copy-ratios ~{entity_id}.denoisedCR.tsv 615 >>> 616 617 runtime { 618 docker: "~{gatk_docker}" 619 memory: machine_mem_mb + " MB" 620 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" 621 cpu: select_first([cpu, 1]) 622 preemptible: select_first([preemptible_attempts, 5]) 623 } 624 625 output { 626 File standardized_copy_ratios = "~{entity_id}.standardizedCR.tsv" 627 File denoised_copy_ratios = "~{entity_id}.denoisedCR.tsv" 628 } 629} 630 631task ModelSegments { 632 input { 633 String entity_id 634 File denoised_copy_ratios 635 File allelic_counts 636 File? normal_allelic_counts 637 Int? max_num_segments_per_chromosome 638 Int? min_total_allele_count 639 Int? min_total_allele_count_normal 640 Float? genotyping_homozygous_log_ratio_threshold 641 Float? genotyping_base_error_rate 642 Float? kernel_variance_copy_ratio 643 Float? kernel_variance_allele_fraction 644 Float? kernel_scaling_allele_fraction 645 Int? kernel_approximation_dimension 646 Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256] 647 Float? num_changepoints_penalty_factor 648 Float? minor_allele_fraction_prior_alpha 649 Int? num_samples_copy_ratio 650 Int? num_burn_in_copy_ratio 651 Int? num_samples_allele_fraction 652 Int? num_burn_in_allele_fraction 653 Float? smoothing_threshold_copy_ratio 654 Float? smoothing_threshold_allele_fraction 655 Int? max_num_smoothing_iterations 656 Int? num_smoothing_iterations_per_fit 657 String? output_dir 658 File? gatk4_jar_override 659 660 # Runtime parameters 661 String gatk_docker 662 Int? mem_gb 663 Int? disk_space_gb 664 Boolean use_ssd = false 665 Int? cpu 666 Int? preemptible_attempts 667 } 668 669 Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 670 # ModelSegments seems to need at least 3GB of overhead to run 671 Int command_mem_mb = machine_mem_mb - 3000 672 673 # If optional output_dir not specified, use "out" 674 String output_dir_ = select_first([output_dir, "out"]) 675 676 # default values are min_total_allele_count_ = 0 in matched-normal mode 677 # = 30 in case-only mode 678 Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30 679 Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count]) 680 681 command <<< 682 set -e 683 export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} 684 685 gatk --java-options "-Xmx~{command_mem_mb}m" ModelSegments \ 686 --denoised-copy-ratios ~{denoised_copy_ratios} \ 687 --allelic-counts ~{allelic_counts} \ 688 ~{"--normal-allelic-counts " + normal_allelic_counts} \ 689 --minimum-total-allele-count-case ~{min_total_allele_count_} \ 690 --minimum-total-allele-count-normal ~{default="30" min_total_allele_count_normal} \ 691 --genotyping-homozygous-log-ratio-threshold ~{default="-10.0" genotyping_homozygous_log_ratio_threshold} \ 692 --genotyping-base-error-rate ~{default="0.05" genotyping_base_error_rate} \ 693 --maximum-number-of-segments-per-chromosome ~{default="1000" max_num_segments_per_chromosome} \ 694 --kernel-variance-copy-ratio ~{default="0.0" kernel_variance_copy_ratio} \ 695 --kernel-variance-allele-fraction ~{default="0.025" kernel_variance_allele_fraction} \ 696 --kernel-scaling-allele-fraction ~{default="1.0" kernel_scaling_allele_fraction} \ 697 --kernel-approximation-dimension ~{default="100" kernel_approximation_dimension} \ 698 --window-size ~{sep=" --window-size " window_sizes} \ 699 --number-of-changepoints-penalty-factor ~{default="1.0" num_changepoints_penalty_factor} \ 700 --minor-allele-fraction-prior-alpha ~{default="25.0" minor_allele_fraction_prior_alpha} \ 701 --number-of-samples-copy-ratio ~{default="100" num_samples_copy_ratio} \ 702 --number-of-burn-in-samples-copy-ratio ~{default="50" num_burn_in_copy_ratio} \ 703 --number-of-samples-allele-fraction ~{default="100" num_samples_allele_fraction} \ 704 --number-of-burn-in-samples-allele-fraction ~{default="50" num_burn_in_allele_fraction} \ 705 --smoothing-credible-interval-threshold-copy-ratio ~{default="2.0" smoothing_threshold_copy_ratio} \ 706 --smoothing-credible-interval-threshold-allele-fraction ~{default="2.0" smoothing_threshold_allele_fraction} \ 707 --maximum-number-of-smoothing-iterations ~{default="10" max_num_smoothing_iterations} \ 708 --number-of-smoothing-iterations-per-fit ~{default="0" num_smoothing_iterations_per_fit} \ 709 --output ~{output_dir_} \ 710 --output-prefix ~{entity_id} 711 712 # We need to create the file even if the above command doesn't so we have something to delocalize 713 # If no file is created by the above task then it will copy out an empty file 714 touch ~{output_dir_}/~{entity_id}.hets.normal.tsv 715 >>> 716 717 runtime { 718 docker: "~{gatk_docker}" 719 memory: machine_mem_mb + " MB" 720 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" 721 cpu: select_first([cpu, 1]) 722 preemptible: select_first([preemptible_attempts, 5]) 723 } 724 725 output { 726 File het_allelic_counts = "~{output_dir_}/~{entity_id}.hets.tsv" 727 File normal_het_allelic_counts = "~{output_dir_}/~{entity_id}.hets.normal.tsv" 728 File copy_ratio_only_segments = "~{output_dir_}/~{entity_id}.cr.seg" 729 File copy_ratio_legacy_segments = "~{output_dir_}/~{entity_id}.cr.igv.seg" 730 File allele_fraction_legacy_segments = "~{output_dir_}/~{entity_id}.af.igv.seg" 731 File modeled_segments_begin = "~{output_dir_}/~{entity_id}.modelBegin.seg" 732 File copy_ratio_parameters_begin = "~{output_dir_}/~{entity_id}.modelBegin.cr.param" 733 File allele_fraction_parameters_begin = "~{output_dir_}/~{entity_id}.modelBegin.af.param" 734 File modeled_segments = "~{output_dir_}/~{entity_id}.modelFinal.seg" 735 File copy_ratio_parameters = "~{output_dir_}/~{entity_id}.modelFinal.cr.param" 736 File allele_fraction_parameters = "~{output_dir_}/~{entity_id}.modelFinal.af.param" 737 } 738} 739 740task CallCopyRatioSegments { 741 input { 742 String entity_id 743 File copy_ratio_segments 744 Float? neutral_segment_copy_ratio_lower_bound 745 Float? neutral_segment_copy_ratio_upper_bound 746 Float? outlier_neutral_segment_copy_ratio_z_score_threshold 747 Float? calling_copy_ratio_z_score_threshold 748 File? gatk4_jar_override 749 750 # Runtime parameters 751 String gatk_docker 752 Int? mem_gb 753 Int? disk_space_gb 754 Boolean use_ssd = false 755 Int? cpu 756 Int? preemptible_attempts 757 } 758 759 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 760 Int command_mem_mb = machine_mem_mb - 1000 761 762 command <<< 763 set -e 764 export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} 765 766 gatk --java-options "-Xmx~{command_mem_mb}m" CallCopyRatioSegments \ 767 --input ~{copy_ratio_segments} \ 768 --neutral-segment-copy-ratio-lower-bound ~{default="0.9" neutral_segment_copy_ratio_lower_bound} \ 769 --neutral-segment-copy-ratio-upper-bound ~{default="1.1" neutral_segment_copy_ratio_upper_bound} \ 770 --outlier-neutral-segment-copy-ratio-z-score-threshold ~{default="2.0" outlier_neutral_segment_copy_ratio_z_score_threshold} \ 771 --calling-copy-ratio-z-score-threshold ~{default="2.0" calling_copy_ratio_z_score_threshold} \ 772 --output ~{entity_id}.called.seg 773 >>> 774 775 runtime { 776 docker: "~{gatk_docker}" 777 memory: machine_mem_mb + " MB" 778 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" 779 cpu: select_first([cpu, 1]) 780 preemptible: select_first([preemptible_attempts, 5]) 781 } 782 783 output { 784 File called_copy_ratio_segments = "~{entity_id}.called.seg" 785 File called_copy_ratio_legacy_segments = "~{entity_id}.called.igv.seg" 786 } 787} 788 789task PlotDenoisedCopyRatios { 790 input { 791 String entity_id 792 File standardized_copy_ratios 793 File denoised_copy_ratios 794 File ref_fasta_dict 795 Int? minimum_contig_length 796 String? maximum_copy_ratio 797 Float? point_size_copy_ratio 798 String? output_dir 799 File? gatk4_jar_override 800 801 # Runtime parameters 802 String gatk_docker 803 Int? mem_gb 804 Int? disk_space_gb 805 Boolean use_ssd = false 806 Int? cpu 807 Int? preemptible_attempts 808 } 809 810 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 811 Int command_mem_mb = machine_mem_mb - 1000 812 813 # If optional output_dir not specified, use "out" 814 String output_dir_ = select_first([output_dir, "out"]) 815 816 command <<< 817 set -e 818 export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} 819 820 gatk --java-options "-Xmx~{command_mem_mb}m" PlotDenoisedCopyRatios \ 821 --standardized-copy-ratios ~{standardized_copy_ratios} \ 822 --denoised-copy-ratios ~{denoised_copy_ratios} \ 823 --sequence-dictionary ~{ref_fasta_dict} \ 824 --minimum-contig-length ~{default="1000000" minimum_contig_length} \ 825 --maximum-copy-ratio ~{default="4.0" maximum_copy_ratio} \ 826 --point-size-copy-ratio ~{default="0.2" point_size_copy_ratio} \ 827 --output ~{output_dir_} \ 828 --output-prefix ~{entity_id} 829 >>> 830 831 runtime { 832 docker: "~{gatk_docker}" 833 memory: machine_mem_mb + " MB" 834 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" 835 cpu: select_first([cpu, 1]) 836 preemptible: select_first([preemptible_attempts, 5]) 837 } 838 839 output { 840 File denoised_copy_ratios_plot = "~{output_dir_}/~{entity_id}.denoised.png" 841 File standardized_MAD = "~{output_dir_}/~{entity_id}.standardizedMAD.txt" 842 Float standardized_MAD_value = read_float(standardized_MAD) 843 File denoised_MAD = "~{output_dir_}/~{entity_id}.denoisedMAD.txt" 844 Float denoised_MAD_value = read_float(denoised_MAD) 845 File delta_MAD = "~{output_dir_}/~{entity_id}.deltaMAD.txt" 846 Float delta_MAD_value = read_float(delta_MAD) 847 File scaled_delta_MAD = "~{output_dir_}/~{entity_id}.scaledDeltaMAD.txt" 848 Float scaled_delta_MAD_value = read_float(scaled_delta_MAD) 849 } 850} 851 852task PlotModeledSegments { 853 input { 854 String entity_id 855 File denoised_copy_ratios 856 File het_allelic_counts 857 File modeled_segments 858 File ref_fasta_dict 859 Int? minimum_contig_length 860 String? maximum_copy_ratio 861 Float? point_size_copy_ratio 862 Float? point_size_allele_fraction 863 String? output_dir 864 File? gatk4_jar_override 865 866 # Runtime parameters 867 String gatk_docker 868 Int? mem_gb 869 Int? disk_space_gb 870 Boolean use_ssd = false 871 Int? cpu 872 Int? preemptible_attempts 873 } 874 875 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 876 Int command_mem_mb = machine_mem_mb - 1000 877 878 # If optional output_dir not specified, use "out" 879 String output_dir_ = select_first([output_dir, "out"]) 880 881 command <<< 882 set -e 883 export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override} 884 885 gatk --java-options "-Xmx~{command_mem_mb}m" PlotModeledSegments \ 886 --denoised-copy-ratios ~{denoised_copy_ratios} \ 887 --allelic-counts ~{het_allelic_counts} \ 888 --segments ~{modeled_segments} \ 889 --sequence-dictionary ~{ref_fasta_dict} \ 890 --minimum-contig-length ~{default="1000000" minimum_contig_length} \ 891 --maximum-copy-ratio ~{default="4.0" maximum_copy_ratio} \ 892 --point-size-copy-ratio ~{default="0.2" point_size_copy_ratio} \ 893 --point-size-allele-fraction ~{default="0.4" point_size_allele_fraction} \ 894 --output ~{output_dir_} \ 895 --output-prefix ~{entity_id} 896 >>> 897 898 runtime { 899 docker: "~{gatk_docker}" 900 memory: machine_mem_mb + " MB" 901 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD" 902 cpu: select_first([cpu, 1]) 903 preemptible: select_first([preemptible_attempts, 5]) 904 } 905 906 output { 907 File modeled_segments_plot = "~{output_dir_}/~{entity_id}.modeled.png" 908 } 909} 910