1#
# A wrapper around the gCNV case workflow that lowers computing cost by making it feasible to run on
# preemptible cloud instances with low memory requirements. The input samples are split into blocks of
# num_samples_per_scatter_block, and the case workflow is run once per block, so the CPU, memory and disk
# requirements of the GermlineCNVCaller and DetermineGermlineContigPloidy tasks can be lowered.
5#
6#
7# - Example invocation:
8#
9#       java -jar cromwell.jar run cnv_germline_case_scattered_workflow.wdl -i my_parameters.json
10#
11####################
12
13version 1.0
14
15import "cnv_germline_case_workflow.wdl" as GermlineCNVCaseWorkflow
16import "../cnv_common_tasks.wdl" as CNVTasks
17
# Runs the gCNV case workflow block-wise: the BAM list and the index list are each split into blocks
# (block size given by num_samples_per_scatter_block), CNVGermlineCaseWorkflow is called once per block,
# and the per-block outputs are merged back into flat arrays.
workflow CNVGermlineCaseScatteredWorkflow {

    input {
        # ---- Basic arguments (required unless the type carries a '?') ----
        File intervals
        File? blacklist_intervals
        File filtered_intervals
        Array[String]+ normal_bams
        Array[String]+ normal_bais
        File contig_ploidy_model_tar
        Array[File]+ gcnv_model_tars
        Int num_intervals_per_scatter
        File ref_fasta_dict
        File ref_fasta_fai
        File ref_fasta
        String gatk_docker
        # Block size handed to SplitInputArray for both the BAM list and the index list.
        Int num_samples_per_scatter_block

        # ---- Optional basic arguments ----
        File? gatk4_jar_override
        Int? preemptible_attempts

        # Needed when the BAM/CRAM files live in a requester-pays bucket.
        String? gcs_project_for_requester_pays

        # ---- Optional arguments for PreprocessIntervals ----
        Int? padding
        Int? bin_length

        # ---- Optional arguments for CollectCounts ----
        Array[String]? disabled_read_filters_for_collect_counts
        String? collect_counts_format
        Boolean? collect_counts_enable_indexing
        Int? mem_gb_for_collect_counts

        # ---- Optional arguments for DetermineGermlineContigPloidyCaseMode ----
        Float? ploidy_mapping_error_rate
        Float? ploidy_sample_psi_scale
        Int? mem_gb_for_determine_germline_contig_ploidy
        Int? cpu_for_determine_germline_contig_ploidy
        Int? disk_for_determine_germline_contig_ploidy

        # ---- Optional arguments for GermlineCNVCallerCaseMode ----
        Float? gcnv_p_alt
        Float? gcnv_cnv_coherence_length
        Int? gcnv_max_copy_number
        Int? mem_gb_for_germline_cnv_caller
        Int? cpu_for_germline_cnv_caller
        Int? disk_for_germline_cnv_caller

        # Germline CNV denoising model (optional)
        Float? gcnv_mapping_error_rate
        Float? gcnv_sample_psi_scale
        Float? gcnv_depth_correction_tau
        String? gcnv_copy_number_posterior_expectation_mode
        Int? gcnv_active_class_padding_hybrid_mode

        # Hybrid ADVI (optional)
        Float? gcnv_learning_rate
        Float? gcnv_adamax_beta_1
        Float? gcnv_adamax_beta_2
        Int? gcnv_log_emission_samples_per_round
        Float? gcnv_log_emission_sampling_median_rel_error
        Int? gcnv_log_emission_sampling_rounds
        Int? gcnv_max_advi_iter_first_epoch
        Int? gcnv_max_advi_iter_subsequent_epochs
        Int? gcnv_min_training_epochs
        Int? gcnv_max_training_epochs
        Float? gcnv_initial_temperature
        Int? gcnv_num_thermal_advi_iters
        Int? gcnv_convergence_snr_averaging_window
        Float? gcnv_convergence_snr_trigger_threshold
        Int? gcnv_convergence_snr_countdown_window
        Int? gcnv_max_calling_iters
        Float? gcnv_caller_update_convergence_threshold
        Float? gcnv_caller_internal_admixing_rate
        Float? gcnv_caller_external_admixing_rate
        Boolean? gcnv_disable_annealing

        # ---- Arguments for PostprocessGermlineCNVCalls ----
        Int ref_copy_number_autosomal_contigs
        Array[String]? allosomal_contigs

        # ---- Arguments for QC ----
        Int maximum_number_events_per_sample
        Int maximum_number_pass_events_per_sample
    }

    # The BAM paths and their index paths are split by two independent calls that share the same
    # block size; the scatter below pairs the resulting blocks up by position.
    call CNVTasks.SplitInputArray as SplitInputBamsList {
        input:
            input_array = normal_bams,
            num_inputs_in_scatter_block = num_samples_per_scatter_block,
            gatk_docker = gatk_docker
    }

    call CNVTasks.SplitInputArray as SplitInputBaisList {
        input:
            input_array = normal_bais,
            num_inputs_in_scatter_block = num_samples_per_scatter_block,
            gatk_docker = gatk_docker
    }

    Array[Array[String]] bam_blocks = SplitInputBamsList.split_array
    Array[Array[String]] bai_blocks = SplitInputBaisList.split_array

    # One case-workflow run per block. The number of BAM blocks drives the iteration, and the
    # index block at the same position is passed alongside it.
    scatter (block_index in range(length(bam_blocks))) {
        call GermlineCNVCaseWorkflow.CNVGermlineCaseWorkflow {
            input:
                # per-block sample inputs
                normal_bams = bam_blocks[block_index],
                normal_bais = bai_blocks[block_index],

                # everything below is forwarded unchanged to every block
                intervals = intervals,
                blacklist_intervals = blacklist_intervals,
                filtered_intervals = filtered_intervals,
                contig_ploidy_model_tar = contig_ploidy_model_tar,
                gcnv_model_tars = gcnv_model_tars,
                num_intervals_per_scatter = num_intervals_per_scatter,
                ref_fasta_dict = ref_fasta_dict,
                ref_fasta_fai = ref_fasta_fai,
                ref_fasta = ref_fasta,
                gatk_docker = gatk_docker,
                gatk4_jar_override = gatk4_jar_override,
                preemptible_attempts = preemptible_attempts,
                gcs_project_for_requester_pays = gcs_project_for_requester_pays,

                # PreprocessIntervals
                padding = padding,
                bin_length = bin_length,

                # CollectCounts
                disabled_read_filters_for_collect_counts = disabled_read_filters_for_collect_counts,
                collect_counts_format = collect_counts_format,
                collect_counts_enable_indexing = collect_counts_enable_indexing,
                mem_gb_for_collect_counts = mem_gb_for_collect_counts,

                # DetermineGermlineContigPloidyCaseMode
                ploidy_mapping_error_rate = ploidy_mapping_error_rate,
                ploidy_sample_psi_scale = ploidy_sample_psi_scale,
                mem_gb_for_determine_germline_contig_ploidy = mem_gb_for_determine_germline_contig_ploidy,
                cpu_for_determine_germline_contig_ploidy = cpu_for_determine_germline_contig_ploidy,
                disk_for_determine_germline_contig_ploidy = disk_for_determine_germline_contig_ploidy,

                # GermlineCNVCallerCaseMode
                gcnv_p_alt = gcnv_p_alt,
                gcnv_cnv_coherence_length = gcnv_cnv_coherence_length,
                gcnv_max_copy_number = gcnv_max_copy_number,
                mem_gb_for_germline_cnv_caller = mem_gb_for_germline_cnv_caller,
                cpu_for_germline_cnv_caller = cpu_for_germline_cnv_caller,
                disk_for_germline_cnv_caller = disk_for_germline_cnv_caller,

                # denoising model
                gcnv_mapping_error_rate = gcnv_mapping_error_rate,
                gcnv_sample_psi_scale = gcnv_sample_psi_scale,
                gcnv_depth_correction_tau = gcnv_depth_correction_tau,
                gcnv_copy_number_posterior_expectation_mode = gcnv_copy_number_posterior_expectation_mode,
                gcnv_active_class_padding_hybrid_mode = gcnv_active_class_padding_hybrid_mode,

                # Hybrid ADVI
                gcnv_learning_rate = gcnv_learning_rate,
                gcnv_adamax_beta_1 = gcnv_adamax_beta_1,
                gcnv_adamax_beta_2 = gcnv_adamax_beta_2,
                gcnv_log_emission_samples_per_round = gcnv_log_emission_samples_per_round,
                gcnv_log_emission_sampling_median_rel_error = gcnv_log_emission_sampling_median_rel_error,
                gcnv_log_emission_sampling_rounds = gcnv_log_emission_sampling_rounds,
                gcnv_max_advi_iter_first_epoch = gcnv_max_advi_iter_first_epoch,
                gcnv_max_advi_iter_subsequent_epochs = gcnv_max_advi_iter_subsequent_epochs,
                gcnv_min_training_epochs = gcnv_min_training_epochs,
                gcnv_max_training_epochs = gcnv_max_training_epochs,
                gcnv_initial_temperature = gcnv_initial_temperature,
                gcnv_num_thermal_advi_iters = gcnv_num_thermal_advi_iters,
                gcnv_convergence_snr_averaging_window = gcnv_convergence_snr_averaging_window,
                gcnv_convergence_snr_trigger_threshold = gcnv_convergence_snr_trigger_threshold,
                gcnv_convergence_snr_countdown_window = gcnv_convergence_snr_countdown_window,
                gcnv_max_calling_iters = gcnv_max_calling_iters,
                gcnv_caller_update_convergence_threshold = gcnv_caller_update_convergence_threshold,
                gcnv_caller_internal_admixing_rate = gcnv_caller_internal_admixing_rate,
                gcnv_caller_external_admixing_rate = gcnv_caller_external_admixing_rate,
                gcnv_disable_annealing = gcnv_disable_annealing,

                # PostprocessGermlineCNVCalls
                ref_copy_number_autosomal_contigs = ref_copy_number_autosomal_contigs,
                allosomal_contigs = allosomal_contigs,

                # QC
                maximum_number_events_per_sample = maximum_number_events_per_sample,
                maximum_number_pass_events_per_sample = maximum_number_pass_events_per_sample
        }
    }

    output {
        # Only the first block's preprocessed intervals are exposed; every block is called with the
        # same intervals, padding and bin_length, so the results are expected to match (not checked here).
        File preprocessed_intervals = CNVGermlineCaseWorkflow.preprocessed_intervals[0]

        # Each remaining output is gathered across blocks as an array of per-block arrays and is
        # flattened by one level, so the block structure is not visible to the caller.
        Array[File] read_counts_entity_id = flatten(CNVGermlineCaseWorkflow.read_counts_entity_id)
        Array[File] read_counts = flatten(CNVGermlineCaseWorkflow.read_counts)
        Array[File] sample_contig_ploidy_calls_tars = flatten(CNVGermlineCaseWorkflow.sample_contig_ploidy_calls_tars)
        Array[Array[File]] gcnv_calls_tars = flatten(CNVGermlineCaseWorkflow.gcnv_calls_tars)
        Array[File] gcnv_tracking_tars = flatten(CNVGermlineCaseWorkflow.gcnv_tracking_tars)
        Array[File] genotyped_intervals_vcfs = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcfs)
        Array[File] genotyped_intervals_vcf_indexes = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcf_indexes)
        Array[File] genotyped_segments_vcfs = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcfs)
        Array[File] genotyped_segments_vcf_indexes = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcf_indexes)
        Array[File] qc_status_files = flatten(CNVGermlineCaseWorkflow.qc_status_files)
        Array[String] qc_status_strings = flatten(CNVGermlineCaseWorkflow.qc_status_strings)
        Array[File] denoised_copy_ratios = flatten(CNVGermlineCaseWorkflow.denoised_copy_ratios)
    }
}
222
223