#
# A wrapper for the gCNV case workflow intended for lowering computing cost by making it feasible to use
# preemptible cloud instances with low memory requirements. CPU, memory and disk requirements can be
# lowered for GermlineCNVCaller and DetermineGermlineContigPloidy tasks.
#
#
# - Example invocation:
#
#       java -jar cromwell.jar run cnv_germline_case_scattered_workflow.wdl -i my_parameters.json
#
####################

version 1.0

import "cnv_germline_case_workflow.wdl" as GermlineCNVCaseWorkflow
import "../cnv_common_tasks.wdl" as CNVTasks

# Splits the sample lists (normal_bams / normal_bais) into blocks of
# num_samples_per_scatter_block entries, runs CNVGermlineCaseWorkflow once per block,
# and concatenates the per-block results into flat, workflow-level outputs.
# All other inputs are forwarded unchanged to every per-block sub-workflow invocation.
workflow CNVGermlineCaseScatteredWorkflow {

    input {
        ##################################
        #### required basic arguments ####
        ##################################
        File intervals
        File? blacklist_intervals
        File filtered_intervals
        Array[String]+ normal_bams
        Array[String]+ normal_bais
        File contig_ploidy_model_tar
        Array[File]+ gcnv_model_tars
        Int num_intervals_per_scatter
        File ref_fasta_dict
        File ref_fasta_fai
        File ref_fasta
        String gatk_docker
        # Passed to SplitInputArray as num_inputs_in_scatter_block for both sample lists
        Int num_samples_per_scatter_block

        ##################################
        #### optional basic arguments ####
        ##################################
        File? gatk4_jar_override
        Int? preemptible_attempts

        # Required if BAM/CRAM is in a requester pays bucket
        String? gcs_project_for_requester_pays

        ####################################################
        #### optional arguments for PreprocessIntervals ####
        ####################################################
        Int? padding
        Int? bin_length

        ##############################################
        #### optional arguments for CollectCounts ####
        ##############################################
        Array[String]? disabled_read_filters_for_collect_counts
        String? collect_counts_format
        Boolean? collect_counts_enable_indexing
        Int? mem_gb_for_collect_counts

        ######################################################################
        #### optional arguments for DetermineGermlineContigPloidyCaseMode ####
        ######################################################################
        Float? ploidy_mapping_error_rate
        Float? ploidy_sample_psi_scale
        Int? mem_gb_for_determine_germline_contig_ploidy
        Int? cpu_for_determine_germline_contig_ploidy
        Int? disk_for_determine_germline_contig_ploidy

        ##########################################################
        #### optional arguments for GermlineCNVCallerCaseMode ####
        ##########################################################
        Float? gcnv_p_alt
        Float? gcnv_cnv_coherence_length
        Int? gcnv_max_copy_number
        Int? mem_gb_for_germline_cnv_caller
        Int? cpu_for_germline_cnv_caller
        Int? disk_for_germline_cnv_caller

        # optional arguments for germline CNV denoising model
        Float? gcnv_mapping_error_rate
        Float? gcnv_sample_psi_scale
        Float? gcnv_depth_correction_tau
        String? gcnv_copy_number_posterior_expectation_mode
        Int? gcnv_active_class_padding_hybrid_mode

        # optional arguments for Hybrid ADVI
        Float? gcnv_learning_rate
        Float? gcnv_adamax_beta_1
        Float? gcnv_adamax_beta_2
        Int? gcnv_log_emission_samples_per_round
        Float? gcnv_log_emission_sampling_median_rel_error
        Int? gcnv_log_emission_sampling_rounds
        Int? gcnv_max_advi_iter_first_epoch
        Int? gcnv_max_advi_iter_subsequent_epochs
        Int? gcnv_min_training_epochs
        Int? gcnv_max_training_epochs
        Float? gcnv_initial_temperature
        Int? gcnv_num_thermal_advi_iters
        Int? gcnv_convergence_snr_averaging_window
        Float? gcnv_convergence_snr_trigger_threshold
        Int? gcnv_convergence_snr_countdown_window
        Int? gcnv_max_calling_iters
        Float? gcnv_caller_update_convergence_threshold
        Float? gcnv_caller_internal_admixing_rate
        Float? gcnv_caller_external_admixing_rate
        Boolean? gcnv_disable_annealing

        ###################################################
        #### arguments for PostprocessGermlineCNVCalls ####
        ###################################################
        Int ref_copy_number_autosomal_contigs
        Array[String]? allosomal_contigs

        ##########################
        #### arguments for QC ####
        ##########################
        Int maximum_number_events_per_sample
        Int maximum_number_pass_events_per_sample
    }

    # Partition the BAM list into blocks of num_samples_per_scatter_block entries.
    call CNVTasks.SplitInputArray as SplitInputBamsList {
        input:
            input_array = normal_bams,
            num_inputs_in_scatter_block = num_samples_per_scatter_block,
            gatk_docker = gatk_docker
    }

    # Partition the index list with the same block size.
    # NOTE(review): the scatter below indexes both partitions with the same index, so this
    # assumes normal_bams and normal_bais have equal length and matching order — confirm with callers.
    call CNVTasks.SplitInputArray as SplitInputBaisList {
        input:
            input_array = normal_bais,
            num_inputs_in_scatter_block = num_samples_per_scatter_block,
            gatk_docker = gatk_docker
    }

    Array[Array[String]] bam_blocks = SplitInputBamsList.split_array
    Array[Array[String]] bai_blocks = SplitInputBaisList.split_array

    # One CNVGermlineCaseWorkflow invocation per block; the number of shards is taken from the BAM partition.
    scatter (block_idx in range(length(bam_blocks))) {
        call GermlineCNVCaseWorkflow.CNVGermlineCaseWorkflow {
            input:
                # basic arguments (only the sample lists differ between shards)
                intervals = intervals,
                blacklist_intervals = blacklist_intervals,
                filtered_intervals = filtered_intervals,
                normal_bams = bam_blocks[block_idx],
                normal_bais = bai_blocks[block_idx],
                contig_ploidy_model_tar = contig_ploidy_model_tar,
                gcnv_model_tars = gcnv_model_tars,
                num_intervals_per_scatter = num_intervals_per_scatter,
                ref_fasta_dict = ref_fasta_dict,
                ref_fasta_fai = ref_fasta_fai,
                ref_fasta = ref_fasta,
                gatk_docker = gatk_docker,
                gatk4_jar_override = gatk4_jar_override,
                preemptible_attempts = preemptible_attempts,
                gcs_project_for_requester_pays = gcs_project_for_requester_pays,

                # PreprocessIntervals
                padding = padding,
                bin_length = bin_length,

                # CollectCounts
                disabled_read_filters_for_collect_counts = disabled_read_filters_for_collect_counts,
                collect_counts_format = collect_counts_format,
                collect_counts_enable_indexing = collect_counts_enable_indexing,
                mem_gb_for_collect_counts = mem_gb_for_collect_counts,

                # DetermineGermlineContigPloidyCaseMode
                ploidy_mapping_error_rate = ploidy_mapping_error_rate,
                ploidy_sample_psi_scale = ploidy_sample_psi_scale,
                mem_gb_for_determine_germline_contig_ploidy = mem_gb_for_determine_germline_contig_ploidy,
                cpu_for_determine_germline_contig_ploidy = cpu_for_determine_germline_contig_ploidy,
                disk_for_determine_germline_contig_ploidy = disk_for_determine_germline_contig_ploidy,

                # GermlineCNVCallerCaseMode
                gcnv_p_alt = gcnv_p_alt,
                gcnv_cnv_coherence_length = gcnv_cnv_coherence_length,
                gcnv_max_copy_number = gcnv_max_copy_number,
                mem_gb_for_germline_cnv_caller = mem_gb_for_germline_cnv_caller,
                cpu_for_germline_cnv_caller = cpu_for_germline_cnv_caller,
                disk_for_germline_cnv_caller = disk_for_germline_cnv_caller,

                # germline CNV denoising model
                gcnv_mapping_error_rate = gcnv_mapping_error_rate,
                gcnv_sample_psi_scale = gcnv_sample_psi_scale,
                gcnv_depth_correction_tau = gcnv_depth_correction_tau,
                gcnv_copy_number_posterior_expectation_mode = gcnv_copy_number_posterior_expectation_mode,
                gcnv_active_class_padding_hybrid_mode = gcnv_active_class_padding_hybrid_mode,

                # Hybrid ADVI
                gcnv_learning_rate = gcnv_learning_rate,
                gcnv_adamax_beta_1 = gcnv_adamax_beta_1,
                gcnv_adamax_beta_2 = gcnv_adamax_beta_2,
                gcnv_log_emission_samples_per_round = gcnv_log_emission_samples_per_round,
                gcnv_log_emission_sampling_median_rel_error = gcnv_log_emission_sampling_median_rel_error,
                gcnv_log_emission_sampling_rounds = gcnv_log_emission_sampling_rounds,
                gcnv_max_advi_iter_first_epoch = gcnv_max_advi_iter_first_epoch,
                gcnv_max_advi_iter_subsequent_epochs = gcnv_max_advi_iter_subsequent_epochs,
                gcnv_min_training_epochs = gcnv_min_training_epochs,
                gcnv_max_training_epochs = gcnv_max_training_epochs,
                gcnv_initial_temperature = gcnv_initial_temperature,
                gcnv_num_thermal_advi_iters = gcnv_num_thermal_advi_iters,
                gcnv_convergence_snr_averaging_window = gcnv_convergence_snr_averaging_window,
                gcnv_convergence_snr_trigger_threshold = gcnv_convergence_snr_trigger_threshold,
                gcnv_convergence_snr_countdown_window = gcnv_convergence_snr_countdown_window,
                gcnv_max_calling_iters = gcnv_max_calling_iters,
                gcnv_caller_update_convergence_threshold = gcnv_caller_update_convergence_threshold,
                gcnv_caller_internal_admixing_rate = gcnv_caller_internal_admixing_rate,
                gcnv_caller_external_admixing_rate = gcnv_caller_external_admixing_rate,
                gcnv_disable_annealing = gcnv_disable_annealing,

                # PostprocessGermlineCNVCalls
                ref_copy_number_autosomal_contigs = ref_copy_number_autosomal_contigs,
                allosomal_contigs = allosomal_contigs,

                # QC
                maximum_number_events_per_sample = maximum_number_events_per_sample,
                maximum_number_pass_events_per_sample = maximum_number_pass_events_per_sample
        }
    }

    output {
        # Only the first shard's file is exposed.
        # NOTE(review): presumably every shard produces the same preprocessed intervals since the
        # interval-related inputs are identical across shards — confirm against the sub-workflow.
        File preprocessed_intervals = CNVGermlineCaseWorkflow.preprocessed_intervals[0]

        # Each remaining output is gathered across shards and flattened by one level.
        Array[File] read_counts_entity_id = flatten(CNVGermlineCaseWorkflow.read_counts_entity_id)
        Array[File] read_counts = flatten(CNVGermlineCaseWorkflow.read_counts)
        Array[File] sample_contig_ploidy_calls_tars = flatten(CNVGermlineCaseWorkflow.sample_contig_ploidy_calls_tars)
        Array[Array[File]] gcnv_calls_tars = flatten(CNVGermlineCaseWorkflow.gcnv_calls_tars)
        Array[File] gcnv_tracking_tars = flatten(CNVGermlineCaseWorkflow.gcnv_tracking_tars)
        Array[File] genotyped_intervals_vcfs = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcfs)
        Array[File] genotyped_intervals_vcf_indexes = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcf_indexes)
        Array[File] genotyped_segments_vcfs = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcfs)
        Array[File] genotyped_segments_vcf_indexes = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcf_indexes)
        Array[File] qc_status_files = flatten(CNVGermlineCaseWorkflow.qc_status_files)
        Array[String] qc_status_strings = flatten(CNVGermlineCaseWorkflow.qc_status_strings)
        Array[File] denoised_copy_ratios = flatten(CNVGermlineCaseWorkflow.denoised_copy_ratios)
    }
}