"""Shared I/O constants: table column names, name prefixes, and SAM-style header patterns."""

from .. import types

# column names of an interval list .tsv file
contig_column_name = "CONTIG"
start_column_name = "START"
end_column_name = "END"
count_column_name = "COUNT"

# folder name prefix used when saving posteriors for multiple samples
sample_folder_prefix = "SAMPLE_"

# column name prefix of the log copy number posterior matrix; one column per integer copy number state
copy_number_column_prefix = "COPY_NUMBER_"

# generic column name prefix
output_column_prefix = "VALUE_"

# header column names of the ploidy prior table
ploidy_prior_contig_name_column = "CONTIG_NAME"
ploidy_prior_prefix = "PLOIDY_PRIOR_"

# column names of the ploidy and depth .tsv outputs
sample_name_column_name = "SAMPLE_NAME"
global_read_depth_column_name = "GLOBAL_READ_DEPTH"
average_ploidy_column_name = "AVERAGE_PLOIDY"
ploidy_column_name = "PLOIDY"
ploidy_gq_column_name = "PLOIDY_GQ"

# column names of the copy-number segments file
num_points_column_name = "NUM_POINTS"
call_copy_number_column_name = "CALL_COPY_NUMBER"
quality_some_called_column_name = "QUALITY_SOME_CALLED"
quality_all_called_column_name = "QUALITY_ALL_CALLED"
quality_start_column_name = "QUALITY_START"
quality_end_column_name = "QUALITY_END"

# column name of the baseline copy-number file
baseline_copy_number_column_name = "BASELINE_COPY_NUMBER"

# column names of the denoised copy-number files
denoised_copy_ratio_mean_column_name = "DENOISED_COPY_RATIO_MEAN"
denoised_copy_ratio_std_column_name = "DENOISED_COPY_RATIO_STD"

# regular expression for matching the sample name in an @RG header line;
# group 1 is the value of the SM field. The capture stops at the next tab (or
# line break) so that any fields following SM are not swallowed into the
# sample name; when SM is the last field the result is unchanged.
sample_name_header_regexp = "^@RG.*SM:([^\t\n]*)[\t]*.*$"

# prefix for writing the sample name as a header line (the sample name is appended to it)
sample_name_sam_header_prefix = "RG\tID:GATKCopyNumber\tSM:"

# SAM header comment tag
sam_comment_tag = "CO"

# regular expression for matching a key:value pair in a SAM comment line
# (group 1 is the key, group 2 the value)
sam_comment_key_value_regexp = "^@CO[\t](.*):(.*).*"

# SAM style comment characters
default_comment_char = "@"
default_delimiter_char = "\t"
default_key_value_sep = ":"

# keys under which an array's type and shape information are stored
type_key_value = "dtype"
shape_key_value = "shape"

# Column-name -> dtype mappings for the mandatory columns whose names are
# known ahead of time. Not every mapping below is currently used; the unused
# ones are kept to define the formats for future reference.

# interval list columns
interval_dtypes_dict = {
    contig_column_name: str,
    start_column_name: types.med_uint,
    end_column_name: types.med_uint,
}

# interval columns followed by the count column
read_count_dtypes_dict = {
    **interval_dtypes_dict,
    count_column_name: types.med_uint,
}

# ploidy prior table: only the contig name column is known ahead of time
ploidy_prior_dtypes_dict = {
    ploidy_prior_contig_name_column: str,
}

sample_coverage_metadata_dtypes_dict = {
    sample_name_column_name: str,
}

sample_ploidy_metadata_dtypes_dict = {
    contig_column_name: str,
    ploidy_column_name: types.small_uint,
    ploidy_gq_column_name: types.floatX,
}

sample_read_depth_metadata_dtypes_dict = {
    global_read_depth_column_name: types.floatX,
    average_ploidy_column_name: types.floatX,
}

# interval columns followed by the copy-number segment columns
copy_number_segment_dtypes_dict = {
    **interval_dtypes_dict,
    num_points_column_name: types.med_uint,
    call_copy_number_column_name: types.small_uint,
    baseline_copy_number_column_name: types.small_uint,
    quality_some_called_column_name: types.floatX,
    quality_all_called_column_name: types.floatX,
    quality_start_column_name: types.floatX,
    quality_end_column_name: types.floatX,
}

# interval columns followed by the denoised copy ratio mean and std columns
denoised_copy_ratio_dtypes_dict = {
    **interval_dtypes_dict,
    denoised_copy_ratio_mean_column_name: types.floatX,
    denoised_copy_ratio_std_column_name: types.floatX,
}

# default file names for loading and saving models, posteriors, and configurations
default_sample_read_depth_tsv_filename = "global_read_depth.tsv"
default_sample_name_txt_filename = "sample_name.txt"
default_sample_contig_ploidy_tsv_filename = "contig_ploidy.tsv"
# default .tsv file names
default_copy_number_log_posterior_tsv_filename = "log_q_c_tc.tsv"
default_copy_number_log_emission_tsv_filename = "log_c_emission_tc.tsv"
default_class_log_posterior_tsv_filename = "log_q_tau_tk.tsv"
default_baseline_copy_number_tsv_filename = "baseline_copy_number_t.tsv"
default_copy_number_segments_tsv_filename = "copy_number_segments.tsv"
default_denoised_copy_ratios_mean_tsv_filename = "mu_denoised_copy_ratio_t.tsv"
default_denoised_copy_ratios_std_tsv_filename = "std_denoised_copy_ratio_t.tsv"

# default .json file names (configurations and version)
default_denoising_config_json_filename = "denoising_config.json"
default_calling_config_json_filename = "calling_config.json"
default_ploidy_config_json_filename = "ploidy_config.json"
default_gcnvkernel_version_json_filename = "gcnvkernel_version.json"

# default interval list and contig ploidy prior file names
default_interval_list_filename = "interval_list.tsv"
default_contig_ploidy_prior_tsv_filename = "contig_ploidy_prior.tsv"

# default adamax .npy file names
default_adamax_m_filename = "adamax_m.npy"
default_adamax_u_filename = "adamax_u.npy"
default_adamax_res_filename = "adamax_res.npy"

# Default exit code indicating that inference diverged.
# NOTE: this value must be kept in sync with the corresponding constant in GermlineCNVCaller.
diverged_inference_exit_code = 239