1from .. import types
2
# Header names of the columns that appear in interval-list .tsv files.
contig_column_name: str = "CONTIG"
start_column_name: str = "START"
end_column_name: str = "END"
count_column_name: str = "COUNT"
8
# Prefix used when posteriors for several samples are saved side by side.
sample_folder_prefix: str = "SAMPLE_"

# Column-name prefix in the log copy-number posterior matrix
# (one column per integer copy-number state).
copy_number_column_prefix: str = "COPY_NUMBER_"

# Column-name prefix for generic outputs.
output_column_prefix: str = "VALUE_"
17
# Header names used by the ploidy prior table.
ploidy_prior_contig_name_column: str = "CONTIG_NAME"
ploidy_prior_prefix: str = "PLOIDY_PRIOR_"

# Header names used by the ploidy and read-depth .tsv outputs.
sample_name_column_name: str = "SAMPLE_NAME"
global_read_depth_column_name: str = "GLOBAL_READ_DEPTH"
average_ploidy_column_name: str = "AVERAGE_PLOIDY"
ploidy_column_name: str = "PLOIDY"
ploidy_gq_column_name: str = "PLOIDY_GQ"
28
# Header names used by the copy-number segments file.
num_points_column_name: str = "NUM_POINTS"
call_copy_number_column_name: str = "CALL_COPY_NUMBER"
quality_some_called_column_name: str = "QUALITY_SOME_CALLED"
quality_all_called_column_name: str = "QUALITY_ALL_CALLED"
quality_start_column_name: str = "QUALITY_START"
quality_end_column_name: str = "QUALITY_END"

# Header name used by the baseline copy-number file.
baseline_copy_number_column_name: str = "BASELINE_COPY_NUMBER"

# Header names used by the denoised copy-ratio files (mean and standard deviation).
denoised_copy_ratio_mean_column_name: str = "DENOISED_COPY_RATIO_MEAN"
denoised_copy_ratio_std_column_name: str = "DENOISED_COPY_RATIO_STD"
43
# Regular expression that extracts the sample name from an "@RG" header comment line.
# The single capture group holds the text following "SM:" up to (but not including)
# the next tab or the end of the line, so any tab-delimited fields that come after
# the SM field are not folded into the sample name. Lines that carry nothing after
# the sample name are matched exactly as before.
sample_name_header_regexp = r"^@RG.*SM:([^\t\n]*).*$"
46
# Text placed in front of the sample name when it is written as a header comment line.
sample_name_sam_header_prefix: str = "RG\tID:GATKCopyNumber\tSM:"

# Tag identifying a SAM header comment.
sam_comment_tag: str = "CO"

# Regular expression for a key/value pair on a SAM comment line: "@CO", one tab, then
# "<key>:<value>". The key group is greedy, so the split falls on the last colon of the line.
sam_comment_key_value_regexp: str = "^@CO[\t](.*):(.*).*"

# SAM-style comment, delimiter, and key/value separator characters.
default_comment_char: str = "@"
default_delimiter_char: str = "\t"
default_key_value_sep: str = ":"
60
# Keys under which an array's type and shape information are stored.
type_key_value: str = "dtype"
shape_key_value: str = "shape"
64
# Column-name -> dtype mappings for mandatory columns whose names are known in advance.
# (Not all of these mappings are in use yet; their formats are recorded for future reference.)
# Interval tables: the contig is a string; both coordinates share one unsigned integer type.
interval_dtypes_dict = {contig_column_name: str}
interval_dtypes_dict.update({
    coordinate_column: types.med_uint
    for coordinate_column in (start_column_name, end_column_name)
})
72
# Read-count tables: the interval columns followed by the count column.
read_count_dtypes_dict = dict(interval_dtypes_dict)
read_count_dtypes_dict[count_column_name] = types.med_uint
77
# Ploidy prior table: only the contig-name column has a name fixed ahead of time.
ploidy_prior_dtypes_dict = {ploidy_prior_contig_name_column: str}

# Sample coverage metadata: only the sample-name column has a name fixed ahead of time.
sample_coverage_metadata_dtypes_dict = {sample_name_column_name: str}
85
# Per-sample ploidy metadata: contig, ploidy, and ploidy genotyping-quality columns.
sample_ploidy_metadata_dtypes_dict = {
    contig_column_name: str,
    ploidy_column_name: types.small_uint,
    ploidy_gq_column_name: types.floatX,
}

# Per-sample read-depth metadata: both columns share the floating-point type.
sample_read_depth_metadata_dtypes_dict = {
    depth_column: types.floatX
    for depth_column in (global_read_depth_column_name, average_ploidy_column_name)
}
96
# Copy-number segment tables: the interval columns first, then the segment-specific columns.
copy_number_segment_dtypes_dict = dict(interval_dtypes_dict)
copy_number_segment_dtypes_dict.update({
    num_points_column_name: types.med_uint,
    call_copy_number_column_name: types.small_uint,
    baseline_copy_number_column_name: types.small_uint,
    quality_some_called_column_name: types.floatX,
    quality_all_called_column_name: types.floatX,
    quality_start_column_name: types.floatX,
    quality_end_column_name: types.floatX,
})
107
# Denoised copy-ratio tables: the interval columns first, then the mean and std columns.
denoised_copy_ratio_dtypes_dict = dict(interval_dtypes_dict)
denoised_copy_ratio_dtypes_dict.update({
    denoised_copy_ratio_mean_column_name: types.floatX,
    denoised_copy_ratio_std_column_name: types.floatX,
})
113
# Default file names used when loading and saving models, posteriors, and configurations.

# -- .tsv / .txt tables --
default_sample_read_depth_tsv_filename: str = "global_read_depth.tsv"
default_sample_name_txt_filename: str = "sample_name.txt"
default_sample_contig_ploidy_tsv_filename: str = "contig_ploidy.tsv"
default_copy_number_log_posterior_tsv_filename: str = "log_q_c_tc.tsv"
default_copy_number_log_emission_tsv_filename: str = "log_c_emission_tc.tsv"
default_class_log_posterior_tsv_filename: str = "log_q_tau_tk.tsv"
default_baseline_copy_number_tsv_filename: str = "baseline_copy_number_t.tsv"
default_copy_number_segments_tsv_filename: str = "copy_number_segments.tsv"
default_denoised_copy_ratios_mean_tsv_filename: str = "mu_denoised_copy_ratio_t.tsv"
default_denoised_copy_ratios_std_tsv_filename: str = "std_denoised_copy_ratio_t.tsv"

# -- .json configuration and version files --
default_denoising_config_json_filename: str = "denoising_config.json"
default_calling_config_json_filename: str = "calling_config.json"
default_ploidy_config_json_filename: str = "ploidy_config.json"
default_gcnvkernel_version_json_filename: str = "gcnvkernel_version.json"

# -- interval list and contig ploidy prior tables --
default_interval_list_filename: str = "interval_list.tsv"
default_contig_ploidy_prior_tsv_filename: str = "contig_ploidy_prior.tsv"

# -- adamax .npy files --
default_adamax_m_filename: str = "adamax_m.npy"
default_adamax_u_filename: str = "adamax_u.npy"
default_adamax_res_filename: str = "adamax_res.npy"
137
# Exit code signalling that inference diverged.
# NOTE: must be kept in sync with the corresponding constant in GermlineCNVCaller.
diverged_inference_exit_code: int = 239
141