###############################################################################
#
# defaultValues.py - store default values used in many places in CheckM
#
###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

import os
from checkm.checkmData import DBManager


class DefaultValues:
    """Namespace of shared constants: file names, data paths, and thresholds.

    Every value is a class attribute, evaluated once when the class body
    runs. Paths located under the data directory are built from the
    "dataRoot" entry of the DBManager configuration.
    """

    # Instantiated at class-definition time; only used below to look up the
    # configured data root. The double underscore makes the name private to
    # this class body.
    __DBM = DBManager()

    # Markers recognized to be unreliable. These are often ubiquitous,
    # single-copy genes, but ones which are challenging to correctly
    # annotate with the PFAM and TIGRFAM models.
    MARKERS_TO_EXCLUDE = {'TIGR00398', 'TIGR00399'}

    # --- numeric defaults ---------------------------------------------------
    # NOTE(review): presumably hit-filtering thresholds (e-value, fraction of
    # model length) -- confirm against the code that consumes them.
    E_VAL = 1e-10
    LENGTH = 0.7
    PSEUDOGENE_LENGTH = 0.3

    # --- header lines identifying marker file types -------------------------
    TAXON_MARKER_FILE_HEADER = '# [Taxon Marker File]'
    LINEAGE_MARKER_FILE_HEADER = '# [Lineage Marker File]'

    SEQ_CONCAT_CHAR = '&&'

    # --- paths rooted at the configured data directory ----------------------
    CHECKM_DATA_DIR = __DBM.config.values["dataRoot"]

    PHYLO_HMM_MODELS = os.path.join(CHECKM_DATA_DIR, 'hmms', 'phylo.hmm')
    # Second name bound to the same path as PHYLO_HMM_MODELS.
    phyloHMMs = PHYLO_HMM_MODELS
    HMM_MODELS = os.path.join(CHECKM_DATA_DIR, 'hmms', 'checkm.hmm')
    PFAM_CLAN_FILE = os.path.join(CHECKM_DATA_DIR, 'pfam', 'Pfam-A.hmm.dat')

    IMG_METADATA_FILE = os.path.join(
        CHECKM_DATA_DIR, 'img', 'img_metadata.tsv')
    REDUNDANT_TIGRFAM_FILE = os.path.join(
        CHECKM_DATA_DIR, 'pfam', 'tigrfam2pfam.tsv')

    SELECTED_MARKER_SETS = os.path.join(
        CHECKM_DATA_DIR, 'selected_marker_sets.tsv')
    TAXON_MARKER_SETS = os.path.join(
        CHECKM_DATA_DIR, 'taxon_marker_sets.tsv')

    # --- genome tree: directory, reference packages, bare file names --------
    GENOME_TREE_DIR = os.path.join(CHECKM_DATA_DIR, 'genome_tree')
    PPLACER_REF_PACKAGE_FULL = os.path.join(
        GENOME_TREE_DIR, 'genome_tree_full.refpkg')
    PPLACER_REF_PACKAGE_REDUCED = os.path.join(
        GENOME_TREE_DIR, 'genome_tree_reduced.refpkg')
    # The GENOME_TREE* values below are plain file names, not joined paths.
    GENOME_TREE = 'genome_tree.tre'
    GENOME_TREE_FASTA = 'genome_tree.fasta'
    GENOME_TREE_DEREP = 'genome_tree.derep.txt'
    GENOME_TREE_TAXONOMY = 'genome_tree.taxonomy.tsv'
    GENOME_TREE_METADATA = 'genome_tree.metadata.tsv'
    GENOME_TREE_MISSING_DUPLICATE = 'missing_duplicate_genes_50.tsv'
    DISTRIBUTION_DIR = os.path.join(CHECKM_DATA_DIR, 'distributions')

    # --- bare file names (no directory component) ---------------------------
    PHYLO_HMM_MODEL_INFO = 'phylo_hmm_info.pkl.gz'
    CHECKM_HMM_MODEL_INFO = 'checkm_hmm_info.pkl.gz'

    HMMER_TABLE_PHYLO_OUT = 'hmmer.tree.txt'
    HMMER_PHYLO_OUT = 'hmmer.tree.ali.txt'

    HMMER_TABLE_OUT = 'hmmer.analyze.txt'
    HMMER_OUT = 'hmmer.analyze.ali.txt'

    PRODIGAL_AA = 'genes.faa'
    PRODIGAL_NT = 'genes.fna'
    PRODIGAL_GFF = 'genes.gff'

    PPLACER_CONCAT_SEQ_OUT = 'concatenated.fasta'
    PPLACER_JSON_OUT = 'concatenated.pplacer.json'
    PPLACER_OUT = 'pplacer.out'
    PPLACER_TREE_OUT = 'concatenated.tre'

    BIN_STATS_PHYLO_OUT = 'bin_stats.tree.tsv'
    # SEQ_STATS_PHYLO_OUT = 'seq_stats.tree.tsv'

    BIN_STATS_OUT = 'bin_stats.analyze.tsv'
    # SEQ_STATS_OUT = 'seq_stats.analyze.tsv'

    BIN_STATS_EXT_OUT = 'bin_stats_ext.tsv'
    MARKER_GENE_STATS = 'marker_gene_stats.tsv'

    # --- miscellaneous ------------------------------------------------------
    # Run of ten 'N' characters.
    CONTIG_BREAK = 'NNNNNNNNNN'

    UNBINNED = 'unbinned'

    MIN_SEQ_LEN_GC_STD = 1000