#!/usr/bin/env bash

# Creates a set of files that map records between GENCODE and RefSeq.
# Pulled directly from the Ensembl database.
# Currently not used by Funcotator.
#
# Usage: run with no arguments.  Two TSV files (one per reference build) are
# written to the current working directory.  Requires the `mysql` command-line
# client and network access to the Ensembl database server.
#
# Exits non-zero if any query fails; a failed query never leaves a
# partially-written output file behind under the final file name.

# -u: error on unset variables.  pipefail: a failing `mysql` fails the whole
# pipeline instead of being masked by the exit status of `sort`.
set -uo pipefail

readonly outFileBaseName="gencode_xrefseq"
readonly outExt=".tsv"

readonly hg19db="homo_sapiens_core_75_37"
readonly hg38db="homo_sapiens_core_90_38"

readonly hg19FileName="${outFileBaseName}_v75_37.hg19${outExt}"
readonly hg38FileName="${outFileBaseName}_v90_38.hg38${outExt}"

readonly ensemblHost="ensembldb.ensembl.org"
readonly ensemblUser="anonymous"

################################################################################

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Create our query to the DB.
# The heredoc delimiter is quoted so the SQL is taken literally (no shell
# expansion).  `$(cat ...)` is used rather than `read -d ''` because the latter
# always returns a non-zero status when it reaches end of input.
query=$(cat <<'ENDOFQUERYINPUT'
SELECT mrna.transcript_id as transcript_id, mRNA_id, prot_acc FROM
  (
    SELECT CONCAT(transcript.stable_id, '.', transcript.version) AS transcript_id, xref.display_label AS mRNA_id
    FROM transcript, object_xref, xref, external_db
    WHERE
      transcript.transcript_id = object_xref.ensembl_id AND
      object_xref.ensembl_object_type = 'Transcript' AND
      object_xref.xref_id = xref.xref_id AND
      xref.external_db_id = external_db.external_db_id AND
      external_db.db_name = 'RefSeq_mRNA'
  ) AS mrna
  JOIN
  (
    SELECT CONCAT(transcript.stable_id, '.', transcript.version) AS transcript_id, xref.display_label AS prot_acc
    FROM translation, transcript, object_xref, xref,external_db
    WHERE
      (
        transcript.transcript_id = translation.transcript_id AND
        translation.translation_id = object_xref.ensembl_id AND
        object_xref.ensembl_object_type = 'Translation' AND
        object_xref.xref_id = xref.xref_id AND
        xref.external_db_id = external_db.external_db_id AND
        external_db.db_name = 'RefSeq_peptide'
      )
  ) AS prot
  ON mrna.transcript_id = prot.transcript_id
;
ENDOFQUERYINPUT
)
readonly query

#######################################
# Run the cross-reference query against one Ensembl core database and write
# the result as a TSV file with a header row.
# Globals:   query, ensemblHost, ensemblUser (read)
# Arguments: $1 - label used in the progress message (e.g. HG19)
#            $2 - name of the Ensembl core database to query
#            $3 - path of the output file to create
# Outputs:   progress message on stdout; timing of the query on stderr
# Returns:   0 on success; exits the script with status 1 on failure
#######################################
fetch_xrefs() {
  local label=$1
  local db=$2
  local out_file=$3
  # Results are staged here and renamed only on success, so a failed query
  # cannot leave a header-only file that looks like valid output.
  local partial_file="${out_file}.partial"

  echo "Getting ${label} gencode <=> refseq..."
  printf 'transcript_id\tmRNA_id\tprot_acc\n' > "${partial_file}" \
    || die "ERROR: cannot write to ${partial_file}"

  # `tail -n +2` drops the column-name row emitted by the mysql client.
  # Transcript IDs are strings (e.g. ENST….N), so sort lexically on the first
  # tab-separated field; LC_ALL=C makes the order byte-wise and reproducible.
  if time mysql -u "${ensemblUser}" -h "${ensemblHost}" -e "use ${db};${query}" \
      | tail -n +2 \
      | LC_ALL=C sort -t $'\t' -k1,1 >> "${partial_file}"; then
    mv -f -- "${partial_file}" "${out_file}" \
      || die "ERROR: cannot move ${partial_file} to ${out_file}"
  else
    rm -f -- "${partial_file}"
    die "ERROR: query against ${db} failed; ${out_file} was not written"
  fi
}

command -v mysql > /dev/null || die "ERROR: the 'mysql' client is required but was not found in PATH"

fetch_xrefs "HG19" "${hg19db}" "${hg19FileName}"
fetch_xrefs "HG38" "${hg38db}" "${hg38FileName}"

echo 'Done!'