# File download/unzip written 2012 by Lenna X. Peterson (arklenna@gmail.com)
# Dictionary extraction written 2011 by Hongbo Zhu
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Download PDB Chemical Component Dictionary and generate dict.

Download and parse PDB Chemical Component Dictionary,
then write out dict for to_one_letter_code.
"""


import gzip
import inspect
import os
import warnings

from urllib.request import urlopen

url = "ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"

# extract name of gzip file
gzname = os.path.basename(url)
# extract name of cif file (split by sep, remove last, rejoin)
cifname = os.extsep.join(gzname.split(os.extsep)[:-1])

# Find path of current script
_scriptPath = os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0])
# Path to SCOP module
_rafPath = os.path.normpath(os.path.join(_scriptPath, "..", "..", "Bio", "SCOP"))
_threeAllPath = os.path.join(_rafPath, "three_to_one_all.py")
_threePath = os.path.join(_rafPath, "three_to_one_dict.py")

# Downloads smaller than this many bytes trigger a warning
# (size of the archive as of 13 April 2012).
_MIN_EXPECTED_SIZE = 29944258

# Leading whitespace of every continuation line in the generated dict literal.
_INDENT = "    "


def download(source_url, dest_path, chunk_size=1024):
    """Stream ``source_url`` into the local file ``dest_path``.

    Both the URL handle and the output file are closed on exit, including
    when the transfer raises.
    """
    with urlopen(source_url) as url_handle, open(dest_path, "wb") as gzh:
        print("Downloading file... (approx. 29 MB)")
        while True:
            data = url_handle.read(chunk_size)
            if not data:
                break
            gzh.write(data)


def parse_code_pairs(lines):
    """Yield ``(three_letter, one_letter)`` tuples from CIF text lines.

    Two records of the PDB Chemical Component Dictionary are parsed:
    ``_chem_comp.one_letter_code`` and ``_chem_comp.three_letter_code``.
    The value is taken as the last whitespace-separated token of the line.
    A pair is emitted as soon as both records have been seen, after which
    the search starts over for the next component.  The three-letter code
    is left-justified and space-padded to at least three characters.
    """
    one = None
    three = None
    for line in lines:
        if line.startswith("_chem_comp.one_letter_code"):
            one = line.split()[-1]
        elif line.startswith("_chem_comp.three_letter_code"):
            three = "%-3s" % (line.split()[-1],)  # make it three-letter

        if one is not None and three is not None:
            yield three, one
            one = None
            three = None


def load_code_pairs(gz_path):
    """Return the list of code pairs found in the gzipped CIF ``gz_path``.

    The archive is opened in *text* mode: the record prefixes matched by
    :func:`parse_code_pairs` are ``str``, and comparing them against the
    ``bytes`` lines of a binary-mode handle raises ``TypeError``.
    Undecodable bytes are replaced rather than aborting the run.
    """
    with gzip.open(gz_path, "rt", encoding="utf-8", errors="replace") as fh:
        return list(parse_code_pairs(fh))


def format_dict_lines(pairs, per_line=5):
    """Return the source text of ``to_one_letter_code`` as a list of strings.

    ``pairs`` is an iterable of ``(three_letter, one_letter)`` tuples.  The
    first string is the opening ``to_one_letter_code = {`` line; each
    following line holds up to ``per_line`` entries.  Every string except
    the last ends with a newline, so the list can be passed directly to
    ``writelines``.  An empty ``pairs`` yields a valid empty dict literal.
    """
    buf = []
    current_line = "to_one_letter_code = {"
    for counter, (three, one) in enumerate(pairs):
        if counter % per_line == 0:
            buf.append(f"{current_line}\n")
            current_line = _INDENT
        current_line = f"{current_line}'{three}':'{one}',"

    # Drop the comma after the final entry.  With no entries the line is
    # still the opening brace, which must be kept intact.
    if current_line.endswith(","):
        current_line = current_line[:-1]
    buf.append(f"{current_line} }}")
    return buf


def main():
    """Download the dictionary and write ``three_to_one_dict.py``."""
    download(url, gzname)

    # size as of 13 April 2012
    if os.path.getsize(gzname) < _MIN_EXPECTED_SIZE:
        warnings.warn("ERROR: Downloaded file is too small", RuntimeWarning)

    # The following logic was written by Hongbo Zhu: generate the
    # three-to-one dictionary from the downloaded component dictionary.
    pairs = load_code_pairs(gzname)

    # Only components with a known (non-'?') one-letter code are written.
    # The complete table could be produced the same way by writing
    # format_dict_lines(pairs) to _threeAllPath.
    pairs_noq = [(three, one) for three, one in pairs if one != "?"]

    with open(_threePath, "w") as fh:
        fh.writelines(format_dict_lines(pairs_noq))


# Guarded so that importing this file does not start a network download;
# running it as a script behaves as before.
if __name__ == "__main__":
    main()