1#!/usr/local/bin/python3.8 2 3from hh_reader import read_result 4from copy import deepcopy 5from pdbx.reader.PdbxReader import PdbxReader 6from pdbx.writer.PdbxWriter import PdbxWriter 7import re, os, sys, tempfile, glob 8 9from operator import itemgetter # hzhu 10from itertools import groupby # hzhu 11 12EMPTY = '*' 13GAP = '-' 14DEBUG_MODE = False 15 16class Gap: 17 """ A gap is a continuous stretch of indels. 18 It is defined by a opening position and a size/length 19 """ 20 def __init__(self, open_pos, size): 21 self.open_pos = open_pos # gap opening position 22 self.size = size # num of indels in the gap 23 24 def __repr__(self): 25 return 'Gap opening pos = %d, size = %d' % (self.open_pos, self.size) 26 27class Grid: 28 """ 29 Implementation of 2D grid of cells 30 Includes boundary handling 31 """ 32 33 def __init__(self, grid_height, grid_width): 34 """ 35 Initializes grid to be empty, take height and width of grid as parameters 36 Indexed by rows (left to right), then by columns (top to bottom) 37 """ 38 39 self._grid_height = grid_height 40 self._grid_width = grid_width 41 self._cells = [ [ EMPTY for dummy_col in range(self._grid_width) ] 42 for dummy_row in range(self._grid_height)] 43 44 def __str__(self): 45 """ Return multi-line string represenation for grid """ 46 47 ans = '' 48 for row in range(self._grid_height): 49 ans += ''.join(self._cells[row]) 50 ans += '\n' 51 return ans 52 53 def clear(self): 54 """ Clears grid to be empty """ 55 56 self._cells = [[EMPTY for dummy_col in range(self._grid_width)] 57 for dummy_row in range(self._grid_height)] 58 59 def get_grid_height(self): 60 """ Return the height of the grid """ 61 62 return self._grid_height 63 64 def get_grid_width(self): 65 """ Return the width of the grid """ 66 67 return self._grid_width 68 69 def get_cell(self, row, col): 70 return self._cells[row][col] 71 72 def get_seq_start(self, row): 73 """ Returns the start position of the sequence """ 74 75 index = 0 76 for pos in self._cells[row]: 77 if pos != EMPTY: 78 return index 79 index += 1 80 81 return None 82 83 def get_seq_end(self, row): 84 """ Returns the end position of the sequence """ 85 86 index = 0 87 for pos in reversed(self._cells[row]): 88 if pos != EMPTY: 89 return self.get_grid_width() - index 90 index += 1 91 92 return None 93 94 def get_gaps(self, row): 95 """ Return the position of gaps in a row """ 96 97 gaps = list() 98 99 index = 0 100 for pos in self._cells[row]: 101 if pos == GAP: 102 gaps.append(index) 103 index += 1 104 105 return gaps 106 107 def get_gaps_ref_gapless(self, row): 108 """ Return the pos of gaps in a row. 109 The opening positions of the gaps are wrt. the gapless seq 110 """ 111 # get all the indels 112 indels = self.get_gaps(row) 113 gaps = [] 114 # combine continuous indels into a gap 115 for k,i in groupby( enumerate(indels), lambda x: x[0]-x[1] ): 116 g = list(map(itemgetter(1), i)) 117 gaps.append( Gap(g[0], len(g)) ) 118 119 # offset the gap opening positions 120 for i in range(1, len(gaps)): 121 # offset by total gap number before 122 gaps[i].open_pos -= sum([gaps[j].size for j in range(i)]) 123 124 return gaps # a list of Gap instances 125 126 def get_seq_indeces(self, row): 127 128 seq = list() 129 for pos, res in enumerate(self._cells[row]): 130 if res != EMPTY and res != GAP: 131 seq.append(pos) 132 133 return seq 134 135 ## def get_gap_list(self): # hzhu commented this out. wrote a new version 136 ## """ Returns a list of list of all gap positions in the sequence grid. """ 137 ## gap_pos = set() 138 139 ## for row in range(self.get_grid_height()): 140 ## for gap in self.get_gaps(row): 141 ## gap_pos.add(gap) 142 143 ## gap_pos = list(sorted(gap_pos)) 144 ## boundaries = [ (x + 1) for x, y in zip(gap_pos, gap_pos[1:]) if y - x != 1 ] 145 146 ## gap_list = list() 147 ## prev = 0 148 149 ## for boundary in boundaries: 150 ## sub_list = [ pos for pos in gap_pos[prev:] if pos < boundary ] 151 ## gap_list.append(sub_list) 152 ## prev += len(sub_list) 153 154 ## gap_list.append([ x for x in gap_pos[prev:]]) 155 156 ## return gap_list 157 158 def get_gap_list(self): 159 """ Returns a list of Gap instances for all rows in the grid 160 """ 161 gap_dict = dict() # each position should occur as gap at most once 162 # keys are gap openning positions 163 # values are Gap instances 164 gap_list = [] 165 for row in range(self.get_grid_height()): 166 gap_pos = [] 167 gaps = self.get_gaps_ref_gapless(row) 168 169 for g in gaps: 170 if g.open_pos in gap_dict: # if there is already gaps at this open pos 171 if g.size > gap_dict[g.open_pos].size: # if new gap is bigger 172 gap_dict[g.open_pos] = g # keep the larger gap as they overlap 173 else: 174 gap_dict[g.open_pos] = g 175 176 gap_list = sorted(list(gap_dict.values()), key=lambda x: x.open_pos) # sort according to start position 177 return gap_list # a list of Gap instances 178 179 def set_gap(self, row, col): 180 """ Set cell with index (row, col) to be a gap """ 181 182 self._cells[row][col] = GAP 183 184 def set_empty(self, row, col): 185 """ Set cell with index (row, col) to be a gap """ 186 187 self._cells[row][col] = EMPTY 188 189 def set_cell(self, row, col, res): 190 """ Set cell with index (row, col) to be full """ 191 192 self._cells[row][col] = res 193 194 def is_empty(self, row, col): 195 """ Checks whether cell with index (row, col) is empty """ 196 197 return self._cells[row][col] == EMPTY 198 199 def is_gap(self, row, col): 200 """ Checks whetehr cell with indxex (row, col) is a gap """ 201 202 return self._cells[row][col] == GAP 203 204 def insert_gaps(self, cols): 205 """ Inserts a gaps into a column of the template grid """ 206 207 for col in cols: 208 for row in range(self._grid_height): 209 if col >= self.get_seq_start(row) and col < self.get_seq_end(row): 210 self._cells[row].insert(col, GAP) 211 else: 212 self._cells[row].insert(col, EMPTY) 213 214 self._grid_width += 1 215 216 def insert_gaps_row(self, cols, row): 217 """ Intert gaps into cols only for certain row""" 218 for col in cols: 219 if col >= self.get_seq_start(row) and col < self.get_seq_end(row): 220 self._cells[row].insert(col, GAP) 221 else: 222 self._cells[row].insert(col, EMPTY) 223 # NOTE: grid_with should not be changed after every row is updated. 224 #self._grid_width += 1 225 226 def clean_trail_empty(self): 227 """ Remove all trailing EMPTY and pad grid to same width""" 228 # first find out the max length (exluding trailing EMPTY) 229 max_width = 0 230 for row in range(self._grid_height): 231 for i in range(len(self._cells[row])-1, -1, -1): 232 if self._cells[row][i] != EMPTY: 233 break 234 if i+1 > max_width: 235 max_width = i+1 236 237 # delete excessive EMPTY 238 for row in range(self._grid_height): 239 del self._cells[row][max_width:] 240 241 # then pad all rows to the same length 242 [self._cells[row].append( EMPTY * (max_width-len(self._cells[row])) ) \ 243 for row in range(self._grid_height) if len(self._cells[row]) < max_width] 244 self._grid_width = max_width 245 return 246 247 def remove_gaps(self, keep_width=True): # hzhu add keep_width option 248 """ Removes all gaps from the grid. """ 249 250 for row in range(self.get_grid_height()): 251 not_gap = list() 252 for col in range(self.get_grid_width()): 253 if not self.is_gap(row, col): 254 not_gap.append(col) 255 256 self._cells[row] = [ self._cells[row][col] for col in not_gap ] 257 258 if keep_width: # hzhu only pad to original width if desired 259 for del_pos in range(self._grid_width - len(not_gap)): 260 self._cells[row].append(EMPTY) 261 262 if not keep_width: # hzhu if width is not kept, make sure width is consistent 263 self.clean_trail_empty() 264 265 return 266 267 268class QueryGrid(Grid): 269 270 def __init__(self, grid_height, grid_width): 271 Grid.__init__(self, grid_height, grid_width) 272 273 def get_query_start(self, row): 274 """ Returns the query start position """ 275 return self.get_seq_start(row) + 1 276 277 def get_query_end(self, row): 278 """ Returns the query end postion """ 279 280 return self.get_seq_end(row) - len(self.get_gaps(row)) 281 282 def get_col_residue(self, col): 283 """ Tries to find a the query residue in a given column. Used by derive_global_seq() to 284 identify the global query sequence """ 285 286 for row in range(self.get_grid_height()): 287 if not self.is_empty(row, col): 288 return self._cells[row][col] 289 290 return GAP 291 292class TemplateGrid(Grid): 293 294 def __init__(self, grid_height, grid_width): 295 Grid.__init__(self, grid_height, grid_width) 296 297 self._start = list() 298 self._end = list() 299 self._pdb_code = list() 300 self._chain = list() 301 self._organism = list() 302 self._resolution = list() 303 304 def display(self): 305 """ Return multi-line string represenation for grid """ 306 307 ans = '' 308 for row in range(self._grid_height): 309 ans += '>P1;{p}\nstructure:{p}:{s}:{c}:{e}:{c}::{o}:{r}:\n{a}*\n'.format( 310 p = self._pdb_code[row], 311 s = add_white_space_end(self.get_template_start(row), 4), 312 e = add_white_space_end(self.get_template_end(row), 4), 313 c = self._chain[row], 314 o = self._organism[row], 315 r = self._resolution[row], 316 a = ''.join(self._cells[row]).replace(EMPTY, GAP).replace('#', GAP)) 317 318 return ans 319 320 def debug(self, row): 321 """ Return multi-line string represenation for grid, for debugging purposes """ 322 323 ans = '{p}\nInternal: {s}, {e} Query: {qs}, {qe} Gaps ({g1}): {g2}\n{seq}\n'.format( 324 p = self._pdb_code[row], 325 s = self.get_seq_start(row), 326 e = self.get_seq_end(row), 327 qs = self.get_template_start(row), 328 qe = self.get_template_end(row), 329 g1 = len(self.get_gaps(row)), 330 g2 = ', '.join([str(gap) for gap in self.get_gaps(row)]), 331 seq = ''.join(self._cells[row])) 332 333 return ans 334 335 def set_metadata(self, row, start, end, pdb_code, chain, organism, resolution): 336 """ Used by create_template_grid() to setup metadata of pir template """ 337 338 self._start.append(start) 339 self._end.append(end) 340 self._pdb_code.append(pdb_code) 341 self._chain.append(chain) 342 self._organism.append(organism) 343 self._resolution.append(resolution) 344 345 def set_map(self, row, start, end): 346 347 self._start[row] = start 348 self._end[row] = end 349 350 def get_template_start(self, row): 351 """ Returns the template start position """ 352 353 return self._start[row] 354 355 def get_template_end(self, row): 356 """ Return sthe template end position """ 357 358 return self._end[row] 359 360 def del_row(self, row): 361 """ Removes a complete template entry from the grid """ 362 363 del self._cells[row] 364 del self._start[row] 365 del self._end[row] 366 del self._pdb_code[row] 367 del self._chain[row] 368 del self._organism[row] 369 del self._resolution[row] 370 self._grid_height -= 1 371 372# Helper functions 373 374def add_white_space_end(string, length): 375 """ Adds whitespaces to a string until it has the wished length""" 376 377 edited_string = str(string) 378 379 if len(edited_string) >= length: 380 return string 381 else: 382 while len(edited_string) != length: 383 edited_string += ' ' 384 385 return edited_string 386 387def convert_aa_code(three_letter, convert): 388 """ 389 Assumes a string that contains a three letter aminoacid code and 390 returns the corresponding one letter code. 391 """ 392 393 aa_code = { 394 'CYS': 'C', 395 'ASP': 'D', 396 'SER': 'S', 397 'GLN': 'Q', 398 'LYS': 'K', 399 'ILE': 'I', 400 'PRO': 'P', 401 'THR': 'T', 402 'PHE': 'F', 403 'ASN': 'N', 404 'GLY': 'G', 405 'HIS': 'H', 406 'LEU': 'L', 407 'ARG': 'R', 408 'TRP': 'W', 409 'ALA': 'A', 410 'VAL': 'V', 411 'GLU': 'E', 412 'TYR': 'Y', 413 'MET': 'M', 414 } 415 416 non_canonical = { 417 'MSE': 1, 418 'HYP': 2, 419 'MLY': 3, 420 'SEP': 4, 421 'TPO': 5, 422 'CSO': 6, 423 'PTR': 7, 424 'KCX': 8, 425 'CME': 9, 426 'CSD': 10, 427 'CAS': 11, 428 'MLE': 12, 429 'DAL': 13, 430 'CGU': 14, 431 'DLE': 15, 432 'FME': 16, 433 'DVA': 17, 434 'OCS': 18, 435 'DPR': 19, 436 'MVA': 20, 437 'TYS': 21, 438 'M3L': 22, 439 'SMC': 23, 440 'ALY': 24, 441 'CSX': 25, 442 'DCY': 26, 443 'NLE': 27, 444 'DGL': 28, 445 'DSN': 29, 446 'CSS': 30, 447 'DLY': 31, 448 'MLZ': 32, 449 'DPN': 33, 450 'DAR': 34, 451 'PHI': 35, 452 'IAS': 36, 453 'DAS': 37, 454 'HIC': 38, 455 'MP8': 39, 456 'DTH': 40, 457 'DIL': 41, 458 'MEN': 42, 459 'DTY': 43, 460 'CXM': 44, 461 'DGN': 45, 462 'DTR': 46, 463 'SAC': 47, 464 'DSG': 48, 465 'MME': 49, 466 'MAA': 50, 467 'YOF': 51, 468 'FP9': 52, 469 'FVA': 53, 470 'MLU': 54, 471 'OMY': 55, 472 'FGA': 56, 473 'MEA': 57, 474 'CMH': 58, 475 'DHI': 59, 476 'SEC': 60, 477 'OMZ': 61, 478 'SCY': 62, 479 'MHO': 63, 480 'MED': 64, 481 'CAF': 65, 482 'NIY': 66, 483 'OAS': 67, 484 'SCH': 68, 485 'MK8': 69, 486 'SME': 70, 487 'LYZ': 71 488 } 489 490 if three_letter in aa_code.keys(): 491 return aa_code[three_letter] 492 elif convert and (three_letter in non_canonical.keys()): 493 return non_canonical[three_letter] 494 else: 495 return '-' 496 497 498def get_query_name(hhr_file): 499 500 with open(hhr_file) as fh: 501 for line in fh: 502 if line.startswith('Query'): 503 # match the PDB Code 504 m = re.search('(\d[A-Z0-9]{3})_(\S)', line) 505 506 if m: 507 pdb_code = m.group(1) 508 chain = m.group(2) 509 else: 510 pdb_code = 'UKNP' 511 chain = 'A' 512 # raise ValueError('Input HHR-File Does not seem to be a PDB-Structure') 513 514 break 515 516 return pdb_code, chain 517 518def get_cif_files(folder): 519 """ Gets all cif files located in folder. """ 520 521 return glob(os.path.join(folder, '*.cif')) 522 523def open_cif(cif_file): 524 """ Assumes a mmCif file and returns a data block used for subsequent procedures """ 525 # The "usual" procedure to open a mmCIF with pdbX/mmCIF 526 with open(cif_file) as cif_fh: 527 data = [] 528 reader = PdbxReader(cif_fh) 529 reader.read(data) 530 531 block = data[0] 532 533 return block 534 535def get_pdb_entry_id(block): 536 """ Extracts the PDB entry information of a cif file and returns it as a string """ 537 538 entry = block.getObj('entry') 539 entry_id = entry.getValue('id') 540 541 return entry_id 542 543 544def template_id_to_pdb(template_id): 545 """ 546 Extracts PDB ID and chain name from the provided template id 547 """ 548 # match PDBID without chain (8fab, 1a01) 549 m = re.match(r'/^(\d[A-Za-z0-9]{3})$', template_id) 550 if m: 551 return m.group(1).upper(), 'A' 552 553 # PDB CODE with chain Identifier 554 m = re.match(r'^(\d[A-Za-z0-9]{3})_(\S)$', template_id) 555 if m: 556 return m.group(1).upper(), m.group(2).upper() 557 558 # Match DALI ID 559 m = re.match(r'^(\d[A-Za-z0-9]{3})([A-Za-z0-9]?)_\d+$', template_id) 560 if m: 561 return m.group(1).upper(), m.group(2).upper() 562 563 # No PDB code and chain identified 564 return None, None 565 566 567def create_template_grid(hhr_data): 568 """ Creates a template grid """ 569 570 total_seq = len(hhr_data) 571 templ_max = max( [ hhr.start[0] + len(to_seq(hhr.template_ali)) for hhr in hhr_data ] ) - 1 572 573 574 template_grid = TemplateGrid(total_seq, templ_max) 575 576 for row, template in enumerate(hhr_data): 577 seq_start = template.start[0] - 1 578 templatealignment = to_seq(template.template_ali) 579 seq_end = seq_start + len(templatealignment) 580 581 # Load Meta Data 582 start = template.start[1] 583 end = template.end[1] 584 585 # Get pdb_code and chain identifier of template 586 pdb_code, chain = template_id_to_pdb(template.template_id) 587 588 m = re.search("(\d+.\d+)A", template.template_info) # try to extract resolution of the structure 589 590 if m: 591 resolution = m.group(1) 592 else: 593 resolution = "" 594 595 m = re.search("\{(.*)\}", template.template_info) # try to extract the organism 596 if m: 597 organism = m.group(1).replace(":", " ") # make sure that no colons are in the organism 598 else: 599 organism = "" 600 601 template_grid.set_metadata(row, start, end, pdb_code, chain, organism, resolution) 602 603 # Write sequence into the grid 604 for pos, col in enumerate(range(seq_start, seq_end)): 605 template_grid.set_cell(row, col, templatealignment[pos]) 606 607 return template_grid 608 609 610def to_seq(ali): 611 if isinstance(ali, list): 612 return ''.join(ali) 613 else: 614 return ali 615 616 617def create_query_grid(hhr_data): 618 """ Creates a Query Grid """ 619 620 total_seq = len(hhr_data) 621 query_max = max( [ hhr.start[0] + len(to_seq(hhr.query_ali)) for hhr in hhr_data ] ) - 1 622 623 query_grid = QueryGrid(total_seq, query_max) 624 625 for row, query in enumerate(hhr_data): 626 627 queryalignment = to_seq(query.query_ali) 628 query_start = query.start[0] - 1 629 query_end = query_start + len(queryalignment) 630 631 for pos, col in enumerate(range(query_start, query_end)): 632 if queryalignment[pos] not in ['Z', 'U', 'O', 'J', 'X', 'B']: # CAUTION 633 634 query_grid.set_cell(row, col, queryalignment[pos]) 635 636 return query_grid 637 638def create_gapless_grid(grid): 639 """ Returns a gapless grid """ 640 641 gapless = deepcopy(grid) 642 gapless.remove_gaps(keep_width=False) # hzhu: shrink grid 643 644 return gapless 645 646def process_query_grid(query_grid, gapless_grid): 647 """ Processes a query grid sucht that it contains all gaps 648 """ 649 gaplist = query_grid.get_gap_list() 650 off_set = 0 651 652 for g in gaplist: 653 gapless_grid.insert_gaps([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ]) 654 off_set += g.size 655 656 return gapless_grid 657 658def derive_global_seq(processed_query_grid, query_name, query_chain): 659 660 global_seq = list() 661 662 for col in range(processed_query_grid.get_grid_width()): 663 global_seq.append(processed_query_grid.get_col_residue(col)) 664 665 # this is the query entry 666 header = '>P1;{q}\nsequence:{q}:1 :{c}:{l} :{c}::::\n'.format( 667 q = query_name, 668 l = len(global_seq), 669 c = query_chain) 670 671 return header + ''.join(global_seq) + '*' 672 673def process_template_grid(query_grid, template_grid): 674 """ Insertes Gaps into the template grid 675 Only add gaps from **other** query_grids into template grid (NOT gapless) 676 """ 677 gaplist = query_grid.get_gap_list() # use this to keep the offset 678 679 for row in range(template_grid.get_grid_height()): 680 # do NOT consider gaps in current query row 681 gaplist_row = query_grid.get_gaps_ref_gapless(row) 682 gapdict_row = dict(zip([g.open_pos for g in gaplist_row], 683 [g.size for g in gaplist_row])) 684 off_set = 0 685 for g in gaplist: 686 # if there is a gap with same opening position in the current row, 687 # only consider g if it is larger than the on in the current row 688 if g.open_pos in gapdict_row: 689 if g.size > gapdict_row[g.open_pos]: 690 template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos, 691 g.open_pos+g.size-gapdict_row[g.open_pos]) ], row) 692 else: 693 template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ], row) 694 695 off_set += g.size # even if the gaps are not inserted, the offset should be adjusted 696 697 template_grid.clean_trail_empty() # clean the redundant trailing EMPTY char 698 699 return template_grid 700 701def compare_with_cifs(template_grid, folder, output_path, convert, threshold): 702 """ 703 Compare the PIR Alignment with Atomsection of a mmCIF file. To make the ATOM-Section of 704 a mmCIF file compatible with MODELLER, each residue has in the ATOM-Section has to match 705 corresponding positions in the PIR-Alignment 706 """ 707 708 # glob the mmCif files from given directory and map the PDB identifier to the path 709 cif_files = glob.glob(os.path.join(folder, '*.cif')) 710 cif_paths = { path.split('/')[-1].split('.')[0].upper() : path for path in cif_files } 711 cif_edits = dict() 712 713 714 # create the path where renumbered cifs are saved to 715 if not os.path.exists(output_path): 716 os.mkdir(output_path) 717 718 # if the cif does not contain any residue of the por alignment we delete it 719 del_row = list() 720 721 for row in range(template_grid.get_grid_height()): 722 # get the pdb code and strand id from the current template 723 pdb_code = template_grid._pdb_code[row] 724 chain = template_grid._chain[row] # hhr users pdb chain ID 725 726 727 # load mmCif file accordingly 728 if pdb_code in cif_edits.keys(): 729 block = cif_edits[pdb_code] 730 else: 731 try: 732 block = open_cif(cif_paths[pdb_code]) 733 except KeyError: 734 del_row.append(row) 735 print ('! Did not find the mmCIF file for {pdb}. Removing it from the alignment.'.format( 736 pdb = pdb_code)) 737 continue 738 739 # Create a mapping of the atom site 740 atom_site = block.getObj('atom_site') 741 742 ######################################################################## 743 ## Get the mapping of the residues in the atom section ## 744 ######################################################################## 745 746 cif_seq = dict() 747 # For the case that we have to rename a chain 748 cif_chains = set([]) 749 750 # Iterate through the atomsection of the cif file 751 for atom_row in range(0, atom_site.getRowCount()): 752 753 try: 754 if atom_site.getValue('label_comp_id', atom_row) == 'HOH': 755 continue 756 cif_chain = atom_site.getValue('label_asym_id', atom_row) 757 pdb_chain = atom_site.getValue('auth_asym_id', atom_row) # use PDB chain ID 758 except IndexError: 759 pass 760 761 cif_chains.add(cif_chain) 762 763 # We do not care about the residues apart from the chain 764 #if cif_chain != chain: # hzhu 765 if pdb_chain != chain: # hhr uses PDB chain, not the cif chain! hzhu 766 continue 767 # and update the chain id from pdb_chain to cif_chain 768 if atom_site.getValue('group_PDB', atom_row).startswith('ATOM'): # hzhu in case HETATM ruins ch id 769 template_grid._chain[row] = cif_chain 770 771 # get the residue and the residue number 772 try: 773 res_num = int(atom_site.getValue("label_seq_id", atom_row)) 774 except ValueError: 775 continue 776 777 residue = atom_site.getValue('label_comp_id', atom_row) 778 residue = convert_aa_code(residue, convert) 779 780 781 if res_num not in cif_seq.keys(): 782 cif_seq[res_num] = residue 783 elif res_num in cif_seq.keys() and cif_seq[res_num] == residue: 784 continue 785 elif res_num in cif_seq.keys() and cif_seq[res_num] != residue: 786 cif_seq[res_num] = '-' 787 788 if DEBUG_MODE: 789 print ('! {p} {c}: mmCIF contains a residue position that is assigned {cr} to two residues. Removing it.'.format( 790 p = pdb_code, 791 c = chain, 792 cr = res_num)) 793 794 ######################################################################## 795 ## Rename chain if necessary ## 796 ######################################################################## 797 798 chain_idx = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 799 800 if len(template_grid._chain[row]) != 1: 801 i = 0 802 new_chain = 0 803 804 while i < len(chain_idx): 805 if chain_idx[i] in cif_chains: 806 if DEBUG_MODE: 807 print ('! {p} {c}: Chain identifier {i} is already taken.'.format( 808 p = pdb_code, 809 c = chain, 810 i = chain_idx[i])) 811 i += 1 812 else: 813 new_chain = chain_idx[i] 814 break 815 816 if new_chain == 0: 817 if DEBUG_MODE: 818 print ('! {p} {c}: Could not use {p}. The chain identifier {c} is not compatible with MODELLER (2 letters) and could not be renanmed.'.format( 819 p = pdb_code, 820 c = chain)) 821 822 del_row.append(row) 823 continue 824 825 if new_chain != 0: 826 print ('Selected new chain name {c}'.format(c = new_chain)) 827 828 #TODO 829 830 ######################################################################## 831 ## Compare cif positions with the atom positions ## 832 ######################################################################## 833 834 del_pos = list() 835 mod_pos = dict() 836 mapping = dict() 837 838 for pos_cif, pos_tem in zip(range(template_grid.get_template_start(row), 839 template_grid.get_template_end(row) + 1), template_grid.get_seq_indeces(row)): 840 841 res_tem = template_grid.get_cell(row, pos_tem) 842 843 try: 844 res_cif = cif_seq[pos_cif] 845 except KeyError: 846 res_cif = -1 847 848 849 match = True if res_tem == res_cif else False 850 851 if not match: 852 if res_cif == 1 and res_tem == 'M': 853 mod_pos[pos_cif] = 1 854 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 855 856 elif res_cif == 2 and res_tem == 'P': 857 858 mod_pos[pos_cif] = 2 859 mapping[(pos_tem, res_tem)] = (pos_cif, 'P') 860 861 elif res_cif == 3 and res_tem == 'K': 862 863 mod_pos[pos_cif] = 3 864 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 865 866 elif res_cif == 4 and res_tem == 'S': 867 868 mod_pos[pos_cif] = 4 869 mapping[(pos_tem, res_tem)] = (pos_cif, 'S') 870 871 elif res_cif == 5 and res_tem == 'T': 872 873 mod_pos[pos_cif] = 5 874 mapping[(pos_tem, res_tem)] = (pos_cif, 'T') 875 876 elif res_cif == 6 and res_tem == 'C': 877 878 mod_pos[pos_cif] = 6 879 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 880 881 elif res_cif == 7 and res_tem == 'Y': 882 883 mod_pos[pos_cif] = 7 884 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 885 886 elif res_cif == 8 and res_tem == 'K': 887 888 mod_pos[pos_cif] = 8 889 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 890 891 elif res_cif == 9 and res_tem == 'C': 892 893 mod_pos[pos_cif] = 9 894 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 895 896 elif res_cif == 10 and res_tem == 'A': 897 898 mod_pos[pos_cif] = 10 899 mapping[(pos_tem, res_tem)] = (pos_cif, 'A') 900 901 elif res_cif == 11 and res_tem == 'C': 902 903 mod_pos[pos_cif] = 11 904 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 905 906 elif res_cif == 12 and res_tem == 'L': 907 908 mod_pos[pos_cif] = 12 909 mapping[(pos_tem, res_tem)] = (pos_cif, 'L') 910 911 elif res_cif == 13 and res_tem == 'A': 912 913 mod_pos[pos_cif] = 13 914 mapping[(pos_tem, res_tem)] = (pos_cif, 'A') 915 916 elif res_cif == 14 and res_tem == 'E': 917 918 mod_pos[pos_cif] = 14 919 mapping[(pos_tem, res_tem)] = (pos_cif, 'E') 920 921 elif res_cif == 15 and res_tem == 'L': 922 923 mod_pos[pos_cif] = 15 924 mapping[(pos_tem, res_tem)] = (pos_cif, 'L') 925 926 elif res_cif == 16 and res_tem == 'M': 927 928 mod_pos[pos_cif] = 16 929 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 930 931 elif res_cif == 17 and res_tem == 'V': 932 933 mod_pos[pos_cif] = 17 934 mapping[(pos_tem, res_tem)] = (pos_cif, 'V') 935 936 elif res_cif == 18 and res_tem == 'C': 937 938 mod_pos[pos_cif] = 18 939 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 940 941 elif res_cif == 19 and res_tem == 'P': 942 943 mod_pos[pos_cif] = 19 944 mapping[(pos_tem, res_tem)] = (pos_cif, 'P') 945 946 elif res_cif == 20 and res_tem == 'V': 947 948 mod_pos[pos_cif] = 20 949 mapping[(pos_tem, res_tem)] = (pos_cif, 'V') 950 951 elif res_cif == 21 and res_tem == 'Y': 952 953 mod_pos[pos_cif] = 21 954 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 955 956 elif res_cif == 22 and res_tem == 'K': 957 958 mod_pos[pos_cif] = 22 959 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 960 961 elif res_cif == 23 and res_tem == 'C': 962 963 mod_pos[pos_cif] = 23 964 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 965 966 elif res_cif == 24 and res_tem == 'K': 967 968 mod_pos[pos_cif] = 24 969 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 970 971 elif res_cif == 25 and res_tem == 'C': 972 973 mod_pos[pos_cif] = 25 974 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 975 976 elif res_cif == 26 and res_tem == 'C': 977 978 mod_pos[pos_cif] = 26 979 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 980 981 elif res_cif == 27 and res_tem == 'L': 982 983 mod_pos[pos_cif] = 27 984 mapping[(pos_tem, res_tem)] = (pos_cif, 'L') 985 986 elif res_cif == 28 and res_tem == 'E': 987 988 mod_pos[pos_cif] = 28 989 mapping[(pos_tem, res_tem)] = (pos_cif, 'E') 990 991 elif res_cif == 29 and res_tem == 'S': 992 993 mod_pos[pos_cif] = 29 994 mapping[(pos_tem, res_tem)] = (pos_cif, 'S') 995 996 elif res_cif == 30 and res_tem == 'C': 997 998 mod_pos[pos_cif] = 30 999 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1000 1001 elif res_cif == 31 and res_tem == 'K': 1002 1003 mod_pos[pos_cif] = 31 1004 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 1005 1006 elif res_cif == 32 and res_tem == 'K': 1007 1008 mod_pos[pos_cif] = 32 1009 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 1010 1011 elif res_cif == 33 and res_tem == 'F': 1012 1013 mod_pos[pos_cif] = 33 1014 mapping[(pos_tem, res_tem)] = (pos_cif, 'F') 1015 1016 elif res_cif == 34 and res_tem == 'R': 1017 1018 mod_pos[pos_cif] = 34 1019 mapping[(pos_tem, res_tem)] = (pos_cif, 'R') 1020 1021 elif res_cif == 35 and res_tem == 'F': 1022 1023 mod_pos[pos_cif] = 35 1024 mapping[(pos_tem, res_tem)] = (pos_cif, 'F') 1025 1026 elif res_cif == 36 and res_tem == 'D': 1027 1028 mod_pos[pos_cif] = 36 1029 mapping[(pos_tem, res_tem)] = (pos_cif, 'D') 1030 1031 elif res_cif == 37 and res_tem == 'D': 1032 1033 mod_pos[pos_cif] = 37 1034 mapping[(pos_tem, res_tem)] = (pos_cif, 'D') 1035 1036 elif res_cif == 38 and res_tem == 'H': 1037 1038 mod_pos[pos_cif] = 38 1039 mapping[(pos_tem, res_tem)] = (pos_cif, 'H') 1040 1041 elif res_cif == 39 and res_tem == 'P': 1042 1043 mod_pos[pos_cif] = 39 1044 mapping[(pos_tem, res_tem)] = (pos_cif, 'P') 1045 1046 elif res_cif == 40 and res_tem == 'T': 1047 1048 mod_pos[pos_cif] = 40 1049 mapping[(pos_tem, res_tem)] = (pos_cif, 'T') 1050 1051 elif res_cif == 41 and res_tem == 'I': 1052 1053 mod_pos[pos_cif] = 41 1054 mapping[(pos_tem, res_tem)] = (pos_cif, 'I') 1055 1056 elif res_cif == 42 and res_tem == 'N': 1057 1058 mod_pos[pos_cif] = 42 1059 mapping[(pos_tem, res_tem)] = (pos_cif, 'N') 1060 1061 elif res_cif == 43 and res_tem == 'Y': 1062 1063 mod_pos[pos_cif] = 43 1064 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 1065 1066 elif res_cif == 44 and res_tem == 'M': 1067 1068 mod_pos[pos_cif] = 44 1069 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 1070 1071 elif res_cif == 45 and res_tem == 'G': 1072 1073 mod_pos[pos_cif] = 45 1074 mapping[(pos_tem, res_tem)] = (pos_cif, 'G') 1075 1076 elif res_cif == 46 and res_tem == 'W': 1077 1078 mod_pos[pos_cif] = 46 1079 mapping[(pos_tem, res_tem)] = (pos_cif, 'W') 1080 1081 elif res_cif == 47 and res_tem == 'S': 1082 1083 mod_pos[pos_cif] = 47 1084 mapping[(pos_tem, res_tem)] = (pos_cif, 'S') 1085 1086 elif res_cif == 48 and res_tem == 'N': 1087 1088 mod_pos[pos_cif] = 48 1089 mapping[(pos_tem, res_tem)] = (pos_cif, 'N') 1090 1091 elif res_cif == 49 and res_tem == 'M': 1092 1093 mod_pos[pos_cif] = 49 1094 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 1095 1096 elif res_cif == 50 and res_tem == 'A': 1097 1098 mod_pos[pos_cif] = 50 1099 mapping[(pos_tem, res_tem)] = (pos_cif, 'A') 1100 1101 elif res_cif == 51 and res_tem == 'Y': 1102 1103 mod_pos[pos_cif] = 51 1104 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 1105 1106 elif res_cif == 52 and res_tem == 'P': 1107 1108 mod_pos[pos_cif] = 52 1109 mapping[(pos_tem, res_tem)] = (pos_cif, 'P') 1110 1111 elif res_cif == 53 and res_tem == 'V': 1112 1113 mod_pos[pos_cif] = 53 1114 mapping[(pos_tem, res_tem)] = (pos_cif, 'V') 1115 1116 elif res_cif == 54 and res_tem == 'L': 1117 1118 mod_pos[pos_cif] = 54 1119 mapping[(pos_tem, res_tem)] = (pos_cif, 'L') 1120 1121 elif res_cif == 55 and res_tem == 'Y': 1122 1123 mod_pos[pos_cif] = 55 1124 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 1125 1126 elif res_cif == 56 and res_tem == 'E': 1127 1128 mod_pos[pos_cif] = 56 1129 mapping[(pos_tem, res_tem)] = (pos_cif, 'E') 1130 1131 elif res_cif == 57 and res_tem == 'F': 1132 1133 mod_pos[pos_cif] = 57 1134 mapping[(pos_tem, res_tem)] = (pos_cif, 'F') 1135 1136 elif res_cif == 58 and res_tem == 'C': 1137 1138 mod_pos[pos_cif] = 58 1139 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1140 1141 elif res_cif == 59 and res_tem == 'H': 1142 1143 mod_pos[pos_cif] = 59 1144 mapping[(pos_tem, res_tem)] = (pos_cif, 'H') 1145 1146 elif res_cif == 60 and res_tem == 'C': 1147 1148 mod_pos[pos_cif] = 60 1149 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1150 1151 elif res_cif == 61 and res_tem == 'Y': 1152 1153 mod_pos[pos_cif] = 61 1154 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 1155 1156 elif res_cif == 62 and res_tem == 'C': 1157 1158 mod_pos[pos_cif] = 62 1159 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1160 1161 elif res_cif == 63 and res_tem == 'M': 1162 1163 mod_pos[pos_cif] = 63 1164 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 1165 1166 elif res_cif == 64 and res_tem == 'M': 1167 1168 mod_pos[pos_cif] = 64 1169 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 1170 1171 elif res_cif == 65 and res_tem == 'C': 1172 1173 mod_pos[pos_cif] = 65 1174 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1175 1176 elif res_cif == 66 and res_tem == 'Y': 1177 1178 mod_pos[pos_cif] = 66 1179 mapping[(pos_tem, res_tem)] = (pos_cif, 'Y') 1180 1181 elif res_cif == 67 and res_tem == 'S': 1182 1183 mod_pos[pos_cif] = 67 1184 mapping[(pos_tem, res_tem)] = (pos_cif, 'S') 1185 1186 elif res_cif == 68 and res_tem == 'C': 1187 1188 mod_pos[pos_cif] = 68 1189 mapping[(pos_tem, res_tem)] = (pos_cif, 'C') 1190 1191 elif res_cif == 69 and res_tem == 'L': 1192 1193 mod_pos[pos_cif] = 69 1194 mapping[(pos_tem, res_tem)] = (pos_cif, 'L') 1195 1196 elif res_cif == 70 and res_tem == 'M': 1197 1198 mod_pos[pos_cif] = 70 1199 mapping[(pos_tem, res_tem)] = (pos_cif, 'M') 1200 1201 elif res_cif == 71 and res_tem == 'K': 1202 1203 mod_pos[pos_cif] = 71 1204 mapping[(pos_tem, res_tem)] = (pos_cif, 'K') 1205 1206 else: 1207 # insert a gap 1208 template_grid.set_empty(row, pos_tem) 1209 mapping[(pos_tem, res_tem)] = (pos_cif, res_cif) 1210 1211 if DEBUG_MODE: 1212 print ('! {p} {c}: template pos {pt} ({rt}) does not match cif pos {pc} ({rc}). Replacing with gap.'.format( 1213 p = pdb_code, 1214 c = chain, 1215 pt = pos_tem, 1216 rt = res_tem, 1217 pc = pos_cif, 1218 rc = res_cif if res_cif != -1 else 'DNE')) 1219 1220 if res_cif != -1: 1221 del_pos.append(pos_cif) 1222 else: 1223 mapping[(pos_tem, res_tem)] = (pos_cif, res_cif) 1224 1225 1226 # adjust template start and end positions 1227 correct_mapping = { key:value for key, value in mapping.items() if key[1] == value[1] } 1228 1229 try: 1230 tstart = correct_mapping[sorted(correct_mapping.keys())[0]][0] 1231 tend = correct_mapping[sorted(correct_mapping.keys())[-1]][0] 1232 template_grid.set_map(row, tstart, tend) 1233 except IndexError: 1234 # This exception handles cases in which all residues were deleted 1235 if DEBUG_MODE: 1236 print ('! {p} {c}: Removing {p} from alignment. No residues matched the alignment sequence.'.format( 1237 p = pdb_code, 1238 c = chain)) 1239 1240 del_row.append(row) 1241 continue 1242 1243 ######################################################################## 1244 ## Delete rows from the PIR Alignment if the residue ratio is to low ## 1245 ######################################################################## 1246 1247 if threshold > 0: 1248 1249 gaps = 0 1250 res = 0 1251 1252 for col in range(template_grid.get_grid_width()): 1253 if template_grid.is_empty(row, col): 1254 template_grid.set_gap(row, col) 1255 1256 if template_grid.is_gap(row, col): 1257 gaps += 1 1258 else: 1259 res += 1 1260 1261 ratio = res/float(gaps + res) 1262 1263 if ratio > threshold: 1264 print ('! Template {p} successfully passed residue ratio ({r:.2f} / {t}).'.format( 1265 p = pdb_code, 1266 r = ratio, 1267 t = threshold )) 1268 else: 1269 print ('! Template {p} did not passed residue ratio ({r:.2f} / {t}). Removing it from pir Alignment.'.format( 1270 p = pdb_code, 1271 r = ratio, 1272 t = threshold )) 1273 1274 if row not in del_row: 1275 del_row.append(row) 1276 continue 1277 1278 ######################################################################## 1279 ## Edit cif files ## 1280 ######################################################################## 1281 1282 rem_row = list() # verbosity: saves information about removed residues 1283 mod_row = list() # verbosity: saves information about modified residues 1284 cha_row = list() # verbosity: saves any other changes 1285 1286 for atom_row in reversed(range(0, atom_site.getRowCount())): 1287 1288 try: 1289 cif_chain = atom_site.getValue('label_asym_id', atom_row) 1290 except IndexError: 1291 pass 1292 1293 # We do not care about the residues apart from the chain 1294 if cif_chain != chain: 1295 continue 1296 1297 # get the residue number 1298 try: 1299 res_num = int(atom_site.getValue("label_seq_id", atom_row)) 1300 except ValueError: 1301 continue 1302 1303 # pdb_PDB_model_num has to be set to 1 1304 try: 1305 model_num = int(atom_site.getValue('pdbx_PDB_model_num', atom_row)) 1306 except IndexError: 1307 model_num = 1 # if we cannot extract, assume that it is alright 1308 1309 try: 1310 ins_code = atom_site.getValue('pdbx_PDB_ins_code', atom_row) 1311 except IndexError: 1312 ins_code = '?' # assume it has no insertion code 1313 1314 group_PDB = atom_site.getValue('group_PDB', atom_row) 1315 residue = atom_site.getValue('label_comp_id', atom_row) 1316 residue = convert_aa_code(residue, convert) 1317 1318 # MODELLER accepts only structures if pdbx_PDB_model_num is set to 1 1319 if model_num != 1: 1320 1321 if (res_num, residue, 'model_num') not in cha_row: 1322 cha_row.append((res_num, residue, 'model_num')) 1323 1324 atom_site.setValue(1, "pdbx_PDB_model_num", atom_row) 1325 1326 if ins_code != '?': 1327 1328 if (res_num, residue, 'ins_code') not in cha_row: 1329 cha_row.append((res_num, residue, 'ins_code')) 1330 1331 atom_site.setValue('?', "pdbx_PDB_ins_code", atom_row) 1332 1333 if group_PDB != 'ATOM': 1334 1335 if (res_num, residue, 'group_PDB') not in cha_row: 1336 cha_row.append((res_num, residue, 'group_PDB')) 1337 1338 atom_site.setValue('ATOM', 'group_PDB', atom_row) 1339 1340 ######################################################################## 1341 ## Delete residues ## 1342 ######################################################################## 1343 1344 if res_num in del_pos: 1345 if (res_num, residue) not in rem_row: 1346 rem_row.append((res_num, residue)) 1347 1348 atom_site.removeRow(atom_row) 1349 1350 ######################################################################## 1351 ## Modify residues ## 1352 ######################################################################## 1353 1354 if res_num in mod_pos.keys(): 1355 1356 # Get the data 1357 type_symbol = atom_site.getValue('type_symbol', atom_row) 1358 label_atom_id = atom_site.getValue('label_atom_id', atom_row) 1359 auth_atom_id = atom_site.getValue('auth_atom_id', atom_row) 1360 1361 if mod_pos[res_num] == 1: # try to convert MSE to M 1362 1363 atom_site.setValue('MET', 'label_comp_id', atom_row) 1364 1365 try: 1366 atom_site.setValue('MET', 'auth_comp_id', atom_row) 1367 except IndexError: 1368 pass 1369 1370 if type_symbol == 'SE': 1371 atom_site.setValue('S', 'type_symbol', atom_row) 1372 if label_atom_id == 'SE': 1373 atom_site.setValue('S', 'label_atom_id', atom_row) 1374 if auth_atom_id == 'SE': 1375 atom_site.setValue('S', 'auth_atom_id', atom_row) 1376 1377 if (res_num, residue, 'MSE -> MET') not in mod_row: 1378 mod_row.append((res_num, residue, 'MSE -> MET')) 1379 1380 elif mod_pos[res_num] == 2: # try to convert HYP to PRO 1381 # apparently it is enough to rename the label_comp_id to PRO to get 1382 # MODELLER working with Hydroxyprolines (HYP) 1383 1384 atom_site.setValue('PRO', 'label_comp_id', atom_row) 1385 1386 try: 1387 atom_site.setValue('PRO', 'auth_comp_id', atom_row) 1388 except IndexError: 1389 pass 1390 1391 if (res_num, residue, 'HYP -> PRO') not in mod_row: 1392 mod_row.append((res_num, residue, 'HYP -> PRO')) 1393 1394 elif mod_pos[res_num] == 3: # try to convert MLY to LYS 1395 1396 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1397 1398 try: 1399 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1400 except IndexError: 1401 pass 1402 1403 if (res_num, residue, 'MLY -> LYS') not in mod_row: 1404 mod_row.append((res_num, residue, 'MLY -> LYS')) 1405 1406 elif mod_pos[res_num] == 4: # converts Phosphoserine to Serine 1407 1408 atom_site.setValue('SER', 'label_comp_id', atom_row) 1409 1410 try: 1411 atom_site.setValue('SER', 'auth_comp_id', atom_row) 1412 except IndexError: 1413 pass 1414 1415 if (res_num, residue, 'SEP -> SER') not in mod_row: 1416 mod_row.append((res_num, residue, 'SEP -> SER')) 1417 1418 elif mod_pos[res_num] == 5: # converts Phosphothreonine to Threonine 1419 1420 atom_site.setValue('THR', 'label_comp_id', atom_row) 1421 1422 try: 1423 atom_site.setValue('THR', 'auth_comp_id', atom_row) 1424 except IndexError: 1425 pass 1426 1427 if (res_num, residue, 'TPO -> THR') not in mod_row: 1428 mod_row.append((res_num, residue, 'TPO -> THR')) 1429 1430 elif mod_pos[res_num] == 6: # converts S-HYDROXYCYSTEINE to Cysteine 1431 1432 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1433 1434 try: 1435 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1436 except IndexError: 1437 pass 1438 1439 if (res_num, residue, 'CSO -> CYS') not in mod_row: 1440 mod_row.append((res_num, residue, 'CSO -> CYS')) 1441 1442 elif mod_pos[res_num] == 7: # converts O-PHOSPHOTYROSINE to Tyrosine 1443 1444 atom_site.setValue('TYR', 'label_comp_id', atom_row) 1445 1446 try: 1447 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 1448 except IndexError: 1449 pass 1450 1451 if (res_num, residue, 'PTR -> TYR') not in mod_row: 1452 mod_row.append((res_num, residue, 'PTR -> TYR')) 1453 1454 elif mod_pos[res_num] == 8: # converts LYSINE NZ-CARBOXYLIC ACID to Lysine 1455 1456 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1457 1458 try: 1459 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1460 except IndexError: 1461 pass 1462 1463 if (res_num, residue, 'KCX -> LYS') not in mod_row: 1464 mod_row.append((res_num, residue, 'KCX -> LYS')) 1465 1466 elif mod_pos[res_num] == 9: # converts S,S-(2-HYDROXYETHYL)THIOCYSTEINE to Cysteine 1467 1468 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1469 1470 try: 1471 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1472 except IndexError: 1473 pass 1474 1475 if (res_num, residue, 'CME -> CYS') not in mod_row: 1476 mod_row.append((res_num, residue, 'CME -> CYS')) 1477 1478 elif mod_pos[res_num] == 10: # converts 3-SULFINOALANINE to Alanine 1479 1480 atom_site.setValue('ALA', 'label_comp_id', atom_row) 1481 1482 try: 1483 atom_site.setValue('ALA', 'auth_comp_id', atom_row) 1484 except IndexError: 1485 pass 1486 1487 if (res_num, residue, 'CSD -> ALA') not in mod_row: 1488 mod_row.append((res_num, residue, 'CSD -> ALA')) 1489 1490 elif mod_pos[res_num] == 11: # converts S-(DIMETHYLARSENIC)CYSTEINE to Cysteine 1491 1492 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1493 1494 try: 1495 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1496 except IndexError: 1497 pass 1498 1499 if (res_num, residue, 'CAS -> CYS') not in mod_row: 1500 mod_row.append((res_num, residue, 'CAS -> CYS')) 1501 1502 elif mod_pos[res_num] == 12: # converts N-METHYLLEUCINE (MLE) to Leucine 1503 1504 atom_site.setValue('LEU', 'label_comp_id', atom_row) 1505 1506 try: 1507 atom_site.setValue('LEU', 'auth_comp_id', atom_row) 1508 except IndexError: 1509 pass 1510 1511 if (res_num, residue, 'MLE -> LEU') not in mod_row: 1512 mod_row.append((res_num, residue, 'MLE -> LEU')) 1513 1514 elif mod_pos[res_num] == 13: # converts D-ALANINE (DAL) to ALA 1515 1516 atom_site.setValue('ALA', 'label_comp_id', atom_row) 1517 1518 try: 1519 atom_site.setValue('ALA', 'auth_comp_id', atom_row) 1520 except IndexError: 1521 pass 1522 1523 if (res_num, residue, 'DAL -> ALA') not in mod_row: 1524 mod_row.append((res_num, residue, 'DAL -> ALA')) 1525 1526 elif mod_pos[res_num] == 14: # converts GAMMA-CARBOXY-GLUTAMIC ACID (CGU) to GLU 1527 1528 atom_site.setValue('GLU', 'label_comp_id', atom_row) 1529 1530 try: 1531 atom_site.setValue('GLU', 'auth_comp_id', atom_row) 1532 except IndexError: 1533 pass 1534 1535 if (res_num, residue, 'CGU -> GLU') not in mod_row: 1536 mod_row.append((res_num, residue, 'CGU -> GLU')) 1537 1538 elif mod_pos[res_num] == 15: # converts D-LEUCINE (DLE) to LEU 1539 1540 atom_site.setValue('LEU', 'label_comp_id', atom_row) 1541 1542 try: 1543 atom_site.setValue('LEU', 'auth_comp_id', atom_row) 1544 except IndexError: 1545 pass 1546 1547 if (res_num, residue, 'DLE -> LEU') not in mod_row: 1548 mod_row.append((res_num, residue, 'DLE -> LEU')) 1549 1550 elif mod_pos[res_num] == 16: # converts N-FORMYLMETHIONINE (FME) to MET 1551 1552 atom_site.setValue('MET', 'label_comp_id', atom_row) 1553 1554 try: 1555 atom_site.setValue('MET', 'auth_comp_id', atom_row) 1556 except IndexError: 1557 pass 1558 1559 if (res_num, residue, 'FME -> MET') not in mod_row: 1560 mod_row.append((res_num, residue, 'FME -> MET')) 1561 1562 elif mod_pos[res_num] == 17: # converts D-VAL (DVA) to VAL 1563 1564 atom_site.setValue('VAL', 'label_comp_id', atom_row) 1565 1566 try: 1567 atom_site.setValue('VAL', 'auth_comp_id', atom_row) 1568 except IndexError: 1569 pass 1570 1571 if (res_num, residue, 'DVA -> VAL') not in mod_row: 1572 mod_row.append((res_num, residue, 'DVA -> VAL')) 1573 1574 elif mod_pos[res_num] == 18: # converts CYSTEINESULFONIC ACID (OCS) to CYS 1575 1576 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1577 1578 try: 1579 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1580 except IndexError: 1581 pass 1582 1583 if (res_num, residue, 'OCS -> CYS') not in mod_row: 1584 mod_row.append((res_num, residue, 'OCS -> CYS')) 1585 1586 elif mod_pos[res_num] == 19: # converts D-PROLINE (DPR) to PRO 1587 1588 atom_site.setValue('PRO', 'label_comp_id', atom_row) 1589 1590 try: 1591 atom_site.setValue('PRO', 'auth_comp_id', atom_row) 1592 except IndexError: 1593 pass 1594 1595 if (res_num, residue, 'DPR -> PRO') not in mod_row: 1596 mod_row.append((res_num, residue, 'DPR -> PRO')) 1597 1598 elif mod_pos[res_num] == 20: # converts N-METHYLVALINE (MVA) to VAL 1599 1600 atom_site.setValue('VAL', 'label_comp_id', atom_row) 1601 1602 try: 1603 atom_site.setValue('VAL', 'auth_comp_id', atom_row) 1604 except IndexError: 1605 pass 1606 1607 if (res_num, residue, 'MVA -> VAL') not in mod_row: 1608 mod_row.append((res_num, residue, 'MVA -> VAL')) 1609 1610 elif mod_pos[res_num] == 21: # converts O-SULFO-L-TYROSINE (TYS) to VAL 1611 1612 atom_site.setValue('TYR', 'label_comp_id', atom_row) 1613 1614 try: 1615 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 1616 except IndexError: 1617 pass 1618 1619 if (res_num, residue, 'TYS -> TYR') not in mod_row: 1620 mod_row.append((res_num, residue, 'TYS -> TYR')) 1621 1622 elif mod_pos[res_num] == 22: # converts N-TRIMETHYLLYSINE (M3L) to LYS 1623 1624 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1625 1626 try: 1627 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1628 except IndexError: 1629 pass 1630 1631 if (res_num, residue, 'M3L -> LYS') not in mod_row: 1632 mod_row.append((res_num, residue, 'M3L -> LYS')) 1633 1634 elif mod_pos[res_num] == 23: # converts S-METHYLCYSTEINE (SMC) to CYS 1635 1636 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1637 1638 try: 1639 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1640 except IndexError: 1641 pass 1642 1643 if (res_num, residue, 'SMC -> CYS') not in mod_row: 1644 mod_row.append((res_num, residue, 'SMC -> CYS')) 1645 1646 elif mod_pos[res_num] == 24: # converts N(6)-ACETYLLYSINE (ALY) to LYS 1647 1648 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1649 1650 try: 1651 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1652 except IndexError: 1653 pass 1654 1655 if (res_num, residue, 'ALY -> LYS') not in mod_row: 1656 mod_row.append((res_num, residue, 'ALY -> LYS')) 1657 1658 elif mod_pos[res_num] == 25: # converts S-OXY CYSTEINE (CSX) to CYS 1659 1660 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1661 1662 try: 1663 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1664 except IndexError: 1665 pass 1666 1667 if (res_num, residue, 'CSX -> CYS') not in mod_row: 1668 mod_row.append((res_num, residue, 'CSX -> CYS')) 1669 1670 elif mod_pos[res_num] == 26: # converts D-CYSTEINE (DCY) to CYS 1671 1672 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1673 1674 try: 1675 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1676 except IndexError: 1677 pass 1678 1679 if (res_num, residue, 'DCY -> CYS') not in mod_row: 1680 mod_row.append((res_num, residue, 'DCY -> CYS')) 1681 1682 elif mod_pos[res_num] == 27: # converts NORLEUCINE (NLE) to LEU 1683 1684 atom_site.setValue('LEU', 'label_comp_id', atom_row) 1685 1686 try: 1687 atom_site.setValue('LEU', 'auth_comp_id', atom_row) 1688 except IndexError: 1689 pass 1690 1691 if (res_num, residue, 'NLE -> LEU') not in mod_row: 1692 mod_row.append((res_num, residue, 'NLE -> LEU')) 1693 1694 elif mod_pos[res_num] == 28: # converts D-GLUTAMIC ACID (DGL) to GLU 1695 1696 atom_site.setValue('GLU', 'label_comp_id', atom_row) 1697 1698 try: 1699 atom_site.setValue('GLU', 'auth_comp_id', atom_row) 1700 except IndexError: 1701 pass 1702 1703 if (res_num, residue, 'DGL -> GLU') not in mod_row: 1704 mod_row.append((res_num, residue, 'DGL -> GLU')) 1705 1706 elif mod_pos[res_num] == 29: # converts D-SERINE (DSN) to SER 1707 1708 atom_site.setValue('SER', 'label_comp_id', atom_row) 1709 1710 try: 1711 atom_site.setValue('SER', 'auth_comp_id', atom_row) 1712 except IndexError: 1713 pass 1714 1715 if (res_num, residue, 'DSN -> SER') not in mod_row: 1716 mod_row.append((res_num, residue, 'DSN -> SER')) 1717 1718 elif mod_pos[res_num] == 30: # converts S-MERCAPTOCYSTEINE (CSS) to CYS 1719 1720 atom_site.setValue('CYS', 'label_comp_id', atom_row) 1721 1722 try: 1723 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 1724 except IndexError: 1725 pass 1726 1727 if (res_num, residue, 'CSS -> CYS') not in mod_row: 1728 mod_row.append((res_num, residue, 'CSS -> CYS')) 1729 1730 elif mod_pos[res_num] == 31: # converts D-LYSINE (DLY) to LYS 1731 1732 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1733 1734 try: 1735 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1736 except IndexError: 1737 pass 1738 1739 if (res_num, residue, 'DLY -> LYS') not in mod_row: 1740 mod_row.append((res_num, residue, 'DLY -> LYS')) 1741 1742 elif mod_pos[res_num] == 32: # converts N-METHYL-LYSINE (MLZ) to LYS 1743 1744 atom_site.setValue('LYS', 'label_comp_id', atom_row) 1745 1746 try: 1747 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 1748 except IndexError: 1749 pass 1750 1751 if (res_num, residue, 'MLZ -> LYS') not in mod_row: 1752 mod_row.append((res_num, residue, 'MLZ -> LYS')) 1753 1754 elif mod_pos[res_num] == 33: # converts D-PHENYLALANINE (DPN) to PHE 1755 1756 atom_site.setValue('PHE', 'label_comp_id', atom_row) 1757 1758 try: 1759 atom_site.setValue('PHE', 'auth_comp_id', atom_row) 1760 except IndexError: 1761 pass 1762 1763 if (res_num, residue, 'DPN -> PHE') not in mod_row: 1764 mod_row.append((res_num, residue, 'DPN -> PHE')) 1765 1766 elif mod_pos[res_num] == 34: # converts D-ARGININE (DAR) to ARG 1767 1768 atom_site.setValue('ARG', 'label_comp_id', atom_row) 1769 1770 try: 1771 atom_site.setValue('ARG', 'auth_comp_id', atom_row) 1772 except IndexError: 1773 pass 1774 1775 if (res_num, residue, 'DAR -> ARG') not in mod_row: 1776 mod_row.append((res_num, residue, 'DAR -> ARG')) 1777 1778 elif mod_pos[res_num] == 35: # converts IODO-PHENYLALANINE (PHI) to PHE 1779 1780 atom_site.setValue('PHE', 'label_comp_id', atom_row) 1781 1782 try: 1783 atom_site.setValue('PHE', 'auth_comp_id', atom_row) 1784 except IndexError: 1785 pass 1786 1787 if (res_num, residue, 'PHI -> PHE') not in mod_row: 1788 mod_row.append((res_num, residue, 'PHI -> PHE')) 1789 1790 elif mod_pos[res_num] == 36: # converts BETA-L-ASPARTIC ACID (IAS) to ASP 1791 1792 atom_site.setValue('ASP', 'label_comp_id', atom_row) 1793 1794 try: 1795 atom_site.setValue('ASP', 'auth_comp_id', atom_row) 1796 except IndexError: 1797 pass 1798 1799 if (res_num, residue, 'IAS -> ASP') not in mod_row: 1800 mod_row.append((res_num, residue, 'IAS -> ASP')) 1801 1802 elif mod_pos[res_num] == 37: # converts D-ASPARTIC ACID (DAS) to ASP 1803 1804 atom_site.setValue('ASP', 'label_comp_id', atom_row) 1805 1806 try: 1807 atom_site.setValue('ASP', 'auth_comp_id', atom_row) 1808 except IndexError: 1809 pass 1810 1811 if (res_num, residue, 'DAS -> ASP') not in mod_row: 1812 mod_row.append((res_num, residue, 'DAS -> ASP')) 1813 1814 elif mod_pos[res_num] == 38: # converts 4-METHYL-HISTIDINE (HIC) to HIS 1815 1816 atom_site.setValue('HIS', 'label_comp_id', atom_row) 1817 1818 try: 1819 atom_site.setValue('HIS', 'auth_comp_id', atom_row) 1820 except IndexError: 1821 pass 1822 1823 if (res_num, residue, 'HIC -> HIS') not in mod_row: 1824 mod_row.append((res_num, residue, 'HIC -> HIS')) 1825 1826 elif mod_pos[res_num] == 39: # converts (4R)-4-methyl-L-proline (MP8) to PRO 1827 1828 atom_site.setValue('PRO', 'label_comp_id', atom_row) 1829 1830 try: 1831 atom_site.setValue('PRO', 'auth_comp_id', atom_row) 1832 except IndexError: 1833 pass 1834 1835 if (res_num, residue, 'MP8 -> PRO') not in mod_row: 1836 mod_row.append((res_num, residue, 'MP8 -> PRO')) 1837 1838 elif mod_pos[res_num] == 40: # converts D-THREONINE (DTH) to THR 1839 1840 atom_site.setValue('THR', 'label_comp_id', atom_row) 1841 1842 try: 1843 atom_site.setValue('THR', 'auth_comp_id', atom_row) 1844 except IndexError: 1845 pass 1846 1847 if (res_num, residue, 'DTH -> THR') not in mod_row: 1848 mod_row.append((res_num, residue, 'DTH -> THR')) 1849 1850 elif mod_pos[res_num] == 41: # converts D-ISOLEUCINE (DIL) to ILE 1851 1852 atom_site.setValue('ILE', 'label_comp_id', atom_row) 1853 1854 try: 1855 atom_site.setValue('ILE', 'auth_comp_id', atom_row) 1856 except IndexError: 1857 pass 1858 1859 if (res_num, residue, 'DIL -> ILE') not in mod_row: 1860 mod_row.append((res_num, residue, 'DIL -> ILE')) 1861 1862 elif mod_pos[res_num] == 42: # converts N-METHYL ASPARAGINE (MEN) to ASN 1863 1864 atom_site.setValue('ASN', 'label_comp_id', atom_row) 1865 1866 try: 1867 atom_site.setValue('ASN', 'auth_comp_id', atom_row) 1868 except IndexError: 1869 pass 1870 1871 if (res_num, residue, 'MEN -> ASN') not in mod_row: 1872 mod_row.append((res_num, residue, 'MEN -> ASN')) 1873 1874 elif mod_pos[res_num] == 43: # converts D-TYROSINE (DTY) to TYR 1875 1876 atom_site.setValue('TYR', 'label_comp_id', atom_row) 1877 1878 try: 1879 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 1880 except IndexError: 1881 pass 1882 1883 if (res_num, residue, 'DTY -> TYR') not in mod_row: 1884 mod_row.append((res_num, residue, 'DTY -> TYR')) 1885 1886 elif mod_pos[res_num] == 44: # converts N-CARBOXYMETHIONINE (CXM) to MET 1887 1888 atom_site.setValue('MET', 'label_comp_id', atom_row) 1889 1890 try: 1891 atom_site.setValue('MET', 'auth_comp_id', atom_row) 1892 except IndexError: 1893 pass 1894 1895 if (res_num, residue, 'CXM -> MET') not in mod_row: 1896 mod_row.append((res_num, residue, 'CXM -> MET')) 1897 1898 elif mod_pos[res_num] == 45: # converts D-GLUTAMINE (DGN) to MET 1899 1900 atom_site.setValue('GLN', 'label_comp_id', atom_row) 1901 1902 try: 1903 atom_site.setValue('GLN', 'auth_comp_id', atom_row) 1904 except IndexError: 1905 pass 1906 1907 if (res_num, residue, 'DGN -> GLN') not in mod_row: 1908 mod_row.append((res_num, residue, 'DGN -> GLN')) 1909 1910 elif mod_pos[res_num] == 46: # converts D-TRYPTOPHAN (DTR) to TRP 1911 1912 atom_site.setValue('TRP', 'label_comp_id', atom_row) 1913 1914 try: 1915 atom_site.setValue('TRP', 'auth_comp_id', atom_row) 1916 except IndexError: 1917 pass 1918 1919 if (res_num, residue, 'DTR -> TRP') not in mod_row: 1920 mod_row.append((res_num, residue, 'DTR -> TRP')) 1921 1922 elif mod_pos[res_num] == 47: # converts N-ACETYL-SERINE (SAC) to SER 1923 1924 atom_site.setValue('SER', 'label_comp_id', atom_row) 1925 1926 try: 1927 atom_site.setValue('SER', 'auth_comp_id', atom_row) 1928 except IndexError: 1929 pass 1930 1931 if (res_num, residue, 'SAC -> SER') not in mod_row: 1932 mod_row.append((res_num, residue, 'SAC -> SER')) 1933 1934 elif mod_pos[res_num] == 48: # converts D-ASPARAGINE (DSG) to ASN 1935 1936 atom_site.setValue('ASN', 'label_comp_id', atom_row) 1937 1938 try: 1939 atom_site.setValue('ASN', 'auth_comp_id', atom_row) 1940 except IndexError: 1941 pass 1942 1943 if (res_num, residue, 'DSG -> ASN') not in mod_row: 1944 mod_row.append((res_num, residue, 'DSG -> ASN')) 1945 1946 elif mod_pos[res_num] == 49: # converts N-METHYL METHIONINE (MME) to MET 1947 1948 atom_site.setValue('MET', 'label_comp_id', atom_row) 1949 1950 try: 1951 atom_site.setValue('MET', 'auth_comp_id', atom_row) 1952 except IndexError: 1953 pass 1954 1955 if (res_num, residue, 'MME -> MET') not in mod_row: 1956 mod_row.append((res_num, residue, 'MME -> MET')) 1957 1958 elif mod_pos[res_num] == 50: # converts N-methyl-L-alanine (MAA) to ALA 1959 1960 atom_site.setValue('ALA', 'label_comp_id', atom_row) 1961 1962 try: 1963 atom_site.setValue('ALA', 'auth_comp_id', atom_row) 1964 except IndexError: 1965 pass 1966 1967 if (res_num, residue, 'MAA -> ALA') not in mod_row: 1968 mod_row.append((res_num, residue, 'MAA -> ALA')) 1969 1970 elif mod_pos[res_num] == 51: # converts 3-FLUOROTYROSINE (YOF) to TYR 1971 1972 atom_site.setValue('TYR', 'label_comp_id', atom_row) 1973 1974 try: 1975 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 1976 except IndexError: 1977 pass 1978 1979 if (res_num, residue, 'YOF -> TYR') not in mod_row: 1980 mod_row.append((res_num, residue, 'YOF -> TYR')) 1981 1982 elif mod_pos[res_num] == 52: # converts (4R)-4-fluoro-L-proline (FP9) to PRO 1983 1984 atom_site.setValue('PRO', 'label_comp_id', atom_row) 1985 1986 try: 1987 atom_site.setValue('PRO', 'auth_comp_id', atom_row) 1988 except IndexError: 1989 pass 1990 1991 if (res_num, residue, 'FP9 -> PRO') not in mod_row: 1992 mod_row.append((res_num, residue, 'FP9 -> PRO')) 1993 1994 elif mod_pos[res_num] == 53: # converts N-formyl-L-valine (FVA) to VAL 1995 1996 atom_site.setValue('VAL', 'label_comp_id', atom_row) 1997 1998 try: 1999 atom_site.setValue('VAL', 'auth_comp_id', atom_row) 2000 except IndexError: 2001 pass 2002 2003 if (res_num, residue, 'FVA -> VAL') not in mod_row: 2004 mod_row.append((res_num, residue, 'FVA -> VAL')) 2005 2006 elif mod_pos[res_num] == 54: # converts N-methyl-D-leucine (MLU) to LEU 2007 2008 atom_site.setValue('LEU', 'label_comp_id', atom_row) 2009 2010 try: 2011 atom_site.setValue('LEU', 'auth_comp_id', atom_row) 2012 except IndexError: 2013 pass 2014 2015 if (res_num, residue, 'MLU -> LEU') not in mod_row: 2016 mod_row.append((res_num, residue, 'MLU -> LEU')) 2017 2018 elif mod_pos[res_num] == 55: # converts (betaR)-3-chloro-beta-hydroxy-L-tyrosine (OMY) to TYR 2019 2020 atom_site.setValue('TYR', 'label_comp_id', atom_row) 2021 2022 try: 2023 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 2024 except IndexError: 2025 pass 2026 2027 if (res_num, residue, 'OMY -> TYR') not in mod_row: 2028 mod_row.append((res_num, residue, 'OMY -> TYR')) 2029 2030 elif mod_pos[res_num] == 56: # converts GAMMA-D-GLUTAMIC ACID (FGA) to GLU 2031 2032 atom_site.setValue('GLU', 'label_comp_id', atom_row) 2033 2034 try: 2035 atom_site.setValue('GLU', 'auth_comp_id', atom_row) 2036 except IndexError: 2037 pass 2038 2039 if (res_num, residue, 'FGA -> GLU') not in mod_row: 2040 mod_row.append((res_num, residue, 'FGA -> GLU')) 2041 2042 elif mod_pos[res_num] == 57: # converts N-METHYLPHENYLALANINE (MEA) to PHE 2043 2044 atom_site.setValue('PHE', 'label_comp_id', atom_row) 2045 2046 try: 2047 atom_site.setValue('PHE', 'auth_comp_id', atom_row) 2048 except IndexError: 2049 pass 2050 2051 if (res_num, residue, 'MEA -> PHE') not in mod_row: 2052 mod_row.append((res_num, residue, 'MEA -> PHE')) 2053 2054 elif mod_pos[res_num] == 58: # converts S-(METHYLMERCURY)-L-CYSTEINE (CMH) to CYS 2055 2056 atom_site.setValue('CYS', 'label_comp_id', atom_row) 2057 2058 try: 2059 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 2060 except IndexError: 2061 pass 2062 2063 if (res_num, residue, 'CMH -> CYS') not in mod_row: 2064 mod_row.append((res_num, residue, 'CMH -> CYS')) 2065 2066 elif mod_pos[res_num] == 59: # converts D-HISTIDINE (DHI) to HIS 2067 2068 atom_site.setValue('HIS', 'label_comp_id', atom_row) 2069 2070 try: 2071 atom_site.setValue('HIS', 'auth_comp_id', atom_row) 2072 except IndexError: 2073 pass 2074 2075 if (res_num, residue, 'DHI -> HIS') not in mod_row: 2076 mod_row.append((res_num, residue, 'DHI -> HIS')) 2077 2078 elif mod_pos[res_num] == 60: # converts SELENOCYSTEINE (SEC) to CYS 2079 2080 atom_site.setValue('CYS', 'label_comp_id', atom_row) 2081 2082 try: 2083 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 2084 except IndexError: 2085 pass 2086 2087 if (res_num, residue, 'SEC -> CYS') not in mod_row: 2088 mod_row.append((res_num, residue, 'SEC -> CYS')) 2089 2090 elif mod_pos[res_num] == 61: # converts (betaR)-3-CHLORO-BETA-HYDROXY-D-TYROSINE (OMZ) to TYR 2091 2092 atom_site.setValue('TYR', 'label_comp_id', atom_row) 2093 2094 try: 2095 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 2096 except IndexError: 2097 pass 2098 2099 if (res_num, residue, 'OMZ -> TYR') not in mod_row: 2100 mod_row.append((res_num, residue, 'OMZ -> TYR')) 2101 2102 elif mod_pos[res_num] == 62: # converts S-ACETYL-CYSTEINE (SCY) to CYS 2103 2104 atom_site.setValue('CYS', 'label_comp_id', atom_row) 2105 2106 try: 2107 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 2108 except IndexError: 2109 pass 2110 2111 if (res_num, residue, 'SCY -> CYS') not in mod_row: 2112 mod_row.append((res_num, residue, 'SCY -> CYS')) 2113 2114 elif mod_pos[res_num] == 63: # converts S-OXYMETHIONINE (MHO) to MET 2115 2116 atom_site.setValue('MET', 'label_comp_id', atom_row) 2117 2118 try: 2119 atom_site.setValue('MET', 'auth_comp_id', atom_row) 2120 except IndexError: 2121 pass 2122 2123 if (res_num, residue, 'MHO -> MET') not in mod_row: 2124 mod_row.append((res_num, residue, 'MHO -> MET')) 2125 2126 elif mod_pos[res_num] == 64: # converts D-METHIONINE (MED) to MET 2127 2128 atom_site.setValue('MET', 'label_comp_id', atom_row) 2129 2130 try: 2131 atom_site.setValue('MET', 'auth_comp_id', atom_row) 2132 except IndexError: 2133 pass 2134 2135 if (res_num, residue, 'MED -> MET') not in mod_row: 2136 mod_row.append((res_num, residue, 'MED -> MET')) 2137 2138 elif mod_pos[res_num] == 65: # converts S-DIMETHYLARSINOYL-CYSTEINE (CAF) to CYS 2139 2140 atom_site.setValue('CYS', 'label_comp_id', atom_row) 2141 2142 try: 2143 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 2144 except IndexError: 2145 pass 2146 2147 if (res_num, residue, 'CAF -> CYS') not in mod_row: 2148 mod_row.append((res_num, residue, 'CAF -> CYS')) 2149 2150 elif mod_pos[res_num] == 66: # converts META-NITRO-TYROSINE (NIY) to TYR 2151 2152 atom_site.setValue('TYR', 'label_comp_id', atom_row) 2153 2154 try: 2155 atom_site.setValue('TYR', 'auth_comp_id', atom_row) 2156 except IndexError: 2157 pass 2158 2159 if (res_num, residue, 'NIY -> TYR') not in mod_row: 2160 mod_row.append((res_num, residue, 'NIY -> TYR')) 2161 2162 elif mod_pos[res_num] == 67: # converts O-ACETYLSERINE (OAS) to SER 2163 2164 atom_site.setValue('SER', 'label_comp_id', atom_row) 2165 2166 try: 2167 atom_site.setValue('SER', 'auth_comp_id', atom_row) 2168 except IndexError: 2169 pass 2170 2171 if (res_num, residue, 'OAS -> SER') not in mod_row: 2172 mod_row.append((res_num, residue, 'OAS -> SER')) 2173 2174 elif mod_pos[res_num] == 68: # converts S-METHYL-THIO-CYSTEINE (SCH) to CYS 2175 2176 atom_site.setValue('CYS', 'label_comp_id', atom_row) 2177 2178 try: 2179 atom_site.setValue('CYS', 'auth_comp_id', atom_row) 2180 except IndexError: 2181 pass 2182 2183 if (res_num, residue, 'SCH -> CYS') not in mod_row: 2184 mod_row.append((res_num, residue, 'SCH -> CYS')) 2185 2186 elif mod_pos[res_num] == 69: # converts 2-methyl-L-norleucine (MK8) to LEU 2187 2188 atom_site.setValue('LEU', 'label_comp_id', atom_row) 2189 2190 try: 2191 atom_site.setValue('LEU', 'auth_comp_id', atom_row) 2192 except IndexError: 2193 pass 2194 2195 if (res_num, residue, 'MK8 -> LEU') not in mod_row: 2196 mod_row.append((res_num, residue, 'MK8 -> LEU')) 2197 2198 elif mod_pos[res_num] == 70: # converts METHIONINE SULFOXIDE (SME) to MET 2199 2200 atom_site.setValue('MET', 'label_comp_id', atom_row) 2201 2202 try: 2203 atom_site.setValue('MET', 'auth_comp_id', atom_row) 2204 except IndexError: 2205 pass 2206 2207 if (res_num, residue, 'SME -> MET') not in mod_row: 2208 mod_row.append((res_num, residue, 'SME -> MET')) 2209 2210 elif mod_pos[res_num] == 71: # converts 5-HYDROXYLYSINE (LYZ) to LYS 2211 2212 atom_site.setValue('LYS', 'label_comp_id', atom_row) 2213 2214 try: 2215 atom_site.setValue('LYS', 'auth_comp_id', atom_row) 2216 except IndexError: 2217 pass 2218 2219 if (res_num, residue, 'LYZ -> LYS') not in mod_row: 2220 mod_row.append((res_num, residue, 'LYZ -> LYS')) 2221 2222 ######################################################################## 2223 ## Notify user about modification made to cif data ## 2224 ######################################################################## 2225 2226 if DEBUG_MODE: 2227 mod_model_num = len([ msg for msg in cha_row if msg[2] == 'model_num' ]) 2228 mod_ins_code = len([ msg for msg in cha_row if msg[2] == 'ins_code' ]) 2229 mod_group_PDB = len([ msg for msg in cha_row if msg[2] == 'group_PDB' ]) 2230 2231 if mod_model_num != 0: 2232 print ('! {p} {c}: modified atom_site.pdbx_PDB_model_num for {cr} residues to 1.'.format( 2233 p = pdb_code, 2234 c = chain, 2235 cr = mod_model_num)) 2236 2237 if mod_ins_code != 0: 2238 print ('! {p} {c}: modified atom_site.pdbx_PDB_ins_code for {cr} residues to "?".'.format( 2239 p = pdb_code, 2240 c = chain, 2241 cr = mod_ins_code)) 2242 2243 if mod_group_PDB != 0: 2244 print ('! {p} {c}: modified atom_site.group_PDB for {cr} residues to "ATOM".'.format( 2245 p = pdb_code, 2246 c = chain, 2247 cr = mod_group_PDB)) 2248 2249 for residue in reversed(mod_row): 2250 print ('! {p} {c}: modified cif pos {cr} ({nr}).'.format( 2251 p = pdb_code, 2252 c = chain, 2253 cr = residue[0], 2254 ca = residue[1], 2255 nr = residue[2])) 2256 2257 2258 for residue in reversed(rem_row): 2259 print ('! {p} {c}: removed cif pos {cr} ({ca})'.format( 2260 p = pdb_code, 2261 c = chain, 2262 cr = residue[0], 2263 ca = residue[1])) 2264 2265 cif_edits[pdb_code] = block 2266 2267 # write modified pir to disk 2268 for pdb_code in cif_edits: 2269 out = open(os.path.join(output_path, pdb_code + '.cif'), 'w') 2270 writer = PdbxWriter(out) 2271 writer.writeContainer(cif_edits[pdb_code]) 2272 2273 # Delete missing entries from the last template sequence to the first 2274 for row in reversed(del_row): 2275 template_grid.del_row(row) 2276 2277 return template_grid 2278 2279def remove_self_alignment(template_grid, query_name): 2280 """ Removes a self alignment from the final pir alignment to prevent clashes with MODELLER """ 2281 2282 to_delete = list() 2283 2284 for row in range(template_grid.get_grid_height()): 2285 if template_grid._pdb_code[row] == query_name: 2286 to_delete.append(row) 2287 2288 for row in reversed(to_delete): 2289 template_grid.del_row(row) 2290 2291 return True 2292 2293def write_to_file(line_list, fname): 2294 """ Writes the final pir file """ 2295 2296 with open(fname, 'w+') as fout: 2297 for line in line_list: 2298 fout.write(line + "\n") 2299 2300def arg(): 2301 import argparse 2302 description = """Creates a MODELLER alignment (*.pir) from a HHSearch results file (*.hhr).""" 2303 epilog= '2016 Harald Voehringer.' 2304 # Initiate a ArgumentParser Class 2305 parser = argparse.ArgumentParser(description = description, epilog = epilog) 2306 2307 # Call add_options to the parser 2308 parser.add_argument('input', help = 'results file from HHsearch with hit list and alignment', metavar = 'FILE') 2309 parser.add_argument('cifs', help = 'path to the folder containing cif files', metavar = 'DIR') 2310 parser.add_argument('pir', help = 'output file (PIR-formatted multiple alignment)', metavar = 'FILE') 2311 parser.add_argument('output', help = 'path to the folder where modified cif files should be written to', metavar = 'DIR') 2312 2313 parser.add_argument('-v', '--verbose', action = 'store_true', help = 'verbose mode') 2314 parser.add_argument('-m', nargs = '+', help = 'pick hits with specified indices (e.g. -m 2 5)', metavar = 'INT') 2315 parser.add_argument('-e', type = float, help = 'maximum E-Value threshold (e.g. -e 0.001)', metavar = 'FLOAT') 2316 parser.add_argument('-r', type = float, help = 'residue ratio (filter alignments that have contribute at least residues according to the specified ratio).', 2317 default = 0, metavar = 'FLOAT') 2318 2319 parser.add_argument('-c', help = 'convert non-canonical residues (default = True)', action = 'store_true', default = True) 2320 2321 2322 return parser 2323 2324 2325def main(): 2326 import sys 2327 parser = arg() 2328 args = parser.parse_args(sys.argv[1:]) 2329 2330 global DEBUG_MODE 2331 2332 if args.verbose: 2333 DEBUG_MODE = True 2334 2335 query_name, query_chain = get_query_name(args.input) 2336 2337 data = read_result(args.input) 2338 selected_templates = list() 2339 2340 if args.m and not args.e: 2341 selection = map(lambda x: int(x), args.m) 2342 print ('Selected templates {st}.'.format(st = ', '.join(args.m))) 2343 2344 for i in selection: 2345 tmp_info = str(data[i - 1].template_info.split('>')[1]) 2346 print ('{i}: {t}'.format( 2347 i = i, 2348 t = tmp_info[0:80])) 2349 2350 selected_templates.append(data[i - 1]) 2351 2352 elif args.e and not args.m: 2353 print ('Selected templates satisfying E-val <= {e}'.format(e = args.e)) 2354 2355 e_values = { float(j.evalue):i for i, j in enumerate(data) } 2356 selection = sorted([ val for key, val in e_values.items() if key <= args.e ]) 2357 2358 for i in selection: 2359 tmp_info = str(data[i - 1].template_info.split('>')[1]) 2360 print ('{i}: {t}'.format( 2361 i = i + 1, 2362 t = tmp_info[0:80])) 2363 2364 selected_templates.append(data[i - 1]) 2365 2366 elif args.m and args.e: 2367 print ('! Please do not use option -m and -e at the same time ! Exiting.') 2368 sys.exit() 2369 else: 2370 selected_templates = data 2371 2372 print ('Creating pir file using all templates ({n})'.format( 2373 n = len(selected_templates))) 2374 2375 query_grid = create_query_grid(selected_templates) # load query grid 2376 print ('query_grid') 2377 print(query_grid) 2378 gapless_query_grid = create_gapless_grid(query_grid) # remove gaps 2379 print ('gapless_query_grid') 2380 print(gapless_query_grid) 2381 processed_query_grid = process_query_grid(query_grid, gapless_query_grid) # insert gaps 2382 ##processed_query_grid = process_query_grid(query_grid, query_grid) # insert gaps 2383 print ('processed_query_grid') 2384 print (processed_query_grid) 2385 glob_seq = derive_global_seq(processed_query_grid, query_name, query_chain) # derive query sequence 2386 template_grid = create_template_grid(selected_templates) # create template grid 2387 print ('template_grid') 2388 print (template_grid) 2389 processed_template_grid = process_template_grid(query_grid, template_grid) # insert gaps to template sequnces 2390 print ('processed_query_grid') 2391 print (processed_query_grid) 2392 print ('hzhu processed_template_grid') 2393 print (processed_template_grid) 2394 final_grid = compare_with_cifs(processed_template_grid, args.cifs, args.output, args.c, args.r) # compare with atom section of cifs 2395 remove_self_alignment(final_grid, query_name) # remove self alignment if any 2396 write_to_file([glob_seq, final_grid.display()], args.pir) 2397 2398 2399if __name__ == "__main__": 2400 main() 2401