1#!/usr/local/bin/python3.8
2
3from hh_reader import read_result
4from copy import deepcopy
5from pdbx.reader.PdbxReader import PdbxReader
6from pdbx.writer.PdbxWriter import PdbxWriter
7import re, os, sys, tempfile, glob
8
9from operator import itemgetter  # hzhu
10from itertools import groupby    # hzhu
11
12EMPTY = '*'
13GAP = '-'
14DEBUG_MODE = False
15
16class Gap:
17    """ A gap is a continuous stretch of indels.
18        It is defined by a opening position and a size/length
19    """
20    def __init__(self, open_pos, size):
21        self.open_pos = open_pos   # gap opening position
22        self.size = size           # num of indels in the gap
23
24    def __repr__(self):
25        return 'Gap opening pos = %d, size = %d' % (self.open_pos, self.size)
26
27class Grid:
28    """
29    Implementation of 2D grid of cells
30    Includes boundary handling
31    """
32
33    def __init__(self, grid_height, grid_width):
34        """
35        Initializes grid to be empty, take height and width of grid as parameters
36        Indexed by rows (left to right), then by columns (top to bottom)
37        """
38
39        self._grid_height = grid_height
40        self._grid_width = grid_width
41        self._cells = [ [ EMPTY for dummy_col in range(self._grid_width) ]
42                       for dummy_row in range(self._grid_height)]
43
44    def __str__(self):
45        """ Return multi-line string represenation for grid """
46
47        ans = ''
48        for row in range(self._grid_height):
49            ans += ''.join(self._cells[row])
50            ans += '\n'
51        return ans
52
53    def clear(self):
54        """ Clears grid to be empty """
55
56        self._cells = [[EMPTY for dummy_col in range(self._grid_width)]
57                       for dummy_row in range(self._grid_height)]
58
59    def get_grid_height(self):
60        """ Return the height of the grid """
61
62        return self._grid_height
63
64    def get_grid_width(self):
65        """ Return the width of the grid """
66
67        return self._grid_width
68
69    def get_cell(self, row, col):
70        return self._cells[row][col]
71
72    def get_seq_start(self, row):
73        """ Returns the start position of the sequence """
74
75        index = 0
76        for pos in self._cells[row]:
77            if pos != EMPTY:
78                return index
79            index += 1
80
81        return None
82
83    def get_seq_end(self, row):
84        """ Returns the end position of the sequence """
85
86        index = 0
87        for pos in reversed(self._cells[row]):
88            if pos != EMPTY:
89                return self.get_grid_width() - index
90            index += 1
91
92        return None
93
94    def get_gaps(self, row):
95        """ Return the position of gaps in a row """
96
97        gaps = list()
98
99        index = 0
100        for pos in self._cells[row]:
101            if pos == GAP:
102                gaps.append(index)
103            index += 1
104
105        return gaps
106
107    def get_gaps_ref_gapless(self, row):
108        """ Return the pos of gaps in a row.
109            The opening positions of the gaps are wrt. the gapless seq
110        """
111        # get all the indels
112        indels = self.get_gaps(row)
113        gaps = []
114        # combine continuous indels into a gap
115        for k,i in groupby( enumerate(indels), lambda x: x[0]-x[1] ):
116            g = list(map(itemgetter(1), i))
117            gaps.append( Gap(g[0], len(g)) )
118
119        # offset the gap opening positions
120        for i in range(1, len(gaps)):
121            # offset by total gap number before
122            gaps[i].open_pos -= sum([gaps[j].size for j in range(i)])
123
124        return gaps  # a list of Gap instances
125
126    def get_seq_indeces(self, row):
127
128        seq = list()
129        for pos, res in enumerate(self._cells[row]):
130            if res != EMPTY and res != GAP:
131                seq.append(pos)
132
133        return seq
134
135    ## def get_gap_list(self):  # hzhu commented this out. wrote a new version
136    ##     """ Returns a list of list of all gap positions in the sequence grid. """
137    ##     gap_pos = set()
138
139    ##     for row in range(self.get_grid_height()):
140    ##         for gap in self.get_gaps(row):
141    ##             gap_pos.add(gap)
142
143    ##     gap_pos = list(sorted(gap_pos))
144    ##     boundaries = [ (x + 1) for x, y in zip(gap_pos, gap_pos[1:]) if y - x != 1 ]
145
146    ##     gap_list = list()
147    ##     prev = 0
148
149    ##     for boundary in boundaries:
150    ##         sub_list = [ pos for pos in gap_pos[prev:] if pos < boundary ]
151    ##         gap_list.append(sub_list)
152    ##         prev += len(sub_list)
153
154    ##     gap_list.append([ x for x in gap_pos[prev:]])
155
156    ##     return gap_list
157
158    def get_gap_list(self):
159        """ Returns a list of Gap instances for all rows in the grid
160        """
161        gap_dict = dict()  # each position should occur as gap at most once
162                           # keys are gap openning positions
163                           # values are Gap instances
164        gap_list = []
165        for row in range(self.get_grid_height()):
166            gap_pos = []
167            gaps = self.get_gaps_ref_gapless(row)
168
169            for g in gaps:
170                if g.open_pos in gap_dict: # if there is already gaps at this open pos
171                    if g.size > gap_dict[g.open_pos].size: # if new gap is bigger
172                        gap_dict[g.open_pos] = g  # keep the larger gap as they overlap
173                else:
174                    gap_dict[g.open_pos] = g
175
176        gap_list = sorted(list(gap_dict.values()), key=lambda x: x.open_pos) # sort according to start position
177        return gap_list  # a list of Gap instances
178
179    def set_gap(self, row, col):
180        """ Set cell with index (row, col) to be a gap """
181
182        self._cells[row][col] = GAP
183
184    def set_empty(self, row, col):
185        """ Set cell with index (row, col) to be a gap """
186
187        self._cells[row][col] = EMPTY
188
189    def set_cell(self, row, col, res):
190        """ Set cell with index (row, col) to be full """
191
192        self._cells[row][col] = res
193
194    def is_empty(self, row, col):
195        """ Checks whether cell with index (row, col) is empty """
196
197        return self._cells[row][col] == EMPTY
198
199    def is_gap(self, row, col):
200        """ Checks whetehr cell with indxex (row, col) is a gap """
201
202        return self._cells[row][col] == GAP
203
204    def insert_gaps(self, cols):
205        """ Inserts a gaps into a column of the template grid """
206
207        for col in cols:
208            for row in range(self._grid_height):
209                if col >= self.get_seq_start(row) and col < self.get_seq_end(row):
210                    self._cells[row].insert(col, GAP)
211                else:
212                    self._cells[row].insert(col, EMPTY)
213
214            self._grid_width += 1
215
216    def insert_gaps_row(self, cols, row):
217        """ Intert gaps into cols only for certain row"""
218        for col in cols:
219            if col >= self.get_seq_start(row) and col < self.get_seq_end(row):
220                self._cells[row].insert(col, GAP)
221            else:
222                self._cells[row].insert(col, EMPTY)
223            # NOTE: grid_with should not be changed after every row is updated.
224            #self._grid_width += 1
225
226    def clean_trail_empty(self):
227        """ Remove all trailing EMPTY and pad grid to same width"""
228        # first find out the max length (exluding trailing EMPTY)
229        max_width = 0
230        for row in range(self._grid_height):
231            for i in range(len(self._cells[row])-1, -1, -1):
232                if self._cells[row][i] != EMPTY:
233                    break
234            if i+1 > max_width:
235                max_width = i+1
236
237        # delete excessive EMPTY
238        for row in range(self._grid_height):
239            del self._cells[row][max_width:]
240
241        # then pad all rows to the same length
242        [self._cells[row].append( EMPTY * (max_width-len(self._cells[row])) ) \
243                                 for row in range(self._grid_height) if len(self._cells[row]) < max_width]
244        self._grid_width = max_width
245        return
246
247    def remove_gaps(self, keep_width=True): # hzhu add keep_width option
248        """ Removes all gaps from the grid. """
249
250        for row in range(self.get_grid_height()):
251            not_gap = list()
252            for col in range(self.get_grid_width()):
253                if not self.is_gap(row, col):
254                    not_gap.append(col)
255
256            self._cells[row] = [ self._cells[row][col] for col in not_gap ]
257
258            if keep_width:  # hzhu only pad to original width if desired
259                for del_pos in range(self._grid_width - len(not_gap)):
260                    self._cells[row].append(EMPTY)
261
262        if not keep_width: # hzhu if width is not kept, make sure width is consistent
263            self.clean_trail_empty()
264
265        return
266
267
268class QueryGrid(Grid):
269
270    def __init__(self, grid_height, grid_width):
271        Grid.__init__(self, grid_height, grid_width)
272
273    def get_query_start(self, row):
274        """ Returns the query start position """
275        return self.get_seq_start(row) + 1
276
277    def get_query_end(self, row):
278        """ Returns the query end postion """
279
280        return self.get_seq_end(row) - len(self.get_gaps(row))
281
282    def get_col_residue(self, col):
283        """ Tries to find a the query residue in a given column. Used by derive_global_seq() to
284        identify the global query sequence """
285
286        for row in range(self.get_grid_height()):
287            if not self.is_empty(row, col):
288                return self._cells[row][col]
289
290        return GAP
291
292class TemplateGrid(Grid):
293
294    def __init__(self, grid_height, grid_width):
295        Grid.__init__(self, grid_height, grid_width)
296
297        self._start = list()
298        self._end = list()
299        self._pdb_code = list()
300        self._chain = list()
301        self._organism = list()
302        self._resolution = list()
303
304    def display(self):
305        """ Return multi-line string represenation for grid """
306
307        ans = ''
308        for row in range(self._grid_height):
309            ans += '>P1;{p}\nstructure:{p}:{s}:{c}:{e}:{c}::{o}:{r}:\n{a}*\n'.format(
310                p = self._pdb_code[row],
311                s = add_white_space_end(self.get_template_start(row), 4),
312                e = add_white_space_end(self.get_template_end(row), 4),
313                c = self._chain[row],
314                o = self._organism[row],
315                r = self._resolution[row],
316                a = ''.join(self._cells[row]).replace(EMPTY, GAP).replace('#', GAP))
317
318        return ans
319
320    def debug(self, row):
321        """ Return multi-line string represenation for grid, for debugging purposes """
322
323        ans = '{p}\nInternal: {s}, {e} Query: {qs}, {qe} Gaps ({g1}): {g2}\n{seq}\n'.format(
324            p = self._pdb_code[row],
325            s = self.get_seq_start(row),
326            e = self.get_seq_end(row),
327            qs = self.get_template_start(row),
328            qe = self.get_template_end(row),
329            g1 = len(self.get_gaps(row)),
330            g2 = ', '.join([str(gap) for gap in self.get_gaps(row)]),
331            seq = ''.join(self._cells[row]))
332
333        return ans
334
335    def set_metadata(self, row, start, end, pdb_code, chain, organism, resolution):
336        """ Used by create_template_grid() to setup metadata of pir template """
337
338        self._start.append(start)
339        self._end.append(end)
340        self._pdb_code.append(pdb_code)
341        self._chain.append(chain)
342        self._organism.append(organism)
343        self._resolution.append(resolution)
344
345    def set_map(self, row, start, end):
346
347        self._start[row] = start
348        self._end[row] = end
349
350    def get_template_start(self, row):
351        """ Returns the template start position """
352
353        return self._start[row]
354
355    def get_template_end(self, row):
356        """ Return sthe template end position """
357
358        return self._end[row]
359
360    def del_row(self, row):
361        """ Removes a complete template entry from the grid """
362
363        del self._cells[row]
364        del self._start[row]
365        del self._end[row]
366        del self._pdb_code[row]
367        del self._chain[row]
368        del self._organism[row]
369        del self._resolution[row]
370        self._grid_height -= 1
371
372# Helper functions
373
374def add_white_space_end(string, length):
375    """ Adds whitespaces to a string until it has the wished length"""
376
377    edited_string = str(string)
378
379    if len(edited_string) >= length:
380        return string
381    else:
382        while len(edited_string) != length:
383            edited_string += ' '
384
385    return edited_string
386
387def convert_aa_code(three_letter, convert):
388    """
389    Assumes a string that contains a three letter aminoacid code and
390    returns the corresponding one letter code.
391    """
392
393    aa_code = {
394        'CYS': 'C',
395        'ASP': 'D',
396        'SER': 'S',
397        'GLN': 'Q',
398        'LYS': 'K',
399        'ILE': 'I',
400        'PRO': 'P',
401        'THR': 'T',
402        'PHE': 'F',
403        'ASN': 'N',
404        'GLY': 'G',
405        'HIS': 'H',
406        'LEU': 'L',
407        'ARG': 'R',
408        'TRP': 'W',
409        'ALA': 'A',
410        'VAL': 'V',
411        'GLU': 'E',
412        'TYR': 'Y',
413        'MET': 'M',
414      }
415
416    non_canonical = {
417        'MSE': 1,
418        'HYP': 2,
419        'MLY': 3,
420        'SEP': 4,
421        'TPO': 5,
422        'CSO': 6,
423        'PTR': 7,
424        'KCX': 8,
425        'CME': 9,
426        'CSD': 10,
427        'CAS': 11,
428        'MLE': 12,
429        'DAL': 13,
430        'CGU': 14,
431        'DLE': 15,
432        'FME': 16,
433        'DVA': 17,
434        'OCS': 18,
435        'DPR': 19,
436        'MVA': 20,
437        'TYS': 21,
438        'M3L': 22,
439        'SMC': 23,
440        'ALY': 24,
441        'CSX': 25,
442        'DCY': 26,
443        'NLE': 27,
444        'DGL': 28,
445        'DSN': 29,
446        'CSS': 30,
447        'DLY': 31,
448        'MLZ': 32,
449        'DPN': 33,
450        'DAR': 34,
451        'PHI': 35,
452        'IAS': 36,
453        'DAS': 37,
454        'HIC': 38,
455        'MP8': 39,
456        'DTH': 40,
457        'DIL': 41,
458        'MEN': 42,
459        'DTY': 43,
460        'CXM': 44,
461        'DGN': 45,
462        'DTR': 46,
463        'SAC': 47,
464        'DSG': 48,
465        'MME': 49,
466        'MAA': 50,
467        'YOF': 51,
468        'FP9': 52,
469        'FVA': 53,
470        'MLU': 54,
471        'OMY': 55,
472        'FGA': 56,
473        'MEA': 57,
474        'CMH': 58,
475        'DHI': 59,
476        'SEC': 60,
477        'OMZ': 61,
478        'SCY': 62,
479        'MHO': 63,
480        'MED': 64,
481        'CAF': 65,
482        'NIY': 66,
483        'OAS': 67,
484        'SCH': 68,
485        'MK8': 69,
486        'SME': 70,
487        'LYZ': 71
488    }
489
490    if three_letter in aa_code.keys():
491        return aa_code[three_letter]
492    elif convert and (three_letter in non_canonical.keys()):
493        return non_canonical[three_letter]
494    else:
495        return '-'
496
497
498def get_query_name(hhr_file):
499
500    with open(hhr_file) as fh:
501        for line in fh:
502            if line.startswith('Query'):
503                # match the PDB Code
504                m = re.search('(\d[A-Z0-9]{3})_(\S)', line)
505
506                if m:
507                    pdb_code = m.group(1)
508                    chain = m.group(2)
509                else:
510                    pdb_code = 'UKNP'
511                    chain = 'A'
512                    # raise ValueError('Input HHR-File Does not seem to be a PDB-Structure')
513
514                break
515
516    return pdb_code, chain
517
518def get_cif_files(folder):
519    """ Gets all cif files located in folder. """
520
521    return glob(os.path.join(folder, '*.cif'))
522
523def open_cif(cif_file):
524    """ Assumes a mmCif file and returns a data block used for subsequent procedures """
525    # The "usual" procedure to open a mmCIF with pdbX/mmCIF
526    with open(cif_file) as cif_fh:
527        data = []
528        reader = PdbxReader(cif_fh)
529        reader.read(data)
530
531    block = data[0]
532
533    return block
534
535def get_pdb_entry_id(block):
536    """ Extracts the PDB entry information of a cif file and returns it as a string """
537
538    entry = block.getObj('entry')
539    entry_id = entry.getValue('id')
540
541    return entry_id
542
543
544def template_id_to_pdb(template_id):
545    """
546    Extracts PDB ID and chain name from the provided template id
547    """
548    # match PDBID without chain (8fab, 1a01)
549    m = re.match(r'/^(\d[A-Za-z0-9]{3})$', template_id)
550    if m:
551        return m.group(1).upper(), 'A'
552
553    # PDB CODE with chain Identifier
554    m = re.match(r'^(\d[A-Za-z0-9]{3})_(\S)$', template_id)
555    if m:
556        return m.group(1).upper(), m.group(2).upper()
557
558    # Match DALI ID
559    m = re.match(r'^(\d[A-Za-z0-9]{3})([A-Za-z0-9]?)_\d+$', template_id)
560    if m:
561        return m.group(1).upper(), m.group(2).upper()
562
563    # No PDB code and chain identified
564    return None, None
565
566
567def create_template_grid(hhr_data):
568    """ Creates a template grid """
569
570    total_seq = len(hhr_data)
571    templ_max = max( [ hhr.start[0] + len(to_seq(hhr.template_ali)) for hhr in hhr_data ] ) - 1
572
573
574    template_grid = TemplateGrid(total_seq, templ_max)
575
576    for row, template in enumerate(hhr_data):
577        seq_start = template.start[0] - 1
578        templatealignment = to_seq(template.template_ali)
579        seq_end = seq_start + len(templatealignment)
580
581        # Load Meta Data
582        start = template.start[1]
583        end = template.end[1]
584
585        # Get pdb_code and chain identifier of template
586        pdb_code, chain =  template_id_to_pdb(template.template_id)
587
588        m = re.search("(\d+.\d+)A", template.template_info) # try to extract resolution of the structure
589
590        if m:
591            resolution = m.group(1)
592        else:
593            resolution = ""
594
595        m = re.search("\{(.*)\}", template.template_info) # try to extract the organism
596        if m:
597            organism = m.group(1).replace(":", " ") # make sure that no colons are in the organism
598        else:
599            organism = ""
600
601        template_grid.set_metadata(row, start, end, pdb_code, chain, organism, resolution)
602
603        # Write sequence into the grid
604        for pos, col in enumerate(range(seq_start, seq_end)):
605            template_grid.set_cell(row, col, templatealignment[pos])
606
607    return template_grid
608
609
610def to_seq(ali):
611    if isinstance(ali, list):
612        return ''.join(ali)
613    else:
614        return ali
615
616
617def create_query_grid(hhr_data):
618    """ Creates a Query Grid """
619
620    total_seq = len(hhr_data)
621    query_max = max( [ hhr.start[0] + len(to_seq(hhr.query_ali)) for hhr in hhr_data ] ) - 1
622
623    query_grid = QueryGrid(total_seq, query_max)
624
625    for row, query in enumerate(hhr_data):
626
627        queryalignment = to_seq(query.query_ali)
628        query_start = query.start[0] - 1
629        query_end = query_start + len(queryalignment)
630
631        for pos, col in enumerate(range(query_start, query_end)):
632            if queryalignment[pos] not in ['Z', 'U', 'O', 'J', 'X', 'B']: # CAUTION
633
634                query_grid.set_cell(row, col, queryalignment[pos])
635
636    return query_grid
637
638def create_gapless_grid(grid):
639    """ Returns a gapless grid """
640
641    gapless = deepcopy(grid)
642    gapless.remove_gaps(keep_width=False)  # hzhu:  shrink grid
643
644    return gapless
645
646def process_query_grid(query_grid, gapless_grid):
647    """ Processes a query grid sucht that it contains all gaps
648    """
649    gaplist = query_grid.get_gap_list()
650    off_set = 0
651
652    for g in gaplist:
653        gapless_grid.insert_gaps([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ])
654        off_set += g.size
655
656    return gapless_grid
657
658def derive_global_seq(processed_query_grid, query_name, query_chain):
659
660    global_seq = list()
661
662    for col in range(processed_query_grid.get_grid_width()):
663        global_seq.append(processed_query_grid.get_col_residue(col))
664
665    # this is the query entry
666    header = '>P1;{q}\nsequence:{q}:1    :{c}:{l}  :{c}::::\n'.format(
667        q = query_name,
668        l = len(global_seq),
669        c = query_chain)
670
671    return header + ''.join(global_seq) + '*'
672
673def process_template_grid(query_grid, template_grid):
674    """ Insertes Gaps into the template grid
675        Only add gaps from **other** query_grids into template grid (NOT gapless)
676    """
677    gaplist = query_grid.get_gap_list()  # use this to keep the offset
678
679    for row in range(template_grid.get_grid_height()):
680        # do NOT consider gaps in current query row
681        gaplist_row = query_grid.get_gaps_ref_gapless(row)
682        gapdict_row = dict(zip([g.open_pos for g in gaplist_row],
683                               [g.size     for g in gaplist_row]))
684        off_set = 0
685        for g in gaplist:
686            # if there is a gap with same opening position in the current row,
687            # only consider g if it is larger than the on in the current row
688            if g.open_pos in gapdict_row:
689                if g.size > gapdict_row[g.open_pos]:
690                    template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos,
691                                                                              g.open_pos+g.size-gapdict_row[g.open_pos]) ], row)
692            else:
693                template_grid.insert_gaps_row([ p + off_set for p in range(g.open_pos, g.open_pos+g.size) ], row)
694
695            off_set += g.size  # even if the gaps are not inserted, the offset should be adjusted
696
697    template_grid.clean_trail_empty()  # clean the redundant trailing EMPTY char
698
699    return template_grid
700
701def compare_with_cifs(template_grid, folder, output_path, convert, threshold):
702    """
703    Compare the PIR Alignment with Atomsection of a mmCIF file. To make the ATOM-Section of
704    a mmCIF file compatible with MODELLER, each residue has in the ATOM-Section has to match
705    corresponding positions in the PIR-Alignment
706    """
707
708    # glob the mmCif files from given directory and map the PDB identifier to the path
709    cif_files = glob.glob(os.path.join(folder, '*.cif'))
710    cif_paths = { path.split('/')[-1].split('.')[0].upper() : path for path in cif_files }
711    cif_edits = dict()
712
713
714    # create the path where renumbered cifs are saved to
715    if not os.path.exists(output_path):
716        os.mkdir(output_path)
717
718    # if the cif does not contain any residue of the por alignment we delete it
719    del_row = list()
720
721    for row in range(template_grid.get_grid_height()):
722        # get the pdb code and strand id from the current template
723        pdb_code = template_grid._pdb_code[row]
724        chain = template_grid._chain[row]  # hhr users pdb chain ID
725
726
727        # load mmCif file accordingly
728        if pdb_code in cif_edits.keys():
729            block = cif_edits[pdb_code]
730        else:
731            try:
732                block = open_cif(cif_paths[pdb_code])
733            except KeyError:
734                del_row.append(row)
735                print ('! Did not find the mmCIF file for {pdb}. Removing it from the alignment.'.format(
736                    pdb = pdb_code))
737                continue
738
739        # Create a mapping of the atom site
740        atom_site = block.getObj('atom_site')
741
742        ########################################################################
743        ## Get the mapping of the residues in the atom section                ##
744        ########################################################################
745
746        cif_seq = dict()
747        # For the case that we have to rename a chain
748        cif_chains = set([])
749
750        # Iterate through the atomsection of the cif file
751        for atom_row in range(0, atom_site.getRowCount()):
752
753            try:
754                if atom_site.getValue('label_comp_id', atom_row) == 'HOH':
755                    continue
756                cif_chain = atom_site.getValue('label_asym_id', atom_row)
757                pdb_chain = atom_site.getValue('auth_asym_id', atom_row)   # use PDB chain ID
758            except IndexError:
759                pass
760
761            cif_chains.add(cif_chain)
762
763            # We do not care about the residues apart from the chain
764            #if cif_chain != chain: # hzhu
765            if pdb_chain != chain:  # hhr uses PDB chain, not the cif chain! hzhu
766                continue
767            # and update the chain id from pdb_chain to cif_chain
768            if atom_site.getValue('group_PDB', atom_row).startswith('ATOM'):  # hzhu in case HETATM ruins ch id
769                template_grid._chain[row] = cif_chain
770
771            # get the residue and the residue number
772            try:
773                res_num = int(atom_site.getValue("label_seq_id", atom_row))
774            except ValueError:
775                continue
776
777            residue = atom_site.getValue('label_comp_id', atom_row)
778            residue = convert_aa_code(residue, convert)
779
780
781            if res_num not in cif_seq.keys():
782                cif_seq[res_num] = residue
783            elif res_num in cif_seq.keys() and cif_seq[res_num] == residue:
784                continue
785            elif res_num in cif_seq.keys() and cif_seq[res_num] != residue:
786                cif_seq[res_num] = '-'
787
788                if DEBUG_MODE:
789                    print ('! {p} {c}: mmCIF contains a residue position that is assigned {cr} to two residues. Removing it.'.format(
790                        p = pdb_code,
791                        c = chain,
792                        cr = res_num))
793
794        ########################################################################
795        ## Rename chain if necessary                                          ##
796        ########################################################################
797
798        chain_idx = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
799
800        if len(template_grid._chain[row]) != 1:
801            i = 0
802            new_chain = 0
803
804            while i < len(chain_idx):
805                if chain_idx[i] in cif_chains:
806                    if DEBUG_MODE:
807                        print ('! {p} {c}: Chain identifier {i} is already taken.'.format(
808                            p = pdb_code,
809                            c = chain,
810                            i = chain_idx[i]))
811                    i += 1
812                else:
813                    new_chain = chain_idx[i]
814                    break
815
816            if new_chain == 0:
817                if DEBUG_MODE:
818                    print ('! {p} {c}: Could not use {p}. The chain identifier {c} is not compatible with MODELLER (2 letters) and could not be renanmed.'.format(
819                        p = pdb_code,
820                        c = chain))
821
822                del_row.append(row)
823                continue
824
825            if new_chain != 0:
826                print ('Selected new chain name {c}'.format(c = new_chain))
827
828            #TODO
829
830        ########################################################################
831        ## Compare cif positions with the atom positions                      ##
832        ########################################################################
833
834        del_pos = list()
835        mod_pos = dict()
836        mapping = dict()
837
838        for pos_cif, pos_tem in zip(range(template_grid.get_template_start(row),
839            template_grid.get_template_end(row) + 1), template_grid.get_seq_indeces(row)):
840
841            res_tem = template_grid.get_cell(row, pos_tem)
842
843            try:
844                res_cif = cif_seq[pos_cif]
845            except KeyError:
846                res_cif = -1
847
848
849            match = True if res_tem == res_cif else False
850
851            if not match:
852                if res_cif == 1 and res_tem == 'M':
853                    mod_pos[pos_cif] = 1
854                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
855
856                elif res_cif == 2 and res_tem == 'P':
857
858                    mod_pos[pos_cif] = 2
859                    mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
860
861                elif res_cif == 3 and res_tem == 'K':
862
863                    mod_pos[pos_cif] = 3
864                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
865
866                elif res_cif == 4 and res_tem == 'S':
867
868                    mod_pos[pos_cif] = 4
869                    mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
870
871                elif res_cif == 5 and res_tem == 'T':
872
873                    mod_pos[pos_cif] = 5
874                    mapping[(pos_tem, res_tem)] = (pos_cif, 'T')
875
876                elif res_cif == 6 and res_tem == 'C':
877
878                    mod_pos[pos_cif] = 6
879                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
880
881                elif res_cif == 7 and res_tem == 'Y':
882
883                    mod_pos[pos_cif] = 7
884                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
885
886                elif res_cif == 8 and res_tem == 'K':
887
888                    mod_pos[pos_cif] = 8
889                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
890
891                elif res_cif == 9 and res_tem == 'C':
892
893                    mod_pos[pos_cif] = 9
894                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
895
896                elif res_cif == 10 and res_tem == 'A':
897
898                    mod_pos[pos_cif] = 10
899                    mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
900
901                elif res_cif == 11 and res_tem == 'C':
902
903                    mod_pos[pos_cif] = 11
904                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
905
906                elif res_cif == 12 and res_tem == 'L':
907
908                    mod_pos[pos_cif] = 12
909                    mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
910
911                elif res_cif == 13 and res_tem == 'A':
912
913                    mod_pos[pos_cif] = 13
914                    mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
915
916                elif res_cif == 14 and res_tem == 'E':
917
918                    mod_pos[pos_cif] = 14
919                    mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
920
921                elif res_cif == 15 and res_tem == 'L':
922
923                    mod_pos[pos_cif] = 15
924                    mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
925
926                elif res_cif == 16 and res_tem == 'M':
927
928                    mod_pos[pos_cif] = 16
929                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
930
931                elif res_cif == 17 and res_tem == 'V':
932
933                    mod_pos[pos_cif] = 17
934                    mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
935
936                elif res_cif == 18 and res_tem == 'C':
937
938                    mod_pos[pos_cif] = 18
939                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
940
941                elif res_cif == 19 and res_tem == 'P':
942
943                    mod_pos[pos_cif] = 19
944                    mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
945
946                elif res_cif == 20 and res_tem == 'V':
947
948                    mod_pos[pos_cif] = 20
949                    mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
950
951                elif res_cif == 21 and res_tem == 'Y':
952
953                    mod_pos[pos_cif] = 21
954                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
955
956                elif res_cif == 22 and res_tem == 'K':
957
958                    mod_pos[pos_cif] = 22
959                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
960
961                elif res_cif == 23 and res_tem == 'C':
962
963                    mod_pos[pos_cif] = 23
964                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
965
966                elif res_cif == 24 and res_tem == 'K':
967
968                    mod_pos[pos_cif] = 24
969                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
970
971                elif res_cif == 25 and res_tem == 'C':
972
973                    mod_pos[pos_cif] = 25
974                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
975
976                elif res_cif == 26 and res_tem == 'C':
977
978                    mod_pos[pos_cif] = 26
979                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
980
981                elif res_cif == 27 and res_tem == 'L':
982
983                    mod_pos[pos_cif] = 27
984                    mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
985
986                elif res_cif == 28 and res_tem == 'E':
987
988                    mod_pos[pos_cif] = 28
989                    mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
990
991                elif res_cif == 29 and res_tem == 'S':
992
993                    mod_pos[pos_cif] = 29
994                    mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
995
996                elif res_cif == 30 and res_tem == 'C':
997
998                    mod_pos[pos_cif] = 30
999                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1000
1001                elif res_cif == 31 and res_tem == 'K':
1002
1003                    mod_pos[pos_cif] = 31
1004                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
1005
1006                elif res_cif == 32 and res_tem == 'K':
1007
1008                    mod_pos[pos_cif] = 32
1009                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
1010
1011                elif res_cif == 33 and res_tem == 'F':
1012
1013                    mod_pos[pos_cif] = 33
1014                    mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
1015
1016                elif res_cif == 34 and res_tem == 'R':
1017
1018                    mod_pos[pos_cif] = 34
1019                    mapping[(pos_tem, res_tem)] = (pos_cif, 'R')
1020
1021                elif res_cif == 35 and res_tem == 'F':
1022
1023                    mod_pos[pos_cif] = 35
1024                    mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
1025
1026                elif res_cif == 36 and res_tem == 'D':
1027
1028                    mod_pos[pos_cif] = 36
1029                    mapping[(pos_tem, res_tem)] = (pos_cif, 'D')
1030
1031                elif res_cif == 37 and res_tem == 'D':
1032
1033                    mod_pos[pos_cif] = 37
1034                    mapping[(pos_tem, res_tem)] = (pos_cif, 'D')
1035
1036                elif res_cif == 38 and res_tem == 'H':
1037
1038                    mod_pos[pos_cif] = 38
1039                    mapping[(pos_tem, res_tem)] = (pos_cif, 'H')
1040
1041                elif res_cif == 39 and res_tem == 'P':
1042
1043                    mod_pos[pos_cif] = 39
1044                    mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
1045
1046                elif res_cif == 40 and res_tem == 'T':
1047
1048                    mod_pos[pos_cif] = 40
1049                    mapping[(pos_tem, res_tem)] = (pos_cif, 'T')
1050
1051                elif res_cif == 41 and res_tem == 'I':
1052
1053                    mod_pos[pos_cif] = 41
1054                    mapping[(pos_tem, res_tem)] = (pos_cif, 'I')
1055
1056                elif res_cif == 42 and res_tem == 'N':
1057
1058                    mod_pos[pos_cif] = 42
1059                    mapping[(pos_tem, res_tem)] = (pos_cif, 'N')
1060
1061                elif res_cif == 43 and res_tem == 'Y':
1062
1063                    mod_pos[pos_cif] = 43
1064                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
1065
1066                elif res_cif == 44 and res_tem == 'M':
1067
1068                    mod_pos[pos_cif] = 44
1069                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
1070
1071                elif res_cif == 45 and res_tem == 'G':
1072
1073                    mod_pos[pos_cif] = 45
1074                    mapping[(pos_tem, res_tem)] = (pos_cif, 'G')
1075
1076                elif res_cif == 46 and res_tem == 'W':
1077
1078                    mod_pos[pos_cif] = 46
1079                    mapping[(pos_tem, res_tem)] = (pos_cif, 'W')
1080
1081                elif res_cif == 47 and res_tem == 'S':
1082
1083                    mod_pos[pos_cif] = 47
1084                    mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
1085
1086                elif res_cif == 48 and res_tem == 'N':
1087
1088                    mod_pos[pos_cif] = 48
1089                    mapping[(pos_tem, res_tem)] = (pos_cif, 'N')
1090
1091                elif res_cif == 49 and res_tem == 'M':
1092
1093                    mod_pos[pos_cif] = 49
1094                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
1095
1096                elif res_cif == 50 and res_tem == 'A':
1097
1098                    mod_pos[pos_cif] = 50
1099                    mapping[(pos_tem, res_tem)] = (pos_cif, 'A')
1100
1101                elif res_cif == 51 and res_tem == 'Y':
1102
1103                    mod_pos[pos_cif] = 51
1104                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
1105
1106                elif res_cif == 52 and res_tem == 'P':
1107
1108                    mod_pos[pos_cif] = 52
1109                    mapping[(pos_tem, res_tem)] = (pos_cif, 'P')
1110
1111                elif res_cif == 53 and res_tem == 'V':
1112
1113                    mod_pos[pos_cif] = 53
1114                    mapping[(pos_tem, res_tem)] = (pos_cif, 'V')
1115
1116                elif res_cif == 54 and res_tem == 'L':
1117
1118                    mod_pos[pos_cif] = 54
1119                    mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
1120
1121                elif res_cif == 55 and res_tem == 'Y':
1122
1123                    mod_pos[pos_cif] = 55
1124                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
1125
1126                elif res_cif == 56 and res_tem == 'E':
1127
1128                    mod_pos[pos_cif] = 56
1129                    mapping[(pos_tem, res_tem)] = (pos_cif, 'E')
1130
1131                elif res_cif == 57 and res_tem == 'F':
1132
1133                    mod_pos[pos_cif] = 57
1134                    mapping[(pos_tem, res_tem)] = (pos_cif, 'F')
1135
1136                elif res_cif == 58 and res_tem == 'C':
1137
1138                    mod_pos[pos_cif] = 58
1139                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1140
1141                elif res_cif == 59 and res_tem == 'H':
1142
1143                    mod_pos[pos_cif] = 59
1144                    mapping[(pos_tem, res_tem)] = (pos_cif, 'H')
1145
1146                elif res_cif == 60 and res_tem == 'C':
1147
1148                    mod_pos[pos_cif] = 60
1149                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1150
1151                elif res_cif == 61 and res_tem == 'Y':
1152
1153                    mod_pos[pos_cif] = 61
1154                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
1155
1156                elif res_cif == 62 and res_tem == 'C':
1157
1158                    mod_pos[pos_cif] = 62
1159                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1160
1161                elif res_cif == 63 and res_tem == 'M':
1162
1163                    mod_pos[pos_cif] = 63
1164                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
1165
1166                elif res_cif == 64 and res_tem == 'M':
1167
1168                    mod_pos[pos_cif] = 64
1169                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
1170
1171                elif res_cif == 65 and res_tem == 'C':
1172
1173                    mod_pos[pos_cif] = 65
1174                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1175
1176                elif res_cif == 66 and res_tem == 'Y':
1177
1178                    mod_pos[pos_cif] = 66
1179                    mapping[(pos_tem, res_tem)] = (pos_cif, 'Y')
1180
1181                elif res_cif == 67 and res_tem == 'S':
1182
1183                    mod_pos[pos_cif] = 67
1184                    mapping[(pos_tem, res_tem)] = (pos_cif, 'S')
1185
1186                elif res_cif == 68 and res_tem == 'C':
1187
1188                    mod_pos[pos_cif] = 68
1189                    mapping[(pos_tem, res_tem)] = (pos_cif, 'C')
1190
1191                elif res_cif == 69 and res_tem == 'L':
1192
1193                    mod_pos[pos_cif] = 69
1194                    mapping[(pos_tem, res_tem)] = (pos_cif, 'L')
1195
1196                elif res_cif == 70 and res_tem == 'M':
1197
1198                    mod_pos[pos_cif] = 70
1199                    mapping[(pos_tem, res_tem)] = (pos_cif, 'M')
1200
1201                elif res_cif == 71 and res_tem == 'K':
1202
1203                    mod_pos[pos_cif] = 71
1204                    mapping[(pos_tem, res_tem)] = (pos_cif, 'K')
1205
1206                else:
1207                    # insert a gap
1208                    template_grid.set_empty(row, pos_tem)
1209                    mapping[(pos_tem, res_tem)] = (pos_cif, res_cif)
1210
1211                    if DEBUG_MODE:
1212                        print ('! {p} {c}: template pos {pt} ({rt}) does not match cif pos {pc} ({rc}). Replacing with gap.'.format(
1213                            p = pdb_code,
1214                            c = chain,
1215                            pt = pos_tem,
1216                            rt = res_tem,
1217                            pc = pos_cif,
1218                            rc = res_cif if res_cif != -1 else 'DNE'))
1219
1220                    if res_cif != -1:
1221                        del_pos.append(pos_cif)
1222            else:
1223                mapping[(pos_tem, res_tem)] = (pos_cif, res_cif)
1224
1225
1226        # adjust template start and end positions
1227        correct_mapping = { key:value for key, value in mapping.items() if key[1] == value[1] }
1228
1229        try:
1230            tstart = correct_mapping[sorted(correct_mapping.keys())[0]][0]
1231            tend = correct_mapping[sorted(correct_mapping.keys())[-1]][0]
1232            template_grid.set_map(row, tstart, tend)
1233        except IndexError:
1234            # This exception handles cases in which all residues were deleted
1235            if DEBUG_MODE:
1236                print ('! {p} {c}: Removing {p} from alignment. No residues matched the alignment sequence.'.format(
1237                    p = pdb_code,
1238                    c = chain))
1239
1240            del_row.append(row)
1241            continue
1242
1243        ########################################################################
1244        ## Delete rows from the PIR Alignment if the residue ratio is to low  ##
1245        ########################################################################
1246
1247        if threshold > 0:
1248
1249            gaps = 0
1250            res = 0
1251
1252            for col in range(template_grid.get_grid_width()):
1253                if template_grid.is_empty(row, col):
1254                    template_grid.set_gap(row, col)
1255
1256                if template_grid.is_gap(row, col):
1257                    gaps += 1
1258                else:
1259                    res += 1
1260
1261            ratio = res/float(gaps + res)
1262
1263            if ratio > threshold:
1264                print ('! Template {p} successfully passed residue ratio ({r:.2f} / {t}).'.format(
1265                    p = pdb_code,
1266                    r = ratio,
1267                    t = threshold ))
1268            else:
1269                print ('! Template {p} did not passed residue ratio ({r:.2f} / {t}). Removing it from pir Alignment.'.format(
1270                    p = pdb_code,
1271                    r = ratio,
1272                    t = threshold ))
1273
1274                if row not in del_row:
1275                    del_row.append(row)
1276                    continue
1277
1278        ########################################################################
1279        ## Edit cif files                                                     ##
1280        ########################################################################
1281
1282        rem_row = list() # verbosity: saves information about removed residues
1283        mod_row = list() # verbosity: saves information about modified residues
1284        cha_row = list() # verbosity: saves any other changes
1285
1286        for atom_row in reversed(range(0, atom_site.getRowCount())):
1287
1288            try:
1289                cif_chain = atom_site.getValue('label_asym_id', atom_row)
1290            except IndexError:
1291                pass
1292
1293            # We do not care about the residues apart from the chain
1294            if cif_chain != chain:
1295                continue
1296
1297            # get the residue number
1298            try:
1299                res_num = int(atom_site.getValue("label_seq_id", atom_row))
1300            except ValueError:
1301                continue
1302
1303            # pdb_PDB_model_num has to be set to 1
1304            try:
1305                model_num = int(atom_site.getValue('pdbx_PDB_model_num', atom_row))
1306            except IndexError:
1307                model_num = 1 # if we cannot extract, assume that it is alright
1308
1309            try:
1310                ins_code = atom_site.getValue('pdbx_PDB_ins_code', atom_row)
1311            except IndexError:
1312                ins_code = '?' # assume it has no insertion code
1313
1314            group_PDB = atom_site.getValue('group_PDB', atom_row)
1315            residue = atom_site.getValue('label_comp_id', atom_row)
1316            residue = convert_aa_code(residue, convert)
1317
1318            # MODELLER accepts only structures if pdbx_PDB_model_num is set to 1
1319            if model_num != 1:
1320
1321                if (res_num, residue, 'model_num') not in cha_row:
1322                    cha_row.append((res_num, residue, 'model_num'))
1323
1324                atom_site.setValue(1, "pdbx_PDB_model_num", atom_row)
1325
1326            if ins_code != '?':
1327
1328                if (res_num, residue, 'ins_code') not in cha_row:
1329                    cha_row.append((res_num, residue, 'ins_code'))
1330
1331                atom_site.setValue('?', "pdbx_PDB_ins_code", atom_row)
1332
1333            if group_PDB != 'ATOM':
1334
1335                if (res_num, residue, 'group_PDB') not in cha_row:
1336                    cha_row.append((res_num, residue, 'group_PDB'))
1337
1338                atom_site.setValue('ATOM', 'group_PDB', atom_row)
1339
1340            ########################################################################
1341            ## Delete residues                                                    ##
1342            ########################################################################
1343
1344            if res_num in del_pos:
1345                if (res_num, residue) not in rem_row:
1346                    rem_row.append((res_num, residue))
1347
1348                atom_site.removeRow(atom_row)
1349
1350            ########################################################################
1351            ## Modify residues                                                    ##
1352            ########################################################################
1353
1354            if res_num in mod_pos.keys():
1355
1356                # Get the data
1357                type_symbol = atom_site.getValue('type_symbol', atom_row)
1358                label_atom_id = atom_site.getValue('label_atom_id', atom_row)
1359                auth_atom_id = atom_site.getValue('auth_atom_id', atom_row)
1360
1361                if mod_pos[res_num] == 1: # try to convert MSE to M
1362
1363                    atom_site.setValue('MET', 'label_comp_id', atom_row)
1364
1365                    try:
1366                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
1367                    except IndexError:
1368                        pass
1369
1370                    if type_symbol == 'SE':
1371                        atom_site.setValue('S', 'type_symbol', atom_row)
1372                    if label_atom_id == 'SE':
1373                        atom_site.setValue('S', 'label_atom_id', atom_row)
1374                    if auth_atom_id == 'SE':
1375                        atom_site.setValue('S', 'auth_atom_id', atom_row)
1376
1377                    if (res_num, residue, 'MSE -> MET') not in mod_row:
1378                        mod_row.append((res_num, residue, 'MSE -> MET'))
1379
1380                elif mod_pos[res_num] == 2: # try to convert HYP to PRO
1381                    # apparently it is enough to rename the label_comp_id to PRO to get
1382                    # MODELLER working with Hydroxyprolines (HYP)
1383
1384                    atom_site.setValue('PRO', 'label_comp_id', atom_row)
1385
1386                    try:
1387                        atom_site.setValue('PRO', 'auth_comp_id', atom_row)
1388                    except IndexError:
1389                        pass
1390
1391                    if (res_num, residue, 'HYP -> PRO') not in mod_row:
1392                        mod_row.append((res_num, residue, 'HYP -> PRO'))
1393
1394                elif mod_pos[res_num] == 3: # try to convert MLY to LYS
1395
1396                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1397
1398                    try:
1399                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1400                    except IndexError:
1401                        pass
1402
1403                    if (res_num, residue, 'MLY -> LYS') not in mod_row:
1404                        mod_row.append((res_num, residue, 'MLY -> LYS'))
1405
1406                elif mod_pos[res_num] == 4: # converts Phosphoserine to Serine
1407
1408                    atom_site.setValue('SER', 'label_comp_id', atom_row)
1409
1410                    try:
1411                        atom_site.setValue('SER', 'auth_comp_id', atom_row)
1412                    except IndexError:
1413                        pass
1414
1415                    if (res_num, residue, 'SEP -> SER') not in mod_row:
1416                        mod_row.append((res_num, residue, 'SEP -> SER'))
1417
1418                elif mod_pos[res_num] == 5: # converts Phosphothreonine to Threonine
1419
1420                    atom_site.setValue('THR', 'label_comp_id', atom_row)
1421
1422                    try:
1423                        atom_site.setValue('THR', 'auth_comp_id', atom_row)
1424                    except IndexError:
1425                        pass
1426
1427                    if (res_num, residue, 'TPO -> THR') not in mod_row:
1428                        mod_row.append((res_num, residue, 'TPO -> THR'))
1429
1430                elif mod_pos[res_num] == 6: # converts S-HYDROXYCYSTEINE to Cysteine
1431
1432                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1433
1434                    try:
1435                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1436                    except IndexError:
1437                        pass
1438
1439                    if (res_num, residue, 'CSO -> CYS') not in mod_row:
1440                        mod_row.append((res_num, residue, 'CSO -> CYS'))
1441
1442                elif mod_pos[res_num] == 7: # converts O-PHOSPHOTYROSINE to Tyrosine
1443
1444                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
1445
1446                    try:
1447                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
1448                    except IndexError:
1449                        pass
1450
1451                    if (res_num, residue, 'PTR -> TYR') not in mod_row:
1452                        mod_row.append((res_num, residue, 'PTR -> TYR'))
1453
1454                elif mod_pos[res_num] == 8: # converts LYSINE NZ-CARBOXYLIC ACID to Lysine
1455
1456                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1457
1458                    try:
1459                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1460                    except IndexError:
1461                        pass
1462
1463                    if (res_num, residue, 'KCX -> LYS') not in mod_row:
1464                        mod_row.append((res_num, residue, 'KCX -> LYS'))
1465
1466                elif mod_pos[res_num] == 9: # converts S,S-(2-HYDROXYETHYL)THIOCYSTEINE to Cysteine
1467
1468                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1469
1470                    try:
1471                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1472                    except IndexError:
1473                        pass
1474
1475                    if (res_num, residue, 'CME -> CYS') not in mod_row:
1476                        mod_row.append((res_num, residue, 'CME -> CYS'))
1477
1478                elif mod_pos[res_num] == 10: # converts 3-SULFINOALANINE to Alanine
1479
1480                    atom_site.setValue('ALA', 'label_comp_id', atom_row)
1481
1482                    try:
1483                        atom_site.setValue('ALA', 'auth_comp_id', atom_row)
1484                    except IndexError:
1485                        pass
1486
1487                    if (res_num, residue, 'CSD -> ALA') not in mod_row:
1488                        mod_row.append((res_num, residue, 'CSD -> ALA'))
1489
1490                elif mod_pos[res_num] == 11: # converts S-(DIMETHYLARSENIC)CYSTEINE to Cysteine
1491
1492                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1493
1494                    try:
1495                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1496                    except IndexError:
1497                        pass
1498
1499                    if (res_num, residue, 'CAS -> CYS') not in mod_row:
1500                        mod_row.append((res_num, residue, 'CAS -> CYS'))
1501
1502                elif mod_pos[res_num] == 12: # converts N-METHYLLEUCINE (MLE) to Leucine
1503
1504                    atom_site.setValue('LEU', 'label_comp_id', atom_row)
1505
1506                    try:
1507                        atom_site.setValue('LEU', 'auth_comp_id', atom_row)
1508                    except IndexError:
1509                        pass
1510
1511                    if (res_num, residue, 'MLE -> LEU') not in mod_row:
1512                        mod_row.append((res_num, residue, 'MLE -> LEU'))
1513
1514                elif mod_pos[res_num] == 13: # converts D-ALANINE (DAL) to ALA
1515
1516                    atom_site.setValue('ALA', 'label_comp_id', atom_row)
1517
1518                    try:
1519                        atom_site.setValue('ALA', 'auth_comp_id', atom_row)
1520                    except IndexError:
1521                        pass
1522
1523                    if (res_num, residue, 'DAL -> ALA') not in mod_row:
1524                        mod_row.append((res_num, residue, 'DAL -> ALA'))
1525
1526                elif mod_pos[res_num] == 14: # converts GAMMA-CARBOXY-GLUTAMIC ACID (CGU) to GLU
1527
1528                    atom_site.setValue('GLU', 'label_comp_id', atom_row)
1529
1530                    try:
1531                        atom_site.setValue('GLU', 'auth_comp_id', atom_row)
1532                    except IndexError:
1533                        pass
1534
1535                    if (res_num, residue, 'CGU -> GLU') not in mod_row:
1536                        mod_row.append((res_num, residue, 'CGU -> GLU'))
1537
1538                elif mod_pos[res_num] == 15: # converts D-LEUCINE (DLE) to LEU
1539
1540                    atom_site.setValue('LEU', 'label_comp_id', atom_row)
1541
1542                    try:
1543                        atom_site.setValue('LEU', 'auth_comp_id', atom_row)
1544                    except IndexError:
1545                        pass
1546
1547                    if (res_num, residue, 'DLE -> LEU') not in mod_row:
1548                        mod_row.append((res_num, residue, 'DLE -> LEU'))
1549
1550                elif mod_pos[res_num] == 16: # converts N-FORMYLMETHIONINE (FME) to MET
1551
1552                    atom_site.setValue('MET', 'label_comp_id', atom_row)
1553
1554                    try:
1555                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
1556                    except IndexError:
1557                        pass
1558
1559                    if (res_num, residue, 'FME -> MET') not in mod_row:
1560                        mod_row.append((res_num, residue, 'FME -> MET'))
1561
1562                elif mod_pos[res_num] == 17: # converts D-VAL (DVA) to VAL
1563
1564                    atom_site.setValue('VAL', 'label_comp_id', atom_row)
1565
1566                    try:
1567                        atom_site.setValue('VAL', 'auth_comp_id', atom_row)
1568                    except IndexError:
1569                        pass
1570
1571                    if (res_num, residue, 'DVA -> VAL') not in mod_row:
1572                        mod_row.append((res_num, residue, 'DVA -> VAL'))
1573
1574                elif mod_pos[res_num] == 18: # converts CYSTEINESULFONIC ACID (OCS) to CYS
1575
1576                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1577
1578                    try:
1579                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1580                    except IndexError:
1581                        pass
1582
1583                    if (res_num, residue, 'OCS -> CYS') not in mod_row:
1584                        mod_row.append((res_num, residue, 'OCS -> CYS'))
1585
1586                elif mod_pos[res_num] == 19: # converts D-PROLINE (DPR) to PRO
1587
1588                    atom_site.setValue('PRO', 'label_comp_id', atom_row)
1589
1590                    try:
1591                        atom_site.setValue('PRO', 'auth_comp_id', atom_row)
1592                    except IndexError:
1593                        pass
1594
1595                    if (res_num, residue, 'DPR -> PRO') not in mod_row:
1596                        mod_row.append((res_num, residue, 'DPR -> PRO'))
1597
1598                elif mod_pos[res_num] == 20: # converts N-METHYLVALINE (MVA) to VAL
1599
1600                    atom_site.setValue('VAL', 'label_comp_id', atom_row)
1601
1602                    try:
1603                        atom_site.setValue('VAL', 'auth_comp_id', atom_row)
1604                    except IndexError:
1605                        pass
1606
1607                    if (res_num, residue, 'MVA -> VAL') not in mod_row:
1608                        mod_row.append((res_num, residue, 'MVA -> VAL'))
1609
1610                elif mod_pos[res_num] == 21: # converts O-SULFO-L-TYROSINE (TYS) to VAL
1611
1612                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
1613
1614                    try:
1615                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
1616                    except IndexError:
1617                        pass
1618
1619                    if (res_num, residue, 'TYS -> TYR') not in mod_row:
1620                        mod_row.append((res_num, residue, 'TYS -> TYR'))
1621
1622                elif mod_pos[res_num] == 22: # converts N-TRIMETHYLLYSINE (M3L) to LYS
1623
1624                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1625
1626                    try:
1627                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1628                    except IndexError:
1629                        pass
1630
1631                    if (res_num, residue, 'M3L -> LYS') not in mod_row:
1632                        mod_row.append((res_num, residue, 'M3L -> LYS'))
1633
1634                elif mod_pos[res_num] == 23: # converts S-METHYLCYSTEINE (SMC) to CYS
1635
1636                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1637
1638                    try:
1639                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1640                    except IndexError:
1641                        pass
1642
1643                    if (res_num, residue, 'SMC -> CYS') not in mod_row:
1644                        mod_row.append((res_num, residue, 'SMC -> CYS'))
1645
1646                elif mod_pos[res_num] == 24: # converts N(6)-ACETYLLYSINE (ALY) to LYS
1647
1648                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1649
1650                    try:
1651                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1652                    except IndexError:
1653                        pass
1654
1655                    if (res_num, residue, 'ALY -> LYS') not in mod_row:
1656                        mod_row.append((res_num, residue, 'ALY -> LYS'))
1657
1658                elif mod_pos[res_num] == 25: # converts S-OXY CYSTEINE (CSX) to CYS
1659
1660                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1661
1662                    try:
1663                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1664                    except IndexError:
1665                        pass
1666
1667                    if (res_num, residue, 'CSX -> CYS') not in mod_row:
1668                        mod_row.append((res_num, residue, 'CSX -> CYS'))
1669
1670                elif mod_pos[res_num] == 26: # converts D-CYSTEINE (DCY) to CYS
1671
1672                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1673
1674                    try:
1675                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1676                    except IndexError:
1677                        pass
1678
1679                    if (res_num, residue, 'DCY -> CYS') not in mod_row:
1680                        mod_row.append((res_num, residue, 'DCY -> CYS'))
1681
1682                elif mod_pos[res_num] == 27: # converts NORLEUCINE (NLE) to LEU
1683
1684                    atom_site.setValue('LEU', 'label_comp_id', atom_row)
1685
1686                    try:
1687                        atom_site.setValue('LEU', 'auth_comp_id', atom_row)
1688                    except IndexError:
1689                        pass
1690
1691                    if (res_num, residue, 'NLE -> LEU') not in mod_row:
1692                        mod_row.append((res_num, residue, 'NLE -> LEU'))
1693
1694                elif mod_pos[res_num] == 28: # converts D-GLUTAMIC ACID (DGL) to GLU
1695
1696                    atom_site.setValue('GLU', 'label_comp_id', atom_row)
1697
1698                    try:
1699                        atom_site.setValue('GLU', 'auth_comp_id', atom_row)
1700                    except IndexError:
1701                        pass
1702
1703                    if (res_num, residue, 'DGL -> GLU') not in mod_row:
1704                        mod_row.append((res_num, residue, 'DGL -> GLU'))
1705
1706                elif mod_pos[res_num] == 29: # converts D-SERINE (DSN) to SER
1707
1708                    atom_site.setValue('SER', 'label_comp_id', atom_row)
1709
1710                    try:
1711                        atom_site.setValue('SER', 'auth_comp_id', atom_row)
1712                    except IndexError:
1713                        pass
1714
1715                    if (res_num, residue, 'DSN -> SER') not in mod_row:
1716                        mod_row.append((res_num, residue, 'DSN -> SER'))
1717
1718                elif mod_pos[res_num] == 30: # converts S-MERCAPTOCYSTEINE (CSS) to CYS
1719
1720                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
1721
1722                    try:
1723                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
1724                    except IndexError:
1725                        pass
1726
1727                    if (res_num, residue, 'CSS -> CYS') not in mod_row:
1728                        mod_row.append((res_num, residue, 'CSS -> CYS'))
1729
1730                elif mod_pos[res_num] == 31: # converts D-LYSINE (DLY) to LYS
1731
1732                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1733
1734                    try:
1735                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1736                    except IndexError:
1737                        pass
1738
1739                    if (res_num, residue, 'DLY -> LYS') not in mod_row:
1740                        mod_row.append((res_num, residue, 'DLY -> LYS'))
1741
1742                elif mod_pos[res_num] == 32: # converts N-METHYL-LYSINE (MLZ) to LYS
1743
1744                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
1745
1746                    try:
1747                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
1748                    except IndexError:
1749                        pass
1750
1751                    if (res_num, residue, 'MLZ -> LYS') not in mod_row:
1752                        mod_row.append((res_num, residue, 'MLZ -> LYS'))
1753
1754                elif mod_pos[res_num] == 33: # converts D-PHENYLALANINE (DPN) to PHE
1755
1756                    atom_site.setValue('PHE', 'label_comp_id', atom_row)
1757
1758                    try:
1759                        atom_site.setValue('PHE', 'auth_comp_id', atom_row)
1760                    except IndexError:
1761                        pass
1762
1763                    if (res_num, residue, 'DPN -> PHE') not in mod_row:
1764                        mod_row.append((res_num, residue, 'DPN -> PHE'))
1765
1766                elif mod_pos[res_num] == 34: # converts D-ARGININE (DAR) to ARG
1767
1768                    atom_site.setValue('ARG', 'label_comp_id', atom_row)
1769
1770                    try:
1771                        atom_site.setValue('ARG', 'auth_comp_id', atom_row)
1772                    except IndexError:
1773                        pass
1774
1775                    if (res_num, residue, 'DAR -> ARG') not in mod_row:
1776                        mod_row.append((res_num, residue, 'DAR -> ARG'))
1777
1778                elif mod_pos[res_num] == 35: # converts IODO-PHENYLALANINE (PHI) to PHE
1779
1780                    atom_site.setValue('PHE', 'label_comp_id', atom_row)
1781
1782                    try:
1783                        atom_site.setValue('PHE', 'auth_comp_id', atom_row)
1784                    except IndexError:
1785                        pass
1786
1787                    if (res_num, residue, 'PHI -> PHE') not in mod_row:
1788                        mod_row.append((res_num, residue, 'PHI -> PHE'))
1789
1790                elif mod_pos[res_num] == 36: # converts BETA-L-ASPARTIC ACID (IAS) to ASP
1791
1792                    atom_site.setValue('ASP', 'label_comp_id', atom_row)
1793
1794                    try:
1795                        atom_site.setValue('ASP', 'auth_comp_id', atom_row)
1796                    except IndexError:
1797                        pass
1798
1799                    if (res_num, residue, 'IAS -> ASP') not in mod_row:
1800                        mod_row.append((res_num, residue, 'IAS -> ASP'))
1801
1802                elif mod_pos[res_num] == 37: # converts D-ASPARTIC ACID (DAS) to ASP
1803
1804                    atom_site.setValue('ASP', 'label_comp_id', atom_row)
1805
1806                    try:
1807                        atom_site.setValue('ASP', 'auth_comp_id', atom_row)
1808                    except IndexError:
1809                        pass
1810
1811                    if (res_num, residue, 'DAS -> ASP') not in mod_row:
1812                        mod_row.append((res_num, residue, 'DAS -> ASP'))
1813
1814                elif mod_pos[res_num] == 38: # converts 4-METHYL-HISTIDINE (HIC) to HIS
1815
1816                    atom_site.setValue('HIS', 'label_comp_id', atom_row)
1817
1818                    try:
1819                        atom_site.setValue('HIS', 'auth_comp_id', atom_row)
1820                    except IndexError:
1821                        pass
1822
1823                    if (res_num, residue, 'HIC -> HIS') not in mod_row:
1824                        mod_row.append((res_num, residue, 'HIC -> HIS'))
1825
1826                elif mod_pos[res_num] == 39: # converts (4R)-4-methyl-L-proline (MP8) to PRO
1827
1828                    atom_site.setValue('PRO', 'label_comp_id', atom_row)
1829
1830                    try:
1831                        atom_site.setValue('PRO', 'auth_comp_id', atom_row)
1832                    except IndexError:
1833                        pass
1834
1835                    if (res_num, residue, 'MP8 -> PRO') not in mod_row:
1836                        mod_row.append((res_num, residue, 'MP8 -> PRO'))
1837
1838                elif mod_pos[res_num] == 40: # converts D-THREONINE (DTH) to THR
1839
1840                    atom_site.setValue('THR', 'label_comp_id', atom_row)
1841
1842                    try:
1843                        atom_site.setValue('THR', 'auth_comp_id', atom_row)
1844                    except IndexError:
1845                        pass
1846
1847                    if (res_num, residue, 'DTH -> THR') not in mod_row:
1848                        mod_row.append((res_num, residue, 'DTH -> THR'))
1849
1850                elif mod_pos[res_num] == 41: # converts D-ISOLEUCINE (DIL) to ILE
1851
1852                    atom_site.setValue('ILE', 'label_comp_id', atom_row)
1853
1854                    try:
1855                        atom_site.setValue('ILE', 'auth_comp_id', atom_row)
1856                    except IndexError:
1857                        pass
1858
1859                    if (res_num, residue, 'DIL -> ILE') not in mod_row:
1860                        mod_row.append((res_num, residue, 'DIL -> ILE'))
1861
1862                elif mod_pos[res_num] == 42: # converts N-METHYL ASPARAGINE (MEN) to ASN
1863
1864                    atom_site.setValue('ASN', 'label_comp_id', atom_row)
1865
1866                    try:
1867                        atom_site.setValue('ASN', 'auth_comp_id', atom_row)
1868                    except IndexError:
1869                        pass
1870
1871                    if (res_num, residue, 'MEN -> ASN') not in mod_row:
1872                        mod_row.append((res_num, residue, 'MEN -> ASN'))
1873
1874                elif mod_pos[res_num] == 43: # converts D-TYROSINE (DTY) to TYR
1875
1876                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
1877
1878                    try:
1879                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
1880                    except IndexError:
1881                        pass
1882
1883                    if (res_num, residue, 'DTY -> TYR') not in mod_row:
1884                        mod_row.append((res_num, residue, 'DTY -> TYR'))
1885
1886                elif mod_pos[res_num] == 44: # converts N-CARBOXYMETHIONINE (CXM) to MET
1887
1888                    atom_site.setValue('MET', 'label_comp_id', atom_row)
1889
1890                    try:
1891                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
1892                    except IndexError:
1893                        pass
1894
1895                    if (res_num, residue, 'CXM -> MET') not in mod_row:
1896                        mod_row.append((res_num, residue, 'CXM -> MET'))
1897
1898                elif mod_pos[res_num] == 45: # converts D-GLUTAMINE (DGN) to MET
1899
1900                    atom_site.setValue('GLN', 'label_comp_id', atom_row)
1901
1902                    try:
1903                        atom_site.setValue('GLN', 'auth_comp_id', atom_row)
1904                    except IndexError:
1905                        pass
1906
1907                    if (res_num, residue, 'DGN -> GLN') not in mod_row:
1908                        mod_row.append((res_num, residue, 'DGN -> GLN'))
1909
1910                elif mod_pos[res_num] == 46: # converts D-TRYPTOPHAN (DTR) to TRP
1911
1912                    atom_site.setValue('TRP', 'label_comp_id', atom_row)
1913
1914                    try:
1915                        atom_site.setValue('TRP', 'auth_comp_id', atom_row)
1916                    except IndexError:
1917                        pass
1918
1919                    if (res_num, residue, 'DTR -> TRP') not in mod_row:
1920                        mod_row.append((res_num, residue, 'DTR -> TRP'))
1921
1922                elif mod_pos[res_num] == 47: # converts N-ACETYL-SERINE (SAC) to SER
1923
1924                    atom_site.setValue('SER', 'label_comp_id', atom_row)
1925
1926                    try:
1927                        atom_site.setValue('SER', 'auth_comp_id', atom_row)
1928                    except IndexError:
1929                        pass
1930
1931                    if (res_num, residue, 'SAC -> SER') not in mod_row:
1932                        mod_row.append((res_num, residue, 'SAC -> SER'))
1933
1934                elif mod_pos[res_num] == 48: # converts D-ASPARAGINE (DSG) to ASN
1935
1936                    atom_site.setValue('ASN', 'label_comp_id', atom_row)
1937
1938                    try:
1939                        atom_site.setValue('ASN', 'auth_comp_id', atom_row)
1940                    except IndexError:
1941                        pass
1942
1943                    if (res_num, residue, 'DSG -> ASN') not in mod_row:
1944                        mod_row.append((res_num, residue, 'DSG -> ASN'))
1945
1946                elif mod_pos[res_num] == 49: # converts N-METHYL METHIONINE (MME) to MET
1947
1948                    atom_site.setValue('MET', 'label_comp_id', atom_row)
1949
1950                    try:
1951                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
1952                    except IndexError:
1953                        pass
1954
1955                    if (res_num, residue, 'MME -> MET') not in mod_row:
1956                        mod_row.append((res_num, residue, 'MME -> MET'))
1957
1958                elif mod_pos[res_num] == 50: # converts N-methyl-L-alanine (MAA) to ALA
1959
1960                    atom_site.setValue('ALA', 'label_comp_id', atom_row)
1961
1962                    try:
1963                        atom_site.setValue('ALA', 'auth_comp_id', atom_row)
1964                    except IndexError:
1965                        pass
1966
1967                    if (res_num, residue, 'MAA -> ALA') not in mod_row:
1968                        mod_row.append((res_num, residue, 'MAA -> ALA'))
1969
1970                elif mod_pos[res_num] == 51: # converts 3-FLUOROTYROSINE (YOF) to TYR
1971
1972                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
1973
1974                    try:
1975                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
1976                    except IndexError:
1977                        pass
1978
1979                    if (res_num, residue, 'YOF -> TYR') not in mod_row:
1980                        mod_row.append((res_num, residue, 'YOF -> TYR'))
1981
1982                elif mod_pos[res_num] == 52: # converts (4R)-4-fluoro-L-proline (FP9) to PRO
1983
1984                    atom_site.setValue('PRO', 'label_comp_id', atom_row)
1985
1986                    try:
1987                        atom_site.setValue('PRO', 'auth_comp_id', atom_row)
1988                    except IndexError:
1989                        pass
1990
1991                    if (res_num, residue, 'FP9 -> PRO') not in mod_row:
1992                        mod_row.append((res_num, residue, 'FP9 -> PRO'))
1993
1994                elif mod_pos[res_num] == 53: # converts N-formyl-L-valine (FVA) to VAL
1995
1996                    atom_site.setValue('VAL', 'label_comp_id', atom_row)
1997
1998                    try:
1999                        atom_site.setValue('VAL', 'auth_comp_id', atom_row)
2000                    except IndexError:
2001                        pass
2002
2003                    if (res_num, residue, 'FVA -> VAL') not in mod_row:
2004                        mod_row.append((res_num, residue, 'FVA -> VAL'))
2005
2006                elif mod_pos[res_num] == 54: # converts N-methyl-D-leucine (MLU) to LEU
2007
2008                    atom_site.setValue('LEU', 'label_comp_id', atom_row)
2009
2010                    try:
2011                        atom_site.setValue('LEU', 'auth_comp_id', atom_row)
2012                    except IndexError:
2013                        pass
2014
2015                    if (res_num, residue, 'MLU -> LEU') not in mod_row:
2016                        mod_row.append((res_num, residue, 'MLU -> LEU'))
2017
2018                elif mod_pos[res_num] == 55: # converts (betaR)-3-chloro-beta-hydroxy-L-tyrosine (OMY) to TYR
2019
2020                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
2021
2022                    try:
2023                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
2024                    except IndexError:
2025                        pass
2026
2027                    if (res_num, residue, 'OMY -> TYR') not in mod_row:
2028                        mod_row.append((res_num, residue, 'OMY -> TYR'))
2029
2030                elif mod_pos[res_num] == 56: # converts GAMMA-D-GLUTAMIC ACID (FGA) to GLU
2031
2032                    atom_site.setValue('GLU', 'label_comp_id', atom_row)
2033
2034                    try:
2035                        atom_site.setValue('GLU', 'auth_comp_id', atom_row)
2036                    except IndexError:
2037                        pass
2038
2039                    if (res_num, residue, 'FGA -> GLU') not in mod_row:
2040                        mod_row.append((res_num, residue, 'FGA -> GLU'))
2041
2042                elif mod_pos[res_num] == 57: # converts N-METHYLPHENYLALANINE (MEA) to PHE
2043
2044                    atom_site.setValue('PHE', 'label_comp_id', atom_row)
2045
2046                    try:
2047                        atom_site.setValue('PHE', 'auth_comp_id', atom_row)
2048                    except IndexError:
2049                        pass
2050
2051                    if (res_num, residue, 'MEA -> PHE') not in mod_row:
2052                        mod_row.append((res_num, residue, 'MEA -> PHE'))
2053
2054                elif mod_pos[res_num] == 58: # converts S-(METHYLMERCURY)-L-CYSTEINE (CMH) to CYS
2055
2056                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
2057
2058                    try:
2059                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
2060                    except IndexError:
2061                        pass
2062
2063                    if (res_num, residue, 'CMH -> CYS') not in mod_row:
2064                        mod_row.append((res_num, residue, 'CMH -> CYS'))
2065
2066                elif mod_pos[res_num] == 59: # converts D-HISTIDINE (DHI) to HIS
2067
2068                    atom_site.setValue('HIS', 'label_comp_id', atom_row)
2069
2070                    try:
2071                        atom_site.setValue('HIS', 'auth_comp_id', atom_row)
2072                    except IndexError:
2073                        pass
2074
2075                    if (res_num, residue, 'DHI -> HIS') not in mod_row:
2076                        mod_row.append((res_num, residue, 'DHI -> HIS'))
2077
2078                elif mod_pos[res_num] == 60: # converts SELENOCYSTEINE (SEC) to CYS
2079
2080                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
2081
2082                    try:
2083                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
2084                    except IndexError:
2085                        pass
2086
2087                    if (res_num, residue, 'SEC -> CYS') not in mod_row:
2088                        mod_row.append((res_num, residue, 'SEC -> CYS'))
2089
2090                elif mod_pos[res_num] == 61: # converts (betaR)-3-CHLORO-BETA-HYDROXY-D-TYROSINE (OMZ) to TYR
2091
2092                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
2093
2094                    try:
2095                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
2096                    except IndexError:
2097                        pass
2098
2099                    if (res_num, residue, 'OMZ -> TYR') not in mod_row:
2100                        mod_row.append((res_num, residue, 'OMZ -> TYR'))
2101
2102                elif mod_pos[res_num] == 62: # converts S-ACETYL-CYSTEINE (SCY) to CYS
2103
2104                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
2105
2106                    try:
2107                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
2108                    except IndexError:
2109                        pass
2110
2111                    if (res_num, residue, 'SCY -> CYS') not in mod_row:
2112                        mod_row.append((res_num, residue, 'SCY -> CYS'))
2113
2114                elif mod_pos[res_num] == 63: # converts S-OXYMETHIONINE (MHO) to MET
2115
2116                    atom_site.setValue('MET', 'label_comp_id', atom_row)
2117
2118                    try:
2119                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
2120                    except IndexError:
2121                        pass
2122
2123                    if (res_num, residue, 'MHO -> MET') not in mod_row:
2124                        mod_row.append((res_num, residue, 'MHO -> MET'))
2125
2126                elif mod_pos[res_num] == 64: # converts D-METHIONINE (MED) to MET
2127
2128                    atom_site.setValue('MET', 'label_comp_id', atom_row)
2129
2130                    try:
2131                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
2132                    except IndexError:
2133                        pass
2134
2135                    if (res_num, residue, 'MED -> MET') not in mod_row:
2136                        mod_row.append((res_num, residue, 'MED -> MET'))
2137
2138                elif mod_pos[res_num] == 65: # converts S-DIMETHYLARSINOYL-CYSTEINE (CAF) to CYS
2139
2140                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
2141
2142                    try:
2143                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
2144                    except IndexError:
2145                        pass
2146
2147                    if (res_num, residue, 'CAF -> CYS') not in mod_row:
2148                        mod_row.append((res_num, residue, 'CAF -> CYS'))
2149
2150                elif mod_pos[res_num] == 66: # converts META-NITRO-TYROSINE (NIY) to TYR
2151
2152                    atom_site.setValue('TYR', 'label_comp_id', atom_row)
2153
2154                    try:
2155                        atom_site.setValue('TYR', 'auth_comp_id', atom_row)
2156                    except IndexError:
2157                        pass
2158
2159                    if (res_num, residue, 'NIY -> TYR') not in mod_row:
2160                        mod_row.append((res_num, residue, 'NIY -> TYR'))
2161
2162                elif mod_pos[res_num] == 67: # converts O-ACETYLSERINE (OAS) to SER
2163
2164                    atom_site.setValue('SER', 'label_comp_id', atom_row)
2165
2166                    try:
2167                        atom_site.setValue('SER', 'auth_comp_id', atom_row)
2168                    except IndexError:
2169                        pass
2170
2171                    if (res_num, residue, 'OAS -> SER') not in mod_row:
2172                        mod_row.append((res_num, residue, 'OAS -> SER'))
2173
2174                elif mod_pos[res_num] == 68: # converts S-METHYL-THIO-CYSTEINE (SCH) to CYS
2175
2176                    atom_site.setValue('CYS', 'label_comp_id', atom_row)
2177
2178                    try:
2179                        atom_site.setValue('CYS', 'auth_comp_id', atom_row)
2180                    except IndexError:
2181                        pass
2182
2183                    if (res_num, residue, 'SCH -> CYS') not in mod_row:
2184                        mod_row.append((res_num, residue, 'SCH -> CYS'))
2185
2186                elif mod_pos[res_num] == 69: # converts 2-methyl-L-norleucine (MK8) to LEU
2187
2188                    atom_site.setValue('LEU', 'label_comp_id', atom_row)
2189
2190                    try:
2191                        atom_site.setValue('LEU', 'auth_comp_id', atom_row)
2192                    except IndexError:
2193                        pass
2194
2195                    if (res_num, residue, 'MK8 -> LEU') not in mod_row:
2196                        mod_row.append((res_num, residue, 'MK8 -> LEU'))
2197
2198                elif mod_pos[res_num] == 70: # converts METHIONINE SULFOXIDE (SME) to MET
2199
2200                    atom_site.setValue('MET', 'label_comp_id', atom_row)
2201
2202                    try:
2203                        atom_site.setValue('MET', 'auth_comp_id', atom_row)
2204                    except IndexError:
2205                        pass
2206
2207                    if (res_num, residue, 'SME -> MET') not in mod_row:
2208                        mod_row.append((res_num, residue, 'SME -> MET'))
2209
2210                elif mod_pos[res_num] == 71: # converts 5-HYDROXYLYSINE (LYZ) to LYS
2211
2212                    atom_site.setValue('LYS', 'label_comp_id', atom_row)
2213
2214                    try:
2215                        atom_site.setValue('LYS', 'auth_comp_id', atom_row)
2216                    except IndexError:
2217                        pass
2218
2219                    if (res_num, residue, 'LYZ -> LYS') not in mod_row:
2220                        mod_row.append((res_num, residue, 'LYZ -> LYS'))
2221
2222        ########################################################################
2223        ## Notify user about modification made to cif data                    ##
2224        ########################################################################
2225
2226        if DEBUG_MODE:
2227            mod_model_num = len([ msg for msg in cha_row if msg[2] == 'model_num' ])
2228            mod_ins_code = len([ msg for msg in cha_row if msg[2] == 'ins_code' ])
2229            mod_group_PDB = len([ msg for msg in cha_row if msg[2] == 'group_PDB' ])
2230
2231            if mod_model_num != 0:
2232                print ('! {p} {c}: modified atom_site.pdbx_PDB_model_num for {cr} residues to 1.'.format(
2233                        p = pdb_code,
2234                        c = chain,
2235                        cr = mod_model_num))
2236
2237            if mod_ins_code != 0:
2238                print ('! {p} {c}: modified atom_site.pdbx_PDB_ins_code for {cr} residues to "?".'.format(
2239                        p = pdb_code,
2240                        c = chain,
2241                        cr = mod_ins_code))
2242
2243            if mod_group_PDB != 0:
2244                print ('! {p} {c}: modified atom_site.group_PDB for {cr} residues to "ATOM".'.format(
2245                        p = pdb_code,
2246                        c = chain,
2247                        cr = mod_group_PDB))
2248
2249            for residue in reversed(mod_row):
2250                print ('! {p} {c}: modified cif pos {cr} ({nr}).'.format(
2251                    p = pdb_code,
2252                    c = chain,
2253                    cr = residue[0],
2254                    ca = residue[1],
2255                    nr = residue[2]))
2256
2257
2258            for residue in reversed(rem_row):
2259                print ('! {p} {c}: removed cif pos {cr} ({ca})'.format(
2260                    p = pdb_code,
2261                    c = chain,
2262                    cr = residue[0],
2263                    ca = residue[1]))
2264
2265        cif_edits[pdb_code] = block
2266
2267    # write modified pir to disk
2268    for pdb_code in cif_edits:
2269        out = open(os.path.join(output_path, pdb_code + '.cif'), 'w')
2270        writer = PdbxWriter(out)
2271        writer.writeContainer(cif_edits[pdb_code])
2272
2273    # Delete missing entries from the last template sequence to the first
2274    for row in reversed(del_row):
2275        template_grid.del_row(row)
2276
2277    return template_grid
2278
2279def remove_self_alignment(template_grid, query_name):
2280    """ Removes a self alignment from the final pir alignment to prevent clashes with MODELLER """
2281
2282    to_delete = list()
2283
2284    for row in range(template_grid.get_grid_height()):
2285        if template_grid._pdb_code[row] == query_name:
2286            to_delete.append(row)
2287
2288    for row in reversed(to_delete):
2289        template_grid.del_row(row)
2290
2291    return True
2292
2293def write_to_file(line_list, fname):
2294    """ Writes the final pir file """
2295
2296    with open(fname, 'w+') as fout:
2297        for line in line_list:
2298            fout.write(line + "\n")
2299
2300def arg():
2301    import argparse
2302    description = """Creates a MODELLER alignment (*.pir) from a HHSearch results file (*.hhr)."""
2303    epilog= '2016 Harald Voehringer.'
2304    # Initiate a ArgumentParser Class
2305    parser = argparse.ArgumentParser(description = description, epilog = epilog)
2306
2307    # Call add_options to the parser
2308    parser.add_argument('input', help = 'results file from HHsearch with hit list and alignment', metavar = 'FILE')
2309    parser.add_argument('cifs', help = 'path to the folder containing cif files', metavar = 'DIR')
2310    parser.add_argument('pir', help = 'output file (PIR-formatted multiple alignment)', metavar = 'FILE')
2311    parser.add_argument('output', help = 'path to the folder where modified cif files should be written to', metavar = 'DIR')
2312
2313    parser.add_argument('-v', '--verbose', action = 'store_true', help = 'verbose mode')
2314    parser.add_argument('-m', nargs = '+', help = 'pick hits with specified indices (e.g. -m 2 5)', metavar = 'INT')
2315    parser.add_argument('-e', type = float, help = 'maximum E-Value threshold (e.g. -e 0.001)', metavar = 'FLOAT')
2316    parser.add_argument('-r', type = float, help = 'residue ratio (filter alignments that have contribute at least residues according to the specified ratio).',
2317        default = 0, metavar = 'FLOAT')
2318
2319    parser.add_argument('-c', help = 'convert non-canonical residues (default = True)', action = 'store_true', default = True)
2320
2321
2322    return parser
2323
2324
2325def main():
2326    import sys
2327    parser = arg()
2328    args = parser.parse_args(sys.argv[1:])
2329
2330    global DEBUG_MODE
2331
2332    if args.verbose:
2333        DEBUG_MODE = True
2334
2335    query_name, query_chain = get_query_name(args.input)
2336
2337    data = read_result(args.input)
2338    selected_templates = list()
2339
2340    if args.m and not args.e:
2341        selection = map(lambda x: int(x), args.m)
2342        print ('Selected templates {st}.'.format(st = ', '.join(args.m)))
2343
2344        for i in selection:
2345            tmp_info = str(data[i - 1].template_info.split('>')[1])
2346            print ('{i}: {t}'.format(
2347                i = i,
2348                t = tmp_info[0:80]))
2349
2350            selected_templates.append(data[i - 1])
2351
2352    elif args.e and not args.m:
2353        print ('Selected templates satisfying E-val <= {e}'.format(e = args.e))
2354
2355        e_values = { float(j.evalue):i for i, j in enumerate(data) }
2356        selection = sorted([ val for key, val in e_values.items() if key <= args.e ])
2357
2358        for i in selection:
2359            tmp_info = str(data[i - 1].template_info.split('>')[1])
2360            print ('{i}: {t}'.format(
2361                i = i + 1,
2362                t = tmp_info[0:80]))
2363
2364            selected_templates.append(data[i - 1])
2365
2366    elif args.m and args.e:
2367        print ('! Please do not use option -m and -e at the same time ! Exiting.')
2368        sys.exit()
2369    else:
2370        selected_templates = data
2371
2372        print ('Creating pir file using all templates ({n})'.format(
2373            n = len(selected_templates)))
2374
2375    query_grid = create_query_grid(selected_templates) # load query grid
2376    print ('query_grid')
2377    print(query_grid)
2378    gapless_query_grid = create_gapless_grid(query_grid) # remove gaps
2379    print ('gapless_query_grid')
2380    print(gapless_query_grid)
2381    processed_query_grid = process_query_grid(query_grid, gapless_query_grid) # insert gaps
2382    ##processed_query_grid = process_query_grid(query_grid, query_grid) # insert gaps
2383    print ('processed_query_grid')
2384    print (processed_query_grid)
2385    glob_seq = derive_global_seq(processed_query_grid, query_name, query_chain) # derive query sequence
2386    template_grid = create_template_grid(selected_templates) # create template grid
2387    print ('template_grid')
2388    print (template_grid)
2389    processed_template_grid = process_template_grid(query_grid, template_grid) # insert gaps to template sequnces
2390    print ('processed_query_grid')
2391    print (processed_query_grid)
2392    print ('hzhu processed_template_grid')
2393    print (processed_template_grid)
2394    final_grid = compare_with_cifs(processed_template_grid, args.cifs, args.output, args.c, args.r) # compare with atom section of cifs
2395    remove_self_alignment(final_grid, query_name) # remove self alignment if any
2396    write_to_file([glob_seq, final_grid.display()], args.pir)
2397
2398
2399if __name__ == "__main__":
2400    main()
2401