1 /* @source ajpdbio ************************************************************
2 **
3 ** Data structures and functions for reading and writing PDB format files.
4 ** Includes functions for writing a Pdb object (defined in ajpdb.h).
5 **
6 ** @author Copyright (c) 2004 Jon Ison
7 ** @version $Revision: 1.48 $
8 ** @modified $Date: 2012/12/07 10:16:59 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA  02110-1301,  USA.
25 **
26 ******************************************************************************/
27 
28 /* ======================================================================= */
29 /* ============================ include files ============================ */
30 /* ======================================================================= */
31 
32 #include "ajlib.h"
33 
34 #include "ajpdbio.h"
35 #include "ajbase.h"
36 #include "ajfileio.h"
37 
38 #include <limits.h>
39 #include <math.h>
40 
41 
42 
43 
44 /* ======================================================================= */
45 /* ============================ private data ============================= */
46 /* ======================================================================= */
47 
48 
49 
50 
51 /* @enumstatic PdbfileELinetype ***********************************************
52 **
53 ** AJAX PDB File Line Type enumeration for writing in PDB format
54 **
55 ** @value pdbfileELinetypeIgnore
56 ** Ignore this line when parsing coordinates
57 ** @value pdbfileELinetypeCoordinate
58 ** Coordinate line
59 ** @value pdbfileELinetypeHeterogen
60 ** Coordinate line for non-protein atoms
61 ** @value pdbfileELinetypeTER
62 ** TER record Chain terminator.
63 ** @value pdbfileELinetypeMODEL
64 ** MODEL record Specification of model number for multiple structures in a
65 ** single coordinate entry.
66 ** @value pdbfileELinetypeGroups
67 ** Coordinate line for groups that could not be associated with a SEQRES chain
68 ** @value pdbfileELinetypeWater
69 ** Coordinate line for water
70 ** @value pdbfileELinetypeENDMDL
71 ** ENDMDL record End-of-model record for multiple structures in a single
72 ** coordinate entry.
73 ** @@
74 ******************************************************************************/
75 
typedef enum PdbfileOLinetype
{
    pdbfileELinetypeIgnore     = 0, /* Ignore when parsing coordinates     */
    pdbfileELinetypeCoordinate = 1, /* Coordinate line                     */
    pdbfileELinetypeHeterogen  = 2, /* Coordinate line, non-protein atoms  */
    pdbfileELinetypeTER        = 3, /* TER record (chain terminator)       */
    pdbfileELinetypeMODEL      = 4, /* MODEL record                        */
    pdbfileELinetypeGroups     = 5, /* Group not associated with a SEQRES
                                    ** chain                               */
    pdbfileELinetypeWater      = 6, /* Coordinate line for water           */
    pdbfileELinetypeENDMDL     = 7  /* ENDMDL record (end of model)        */
} PdbfileELinetype;
87 
88 
89 
90 
91 /* @datastatic AjPElement *****************************************************
92 **
93 ** Nucleus Element object.
94 **
95 ** Object for holding a single secondary structure element when parsing PDB.
96 **
97 ** AjPElement is implemented as a pointer to a C data structure.
98 **
99 ** @alias AjSElement
100 ** @alias AjOElement
101 **
** @attr elementId [AjPStr]    Element identifier (columns 12 - 14)
** @attr initResName [AjPStr]  Name of first residue in each element (columns
**                             16 - 18 (HELIX & TURN) or 18 - 20 (SHEET) )
** @attr initSeqNum [AjPStr]   Residue number (including insertion code)
**                             of first residue in each element
**                             (columns 22 - 26 (HELIX), 23 - 27 (SHEET)
**                             or 21 - 25 (TURN) )
** @attr endResName [AjPStr]   Name of last residue in each element
**                             (columns 28 - 30 (HELIX), 29 - 31 (SHEET)
**                             or  27 - 29 (TURN) )
** @attr endSeqNum [AjPStr]    Residue number (including insertion code) of
**                             last residue in each element
**                             (columns 34 - 38 (HELIX and SHEET)
**                             or 32 - 36 (TURN)  )
** @attr helixClass [ajint]    Classes of helices (columns 39 - 40),
**                             an int from 1-10 from the table below
** @attr elementNum [ajint]    Serial number of the element (columns 8 - 10)
** @attr elementType [char]    Element type COIL ('C'), HELIX ('H'),
**                             SHEET ('E') or TURN ('T')
121 ** @attr chainId [char]        Chain identifiers for chains containing the
122 **                             elements (column 20 (HELIX & TURN)
123 **                             or 22 (SHEET) )
124 ** @attr Padding [char[6]]     Padding to alignment boundary
125 **
126 ** http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html<br>
127 **
128 ** TYPE OF HELIX             CLASS NUMBER (COLUMNS 39 - 40)<br>
129 ** --------------------------------------------------------------<br>
130 ** Right-handed alpha (default)                1<br>
131 ** Right-handed omega                          2<br>
132 ** Right-handed pi                             3<br>
133 ** Right-handed gamma                          4<br>
134 ** Right-handed 310                            5<br>
135 ** Left-handed alpha                           6<br>
136 ** Left-handed omega                           7<br>
137 ** Left-handed gamma                           8<br>
138 ** 27 ribbon/helix                             9<br>
139 ** Polyproline                                10<br>
140 **
141 ** @@
142 ******************************************************************************/
143 
144 typedef struct AjSElement
145 {
146     AjPStr elementId;
147     AjPStr initResName;
148 
149     AjPStr initSeqNum;
150     AjPStr endResName;
151     AjPStr endSeqNum;
152     ajint helixClass;
153     ajint elementNum;
154 
155     char elementType;
156     char chainId;
157     char Padding[6];
158 } AjOElement;
159 
160 #define AjPElement AjOElement*
161 
162 
163 
164 
165 /* @datastatic AjPElements ****************************************************
166 **
167 ** Nucleus Elements object.
168 **
169 ** Object for holding secondary structure elements from a PDB file
170 **
171 ** AjPElements is implemented as a pointer to a C data structure.
172 **
173 **
174 **
175 ** @alias AjSElements
176 ** @alias AjOElements
177 **
178 **
179 **
180 ** @attr elms [AjPElement*] Secondary structure element array
181 ** @attr n [ajuint] Total no. of secondary structure elements
182 **                  (helices, strands or turns)
183 ** @attr Padding [char[4]] Padding to alignment boundary
184 ** @@
185 ******************************************************************************/
186 
187 typedef struct AjSElements
188 {
189     AjPElement *elms;
190     ajuint n;
191     char Padding[4];
192 } AjOElements;
193 
194 #define AjPElements AjOElements*
195 
196 
197 
198 
199 /* @datastatic AjPPdbfile *****************************************************
200 **
201 ** Nucleus Pdbfile object.
202 **
203 ** Holds a pdb file for parsing.
204 **
205 ** AjPPdbfile is implemented as a pointer to a C data structure.
206 **
207 ** @alias AjSPdbfile
208 ** @alias AjOPdbfile
209 **
210 ** @attr pdbid [AjPStr]       4 character pdb id code
211 ** @attr tercnt [ajuint]      The number of TER records in the pdb file
212 ** @attr toofewter [AjBool]   True if the file contained too few TER records
213 ** @attr modcnt [ajuint]      The number of MODEL records in the pdb file
214 **                            (does not count duplicate MODEL records
215 **                            that are masked out)
216 ** @attr nomod [AjBool]       True if the file contained no MODEL records
217 ** @attr compnd [AjPStr]      Text from COMPND records
218 ** @attr source [AjPStr]      Text from SOURCE records
219 ** @attr reso [float]         Resolution of structure
220 ** @attr method [AjEPdbMethod] AJAX PDB Method enumeration
221 ** @attr gpid [AjPChar]       Array of chain (group) id's for groups that
222 **                            cannot be associated with a chain in the SEQRES
223 **                            section
224 ** @attr idxfirst [ajuint]    Index in <lines> of first ATOM, HETATM or MODEL
225 **                            line
226 **
227 ** @attr nchains [ajuint]     Number of chains (from SEQRES record)
228 **                            for sizes of following attribute arrays
229 ** @attr seqres [AjPStr*]     Array of sequences taken from the SEQRES records
230 ** @attr seqresful [AjPStr*]  Array of sequences using 3 letter codes taken
231 **                            from the SEQRES records
232 ** @attr nres [ajint*]        Number of residues in each chain
233 ** @attr chainok [AjBool*]    Array of flags which are True if a chain in the
234 **                            SEQRES record contains >= minimum no. of amino
235 **                            acids and has a  unique chain identifier
236 ** @attr resn1ok [AjBool*]    Bool's for each chain which are TRUE if resn1
237 **                            was used to derive resni, i.e. gave correct
238 **                            alignment to seqres sequence.
239 **                            If False then resn2 was used.
240 ** @attr nligands [ajint*]    Number of ligands for each chain.  A ligand is a
241 **                            non-protein group associated with a chain
242 **                            in the SEQRES section.
243 ** @attr numHelices [ajint*]  No. of helices in each chain
244 ** @attr numStrands [ajint*]  No. of strands in each chain
245 ** @attr numSheets [ajint*]   No. of sheets in each chain
246 ** @attr numTurns [ajint*]    No. of turns in each chain
247 ** @attr chid [AjPChar]       Array of chain id's for chains from SEQRES
248 **                            records
249 **
250 ** @attr lines [AjPStr*]      Array of lines in the pdb file
251 ** @attr linetype [PdbfileELinetype*]
252 **                            Array of int's describing the lines, have values
253 **                            of pdbfileELinetypeIgnore (do not consider this line
254 **                            when parsing coordinates from the file),
**                            pdbfileELinetypeCoordinate (coordinate line
**                            (ATOM or HETATM record) for protein atoms),
257 **                            pdbfileELinetypeHeterogen (coordinate line for
258 **                            non-protein atoms), pdbfileELinetypeGroups
259 **                            (coordinate line for groups that could not be
260 **                            associated with a SEQRES chain),
261 **                            pdbfileELinetypeWater (coordinate line for
262 **                            water), pdbfileELinetypeTER (it is a TER record) or
263 **                            pdbfileELinetypeMODEL (it is a MODEL record).
264 ** @attr chnn [ajint*]        Array of chain numbers for each
265 **                            pdbfileELinetypeCoordinate &
266 **                            pdbfileELinetypeHeterogen line.
267 ** @attr gpn [ajint*]         Array of group numbers for each line. Each group
268 **                            (heterogen) is given a group number, that is
269 **                            either relative to a chain or the whole file
270 **                            (for groups that could  not be associated with
271 **                            a chain from the SEQRES records)
272 ** @attr modn [ajuint*]       Array of model numbers for each
273 **                            pdbfileELinetypeCoordinate line
274 **
275 ** @attr resni [ajint*]       Residue numbers for each pdbfileELinetypeCoordinate
276 **                            line. These give the correct index into the
277 **                            'seqres' sequences
278 ** @attr resn1 [ajint*]       Array of residue numbers for each
279 **                            pdbfileELinetypeCoordinate line. This is pdbn
280 **                            converted to a sequential integer where
281 **                            alternative residue numbering is presumed for
282 **                            lines where line[26] is used (residues for which
283 **                            oddnum == True are considered).
284 ** @attr resn2 [ajint*]       Array of residue numbers for each
285 **                            pdbfileELinetypeCoordinate line. This is pdbn
286 **                            converted to a sequential integer where
287 **                            heterogeneity is presumed for lines where
288 **                            line[26] is used (residues where oddnum == True
289 **                            are ignored).
290 ** @attr pdbn [AjPStr*]       Array with a residue number for each line
291 **                            for which 'coord' == ajTrue. This is the
292 **                            original residue number string (including
293 **                            insertion code) from the pdb file
294 ** @attr oddnum [AjBool*]     Bool's for each line which are TRUE for
295 **                            duplicate residues of heterogenous positions
296 **                            (e.g. if 2 different residues are both numbered
297 **                            '8' or one is '8' and the other '8A'
298 **                            for example then <oddnum> would be set True for
299 **                            the second residue. Heterogeneity is indicated
300 **                            by a character in position lines[26] (the same
301 **                            position used to indicate alternative residue
302 **                            numbering schemes).
303 ** @attr atype [AjPStr*]      Atom type for each line
304 ** @attr rtype [AjPStr*]      Residue type for each line
305 ** @attr x [float*]           x-coordinate for each line
306 ** @attr y [float*]           y-coordinate for each line
307 ** @attr z [float*]           z-coordinate for each line
308 ** @attr o [float*]           occupancy for each line
309 ** @attr b [float*]           thermal factor for each line
310 **
311 ** @attr elementNum [ajint*]  Serial number of the secondary structure element
312 **                            (columns 8 - 10)
313 ** @attr elementId [AjPStr*]  Secondary structure element identifier (columns
314 **                            12 - 14)
315 ** @attr elementType [char*]  Secondary structure element type COIL ('C'),
316 **                            HELIX ('H'), SHEET ('E') or TURN ('T')
317 ** @attr helixClass [ajint*]   Classes of helices (columns 39 - 40)  from
318 **       http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
319 **                             (see below). Has a value of 0 (printed out as
320 **                             '.') for non-helical elements.
321 ** @attr nlines [ajuint]      Number of lines in the pdb file and size of the
322 **                            following arrays
** @attr ngroups [ajuint]     Number of groups (non-protein groups that
**                            could not be associated with a chain in the
**                            SEQRES section)
326 ** @@
327 ******************************************************************************/
328 
329 typedef struct AjSPdbfile
330 {
331     AjPStr pdbid;
332     ajuint tercnt;
333     AjBool toofewter;
334     ajuint modcnt;
335     AjBool nomod;
336     AjPStr compnd;
337     AjPStr source;
338     float reso;
339     AjEPdbMethod method;
340 
341     AjPChar gpid;
342     ajuint idxfirst;
343 
344     ajuint nchains;
345     AjPStr *seqres;
346     AjPStr *seqresful;
347     ajint *nres;
348     AjBool *chainok;
349     AjBool *resn1ok;
350     ajint *nligands;
351     ajint *numHelices;
352     ajint *numStrands;
353     ajint *numSheets;
354     ajint *numTurns;
355     AjPChar chid;
356 
357     AjPStr *lines;
358     PdbfileELinetype *linetype;
359     ajint *chnn;
360     ajint *gpn;
361     ajuint *modn;
362     ajint *resni;
363     ajint *resn1;
364     ajint *resn2;
365     AjPStr *pdbn;
366     AjBool *oddnum;
367     AjPStr *atype;
368     AjPStr *rtype;
369     float *x;
370     float *y;
371     float *z;
372     float *o;
373     float *b;
374     ajint *elementNum;
375     AjPStr *elementId;
376     char *elementType;
377     ajint *helixClass;
378     ajuint nlines;
379     ajuint ngroups;
380 } AjOPdbfile;
381 
382 #define AjPPdbfile AjOPdbfile*
383 
384 
385 
386 
/* Position in ATOM line of chain id. */
#define POS_CHID         21

/*
** A number of residues may be missing from the N-terminus of the SEQRES
** records relative to the ATOM records (e.g. MET and ACE often do not
** appear).  The parser will search and correct for such cases.
** MAXMISSNTERM is the maximum number of such missing residues that can be
** accounted for.
*/
#define MAXMISSNTERM      3

#if AJFALSE
/* DIAGNOSTIC */
AjPStr tempstr;
AjPFile tempfile;
#endif /* AJFALSE */
403 
404 
405 
406 
407 /* ======================================================================= */
408 /* ================= Prototypes for private functions ==================== */
409 /* ======================================================================= */
410 
411 /* These functions are for parsing PDB files */
412 #if AJFALSE
413 /* DIAGNOSTIC */
414 static void pdbioDiagnostic(AjPPdbfile pdbfile, ajint n);
415 #endif /* AJFALSE */
416 static void pdbioPdbfileDel(AjPPdbfile *Ppdbfile);
417 static AjPPdbfile pdbioReadLines(AjPFile inf);
418 static AjPPdbfile pdbioPdbfileNew(ajuint nlines, ajuint nchains);
419 static AjBool pdbioFirstPass(AjPPdbfile pdbfile, AjPFile flog,
420                              AjPElements *elms, AjBool camask);
421 static AjBool pdbioCheckChains(AjPPdbfile pdbfile, AjPFile flog,
422                                ajint min_chain_size);
423 static AjBool pdbioSeqresToSequence(const AjPStr seqres, AjPStr *seq,
424                                     AjBool camask, ajuint *len);
425 static AjBool pdbioCheckTer(AjPPdbfile pdbfile, AjPFile flog);
426 static AjBool pdbioNumberChains(AjPPdbfile pdbfile, AjPFile flog);
427 static AjBool pdbioNoMoreAtoms(AjPPdbfile pdbfile, ajuint linen);
428 static AjBool pdbioMaskChains(AjPPdbfile pdbfile, AjPFile flog,
429                               ajint min_chain_size, AjBool camask,
430                               AjBool camask1, AjBool atommask);
431 static AjBool pdbioStandardiseNumbering(AjPPdbfile pdbfile,
432                                         AjPFile flog);
433 static AjBool pdbioAlignNumbering(AjPPdbfile pdbfile, AjPFile flog,
434                                   ajuint lim, ajuint lim2);
435 static AjBool pdbioPdbfileToPdb(AjPPdb *Ppdb, AjPPdbfile pdbfile);
436 static ajint pdbioPdbfileFindLine(const AjPPdbfile pdbfile, ajint chn,
437                              ajint which, ajint pos);
438 
439 /* Functions for Elements object */
440 static AjPElements pdbioElementsNew(ajuint nelms);
441 static void pdbioElementsDel(AjPElements *Pelements);
442 
443 /* Functions for Element object */
444 static AjPElement pdbioElementNew(void);
445 static void pdbioElementDel(AjPElement *Pelement);
446 static AjBool pdbioPdbfileChain(char id, const AjPPdbfile pdb, ajint *chn);
447 static AjBool pdbioWriteElementData(AjPPdbfile pdbfile, AjPFile flog,
448                                const AjPElements elms);
449 
450 /* These functions are called by ajPdbWriteDomainRecordRaw */
451 static AjBool pdbioWriteHeaderScop(AjPFile outf, const AjPScop scop);
452 static AjBool pdbioWriteSeqresDomain(AjPFile errf, AjPFile outf,
453                                      const AjPPdb pdb, const AjPScop scop);
454 static AjBool pdbioWriteAtomDomainPdb(AjPFile errf, AjPFile outf,
455                                       const AjPPdb pdb, const AjPScop scop,
456                                       ajint mod);
457 static AjBool pdbioWriteAtomDomainIdx(AjPFile errf, AjPFile outf,
458                                       const AjPPdb pdb, const AjPScop scop,
459                                       ajint mod);
460 static AjBool pdbioWriteAtomDomain(AjPFile errf, AjPFile outf,
461                                    const AjPPdb pdb,
462                                    const AjPScop scop, ajuint mod,
463                                    AjEPdbMode mode);
464 
465 /* These functions are called by ajPdbWriteRecordRaw */
466 static AjBool pdbioWriteSeqresChain(AjPFile errf, AjPFile outf,
467                                     const AjPPdb pdb, ajint chn);
468 static AjBool pdbioWriteAtomChain(AjPFile outf, const AjPPdb pdb, ajuint mod,
469                                   ajuint chn, AjEPdbMode mode);
470 static AjBool pdbioWriteHeterogen(AjPFile outf, const AjPPdb pdb, ajuint mod);
471 static AjBool pdbioWriteHeader(AjPFile outf, const AjPPdb pdb);
472 static AjBool pdbioWriteTitle(AjPFile outf, const AjPPdb pdb);
473 static AjBool pdbioWriteCompnd(AjPFile outf, const AjPPdb pdb);
474 static AjBool pdbioWriteSource(AjPFile outf, const AjPPdb pdb);
475 static AjBool pdbioWriteEmptyRemark(AjPFile outf, const AjPPdb pdb);
476 static AjBool pdbioWriteResolution(AjPFile outf, const AjPPdb pdb);
477 
478 /* Others */
479 static AjBool pdbioWriteText(AjPFile outf, const AjPStr str,
480                              const char *prefix);
481 
482 
483 
484 
485 /* ======================================================================= */
486 /* ========================== private functions ========================== */
487 /* ======================================================================= */
488 
489 
490 
491 
492 /* @funcstatic pdbioWriteSeqresChain ******************************************
493 **
494 ** Writes sequence for a protein chain to an output file in pdb format
495 ** (SEQRES records).  Sequence is taken from a Pdb structure.
496 **
497 ** @param [w] errf [AjPFile] Output file stream for error messages
498 ** @param [w] outf [AjPFile] Output file stream
499 ** @param [r] pdb  [const AjPPdb] Pdb object
500 ** @param [r] chn  [ajint] chain number, beginning at 1
501 **
502 ** @return [AjBool] True on success
503 **
504 ** @release 2.9.0
505 ** @@
506 ******************************************************************************/
507 
pdbioWriteSeqresChain(AjPFile errf,AjPFile outf,const AjPPdb pdb,ajint chn)508 static AjBool pdbioWriteSeqresChain(AjPFile errf, AjPFile outf,
509                                     const AjPPdb pdb, ajint chn)
510 {
511     ajuint last_rn = 0U;
512     ajuint this_rn = 0U;
513     ajuint i = 0U;
514     ajuint j = 0U;
515     ajuint len = 0U;
516     const char *p;
517 
518     AjPStr tmp1 = NULL;
519     AjPStr tmp2 = NULL;
520     AjIList iter = NULL;
521     AjPAtom atom = NULL;
522 
523     tmp1 = ajStrNew();
524     tmp2 = ajStrNew();
525 
526     iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
527 
528     /* Iterate through list of atoms */
529     while ((atom = (AjPAtom) ajListIterGet(iter)))
530     {
531         /*
532         ** Hard-coded to work on model 1
533         ** Break if a non-protein atom is found or model no. !=1
534         ** Continue / break if a non-protein atom is found or model no. !=1
535         */
536         if (atom->Mod != 1)
537             break;
538 
539         if (atom->Type != 'P')
540             continue;
541 
542         /* If we are onto a new residue */
543         this_rn = atom->Idx;
544         if (this_rn != last_rn)
545         {
546             /* Assign sequence for residues missing from the linked list */
547             for (i = last_rn; i < this_rn - 1; i++)
548             {
549                 /* Check that position i is in range for the sequence */
550                 if (!ajResidueToTriplet(
551                         ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, i),
552                         &tmp2))
553                 {
554                     ajWarn("Index out of range in pdbioWriteSeqresChain");
555                     ajFmtPrintF(errf, "//\n%S\nERROR Index out "
556                                 "of range in pdbioWriteSeqresChain\n",
557                                 pdb->Pdb);
558 
559                     ajStrDel(&tmp1);
560                     ajStrDel(&tmp2);
561                     ajListIterDel(&iter);
562 
563                     return ajFalse;
564                 }
565                 else
566                 {
567                     ajStrAppendS(&tmp1, tmp2);
568                     ajStrAppendC(&tmp1, " ");
569                 }
570             }
571 
572             ajStrAppendS(&tmp1, atom->Id3);
573             ajStrAppendC(&tmp1, " ");
574 
575             last_rn = this_rn;
576         }
577     }
578 
579     /* Assign sequence for residues missing from end of linked list */
580     for (i = last_rn; i < pdb->Chains[chn - 1]->Nres; i++)
581         if (!ajResidueToTriplet(
582                 ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, i),
583                 &tmp2))
584         {
585             ajStrDel(&tmp1);
586             ajStrDel(&tmp2);
587             ajListIterDel(&iter);
588             ajWarn("Index out of range in pdbioWriteSeqresChain");
589             ajFmtPrintF(errf, "//\n%S\nERROR Index out of range "
590                         "in pdbioWriteSeqresChain\n", pdb->Pdb);
591 
592             return ajFalse;
593         }
594         else
595         {
596             ajStrAppendS(&tmp1, tmp2);
597             ajStrAppendC(&tmp1, " ");
598         }
599 
600     /* Print out SEQRES records */
601     for (p = ajStrGetPtr(tmp1), len = ajStrGetLen(tmp1), i = 0U, j = 1U;
602          i < len;
603          i += 52, j++, p += 52)
604         ajFmtPrintF(outf, "SEQRES%4u %c%5u  %-61.52s\n",
605                     j,
606                     pdb->Chains[chn - 1]->Id,
607                     pdb->Chains[chn - 1]->Nres,
608                     p);
609 
610     /* Tidy up */
611     ajStrDel(&tmp1);
612     ajStrDel(&tmp2);
613     ajListIterDel(&iter);
614 
615     return ajTrue;
616 }
617 
618 
619 
620 
621 /* @funcstatic pdbioWriteSeqresDomain *****************************************
622 **
623 ** Writes sequence for a SCOP domain to an output file in pdb format (SEQRES
624 ** records). Sequence is taken from a Pdb structure, domain definition is
625 ** taken from a Scop structure.  Where coordinates for multiple models (e.g.
626 ** NMR structures) are given, data for model 1 are written.
627 **
628 ** @param [w] errf [AjPFile] Output file stream for error messages
629 ** @param [w] outf [AjPFile] Output file stream
630 ** @param [r] pdb  [const AjPPdb] Pdb object
631 ** @param [r] scop [const AjPScop] Scop object
632 **
633 ** @return [AjBool] True on success
634 **
635 ** @release 2.9.0
636 ** @@
637 ******************************************************************************/
638 
pdbioWriteSeqresDomain(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop)639 static AjBool pdbioWriteSeqresDomain(AjPFile errf, AjPFile outf,
640                                      const AjPPdb pdb, const AjPScop scop)
641 {
642     ajuint last_rn = 0U;
643     ajuint this_rn = 0U;
644     ajuint i = 0U;
645     ajuint j = 0U;
646     ajint rcnt = 0;
647     ajuint len = 0U;
648     ajuint chn = 0U;
649     const char *p = NULL;
650     char id = '\0';
651 
652     AjPStr tmp1 = NULL;
653     AjPStr tmp2 = NULL;
654     AjBool found_start = ajFalse;
655     AjBool found_end = ajFalse;
656     AjBool nostart = ajFalse;
657     AjBool noend = ajFalse;
658     AjIList iter = NULL;
659     AjPAtom atom = NULL;
660     AjPStr tmpstr = NULL;
661 /*  AjPResidue *resarr = NULL; */
662 
663 
664     /* Allocate strings etc */
665     tmp1 = ajStrNew();
666     tmp2 = ajStrNew();
667     tmpstr = ajStrNew();
668 
669 
670     /* Loop for each chain in the domain */
671     for (i = 0U;
672          i < scop->Number;
673          i++,
674              found_start = ajFalse,
675              found_end = ajFalse,
676              last_rn = 0)
677     {
678 
679         /* Check for error in chain id */
680         if (!ajPdbChnidToNum(scop->Chain[i], pdb, &chn))
681         {
682             ajListIterDel(&iter);
683             ajStrDel(&tmp1);
684             ajStrDel(&tmp2);
685             ajStrDel(&tmpstr);
686 
687             ajWarn("Chain incompatibility error in "
688                    "pdbioWriteSeqresDomain");
689 
690             ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility "
691                         "error in pdbioWriteSeqresDomain\n",
692                         scop->Entry);
693 
694             return ajFalse;
695         }
696 
697 #if AJFALSE
698         if (resarr)
699             AJFREE(resarr);
700         ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
701 #endif /* AJFALSE */
702 
703         /* Initialise iterator for list of atoms */
704         iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
705 
706         /* Start of chain not specified */
707         if (!ajStrCmpC(scop->Start[i], "."))
708             nostart = ajTrue;
709         else
710             nostart = ajFalse;
711 
712         /* End of chain not specified */
713         if (!ajStrCmpC(scop->End[i], "."))
714             noend = ajTrue;
715         else
716             noend = ajFalse;
717 
718         /* Iterate through list of atoms */
719         while ((atom = (AjPAtom) ajListIterGet(iter)))
720         {
721             /* Hard-coded to work on model 1 */
722             /*
723             ** Continue / break if a non-protein atom is found or
724             ** model no. != 1
725             */
726             if (atom->Mod != 1)
727                 break;
728 
729             if (atom->Type != 'P')
730                 continue;
731 
732 
733             /* If there is a new residue */
734             this_rn = atom->Idx;
735 
736             if (this_rn != last_rn)
737             {
738                 /*
739                 ** The start position was specified, but has not
740                 ** been found yet
741                 */
742                 if (!found_start && !nostart)
743                 {
744                     ajStrAssignS(&tmpstr, scop->Start[i]);
745                     ajStrAppendK(&tmpstr, '*');
746 
747 
748                     /* Start position found */
749                     /* if(!ajStrCmpCaseS(atom->Pdb, scop->Start[i])) */
750                     if (ajStrMatchWildS(atom->Pdb, tmpstr))
751                         /*
752                         ** if(ajStrMatchWildS(resarr[atom->Idx-1]->Pdb,
753                         **                    tmpstr))
754                         */
755                     {
756                         if (!ajStrMatchS(atom->Pdb, scop->Start[i]))
757                             /* if(!ajStrMatchS(resarr[atom->Idx-1]->Pdb, *
758                              * scop->Start[i])) */
759                         {
760                             ajWarn("Domain start found by wildcard match "
761                                    "only in pdbioWriteSeqresDomain");
762                             ajFmtPrintF(errf, "//\n%S\nERROR Domain start "
763                                         "found "
764                                         "by wildcard match only in "
765                                         "pdbioWriteSeqresDomain\n",
766                                         scop->Entry);
767                         }
768 
769                         last_rn = this_rn;
770                         found_start = ajTrue;
771                     }
772                     else
773                     {
774                         last_rn = this_rn;
775                         continue;
776                     }
777 
778                 }
779 
780 
781                 /*
782                 ** Assign sequence for residues missing from the linked list
783                 ** of atoms of known structure
784                 */
785                 for (j = last_rn; j < this_rn - 1; j++)
786                 {
787                     /* Check that position j is in range for the sequence */
788                     if (!ajResidueToTriplet(
789                             ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, j),
790                             &tmp2))
791                     {
792                         ajListIterDel(&iter);
793                         ajStrDel(&tmp1);
794                         ajStrDel(&tmp2);
795                         ajStrDel(&tmpstr);
796 
797                         ajWarn("Index out of range in "
798                                "pdbioWriteSeqresDomain");
799                         ajFmtPrintF(errf, "//\n%S\nERROR Index out of range "
800                                     "in pdbioWriteSeqresDomain\n",
801                                     scop->Entry);
802 
803                         return ajFalse;
804                     }
805                     else
806                     {
807                         ajStrAppendS(&tmp1, tmp2);
808                         ajStrAppendC(&tmp1, " ");
809                         rcnt++;
810                     }
811                 }
812 
813                 last_rn = this_rn;
814 
815 
816                 /* Append the residue to the sequence */
817                 ajStrAppendS(&tmp1, atom->Id3);
818                 ajStrAppendC(&tmp1, " ");
819                 rcnt++;
820 
821 
822                 /* The end position was specified, but has not been found yet */
823                 if (!found_end && !noend)
824                 {
825                     ajStrAssignS(&tmpstr, scop->End[i]);
826                     ajStrAppendK(&tmpstr, '*');
827 
828 
829                     /* End found */
830                     /*
831                     ** if(!ajStrCmpCaseS(atom->Pdb, scop->End[i]))
832                     */
833                     if (ajStrMatchWildS(atom->Pdb, tmpstr))
834                         /*
835                         ** if(ajStrMatchWildS(resarr[atom->Idx-1]->Pdb,
836                         **                    tmpstr))
837                         */
838                     {
839                         if (!ajStrMatchS(atom->Pdb, scop->End[i]))
840                             /*
841                             ** if(!ajStrMatchS(resarr[atom->Idx-1]->Pdb,
842                             **                 scop->End[i]))
843                             */
844                         {
845                             ajWarn("Domain end found by wildcard match only "
846                                    "in pdbioWriteSeqresDomain");
847                             ajFmtPrintF(errf, "//\n%S\nERROR Domain end found "
848                                         "by wildcard match only in "
849                                         "pdbioWriteSeqresDomain\n",
850                                         scop->Entry);
851                         }
852 
853 
854                         found_end = ajTrue;
855                         break;
856                     }
857                 }
858             }
859         }
860 
861 
862         /* Domain start specified but not found */
863         if (!found_start && !nostart)
864         {
865             ajListIterDel(&iter);
866             ajStrDel(&tmp1);
867             ajStrDel(&tmp2);
868             ajStrDel(&tmpstr);
869 
870             ajWarn("Domain start not found in pdbioWriteSeqresDomain");
871             ajFmtPrintF(errf, "//\n%S\nERROR Domain start not found "
872                         "in pdbioWriteSeqresDomain\n", scop->Entry);
873 
874             return ajFalse;
875         }
876 
877 
878         /* Domain end specified but not found */
879         if (!found_end && !noend)
880         {
881             ajListIterDel(&iter);
882             ajStrDel(&tmp1);
883             ajStrDel(&tmp2);
884             ajStrDel(&tmpstr);
885 
886             ajWarn("Domain end not found in pdbioWriteSeqresDomain");
887             ajFmtPrintF(errf, "//\n%S\nERROR Domain end not found "
888                         "in pdbioWriteSeqresDomain\n", scop->Entry);
889 
890             return ajFalse;
891         }
892 
893         /*
894         ** Assign sequence for residues missing from end of linked list
895         ** Only needs to be done where the end of the domain is not specified
896         */
897         if (noend)
898         {
899             for (j = last_rn; j < pdb->Chains[chn - 1]->Nres; j++)
900                 if (!ajResidueToTriplet(
901                         ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, j),
902                         &tmp2))
903                 {
904                     ajStrDel(&tmp1);
905                     ajStrDel(&tmp2);
906                     ajStrDel(&tmpstr);
907 
908                     ajListIterDel(&iter);
909                     ajWarn("Index out of range in pdbioWriteSeqresDomain");
910                     ajFmtPrintF(errf, "//\n%S\nERROR Index out of "
911                                 "range in pdbioWriteSeqresDomain\n",
912                                 scop->Entry);
913 
914                     return ajFalse;
915                 }
916                 else
917                 {
918                     ajStrAppendS(&tmp1, tmp2);
919                     ajStrAppendC(&tmp1, " ");
920                     rcnt++;
921                 }
922         }
923 
924         ajListIterDel(&iter);
925     }
926 
927     /*
928     ** If the domain was composed of more than once chain then a '.' is
929     ** given as the chain identifier
930     */
931     if (scop->Number > 1)
932         id = '.';
933     else
934         id = pdb->Chains[chn - 1]->Id;
935 
936     /* Print out SEQRES records */
937     for (p = ajStrGetPtr(tmp1), len = ajStrGetLen(tmp1), i = 0U, j = 1;
938          i < len;
939          i += 52, j++, p += 52)
940         ajFmtPrintF(outf, "SEQRES%4u %c%5d  %-61.52s\n",
941                     j,
942                     id,
943                     rcnt,
944                     p);
945 
946 #if AJFALSE
947     if (resarr)
948         AJFREE(resarr);
949 #endif /* AJFALSE */
950     ajStrDel(&tmp1);
951     ajStrDel(&tmp2);
952     ajStrDel(&tmpstr);
953 
954     return ajTrue;
955 }
956 
957 
958 
959 
960 /* @funcstatic pdbioWriteAtomChain ********************************************
961 **
962 ** Writes coordinates for a protein chain to an output file in pdb format
963 ** (ATOM records). Coordinates are taken from a Pdb structure. The model
964 ** number argument should have a value of 1 for x-ray structures.
965 **
966 ** @param [w] outf [AjPFile] Output file stream
967 ** @param [r] pdb  [const AjPPdb]  Pdb object
968 ** @param [r] mod  [ajuint]   Model number, beginning at 1
969 ** @param [r] chn  [ajuint]   Chain number, beginning at 1
970 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration. Either ajEPdbModePdb
971 **                              or ajEPdbModeIdx if the original or
972 **                              corrected residue number is to be used.
973 **
974 ** @return [AjBool] True on success
975 **
976 ** @release 2.9.0
977 ** @@
978 ******************************************************************************/
979 
pdbioWriteAtomChain(AjPFile outf,const AjPPdb pdb,ajuint mod,ajuint chn,AjEPdbMode mode)980 static AjBool pdbioWriteAtomChain(AjPFile outf, const AjPPdb pdb,
981                              ajuint mod, ajuint chn,
982                              AjEPdbMode mode)
983 {
984     AjBool doneter = ajFalse;
985     AjIList iter = NULL;
986     AjPAtom atom1 = NULL;
987     AjPAtom atom2 = NULL;
988     ajint acnt = 0;
989 #if AJFALSE
990     AjPResidue *resarr = NULL;
991 #endif /* AJFALSE */
992 
993     /* Check args are not NULL */
994     if (!outf || !pdb || mod < 1 || chn < 1)
995         return ajFalse;
996 
997 #if AJFALSE
998     ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
999 #endif /* AJFALSE */
1000 
1001     doneter = ajFalse;
1002     iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
1003 
1004     while ((atom1 = (AjPAtom) ajListIterGet(iter)))
1005         if (atom1->Mod == mod)
1006             break;
1007 
1008     for (acnt = 1; atom1; atom1 = (AjPAtom) ajListIterGet(iter))
1009     {
1010         /* Break if on a new model */
1011         if (atom1->Mod != mod)
1012             break;
1013 
1014 
1015         /* End of protein atoms - so write a TER record */
1016         if (atom1->Type != 'P' && (!doneter))
1017         {
1018             switch (mode)
1019             {
1020                 case ajEPdbModeIdx:
1021                     ajFmtPrintF(outf, "%-6s%5d      %-4S%c%4d%54s\n",
1022                                 "TER",
1023                                 acnt++,
1024                                 atom2->Id3,
1025                                 pdb->Chains[chn - 1]->Id,
1026                                 atom2->Idx,
1027                                 " ");
1028                     break;
1029 
1030                 case ajEPdbModePdb:
1031                     ajFmtPrintF(outf, "%-6s%5d      %-4S%c%4S%54s\n",
1032                                 "TER",
1033                                 acnt++,
1034                                 atom2->Id3,
1035                                 pdb->Chains[chn - 1]->Id,
1036                                 atom2->Pdb,
1037                                 /* resarr[atom2->Idx-1]->Pdb, */
1038                                 " ");
1039                     break;
1040 
1041                 default:
1042                     ajFatal("Invalid mode in pdbioWriteAtomChain");
1043             }
1044 
1045             doneter = ajTrue;
1046         }
1047 
1048 
1049         /* Write out ATOM or HETATM line */
1050         if (atom1->Type == 'P')
1051             ajFmtPrintF(outf, "%-6s", "ATOM");
1052         else
1053             ajFmtPrintF(outf, "%-6s", "HETATM");
1054 
1055         switch (mode)
1056         {
1057             case ajEPdbModeIdx:
1058                 ajFmtPrintF(outf, "%5d  %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1059                             "%6.2f%6.2f%11s%-3c\n",
1060                             acnt++,
1061                             atom1->Atm,
1062                             atom1->Id3,
1063                             pdb->Chains[chn - 1]->Id,
1064                             atom1->Idx,
1065                             atom1->X,
1066                             atom1->Y,
1067                             atom1->Z,
1068                             atom1->O,
1069                             atom1->B,
1070                             " ",
1071                             *ajStrGetPtr(atom1->Atm));
1072                 break;
1073 
1074             case ajEPdbModePdb:
1075                 ajFmtPrintF(outf, "%5d  %-4S%-4S%c%4S%12.3f%8.3f%8.3f"
1076                             "%6.2f%6.2f%11s%-3c\n",
1077                             acnt++,
1078                             atom1->Atm,
1079                             atom1->Id3,
1080                             pdb->Chains[chn - 1]->Id,
1081                             atom1->Pdb,
1082                             /* resarr[atom1->Idx-1]->Pdb, */
1083                             atom1->X,
1084                             atom1->Y,
1085                             atom1->Z,
1086                             atom1->O,
1087                             atom1->B,
1088                             " ",
1089                             *ajStrGetPtr(atom1->Atm));
1090                 break;
1091 
1092             default:
1093                 ajFatal("Invalid mode in pdbioWriteAtomChain");
1094         }
1095 
1096         atom2 = atom1;
1097     }
1098 
1099     /* Write TER record if its not already done */
1100     if (!doneter)
1101     {
1102         ajFmtPrintF(outf, "%-6s%5d      %-4S%c%4d%54s\n",
1103                     "TER",
1104                     acnt++,
1105                     atom2->Id3,
1106                     pdb->Chains[chn - 1]->Id,
1107                     atom2->Idx,
1108                     " ");
1109         doneter = ajTrue;
1110     }
1111     ajListIterDel(&iter);
1112 
1113 #if AJFALSE
1114     if (resarr)
1115         AJFREE(resarr);
1116 #endif /* AJFALSE */
1117 
1118     return ajTrue;
1119 }
1120 
1121 
1122 
1123 
1124 /* @funcstatic pdbioWriteAtomDomain *******************************************
1125 **
1126 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1127 ** records).  Coordinates are taken from a Pdb structure, domain definition is
1128 ** taken from a Scop structure. The model number argument should have a value
1129 ** of 1 for x-ray structures. Coordinates for heterogens are NOT written to
1130 ** file.  The corrected residue numbers are given (these give an index into
1131 ** the SEQRES sequence.
1132 **
1133 ** @param [w] errf [AjPFile] Output file stream for error messages
1134 ** @param [w] outf [AjPFile] Output file stream
1135 ** @param [r] pdb  [const AjPPdb] Pdb object
1136 ** @param [r] scop [const AjPScop] Scop object
1137 ** @param [r] mod  [ajuint] Model number, beginning at 1
1138 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
1139 **                              or ajEPdbModeIdx if the original or corrected
1140 **                              residue number is to be used.
1141 **
1142 ** @return [AjBool] True on success
1143 **
1144 ** @release 2.9.0
1145 ** @@
1146 ******************************************************************************/
1147 
pdbioWriteAtomDomain(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajuint mod,AjEPdbMode mode)1148 static AjBool pdbioWriteAtomDomain(AjPFile errf, AjPFile outf,
1149                                    const AjPPdb pdb,
1150                                    const AjPScop scop, ajuint mod,
1151                                    AjEPdbMode mode)
1152 {
1153     /*
1154     ** rn_mod is a modifier to the residue number to give correct residue
1155     ** numbering for the domain
1156     */
1157     ajint acnt = 1;
1158     ajint rn_mod = 0;
1159     ajuint z = 0U;
1160     ajuint finalrn = 0U;
1161     ajuint chn = 0U;
1162     char id = '\0';
1163 
1164     AjBool found_start = ajFalse;
1165     AjBool found_end = ajFalse;
1166     AjBool nostart = ajFalse;
1167     AjBool noend = ajFalse;
1168     AjIList iter = NULL;
1169     AjPAtom atom1 = NULL;
1170     AjPAtom atom2 = NULL;
1171     AjPStr tmpstr = NULL;
1172 #if AJFALSE
1173     AjPResidue *resarr = NULL;
1174 #endif /* AJFALSE */
1175 
1176 
1177     if (!errf || !outf || !pdb || !scop)
1178         ajFatal("Bad args. passed to pdbioWriteAtomDomain");
1179 
1180 
1181     /* Allocate strings etc */
1182     tmpstr = ajStrNew();
1183 
1184 
1185 
1186     /* Loop for each chain in the domain */
1187     for (z = 0;
1188          z < scop->Number;
1189          z++, found_start = ajFalse, found_end = ajFalse)
1190     {
1191         /* Check for chain error */
1192         if (!ajPdbChnidToNum(scop->Chain[z], pdb, &chn))
1193         {
1194             ajListIterDel(&iter);
1195             ajWarn("Chain incompatibility error in "
1196                    "pdbioWriteAtomDomain");
1197             ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility "
1198                         "error in pdbioWriteAtomDomain\n",
1199                         scop->Entry);
1200             ajStrDel(&tmpstr);
1201 
1202             return ajFalse;
1203         }
1204 
1205 
1206 #if AJFALSE
1207         ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
1208 #endif /* AJFALSE */
1209 
1210         /* Iterate up to the correct model */
1211         iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
1212 
1213         while ((atom1 = (AjPAtom) ajListIterGet(iter)))
1214             if (atom1->Mod == mod)
1215                 break;
1216 
1217         if (!atom1)
1218             ajFatal("Unexpected error (atom1 == NULL) in "
1219                     "pdbioWriteAtomDomain\n");
1220 
1221         /* Increment res. counter from last chain if appropriate */
1222         if (noend)
1223             rn_mod += atom2->Idx;
1224         else
1225             rn_mod += finalrn;
1226 
1227         /* Start of chain was not specified */
1228         if (!ajStrCmpC(scop->Start[z], "."))
1229             nostart = ajTrue;
1230         else
1231             nostart = ajFalse;
1232 
1233         /* End of chain was not specified */
1234         if (!ajStrCmpC(scop->End[z], "."))
1235             noend = ajTrue;
1236         else
1237             noend = ajFalse;
1238 
1239         /* If the domain was composed of more than once chain then a '.' is
1240          * given as the chain identifier */
1241         if (scop->Number > 1)
1242             id = '.';
1243         else
1244             id = pdb->Chains[chn - 1]->Id;
1245 
1246         for (; atom1; atom1 = (AjPAtom) ajListIterGet(iter))
1247         {
1248             /*
1249             ** Continue / break if a non-protein atom is found or
1250             ** model no. is incorrect
1251             */
1252             if (atom1->Mod != mod)
1253                 break;
1254 
1255             if (atom1->Type != 'P')
1256                 continue;
1257 
1258             /*
1259             ** The start position was specified, but has not
1260             ** been found yet
1261             */
1262             if (!found_start && !nostart)
1263             {
1264                 ajStrAssignS(&tmpstr, scop->Start[z]);
1265                 ajStrAppendK(&tmpstr, '*');
1266 
1267                 /* Start position found */
1268                 /*
1269                 ** if(!ajStrCmpCaseS(atom1->Pdb, scop->Start[z]))
1270                 */
1271                 if (ajStrMatchWildS(atom1->Pdb, tmpstr))
1272                     /*
1273                     ** if(ajStrMatchWildS(resarr[atom1->Idx-1]->Pdb, tmpstr))
1274                     */
1275                 {
1276                     if (!ajStrMatchS(atom1->Pdb, scop->Start[z]))
1277                         /*
1278                         ** if(!ajStrMatchS(resarr[atom1->Idx-1]->Pdb,
1279                         **                 scop->Start[z]))
1280                         */
1281                     {
1282                         ajWarn("Domain start found by wildcard match only "
1283                                "in pdbioWriteAtomDomain");
1284                         ajFmtPrintF(errf, "//\n%S\nERROR Domain start found "
1285                                     "by wildcard match only in "
1286                                     "pdbioWriteAtomDomain\n", scop->Entry);
1287                     }
1288 
1289                     rn_mod -= atom1->Idx - 1;
1290                     found_start = ajTrue;
1291                 }
1292                 else
1293                     continue;
1294             }
1295 
1296             /* The end position was specified, but has not been found yet */
1297             if (!found_end && !noend)
1298             {
1299                 ajStrAssignS(&tmpstr, scop->End[z]);
1300                 ajStrAppendK(&tmpstr, '*');
1301 
1302                 /* End position found */
1303                 /*
1304                 ** if(!ajStrCmpCaseS(atom1->Pdb, scop->End[z]))
1305                 */
1306                 if (ajStrMatchWildS(atom1->Pdb, tmpstr))
1307                     /*
1308                     ** if(ajStrMatchWildS(resarr[atom1->Idx-1]->Pdb, tmpstr))
1309                     */
1310                 {
1311                     if (!ajStrMatchS(atom1->Pdb, scop->End[z]))
1312                         /*
1313                         ** if(!ajStrMatchS(resarr[atom1->Idx-1]->Pdb,
1314                         **                 scop->End[z]))
1315                         */
1316                     {
1317                         ajWarn("Domain end found by wildcard match only "
1318                                "in pdbioWriteAtomDomain");
1319                         ajFmtPrintF(errf, "//\n%S\nERROR Domain end found "
1320                                     "by wildcard match only in "
1321                                     "pdbioWriteAtomDomain\n", scop->Entry);
1322                     }
1323 
1324                     found_end = ajTrue;
1325                     finalrn = atom1->Idx;
1326                 }
1327             }
1328             else if (atom1->Idx != finalrn && !noend)
1329                 break;
1330 
1331             /* Write out ATOM line to pdb file */
1332             switch (mode)
1333             {
1334                 case ajEPdbModeIdx:
1335                     ajFmtPrintF(outf, "%-6s%5d  %-4S%-4S%c%4d%12.3f%8.3f"
1336                                 "%8.3f%6.2f%6.2f%11s%-3c\n",
1337                                 "ATOM",
1338                                 acnt++,
1339                                 atom1->Atm,
1340                                 atom1->Id3,
1341                                 id,
1342                                 atom1->Idx + rn_mod,
1343                                 atom1->X,
1344                                 atom1->Y,
1345                                 atom1->Z,
1346                                 atom1->O,
1347                                 atom1->B,
1348                                 " ",
1349                                 *ajStrGetPtr(atom1->Atm));
1350                     break;
1351 
1352                 case ajEPdbModePdb:
1353                     ajFmtPrintF(outf, "%-6s%5d  %-4S%-4S%c%4S%12.3f%8.3f"
1354                                 "%8.3f%6.2f%6.2f%11s%-3c\n",
1355                                 "ATOM",
1356                                 acnt++,
1357                                 atom1->Atm,
1358                                 atom1->Id3,
1359                                 id,
1360                                 atom1->Pdb,
1361                                 /* resarr[atom1->Idx-1]->Pdb, */
1362                                 atom1->X,
1363                                 atom1->Y,
1364                                 atom1->Z,
1365                                 atom1->O,
1366                                 atom1->B,
1367                                 " ",
1368                                 *ajStrGetPtr(atom1->Atm));
1369                     break;
1370 
1371                 default:
1372                     ajFatal("Invalid mode in pdbioWriteAtomDomain");
1373             }
1374 
1375             /* Assign pointer for this chain */
1376             atom2 = atom1;
1377         }
1378 
1379 
1380         /* Diagnostic if start was specified but not found */
1381         if (!found_start && !nostart)
1382         {
1383             ajListIterDel(&iter);
1384             ajWarn("Domain start not found in pdbioWriteAtomDomain");
1385             ajFmtPrintF(errf, "//\n%S\nERROR Domain start not "
1386                         "found in pdbioWriteAtomDomain\n", scop->Entry);
1387             ajStrDel(&tmpstr);
1388 
1389             return ajFalse;
1390         }
1391 
1392 
1393         /* Diagnostic if end was specified but not found */
1394         if (!found_end && !noend)
1395         {
1396             ajListIterDel(&iter);
1397             ajWarn("Domain end not found in pdbioWriteAtomDomain");
1398             ajFmtPrintF(errf, "//\n%S\nERROR Domain end not "
1399                         "found in pdbioWriteAtomDomain\n", scop->Entry);
1400             ajStrDel(&tmpstr);
1401 
1402             return ajFalse;
1403         }
1404 
1405 
1406         ajListIterDel(&iter);
1407     }
1408 
1409     if (!atom2)
1410         ajFatal("Unexpected error (atom2 == NULL) in pdbioWriteAtomDomain\n");
1411 
1412     /* Write the TER record to the pdb file */
1413     ajFmtPrintF(outf, "%-6s%5d      %-4S%c%4d%54s\n",
1414                 "TER",
1415                 acnt++,
1416                 atom2->Id3,
1417                 id,
1418                 atom2->Idx + rn_mod,
1419                 " ");
1420 
1421     ajStrDel(&tmpstr);
1422 
1423     return ajTrue;
1424 }
1425 
1426 
1427 
1428 
1429 /* @funcstatic pdbioWriteAtomDomainPdb ****************************************
1430 **
1431 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1432 ** records).  Coordinates are taken from a Pdb structure, domain definition
1433 ** is taken from a Scop structure.
1434 ** Coordinates for heterogens are NOT written to
1435 ** file.  The original (pdb) residue numbers are given (these do NOT give an
1436 ** index into the SEQRES sequence.  Use pdbioWriteAtomDomainIdx if you
1437 ** need an index into the SEQRES sequence.
1438 **
1439 ** @param [w] errf [AjPFile] Output file stream for error messages
1440 ** @param [w] outf [AjPFile] Output file stream
1441 ** @param [r] pdb  [const AjPPdb] Pdb object
1442 ** @param [r] scop [const AjPScop] Scop object
1443 ** @param [r] mod  [ajint] Model number, beginning at 1
1444 **
1445 ** @return [AjBool] True on success
1446 **
1447 ** @release 2.9.0
1448 ** @@
1449 ******************************************************************************/
1450 
pdbioWriteAtomDomainPdb(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajint mod)1451 static AjBool pdbioWriteAtomDomainPdb(AjPFile errf, AjPFile outf,
1452                                       const AjPPdb pdb,
1453                                       const AjPScop scop, ajint mod)
1454 {
1455     if (pdbioWriteAtomDomain(errf, outf, pdb, scop, mod, ajEPdbModePdb))
1456         return ajTrue;
1457 
1458     return ajFalse;
1459 }
1460 
1461 
1462 
1463 
1464 /* @funcstatic pdbioWriteAtomDomainIdx ****************************************
1465 **
1466 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1467 ** records).  Coordinates are taken from a Pdb structure, domain definition
1468 ** is taken from a Scop structure. The model number argument should have a
1469 ** value of 1 for x-ray structures. Coordinates for heterogens are NOT
1470 ** written to file.  The corrected residue numbers are given (these give an
1471 ** index into the SEQRES sequence).  Use pdbioWriteAtomDomainPdb if you
1472 ** wish to maintain the original residue number.
1473 **
1474 ** @param [w] errf [AjPFile] Output file stream for error messages
1475 ** @param [w] outf [AjPFile] Output file stream
1476 ** @param [r] pdb  [const AjPPdb]  Pdb object
1477 ** @param [r] scop [const AjPScop] Scop object
1478 ** @param [r] mod  [ajint]   Model number, beginning at 1
1479 **
1480 ** @return [AjBool] True on success
1481 **
1482 ** @release 2.9.0
1483 ** @@
1484 ******************************************************************************/
1485 
pdbioWriteAtomDomainIdx(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajint mod)1486 static AjBool pdbioWriteAtomDomainIdx(AjPFile errf, AjPFile outf,
1487                                       const AjPPdb pdb,
1488                                       const AjPScop scop, ajint mod)
1489 {
1490     if (pdbioWriteAtomDomain(errf, outf, pdb, scop, mod, ajEPdbModeIdx))
1491         return ajTrue;
1492 
1493     return ajFalse;
1494 }
1495 
1496 
1497 
1498 
1499 /* @funcstatic pdbioWriteHeterogen ********************************************
1500 **
1501 ** Writes coordinates for heterogens that could not be uniquely associated
1502 ** with a chain to an output file in pdb format (HETATM records). Coordinates
1503 ** are taken from a Pdb structure. The model number argument should have a
1504 ** value of 1 for x-ray structures.
1505 **
1506 ** @param [w] outf [AjPFile] Output file stream
1507 ** @param [r] pdb  [const AjPPdb]  Pdb object
1508 ** @param [r] mod  [ajuint] Model number, beginning at 1
1509 **
1510 ** @return [AjBool] True on success
1511 **
1512 ** @release 2.9.0
1513 ** @@
1514 ******************************************************************************/
1515 
pdbioWriteHeterogen(AjPFile outf,const AjPPdb pdb,ajuint mod)1516 static AjBool pdbioWriteHeterogen(AjPFile outf, const AjPPdb pdb, ajuint mod)
1517 {
1518     AjIList iter = NULL;
1519     AjPAtom atom = NULL;
1520     ajint acnt;
1521 
1522 
1523     /* Check args are not NULL */
1524     if (!outf || !pdb || mod < 1)
1525         return ajFalse;
1526 
1527 
1528     iter = ajListIterNewread(pdb->Groups);
1529 
1530     while ((atom = (AjPAtom) ajListIterGet(iter)))
1531         if (atom->Mod == mod)
1532             break;
1533 
1534     for (acnt = 1; atom; atom = (AjPAtom) ajListIterGet(iter))
1535     {
1536         /* Break if on t0 a new model */
1537         if (atom->Mod != mod)
1538             break;
1539 
1540         /* Write out HETATM line */
1541 
1542         if (atom->Type == 'H')
1543             ajFmtPrintF(outf, "%-6s%5d  %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1544                         "%6.2f%6.2f%11s%-3c\n",
1545                         "HETATM",
1546                         acnt++,
1547                         atom->Atm,
1548                         atom->Id3,
1549                         ajChararrGet(pdb->gpid, atom->Gpn - 1),
1550                         atom->Idx,
1551                         atom->X,
1552                         atom->Y,
1553                         atom->Z,
1554                         atom->O,
1555                         atom->B,
1556                         " ",
1557                         *ajStrGetPtr(atom->Atm));
1558         else
1559             ajFmtPrintF(outf, "%-6s%5d  %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1560                         "%6.2f%6.2f%11s%-3c\n",
1561                         "HETATM",
1562                         acnt++,
1563                         atom->Atm,
1564                         atom->Id3,
1565                         ' ',
1566                         atom->Idx,
1567                         atom->X,
1568                         atom->Y,
1569                         atom->Z,
1570                         atom->O,
1571                         atom->B,
1572                         " ",
1573                         *ajStrGetPtr(atom->Atm));
1574 /*      atom2 = atom; Unused variable */
1575     }
1576 
1577 
1578     ajListIterDel(&iter);
1579 
1580     return ajTrue;
1581 }
1582 
1583 
1584 
1585 
1586 /* @funcstatic pdbioWriteText *************************************************
1587 **
1588 ** Writes text to file in the format of pdb records
1589 **
1590 ** @param [w] outf   [AjPFile] Output file stream
1591 ** @param [r] str    [const AjPStr]  Text to print out
1592 ** @param [r] prefix [const char *]  pdb record (e.g. "HEADER")
1593 **
1594 **
1595 ** @return [AjBool] True on success
1596 **
1597 ** @release 2.9.0
1598 ** @@
1599 ******************************************************************************/
1600 
pdbioWriteText(AjPFile outf,const AjPStr str,const char * prefix)1601 static AjBool pdbioWriteText(AjPFile outf, const AjPStr str,
1602                              const char *prefix)
1603 {
1604     ajint n = 0;
1605     ajint l = 0;
1606     ajint c = 0;
1607 
1608     AjPStrTok handle = NULL;
1609     AjPStr token = NULL;
1610     AjPStr tmp = NULL;
1611 
1612     if (!outf)
1613         return ajFalse;
1614 
1615 
1616 
1617     /* Initialise strings */
1618     token = ajStrNew();
1619     tmp = ajStrNewC("");
1620 
1621 
1622     handle = ajStrTokenNewC(str, " \t\r\n");
1623 
1624     while (ajStrTokenNextParse(handle, &token))
1625     {
1626         if (!c)
1627             ajFmtPrintF(outf, "%-11s", prefix);
1628 
1629         if ((l = n + ajStrGetLen(token)) < 68)
1630         {
1631             if (c++)
1632                 ajStrAppendC(&tmp, " ");
1633             ajStrAppendS(&tmp, token);
1634             n = ++l;
1635         }
1636         else
1637         {
1638             ajFmtPrintF(outf, "%-*S\n", 69, tmp);
1639 
1640             ajStrAssignS(&tmp, token);
1641             ajStrAppendC(&tmp, " ");
1642             n = ajStrGetLen(token);
1643             c = 0;
1644         }
1645     }
1646 
1647     if (c)
1648         ajFmtPrintF(outf, "%-*S\n", 69, tmp);
1649 
1650 
1651     ajStrTokenDel(&handle);
1652     ajStrDel(&token);
1653     ajStrDel(&tmp);
1654 
1655     return ajTrue;
1656 }
1657 
1658 
1659 
1660 
1661 /* @funcstatic pdbioWriteHeader ***********************************************
1662 **
1663 ** Writes the Pdb element of a Pdb structure to an output file in pdb format
1664 **
1665 ** @param [w] outf [AjPFile] Output file stream
1666 ** @param [r] pdb  [const AjPPdb] Pdb object
1667 **
1668 ** @return [AjBool] True on success
1669 **
1670 ** @release 2.9.0
1671 ** @@
1672 ******************************************************************************/
1673 
pdbioWriteHeader(AjPFile outf,const AjPPdb pdb)1674 static AjBool pdbioWriteHeader(AjPFile outf, const AjPPdb pdb)
1675 {
1676     if (pdb && outf)
1677     {
1678         ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR %-45S\n",
1679                     "HEADER",
1680                     pdb->Pdb);
1681 
1682         return ajTrue;
1683     }
1684 
1685     return ajFalse;
1686 }
1687 
1688 
1689 
1690 
1691 /* @funcstatic pdbioWriteHeaderScop *******************************************
1692 **
1693 ** Writes the Entry element of a Scop structure to an output file in pdb
1694 ** format
1695 **
1696 ** @param [w] outf [AjPFile] Output file stream
1697 ** @param [r] scop [const AjPScop] Scop object
1698 **
1699 ** @return [AjBool] True on success
1700 **
1701 ** @release 2.9.0
1702 ** @@
1703 ******************************************************************************/
1704 
pdbioWriteHeaderScop(AjPFile outf,const AjPScop scop)1705 static AjBool pdbioWriteHeaderScop(AjPFile outf, const AjPScop scop)
1706 {
1707     if (scop && outf)
1708     {
1709         ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR SCOP DOMAIN %-33S\n",
1710                     "HEADER",
1711                     scop->Entry);
1712 
1713         return ajTrue;
1714     }
1715 
1716     return ajFalse;
1717 }
1718 
1719 
1720 
1721 
1722 /* @funcstatic pdbioWriteTitle ************************************************
1723 **
1724 ** Writes a TITLE record to an output file in pdb format
1725 ** The text is hard-coded.
1726 **
1727 ** @param [w] outf [AjPFile] Output file stream
1728 ** @param [r] pdb  [const AjPPdb] Pdb object
1729 **
1730 ** @return [AjBool] True on success
1731 **
1732 ** @release 2.9.0
1733 ** @@
1734 ******************************************************************************/
1735 
pdbioWriteTitle(AjPFile outf,const AjPPdb pdb)1736 static AjBool pdbioWriteTitle(AjPFile outf, const AjPPdb pdb)
1737 {
1738     if (pdb && outf)
1739     {
1740         ajFmtPrintF(outf, "%-11sTHIS FILE IS MISSING MOST RECORDS FROM THE "
1741                     "ORIGINAL PDB FILE%9s\n",
1742                     "TITLE", " ");
1743 
1744         return ajTrue;
1745     }
1746 
1747     return ajFalse;
1748 }
1749 
1750 
1751 
1752 
1753 /* @funcstatic pdbioWriteCompnd ***********************************************
1754 **
1755 ** Writes the Compnd element of a Pdb structure to an output file in pdb
1756 ** format
1757 **
1758 ** @param [w] outf [AjPFile] Output file stream
1759 ** @param [r] pdb  [const AjPPdb] Pdb object
1760 **
1761 ** @return [AjBool] True on success
1762 **
1763 ** @release 2.9.0
1764 ** @@
1765 ******************************************************************************/
1766 
pdbioWriteCompnd(AjPFile outf,const AjPPdb pdb)1767 static AjBool pdbioWriteCompnd(AjPFile outf, const AjPPdb pdb)
1768 {
1769     if (pdb && outf)
1770     {
1771         pdbioWriteText(outf, pdb->Compnd, "COMPND");
1772 
1773         return ajTrue;
1774     }
1775 
1776     return ajFalse;
1777 }
1778 
1779 
1780 
1781 
1782 /* @funcstatic pdbioWriteSource ***********************************************
1783 **
1784 ** Writes the Source element of a Pdb structure to an output file in pdb
1785 ** format
1786 **
1787 ** @param [w] outf [AjPFile] Output file stream
1788 ** @param [r] pdb  [const AjPPdb] Pdb object
1789 **
1790 ** @return [AjBool] True on success
1791 **
1792 ** @release 2.9.0
1793 ** @@
1794 ******************************************************************************/
1795 
pdbioWriteSource(AjPFile outf,const AjPPdb pdb)1796 static AjBool pdbioWriteSource(AjPFile outf, const AjPPdb pdb)
1797 {
1798     if (pdb && outf)
1799     {
1800         pdbioWriteText(outf, pdb->Source, "SOURCE");
1801 
1802         return ajTrue;
1803     }
1804 
1805     return ajFalse;
1806 }
1807 
1808 
1809 
1810 
1811 /* @funcstatic pdbioWriteEmptyRemark ******************************************
1812 **
1813 ** Writes an empty REMARK record to an output file in pdb format
1814 **
1815 ** @param [w] outf [AjPFile] Output file stream
1816 ** @param [r] pdb  [const AjPPdb] Pdb object
1817 **
1818 ** @return [AjBool] True on success
1819 **
1820 ** @release 2.9.0
1821 ** @@
1822 ******************************************************************************/
1823 
pdbioWriteEmptyRemark(AjPFile outf,const AjPPdb pdb)1824 static AjBool pdbioWriteEmptyRemark(AjPFile outf, const AjPPdb pdb)
1825 {
1826     if (pdb && outf)
1827     {
1828         ajFmtPrintF(outf, "%-11s%-69s\n", "REMARK", " ");
1829 
1830         return ajTrue;
1831     }
1832 
1833     return ajFalse;
1834 }
1835 
1836 
1837 
1838 
1839 /* @funcstatic pdbioWriteResolution *******************************************
1840 **
1841 ** Writes the Reso element of a Pdb structure to an output file in pdb
1842 ** format
1843 **
1844 ** @param [w] outf [AjPFile] Output file stream
1845 ** @param [r] pdb  [const AjPPdb] Pdb object
1846 **
1847 ** @return [AjBool] True on success
1848 **
1849 ** @release 2.9.0
1850 ** @@
1851 ******************************************************************************/
1852 
pdbioWriteResolution(AjPFile outf,const AjPPdb pdb)1853 static AjBool pdbioWriteResolution(AjPFile outf, const AjPPdb pdb)
1854 {
1855     if (pdb && outf)
1856     {
1857         ajFmtPrintF(outf, "%-11sRESOLUTION. %-6.2f%-51s\n",
1858                     "REMARK", pdb->Reso, "ANGSTROMS.");
1859 
1860         return ajTrue;
1861     }
1862 
1863     return ajFalse;
1864 }
1865 
1866 
1867 
1868 
1869 /* @funcstatic pdbioReadLines *************************************************
1870 **
1871 ** Reads a pdb file and returns a pointer to a partially filled Pdbfile object.
1872 ** All of the lines from the pdb file are written to the <lines> array of the
1873 ** object and the <nlines> element is written.
1874 **
1875 ** Memory for the object itself and any arrays whose size is equal to the
1876 ** number of lines is allocated:
1877 ** lines, linetype, chnn, gpn, modn, resni, resn1, resn2, pdbn, oddnum, atype,
1878 ** rtype, x,y,z,o,b, elementNum, elementId, elementType & helixClass.
1879 **
1880 ** The following elements are written:
1881 ** nlines, lines.
1882 **
1883 ** The linetype array is set to default value of pdbfileELinetypeIgnore
1884 **
1885 ** @param [u] inf  [AjPFile] Pointer to pdb file
1886 **
1887 ** @return [AjPPdbfile] Pdbfile object pointer, or NULL on failure.
1888 **
1889 ** @release 2.9.0
1890 ** @@
1891 ******************************************************************************/
1892 
pdbioReadLines(AjPFile inf)1893 static AjPPdbfile pdbioReadLines(AjPFile inf)
1894 {
1895     AjPPdbfile pdbfile = NULL;  /* pdbfile object to be returned */
1896     AjPList list = NULL;        /* List of lines in pdb file */
1897     AjPStr line = NULL;         /* A line from a pdb file */
1898     ajuint i = 0U;
1899 
1900 
1901     /* Check args */
1902     if (!inf)
1903         return NULL;
1904 
1905 
1906     /* Allocate list and pdbfile object */
1907     list = ajListstrNew();
1908     /* Don't know number of lines or chains yet */
1909     pdbfile = pdbioPdbfileNew(0, 0);
1910 
1911 
1912     /* Read pdb file and append lines to list */
1913     line = ajStrNew();
1914 
1915     while (ajReadlineTrim(inf, &line))
1916     {
1917         ajListstrPushAppend(list, line);
1918         line = ajStrNew();
1919     }
1920 
1921 
1922     /* Convert list to array in pdbfile object */
1923     pdbfile->nlines = (ajuint) ajListstrToarray(list, &pdbfile->lines);
1924 
1925     if (pdbfile->nlines == 0)
1926     {
1927         ajStrDel(&line);
1928         ajListstrFree(&list);
1929         pdbioPdbfileDel(&pdbfile);
1930 
1931         return NULL;
1932     }
1933 
1934 
1935 
1936     /* Allocate memory for x,y,z,o,b, modn, chnn, linetype, ok, coord and
1937      * pdbn arrays in pdbfile object */
1938     AJCNEW0(pdbfile->x, pdbfile->nlines);
1939     AJCNEW0(pdbfile->y, pdbfile->nlines);
1940     AJCNEW0(pdbfile->z, pdbfile->nlines);
1941     AJCNEW0(pdbfile->o, pdbfile->nlines);
1942     AJCNEW0(pdbfile->b, pdbfile->nlines);
1943 
1944     AJCNEW0(pdbfile->elementNum, pdbfile->nlines);
1945     AJCNEW0(pdbfile->elementType, pdbfile->nlines);
1946     AJCNEW0(pdbfile->helixClass, pdbfile->nlines);
1947     AJCNEW0(pdbfile->resni, pdbfile->nlines);
1948     AJCNEW0(pdbfile->resn1, pdbfile->nlines);
1949     AJCNEW0(pdbfile->resn2, pdbfile->nlines);
1950     AJCNEW0(pdbfile->modn, pdbfile->nlines);
1951 
1952     AJCNEW0(pdbfile->chnn, pdbfile->nlines);
1953     AJCNEW0(pdbfile->gpn, pdbfile->nlines);
1954 
1955     AJCNEW0(pdbfile->linetype, pdbfile->nlines);
1956 
1957     for (i = 0U; i < pdbfile->nlines; i++)
1958         pdbfile->linetype[i] = pdbfileELinetypeIgnore;
1959 
1960     AJCNEW0(pdbfile->pdbn, pdbfile->nlines);
1961 
1962     for (i = 0U; i < pdbfile->nlines; i++)
1963         pdbfile->pdbn[i] = ajStrNew();
1964 
1965     AJCNEW0(pdbfile->elementId, pdbfile->nlines);
1966 
1967     for (i = 0U; i < pdbfile->nlines; i++)
1968         pdbfile->elementId[i] = ajStrNew();
1969 
1970     AJCNEW0(pdbfile->atype, pdbfile->nlines);
1971 
1972     for (i = 0U; i < pdbfile->nlines; i++)
1973         pdbfile->atype[i] = ajStrNewRes(4);
1974 
1975     AJCNEW0(pdbfile->rtype, pdbfile->nlines);
1976 
1977     for (i = 0U; i < pdbfile->nlines; i++)
1978         pdbfile->rtype[i] = ajStrNewRes(4);
1979 
1980     AJCNEW0(pdbfile->oddnum, pdbfile->nlines);
1981 
1982     for (i = 0U; i < pdbfile->nlines; i++)
1983         pdbfile->oddnum[i] = ajFalse;
1984 
1985 #if AJFALSE
1986 /* DIAGNOSTIC */
1987     for (i = 0U; i < pdbfile->nlines; i++)
1988     {
1989         ajFmtPrintF(tempfile, "%S\n", pdbfile->lines[i]);
1990         fflush(tempfile->fp);
1991     }
1992 #endif /* AJFALSE */
1993 
1994     /* Tidy up and return */
1995     ajStrDel(&line);
1996     ajListstrFree(&list);
1997 
1998     return pdbfile;
1999 }
2000 
2001 
2002 
2003 
2004 /* @funcstatic pdbioPdbfileDel ************************************************
2005 **
2006 ** Destructor for a PDB File object.
2007 **
2008 ** @param [d] Ppdbfile [AjPPdbfile*] PDB File address
2009 **
2010 ** @return [void]
2011 **
2012 ** @release 2.9.0
2013 ** @@
2014 ******************************************************************************/
2015 
static void pdbioPdbfileDel(AjPPdbfile *Ppdbfile)
{
    AjPPdbfile pdbfile = NULL;  /* Object being destroyed */
    ajuint i = 0U;

    /* Check args. The address itself is tested before it is dereferenced,
     * so that a NULL address is reported instead of being read through */
    if (!Ppdbfile || !*Ppdbfile)
    {
        ajWarn("NULL arg passed to pdbioPdbfileDel.\n");

        return;
    }

    pdbfile = *Ppdbfile;

    /* Plain arrays and bibliographic strings */
    if (pdbfile->resn1ok)
        AJFREE(pdbfile->resn1ok);

    if (pdbfile->nres)
        AJFREE(pdbfile->nres);

    if (pdbfile->nligands)
        AJFREE(pdbfile->nligands);

    if (pdbfile->chainok)
        AJFREE(pdbfile->chainok);

    if (pdbfile->pdbid)
        ajStrDel(&pdbfile->pdbid);

    if (pdbfile->compnd)
        ajStrDel(&pdbfile->compnd);

    if (pdbfile->source)
        ajStrDel(&pdbfile->source);

    if (pdbfile->modn)
        AJFREE(pdbfile->modn);

    if (pdbfile->x)
        AJFREE(pdbfile->x);

    if (pdbfile->y)
        AJFREE(pdbfile->y);

    if (pdbfile->z)
        AJFREE(pdbfile->z);

    if (pdbfile->o)
        AJFREE(pdbfile->o);

    if (pdbfile->b)
        AJFREE(pdbfile->b);

    if (pdbfile->numHelices)
        AJFREE(pdbfile->numHelices);

    if (pdbfile->numStrands)
        AJFREE(pdbfile->numStrands);

    if (pdbfile->numSheets)
        AJFREE(pdbfile->numSheets);

    if (pdbfile->numTurns)
        AJFREE(pdbfile->numTurns);

    if (pdbfile->elementNum)
        AJFREE(pdbfile->elementNum);

    if (pdbfile->elementType)
        AJFREE(pdbfile->elementType);

    if (pdbfile->helixClass)
        AJFREE(pdbfile->helixClass);

    if (pdbfile->resni)
        AJFREE(pdbfile->resni);

    if (pdbfile->resn1)
        AJFREE(pdbfile->resn1);

    if (pdbfile->resn2)
        AJFREE(pdbfile->resn2);

    if (pdbfile->chnn)
        AJFREE(pdbfile->chnn);

    if (pdbfile->gpn)
        AJFREE(pdbfile->gpn);

    if (pdbfile->linetype)
        AJFREE(pdbfile->linetype);

    if (pdbfile->oddnum)
        AJFREE(pdbfile->oddnum);

    /* String arrays with one entry per line: delete the strings, then the
     * array that held them */
    if (pdbfile->lines)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
            ajStrDel(&pdbfile->lines[i]);

        AJFREE(pdbfile->lines);
    }

    if (pdbfile->pdbn)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
            ajStrDel(&pdbfile->pdbn[i]);

        AJFREE(pdbfile->pdbn);
    }

    if (pdbfile->elementId)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
            ajStrDel(&pdbfile->elementId[i]);

        AJFREE(pdbfile->elementId);
    }

    if (pdbfile->atype)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
            ajStrDel(&pdbfile->atype[i]);

        AJFREE(pdbfile->atype);
    }

    if (pdbfile->rtype)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
            ajStrDel(&pdbfile->rtype[i]);

        AJFREE(pdbfile->rtype);
    }

    /* String arrays with one entry per chain */
    if (pdbfile->seqres)
    {
        for (i = 0U; i < pdbfile->nchains; i++)
            ajStrDel(&pdbfile->seqres[i]);

        AJFREE(pdbfile->seqres);
    }

    if (pdbfile->seqresful)
    {
        for (i = 0U; i < pdbfile->nchains; i++)
            ajStrDel(&pdbfile->seqresful[i]);

        AJFREE(pdbfile->seqresful);
    }

    if (pdbfile->chid)
        ajChararrDel(&pdbfile->chid);

    if (pdbfile->gpid)
        ajChararrDel(&pdbfile->gpid);

    /* Finally the object itself; the caller's pointer is cleared */
    AJFREE(*Ppdbfile);
    *Ppdbfile = NULL;

    return;
}
2176 
2177 
2178 
2179 
2180 /* @funcstatic pdbioElementsNew ***********************************************
2181 **
2182 ** Constructor for Elements object.
2183 **
2184 ** @param [r] nelms [ajuint] Number of elements
2185 **
2186 ** @return [AjPElements] Pointer to Elements object, or NULL on failure.
2187 **
2188 ** @release 2.9.0
2189 ** @@
2190 ******************************************************************************/
2191 
pdbioElementsNew(ajuint nelms)2192 static AjPElements pdbioElementsNew(ajuint nelms)
2193 {
2194     ajuint i = 0U;
2195 
2196     AjPElements elements = NULL;
2197 
2198     AJNEW0(elements);
2199 
2200     elements->n = nelms;
2201 
2202     if (nelms)
2203     {
2204         AJCNEW0(elements->elms, nelms);
2205 
2206         for (i = 0U; i < nelms; i++)
2207             elements->elms[i] = pdbioElementNew();
2208     }
2209 #if AJFALSE
2210     else
2211         ajWarn("Value of zero passed to pdbioElementsNew");
2212 #endif /* AJFALSE */
2213 
2214     return elements;
2215 }
2216 
2217 
2218 
2219 
2220 /* @funcstatic  pdbioElementsDel **********************************************
2221 **
2222 ** Destructor for Elements object.
2223 **
2224 ** @param [d] Pelements [AjPElements*] Elements address
2225 **
2226 ** @return [void]
2227 **
2228 ** @release 2.9.0
2229 ** @@
2230 ******************************************************************************/
2231 
static void pdbioElementsDel(AjPElements *Pelements)
{
    AjPElements elements = NULL;    /* Object being destroyed */
    ajuint i = 0U;

    /* The address is tested before it is dereferenced, so that a NULL
     * address is reported instead of being read through */
    if (!Pelements || !*Pelements)
    {
        ajWarn("NULL arg passed to pdbioElementsDel");

        return;
    }

    elements = *Pelements;

    /* Destroy each Element, then the array that held them */
    if (elements->elms)
    {
        for (i = 0U; i < elements->n; i++)
            pdbioElementDel(&elements->elms[i]);

        AJFREE(elements->elms);
    }

    /* Free the container and clear the caller's pointer */
    AJFREE(*Pelements);
    *Pelements = NULL;

    return;
}
2256 
2257 
2258 
2259 
2260 /* @funcstatic pdbioElementNew ************************************************
2261 **
2262 ** Constructor for Element object.
2263 **
2264 ** @return [AjPElement] Element object or NULL
2265 **
2266 ** @release 2.9.0
2267 ** @@
2268 ******************************************************************************/
2269 
pdbioElementNew(void)2270 static AjPElement pdbioElementNew(void)
2271 {
2272     AjPElement element = NULL;
2273 
2274     AJNEW0(element);
2275 
2276     element->elementId = ajStrNew();
2277     element->initResName = ajStrNew();
2278     element->initSeqNum = ajStrNew();
2279     element->endResName = ajStrNew();
2280     element->endSeqNum = ajStrNew();
2281 
2282     element->elementNum = 0;
2283     element->elementType = ' ';
2284     element->helixClass = 0;
2285     element->chainId = ' ';
2286 
2287     return element;
2288 }
2289 
2290 
2291 
2292 
2293 /* @funcstatic pdbioElementDel ************************************************
2294 **
2295 ** Destructor for Element object.
2296 **
2297 ** @param [d] Pelement [AjPElement*] Element object pointer
2298 **
2299 ** @return [void]
2300 **
2301 ** @release 2.9.0
2302 ** @@
2303 ******************************************************************************/
2304 
static void pdbioElementDel(AjPElement *Pelement)
{
    AjPElement element = NULL;  /* Object being destroyed */

    /* Guard against a NULL address or a NULL object; both would otherwise
     * be dereferenced below. The warning follows the other destructors in
     * this file. */
    if (!Pelement || !*Pelement)
    {
        ajWarn("NULL arg passed to pdbioElementDel");

        return;
    }

    element = *Pelement;

    /* Release the string members */
    ajStrDel(&element->elementId);
    ajStrDel(&element->initResName);
    ajStrDel(&element->initSeqNum);
    ajStrDel(&element->endResName);
    ajStrDel(&element->endSeqNum);

    /* Free the object and clear the caller's pointer */
    AJFREE(*Pelement);
    *Pelement = NULL;

    return;
}
2318 
2319 
2320 
2321 
2322 /* @funcstatic pdbioPdbfileNew ************************************************
2323 **
2324 ** Constructor for Pdbfile object.
2325 **
2326 ** @param [r] nlines  [ajuint] No. of lines in pdb file
2327 ** @param [r] nchains [ajuint] No. of chains in pdb file
2328 **
2329 ** @return [AjPPdbfile] Pointer to pdbfile object, or NULL on failure.
2330 **
2331 ** @release 2.9.0
2332 ** @@
2333 ******************************************************************************/
2334 
pdbioPdbfileNew(ajuint nlines,ajuint nchains)2335 static AjPPdbfile pdbioPdbfileNew(ajuint nlines, ajuint nchains)
2336 {
2337     ajuint i = 0U;
2338 
2339     AjPPdbfile pdbfile = NULL;
2340 
2341     AJNEW0(pdbfile);
2342 
2343     pdbfile->pdbid = ajStrNew();
2344     pdbfile->compnd = ajStrNew();
2345     pdbfile->source = ajStrNew();
2346 
2347     pdbfile->nomod = ajFalse;
2348     pdbfile->toofewter = ajFalse;
2349 
2350     pdbfile->nchains = nchains;
2351 
2352     if (nchains)
2353     {
2354         AJCNEW0(pdbfile->resn1ok, nchains);
2355 
2356         for (i = 0U; i < nchains; i++)
2357             pdbfile->resn1ok[i] = ajTrue;
2358 
2359         AJCNEW0(pdbfile->numHelices, nchains);
2360 
2361         for (i = 0U; i < nchains; i++)
2362             pdbfile->numHelices[i] = ajTrue;
2363 
2364         AJCNEW0(pdbfile->numStrands, nchains);
2365 
2366         for (i = 0U; i < nchains; i++)
2367             pdbfile->numStrands[i] = ajTrue;
2368 
2369         AJCNEW0(pdbfile->numSheets, nchains);
2370 
2371         for (i = 0U; i < nchains; i++)
2372             pdbfile->numSheets[i] = ajTrue;
2373 
2374         AJCNEW0(pdbfile->numTurns, nchains);
2375 
2376         for (i = 0U; i < nchains; i++)
2377             pdbfile->numTurns[i] = ajTrue;
2378 
2379         AJCNEW0(pdbfile->chainok, nchains);
2380 
2381         for (i = 0U; i < nchains; i++)
2382             pdbfile->chainok[i] = ajTrue;
2383 
2384         AJCNEW0(pdbfile->nres, nchains);
2385 
2386         AJCNEW0(pdbfile->nligands, nchains);
2387 
2388         AJCNEW0(pdbfile->seqres, nchains);
2389 
2390         for (i = 0U; i < nchains; i++)
2391             pdbfile->seqres[i] = ajStrNew();
2392 
2393         AJCNEW0(pdbfile->seqresful, nchains);
2394 
2395         for (i = 0U; i < nchains; i++)
2396             pdbfile->seqresful[i] = ajStrNew();
2397 
2398         pdbfile->chid = ajChararrNewRes(nchains);
2399     }
2400     else
2401         pdbfile->chid = ajChararrNew();
2402 
2403     pdbfile->gpid = ajChararrNew();
2404 
2405     pdbfile->nlines = nlines;
2406 
2407     if (nlines)
2408     {
2409         AJCNEW0(pdbfile->x, nlines);
2410         AJCNEW0(pdbfile->y, nlines);
2411         AJCNEW0(pdbfile->z, nlines);
2412         AJCNEW0(pdbfile->o, nlines);
2413         AJCNEW0(pdbfile->b, nlines);
2414 
2415         AJCNEW0(pdbfile->elementNum, nlines);
2416         AJCNEW0(pdbfile->elementType, nlines);
2417         AJCNEW0(pdbfile->helixClass, nlines);
2418 
2419         AJCNEW0(pdbfile->resni, nlines);
2420         AJCNEW0(pdbfile->resn1, nlines);
2421         AJCNEW0(pdbfile->resn2, nlines);
2422         AJCNEW0(pdbfile->modn, nlines);
2423         AJCNEW0(pdbfile->chnn, nlines);
2424         AJCNEW0(pdbfile->gpn, nlines);
2425         AJCNEW0(pdbfile->linetype, nlines);
2426         AJCNEW0(pdbfile->oddnum, nlines);
2427         AJCNEW0(pdbfile->lines, nlines);
2428         AJCNEW0(pdbfile->pdbn, nlines);
2429         AJCNEW0(pdbfile->elementId, nlines);
2430         AJCNEW0(pdbfile->atype, nlines);
2431         AJCNEW0(pdbfile->rtype, nlines);
2432 
2433         for (i = 0U; i < nlines; i++)
2434         {
2435             pdbfile->linetype[i] = pdbfileELinetypeIgnore;
2436             pdbfile->oddnum[i] = ajFalse;
2437             pdbfile->lines[i] = ajStrNew();
2438             pdbfile->pdbn[i] = ajStrNew();
2439             pdbfile->elementId[i] = ajStrNew();
2440             pdbfile->atype[i] = ajStrNewRes(4);
2441             pdbfile->rtype[i] = ajStrNewRes(4);
2442         }
2443     }
2444 #if AJFALSE
2445     else
2446         ajWarn("Zero sized arg passed to pdbioPdbfileNew.\n");
2447 #endif /* AJFALSE */
2448 
2449     return pdbfile;
2450 }
2451 
2452 
2453 
2454 
2455 /* @funcstatic pdbioFirstPass *************************************************
2456 **
2457 ** The initial read of the pdb file as held in the <lines> array of a Pdbfile
2458 ** object. Bibliographic information is parsed, the number of chains
2459 ** determined and the sequences and chain ids from the SEQRES records are
2460 ** parsed. The line type (see below) of each line is determined, and for
2461 ** lines with coordinates, the residue type is parsed. A count of the number
2462 ** of TER records and the location of the first coordinate line is also
2463 ** determined.
2464 ** Secondary structure information is also parsed and an AjPElements
2465 ** object is written.
2466 **
2467 ** Memory for any arrays whose size is equal to the number of chains is
2468 ** allocated:
2469 ** seqres, seqresful, nres, chainok, resn1ok, nligands, numHelices,
2470 ** numStrands, numSheets, numTurns
2471 **
2472 ** The following arrays are written:
2473 ** seqres, seqresful, chid, pdbn, resn1 and resn2, rtype, linetype
2474 **
2475 ** The following elements are written:
2476 ** nchains, tercnt, modcnt, nomod, source, compnd, method, reso, idxfirst
2477 **
2478 ** The chainok & resn1ok arrays are set to default values of ajTrue
2479 **
2480 **
2481 ** Writing resn1/resn2 & pdbn arrays
2482 ** The pdbn array is the raw residue number (as a string) and is filled for
2483 ** lines for which <linetype> == pdbfileELinetypeCoordinate. The resn1/resn2 arrays are
2484 ** given initial values which at this stage are simply the integer component
2485 ** of pdbn. The values for resn1/resn2 are changed later in the program.
2486 **
2487 ** Writing modcnt and nomod elements
2488 ** modcnt is a count of the number of MODEL records (excluding duplicate
2489 ** records). However, if no MODEL records are found, modcnt is set to the
2490 ** minimum value of 1, and nomod is set to ajTrue.
2491 **
2492 ** Writing linetype array
2493 ** The linetype array is set as follows:
2494 **
2495 ** pdbfileELinetypeCoordinate for ATOM or HETATM records which contain both atom and
2496 ** residue  identifier codes and which are not duplicate positions.
2497 ** Duplicate positions for  (i) whole residues or (ii) individual atoms
2498 ** are presumed where a ATOM or HETATM record uses a value other than '1' or
2499 ** 'A' in the (i) residue alternate location indicator (column 17) or (ii)
2500 ** the first column of the atom name (column 13) respectively.
2501 **
2502 ** pdbfileELinetypeTER for TER records
2503 **
2504 ** pdbfileELinetypeMODEL for MODEL records
2505 **
2506 ** pdbfileELinetypeWater for HOH (should be HETATM records)
2507 **
2508 ** For all other lines, it is left as the default of pdbfileELinetypeIgnore (the value
2509 ** might change later in the program).
2510 **
2511 **
2512 ** @param [w] pdbfile  [AjPPdbfile]  Pdbfile object
2513 ** @param [u] flog     [AjPFile]       Log file (build diagnostics)
2514 ** @param [w] elms     [AjPElements*] Elements object pointer
2515 ** @param [r] camask   [AjBool]        Whether to mask non-amino acid residues
2516 **                                    within protein chains which do not
2517 **                                    have a C-alpha atom.
2518 **
2519 ** @return [AjBool]  True if file was parsed, False otherwise
2520 **
2521 ** @release 2.9.0
2522 ** @@
2523 ******************************************************************************/
2524 
pdbioFirstPass(AjPPdbfile pdbfile,AjPFile flog,AjPElements * elms,AjBool camask)2525 static AjBool pdbioFirstPass(AjPPdbfile pdbfile, AjPFile flog, AjPElements *elms,
2526                         AjBool camask)
2527 {
2528     ajuint i = 0U;              /* Loop counter */
2529     ajuint j = 0U;              /* Loop counter */
2530     ajuint k = 0U;              /* Loop counter */
2531     AjBool donefirstatom = ajFalse;     /* Flag for finding first ATOM or
2532                                          * HETATM line */
2533     char pdbn[6];               /* Residue number */
2534     AjBool resolfound = ajFalse;/* Flag for finding RESOLUTION record */
2535     AjBool seqresfound = ajFalse;       /* Flag for finding SEQRES record */
2536     AjPStr tmpstr = NULL;       /* A temp. string */
2537     AjPStr seqres = NULL;       /* Sequence from SEQRES records */
2538     ajuint seqreslen = 0U;      /* Indicated length of sequence from SEQRES
2539                                  * records */
2540     ajuint lenful = 0U;         /* Length of SEQRES sequence including ACE,
2541                                  * FOR & NH2 groups that might be discarded
2542                                  * by the call to pdbioSeqresToSequence */
2543     char last_id = ' ';         /* CHain id of last SEQRES line read */
2544     AjPStr tmpseq = NULL;       /* A temp. string for a sequence */
2545     AjPStr tmpseqful = NULL;    /* A temp. string for a sequence */
2546     AjPList listseqs = NULL;    /* For list of sequences from SEQRES records */
2547     AjPList listseqsful = NULL; /* For list of sequences (using 3-letter
2548                                  * codes) from SEQRES records */
2549     AjBool done_msg = ajFalse;  /* Flag for error messaging */
2550     AjPList listelms = NULL;    /* Temp. list of secondary structure elements
2551                                  * (from HELIX, SHEET and TURN records) */
2552     AjPElement elm = NULL;      /* Temp. Element object pointer */
2553     AjPElement FirstStrand = NULL;      /* Temp. pointer to first strand of
2554                                          * each sheet */
2555     AjBool doneFirstStrand = ajFalse;   /* Flag for parsing first strand of
2556                                          * each sheet */
2557     AjPStr LastSheetId = NULL;  /* Sheet identifier of the last sheet read in */
2558 
2559     /* Check args */
2560     if (!pdbfile || !flog || !(*elms))
2561     {
2562         ajWarn("Bad args passed to pdbioFirstPass\n");
2563 
2564         return ajFalse;
2565     }
2566 
2567     /* Allocate memory etc */
2568     tmpstr = ajStrNew();
2569     seqres = ajStrNew();
2570     LastSheetId = ajStrNew();
2571 
2572     listseqs = ajListstrNew();
2573     listseqsful = ajListstrNew();
2574     listelms = ajListNew();
2575 
2576     /* Start of main loop */
2577     for (i = 0U; i < pdbfile->nlines; i++)
2578     {
2579         if ((ajStrPrefixC(pdbfile->lines[i], "ATOM")) ||
2580             (ajStrPrefixC(pdbfile->lines[i], "HETATM")))
2581         {
2582             /* In instances where >1 residue positions are given, ignore all
2583              * but position 'A' & '1' In instances where >1 atom positions
2584              * are given, ignore all but position '1' */
2585             if (((ajStrGetCharPos(pdbfile->lines[i], 16) != ' ') &&
2586                  ((ajStrGetCharPos(pdbfile->lines[i], 16) != 'A') &&
2587                   (ajStrGetCharPos(pdbfile->lines[i], 16) != '1'))) ||
2588                 ((ajStrGetCharPos(pdbfile->lines[i], 12) != ' ') &&
2589                  ((ajStrGetCharPos(pdbfile->lines[i], 12) != '1') &&
2590                   (isdigit((int) ajStrGetCharPos(pdbfile->lines[i], 12))))))
2591             {
2592                 if (!done_msg)
2593                 {
2594                     ajFmtPrintF(flog, "%-15s%d\n", "DUPATOMRES", i + 1);
2595                     done_msg = ajTrue;
2596                 }
2597             }
2598             /* In instances where no atom or residue identity is given,
2599              * ignore line */
2600             else if ((ajStrGetCharPos(pdbfile->lines[i], 12) == ' ' &&
2601                       ajStrGetCharPos(pdbfile->lines[i], 13) == ' ' &&
2602                       ajStrGetCharPos(pdbfile->lines[i], 14) == ' ') ||
2603                      (ajStrGetCharPos(pdbfile->lines[i], 17) == ' ' &&
2604                       ajStrGetCharPos(pdbfile->lines[i], 19) == ' '))
2605             {
2606                 ajFmtPrintF(flog, "%-15s%d\n", "NOATOMRESID", i + 1);
2607             }
2608             else
2609             {
2610                 pdbfile->linetype[i] = pdbfileELinetypeCoordinate;
2611 
2612                 if (!donefirstatom)
2613                 {
2614                     donefirstatom = ajTrue;
2615                     pdbfile->idxfirst = i;
2616                 }
2617 
2618                 /* Write residue number for the line */
2619                 for (k = 22, j = 0; k <= 26; k++)
2620                     if ((isalnum((int) ajStrGetCharPos(pdbfile->lines[i], k)))
2621                         ||
2622                         ajStrGetCharPos(pdbfile->lines[i], k) == '-')
2623                         pdbn[j++] = ajStrGetCharPos(pdbfile->lines[i], k);
2624 
2625                 pdbn[j] = '\0';
2626 
2627                 ajStrAssignC(&(pdbfile->pdbn[i]), pdbn);
2628 
2629                 if (!ajFmtScanS(pdbfile->pdbn[i], "%d",
2630                                 &(pdbfile->resn1[i])))
2631                 {
2632                     ajFmtPrintF(flog, "%-15s%d\n", "ATOMNONUM", i + 1);
2633                     pdbfile->linetype[i] = pdbfileELinetypeIgnore;
2634                 }
2635                 else
2636                 {
2637                     pdbfile->resn2[i] = pdbfile->resn1[i];
2638 
2639                     /* Assign residue type */
2640                     ajStrAssignSubS(&pdbfile->rtype[i],
2641                                     pdbfile->lines[i], 17, 19);
2642                     ajStrRemoveWhite(&pdbfile->rtype[i]);
2643 
2644                     /* JONNEW */
2645                     if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
2646                         pdbfile->linetype[i] = pdbfileELinetypeWater;
2647                 }
2648             }
2649         }
2650         else if (ajStrPrefixC(pdbfile->lines[i], "SEQRES"))
2651         {
2652             seqresfound = ajTrue;
2653 
2654             /* Read first SEQRES line */
2655             ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18);
2656 
2657             if (!ajFmtScanS(seqres, "%u", &seqreslen))
2658                 ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESLEN",
2659                             pdbfile->nchains,
2660                             ajStrGetCharPos(pdbfile->lines[i], 11));
2661 
2662 
2663             ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70);
2664             /* Append a ' ' in case this is missing from the PDB file, e.g.
2665              * pdb1iie.ent */
2666             ajStrAppendK(&seqres, ' ');
2667 
2668             ajChararrPut(&(pdbfile->chid), 0,
2669                          (last_id = ajStrGetCharPos(pdbfile->lines[i], 11)));
2670 
2671             pdbfile->nchains++;
2672 
2673             /* Read subsequent SEQRES lines */
2674             for (i++; i < pdbfile->nlines; i++)
2675                 if (ajStrPrefixC(pdbfile->lines[i], "SEQRES"))
2676                 {
2677                     /* Still on same chain */
2678                     if (ajStrGetCharPos(pdbfile->lines[i], 11) == last_id)
2679                     {
2680                         ajStrAppendSubS(&seqres, pdbfile->lines[i], 19, 70);
2681                         /* Append a ' ' in case this is missing from the PDB
2682                          * file, e.g. pdb1iie.ent */
2683                         ajStrAppendK(&seqres, ' ');
2684                     }
2685                     /* On new chain */
2686                     else
2687                     {
2688                         tmpseq = ajStrNew();
2689 
2690                         /* Process last chain */
2691                         if (!pdbioSeqresToSequence(seqres, &tmpseq, camask,
2692                                                    &lenful))
2693                         {
2694                             ajWarn("Sequence conversion error in"
2695                                    " pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
2696                             ajStrDel(&tmpseq);
2697                             continue;
2698                         }
2699 
2700                         tmpseqful = ajStrNew();
2701                         ajStrAssignS(&tmpseqful, seqres);
2702 
2703                         /* Check length of sequence vs indicated length */
2704                         if (lenful != seqreslen)
2705                             ajFmtPrintF(flog, "%-15s%d (%c)\n",
2706                                         "SEQRESLENDIF",
2707                                         pdbfile->nchains,
2708                                         last_id);
2709 
2710                         /* Push sequences onto lists */
2711                         ajListstrPushAppend(listseqs, tmpseq);
2712                         ajListstrPushAppend(listseqsful, tmpseqful);
2713 
2714 
2715                         /* Read first SEQRES line of new chain */
2716                         ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18);
2717 
2718                         if (!ajFmtScanS(seqres, "%u", &seqreslen))
2719                             ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESLEN",
2720                                         pdbfile->nchains,
2721                                         ajStrGetCharPos(pdbfile->lines[i],
2722                                                         11));
2723 
2724                         ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70);
2725                         /* Append a ' ' in case this is missing from the PDB
2726                          * file, e.g. pdb1iie.ent */
2727                         ajStrAppendK(&seqres, ' ');
2728 
2729                         ajChararrPut(&(pdbfile->chid), pdbfile->nchains,
2730                                      (last_id
2731                                       = ajStrGetCharPos(pdbfile->lines[i],
2732                                                         11)));
2733 
2734 #if AJFALSE
2735                         if ((last_id = ajStrGetCharPos(pdbfile->lines[i],
2736                                                        11)) == ' ')
2737                             ajChararrPut(&(pdbfile->chid),
2738                                          pdbfile->nchains, '.');
2739                         else
2740                             ajChararrPut(&(pdbfile->chid),
2741                                          pdbfile->nchains, last_id);
2742 #endif /* AJFALSE */
2743 
2744                         pdbfile->nchains++;
2745                     }
2746                 }
2747                 else
2748                 {
2749                     tmpseq = ajStrNew();
2750 
2751                     /* Process last chain */
2752                     if (!pdbioSeqresToSequence(seqres, &tmpseq, camask,
2753                                                &lenful))
2754                     {
2755                         ajWarn("Sequence conversion error in "
2756                                "pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
2757                         ajStrDel(&tmpseq);
2758                         continue;
2759                     }
2760 
2761                     tmpseqful = ajStrNew();
2762                     ajStrAssignS(&tmpseqful, seqres);
2763 
2764 
2765                     /* Check length of sequence vs indicated length */
2766                     if (lenful != seqreslen)
2767                     {
2768                         ajFmtPrintF(flog, "%-15s%d (%c)\n",
2769                                     "SEQRESLENDIF",
2770                                     pdbfile->nchains,
2771                                     last_id);
2772                     }
2773 
2774                     /* Push sequences onto lists */
2775                     ajListstrPushAppend(listseqs, tmpseq);
2776                     ajListstrPushAppend(listseqsful, tmpseqful);
2777 
2778                     /* Convert lists to arrays in pdbfile object and delete
2779                      * list */
2780                     ajListstrToarray(listseqs, &(pdbfile->seqres));
2781                     ajListFree(&listseqs);
2782 
2783                     ajListstrToarray(listseqsful, &(pdbfile->seqresful));
2784                     ajListFree(&listseqsful);
2785 
2786                     /* i will get incremented in main loop */
2787                     i--;
2788                     break;
2789                 }
2790         }
2791         else if (ajStrPrefixC(pdbfile->lines[i], "COMPND"))
2792         {
2793             /* Read first COMPND line */
2794             ajStrAssignSubS(&(pdbfile->compnd),
2795                             pdbfile->lines[i], 10, 71);
2796 
2797             /* Read subsequent COMPND lines */
2798             for (i++; i < pdbfile->nlines; i++)
2799                 if (ajStrPrefixC(pdbfile->lines[i], "COMPND"))
2800                 {
2801                     ajStrAppendSubS(&(pdbfile->compnd),
2802                                     pdbfile->lines[i], 10, 71);
2803                 }
2804                 else
2805                 {
2806                     ajStrRemoveWhiteExcess(&(pdbfile->compnd));
2807                     /* i will get incremented in main loop */
2808                     i--;
2809                     break;
2810                 }
2811         }
2812         else if (ajStrPrefixC(pdbfile->lines[i], "SOURCE"))
2813         {
2814             /* Read first SOURCE line */
2815             ajStrAssignSubS(&(pdbfile->source),
2816                             pdbfile->lines[i], 10, 71);
2817 
2818             /* Read subsequent SOURCE lines */
2819             for (i++; i < pdbfile->nlines; i++)
2820                 if (ajStrPrefixC(pdbfile->lines[i], "SOURCE"))
2821                 {
2822                     ajStrAppendSubS(&(pdbfile->source),
2823                                     pdbfile->lines[i], 10, 71);
2824                 }
2825                 else
2826                 {
2827                     ajStrRemoveWhiteExcess(&(pdbfile->source));
2828                     /* i will get incremented in main loop */
2829                     i--;
2830                     break;
2831                 }
2832         }
2833         else if (ajStrPrefixC(pdbfile->lines[i], "TER"))
2834         {
2835             /* By default ok == ajTrue */
2836             /* pdbfile->ok[i] = ajTrue; */
2837             pdbfile->tercnt++;
2838             pdbfile->linetype[i] = pdbfileELinetypeTER;
2839         }
2840         else if (ajStrPrefixC(pdbfile->lines[i], "MODEL"))
2841         {
2842             pdbfile->modcnt++;
2843             pdbfile->linetype[i] = pdbfileELinetypeMODEL;
2844 
2845             if (!donefirstatom)
2846             {
2847                 donefirstatom = ajTrue;
2848                 pdbfile->idxfirst = i;
2849             }
2850         }
2851         else if (ajStrPrefixC(pdbfile->lines[i], "ENDMDL"))
2852         {
2853             pdbfile->linetype[i] = pdbfileELinetypeENDMDL;
2854         }
2855 
2856         else if ((!resolfound) && (ajStrPrefixC(pdbfile->lines[i],
2857                                                 "REMARK")))
2858         {
2859             /* Assign method and resolution */
2860             ajFmtScanS(pdbfile->lines[i], "%*s %*d %S", &tmpstr);
2861 
2862             if (!ajStrCmpLenC(tmpstr, "RESOLUTION", 10))
2863             {
2864                 resolfound = ajTrue;
2865 
2866                 if (isdigit((int) ajStrGetCharPos(pdbfile->lines[i], 23)))
2867                 {
2868                     if ((ajFmtScanS(pdbfile->lines[i],
2869                                     "%*s %*d %*s %f",
2870                                     &(pdbfile->reso))) != 1)
2871                         ajFmtPrintF(flog, "%-15s\n", "RESOLUNK");
2872 
2873                     pdbfile->method = ajEPdbMethodXray;
2874                 }
2875                 else
2876                 {
2877                     pdbfile->reso = 0;
2878                     pdbfile->method = ajEPdbMethodNmr;
2879                 }
2880             }
2881 
2882         }
2883         else if (ajStrPrefixC(pdbfile->lines[i], "HELIX"))
2884         {
2885             doneFirstStrand = ajFalse;
2886 
2887             elm = pdbioElementNew();
2888 
2889             ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
2890             ajStrRemoveWhite(&tmpstr);
2891             ajFmtScanS(tmpstr, "%d", &elm->elementNum);
2892 
2893             ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
2894             ajStrRemoveWhite(&elm->elementId);
2895 
2896             elm->elementType = 'H';
2897 
2898             ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17);
2899             ajStrRemoveWhite(&elm->initResName);
2900 
2901             ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 21, 25);
2902             ajStrRemoveWhite(&elm->initSeqNum);
2903 
2904             ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 27, 29);
2905             ajStrRemoveWhite(&elm->endResName);
2906 
2907             ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37);
2908             ajStrRemoveWhite(&elm->endSeqNum);
2909 
2910             elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19);
2911 
2912             if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 31))
2913             {
2914                 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
2915                             ajStrGetCharPos(pdbfile->lines[i], 31));
2916                 pdbioElementDel(&elm);
2917                 continue;
2918             }
2919 
2920             ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 38, 39);
2921             ajStrRemoveWhite(&tmpstr);
2922             ajFmtScanS(tmpstr, "%d", &elm->helixClass);
2923 
2924 
2925             /* Check that all records are present and flag an error if
2926              * they're not */
2927             if (MAJSTRGETLEN(elm->initResName)
2928                 && MAJSTRGETLEN(elm->initSeqNum)
2929                 && MAJSTRGETLEN(elm->endResName)
2930                 && MAJSTRGETLEN(elm->endSeqNum) &&
2931                 MAJSTRGETLEN(elm->elementId))
2932             {
2933                 ajListPushAppend(listelms, elm);
2934             }
2935             else
2936             {
2937                 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
2938                 pdbioElementDel(&elm);
2939                 continue;
2940             }
2941 
2942         }
2943         else if (ajStrPrefixC(pdbfile->lines[i], "SHEET"))
2944         {
2945             elm = pdbioElementNew();
2946 
2947             ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
2948             ajStrRemoveWhite(&tmpstr);
2949             ajFmtScanS(tmpstr, "%d", &elm->elementNum);
2950 
2951             ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
2952             ajStrRemoveWhite(&elm->elementId);
2953 
2954             if (!ajStrMatchS(elm->elementId, LastSheetId))
2955                 doneFirstStrand = ajFalse;
2956 
2957             elm->elementType = 'E';
2958 
2959             ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 17, 19);
2960             ajStrRemoveWhite(&elm->initResName);
2961 
2962             ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 22, 26);
2963             ajStrRemoveWhite(&elm->initSeqNum);
2964 
2965             ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 28, 30);
2966             ajStrRemoveWhite(&elm->endResName);
2967 
2968             ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37);
2969             ajStrRemoveWhite(&elm->endSeqNum);
2970 
2971             elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 21);
2972 
2973             if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 32))
2974             {
2975                 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
2976                             ajStrGetCharPos(pdbfile->lines[i], 32));
2977                 pdbioElementDel(&elm);
2978                 continue;
2979             }
2980 
2981 
2982             /* Check for beta-barrels - where the first and last strands are
2983              * identical requiring us to ignore the last strand */
2984 
2985             if (doneFirstStrand)
2986             {
2987                 if (ajStrMatchS(elm->initResName, FirstStrand->initResName) &&
2988                     ajStrMatchS(elm->endResName, FirstStrand->endResName) &&
2989                     ajStrMatchS(elm->initSeqNum, FirstStrand->initSeqNum) &&
2990                     ajStrMatchS(elm->endSeqNum, FirstStrand->endSeqNum))
2991                 {
2992                     pdbioElementDel(&elm);
2993                     continue;
2994                 }
2995             }
2996 
2997             /* Check that all records are present and flag an error if
2998              * they're not */
2999             if (MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum)
3000                 &&
3001                 MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum)
3002                 &&
3003                 MAJSTRGETLEN(elm->elementId))
3004             {
3005                 ajListPushAppend(listelms, elm);
3006             }
3007             else
3008             {
3009                 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
3010                 pdbioElementDel(&elm);
3011                 continue;
3012             }
3013 
3014             ajStrAssignS(&LastSheetId, elm->elementId);
3015             FirstStrand = elm;
3016             doneFirstStrand = ajTrue;
3017         }
3018         else if (ajStrPrefixC(pdbfile->lines[i], "TURN"))
3019         {
3020             doneFirstStrand = ajFalse;
3021 
3022             elm = pdbioElementNew();
3023 
3024             ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
3025             ajStrRemoveWhite(&tmpstr);
3026             ajFmtScanS(tmpstr, "%d", &elm->elementNum);
3027 
3028             ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
3029             ajStrRemoveWhite(&elm->elementId);
3030 
3031             elm->elementType = 'T';
3032 
3033             ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17);
3034             ajStrRemoveWhite(&elm->initResName);
3035 
3036             ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 20, 24);
3037             ajStrRemoveWhite(&elm->initSeqNum);
3038 
3039             ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 26, 28);
3040             ajStrRemoveWhite(&elm->endResName);
3041 
3042             ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 31, 35);
3043             ajStrRemoveWhite(&elm->endSeqNum);
3044 
3045             elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19);
3046 
3047             if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 30))
3048             {
3049                 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
3050                             ajStrGetCharPos(pdbfile->lines[i], 30));
3051                 pdbioElementDel(&elm);
3052                 continue;
3053             }
3054 
3055             /* Check that all records are present and flag an error if
3056              * they're not */
3057             if (MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum)
3058                 && MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum)
3059                 && MAJSTRGETLEN(elm->elementId))
3060             {
3061                 ajListPushAppend(listelms, elm);
3062             }
3063             else
3064             {
3065                 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
3066                 pdbioElementDel(&elm);
3067                 continue;
3068             }
3069         }
3070     }
3071 
3072 
3073     /* Write array in Elements structure */
3074     (*elms)->n = (ajuint) ajListToarray(listelms, (void ***) &(*elms)->elms);
3075 
3076 
3077     /* Generate diagnostics and set defaults */
3078     if ((ajStrGetLen(pdbfile->compnd) == 0))
3079     {
3080         ajStrAssignC(&pdbfile->compnd, ".");
3081         ajFmtPrintF(flog, "%-15s\n", "NOCOMPND");
3082     }
3083 
3084     if ((ajStrGetLen(pdbfile->source) == 0))
3085     {
3086         ajStrAssignC(&pdbfile->source, ".");
3087         ajFmtPrintF(flog, "%-15s\n", "NOSOURCE");
3088     }
3089 
3090     if ((pdbfile->method == ajEPdbMethodNmr) && (pdbfile->modcnt == 0))
3091         ajFmtPrintF(flog, "%-15s\n", "NOMODEL");
3092 
3093     if (!E_FPZERO(pdbfile->reso, U_FEPS) && pdbfile->modcnt)
3094     {
3095         ajFmtPrintF(flog, "%-15s\n", "RESOLMOD");
3096         pdbfile->method = ajEPdbMethodNmr;
3097     }
3098 
3099     /* Every pdb file is considered to have at least one model */
3100     if (pdbfile->modcnt == 0)
3101     {
3102         pdbfile->modcnt = 1;
3103         pdbfile->nomod = ajTrue;
3104     }
3105 
3106     if (!resolfound)
3107     {
3108         pdbfile->reso = 0;
3109         pdbfile->method = ajEPdbMethodNmr;
3110         ajFmtPrintF(flog, "%-15s\n", "NORESOLUTION");
3111     }
3112 
3113     if (!seqresfound)
3114     {
3115         ajWarn("No SEQRES record found in raw pdb file");
3116         ajFmtPrintF(flog, "%-15s\n", "NOSEQRES");
3117 
3118         /* Free memory and return */
3119         ajListFree(&listelms);
3120         ajStrDel(&LastSheetId);
3121         ajStrDel(&tmpstr);
3122         ajStrDel(&seqres);
3123 
3124         return ajFalse;
3125     }
3126 
3127     if (!donefirstatom)
3128     {
3129         ajWarn("No ATOM record found in raw pdb file");
3130         ajFmtPrintF(flog, "%-15s\n", "NOATOM");
3131 
3132         /* Free memory and return */
3133         ajListFree(&listelms);
3134         ajStrDel(&LastSheetId);
3135         ajStrDel(&tmpstr);
3136         ajStrDel(&seqres);
3137 
3138         return ajFalse;
3139     }
3140 
3141     AJCNEW0(pdbfile->nligands, pdbfile->nchains);
3142     AJCNEW0(pdbfile->nres, pdbfile->nchains);
3143     AJCNEW0(pdbfile->numHelices, pdbfile->nchains);
3144     AJCNEW0(pdbfile->numStrands, pdbfile->nchains);
3145     AJCNEW0(pdbfile->numSheets, pdbfile->nchains);
3146     AJCNEW0(pdbfile->numTurns, pdbfile->nchains);
3147     AJCNEW0(pdbfile->chainok, pdbfile->nchains);
3148     AJCNEW0(pdbfile->resn1ok, pdbfile->nchains);
3149 
3150     for (i = 0U; i < pdbfile->nchains; i++)
3151     {
3152         pdbfile->chainok[i] = ajTrue;
3153         pdbfile->resn1ok[i] = ajTrue;
3154     }
3155 
3156     /* Free memory and return */
3157     ajStrDel(&LastSheetId);
3158     ajListFree(&listelms);
3159 
3160     ajStrDel(&tmpstr);
3161     ajStrDel(&seqres);
3162 
3163     return ajTrue;
3164 }
3165 
3166 
3167 
3168 
3169 /* @funcstatic pdbioSeqresToSequence ******************************************
3170 **
3171 ** Reads a string containing a SEQRES sequence  (e.g. "ALA ALA LEU" ) and
3172 ** writes a string containing a normal sequence (e.g. "AAL").
3173 **
3174 ** @param [r] seqres   [const AjPStr]   SEQRES sequence
3175 ** @param [w] seq      [AjPStr *] Output sequence
3176 ** @param [r] camask   [AjBool]   Whether to ignore residues which do not
3177 ** have a C-alpha atom, these are defined as ACE, FOR and NH2 groups.
3178 ** @param [w] len      [ajuint *] Length of sequence INCLUDING ACE, FOR and
3179 **                                NH2 groups.
3180 **
3181 ** @return [AjBool] ajTrue on success, ajFalse otherwise.
3182 **
3183 ** @release 2.9.0
3184 ** @@
3185 ******************************************************************************/
3186 
pdbioSeqresToSequence(const AjPStr seqres,AjPStr * seq,AjBool camask,ajuint * len)3187 static AjBool pdbioSeqresToSequence(const AjPStr seqres,
3188                                     AjPStr *seq, AjBool camask,
3189                                     ajuint *len)
3190 {
3191     const AjPStr aa3 = NULL;
3192     char aa1 = '\0';
3193     ajuint nrem = 0;             /* No. 'residues' that were removed */
3194 
3195     /* Check args */
3196     if (!seqres || !seq)
3197     {
3198         ajWarn("Bad args passed to pdbioSeqresToSequence\n");
3199 
3200         return ajFalse;
3201     }
3202 
3203     /* Allocate memory */
3204 
3205     if ((aa3 = ajStrParseC(seqres, " \n")))
3206     {
3207         /* Parse seqres string */
3208         do
3209         {
3210             if (ajStrMatchC(aa3, "FOR") ||
3211                 ajStrMatchC(aa3, "ACE") ||
3212                 ajStrMatchC(aa3, "NH2"))
3213             {
3214                 if (camask)
3215                 {
3216                     nrem++;
3217                     continue;
3218                 }
3219             }
3220 
3221             ajResidueFromTriplet(aa3, &aa1);
3222             ajStrAppendK(seq, aa1);
3223         }
3224         while ((aa3 = ajStrParseC(NULL, " \n")));
3225     }
3226     else
3227         return ajFalse;
3228 
3229     *len = ajStrGetLen(*seq) + nrem;
3230 
3231     return ajTrue;
3232 }
3233 
3234 
3235 
3236 
3237 /* @funcstatic pdbioCheckChains ***********************************************
3238 **
3239 ** Reads a Pdbfile object and checks whether chains from the SEQRES records
3240 ** (i) use unique chain ids, (ii) do not use an id of a space (' ') alongside
3241 ** non-space chain ids and (iii) contain at least the user-defined threshold
3242 ** number of amino acid residues. If any of these conditions are not met then
3243 ** the chain is discarded (chainok array is set to ajFalse).
3244 **
3245 ** The chainok array is written.
3246 **
3247 **
3248 ** @param [w] pdbfile         [AjPPdbfile]   Pdbfile object
3249 ** @param [u] flog            [AjPFile]      Pointer to log file (build
3250 **                                           diagnostics).
3251 ** @param [r] min_chain_size  [ajint]        Minimum number of amino acids in
3252 **                                           a chain.
3253 **
3254 ** @return [AjBool]  True if SEQRES records contained at least one protein
3255 ** chain, False otherwise.
3256 **
3257 ** @release 2.9.0
3258 ** @@
3259 ******************************************************************************/
3260 
pdbioCheckChains(AjPPdbfile pdbfile,AjPFile flog,ajint min_chain_size)3261 static AjBool pdbioCheckChains(AjPPdbfile pdbfile, AjPFile flog,
3262                                ajint min_chain_size)
3263 {
3264     ajuint i = 0U;              /* Loop counter */
3265     ajuint j = 0U;              /* Loop counter */
3266     AjIStr iter = NULL;         /* Iterator for sequence strings */
3267     ajint aacnt = 0;            /* Counter for number of amino acids in
3268                                  * sequence strings */
3269 
3270     AjBool ok = ajFalse;        /* Flag which is True if amino acid chains
3271                                  * are found  in the SEQRES records */
3272 
3273     char id1 = ' ';             /* Chain id */
3274     char id2 = ' ';             /* Chain id */
3275     AjBool iderr = ajFalse;     /* ajTrue if both a space and a character
3276                                  * are used as chain id's in the same file */
3277 
3278     /* Check args */
3279     if (!pdbfile || !flog)
3280     {
3281         ajWarn("Bad args passed to pdbioCheckChains\n");
3282 
3283         return ajFalse;
3284     }
3285 
3286     /* Report problems with chain id's */
3287     for (i = 0U; i < pdbfile->nchains; i++)
3288     {
3289         id1 = ajChararrGet(pdbfile->chid, i);
3290 
3291         for (j = i + 1U; j < pdbfile->nchains; j++)
3292         {
3293             if (id1 == (id2 = ajChararrGet(pdbfile->chid, j)))
3294             {
3295                 ajFmtPrintF(flog, "%-15s%d (%c) %d (%c)\n", "CHAINIDS", i + 1,
3296                             ajChararrGet(pdbfile->chid, i), j + 1,
3297                             ajChararrGet(pdbfile->chid, j));
3298 
3299                 pdbfile->chainok[i] = ajFalse;
3300                 pdbfile->chainok[j] = ajFalse;
3301             }
3302 
3303             if ((((id1 == ' ') && (id2 != ' ')) || ((id2 == ' ') && (id1 != ' ')))
3304                 && (!(iderr)))
3305             {
3306                 ajFmtPrintF(flog, "%-15s\n", "CHAINIDSPC");
3307                 iderr = ajTrue;
3308             }
3309 
3310         }
3311     }
3312 
3313 
3314     /* Report problems with non-protein chains */
3315     for (i = 0U; i < pdbfile->nchains; i++)
3316     {
3317         if (!pdbfile->chainok[i])
3318             continue;
3319 
3320 
3321         aacnt = 0;
3322         iter = ajStrIterNew(pdbfile->seqres[i]);
3323 
3324         if (toupper((int) ajStrIterGetK(iter)) != 'X')
3325             ++aacnt;
3326 
3327         while (ajStrIterNext(iter))
3328             if (toupper((int) ajStrIterGetK(iter)) != 'X')
3329                 if (++aacnt >= min_chain_size)
3330                     break;
3331 
3332         ajStrIterDel(&iter);
3333 
3334 
3335         if (aacnt == 0)
3336         {
3337             ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESNOAA", i + 1,
3338                         ajChararrGet(pdbfile->chid, i));
3339             pdbfile->chainok[i] = ajFalse;
3340         }
3341         else if (aacnt < min_chain_size)
3342         {
3343             ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESFEWAA", i + 1,
3344                         ajChararrGet(pdbfile->chid, i));
3345 
3346             pdbfile->chainok[i] = ajFalse;
3347         }
3348         else
3349         {
3350             ok = ajTrue;
3351         }
3352     }
3353 
3354     /* Return now if no protein chains are found */
3355     if (!ok)
3356     {
3357         ajWarn("No protein chains found in raw pdb file");
3358         ajFmtPrintF(flog, "%-15s\n", "NOPROTEINS");
3359 
3360         return ajFalse;
3361     }
3362 
3363 
3364     return ajTrue;
3365 }
3366 
3367 
3368 
3369 
3370 /* @funcstatic pdbioCheckTer **************************************************
3371 **
3372 ** Reads a Pdbfile object and checks whether the expected number of TER
3373 ** and MODEL records are present. Any unwanted records (e.g. TER records that
3374 ** delimit fragments of chain digests and duplicate MODEL records) are
3375 ** discarded (the linetype array for the lines are set to pdbfileELinetypeIgnore).
3376 **
3377 ** The linetype array and modcnt variable may be modified. The toofewter
3378 ** element is written.
3379 ** The value of modcnt is reduced by 1 for each MODEL record that was masked
3380 ** but this is not done for tercnt.
3381 **
3382 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3383 ** @param [u] flog    [AjPFile]     Pointer to log file (build diagnostics)
3384 **
3385 ** @return [AjBool]  True on success, False otherwise.
3386 **
3387 ** @release 2.9.0
3388 ** @@
3389 ******************************************************************************/
3390 
pdbioCheckTer(AjPPdbfile pdbfile,AjPFile flog)3391 static AjBool pdbioCheckTer(AjPPdbfile pdbfile, AjPFile flog)
3392 {
3393     char aa = ' ';
3394     ajuint i = 0U;              /* Loop counter */
3395     AjBool toomany = ajFalse;
3396     AjBool toofew = ajFalse;
3397     AjPStr aa1 = NULL;
3398     AjPStr aa2 = NULL;
3399 
3400     /* Check args */
3401     if (!pdbfile || !flog)
3402     {
3403         ajWarn("Bad args passed to pdbioCheckTer\n");
3404 
3405         return ajFalse;
3406     }
3407 
3408     /* Allocate memory */
3409     aa1 = ajStrNew();
3410     aa2 = ajStrNew();
3411 
3412     /* Report problems with TER records */
3413     if (!pdbfile->tercnt)
3414         ajFmtPrintF(flog, "%-15s\n", "TERNONE");
3415     else
3416     {
3417         if (pdbfile->method == ajEPdbMethodNmr)
3418         {
3419             if (pdbfile->tercnt > (pdbfile->nchains *
3420                                    pdbfile->modcnt))
3421                 toomany = ajTrue;
3422             else if (pdbfile->tercnt < (pdbfile->nchains *
3423                                         pdbfile->modcnt))
3424                 toofew = ajTrue;
3425         }
3426         else
3427         {
3428             if (pdbfile->tercnt > pdbfile->nchains)
3429                 toomany = ajTrue;
3430             else if (pdbfile->tercnt < pdbfile->nchains)
3431                 toofew = ajTrue;
3432         }
3433     }
3434 
3435     /* Report diagnostics */
3436     if (toomany)
3437         ajFmtPrintF(flog, "%-15s\n", "TERTOOMANY");
3438     else if (toofew)
3439     {
3440         ajFmtPrintF(flog, "%-15s\n", "TERTOOFEW");
3441         pdbfile->toofewter = ajTrue;
3442     }
3443 
3444     /* Mask out the extra TER records */
3445     if (toomany)
3446     {
3447         for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
3448         {
3449             /* This is a TER record. Mask it out if it is flanked by ATOM or
3450              * HETATM records for AMINO ACIDS and with identical chain ids. */
3451             if (pdbfile->linetype[i] == pdbfileELinetypeTER)
3452             {
3453                 if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
3454                     (pdbfile->linetype[i + 1] == pdbfileELinetypeCoordinate) &&
3455                     (ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)
3456                      == ajStrGetCharPos(pdbfile->lines[i + 1], POS_CHID)))
3457                 {
3458                     ajStrAssignSubS(&aa1, pdbfile->lines[i - 1], 17, 19);
3459                     ajStrAssignSubS(&aa2, pdbfile->lines[i + 1], 17, 19);
3460 
3461                     if ((ajResidueFromTriplet(aa1, &aa)) &&
3462                         (ajResidueFromTriplet(aa2, &aa)))
3463                     {
3464                         pdbfile->linetype[i] = pdbfileELinetypeIgnore;
3465 
3466                     }
3467                 }
3468             }
3469         }
3470     }
3471 
3472     /* Check for duplicate MODEL records */
3473     for (i = pdbfile->idxfirst + 1U;
3474          i < pdbfile->nlines;
3475          i++)
3476     {
3477         if ((pdbfile->linetype[i - 1] == pdbfileELinetypeMODEL) &&
3478             (pdbfile->linetype[i] == pdbfileELinetypeMODEL))
3479         {
3480             pdbfile->linetype[i - 1] = pdbfileELinetypeIgnore;
3481             pdbfile->modcnt--;
3482             ajFmtPrintF(flog, "%-15s%d\n", "MODELDUP", i + 1);
3483         }
3484     }
3485 
3486     /* Tidy up and return */
3487     ajStrDel(&aa1);
3488     ajStrDel(&aa2);
3489 
3490     return ajTrue;
3491 }
3492 
3493 
3494 
3495 
3496 /* @funcstatic pdbioNumberChains **********************************************
3497 **
3498 ** Reads a Pdbfile object and assigns each ATOM or HETATM record to a specific
3499 ** chain and model. Lines containing coordinates for water molecules and
3500 ** other non-protein groups ("heterogens") are identified.  Water molecules are
3501 ** uniquely associated with a whole model whereas other non-protein groups are
3502 ** associated with a unique chain if possible. If this is not possible they
3503 ** are assigned a unique group number.
3504 **
3505 ** The modn, chnn and gpn arrays are written. The linetype array is modified.
3506 ** The nligands array (count of groups associated with a chain), ngroups
3507 ** element (count of groups not associated with a chain) and gpid (identifiers
3508 ** of these latter groups) are written.
3509 **
3510 ** linetype array
3511 ** The linetype array is written with a value of pdbfileELinetypeWater for
3512 ** lines containing coordinates for water, and to pdbfileELinetypeHeterogen or
3513 ** pdbfileELinetypeGroups for non-protein groups that, respectively, could or
3514 ** could not be uniquely associated with a chain.
3515 **
3516 ** @param [w] pdbfile  [AjPPdbfile] Pdbfile object
3517 ** @param [u] flog     [AjPFile] Log file (build diagnostics)
3518 **
3519 ** @return [AjBool]  True on success, False otherwise
3520 **
3521 ** @release 2.9.0
3522 ** @@
3523 ******************************************************************************/
3524 
pdbioNumberChains(AjPPdbfile pdbfile,AjPFile flog)3525 static AjBool pdbioNumberChains(AjPPdbfile pdbfile, AjPFile flog)
3526 {
3527     char id = ' ';              /* Chain id */
3528     ajuint i = 0U;              /* Loop counter */
3529     ajuint j = 0U;              /* Loop counter */
3530     ajint mod = 0;              /* Model number */
3531 
3532     AjBool done = ajFalse;      /* True if we have assigned a chain id for
3533                                  * this line */
3534     ajuint this = 0U;           /* Chain number of last line read in */
3535     ajuint chn = 0U;            /* Chain number as index (starting from 0) */
3536     AjPInt gpns = NULL;         /* Gives the correct group number for groups
3537                                  * that could not be identified as belonging
3538                                  * to a chain, in cases where a single chain
3539                                  * only is present in the file */
3540     ajint gpn = 0;              /* Current group number */
3541     ajint offset = 0;           /* Offset for finding correct value for gpns
3542                                  * (for use with files with a single chain
3543                                  * only */
3544     AjBool *chndone = NULL;     /* Array whose elements are TRUE if we have
3545                                  * already read a line in belonging to the
3546                                  * appropriate chain for this model */
3547     AjPStr *htype = NULL;       /* Array holding the residue type of the last
3548                                  * heterogen read in for the appropriate
3549                                  * chain */
3550 
3551     /* Check args */
3552     if (!pdbfile || !flog)
3553     {
3554         ajWarn("Bad args passed to pdbioNumberChains\n");
3555 
3556         return ajFalse;
3557     }
3558 
3559     /* Allocate memory */
3560     gpns = ajIntNew();
3561 
3562     AJCNEW0(htype, pdbfile->nchains);
3563 
3564     for (i = 0U; i < pdbfile->nchains; i++)
3565         htype[i] = ajStrNew();
3566 
3567     AJCNEW0(chndone, pdbfile->nchains);
3568 
3569     for (i = 0U; i < pdbfile->nchains; i++)
3570         chndone[i] = ajFalse;
3571 
3572     if (((pdbfile->method == ajEPdbMethodNmr) && pdbfile->nomod) ||
3573         (pdbfile->method == ajEPdbMethodXray))
3574         mod = 1;
3575 
3576     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
3577     {
3578         /* JONNEW Lines may already have been assigned to pdbfileELinetypeWater
3579          * in pdbioFirstPass function, so we need to check here to ensure model
3580          * number gets assigned */
3581         if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
3582             (pdbfile->linetype[i] == pdbfileELinetypeWater))
3583         {
3584 
3585             pdbfile->modn[i] = mod;
3586 
3587             /* Assign appropriate chain number to any ATOM or HETATM line
3588              * which has an id exhibited in the SEQRES records. */
3589             done = ajFalse;
3590             id = ajStrGetCharPos(pdbfile->lines[i], POS_CHID);
3591 
3592             for (j = 0U; j < pdbfile->nchains; j++)
3593             {
3594                 if (ajChararrGet(pdbfile->chid, j) == id)
3595                 {
3596                     pdbfile->chnn[i] = j + 1;
3597                     chn = pdbfile->chnn[i] - 1;
3598                     this = j + 1;
3599 
3600                     if (chndone[this - 1])
3601                     {
3602                         /* Mark up water coordinates */
3603                         if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3604                             pdbfile->linetype[i] = pdbfileELinetypeWater;
3605                         else
3606                         {
3607                             /* Mark up ligand coordinates */
3608                             pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3609 
3610                             /* New heterogen */
3611                             if (!ajStrMatchS(htype[chn], pdbfile->rtype[i]))
3612                             {
3613                                 offset++;
3614                                 pdbfile->nligands[chn]++;
3615                                 pdbfile->gpn[i] = pdbfile->nligands[chn];
3616                                 ajStrAssignS(&htype[chn],
3617                                              pdbfile->rtype[i]);
3618                             }
3619                             /* More atoms of the same heterogen */
3620                             else
3621                                 pdbfile->gpn[i] = pdbfile->nligands[chn];
3622                         }
3623                     }
3624 #if AJFALSE
3625 /* Unused */
3626                     else
3627                         doneter = ajFalse;
3628 #endif /* AJFALSE */
3629 
3630                     done = ajTrue;
3631                     break;
3632                 }
3633             }
3634 
3635 
3636             if (!done)
3637             {
3638                 /* Any ATOM or HETATM record with a whitespace as chain id
3639                  * and which has not already been assigned belongs to the
3640                  * chain immediately preceding it. Assign these lines as
3641                  * NON_PROTEIN_CHAIN lines.  */
3642 
3643                 if (id == ' ')
3644                 {
3645                     /* This won't be set until we've read in at least one
3646                      * coordinate line for protein chain, so if this==0, set
3647                      * it to 1 (first chain). This prevents problems for
3648                      * 1qjh.pxyz */
3649 
3650                     if (this == 0)
3651                         this = 1;
3652 
3653                     pdbfile->chnn[i] = this;
3654                     chn = pdbfile->chnn[i] - 1;
3655 
3656                     /* Mark up water coordinates */
3657                     if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3658                         pdbfile->linetype[i] = pdbfileELinetypeWater;
3659                     else
3660                     {
3661                         /* Mark up ligand coordinates */
3662 
3663                         pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3664 
3665                         /* New ligand */
3666                         if (!ajStrMatchS(htype[chn], pdbfile->rtype[i]))
3667                         {
3668 
3669                             offset++;
3670                             pdbfile->nligands[chn]++;
3671                             pdbfile->gpn[i] = pdbfile->nligands[chn];
3672                             ajStrAssignS(&htype[chn], pdbfile->rtype[i]);
3673                         }
3674                         /* More atoms of the same heterogen */
3675                         else
3676                         {
3677                             pdbfile->gpn[i] = pdbfile->nligands[chn];
3678                         }
3679                     }
3680                 }
3681                 else
3682                     /* Assign any ATOM or HETATM records with a non-
3683                      * whitespace chain id that does not appear in the SEQRES
3684                      * records as a  NON_PROTEIN_CHAIN. Record the chain id's
3685                      * used and assign a GROUP NUMBER as appropriate. */
3686                 {
3687                     /* Mark up water coordinates */
3688                     if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3689                         pdbfile->linetype[i] = pdbfileELinetypeWater;
3690                     else
3691                     {
3692                         /* If there is a single chain only then the group is
3693                          * of course associated with that chain */
3694                         if (pdbfile->nchains == 1)
3695                         {
3696                             /* Mark up ligand coordinates */
3697                             pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3698                             pdbfile->chnn[i] = 1;
3699 
3700                             for (done = ajFalse, j = 0U;
3701                                  j < pdbfile->ngroups;
3702                                  j++)
3703                                 if (ajChararrGet(pdbfile->gpid, j) == id)
3704                                 {
3705                                     pdbfile->gpn[i] = ajIntGet(gpns, j);
3706                                     done = ajTrue;
3707                                     break;
3708                                 }
3709 
3710                             if (!done)
3711                             {
3712                                 ajIntPut(&gpns, j, (gpn = j + 1 + offset));
3713                                 pdbfile->gpn[i] = gpn;
3714 
3715                                 /* NO - this code would be used only if the
3716                                  * group could not be associated with a
3717                                  * chain. ajChararrPut(&(pdbfile->gpid),
3718                                  * pdbfile->ngroups, id); pdbfile->ngroups++; */
3719 
3720                                 /* Increment number of ligands and reset last
3721                                  * ligand type read in */
3722                                 pdbfile->nligands[0]++;
3723                                 ajStrAssignC(&htype[0], " ");
3724                             }
3725                         }
3726                         else
3727                         {
3728                             /* Mark up ligand coordinates */
3729                             pdbfile->linetype[i] = pdbfileELinetypeGroups;
3730 
3731                             for (done = ajFalse, j = 0U;
3732                                  j < pdbfile->ngroups;
3733                                  j++)
3734                                 if (ajChararrGet(pdbfile->gpid, j) == id)
3735                                 {
3736                                     pdbfile->gpn[i] = j + 1;
3737                                     done = ajTrue;
3738                                     break;
3739                                 }
3740 
3741                             if (!done)
3742                             {
3743                                 ajChararrPut(&(pdbfile->gpid),
3744                                              pdbfile->ngroups, id);
3745                                 pdbfile->ngroups++;
3746                                 pdbfile->gpn[i] = j + 1;
3747                             }
3748                         }
3749                     }
3750                 }
3751             }
3752         }
3753         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
3754         {
3755             mod++;
3756 
3757 /*          doneter=ajFalse;  Unused variable */
3758             for (j = 0U; j < pdbfile->nchains; j++)
3759                 chndone[j] = ajFalse;
3760 
3761 /*          doneoneter=ajFalse; Unused variable */
3762         }
3763         else if (pdbfile->linetype[i] == pdbfileELinetypeTER)
3764         {
3765             chndone[this - 1] = ajTrue;
3766 
3767 
3768 /*          prev = this; Unused variable */
3769 /*          doneoneter=ajTrue; Unused variable */
3770 /*          doneter=ajTrue; Unused variable */
3771         }
3772 
3773 
3774 
3775         /*
3776         ** Check for missing TER records.
3777         ** Where chains are not separated by TER records
3778         ** (the chain id changes from line to line without an intervening TER
3779         ** record and both chain id's are not whitespace).
3780         ** Where ATOM and HETATM groups are not separated
3781         ** by TER records (a chain id is given on one line, a whitespace chain
3782         ** id is given on the next line, and there is no intervening TER
3783         ** record).
3784         **
3785         ** This code is identical to a fragment from pdbioCheckTer.
3786         **
3787         ** Note that chndone only is modified. If the code in the function
3788         ** was made to use prev, doneoneter, doneter then the code below would
3789         ** also have to be modified
3790         **
3791         ** Only do this now where there aren't enough TER records in the file.
3792         ** Without this check, it was failing for cases where the order of
3793         ** chains is inconsistent (see around line 4095 of pdb1cm4.ent)
3794         */
3795 
3796         if (pdbfile->toofewter)
3797         {
3798             if (i > pdbfile->idxfirst)
3799                 if (pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate &&
3800                     ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
3801                      (pdbfile->linetype[i] == pdbfileELinetypeHeterogen)) &&
3802                     ((ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)) !=
3803                      ajStrGetCharPos(pdbfile->lines[i], POS_CHID)))
3804                 {
3805                     for (j = 0; j < pdbfile->nchains; j++)
3806                         if (ajChararrGet(pdbfile->chid, j)
3807                             == ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID))
3808                         {
3809                             chndone[j] = ajTrue;
3810                             break;
3811                         }
3812                 }
3813         }
3814     }
3815 
3816 
3817     /*
3818     ** The above code cannot cope for cases where the ATOM and HETATM records
3819     ** use the same (or no) chain identifier and are not separated by a TER
3820     ** record (e.g. 1rbp)
3821     **
3822     ** For files with less than the expected number of TER records,
3823     ** check again for COORDHET lines, which are identified as
3824     ** (i)  a line beginning with a HETATM record with the same chain
3825     ** identifier but lower residue number than the preceding line, or
3826     ** JONNEW
3827     ** (ii) a line beginning with a HETATM record which is not followed
3828     ** anywhere in the file by an ATOM record with the same chain identifier
3829     ** (from the PDB record) or number (assigned by parser)
3830     */
3831 
3832     for (i = 0; i < pdbfile->nchains; i++)
3833         ajStrAssignC(&htype[i], "\0");
3834 
3835 
3836     if (pdbfile->tercnt < (pdbfile->nchains * pdbfile->modcnt))
3837         for (i = pdbfile->idxfirst + 1; i < pdbfile->nlines; i++)
3838         {
3839             chn = pdbfile->chnn[i] - 1;
3840 
3841             if (pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate &&
3842                 pdbfile->linetype[i] == pdbfileELinetypeCoordinate &&
3843                 ((ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)) ==
3844                  ajStrGetCharPos(pdbfile->lines[i], POS_CHID)))
3845                 if (ajStrPrefixC(pdbfile->lines[i], "HETATM"))
3846                     if ((pdbfile->resn1[i] < pdbfile->resn1[i - 1]) ||
3847                         pdbioNoMoreAtoms(pdbfile, i))
3848                         /* if(pdbfile->resn1[i]<pdbfile->resn1[i-1]) */
3849                         while ((ajStrPrefixC(pdbfile->lines[i], "HETATM")))
3850                         {
3851                             if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3852                                 pdbfile->linetype[i] = pdbfileELinetypeWater;
3853                             else
3854                             {
3855                                 pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3856 
3857                                 /* New heterogen */
3858                                 if (!ajStrMatchS(htype[chn],
3859                                                  pdbfile->rtype[i]))
3860                                 {
3861                                     offset++;
3862                                     pdbfile->nligands[chn]++;
3863                                     pdbfile->gpn[i]
3864                                         = pdbfile->nligands[chn];
3865                                     ajStrAssignS(&htype[chn],
3866                                                  pdbfile->rtype[i]);
3867                                 }
3868                                 /* More atoms of the same heterogen */
3869                                 else
3870                                 {
3871                                     pdbfile->gpn[i]
3872                                         = pdbfile->nligands[chn];
3873                                 }
3874                             }
3875                             i++;
3876                         }
3877         }
3878 
3879     /* For files with a single chain only, set the number of groups that
3880      * could not be associated with a chain to zero */
3881     if (pdbfile->nchains == 1)
3882     {
3883         /* We might need to add ngroups to nligands[0] */
3884         if (pdbfile->ngroups != 0)
3885             ajFatal("Must check ngroups versus nligands in the file");
3886         pdbfile->ngroups = 0;
3887     }
3888 
3889 
3890 
3891     /* Tidy up and return */
3892     for (i = 0; i < pdbfile->nchains; i++)
3893         ajStrDel(&htype[i]);
3894 
3895     AJFREE(htype);
3896     ajIntDel(&gpns);
3897     AJFREE(chndone);
3898 
3899     return ajTrue;
3900 }
3901 
3902 
3903 
3904 
3905 /* @funcstatic pdbioNoMoreAtoms ***********************************************
3906 **
3907 ** This function is called by function <pdbioNumberChains> to identify ligands
3908 ** (COORDHET lines) in files with less than the expected number of TER
3909 ** records.
3910 ** These are identified here by a line beginning with a HETATM record which
3911 ** is not followed by an ATOM record with the same chain identifier (from
3912 ** the PDB record) or number (assigned by parser). The function returns if
3913 ** a line of a different chain is found or at the first non-ATOM/HETATM line.
3914 ** Additional processing is done in <pdbioNumberChains> itself.
3915 **
3916 ** @param [w] pdbfile  [AjPPdbfile] Pdbfile object
3917 ** @param [r] linen    [ajuint] Line number
3918 **
3919 ** @return [AjBool]  True (no more atoms), False otherwise
3920 **
3921 ** @release 3.0.0
3922 ** @@
3923 ******************************************************************************/
3924 
pdbioNoMoreAtoms(AjPPdbfile pdbfile,ajuint linen)3925 static AjBool pdbioNoMoreAtoms(AjPPdbfile pdbfile, ajuint linen)
3926 {
3927     ajuint i = 0U;
3928 
3929     for (i = linen + 1; i < pdbfile->nlines; i++)
3930     {
3931         if ((ajStrPrefixC(pdbfile->lines[i], "ATOM")))
3932         {
3933             /* Same chain */
3934             if ((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) ==
3935                  ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) ||
3936                 (pdbfile->chnn[linen] == pdbfile->chnn[i]))
3937                 return ajFalse;
3938             else
3939                 /* Different chain */
3940                 return ajTrue;
3941         }
3942         else if ((ajStrPrefixC(pdbfile->lines[i], "HETATM")))
3943         {
3944             /* Different chain */
3945             if ((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) !=
3946                  ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) ||
3947                 (pdbfile->chnn[linen] != pdbfile->chnn[i]))
3948                 return ajTrue;
3949         }
3950         else
3951             /* Different chain or near EOF */
3952             return ajTrue;
3953     }
3954 
3955     return ajTrue;
3956 }
3957 
3958 
3959 
3960 
3961 /* @funcstatic pdbioMaskChains ************************************************
3962 **
3963 ** Reads a Pdbfile object and checks to see whether the ATOM records for
3964 ** each chain contain sufficient amino acids. Any chains with insufficient
3965 ** amino acids either in the SEQRES or ATOM records, or with ambiguous chain
3966 ** id's are discarded. Optionally, amino acid residues and non-amino
3967 ** acid groups (e.g. ACE, NH2 etc) in protein chains with no CA atom are also
3968 ** discarded (the linetype array entries for those lines are set to
3968 ** pdbfileELinetypeIgnore).
3969 ** For non-amino acid groups,  the corresponding characters are removed from
3970 ** the sequence derived from the SEQRES records.  Coordinate data and atom
3971 ** type are parsed for each atom.  Optionally, amino acids or groups in
3972 ** protein chains with a single atom only are also discarded.
3973 **
3974 **
3975 ** Checks whether chains from the ATOM records contain at least the
3976 ** user-defined threshold number of amino acid residues. If not then the chain
3977 ** is discarded  (chainok array is set to ajFalse). If NO chains with
3978 ** sufficient residues are found, a "NOPROTEINS" error is generated and
3979 ** ajFalse is returned.
3980 **
3981 ** Writes the x,y,z,o,b and atype elements of a Pdbfile object.  The linetype,
3982 ** and possibly seqres, seqresful and nres arrays are modified.
3983 **
3984 ** linetype array
3985 ** Coordinate data are extracted for lines of linetype pdbfileELinetypeCoordinate,
3986 ** pdbfileELinetypeHeterogen and pdbfileELinetypeGroups.
3987 **
3988 ** seqres & seqresful arrays
3989 ** Three-letter codes of any groups that are (i) not standard amino acids and
3990 ** (ii) which do not contain a CA atom are removed from the seqres sequence
3991 ** if the <camask> is set. The seqresful array is an intermediate array to
3992 ** achieve this.
3993 **
3994 ** @param [w] pdbfile    [AjPPdbfile] Pdbfile object
3995 ** @param [u] flog       [AjPFile]     Log file (build diagnostics)
3996 ** @param [r] min_chain_size  [ajint]  Min. no. of amino acids in a chain
3997 ** @param [r] camask          [AjBool] Whether to mask non-amino acid
3998 **                                     residues within protein chains which
3999 **                                     do not have a C-alpha atom (remove them
4000 **                                     from the seqres sequence and set the
4001 **                                     linetype array for the lines
4002 **                                     to pdbfileELinetypeIgnore).
4003 ** @param [r] camask1         [AjBool]  Whether to mask amino acid residues
4004 **                                     within protein chains which do not have
4005 **                                     a C-alpha atom (set the linetype
4006 **                                     array for the lines to pdbfileELinetypeIgnore).
4007 ** @param [r] atommask        [AjBool] Whether to mask residues or groups
4008 **                                     with a single atom only.
4009 **
4010 ** @return [AjBool]  True on success, False otherwise
4011 **
4012 ** @release 2.9.0
4013 ** @@
4014 ******************************************************************************/
4015 
pdbioMaskChains(AjPPdbfile pdbfile,AjPFile flog,ajint min_chain_size,AjBool camask,AjBool camask1,AjBool atommask)4016 static AjBool pdbioMaskChains(AjPPdbfile pdbfile, AjPFile flog,
4017                          ajint min_chain_size,
4018                          AjBool camask, AjBool camask1,
4019                          AjBool atommask)
4020 {
4021     ajuint i = 0U;              /* Loop counter */
4022     ajuint j = 0U;              /* Loop counter */
4023     AjPStr aa3 = NULL;          /* Amino acid */
4024     ajint rcnt = 0;             /* Residue count */
4025     ajint acnt = 0;             /* Atom count */
4026     ajint modcnt = 0;           /* Count of MODEL records */
4027     ajuint lastatom = 0U;       /* Line number of last coordinate line read
4028                                  * in */
4029     ajuint firstatom = 0U;      /* Line number of coordinate line for first
4030                                  * atom of residue */
4031     AjBool noca = ajFalse;      /* True if this residue does not contain a CA
4032                                  * atom */
4033     ajint lastchn = 0;          /* Chain number of last line read in */
4034     AjBool *chainok;            /* Array of flags which are True if a chain
4035                                  * in the SEQRES records is found in the ATOM
4036                                  * records */
4037     char aa1 = ' ';             /* Amino acid id */
4038     AjPStr lastrn = NULL;       /* Number of last residue read in */
4039     AjBool msgdone = ajFalse;   /* Flag for message reporting */
4040     AjPStr sub = NULL;
4041     AjPStr tmpseq = NULL;
4042     ajuint lenful = 0U;         /* Length of SEQRES sequence including ACE,
4043                                  * FOR & NH2 groups that might be discarded
4044                                  * by the call to pdbioSeqresToSequence */
4045     ajuint ipos = 0U;
4046     char tmp = ' ';
4047     AjBool odd = ajFalse;       /* Whether the current residue / group is of
4048                                  * unknown type */
4049     AjBool ok = ajFalse;        /* True if the file, after processing by this
4050                                  * function, is found to contain at least one
4051                                  * chain for which chainok == ajTrue */
4052 
4053     /* Check args */
4054     if (!pdbfile || !flog)
4055     {
4056         ajWarn("Bad args passed to pdbioMaskChains\n");
4057 
4058         return ajFalse;
4059     }
4060 
4061     /* Allocate memory */
4062     AJCNEW0(chainok, pdbfile->nchains);
4063 
4064     for (i = 0U; i < pdbfile->nchains; i++)
4065         chainok[i] = ajFalse;
4066 
4067     aa3 = ajStrNew();
4068     lastrn = ajStrNew();
4069     sub = ajStrNew();
4070     ajStrAssignClear(&sub);
4071 
4072     firstatom = lastatom = pdbfile->idxfirst;
4073 
4074     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4075     {
4076         if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
4077             (pdbfile->linetype[i] == pdbfileELinetypeHeterogen) ||
4078             (pdbfile->linetype[i] == pdbfileELinetypeGroups) ||
4079             (pdbfile->linetype[i] == pdbfileELinetypeWater))
4080         {
4081             /* Assign x, y, z, o, b data */
4082             /* Replace this with code so that internals of structure are not
4083              * probed (when code becomes available) */
4084             if ((sscanf(&(pdbfile->lines[i]->Ptr[28]), "%f %f %f %f",
4085                         &(pdbfile->x[i]),
4086                         &(pdbfile->y[i]),
4087                         &(pdbfile->z[i]),
4088                         &(pdbfile->o[i]))) != 4)
4089                 ajFatal("Scan error in pdbioMaskChains\n"
4090                         "Email jison@hgmp.mrc.ac.uk");
4091 
4092             if (!sscanf(&(pdbfile->lines[i]->Ptr[60]), "%f",
4093                         &(pdbfile->b[i])))
4094                 ajFatal("Scan error in pdbioMaskChains\n"
4095                         "Email jison@hgmp.mrc.ac.uk");
4096 
4097             /* Usually position 12 is used for the alternative position
4098              * indicator (taken in the code below to be indicated by a
4099              * number) for atoms, but occasionally can be incorrectly used
4100              * for the atom type itself (indicated in the code below by a
4101              * character in pos 12). This code copes for both cases */
4102             /* Assign atom type */
4103             if (isalpha((int) pdbfile->lines[i]->Ptr[12]))
4104             {
4105                 ajStrAssignSubS(&pdbfile->atype[i],
4106                                 pdbfile->lines[i], 12, 15);
4107                 ajStrRemoveWhite(&pdbfile->atype[i]);
4108                 if (!msgdone)
4109                 {
4110                     ajFmtPrintF(flog, "%-15s%d\n", "ATOMCOL12", i + 1);
4111                     msgdone = ajTrue;
4112                 }
4113             }
4114             else
4115             {
4116                 ajStrAssignSubS(&pdbfile->atype[i],
4117                                 pdbfile->lines[i], 13, 15);
4118                 ajStrRemoveWhite(&pdbfile->atype[i]);
4119             }
4120 
4121         }
4122 
4123         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4124         {
4125             /* Check whether CA atom is present */
4126             if (!(ajStrCmpC(pdbfile->atype[i], "CA")))
4127                 noca = ajFalse;
4128 
4129             if (pdbfile->chnn[i] < lastchn)
4130                 ajFmtPrintF(flog, "%-15s%d\n", "CHAINORDER", i + 1);
4131 
4132             if (pdbfile->chnn[i] != lastchn)
4133             {
4134                 rcnt = 0;
4135                 lastchn = pdbfile->chnn[i];
4136             }
4137 
4138 
4139             /* If this is a new residue */
4140             if (!ajStrMatchS(pdbfile->pdbn[i], lastrn))
4141             {
4142                 /* Mask coordinate lines where there are only a single atom */
4143                 if (acnt == 1)
4144                 {
4145                     ajFmtPrintF(flog, "%-15s%d\n", "ATOMONEONLY", lastatom + 1);
4146 
4147                     if (atommask)
4148                         pdbfile->linetype[lastatom] = pdbfileELinetypeIgnore;
4149                 }
4150 
4151                 /* Mask coordinate lines for residues lacking a CA atom */
4152                 if (noca)
4153                 {
4154                     odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatom - 1],
4155                                                   &tmp)));
4156 
4157                     if ((camask && odd) ||
4158                         (camask1 && !odd))
4159                         for (j = firstatom; j <= lastatom; j++)
4160                             pdbfile->linetype[j] = pdbfileELinetypeIgnore;
4161 
4162                     /* Remove residues from SEQRES records */
4163                     if ((camask && odd))
4164                     {
4165                         ipos = pdbfile->chnn[firstatom] - 1;
4166                         ajStrExchangeSS(&pdbfile->seqresful[ipos],
4167                                         pdbfile->rtype[firstatom], sub);
4168                     }
4169 
4170                     if (firstatom == lastatom)
4171                         ajFmtPrintF(flog, "%-15s%d\n", "ATOMNOCA",
4172                                     firstatom + 1);
4173                     else
4174                         ajFmtPrintF(flog, "%-15s%d %d\n", "ATOMNOCA",
4175                                     firstatom + 1, lastatom + 1);
4176                 }
4177 
4178 
4179                 /* Increment the residue counter if the code is recognised */
4180                 if (ajResidueFromTriplet(pdbfile->rtype[i], &aa1))
4181                     rcnt++;
4182 
4183                 if (rcnt >= min_chain_size)
4184                     chainok[pdbfile->chnn[i] - 1] = ajTrue;
4185 
4186                 ajStrAssignS(&lastrn, pdbfile->pdbn[i]);
4187 
4188                 /* Set count of atoms to zero, set the position of the first
4189                  * atom and set flag for recognising CA atom */
4190                 acnt = 1;
4191                 firstatom = i;
4192 
4193                 if (!(ajStrCmpC(pdbfile->atype[i], "CA")))
4194                     noca = ajFalse;
4195                 else
4196                     noca = ajTrue;
4197             }
4198 
4199 
4200             /* Set the position for the last atom read in */
4201             lastatom = i;
4202 
4203 
4204             /* Increment the atom counter */
4205             acnt++;
4206         }
4207         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4208         {
4209             rcnt = 0;
4210             lastchn = 0;
4211 
4212             modcnt++;
4213 
4214             if (modcnt != 1)
4215             {
4216                 for (j = 0; j < pdbfile->nchains; j++)
4217                 {
4218                     /* Only bother reporting error messages if a message
4219                      * about the SEQRES records not containing enough aa's
4220                      * has not already been reported */
4221                     /* If pdbfile->chainok is False, leave it so  */
4222                     if (!pdbfile->chainok[j])
4223                         continue;
4224                     else
4225                     {
4226                         if (!chainok[j])
4227                         {
4228                             pdbfile->chainok[j] = chainok[j];
4229                             ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
4230                                         "ATOMFEWAA", j + 1,
4231                                         (ajChararrGet(pdbfile->chid, j)),
4232                                         modcnt);
4233 
4234                         }
4235                     }
4236                 }
4237             }
4238 
4239         }
4240     }
4241 
4242     /* Ensure that C-terminal residues are masked if necessary */
4243     /*
4244     ** else if((pdbfile->linetype[i] == pdbfileELinetypeTER) ||
4245     **         (pdbfile->linetype[i] == pdbfileELinetypeENDMDL))
4246     ** {
4247     */
4248     if (noca)
4249     {
4250         odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatom - 1], &tmp)));
4251 
4252 
4253         if ((camask && odd) ||
4254             (camask1 && !odd))
4255             for (j = firstatom; j <= lastatom; j++)
4256                 pdbfile->linetype[j] = pdbfileELinetypeIgnore;
4257 
4258         /* Remove residues from SEQRES records */
4259         if ((camask && odd))
4260         {
4261             ipos = pdbfile->chnn[firstatom] - 1;
4262             ajStrExchangeSS(&pdbfile->seqresful[ipos],
4263                             pdbfile->rtype[firstatom], sub);
4264         }
4265 
4266         if (firstatom == lastatom)
4267         {
4268             ajFmtPrintF(flog, "%-15s%d\n", "ATOMNOCA", firstatom + 1);
4269         }
4270 
4271         else
4272             ajFmtPrintF(flog, "%-15s%d %d\n", "ATOMNOCA",
4273                         firstatom + 1, lastatom + 1);
4274     }
4275 
4276     /*
4277     ** }
4278     */
4279 
4280     /* Write the new (masked) seqres sequences if necessary */
4281     if (camask)
4282     {
4283         for (i = 0; i < pdbfile->nchains; i++)
4284         {
4285             tmpseq = ajStrNew();
4286 
4287             if (!pdbioSeqresToSequence(pdbfile->seqresful[i],
4288                                        &tmpseq, camask, &lenful))
4289                 ajFatal("Sequence conversion error in "
4290                         "pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
4291 
4292             ajStrAssignS(&pdbfile->seqres[i], tmpseq);
4293             pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
4294 
4295             ajStrDel(&tmpseq);
4296         }
4297     }
4298 
4299 
4300 
4301     /* Set modcnt to 1 for xray structures */
4302     if (!modcnt)
4303         modcnt = 1;
4304 
4305 
4306     /* Check for xray structures or last model of nmr structures */
4307     for (i = 0; i < pdbfile->nchains; i++)
4308     {
4309         /* Only bother reporting error messages if a message about the SEQRES
4310          * records not containing enough aa's has not already been reported */
4311         /* If pdbfile->chainok is False, leave it so  */
4312         if (!pdbfile->chainok[i])
4313             continue;
4314         else
4315         {
4316             if (!chainok[i])
4317             {
4318                 pdbfile->chainok[i] = chainok[i];
4319                 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "ATOMFEWAA",
4320                             i + 1, (ajChararrGet(pdbfile->chid, i)), modcnt);
4321 
4322             }
4323         }
4324     }
4325 
4326 
4327     /* Mask out any chains with insufficient amino acids either in the SEQRES
4328      * or ATOM records */
4329     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4330         if (((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
4331              (pdbfile->linetype[i] == pdbfileELinetypeHeterogen)) &&
4332             (!pdbfile->chainok[pdbfile->chnn[i] - 1]))
4333         {
4334             pdbfile->linetype[i] = pdbfileELinetypeIgnore;
4335         }
4336 
4337 
4338 
4339     /* Check for missing TER records. Where chains are not separated by TER
4340      * records (the chain id changes from line to line without an intervening
4341      * TER record and both chain id's are not whitespace). Where ATOM and
4342      * HETATM groups are not separated by TER records (a chain id is given on
4343      * one line, a whitespace chain id is given on the next line, and there
4344      * is no intervening TER record) */
4345 
4346 
4347     for (i = pdbfile->idxfirst + 1;
4348          i < pdbfile->nlines; i++)
4349     {
4350         if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
4351             (pdbfile->linetype[i] == pdbfileELinetypeCoordinate) &&
4352             pdbfile->chnn[i - 1] != pdbfile->chnn[i])
4353             ajFmtPrintF(flog, "%-15s%d %d\n", "TERMISSCHN", i, i + 1);
4354         else if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
4355                  ((pdbfile->linetype[i] == pdbfileELinetypeHeterogen) ||
4356                   (pdbfile->linetype[i] == pdbfileELinetypeWater)) &&
4357                  pdbfile->chnn[i - 1] == pdbfile->chnn[i])
4358             ajFmtPrintF(flog, "%-15s%d %d\n", "TERMISSHET", i, i + 1);
4359     }
4360 
4361 
4362     /* Tidy up  */
4363     AJFREE(chainok);
4364     ajStrDel(&aa3);
4365     ajStrDel(&lastrn);
4366     ajStrDel(&sub);
4367 
4368 
4369     /* Report problems with non-protein chains */
4370     for (i = 0; i < pdbfile->nchains; i++)
4371         if (pdbfile->chainok[i])
4372         {
4373             ok = ajTrue;
4374             break;
4375         }
4376 
4377     /* Return now if no protein chains are found */
4378     if (!ok)
4379     {
4380         ajWarn("No protein chains found in raw pdb file");
4381         ajFmtPrintF(flog, "%-15s\n", "NOPROTEINS");
4382         return ajFalse;
4383     }
4384 
4385     return ajTrue;
4386 }
4387 
4388 
4389 
4390 
4391 /* @funcstatic pdbioStandardiseNumbering **************************************
4392 **
4393 ** Reads a Pdbfile object and standardises the two sets of residue numbers
4394 ** (resn1 & resn2 arrays) derived from the raw residue numbers. The residue
4395 ** numbering is corrected for zero or negative residue numbers, non-standard
4396 ** numbering schemes and any other cases of non-sequentiality (e.g. where the
4397 ** next residue number is lower than the previous one, see 1pca).
4398 ** resn1 gives the sequence presuming an alternative numbering scheme, resn2
4399 ** gives the sequence presuming heterogeneity. Heterogeneity is indicated by
4400 ** a character in position lines[26] (the same position used to indicate
4401 ** alternative residue numbering schemes).
4402 **
4403 ** The resn1 & resn2 arrays of a Pdbfile object are modified.  The oddnum
4404 ** array is written.
4405 **
4406 ** oddnum array
4407 ** This is an array of Bool's which are TRUE for duplicate residues of
4408 ** heterogenous positions (e.g. if 2 different residues are both numbered '8'
4409 ** or one is '8' and the other '8A' for example then <oddnum> would be set
4410 ** True for the second residue.
4411 **
4412 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
4413 ** @param [u] flog    [AjPFile]      Pointer to log file (build diagnostics)
4414 **
4415 ** @return [AjBool]  True on success, False otherwise
4416 **
4417 ** @release 2.9.0
4418 ** @@
4419 ******************************************************************************/
4420 
pdbioStandardiseNumbering(AjPPdbfile pdbfile,AjPFile flog)4421 static AjBool pdbioStandardiseNumbering(AjPPdbfile pdbfile, AjPFile flog)
4422 {
4423     ajuint i = 0U;
4424     ajint lastchn = -1;         /* Chain number of last line read in */
4425     AjBool first = ajFalse;     /* True if we have processed the first
4426                                  * residue in a chain */
4427     ajint first_num = 0;        /* Number of first residue in chain */
4428     ajint modrn = 0;            /* Corrected first residue number */
4429     AjBool neg = ajFalse;       /* True if first residue number is negative */
4430     AjBool zer = ajFalse;       /* True if first residue number is zero */
4431     AjBool report_neg = ajFalse;/* True if we have reported an error that a
4432                                  * residue number is negative for this chain */
4433     AjBool report_zer = ajFalse;/* True if we have reported an error that a
4434                                  * residue number is zero for this chain */
4435     ajint add = 0;              /* An amount to add to the residue numbers to
4436                                  * correct them */
4437     AjBool ignore = ajFalse;
4438     AjBool odd = ajFalse;
4439     ajint rn = 0;               /* Current residue number */
4440     ajint last_rn = 0;          /* Last residue number read in */
4441     ajint this_rn = 0;          /* Current residue number read in */
4442     char last = ' ';            /* Chain id of last chain */
4443     char curr = ' ';            /* Chain id of current chain */
4444     AjPStr last_rt = NULL;      /* Type of previous residue */
4445     AjPStr this_rt = NULL;      /* Type of current residue */
4446     AjBool report_nonstd = ajFalse;     /* True if we have reported an error
4447                                          * that a non-standard residue
4448                                          * numbering scheme is used for this
4449                                          * chain */
4450     AjBool report_nonsqt = ajFalse;     /* True if we have reported an error
4451                                          * that any other cases of
4452                                          * non-sequential numbering are found
4453                                          * for this this chain */
4454     char aa1 = ' ';             /* Amino acid single character code */
4455 
4456     ajuint ipos = 0;
4457 
4458 
4459     last_rt = ajStrNew();
4460     this_rt = ajStrNew();
4461 
4462 
4463     /* Check args */
4464     if (!pdbfile || !flog)
4465     {
4466         ajWarn("Bad args passed to pdbioStandardiseNumbering\n");
4467 
4468         return ajFalse;
4469     }
4470 
4471 
4472     /* Check whether the integer part of the original pdb numbering (at this
4473      * point in code held in resn1 and resn2) gives the correct index into
4474      * the SEQRES sequence */
4475     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4476     {
4477         /* If model number has gone past 1 then we must have checked all the
4478          * chains, so break */
4479         if (pdbfile->modn[i] > 1)
4480             break;
4481 
4482         if (pdbfile->linetype[i] != pdbfileELinetypeCoordinate)
4483             continue;
4484 
4485 
4486         /* If residue number is not negative, zero, or greater then the
4487          * length of the SEQRES sequence and if the residue matches then
4488          * continue */
4489 
4490         if ((pdbfile->resn1[i] <=
4491              pdbfile->nres[pdbfile->chnn[i] - 1]) ||
4492             (pdbfile->resn1[i] >= 1))
4493         {
4494             ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
4495             ipos = pdbfile->chnn[i] - 1;
4496 
4497             if (aa1 == ajStrGetCharPos(pdbfile->seqres[ipos],
4498                                        pdbfile->resn1[i] - 1))
4499                 continue;
4500         }
4501 
4502         /* Otherwise flag an error for this chain and move to the end of the
4503          * chain */
4504 
4505         ajFmtPrintF(flog, "%-15s%d (%c)\n", "BADINDEX",
4506                     pdbfile->chnn[i],
4507                     ajChararrGet(pdbfile->chid, pdbfile->chnn[i] - 1));
4508 
4509 
4510         for (lastchn = pdbfile->chnn[i]; i < pdbfile->nlines; i++)
4511         {
4512             if (pdbfile->linetype[i] != pdbfileELinetypeCoordinate)
4513                 continue;
4514 
4515             if (pdbfile->modn[i] > 1)
4516                 break;
4517 
4518             if (pdbfile->chnn[i] != lastchn)
4519             {
4520                 i--;
4521                 break;
4522             }
4523         }
4524     }
4525 
4526     /* Fix for zero or negative residue numbers. This is done for both resn1
4527      * and resn2 arrays of a Pdbfile object */
4528 
4529     for (first = ajFalse, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4530     {
4531         /* Coordinate line */
4532         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4533         {
4534             /* New chain */
4535             if (pdbfile->chnn[i] != lastchn)
4536             {
4537                 neg = ajFalse;
4538                 zer = ajFalse;
4539                 report_neg = ajFalse;
4540                 report_zer = ajFalse;
4541                 first = ajFalse;
4542                 lastchn = pdbfile->chnn[i];
4543             }
4544 
4545             rn = pdbfile->resn1[i];
4546 
4547             if (!first)
4548             {
4549                 first_num = rn;
4550 
4551                 if (first_num > 0)
4552                 {
4553                     /* Advance counter to next chain */
4554                     for (; i < pdbfile->nlines; i++)
4555                         if (((pdbfile->linetype[i] ==
4556                               pdbfileELinetypeCoordinate) && (pdbfile->chnn[i] != lastchn))
4557                             || pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4558                         {
4559                             neg = ajFalse;
4560                             zer = ajFalse;
4561                             report_neg = ajFalse;
4562                             report_zer = ajFalse;
4563                             first = ajFalse;
4564                             lastchn = -1;
4565 
4566                             break;
4567                         }
4568 
4569                     /* i will get incremented in main loop above */
4570                     i--;
4571                     continue;
4572                 }
4573 
4574                 first = ajTrue;
4575             }
4576             if (rn < 0)
4577             {
4578                 neg = ajTrue;
4579 
4580                 if (zer)
4581                     modrn = rn - (first_num - 1);
4582                 else
4583                     /* if(neg && !zer) */
4584                     modrn = rn - (first_num - 1);
4585 
4586                 if (!report_neg)
4587                 {
4588                     ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "NEGNUM",
4589                                 pdbfile->chnn[i],
4590                                 ajChararrGet(pdbfile->chid,
4591                                              (pdbfile->chnn[i] - 1)), i + 1);
4592                     report_neg = ajTrue;
4593                 }
4594             }
4595             else if (rn == 0)
4596             {
4597                 zer = ajTrue;
4598 
4599                 if (neg)
4600                     modrn = rn - (first_num - 1);
4601                 else
4602                     /* if(!neg) */
4603                     modrn = rn + 1;
4604 
4605                 if (!report_zer)
4606                 {
4607                     ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
4608                                 "ZERNUM", pdbfile->chnn[i],
4609                                 ajChararrGet(pdbfile->chid,
4610                                              (pdbfile->chnn[i] - 1)), i + 1);
4611                     report_zer = ajTrue;
4612                 }
4613             }
4614             else
4615                 /* rn is (+ve) */
4616             {
4617                 if (!neg && zer)
4618                     modrn = rn + 1;
4619                 else if (neg && zer)
4620                     modrn = rn - (first_num - 1);
4621                 else
4622                     /* if(neg && !zer) */
4623                     modrn = rn - (first_num);
4624             }
4625 
4626             pdbfile->resn1[i] = modrn;
4627             pdbfile->resn2[i] = modrn;
4628         }
4629         /* New model */
4630         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4631         {
4632             neg = ajFalse;
4633             zer = ajFalse;
4634             report_neg = ajFalse;
4635             report_zer = ajFalse;
4636             first = ajFalse;
4637             lastchn = -1;
4638         }
4639     }
4640 
4641 
4642 
4643     /* Fix non-standard residue numbering scheme. This is done for resn1
4644      * array of a Pdbfile object only. */
4645     for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4646     {
4647         /* Coordinate line */
4648         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4649         {
4650             /* New chain */
4651             if (pdbfile->chnn[i] != lastchn)
4652             {
4653                 add = 0;
4654                 first = ajFalse;
4655                 ignore = ajFalse;
4656                 lastchn = pdbfile->chnn[i];
4657                 report_nonstd = ajFalse;
4658             }
4659 
4660             if (!first)
4661             {
4662                 /* Remove the chmyotrypsin numbering code */
4663                 last_rn = pdbfile->resn1[i];
4664                 last = ajStrGetCharPos(pdbfile->lines[i], 26);
4665                 first = ajTrue;
4666                 continue;
4667             }
4668 
4669             rn = pdbfile->resn1[i];
4670             curr = ajStrGetCharPos(pdbfile->lines[i], 26);
4671 
4672             if (curr != last)
4673             {
4674                 if (rn == last_rn)
4675                 {
4676                     add++;
4677                     ignore = ajTrue;
4678                 }
4679             }
4680 
4681             if (rn != last_rn)
4682             {
4683                 ignore = ajFalse;
4684             }
4685 
4686 
4687 
4688             last = curr;
4689             last_rn = rn;
4690 
4691             pdbfile->resn1[i] = rn + add;
4692 
4693             if (ignore)
4694             {
4695                 pdbfile->oddnum[i] = ajTrue;
4696 
4697                 if (!report_nonstd)
4698                 {
4699                     ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "ODDNUM",
4700                                 pdbfile->chnn[i],
4701                                 ajChararrGet(pdbfile->chid,
4702                                              (pdbfile->chnn[i] - 1)), i + 1);
4703                     report_nonstd = ajTrue;
4704                 }
4705 
4706             }
4707 
4708         }
4709         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4710         {
4711             add = 0;
4712             first = ajFalse;
4713             ignore = ajFalse;
4714             lastchn = -1;
4715             report_nonstd = ajFalse;
4716         }
4717     }
4718 
4719 
4720 
4721 
4722 
4723 
4724 
4725     /* Fix remaining non-sequential residue numbering in resn1 array of
4726      * Pdbfile object */
4727     for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4728     {
4729         /* Coordinate line */
4730         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4731         {
4732             /* New chain */
4733             if (pdbfile->chnn[i] != lastchn)
4734             {
4735                 add = 0;
4736                 first = ajFalse;
4737                 ignore = ajFalse;
4738                 lastchn = pdbfile->chnn[i];
4739                 report_nonsqt = ajFalse;
4740             }
4741 
4742             if (!first)
4743             {
4744                 last_rn = pdbfile->resn1[i];
4745                 ajStrAssignS(&last_rt, pdbfile->rtype[i]);
4746 
4747                 first = ajTrue;
4748                 continue;
4749             }
4750 
4751             this_rn = pdbfile->resn1[i];
4752             ajStrAssignS(&this_rt, pdbfile->rtype[i]);
4753 
4754 
4755             /* A new residue is indicated if this ATOM is 'N' or if this is a
4756              * different residue type */
4757             if (!(ajStrCmpC(pdbfile->atype[i], "N")) ||
4758                 !(ajStrMatchS(this_rt, last_rt)))
4759             {
4760                 /* Check for duplicate residue numbers */
4761                 if (this_rn == last_rn)
4762                 {
4763                     add++;
4764                     ignore = ajTrue;
4765                     odd = ajTrue;
4766                 }
4767 
4768                 /* Check for drops in residue numbers, see 1pca */
4769                 if (this_rn < last_rn)
4770                 {
4771                     add += (last_rn - this_rn + 1);
4772                     ignore = ajTrue;
4773                     odd = ajFalse;
4774                 }
4775             }
4776 
4777 
4778             if (this_rn > last_rn)
4779                 ignore = ajFalse;
4780 
4781             pdbfile->resn1[i] = this_rn + add;
4782 
4783 
4784             ajStrAssignS(&last_rt, this_rt);
4785             last_rn = this_rn;
4786 
4787 
4788             if (ignore)
4789             {
4790                 if (odd)
4791                     pdbfile->oddnum[i] = ajTrue;
4792 
4793                 if (!report_nonsqt)
4794                 {
4795                     ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "NONSQNTL",
4796                                 pdbfile->chnn[i],
4797                                 ajChararrGet(pdbfile->chid,
4798                                              (pdbfile->chnn[i] - 1)), i + 1);
4799                     report_nonsqt = ajTrue;
4800                 }
4801             }
4802         }
4803 
4804         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4805         {
4806             add = 0;
4807             first = ajFalse;
4808             ignore = ajFalse;
4809             lastchn = -1;
4810             report_nonsqt = ajFalse;
4811         }
4812     }
4813 
4814 
4815 
4816     /* Fix remaining non-sequential residue numbering in resn2 array of
4817      * Pdbfile object (duplicate lines for presumed heterogenous residues
4818      * positions are ignored) */
4819     for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4820     {
4821         /* Coordinate line */
4822         if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) &&
4823             (!pdbfile->oddnum[i]))
4824         {
4825             /* New chain */
4826             if (pdbfile->chnn[i] != lastchn)
4827             {
4828                 add = 0;
4829                 first = ajFalse;
4830                 lastchn = pdbfile->chnn[i];
4831             }
4832 
4833             if (!first)
4834             {
4835                 last_rn = pdbfile->resn2[i];
4836                 ajStrAssignS(&last_rt, pdbfile->rtype[i]);
4837 
4838                 first = ajTrue;
4839                 continue;
4840             }
4841 
4842             this_rn = pdbfile->resn2[i];
4843             ajStrAssignS(&this_rt, pdbfile->rtype[i]);
4844 
4845 
4846             /* A new residue is indicated if this ATOM is 'N' or if this is a
4847              * different residue type */
4848             if (!(ajStrCmpC(pdbfile->atype[i], "N")) ||
4849                 !(ajStrMatchS(this_rt, last_rt)))
4850             {
4851                 /* Check for duplicate residue numbers */
4852                 if (this_rn == last_rn)
4853                     add++;
4854 
4855                 /* Check for drops in residue numbers, see 1pca */
4856                 if (this_rn < last_rn)
4857                     add += (last_rn - this_rn + 1);
4858             }
4859 
4860             pdbfile->resn2[i] = this_rn + add;
4861             ajStrAssignS(&last_rt, this_rt);
4862             last_rn = this_rn;
4863         }
4864         else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4865         {
4866             add = 0;
4867             first = ajFalse;
4868             lastchn = -1;
4869         }
4870     }
4871 
4872 
4873     /* Tidy up and return */
4874     ajStrDel(&last_rt);
4875     ajStrDel(&this_rt);
4876 
4877     return ajTrue;
4878 }
4879 
4880 
4881 
4882 
4883 /* @funcstatic pdbioAlignNumbering ********************************************
4884 **
4885 ** Reads a Pdbfile object and determines for each chain a set of residue
4886 ** numbers (the resni array) that give the correct index into the full length
4887 ** (SEQRES) sequence for residues listed in the ATOM records.
4888 **
4889 ** The resni and resn1ok arrays of a Pdbfile object are written.
4890 ** The seqres and nres elements may be modified for any missing N-terminal
4891 ** residues.
4892 **
4893 ** resn1ok array
4894 ** This array contains Bool's for each chain which are TRUE if resn1 was
4895 ** used to derive resni, i.e. gave correct alignment to the full-length
4896 ** (SEQRES) sequence.  If False then resn2 was used ( resn1 gives the
4897 ** sequence presuming an alternative numbering scheme, resn2 gives the
4898 ** sequence presuming heterogeneity).
4899 **
4900 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
4901 ** @param [u] flog    [AjPFile]      Pointer to log file (build diagnostics)
4902 ** @param [r] lim     [ajuint]        Max. no. permissible mismatches between
4903 **                                   the ATOM & SEQRES sequences.
4904 ** @param [r] lim2     [ajuint]       Max. no. residues to trim when checking
4905 **                                   for missing C-terminal SEQRES residues.
4906 **
4907 ** @return [AjBool]  True on success, False otherwise
4908 **
4909 ** @release 2.9.0
4910 ** @@
4911 ** Must comment what diagnostics this writes!
4912 ** This now writes its diagnostics to flog, rather than tempfile.
4913 ******************************************************************************/
4914 
pdbioAlignNumbering(AjPPdbfile pdbfile,AjPFile flog,ajuint lim,ajuint lim2)4915 static AjBool pdbioAlignNumbering(AjPPdbfile pdbfile, AjPFile flog, ajuint lim,
4916                              ajuint lim2)
4917 {
4918     /* Sequence and residue number data are given for each unique chain (data
4919      * for the first model only is considered when assigning residue numbers) */
4920 
4921 
4922     /* Rather than use, e.g. seq1, seq2 & seq, we could use a single 2
4923      * dimensional array, but we would need new code for 2d arrays of
4924      * characters */
4925 
4926 
4927     ajuint a = 0U;              /* Loop counter */
4928     ajuint b = 0U;              /* Loop counter */
4929     ajuint maxb = 0U;           /* Max value of b */
4930     ajuint i = 0U;              /* Loop counter */
4931     ajint j = 0;                /* Loop counter */
4932     ajint k = 0;                /* Loop counter */
4933     ajint x = 0;                /* Loop counter */
4934     ajint y = 0;                /* Loop counter */
4935     ajint z = 0;                /* Loop counter */
4936 
4937 
4938     AjPStr *seq1 = NULL;        /* Sequences of residues from ATOM records
4939                                  * (all residues) */
4940     AjPStr *seq2 = NULL;        /* Sequences of residues from ATOM records
4941                                  * (excluding residues for which oddnum array
4942                                  * in Pdbfile object is True) */
4943     AjPStr *seq = NULL;         /* Pointer to seq1 or seq2 */
4944     AjPStr seqbit = NULL;       /* Subsequence of seq (real copy) */
4945     ajlong lenseqbit = 0;       /* Length of seqbit */
4946 
4947 
4948     ajint *nres1 = NULL;        /* No. residues for seq1/arr1 */
4949     ajint *nres2 = NULL;        /* No. residues for seq2/arr2 */
4950     ajint *nres = NULL;         /* Pointer to nres1 or nres2 */
4951 
4952 
4953     AjPInt *num1 = NULL;        /* Residue numbers for seq1 (from resn1
4954                                  * element of the Pdbfile object) */
4955     AjPInt *num2 = NULL;        /* Residue numbers for seq2 (from resn2
4956                                  * element of the Pdbfile object) */
4957     AjPInt *num = NULL;         /* Pointer to num1 or num2 */
4958 
4959     AjPInt *idx = NULL;         /* Gives correct index into seqres sequence
4960                                  * (from Pdbfile object) for the current
4961                                  * sequence. These are residue numbers and
4962                                  * therefore idx would have a value of 1 for
4963                                  * the first seqres residue. */
4964     AjPInt *idx_full = NULL;    /* As idx but with empty array elements
4965                                  * replacing missing residues so that we can
4966                                  * index into idx_full using residue numbers
4967                                  * from num */
4968 
4969 
4970     ajint last1 = -1000;        /* Number of last residue for seq1/arr1 */
4971     ajint last2 = -1000;        /* Number of last residue for seq2/arr2 */
4972 
4973     char aa1 = ' ';             /* Amino acid single character code */
4974     ajint c = 0;                /* No. of current chain */
4975 
4976     AjBool done = ajFalse;      /* True if we have found the correct residue
4977                                  * numbering */
4978 
4979     char *insert = NULL;        /* String from N-terminus of ATOM sequence to
4980                                  * insert at N-terminus of SEQRES sequence in
                                 * case the latter is missing residues */
4982     AjPStr tmpseqres = NULL;    /* Temp. string for seqres sequence from
4983                                  * Pdbfile object */
4984     AjPStr bit = NULL;          /* Temp. string for a bit of sequence */
4985     ajuint nmismatches = 0;     /* No. of mismatches between ATOM and SEQRES
4986                                  * sequence */
4987     ajlong loc = 0L;            /* Location of ATOM sequence in SEQRES
4988                                  * sequence (if applicable) */
4989     ajint len = 0;              /* Length of seqres sequence from Pdbfile
4990                                  * object */
4991     AjBool err = ajFalse;       /* True if a residue number from the ATOM
4992                                  * records would cause an array boundary
4993                                  * error in the seqres sequence */
4994     ajint siz_substr = 0;       /* Size of substring for alignment of ATOM
4995                                  * and SEQRES sequences */
4996     const char *atom_ptr = NULL;/* Pointer to ATOM sequence */
4997     const char *seqres_ptr = NULL;      /* Pointer to SEQRES sequence */
4998     const char *loc_ptr = NULL; /* Pointer for location of match of substring
4999                                  * to SEQRES sequence */
5000     AjPStr substr = NULL;       /* Substring of ATOM sequence */
5001     AjPStr substr2 = NULL;      /* Substring of ATOM sequence */
5002     ajint atom_idx = 0;         /* Index into ATOM sequence */
5003     ajint seqres_idx = 0;       /* Index into SEQRES sequence */
5004     ajint seqres_idx_last = 0;  /* Index into SEQRES sequence for C-terminal
5005                                  * residue of substring */
5006     char aa_last = ' ';         /* Amino acid residue code of C-terminal
5007                                  * residue of substring */
5008     AjBool fixed = ajFalse;     /* Whether the mismatch residue of the
5009                                  * substring was later aligned correctly */
5010     AjBool done_end = ajFalse;  /* True if we have aligned the terminus of
5011                                  * the ATOM sequence */
5012     AjBool founderr = ajFalse;  /* Match of substring of ATOM sequence to
5013                                  * SEQRES found with potential mismatched
5014                                  * residue */
5015     AjPStr msgstr = NULL;       /* A string to hold a message */
5016     AjPStr msgbit = NULL;       /* A temp. string to hold part of a message */
5017     ajint idx_misfit_atom = 0;  /* Index into ATOM sequence (seq) for first
5018                                  * residue that does not match SEQRES
5019                                  * sequence */
5020 
5021     ajint idx_misfit_seqres = 0;/* Index into SEQRES sequence for first
5022                                  * residue that does not match ATOM sequence */
5023     AjPStr aa_misfit = NULL;    /* Original (PDB) residue number for first
5024                                  * residue mismatch between ATOM and SEQRES
5025                                  * sequences */
5026     ajint this_num = 0;         /* Current residue number */
5027 /*DIAGNOSTIC*/
5028 #if AJFALSE
5029     ajint max = 0;              /* Used in diagnostics code */
5030 #endif /* AJFALSE */
5031 
5032     /* Check args */
5033     if (!pdbfile || !flog)
5034     {
5035         ajWarn("Bad args passed to pdbioAlignNumbering\n");
5036 
5037         return ajFalse;
5038     }
5039 
5040     /* Allocate memory for arrays etc */
5041     aa_misfit = ajStrNew();
5042     msgstr = ajStrNew();
5043     msgbit = ajStrNew();
5044     seqbit = ajStrNew();
5045 
5046     insert = ajCharNewRes(MAXMISSNTERM);
5047     tmpseqres = ajStrNew();
5048     bit = ajStrNew();
5049     substr = ajStrNew();
5050     substr2 = ajStrNew();
5051 
5052     AJCNEW0(seq1, pdbfile->nchains);
5053     AJCNEW0(seq2, pdbfile->nchains);
5054 
5055 
5056     AJCNEW0(num1, pdbfile->nchains);
5057     AJCNEW0(num2, pdbfile->nchains);
5058 
5059     AJCNEW0(idx, pdbfile->nchains);
5060     AJCNEW0(idx_full, pdbfile->nchains);
5061 
5062     AJCNEW0(nres1, pdbfile->nchains);
5063     AJCNEW0(nres2, pdbfile->nchains);
5064 
5065     for (i = 0U; i < pdbfile->nchains; i++)
5066     {
5067         if (!pdbfile->chainok[i])
5068             continue;
5069 
5070         seq1[i] = ajStrNew();
5071         seq2[i] = ajStrNew();
5072 
5073         num1[i] = ajIntNew();
5074         num2[i] = ajIntNew();
5075     }
5076 
5077     /* Assign arrays */
5078     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
5079     {
5080         /* Coordinate line */
5081         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
5082         {
5083             /* Break if we are no longer in the first model */
5084             if (pdbfile->modn[i] != 1)
5085                 break;
5086             else
5087                 c = pdbfile->chnn[i] - 1;
5088 
5089 #if AJFALSE
5090             ajFmtPrint("%S\n", pdbfile->lines[i]);
5091 #endif /* AJFALSE */
5092             if (pdbfile->resn1[i] != last1)
5093             {
5094                 ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
5095                 ajStrAppendK(&seq1[c], aa1);
5096 
5097                 ajIntPut(&num1[c], nres1[c], pdbfile->resn1[i]);
5098                 last1 = pdbfile->resn1[i];
5099                 nres1[c]++;
5100             }
5101 
5102             if ((pdbfile->resn2[i] != last2) && (!pdbfile->oddnum[i]))
5103             {
5104                 ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
5105                 ajStrAppendK(&seq2[c], aa1);
5106 
5107                 ajIntPut(&num2[c], nres2[c], pdbfile->resn2[i]);
5108                 last2 = pdbfile->resn2[i];
5109                 nres2[c]++;
5110             }
5111         }
5112     }
5113 
5114 
5115 
5116     /* Allocate memory for arrays of residue numbers */
5117     for (i = 0; i < pdbfile->nchains; i++)
5118     {
5119         if (!pdbfile->chainok[i])
5120             continue;
5121 
5122 
5123         /* Array must be big enough to cope with either sequence */
5124         if (nres1[i] > nres2[i])
5125             idx[i] = ajIntNewRes(nres1[i]);
5126         else
5127             idx[i] = ajIntNewRes(nres2[i]);
5128 
5129 
5130 
        /* Array must be big enough to cope with the highest residue number
         * from either array */
5133         if (ajIntGet(num1[i], nres1[i] - 1) > ajIntGet(num2[i], nres2[i] - 1))
5134             idx_full[i] = ajIntNewRes(ajIntGet(num1[i], nres1[i] - 1) + 1);
5135         else
5136             idx_full[i] = ajIntNewRes(ajIntGet(num2[i], nres2[i] - 1) + 1);
5137     }
5138 
5139 
5140 
5141 
5142     /* Loop for each chain */
5143     for (i = 0; i < pdbfile->nchains; i++)
5144     {
5145         /* Skip this chain if necessary */
5146         if (!(pdbfile->chainok[i]))
5147             continue;
5148         else
5149             ajStrAssignS(&tmpseqres, pdbfile->seqres[i]);
5150 
5151         /* Loop for checking for missing residues from N-term of SEQRES
5152          * sequence */
5153         for (done = ajFalse, j = 0; j < MAXMISSNTERM + 1; j++)
5154         {
5155             /* Loop for the 2 sequences derived from the ATOM records */
5156             for (x = 0; x < 2; x++)
5157             {
5158                 if (x == 0)
5159                 {
5160                     seq = seq1;
5161                     nres = nres1;
5162                     num = num1;
5163                 }
5164                 else
5165                 {
5166                     seq = seq2;
5167                     nres = nres2;
5168                     num = num2;
5169                 }
5170 
5171                 /* Restore the original seqres sequence */
5172                 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5173                 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5174 
5175 
5176                 /* Modify the seqres sequence in the Pdbfile object by adding
5177                  * the first j residues from the N-terminus of the ATOM
5178                  * sequence to the N-terminus of <seqres>. */
5179 
5180                 for (k = 0; (k < j) && (k < nres[i]); k++)
5181                     insert[k] = ajStrGetCharPos(seq[i], k);
5182 
5183                 insert[k] = '\0';
5184 
5185                 ajStrInsertC(&(pdbfile->seqres[i]), 0, insert);
5186                 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5187 
5188 #if AJFALSE
5189                 /* DIAGNOSTIC CODE */
5190                 ajFmtPrintF(flog, "\nChainMod %d"
5191                             " (seq %d)\n%S\n%S\n\n\n",
5192                             i + 1, x + 1,
5193                             seq[i],
5194                             pdbfile->seqres[i]);
5195 #endif /* AJFALSE */
5196 
5197                 /***********************************************/
5198                 /******************* STEP 1 ********************/
5199                 /***********************************************/
5200 #if AJFALSE
5201                 /* DIAGNOSTIC */
5202                 ajFmtPrintF(flog, "STEP1 tmpseqres: %S\n", tmpseqres);
5203 
5204                 ajFmtPrintF(flog, "chnn : %d\n"
5205                             "seq1 : %S\n"
5206                             "seq2 : %S\n"
5207                             "seqr : %S\n", i + 1, seq1[i], seq2[i],
5208                             pdbfile->seqres[i]);
5209                 ajFmtPrintF(flog, "\n");
5210                 if (ajStrMatchS(seq1[i], seq2[i]))
5211                     ajFmtPrintF(flog, "seq1 and seq2 match\n");
5212                 else
5213                     ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5214 
5215                 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5216                     ajFmtPrintF(flog, "seq1 and seqres match\n");
5217                 else
5218                     ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5219                 ajFmtPrintF(flog, "\n");
5220 #endif /* AJFALSE */
5221 
5222                 /* Check whether sequences are identical length */
5223                 if (nres[i] == pdbfile->nres[i])
5224                 {
5225                     /* Sequences are identical - assign residue numbers 'by
5226                      * hand' */
5227                     if (ajStrMatchS(seq[i], pdbfile->seqres[i]))
5228                     {
5229                         for (k = 0; k < nres[i]; k++)
5230                             ajIntPut(&idx[i], k, k + 1);
5231 
5232                         if (x == 0)
5233                             pdbfile->resn1ok[i] = ajTrue;
5234                         else
5235                             pdbfile->resn1ok[i] = ajFalse;
5236 
5237                         done = ajTrue;
5238 
5239 #if AJFALSE
5240                         /* DIAGNOSTIC */
5241                         ajFmtPrintF(flog, "STEP1 OK\n");
5242 #endif /* AJFALSE */
5243                         break;
5244                     }
                    /* Sequences are the same length but contain mismatches */
5246                     else
5247                     {
5248                         for (ajStrAssignClear(&msgstr), nmismatches = 0, k = 0;
5249                              k < nres[i]; k++)
5250                             if (ajStrGetCharPos(seq[i], k) !=
5251                                 ajStrGetCharPos(pdbfile->seqres[i], k))
5252                             {
5253                                 nmismatches++;
5254                                 /* Correct the seqres sequence. Replace this
5255                                  * with appropriate library call once
5256                                  * available so we don't have to probe the
5257                                  * internals of the structure */
5258 
5259 
5260                                 /* a will give the number of the first
5261                                  * coordinate line for the mismatch residue
5262                                  * from the ATOM records */
5263                                 a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
5264                                                     ajIntGet(num[i], k));
5265 
5266 
5267                                 /* Get the id of the mismatch residue in the
5268                                  * SEQRES sequence.  */
5269                                 ajResidueToTriplet(pdbfile->seqres[i]->Ptr[k],
5270                                                    &aa_misfit);
5271 
5272                                 /* To give correct index into SEQRES records
5273                                  * in original PDB file, subtract j to
5274                                  * account for modifications to the
5275                                  * N-terminus that were made for missing
5276                                  * residues relative to ATOM sequence. A
5277                                  * further 1 is added to give a number
5278                                  * starting from 1 (rather than 0) */
5279 
5280                                 ajFmtPrintS(&msgbit, "%S%S:%S%d.    ",
5281                                             pdbfile->rtype[a],
5282                                             pdbfile->pdbn[a],
5283                                             aa_misfit, k - j + 1);
5284 
5285 
5286                                 ajStrAppendS(&msgstr, msgbit);
5287 
5288 
5289                                 pdbfile->seqres[i]->Ptr[k] =
5290                                     ajStrGetCharPos(seq[i], k);
5291 
5292                             }
5293 
5294 
5295                         /* Sequences are same length (acceptable number of
5296                          * mismatches) */
5297                         if (nmismatches <= lim)
5298                         {
5299                             if (nmismatches)
5300                                 ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n",
5301                                             "MISMATCH", i + 1,
5302                                             ajChararrGet(pdbfile->chid, i),
5303                                             nmismatches, msgstr);
5304 
5305 
5306                             for (k = 0; k < nres[i]; k++)
5307                                 ajIntPut(&idx[i], k, k + 1);
5308 
5309                             if (x == 0)
5310                                 pdbfile->resn1ok[i] = ajTrue;
5311                             else
5312                                 pdbfile->resn1ok[i] = ajFalse;
5313 
5314                             done = ajTrue;
5315 #if AJFALSE
5316                             /* DIAGNOSTIC */
5317                             ajFmtPrintF(flog, "STEP1 OK %d mismatches\n",
5318                                         nmismatches);
5319 #endif /* AJFALSE */
5320 
5321                             break;
5322                         }
5323                         else
5324                         {
5325                             /* Otherwise, sequences are same length
5326                              * (unacceptable number of mismatches) Restore
5327                              * the original seqres sequence */
5328                             ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5329                             pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5330 #if AJFALSE
5331                             /* DIAGNOSTIC */
5332                             ajFmtPrintF(flog,
5333                                         "STEP1 **NOT** OK %d mismatches\n",
5334                                         nmismatches);
5335 #endif /* AJFALSE */
5336                         }
5337                     }
5338                 }
5339 
5340                 /***********************************************/
5341                 /******************* STEP 2 ********************/
5342                 /***********************************************/
5343 #if AJFALSE
5344                 /* DIAGNOSTIC */
5345                 ajFmtPrintF(flog, "STEP2 tmpseqres: %S\n", tmpseqres);
5346 
5347                 ajFmtPrintF(flog, "chnn : %d\n"
5348                             "seq1 : %S\n"
5349                             "seq2 : %S\n"
5350                             "seqr : %S\n", i + 1, seq1[i], seq2[i],
5351                             pdbfile->seqres[i]);
5352                 ajFmtPrintF(flog, "\n");
5353                 if (ajStrMatchS(seq1[i], seq2[i]))
5354                     ajFmtPrintF(flog, "seq1 and seq2 match\n");
5355                 else
5356                     ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5357 
5358                 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5359                     ajFmtPrintF(flog, "seq1 and seqres match\n");
5360                 else
5361                     ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5362                 ajFmtPrintF(flog, "\n");
5363 #endif /* AJFALSE */
5364 
5365                 /* JONNEW The code below replaces 'ORIGINAL' code block */
5366                 maxb = lim2;
5367 
5368                 if (maxb >= MAJSTRGETLEN(seq[i]))
5369                     maxb = MAJSTRGETLEN(seq[i]) - 1;
5370 
5371 #if AJFALSE
5372                 /* DIAGNOSTIC */
5373                 ajFmtPrintF(flog, "maxb = %d\nlim2 = %d\n");
5374                 ajDebug("strlen: %d\n",
5375                         maxb, lim2, MAJSTRGETLEN(seq[i]));
5376 #endif /* AJFALSE */
5377 
5378                 /* First pass through loop is full-length sequence */
5379                 for (b = 0; b < maxb + 1; b++)
5380                 {
5381                     lenseqbit = MAJSTRGETLEN(seq[i]) - b;
5382                     ajStrAssignSubS(&seqbit, seq[i], 0, (lenseqbit - 1));
5383                     ajStrAssignSubS(&bit, seq[i], (lenseqbit), -1);
5384 
5385 #if AJFALSE
5386                     /* DIAGNOSTIC */
5387                     ajFmtPrintF(flog, "Trying ATOM substring %S\n"
5388                                 "versus SEQRES        %S\n",
5389                                 seqbit, pdbfile->seqres[i]);
5390 #endif /* AJFALSE */
5391 
5392                     /* Check whether ATOM is substring of SEQRES sequence */
5393                     if ((loc = ajStrFindS(pdbfile->seqres[i], seqbit)) != -1)
5394                     {
5395                         /* Check to ensure that the last substring residue is
                         * aligned to the last residue of the SEQRES sequence,
5397                          * otherwise, problems would arise in cases where
5398                          * SEQRES sequence had C-terminal residues that were
5399                          * absent from the ATOM (& therefore also substring)
5400                          * sequence. */
5401                         if ((loc + lenseqbit) != pdbfile->nres[i])
5402                             break;
5403 
5404                         /* ATOM is substring of SEQRES sequence - assign
5405                          * residue numbers 'by hand' */
5406 
5407                         for (k = 0; k < nres[i]; k++)
5408                             ajIntPut(&idx[i], k, k + (ajint) loc + 1);
5409 
5410                         if (x == 0)
5411                             pdbfile->resn1ok[i] = ajTrue;
5412                         else
5413                             pdbfile->resn1ok[i] = ajFalse;
5414 
5415 
5416                         /* SEQRES sequence is missing C-terminal ATOM
5417                          * residues */
5418                         if (b)
5419                         {
5420                             ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
5421                                         "MISSCTERM", i + 1,
5422                                         ajChararrGet(pdbfile->chid, i), b);
5423 
5424                             ajStrAppendS(&(pdbfile->seqres[i]), bit);
5425                             pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5426                         }
5427 
5428                         done = ajTrue;
5429 #if AJFALSE
5430                         /* DIAGNOSTIC */
5431                         ajFmtPrintF(flog, "STEP2 OK\n");
5432 #endif /* AJFALSE */
5433                         break;
5434                     }
5435                 }
5436 
5437                 if (done)
5438                     break;
5439                 else
5440                 {
5441                     /* Otherwise, ATOM is NOT a substring of SEQRES sequence */
5442 #if AJFALSE
5443                     /* DIAGNOSTIC */
5444                     ajFmtPrintF(flog, "STEP2 **NOT** OK\n");
5445 #endif /* AJFALSE */
5446                 }
5447 
5448 #if AJFALSE
5449                 /* ORIGINAL */
5450                 if ((loc = ajStrFindS(pdbfile->seqres[i], seq[i])) != -1)
5451                 {
5452                     for (k = 0; k < nres[i]; k++)
5453                         ajIntPut(&idx[i], k, k + loc + 1);
5454 
5455                     if (x == 0)
5456                         pdbfile->resn1ok[i] = ajTrue;
5457                     else
5458                         pdbfile->resn1ok[i] = ajFalse;
5459 
5460                     done = ajTrue;
5461                     ajFmtPrintF(flog, "STEP2 OK\n");
5462                     break;
5463                 }
5464 #endif /* AJFALSE */
5465 
5466 
5467                 /***********************************************/
5468                 /******************* STEP 3 ********************/
5469                 /***********************************************/
5470 #if AJFALSE
5471                 /* DIAGNOSTIC */
5472                 ajFmtPrintF(flog, "STEP3 tmpseqres: %S\n", tmpseqres);
5473 
5474                 ajFmtPrintF(flog, "chnn : %d\n"
5475                             "seq1 : %S\n"
5476                             "seq2 : %S\n"
5477                             "seqr : %S\n", i + 1, seq1[i], seq2[i],
5478                             pdbfile->seqres[i]);
5479                 ajFmtPrintF(flog, "\n");
5480                 if (ajStrMatchS(seq1[i], seq2[i]))
5481                     ajFmtPrintF(flog, "seq1 and seq2 match\n");
5482                 else
5483                     ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5484 
5485                 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5486                     ajFmtPrintF(flog, "seq1 and seqres match\n");
5487                 else
5488                     ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5489                 ajFmtPrintF(flog, "\n");
5490 #endif /* AJFALSE */
5491 
5492 
5493                 /* Check whether SEQRES is substring of ATOM sequence */
5494                 /* This will only find omissions from the SEQRES sequence
5495                  * where the ATOM sequence would align to it without gaps,
5496                  * and where the SEQRES sequence does not have extra
5497                  * N-terminal residues relative to ATOM (such cases are
5498                  * caught in STEP 2) */
5499                 if ((loc = ajStrFindS(seq[i], pdbfile->seqres[i])) != -1)
5500                 {
5501                     /* SEQRES is substring of ATOM sequence - correct for
5502                      * residues missing from SEQRES sequence and assign
5503                      * residue numbers 'by hand' */
5504 
5505                     /* N-terminal insertion needed */
5506                     if (loc != 0)
5507                     {
5508                         ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSNTERM",
5509                                     i + 1, ajChararrGet(pdbfile->chid, i),
5510                                     loc);
5511 
5512 
5513                         ajStrAssignSubS(&bit, seq[i], 0, loc - 1);
5514                         ajStrInsertS(&(pdbfile->seqres[i]), 0, bit);
5515                         pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5516                     }
5517 
5518                     /* C-terminal insertion needed */
5519                     if (pdbfile->nres[i] != nres[i])
5520                     {
5521                         ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSCTERM",
5522                                     i + 1, ajChararrGet(pdbfile->chid, i),
5523                                     (nres[i] - pdbfile->nres[i]));
5524 
5525 
5526                         ajStrAssignSubS(&bit, seq[i], pdbfile->nres[i],
5527                                         nres[i] - 1);
5528                         ajStrAppendS(&(pdbfile->seqres[i]), bit);
5529                         pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5530                     }
5531 
5532                     for (k = 0; k < nres[i]; k++)
5533                         ajIntPut(&idx[i], k, k + 1);
5534 
5535                     if (x == 0)
5536                         pdbfile->resn1ok[i] = ajTrue;
5537                     else
5538                         pdbfile->resn1ok[i] = ajFalse;
5539 
5540 #if AJFALSE
5541                     /* DIAGNOSTIC */
5542                     ajFmtPrintF(flog, "STEP3 OK\n");
5543                     */
5544 #endif /* AJFALSE */
5545                         done = ajTrue;
5546                         break;
5547                 }
5548 
5549                 /* Otherwise, SEQRES is NOT a substring of the ATOM sequence */
5550 #if AJFALSE
5551                 /* DIAGNOSTIC */
5552                 ajFmtPrintF(flog, "STEP3 **NOT** OK\n");
5553                 */
5554 #endif /* AJFALSE */
5555 
5556                     /***********************************************/
5557                     /******************* STEP 4 ********************/
5558                     /***********************************************/
5559 #if AJFALSE
5560                     /* DIAGNOSTIC */
5561                     ajFmtPrintF(flog, "STEP4.1 tmpseqres: %S\n", tmpseqres);
5562 
5563                     ajFmtPrintF(flog, "chnn : %d\n"
5564                                 "seq1 : %S\n"
5565                                 "seq2 : %S\n"
5566                                 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5567                                 pdbfile->seqres[i]);
5568                     ajFmtPrintF(flog, "\n");
5569                     if (ajStrMatchS(seq1[i], seq2[i]))
5570                         ajFmtPrintF(flog, "seq1 and seq2 match\n");
5571                     else
5572                         ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5573 
5574                     if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5575                         ajFmtPrintF(flog, "seq1 and seqres match\n");
5576                     else
5577                         ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5578                     ajFmtPrintF(flog, "\n");
5579 
5580                     ajFmtPrintF(flog, "STEP4.2 tmpseqres: %S\n", tmpseqres);
5581 #endif /* AJFALSE */
5582 
5583                     /* Check whether residue numbering is correct (and count the
5584                      * number of mismatches) */
5585                     for (err = ajFalse, ajStrAssignClear(&msgstr), nmismatches = 0,
5586                              k = 0; k < nres[i]; k++)
5587                     {
5588                         this_num = ajIntGet(num[i], k);
5589 
5590                         /* Check to prevent array boundary error */
5591                         if (this_num <= pdbfile->nres[i])
5592                         {
5593                             if (ajStrGetCharPos(seq[i], k) !=
5594                                 ajStrGetCharPos(pdbfile->seqres[i], this_num - 1))
5595                             {
5596                                 nmismatches++;
5597                                 /* Correct the seqres sequence. Replace this with
5598                                  * appropriate library call once available so we
5599                                  * don't have to probe the internals of the
5600                                  * structure */
5601 
5602 
5603                                 /* a will give the number of the first coordinate
5604                                  * line for the mismatch residue from the ATOM
5605                                  * records */
5606                                 a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
5607                                                     ajIntGet(num[i], k));
5608 
5609                                 /* Get the id of the mismatch residue in the
5610                                  * SEQRES sequence.  */
5611 
5612                                 ajResidueToTriplet(
5613                                     pdbfile->seqres[i]->Ptr[this_num - 1],
5614                                     &aa_misfit);
5615 
5616                                 ajFmtPrintS(&msgbit, "%S%S:%S%d.    ",
5617                                             pdbfile->rtype[a], pdbfile->pdbn[a],
5618                                             aa_misfit, this_num - j);
5619 
5620                                 ajStrAppendS(&msgstr, msgbit);
5621 
5622                                 pdbfile->seqres[i]->Ptr[this_num - 1] =
5623                                     ajStrGetCharPos(seq[i], k);
5624                             }
5625                         }
5626                         else
5627                         {
5628                             err = ajTrue;
5629                             break;
5630                         }
5631                     }
5632 
5633 #if AJFALSE
5634                     /* DIAGNOSTIC */
5635                     ajFmtPrintF(flog, "STEP4.3 tmpseqres: %S\n", tmpseqres);
5636 #endif /* AJFALSE */
5637                     if (!err)
5638                     {
5639                         /* Residue numbering is correct (no or acceptable number
5640                          * of mismatches) */
5641                         if (nmismatches <= lim)
5642                         {
5643                             if (nmismatches)
5644                                 ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n",
5645                                             "MISMATCH",
5646                                             i + 1,
5647                                             ajChararrGet(pdbfile->chid, i),
5648                                             nmismatches, msgstr);
5649 
5650 
5651 
5652 
5653                             for (k = 0; k < nres[i]; k++)
5654                                 ajIntPut(&idx[i], k, ajIntGet(num[i], k));
5655 
5656                             if (x == 0)
5657                                 pdbfile->resn1ok[i] = ajTrue;
5658                             else
5659                                 pdbfile->resn1ok[i] = ajFalse;
5660 
5661                             done = ajTrue;
5662 
5663 #if AJFALSE
5664                             /* DIAGNOSTIC */
5665                             ajFmtPrintF(flog, "STEP4 OK %d mismatches\n",
5666                                         nmismatches);
5667 #endif /* AJFALSE */
5668 
5669                             break;
5670                         }
5671                         else
5672                         {
                            /* Otherwise, residue numbering is incorrect
                             * (unacceptable number of mismatches). Restore the
                             * original seqres sequence. */
5676                             ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5677                             pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5678 
5679 #if AJFALSE
5680                             /* DIAGNOSTIC */
5681                             ajFmtPrintF(flog, "STEP4 **NOT** OK %d mismatches\n",
5682                                         nmismatches);
5683 #endif /* AJFALSE */
5684                         }
5685                     }
5686                     else
5687                     {
                        /* Otherwise, residue numbering is incorrect (residue
                         * number is out of range). Restore the original seqres
                         * sequence. */
5691                         ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5692                         pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5693 
5694 
5695 #if AJFALSE
5696                         /* DIAGNOSTIC */
5697                         ajFmtPrintF(flog, "STEP4 **NOT** OK out_of_range\n");
5698 #endif /* AJFALSE */
5699                     }
5700 
5701 
5702                     /***********************************************/
5703                     /******************* STEP 5 ********************/
5704                     /***********************************************/
5705 #if AJFALSE
5706                     /* DIAGNOSTIC */
5707                     ajFmtPrintF(flog, "STEP5 tmpseqres: %S\n", tmpseqres);
5708 
5709                     ajFmtPrintF(flog, "chnn : %d\n"
5710                                 "seq1 : %S\n"
5711                                 "seq2 : %S\n"
5712                                 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5713                                 pdbfile->seqres[i]);
5714                     ajFmtPrintF(flog, "\n");
5715                     if (ajStrMatchS(seq1[i], seq2[i]))
5716                         ajFmtPrintF(flog, "seq1 and seq2 match\n");
5717                     else
5718                         ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5719 
5720                     if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5721                         ajFmtPrintF(flog, "seq1 and seqres match\n");
5722                     else
5723                         ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5724                     ajFmtPrintF(flog, "\n");
5725 #endif /* AJFALSE */
5726 
5727                     /* None of the measures above could find the correct residue
5728                      * numbering so try by alignment.
5729                      *
5730                      * Align the SEQRES sequence to the ATOM sequence by taking
5731                      * progressively decreasing sized substrings from the ATOM
5732                      * sequence.  It returns 0 if any of the alignments would not
5733                      * leave enough space to be able to align the rest of the
5734                      * ATOM sequence to the SEQRES sequence - i.e. alignments
5735                      * giving an overspill of the ATOM sequence past the
5736                      * C-terminus of the SEQRES sequence are NOT allowed.
5737                      *
5738                      * NO Mismatches are allowed at this stage */
5739 
5740 
5741                     for (done_end = ajFalse,
5742                              len = pdbfile->nres[i],
5743                              siz_substr = nres[i],
5744                              atom_ptr = ajStrGetPtr(seq[i]),
5745                              seqres_ptr = ajStrGetPtr(pdbfile->seqres[i]);
5746                          siz_substr > 0;)
5747                     {
5748                         ajStrAssignSubC(&substr, atom_ptr, 0, siz_substr - 1);
5749 
5750                         if ((loc_ptr = strstr(seqres_ptr,
5751                                               ajStrGetPtr(substr))) == NULL)
5752                         {
5753                             siz_substr--;
5754                             continue;
5755                         }
5756                         else
5757                         {
5758                             atom_idx = (int) ((atom_ptr - ajStrGetPtr(seq[i]))
5759                                               / sizeof (char));
5760                             seqres_idx = (int)
5761                                 ((loc_ptr - ajStrGetPtr(pdbfile->seqres[i]))
5762                                  / sizeof (char));
5763 
5764 
5765 
5766 
                            /* CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINDER
                             * OF THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN
                             * THIS ALIGNMENT */
5770                             if ((nres[i] - atom_idx) > (len - seqres_idx))
5771                                 break;
5772 
5773                             for (k = 0, y = atom_idx, z = seqres_idx; k < siz_substr; k++,
5774                                      y++, z++)
5775                                 ajIntPut(&idx[i], y, z + 1);
5776 
5777 
5778 
5779                             /* Mark up last SEQRES residue as having been done */
5780                             if (y == nres[i])
5781                                 done_end = ajTrue;
5782                         }
5783 
5784                         atom_ptr += siz_substr;
5785                         seqres_ptr = loc_ptr + siz_substr;
5786                         siz_substr = nres[i] - (atom_idx + siz_substr);
5787                     }
5788 
5789 
5790                     /* Check to ensure that position for last residue has been
5791                      * worked out */
5792                     if (done_end)
5793                     {
5794 #if AJFALSE
5795                         /* DIAGNOSTIC */
5796                         ajFmtPrintF(flog, "chnn : %d\n"
5797                                     "seq1 : %S\n"
5798                                     "seq2 : %S\n"
5799                                     "seqr : %S\n", i + 1, seq1[i], seq2[i],
5800                                     pdbfile->seqres[i]);
5801                         ajFmtPrintF(flog, "\n");
5802                         if (ajStrMatchS(seq1[i], seq2[i]))
5803                             ajFmtPrintF(flog, "seq1 and seq2 match\n");
5804                         else
5805                             ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5806 
5807                         if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5808                             ajFmtPrintF(flog, "seq1 and seqres match\n");
5809                         else
5810                             ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5811                         ajFmtPrintF(flog, "\n");
5812 #endif /* AJFALSE */
5813 
5814                         /* Residue numbering is correct after alignment (no
5815                          * mismatches) */
5816                         if (x == 0)
5817                             pdbfile->resn1ok[i] = ajTrue;
5818                         else
5819                             pdbfile->resn1ok[i] = ajFalse;
5820 
5821                         ajFmtPrintF(flog, "%-15s%d (%c)\n", "GAPPEDOK", i + 1,
5822                                     ajChararrGet(pdbfile->chid, i));
5823 
5824 
5825 #if AJFALSE
5826                         /* DIAGNOSTIC */
5827                         ajFmtPrintF(flog, "STEP5 OK\n");
5828 #endif /* AJFALSE */
5829 
5830                         done = ajTrue;
5831                         break;
5832                     }
5833 
5834 #if AJFALSE
5835                     /* DIAGNOSTIC */
5836                     ajFmtPrintF(flog, "STEP5 **NOT** OK\n");
5837 #endif /* AJFALSE */
5838 
5839                     /* Otherwise, agreement could not be found */
5840 
5841                     /* array might contain junk values now but this should not
5842                      * matter as the array should be overwritten */
5843 
5844                     /***********************************************/
5845                     /******************* STEP 6 ********************/
5846                     /***********************************************/
5847 #if AJFALSE
5848                     /* DIAGNOSTIC */
5849                     ajFmtPrintF(flog, "STEP6 tmpseqres: %S\n",
5850                                 tmpseqres);
5851 
5852                     ajFmtPrintF(flog, "chnn : %d\n"
5853                                 "seq1 : %S\n"
5854                                 "seq2 : %S\n"
5855                                 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5856                                 pdbfile->seqres[i]);
5857                     ajFmtPrintF(flog, "\n");
5858                     if (ajStrMatchS(seq1[i], seq2[i]))
5859                         ajFmtPrintF(flog, "seq1 and seq2 match\n");
5860                     else
5861                         ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5862 
5863                     if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5864                         ajFmtPrintF(flog, "seq1 and seqres match\n");
5865                     else
5866                         ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5867                     ajFmtPrintF(flog, "\n");
5868 #endif /* AJFALSE */
5869 
5870 
5871                     /* Try again by alignment. Mismatches ARE allowed at this
5872                      * stage Must change: */
5873 
5874 
5875                     for (ajStrAssignClear(&msgstr),
5876                              nmismatches = 0,
5877                              done_end = ajFalse,
5878                              len = pdbfile->nres[i],
5879                              siz_substr = nres[i],
5880                              atom_ptr = ajStrGetPtr(seq[i]),
5881                              seqres_idx_last = -1,
5882                              seqres_ptr = ajStrGetPtr(pdbfile->seqres[i]);
5883                          siz_substr > 0;)
5884                     {
5885                         /* On the first pass, atom_ptr and seqres_ptr point to
5886                          * the start of the ATOM and SEQRES sequences
5887                          * respectively. */
5888 
5889 
5890                         founderr = ajFalse;
5891 
5892                         /* Copy block (of progressively decreasing size) from
5893                          * ATOM sequence to substring1  */
5894                         ajStrAssignSubC(&substr, atom_ptr, 0, siz_substr - 1);
5895 
5896 #if AJFALSE
5897                         /* DIAGNOSTIC */
5898                         ajFmtPrintF(flog, "\n***\n%12s\n%s\n%12s\n%s\n",
5899                                     "Aligning ", ajStrGetPtr(substr), "To ", seqres_ptr);
5900 #endif /* AJFALSE */
5901 
5902 
5903                         /* Set loc_ptr to point to the first occurrence of
5904                          * substring1 in SEQRES sequence */
5905                         /* If substring1 does not occur in SEQRES sequence */
5906                         if ((loc_ptr = strstr(seqres_ptr,
5907                                               ajStrGetPtr(substr))) == NULL)
5908                         {
                            /* See if there is an error in the residue id at the
                             * start of <substr>. Create a new substring from the
                             * ATOM records but omit the first character. There
                             * is a special case if substr is only 1 character
                             * long. */
5914 
5915                             if (siz_substr == 1)
5916                             {
5917                                 loc_ptr = seqres_ptr;
5918                                 nmismatches++;
5919                                 founderr = ajTrue;
5920                             }
5921                             else
5922                             {
5923                                 /* Copy substring1 to substring2 but omit the
5924                                  * first character */
5925                                 ajStrAssignSubS(&substr2, substr, 1, -1);
5926 
5927 
5928 #if AJFALSE
5929                                 /* DIAGNOSTIC */
5930                                 ajFmtPrintF(flog,
5931                                             "\n!!!\n%12s\n%s\n%12s\n%s\n",
5932                                             "Aligning ", ajStrGetPtr(substr2), "To ",
5933                                             seqres_ptr);
5934 #endif /* AJFALSE */
5935 
                                /* Set loc_ptr to point to the first occurrence
                                 * of substring2 in SEQRES sequence */
5938                                 /* If substring2 does not occur in the SEQRES
5939                                  * sequence, continue with a smaller substring */
5940                                 loc_ptr = strstr(seqres_ptr,
5941                                                  ajStrGetPtr(substr2));
5942                                 if (!loc_ptr)
5943                                 {
5944                                     siz_substr--;
5945                                     continue;
5946                                 }
5947                                 /* substring2 is found in the SEQRES sequence */
5948                                 else
5949                                 {
5950                                     /* If there is not enough space to
5951                                      * accommodate the 'missing' residue continue
5952                                      * (with a smaller substring) */
5953                                     if (loc_ptr == seqres_ptr)
5954                                     {
5955                                         siz_substr--;
5956                                         continue;
5957                                     }
5958 
5959 #if AJFALSE
5960                                     /* DIAGNOSTIC */
5961                                     ajFmtPrintF(flog, "\n\n\n");
5962                                     ajFmtPrintF(flog, "nmismatches = "
5963                                                 "%d\n%12s%s\n%12s%s\n%12s%s\n%12s%s\n",
5964                                                 nmismatches,
5965                                                 "atom_ptr:",
5966                                                 atom_ptr,
5967                                                 "seqres_ptr:",
5968                                                 seqres_ptr,
5969                                                 "substr:",
5970                                                 substr,
5971                                                 "substr2",
5972                                                 substr2);
5973 
5974                                     /* DIAGNOSTIC */
5975                                     ajFmtPrintF(flog,
5976                                                 "MISMATCH FOUND OK\n");
5977 #endif /* AJFALSE */
5978 
5979                                     /* There is enough space to accommodate
5980                                      * substring2 and the 'missing' (mismatch)
5981                                      * residue */
5982                                     nmismatches++;
5983                                     founderr = ajTrue;
5984                                 }
5985                             }
5986                         }
5987 #if AJFALSE
5988                         /* DIAGNOSTIC */
5989                         else
5990                             ajFmtPrintF(flog, "ALIGNMENT FOUND OK\n");
5991 #endif /* AJFALSE */
5992 
5993                         /* atom_idx and seqres_idx are set to give the index into
5994                          * ATOM and SEQRES sequences respectively for the
5995                          * position of match of N-terminal residue of substring
5996                          * (if founderr is True this will be the position of the
5997                          * N-terminal mismatch residue) */
5998                         atom_idx = (int) ((atom_ptr - ajStrGetPtr(seq[i])) / sizeof (char));
5999 
6000                         if (founderr)
6001                             seqres_idx = (int) (((loc_ptr - 1) -
6002                                                  ajStrGetPtr(pdbfile->seqres[i]))
6003                                                 / sizeof (char));
6004                         else
6005                             seqres_idx = (int) ((loc_ptr -
6006                                                  ajStrGetPtr(pdbfile->seqres[i]))
6007                                                 / sizeof (char));
6008 #if AJFALSE
6009                         /* DIAGNOSTIC */
6010                         ajFmtPrintF(flog, "seqres_idx : %d\n", seqres_idx);
6011 #endif /* AJFALSE */
6012 
6013                         /* If there was a mismatch residue, idx_misfit_atom and
6014                          * idx_misfit_seqres will give the index into the ATOM
6015                          * and SEQRES sequences respectively for its position */
6016                         if (founderr)
6017                         {
6018                             idx_misfit_atom = atom_idx;
6019                             idx_misfit_seqres = seqres_idx;
6020                         }
6021 
6022 
6023 
6024                         /* CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINDER OF
6025                          * THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN THIS
6026                          * ALIGNMENT */
6027                         if ((nres[i] - atom_idx) > (len - seqres_idx))
6028                             break;
6029 
6030                         /**************************************************/
6031                         /* This will have to change for 1st residue       */
6032                         /**************************************************/
6033 
6034                         /* Try and find an exact match within the gap for the
6035                          * mismatch residue */
6036                         fixed = ajFalse;
6037 
6038                         if (founderr)
6039                         {
6040 #if AJFALSE
6041                             /* DIAGNOSTIC */
6042                             ajFmtPrintF(flog,
6043                                         "About to try (seqres_idx_last: %d,  "
6044                                         "seqres_idx: %d) ...\n", seqres_idx_last,
6045                                         seqres_idx);
6046 #endif /* AJFALSE */
6047 
6048 
6049                             aa_last = ajStrGetCharFirst(substr);
6050 
6051                             for (z = seqres_idx_last + 1; z < seqres_idx; z++)
6052                             {
6053 #if AJFALSE
6054                                 /* DIAGNOSTIC */
6055                                 ajFmtPrintF(flog, "Trying ATOM:SEQRES  %c:%c\n",
6056                                             aa_last, pdbfile->seqres[i]->Ptr[z]);
6057 #endif /* AJFALSE */
6058 
6059                                 if (pdbfile->seqres[i]->Ptr[z] == aa_last)
6060                                 {
6061                                     nmismatches--;
6062                                     founderr = ajFalse;
6063                                     fixed = ajTrue;
6064 
6065 
6066                                     /* Assign residue number */
6067                                     ajIntPut(&idx[i], atom_idx, z + 1);
6068 
6069                                     for (k = 0, y = atom_idx + 1, z = seqres_idx + 1;
6070                                          k < siz_substr - 1; k++, y++, z++)
6071                                         ajIntPut(&idx[i], y, z + 1);
6072 
6073                                     break;
6074                                 }
6075                             }
6076                         }
6077 
6078                         if (!fixed)
6079                         {
6080 #if AJFALSE
6081                             /* DIAGNOSTIC */
6082                             ajFmtPrintF(flog, "FAILED TO FIX\n");
6083 #endif /* AJFALSE */
6084                             /* Assign residue number */
6085                             for (k = 0, y = atom_idx, z = seqres_idx; k < siz_substr;
6086                                  k++, y++, z++)
6087                                 ajIntPut(&idx[i], y, z + 1);
6088 
6089                         }
6090 
6091 
6092 
6093                         /* Mark up last SEQRES residue as having been done */
6094                         if (y == nres[i])
6095                             done_end = ajTrue;
6096 
6097 
6098 
6099 
6100 
6101                         /* If the substring matched but with a residue mismatch
6102                          * for the 1st residue */
6103                         /**************************************************/
6104                         /* This block should only be called if we         */
6105                         /* can't fit the mismatch residue in somewhere.   */
6106                         /**************************************************/
6107                         if (founderr)
6108                         {
6109                             /* a will give the number of the first coordinate
6110                              * line for the mismatch residue from the ATOM
6111                              * records */
6112 
6113                             a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
6114                                                 ajIntGet(num[i],
6115                                                          idx_misfit_atom));
6116 
6117                             /* Get the id of the mismatch residue in the SEQRES
6118                              * sequence.  */
6119                             ajResidueToTriplet(
6120                                 pdbfile->seqres[i]->Ptr[idx_misfit_seqres],
6121                                 &aa_misfit);
6122 
6123 
6124                             /* To give correct index into SEQRES records in
6125                              * original PDB file, subtract j to account for
6126                              * modifications to the N-terminus that were made for
6127                              * missing residues relative to ATOM sequence. A
6128                              * further 1 is added to give a number starting from
6129                              * 1 (rather than 0) */
6130 
6131 #if AJFALSE
6132                             /* DIAGNOSTIC */
6133                             ajFmtPrintF(flog, "a : %d\n".a);
6134                             ajFmtPrintF(flog, "pdbfile->rtype[a] : %S\n", pdbfile->rtype[a]);
6135                             ajFmtPrintF(flog, "pdbfile->pdbn[a] : %S\n",
6136                                         pdbfile->pdbn[a]);
6137 #endif /* AJFALSE */
6138 
6139                             ajFmtPrintS(&msgbit, "%S %S %S %d;    ",
6140                                         pdbfile->rtype[a], pdbfile->pdbn[a],
6141                                         aa_misfit, idx_misfit_seqres - j + 1);
6142 
6143 #if AJFALSE
6144                             /* DIAGNOSTIC */
6145                             ajFmtPrintS(&msgbit, "ATOM residue %d (%c) vs "
6146                                         "SEQRES residue %d (%c).   ",
6147                                         ajIntGet(num[i], atom_idx), ajStrGetCharFirst(substr),
6148                                         seqres_idx + 1, pdbfile->seqres[i]->Ptr[seqres_idx]);
6149 #endif /* AJFALSE */
6150 
6151                             ajStrAppendS(&msgstr, msgbit);
6152 
6153 
6154                             pdbfile->seqres[i]->Ptr[seqres_idx] =
6155                                 ajStrGetCharFirst(substr);
6156                         }
6157 
6158 
6159                         /* atom_ptr and seqres_ptr now point to 1 residue past
6160                          * the end of the match of the substring in the ATOM and
6161                          * SEQRES sequences respectively. */
6162 
6163                         atom_ptr += siz_substr;
6164 
6165                         if (founderr)
6166                             seqres_ptr = (loc_ptr - 1) + siz_substr;
6167                         else
6168                             seqres_ptr = loc_ptr + siz_substr;
6169 
6170                         siz_substr = nres[i] - (atom_idx + siz_substr);
6171 
6172 
6173 
6174                         /**************************************************/
6175                         /* Must assign index into SEQRES for              */
6176                         /* C-terminal residue of substring                */
6177                         /**************************************************/
6178 
6179 
                        /* seqres_idx_last is set to give the index into SEQRES
                         * sequence for the position of match of the C-terminal
                         * residue of substring (the assignment code above
                         * leaves z one past the end of the aligned block,
                         * hence z - 1) */
6183                         seqres_idx_last = (int) z - 1;
6184                     }
6185 
6186                     /* Check to ensure that position for last residue has been
6187                      * worked out */
6188                     if ((done_end) && (nmismatches <= lim))
6189                     {
6190                         if (nmismatches)
6191                             ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n", "GAPPED",
6192                                         i + 1,
6193                                         ajChararrGet(pdbfile->chid, i),
6194                                         nmismatches, msgstr);
6195                         else
6196                             ajFmtPrintF(flog, "%-15s%d (%c)\n", "GAPPEDOK", i + 1,
6197                                         ajChararrGet(pdbfile->chid, i));
6198 
6199 
6200                         /* Residue numbering is correct after alignment
6201                          * (acceptable number of mismatches) */
6202                         if (x == 0)
6203                             pdbfile->resn1ok[i] = ajTrue;
6204                         else
6205                             pdbfile->resn1ok[i] = ajFalse;
6206 
6207 #if AJFALSE
6208                         /* DIAGNOSTIC */
6209                         ajFmtPrintF(flog,
6210                                     "STEP6 OK %d mismatches\n", nmismatches);
6211 #endif /* AJFALSE */
6212 
6213                         done = ajTrue;
6214                         break;
6215                     }
6216 
6217 #if AJFALSE
6218                     /* DIAGNOSTIC */
6219                     ajFmtPrintF(flog, "STEP6 **NOT** OK %d mismatches\n",
6220                                 nmismatches);
6221 #endif /* AJFALSE */
6222 
6223 
6224                     /* Otherwise, agreement could not be found - unacceptable
6225                      * number of mismatches. Restore the original seqres sequence */
6226                     ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
6227                     pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
6228 
6229             }                   /* for(x = 0; x < 2; x++) */
6230 
6231             if (done)
6232                 break;
6233         }
6234 
6235         /* After trying 6 different alignment steps with (potentially
6236          * modified) seqres sequences and 2 (possibly different) sequences
6237          * derived from the ATOM records an alignment with agreement in
6238          * residue numbering still cannot be found. Use data from ATOM
6239          * records only - use seq1 (all residues) and presume there are no
6240          * missing residues. */
6241         if (!done)
6242         {
6243             ajFmtPrintF(flog, "%-15s%d (%c)\n", "NOMATCH", i + 1,
6244                         ajChararrGet(pdbfile->chid, i));
6245 
6246             ajStrAssignS(&(pdbfile->seqres[i]), seq1[i]);
6247             pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
6248 
6249             for (k = 0; k < nres1[i]; k++)
6250                 ajIntPut(&idx[i], k, k + 1);
6251 
6252             pdbfile->resn1ok[i] = ajTrue;
6253         }
6254         else
6255         {
6256             if (j)
6257                 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSNTERM", i + 1,
6258                             ajChararrGet(pdbfile->chid, i), j);
6259 
6260             if (!ajStrMatchS(seq1[i], seq2[i]))
6261             {
6262                 if (x == 0)
6263                     ajFmtPrintF(flog, "%-15s%d\n", "ALTERNOK", i + 1);
6264                 else
6265                     ajFmtPrintF(flog, "%-15s%d\n", "HETEROK", i + 1);
6266 
6267             }
6268         }
6269     }
6270 
6271 
6272     /* Write the index arrays */
6273     for (i = 0; i < pdbfile->nchains; i++)
6274     {
6275         if (!pdbfile->chainok[i])
6276             continue;
6277 
6278 
6279         if (pdbfile->resn1ok[i])
6280             for (j = 0; j < nres1[i]; j++)
6281                 ajIntPut(&idx_full[i], ajIntGet(num1[i], j),
6282                          ajIntGet(idx[i], j));
6283         else
6284             for (j = 0; j < nres2[i]; j++)
6285                 ajIntPut(&idx_full[i], ajIntGet(num2[i], j),
6286                          ajIntGet(idx[i], j));
6287     }
6288 
6289     /* Write the resni element of the Pdbfile object. These are the residue
6290      * numbers that give the correct index into the finalised seqres sequence */
6291 
6292     for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
6293     {
6294         if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
6295         {
6296             if (pdbfile->resn1ok[pdbfile->chnn[i] - 1])
6297             {
6298                 pdbfile->resni[i] =
6299                     ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6300                              pdbfile->resn1[i]);
6301 #if AJFALSE
6302                 /* DIAGNOSTIC */
6303                 ajFmtPrintF(flog, "Got position %d (%d) ok\n",
6304                             pdbfile->resn1[i],
6305                             ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6306                                      pdbfile->resn1[i]));
6307 #endif /* AJFALSE */
6308             }
6309 
6310             else
6311             {
6312                 pdbfile->resni[i] =
6313                     ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6314                              pdbfile->resn2[i]);
6315 #if AJFALSE
6316                 /* DIAGNOSTIC */
6317                 ajFmtPrintF(flog, "Got position %d (%d) ok\n",
6318                             pdbfile->resn2[i],
6319                             ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6320                                      pdbfile->resn2[i]));
6321 #endif /* AJFALSE */
6322             }
6323         }
6324     }
6325 
6326 #if AJFALSE
6327     /* DIAGNOSTIC */
6328 
6329     ajFmtPrintF(flog, "\n\n\n");
6330 
6331     seq = seq1;
6332 
6333     for (i = 0; i < pdbfile->nchains; i++)
6334     {
6335         if (!pdbfile->chainok[i])
6336         {
6337             ajFmtPrintF(flog,
6338                         "Chain %d\nSEQRES %S\nCHAIN NOT OK\n\n\n",
6339                         i + 1,
6340                         pdbfile->seqres[i]);
6341 
6342             continue;
6343         }
6344 
6345         ajFmtPrintF(flog,
6346                     "Chain %d\nSEQRES %S\nSEQ__1 %S\nSEQ__2 %S\n\n\n",
6347                     i + 1,
6348                     pdbfile->seqres[i],
6349                     seq1[i],
6350                     seq2[i]);
6351 
6352     }
6353 
6354 
6355     for (i = 0; i < pdbfile->nchains; i++)
6356     {
6357         if (!pdbfile->chainok[i])
6358             continue;
6359 
6360         if (nres1[i] > nres2[i])
6361             max = nres1[i];
6362         else
6363             max = nres2[i];
6364 
6365         ajFmtPrintF(flog, "CHAIN %d\n", i + 1);
6366         ajFmtPrintF(flog, "seqres %S\n", pdbfile->seqres[i]);
6367 
6368 
6369         ajFmtPrintF(flog, "%-6s%-6s%-6s%-6s%-6s%-6s%-6s\n",
6370                     "RES", "NUM", "SEQ1", "RESN1", "SEQ2", "RESN2", "IDX");
6371 
6372         for (j = 0; j < max; j++)
6373         {
6374             if (j < nres1[i] && j < nres2[i])
6375                 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6d%-6c%-6d%-6d\n",
6376                             "RES",
6377                             j + 1,
6378                             ajStrGetCharPos(seq1[i], j),
6379                             ajIntGet(num1[i], j),
6380                             ajStrGetCharPos(seq2[i], j),
6381                             ajIntGet(num2[i], j),
6382                             ajIntGet(idx[i], j));
6383             else if (j < nres1[i])
6384             {
6385                 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6d%-6c%-6c",
6386                             "RES",
6387                             j + 1,
6388                             ajStrGetCharPos(seq1[i], j),
6389                             ajIntGet(num1[i], j),
6390                             '.', '.');
6391 
6392 
6393                 if (pdbfile->resn1ok[i])
6394                     ajFmtPrintF(flog, "%-6d\n", ajIntGet(idx[i], j));
6395                 else
6396                     ajFmtPrintF(flog, "%-6c\n", '.');
6397 
6398             }
6399             else
6400             {
6401                 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6c%-6c%-6d",
6402                             "RES",
6403                             j + 1,
6404                             '.', '.',
6405                             ajStrGetCharPos(seq2[i], j),
6406                             ajIntGet(num2[i], j));
6407 
6408                 if (!pdbfile->resn1ok[i])
6409                     ajFmtPrintF(flog, "%-6d\n", ajIntGet(idx[i], j));
6410                 else
6411                     ajFmtPrintF(flog, "%-6c\n", '.');
6412             }
6413         }
6414     }
6415 #endif /* AJFALSE */
6416 
6417     /* Tidy up and return */
6418     ajStrDel(&aa_misfit);
6419     ajStrDel(&seqbit);
6420     ajStrDel(&msgstr);
6421     ajStrDel(&msgbit);
6422     AJFREE(insert);
6423     ajStrDel(&tmpseqres);
6424     ajStrDel(&bit);
6425     ajStrDel(&substr);
6426     ajStrDel(&substr2);
6427 
6428     for (i = 0U; i < pdbfile->nchains; i++)
6429     {
6430         if (!pdbfile->chainok[i])
6431             continue;
6432 
6433         ajStrDel(&seq1[i]);
6434         ajStrDel(&seq2[i]);
6435 
6436         ajIntDel(&num1[i]);
6437         ajIntDel(&num2[i]);
6438 
6439         ajIntDel(&idx[i]);
6440         ajIntDel(&idx_full[i]);
6441 
6442     }
6443 
6444     AJFREE(seq1);
6445     AJFREE(seq2);
6446 
6447     AJFREE(num1);
6448     AJFREE(num2);
6449 
6450     AJFREE(idx);
6451     AJFREE(idx_full);
6452 
6453     AJFREE(nres1);
6454     AJFREE(nres2);
6455 
6456     return ajTrue;
6457 }
6458 
6459 
6460 
6461 
6462 /* #funcstatic pdbioDiagnostic ************************************************
6463 **
6464 ** For printing out diagnostics for pdbparse build
6465 **
6466 **
6467 ** #param [r] pdbfile [AjPPdbfile]  Pdbfile object
6468 ** #param [r] n       [ajint]        Flag for controlling output
6469 **
** #return [void]
6471 ** ##
6472 ******************************************************************************/
6473 
6474 #if AJFALSE
6475 /*THIS_DIAGNOSTIC*/
static void pdbioDiagnostic(AjPPdbfile pdbfile, ajint n)
{
    ajuint i = 0U;              /* Loop counter over chains or lines */

    /* NOTE(review): 'tempfile' is not declared in this function; an AjPFile
     * of that name has to be in scope before this block can be enabled. */

    if (n == 0)
    {
        /* Header-level summary: chains, sequences and experiment details */
        ajFmtPrintF(tempfile, "nchains: %d\n", pdbfile->nchains);

        for (i = 0U; i < pdbfile->nchains; i++)
            ajFmtPrintF(tempfile, "chid: %c\n",
                        ajChararrGet(pdbfile->chid, i));

        /* 'i' is unsigned, so it is printed with %u */
        for (i = 0U; i < pdbfile->nchains; i++)
            ajFmtPrintF(tempfile, "seqres %u: %S\n", i, pdbfile->seqres[i]);

        ajFmtPrintF(tempfile, "tercnt: %d\n", pdbfile->tercnt);
        ajFmtPrintF(tempfile, "COMPND: %S\n", pdbfile->compnd);
        ajFmtPrintF(tempfile, "SOURCE: %S\n", pdbfile->source);
        ajFmtPrintF(tempfile, "reso: %f\n", pdbfile->reso);

        /* Anything that is not X-ray is reported as NMR */
        if (pdbfile->method == ajEPdbMethodXray)
            ajFmtPrintF(tempfile, "method: ajEPdbMethodXray\n");
        else
            ajFmtPrintF(tempfile, "method: ajEPdbMethodNmr\n");
    }
    else if (n == 1)
    {
        /* One output row per line of the Pdbfile object */
        for (i = 0U; i < pdbfile->nlines; i++)
        {
            ajFmtPrintF(tempfile, "%-5S", pdbfile->pdbid);

            switch (pdbfile->linetype[i])
            {
                case pdbfileELinetypeIgnore:
                    ajFmtPrintF(tempfile, "%-10s", "IGNORE");
                    break;
                case pdbfileELinetypeCoordinate:
                    ajFmtPrintF(tempfile, "%-10s", "COORD");
                    break;
                case pdbfileELinetypeHeterogen:
                    ajFmtPrintF(tempfile, "%-10s", "COORDHET");
                    break;
                case pdbfileELinetypeGroups:
                    ajFmtPrintF(tempfile, "%-10s", "COORDGP");
                    break;
                case pdbfileELinetypeWater:
                    ajFmtPrintF(tempfile, "%-10s", "COORDWAT");
                    break;
                case pdbfileELinetypeTER:
                    ajFmtPrintF(tempfile, "%-10s", "TER");
                    break;
                case pdbfileELinetypeMODEL:
                    ajFmtPrintF(tempfile, "%-10s", "MODEL");
                    break;
                default:
                    /* Keep the columns aligned for an unlisted line type */
                    ajFmtPrintF(tempfile, "%-10s", "UNKNOWN");
                    break;
            }

            ajFmtPrintF(tempfile,
                        "M%-2dC%-2d%-6S%-5d%-5d%-4B%-4S%-4S%-7.3f%-7.3f"
                        "%-7.3f%-6.3f%-6.3f\n",
                        pdbfile->modn[i],
                        pdbfile->chnn[i],
                        pdbfile->pdbn[i],
                        pdbfile->resn1[i],
                        pdbfile->resn2[i],
                        pdbfile->oddnum[i],
                        pdbfile->atype[i],
                        pdbfile->rtype[i],
                        pdbfile->x[i],
                        pdbfile->y[i],
                        pdbfile->z[i],
                        pdbfile->o[i],
                        pdbfile->b[i]);
        }
    }

    /* Any other value of 'n' produces no output */
    return;
}
6550 #endif /* AJFALSE */
6551 
6552 
6553 
6554 
6555 /* @funcstatic pdbioPdbfileToPdb **********************************************
6556 **
6557 ** Reads data from a Pdbfile object and writes a Pdb object.
6558 ** Chains that did not contain at least the user-defined threshold number of
6559 ** amino acid residues are discarded, i.e. are NOT copied and will NOT appear
6560 ** in the output file that is eventually generated.
6561 **
6562 **
6563 ** @param [w] Ppdb     [AjPPdb *]     Pdb object pointer
6564 ** @param [u] pdbfile     [AjPPdbfile]  Pdbfile object
6565 **
6566 ** @return [AjBool]  True on success, False otherwise
6567 **
6568 ** @release 2.9.0
6569 ** @@
6570 ******************************************************************************/
6571 
pdbioPdbfileToPdb(AjPPdb * Ppdb,AjPPdbfile pdbfile)6572 static AjBool pdbioPdbfileToPdb(AjPPdb *Ppdb, AjPPdbfile pdbfile)
6573 {
6574     ajuint i = 0U;              /* Loop counter */
6575     ajint idx = 0;              /* Index into chain array */
6576     ajuint j = 0U;              /* Loop counter */
6577     AjPAtom atom = NULL;        /* Atom object */
6578     AjPResidue residue = NULL;  /* Residue object */
6579     ajuint nchn = 0U;           /* No. chains that have min. no. of aa's */
6580     AjPInt lookup;              /* Array of chain numbers for chains in ret
6581                                  * for all chains in pdb.A '0' is given for
6582                                  * chains with < threshold no. of aa's */
6583     ajint chn = 0;
6584     ajuint rn_last = UINT_MAX;
6585     ajuint mn_last = UINT_MAX;
6586 
6587     ajint eNum;
6588     AjPStr eId = NULL;
6589     char eType;
6590     ajint eClass;
6591 
6592     if (!Ppdb || !pdbfile)
6593     {
6594         ajWarn("Bad args passed to pdbioPdbfileToPdb");
6595 
6596         return ajFalse;
6597     }
6598 
6599     if (*Ppdb)
6600     {
6601         ajWarn("Bad args passed to pdbioPdbfileToPdb - PDB object exists");
6602 
6603         return ajFalse;
6604     }
6605 
6606     eId = ajStrNew();
6607 
6608     lookup = ajIntNewRes(pdbfile->nchains);
6609     ajIntPut(&lookup, pdbfile->nchains - 1, 0);
6610 
6611     for (nchn = 0U, i = 0U; i < pdbfile->nchains; i++)
6612         if (pdbfile->chainok[i])
6613         {
6614             nchn++;
6615             ajIntPut(&lookup, i, nchn);
6616         }
6617 
6618     *Ppdb = ajPdbNew(nchn);
6619     (*Ppdb)->Nchn = nchn;
6620 
6621     ajStrAssignS(&((*Ppdb)->Pdb), pdbfile->pdbid);
6622     ajStrAssignS(&((*Ppdb)->Compnd), pdbfile->compnd);
6623     ajStrAssignS(&((*Ppdb)->Source), pdbfile->source);
6624     (*Ppdb)->Method = pdbfile->method;
6625     (*Ppdb)->Reso = pdbfile->reso;
6626     (*Ppdb)->Nmod = pdbfile->modcnt;
6627     (*Ppdb)->Ngp = pdbfile->ngroups;
6628     /* (*Ppdb)->Nchn   = pdbfile->nchains; */
6629 
6630     for (i = 0U; i < pdbfile->ngroups; i++)
6631         ajChararrPut(&((*Ppdb)->gpid), i, ajChararrGet(pdbfile->gpid, i));
6632 
6633     for (idx = -1, i = 0U; i < pdbfile->nchains; i++)
6634     {
6635         if (pdbfile->chainok[i])
6636             idx++;
6637         else
6638             continue;
6639 
6640         (*Ppdb)->Chains[idx]->Id = ajChararrGet(pdbfile->chid, i);
6641 
6642 
6643         /* These counts are no longer made from the PDB records. They are
6644          * only made if the file is annotated with stride secondary structure
6645          * info by using pdbstride */
6646 #if AJFALSE
6647         (*Ppdb)->Chains[idx]->numHelices = pdbfile->numHelices[i];
6648         (*Ppdb)->Chains[idx]->numStrands = pdbfile->numStrands[i];
6649         (*Ppdb)->Chains[idx]->numSheets = pdbfile->numSheets[i];
6650         (*Ppdb)->Chains[idx]->numTurns = pdbfile->numTurns[i];
6651 #endif /* AJFALSE */
6652         (*Ppdb)->Chains[idx]->Nres = pdbfile->nres[i];
6653         (*Ppdb)->Chains[idx]->Nlig = pdbfile->nligands[i];
6654         ajStrAssignS(&((*Ppdb)->Chains[idx]->Seq), pdbfile->seqres[i]);
6655     }
6656 
6657 
6658     for (j = pdbfile->idxfirst; j < pdbfile->nlines; j++)
6659     {
6660         if ((pdbfile->linetype[j] == pdbfileELinetypeCoordinate) ||
6661             (pdbfile->linetype[j] == pdbfileELinetypeHeterogen) ||
6662             (pdbfile->linetype[j] == pdbfileELinetypeGroups) ||
6663             (pdbfile->linetype[j] == pdbfileELinetypeWater))
6664         {
6665             /* Skip this line if it for a heterogenous (duplicate) position  */
6666             if ((!pdbfile->resn1ok[pdbfile->chnn[j] - 1]) && pdbfile->oddnum[j])
6667                 continue;
6668 
6669             atom = ajAtomNew();
6670 
6671             atom->Mod = pdbfile->modn[j];
6672             /* atom->Chn = pdbfile->chnn[j]; */
6673             atom->Chn = ajIntGet(lookup, pdbfile->chnn[j] - 1);
6674 
6675             atom->Gpn = pdbfile->gpn[j];
6676 
6677             switch (pdbfile->linetype[j])
6678             {
6679                 case pdbfileELinetypeHeterogen:
6680                 atom->Type = 'H';
6681                     break;
6682                 case pdbfileELinetypeGroups:
6683                 atom->Type = 'H';
6684                     break;
6685                 case pdbfileELinetypeCoordinate:
6686                 atom->Type = 'P';
6687                     break;
6688                 case pdbfileELinetypeWater:
6689                 atom->Type = 'W';
6690                     break;
6691                 default:
6692                     break;
6693             }
6694 
6695             atom->Idx = pdbfile->resni[j];
6696 
6697             ajStrAssignS(&atom->Pdb, pdbfile->pdbn[j]);
6698 
6699             if ((pdbfile->linetype[j] == pdbfileELinetypeHeterogen) ||
6700                 (pdbfile->linetype[j] == pdbfileELinetypeGroups) ||
6701                 (pdbfile->linetype[j] == pdbfileELinetypeWater))
6702                 atom->Id1 = '.';
6703             else
6704                 ajResidueFromTriplet(pdbfile->rtype[j], &atom->Id1);
6705 
6706             ajStrAssignS(&atom->Id3, pdbfile->rtype[j]);
6707             ajStrAssignS(&atom->Atm, pdbfile->atype[j]);
6708             atom->X = pdbfile->x[j];
6709             atom->Y = pdbfile->y[j];
6710             atom->Z = pdbfile->z[j];
6711             atom->O = pdbfile->o[j];
6712             atom->B = pdbfile->b[j];
6713 
6714 
6715             ajStrAssignS(&eId, pdbfile->elementId[j]);
6716             eNum = pdbfile->elementNum[j];
6717             eType = pdbfile->elementType[j];
6718             eClass = pdbfile->helixClass[j];
6719 
6720 
6721             if (pdbfile->linetype[j] == pdbfileELinetypeGroups)
6722                 ajListPushAppend((*Ppdb)->Groups, atom);
6723             else if (pdbfile->linetype[j] == pdbfileELinetypeWater)
6724                 ajListPushAppend((*Ppdb)->Water, atom);
6725             else
6726             {
6727                 if (pdbfile->chainok[pdbfile->chnn[j] - 1])
6728                 {
6729 #if AJFALSE
6730                     ajListPushAppend((*Ppdb)->Chains[pdbfile->chnn[j] - 1]->Atoms,
6731                                      atom);
6732                     ajListPushAppend((*Ppdb)->Chains[ajIntGet(lookup,
6733                                                               pdbfile->chnn[j] - 1) - 1]->Atoms, atom);
6734 #endif /* AJFALSE */
6735                     chn = ajIntGet(lookup, pdbfile->chnn[j] - 1) - 1;
6736 
6737                     ajListPushAppend((*Ppdb)->Chains[chn]->Atoms, atom);
6738 
6739                     /* Write residue object */
6740                     if (atom->Type == 'P')
6741                     {
6742                         /* New model */
6743                         if (atom->Mod != mn_last)
6744                         {
6745                             rn_last = UINT_MAX;
6746                             mn_last = atom->Mod;
6747                         }
6748 
6749                         /* New residue */
6750                         if (atom->Idx != rn_last)
6751                         {
6752                             residue = ajResidueNew();
6753 
6754                             residue->Mod = atom->Mod;
6755                             residue->Chn = atom->Chn;
6756                             residue->Idx = atom->Idx;
6757                             ajStrAssignS(&residue->Pdb, atom->Pdb);
6758                             residue->Id1 = atom->Id1;
6759                             ajStrAssignS(&residue->Id3, atom->Id3);
6760 
6761                             residue->eNum = eNum;
6762                             ajStrAssignS(&residue->eId, eId);
6763                             residue->eType = eType;
6764                             residue->eClass = eClass;
6765 
6766                             ajListPushAppend((*Ppdb)->Chains[chn]->Residues,
6767                                              (void *) residue);
6768                             rn_last = atom->Idx;
6769                         }
6770                     }
6771                 }
6772                 else
6773                     ajAtomDel(&atom);
6774             }
6775         }
6776         else
6777             continue;
6778     }
6779 
6780     ajIntDel(&lookup);
6781     ajStrDel(&eId);
6782 
6783     return ajTrue;
6784 }
6785 
6786 
6787 
6788 
6789 /* @funcstatic pdbioPdbfileFindLine *******************************************
6790 **
6791 ** Returns the line number of the first instance of a line with a specified
6792 ** residue and chain number.
6793 **
6794 ** @param [r] pdbfile     [const AjPPdbfile] Pdbfile object pointer
6795 ** @param [r] chn     [ajint] Chain number
6796 ** @param [r] which   [ajint] 0 or 1, refer to resn1 or resn2 residue
6797 ** @param [r] pos     [ajint] Residue number
6798 **
6799 ** @return [ajint]  Line number (index, i.e. starts from 0).
6800 **
6801 ** @release 2.9.0
6802 ** @@
6803 ******************************************************************************/
6804 
pdbioPdbfileFindLine(const AjPPdbfile pdbfile,ajint chn,ajint which,ajint pos)6805 static ajint pdbioPdbfileFindLine(const AjPPdbfile pdbfile, ajint chn,
6806                                   ajint which, ajint pos)
6807 {
6808     ajuint a = 0U;
6809     /* a will give the number of the first coordinate line for the mismatch
6810      * residue from the ATOM records */
6811 
6812     for (a = pdbfile->idxfirst; a < pdbfile->nlines; a++)
6813         if (pdbfile->linetype[a] == pdbfileELinetypeCoordinate &&
6814             pdbfile->chnn[a] == chn)
6815             /* First sequence (all residues) derived for atom records */
6816             /* OR Second sequence (excluding certain residues) derived for
6817              * atom records */
6818             if (((which == 0) && (pos == pdbfile->resn1[a])) ||
6819                 ((which == 1) && (pos == pdbfile->resn2[a])))
6820                 break;
6821 
6822     if (a == pdbfile->nlines)
6823         ajFatal("Unexpected loop failure in pdbioPdbfileFindLine. "
6824                 "Email jison@hgmp.mrc.ac.uk\n");
6825 
6826     return a;
6827 }
6828 
6829 
6830 
6831 
6832 /* @funcstatic pdbioPdbfileChain **********************************************
6833 **
6834 ** Finds the chain number for a given chain identifier in a pdbfile structure
6835 **
6836 ** @param [r] id  [char]        Chain identifier
6837 ** @param [r] pdbfile [const AjPPdbfile] Pdbfile object
6838 ** @param [w] chn [ajint *]     Chain number
6839 **
6840 ** @return [AjBool] True on success
6841 **
6842 ** @release 2.9.0
6843 ** @@
6844 ******************************************************************************/
6845 
pdbioPdbfileChain(char id,const AjPPdbfile pdbfile,ajint * chn)6846 static AjBool pdbioPdbfileChain(char id, const AjPPdbfile pdbfile, ajint *chn)
6847 {
6848     ajuint a = 0U;
6849 
6850     for (a = 0U; a < pdbfile->nchains; a++)
6851     {
6852         if (toupper((int) ajChararrGet(pdbfile->chid, a)) == toupper((int) id))
6853         {
6854             *chn = a + 1;
6855 
6856             return ajTrue;
6857         }
6858 
6859         /* Cope with chain id's of ' ' (which might be given as '.' in the
6860          * Pdbfile object) */
6861         if ((id == ' ') && (ajChararrGet(pdbfile->chid, a) == '.'))
6862         {
6863             *chn = a + 1;
6864 
6865             return ajTrue;
6866         }
6867     }
6868 
6869     /* A '.' may be given as the id for domains comprising more than one
6870      * chain */
6871     if (id == '.')
6872     {
6873         *chn = 1;
6874 
6875         return ajTrue;
6876     }
6877 
6878 
6879     return ajFalse;
6880 }
6881 
6882 
6883 
6884 
6885 /* @funcstatic pdbioWriteElementData ******************************************
6886 **
6887 ** Reads the secondary structure information from an Elements object
** and writes equivalent variables in a Pdbfile object.
6889 **
6890 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
6891 ** @param [u] flog    [AjPFile] Pointer to log file (build diagnostics)
6892 ** @param [r] elms    [const AjPElements] Elements object pointer
6893 **
6894 ** @return [AjBool]  True on success, False otherwise
6895 **
6896 ** @release 2.9.0
6897 ** @@
6898 ******************************************************************************/
6899 
pdbioWriteElementData(AjPPdbfile pdbfile,AjPFile flog,const AjPElements elms)6900 static AjBool pdbioWriteElementData(AjPPdbfile pdbfile, AjPFile flog,
6901                                const AjPElements elms)
6902 {
6903     ajuint x = 0U;
6904     ajint y = 0;
6905     ajint z = 0;
6906     ajuint modn = 0U;           /* Model number */
6907 
6908     ajuint idx = 0U;            /* idx into lines in Pdbfile object */
6909     ajuint idx_start = 0u;      /* Line index of start of element */
6910     ajuint idx_end = 0U;        /* Line index of end of element */
6911     ajuint idx_last = 0U;       /* Line index of last line to try */
6912     ajuint idx_tmp = 0U;        /* Temp. line index */
6913 
6914     ajint chn = 0;              /* Chain id of current element as integer */
6915 
6916     AjPInt nsheets = NULL;      /* Number of sheets in each chain */
6917     AjPStr *lastids = NULL;     /* Last sheet identifier read in for each
6918                                  * chain */
6919     AjBool found_start = ajFalse;       /* Whether start residue of the
6920                                          * current element has been found yet */
6921     AjBool found_end = ajFalse; /* Whether the end residue of the current
6922                                  * element has been found yet */
6923     ajint n_unknown = 0;        /* No. of unknown chain ids */
6924     AjPChar unknowns = NULL;    /* Unknown chain ids */
6925     AjBool found = ajFalse;     /* True if we have already reported an error
6926                                  * message for the unknown chain id */
6927 
6928     /* Check args */
6929     if (!pdbfile || !(elms) || !(flog))
6930     {
6931         ajWarn("NULL arg passed to pdbioWriteElementData");
6932 
6933         return ajFalse;
6934     }
6935 
6936     /* Assign default values for secondary structure fields in Pdbfile object */
6937     for (x = 0U; x < pdbfile->nlines; x++)
6938     {
6939         pdbfile->elementType[x] = '.';
6940         ajStrAssignC(&pdbfile->elementId[x], ".");
6941     }
6942 
6943     /* Allocate memory */
6944     nsheets = ajIntNewRes(pdbfile->nchains);
6945     ajIntPut(&nsheets, pdbfile->nchains, 0);
6946 
6947     unknowns = ajChararrNew();
6948 
6949     AJCNEW0(lastids, pdbfile->nchains);
6950 
6951     for (x = 0U; x < pdbfile->nchains; x++)
6952     {
6953         lastids[x] = ajStrNew();
6954         /* Assign a silly value for starters */
6955         ajStrAssignC(&lastids[x], "?????");
6956     }
6957 
6958 #if AJFALSE
6959     ajFmtPrint("LOOK HERE x:%d elms->n:%d\n", x, elms->n);
6960 #endif /* AJFALSE */
6961 
6962     for (modn = 1U; modn <= pdbfile->modcnt; modn++)
6963     {
6964         /* Loop for each element. Set the current line to the first line in
6965          * the file */
6966         for (idx = pdbfile->idxfirst,
6967                 x = 0U; x < elms->n; x++)
6968         {
6969             /* Find the chain number of the current element */
6970             if (!pdbioPdbfileChain(elms->elms[x]->chainId, pdbfile, &chn))
6971             {
6972                 /* Only report errors once for each unknown id */
6973                 if (modn == 1)
6974                 {
6975                     for (found = ajFalse, y = 0; y < n_unknown; y++)
6976                         if (ajChararrGet(unknowns, y) == elms->elms[x]->chainId)
6977                         {
6978                             found = ajTrue;
6979                             break;
6980                         }
6981 
6982                     if (!found)
6983                     {
6984                         ajFmtPrintF(flog, "%-15s%c %u\n", "SECCHAIN",
6985                                     elms->elms[x]->chainId, idx);
6986                         ajChararrPut(&unknowns, n_unknown,
6987                                      elms->elms[x]->chainId);
6988                         n_unknown++;
6989                     }
6990                 }
6991                 continue;
6992             }
6993 
6994             /* Only want to do this once (for the first model ) */
6995             if (modn == 1)
6996             {
6997                 /* Make a count of the number of beta sheets */
6998                 if (elms->elms[x]->elementType == 'E')
6999                     if (!ajStrMatchS(lastids[chn - 1], elms->elms[x]->elementId))
7000                     {
7001                         ajIntInc(&nsheets, chn - 1);
7002                         ajStrAssignS(&lastids[chn - 1], elms->elms[x]->elementId);
7003                     }
7004             }
7005 
7006             /* Loop for two passes.  z is for efficiency, if z == 0 it will
7007              * check from the current position up to the last coordinate
7008              * line, if z == 1 it will check from the first coordinate line up
7009              * to the last position checked */
7010             for (found_start = ajFalse, found_end = ajFalse,
7011                      z = 0; z < 2; z++)
7012             {
7013                 if (z == 0)
7014                     idx_last = pdbfile->nlines;
7015                 else
7016                 {
7017                     idx = pdbfile->idxfirst;
7018                     idx_last = idx_tmp;
7019                 }
7020 
7021                 /* Find the start and end of the current element (as an index
7022                  * into the line array) */
7023                 for (; idx < idx_last; idx++)
7024                 {
7025                     /* Find the correct chain and skip lines that are not for
7026                      * amino acids */
7027                     if ((pdbfile->chnn[idx] != chn) ||
7028                         (pdbfile->linetype[idx] != pdbfileELinetypeCoordinate) ||
7029                         (pdbfile->modn[idx] != modn))
7030                         continue;
7031 
7032                     /* We have not found the start residue yet */
7033                     if (!found_start)
7034                         if (ajStrMatchS(elms->elms[x]->initSeqNum,
7035                                         pdbfile->pdbn[idx]))
7036                             if (ajStrMatchS(elms->elms[x]->initResName,
7037                                             pdbfile->rtype[idx]))
7038                             {
7039                                 /* Residue number for start found and residue
7040                                  * type matches */
7041                                 idx_start = idx;
7042                                 /* printf("found_start !\n"); */
7043 
7044                                 found_start = ajTrue;
7045                             }
7046 
7047                     if (ajStrMatchS(elms->elms[x]->endSeqNum,
7048                                     pdbfile->pdbn[idx]))
7049                         if (ajStrMatchS(elms->elms[x]->endResName,
7050                                         pdbfile->rtype[idx]))
7051                         {
7052                             /* Residue number for end found and residue type
7053                              * matches */
7054                             idx_end = idx;
7055 
7056 /*                          printf("idx_end: %d ...", idx_end); */
7057 
7058                             /* Set the index to the LAST atom of the residue */
7059 
7060                             for (; idx_end < pdbfile->nlines; idx_end++)
7061                             {
7062                                 if (pdbfile->linetype[idx_end] !=
7063                                     pdbfileELinetypeCoordinate)
7064                                     continue;
7065 
7066                                 if (!ajStrMatchS(elms->elms[x]->endSeqNum,
7067                                                  pdbfile->pdbn[idx_end]) ||
7068                                     !ajStrMatchS(elms->elms[x]->endResName,
7069                                                  pdbfile->rtype[idx_end]) ||
7070                                     pdbfile->chnn[idx_end] != chn ||
7071                                     pdbfile->modn[idx_end] != modn)
7072                                     break;
7073                             }
7074 
7075                             idx_end--;
7076 
7077 #if AJFALSE
7078                             printf(" %d\n", idx_end);
7079 
7080                             ajFmtPrint("found_end !\n"
7081                                        "elms->elms[x]->endSeqNum   "
7082                                        ": pdbfile->pdbn[idx_end]  ===  %S : %S\n"
7083                                        "elms->elms[x]->endResName  "
7084                                        ": pdbfile->rtype[idx_end] ===  %S : %S\n"
7085                                        "pdbfile->chnn[idx_end]  "
7086                                        ": chn                        ===  %d : %d\n"
7087                                        "pdbfile->modn[idx_end]  "
7088                                        ": modn                       ===  %d : %d\n",
7089                                        elms->elms[x]->endSeqNum,
7090                                        pdbfile->pdbn[idx_end + 1],
7091                                        elms->elms[x]->endResName,
7092                                        pdbfile->rtype[idx_end + 1],
7093                                        pdbfile->chnn[idx_end + 1],
7094                                        chn,
7095                                        pdbfile->modn[idx_end + 1],
7096                                        modn);
7097 #endif /* AJFALSE */
7098 
7099                             found_end = ajTrue;
7100                             idx_tmp = idx;
7101                             break;
7102                         }
7103                 }
7104 
7105                 if (found_start && found_end)
7106                     break;
7107             }
7108 
7109             if (!found_start || !found_end)
7110             {
7111                 if (!found_start && !found_end)
7112                     ajFmtPrintF(flog, "%-15s%d %d %S %S %S %S\n", "SECBOTH",
7113                                 chn, modn, elms->elms[x]->initResName,
7114                                 elms->elms[x]->initSeqNum,
7115                                 elms->elms[x]->endResName,
7116                                 elms->elms[x]->endSeqNum);
7117                 else if (!found_start)
7118                     ajFmtPrintF(flog, "%-15s%d %d %S %S\n", "SECSTART",
7119                                 chn, modn, elms->elms[x]->initResName,
7120                                 elms->elms[x]->initSeqNum);
7121                 else if (!found_end)
7122                     ajFmtPrintF(flog, "%-15s%d %d %S %S\n", "SECEND", chn,
7123                                 modn, elms->elms[x]->endResName,
7124                                 elms->elms[x]->endSeqNum);
7125             }
7126 
7127             /* Assign secondary structure fields in Pdbfile object */
7128             for (idx = idx_start; idx <= idx_end; idx++)
7129             {
7130                 pdbfile->elementNum[idx] = elms->elms[x]->elementNum;
7131                 pdbfile->elementType[idx] = elms->elms[x]->elementType;
7132 
7133                 if (elms->elms[x]->elementType == 'H')
7134                     pdbfile->helixClass[idx] = elms->elms[x]->helixClass;
7135 
7136                 ajStrAssignS(&pdbfile->elementId[idx],
7137                              elms->elms[x]->elementId);
7138             }
7139 
7140             /* Only want to do this once */
7141             if (modn == 1)
7142             {
7143                 if (elms->elms[x]->elementType == 'H')
7144                     pdbfile->numHelices[chn - 1]++;
7145                 else if (elms->elms[x]->elementType == 'E')
7146                     pdbfile->numStrands[chn - 1]++;
7147                 else if (elms->elms[x]->elementType == 'T')
7148                     pdbfile->numTurns[chn - 1]++;
7149             }
7150         }
7151     }
7152 
7153     /* Assign number of sheets */
7154     if (modn == 1)
7155         for (x = 0; x < pdbfile->nchains; x++)
7156             pdbfile->numSheets[x] = ajIntGet(nsheets, x);
7157 
7158     /* Tidy up and return */
7159     ajIntDel(&nsheets);
7160 
7161     for (x = 0; x < pdbfile->nchains; x++)
7162         ajStrDel(&lastids[x]);
7163 
7164     AJFREE(lastids);
7165 
7166     ajChararrDel(&unknowns);
7167 
7168     return ajTrue;
7169 }
7170 
7171 
7172 
7173 
7174 /* ======================================================================= */
7175 /* =========================== constructors ============================== */
7176 /* ======================================================================= */
7177 
7178 
7179 
7180 
7181 /* @section Constructors ******************************************************
7182 **
7183 ** All constructors return a pointer to a new instance. It is the
7184 ** responsibility of the user to first destroy any previous instance. The
7185 ** target pointer does not need to be initialised to NULL, but it is good
7186 ** programming practice to do so anyway.
7187 **
7188 ******************************************************************************/
7189 
7190 
7191 
7192 
7193 /* @func  ajPdbReadRawNew *****************************************************
7194 **
7195 ** Reads a pdb file and returns a pointer to a filled Pdb object.
7196 **
7197 ** The pdb id is derived from the file name and extension of the pdb file
7198 ** (these are passed in by argument).
7199 **
7200 ** @param [u] inf            [AjPFile] Pointer to pdb file
7201 ** @param [r] pdbid          [const AjPStr]  PDB id code of pdb file
7202 ** @param [r] min_chain_size [ajint]  Minimum number of amino acids in a chain
7203 ** @param [r] max_mismatch   [ajint]  Maximum number of permissible mismatches
7204 **                                    between the ATOM and SEQRES sequences
7205 ** @param [r] max_trim       [ajint]  Max. no. residues to trim when checking
7206 **                                    for missing N- or C-terminal ATOM or
7207 **                                    SEQRES sequences.
7208 ** @param [r] camask         [AjBool]  Whether to mask non-amino acid groups
7209 ** within protein chains which do not have a C-alpha atom.
7210 ** @param [r] camask1        [AjBool]  Whether to mask amino acid residues
7211 ** within protein chains which do not have a C-alpha atom.
7212 ** @param [r] atommask       [AjBool]  Whether to mask residues or groups
7213 ** in protein chains with a single atom only.
7214 ** @param [u] flog           [AjPFile] Pointer to log file (build diagnostics)
7215 **
7216 ** @return [AjPPdb] pdb object pointer, or NULL on failure.
7217 **
7218 ** @release 2.9.0
7219 ** @@
7220 ******************************************************************************/
7221 
ajPdbReadRawNew(AjPFile inf,const AjPStr pdbid,ajint min_chain_size,ajint max_mismatch,ajint max_trim,AjBool camask,AjBool camask1,AjBool atommask,AjPFile flog)7222 AjPPdb ajPdbReadRawNew(AjPFile inf, const AjPStr pdbid, ajint min_chain_size,
7223                        ajint max_mismatch, ajint max_trim, AjBool camask,
7224                        AjBool camask1, AjBool atommask, AjPFile flog)
7225 {
7226     AjPPdbfile pdbfile = NULL;  /* Pdbfile structure (for raw data)    */
7227     AjPPdb ret = NULL;          /* Pdb structure (for parsed data)     */
7228     AjPElements elms = NULL;    /* Elements structure (for parsed data) */
7229 
7230 
7231     if (!inf || !flog)
7232     {
7233         ajWarn("Null arg passed to ajPdbReadRawNew");
7234 
7235         return NULL;
7236     }
7237 
7238 
7239     /* Write pdbfile structure */
7240     if (!(pdbfile = pdbioReadLines(inf)))
7241         return NULL;
7242 
7243     /* Allocate Elements object */
7244     elms = pdbioElementsNew(0);
7245 
7246 
7247     ajStrAssignS(&(pdbfile->pdbid), pdbid);
7248     ajStrFmtLower(&(pdbfile->pdbid));
7249 
7250 
7251     /* Initial read of pdb file, read sequences for chains from SEQRES
7252      * records, mark lines up to ignore or as coordinate lines, assigning
7253      * initial residue numbers, read bibliographic information etc. */
7254     if (!pdbioFirstPass(pdbfile, flog, &elms, camask))
7255     {
7256         pdbioPdbfileDel(&pdbfile);
7257         pdbioElementsDel(&elms);
7258 
7259         return NULL;
7260     }
7261 
7262 #if AJFALSE
7263     /* DIAGNOSTIC */
7264     pdbioDiagnostic(&pdbfile, 0);
7265     pdbioDiagnostic(&pdbfile, 1);
7266 #endif /* AJFALSE */
7267 
7268 
7269     /* Check that SEQRES records contain protein chains. Check that chain
7270      * id's are unique */
7271     if (!pdbioCheckChains(pdbfile, flog, min_chain_size))
7272     {
7273         pdbioPdbfileDel(&pdbfile);
7274         pdbioElementsDel(&elms);
7275 
7276         return NULL;
7277     }
7278 
7279 
7280     /* Check for correct number of TER records. Mask unwanted TER records */
7281     if (!pdbioCheckTer(pdbfile, flog))
7282     {
7283         pdbioPdbfileDel(&pdbfile);
7284         pdbioElementsDel(&elms);
7285 
7286         return NULL;
7287     }
7288 
7289 
7290     /* Assign model and chain number to each coordinate line. Mark up
7291      * non-protein coordinates */
7292     if (!pdbioNumberChains(pdbfile, flog))
7293     {
7294         pdbioPdbfileDel(&pdbfile);
7295         pdbioElementsDel(&elms);
7296 
7297         return NULL;
7298     }
7299 
7300     /* Mask any ATOM or HETATM records with chain id's of chains of
7301      * non-proteins or chains that have non-unique id's (chainok==ajFalse).
7302      * Check that ATOM records contain protein chains. */
7303     if (!pdbioMaskChains(pdbfile, flog, min_chain_size, camask,
7304                     camask1, atommask))
7305     {
7306         pdbioElementsDel(&elms);
7307         pdbioPdbfileDel(&pdbfile);
7308 
7309         return NULL;
7310 
7311     }
7312 
7313 #if AJFALSE
7314     /* DIAGNOSTIC */
7315     pdbioDiagnostic(pdbfile, 0);
7316     pdbioDiagnostic(pdbfile, 1);
7317 #endif /* AJFALSE */
7318 
7319     /* Standardise residue numbering */
7320     if (!pdbioStandardiseNumbering(pdbfile, flog))
7321     {
7322         pdbioPdbfileDel(&pdbfile);
7323         pdbioElementsDel(&elms);
7324 
7325         return NULL;
7326     }
7327 
7328 
7329     /* Find correct residue numbering */
7330     if (!pdbioAlignNumbering(pdbfile, flog, max_mismatch, max_trim))
7331     {
7332         pdbioPdbfileDel(&pdbfile);
7333         pdbioElementsDel(&elms);
7334         return NULL;
7335     }
7336 
7337     if (!pdbioWriteElementData(pdbfile, flog, elms))
7338     {
7339         pdbioPdbfileDel(&pdbfile);
7340         pdbioElementsDel(&elms);
7341 
7342         return NULL;
7343     }
7344 
7345     pdbioElementsDel(&elms);
7346 
7347     /* Copy data from Pdbfile object to Pdb object. pdbioPdbfileToPdb creates the
7348      * Pdb object (ret) */
7349     if (!pdbioPdbfileToPdb(&ret, pdbfile))
7350     {
7351         pdbioPdbfileDel(&pdbfile);
7352         ajPdbDel(&ret);
7353 
7354         return NULL;
7355     }
7356 
7357 
7358     /* Tidy up and return */
7359     pdbioPdbfileDel(&pdbfile);
7360 
7361     return ret;
7362 }
7363 
7364 
7365 
7366 
7367 /* ======================================================================= */
7368 /* =========================== destructors =============================== */
7369 /* ======================================================================= */
7370 
7371 
7372 
7373 
7374 /* @section Structure Destructors *********************************************
7375 **
7376 ** All destructor functions receive the address of the instance to be
7377 ** deleted.  The original pointer is set to NULL so is ready for re-use.
7378 **
7379 ******************************************************************************/
7380 
7381 
7382 
7383 
7384 /* ======================================================================= */
7385 /* ============================ Assignments ============================== */
7386 /* ======================================================================= */
7387 
7388 
7389 
7390 
7391 /* @section Assignments *******************************************************
7392 **
7393 ** These functions overwrite the instance provided as the first argument
7394 ** A NULL value is always acceptable so these functions are often used to
7395 ** create a new instance by assignment.
7396 **
7397 ******************************************************************************/
7398 
7399 
7400 
7401 
7402 /* ======================================================================= */
7403 /* ============================= Modifiers =============================== */
7404 /* ======================================================================= */
7405 
7406 
7407 
7408 
7409 /* @section Modifiers *********************************************************
7410 **
7411 ** These functions use the contents of an instance and update them.
7412 **
7413 ******************************************************************************/
7414 
7415 
7416 
7417 
7418 /* ======================================================================= */
7419 /* ========================== Operators ===================================*/
7420 /* ======================================================================= */
7421 
7422 
7423 
7424 
7425 /* @section Operators *********************************************************
7426 **
7427 ** These functions use the contents of an instance but do not make any
7428 ** changes.
7429 **
7430 ******************************************************************************/
7431 
7432 
7433 
7434 
7435 /* ======================================================================= */
7436 /* ============================== Casts ================================== */
7437 /* ======================================================================= */
7438 
7439 
7440 
7441 
7442 /* @section Casts *************************************************************
7443 **
7444 ** These functions examine the contents of an instance and return some
7445 ** derived information. Some of them provide access to the internal
7446 ** components of an instance. They are provided for programming convenience
7447 ** but should be used with caution.
7448 **
7449 ******************************************************************************/
7450 
7451 
7452 
7453 
7454 /* ======================================================================= */
7455 /* =========================== Reporters ==================================*/
7456 /* ======================================================================= */
7457 
7458 
7459 
7460 
7461 /* @section Reporters *********************************************************
7462 **
7463 ** These functions return the contents of an instance but do not make any
7464 ** changes.
7465 **
7466 ******************************************************************************/
7467 
7468 
7469 
7470 
7471 /* ======================================================================= */
7472 /* ========================== Input & Output ============================= */
7473 /* ======================================================================= */
7474 
7475 
7476 
7477 
7478 /* @section Input & output ****************************************************
7479 **
7480 ** These functions are used for formatted input and output to file.
7481 **
7482 ******************************************************************************/
7483 
7484 
7485 
7486 
7487 /* @func ajPdbWriteDomainRecordRaw ********************************************
7488 **
7489 ** Writes lines to a PDB file.  What is written depends upon the mode:
7490 ** ajEPdbioModeHeaderDomain  Header line for domain PDB file.
7491 ** ajEPdbioModeSeqresDomain  SEQRES records for domain.
7492 ** ajEPdbioModeAtomPdbDomain ATOM records for domain using original residue
7493 **                           numbers.
** ajEPdbioModeAtomIdxDomain ATOM records for domain using residue numbers
**                           that give correct index into SEQRES sequence.
7496 **
7497 ** @param [u] mode [AjEPdbioMode] Mode that controls what is printed: one of
7498 **                         ajEPdbioModeHeaderDomain, ajEPdbioModeSeqresDomain,
7499 **                         ajEPdbioModeAtomPdbDomain, ajEPdbioModeAtomIdxDomain
7500 **
7501 ** @param [r] pdb  [const AjPPdb]  Pdb object
7502 ** @param [r] mod  [ajint] Model number
7503 ** @param [r] scop [const AjPScop] Scop object for domain
7504 ** @param [w] outf [AjPFile] Output file stream
7505 ** @param [w] errf [AjPFile] Output file stream for error messages
7506 **
7507 ** @return [AjBool] True on success
7508 **
7509 ** @release 2.9.0
7510 ** @@
7511 ******************************************************************************/
7512 
ajPdbWriteDomainRecordRaw(AjEPdbioMode mode,const AjPPdb pdb,ajint mod,const AjPScop scop,AjPFile outf,AjPFile errf)7513 AjBool ajPdbWriteDomainRecordRaw(AjEPdbioMode mode, const AjPPdb pdb,
7514                                  ajint mod, const AjPScop scop,
7515                                  AjPFile outf, AjPFile errf)
7516 {
7517     /* Check args */
7518     if (!outf || !scop)
7519         ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7520 
7521     switch (mode)
7522     {
7523         case ajEPdbioModeHeaderDomain:
7524             if (!pdbioWriteHeaderScop(outf, scop))
7525                 return ajFalse;
7526             break;
7527 
7528         case ajEPdbioModeSeqresDomain:
7529             if (!errf || !pdb)
7530                 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7531 
7532             if (!pdbioWriteSeqresDomain(errf, outf, pdb, scop))
7533                 return ajFalse;
7534             break;
7535 
7536         case ajEPdbioModeAtomPdbDomain:
7537             if (!errf || !pdb)
7538                 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7539 
7540             if (!pdbioWriteAtomDomainPdb(errf, outf, pdb, scop, mod))
7541                 return ajFalse;
7542             break;
7543 
7544         case ajEPdbioModeAtomIdxDomain:
7545             if (!errf || !pdb)
7546                 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7547 
7548             if (!pdbioWriteAtomDomainIdx(errf, outf, pdb, scop, mod))
7549                 return ajFalse;
7550             break;
7551 
7552         default:
7553             ajFatal("Invalid mode in ajPdbWriteDomainRecordRaw");
7554     }
7555 
7556     return ajTrue;
7557 }
7558 
7559 
7560 
7561 
7562 /* @func ajPdbWriteRecordRaw **************************************************
7563 **
7564 ** Writes lines in pdb format to a PDB file.  What is written depends upon
7565 ** the mode:
7566 ** ajEPdbioModeSeqResChain  SEQRES records for a chain.
7567 ** ajEPdbioModeAtomPdbChain ATOM records for chain using original residue
7568 **                          numbers.
** ajEPdbioModeAtomIdxChain ATOM records for chain using residue numbers that
**                          give correct index into SEQRES sequence.
7571 ** ajEPdbioModeHeterogen    ATOM line for a heterogen (small ligand).
7572 ** ajEPdbioModeHeader       Header line.
7573 ** ajEPdbioModeTitle        Title line.
7574 ** ajEPdbioModeCompnd       COMPND records (info. on compound)
7575 ** ajEPdbioModeSource       SOURCE records (info. on protein source)
7576 ** ajEPdbioModeEmptyRemark  An empty REMARK record.
7577 ** ajEPdbioModeResolution   Record with resolution of the structure.
7578 **
7579 ** @param [u] mode  [AjEPdbioMode]  Mode that controls what is printed: one of
7580 **                           ajEPdbioModeSeqResChain, ajEPdbioModeAtomPdbChain,
7581 **                           ajEPdbioModeAtomIdxChain, ajEPdbioModeHeterogen,
7582 **                           ajEPdbioModeHeader, ajEPdbioModeTitle,
7583 **                           ajEPdbioModeCompnd, ajEPdbioModeSource,
7584 **                           ajEPdbioModeEmptyRemark, ajEPdbioModeResolution.
7585 ** @param [r] pdb   [const AjPPdb]  Pdb object
7586 ** @param [r] mod   [ajint]   Model number.
7587 ** @param [r] chn   [ajint]   Chain number.
7588 ** @param [w] outf  [AjPFile] Output file stream
7589 ** @param [w] errf  [AjPFile] Output file stream for error messages
7590 **
7591 ** @return [AjBool] True on success
7592 **
7593 ** @release 2.9.0
7594 ** @@
7595 ******************************************************************************/
7596 
ajPdbWriteRecordRaw(AjEPdbioMode mode,const AjPPdb pdb,ajint mod,ajint chn,AjPFile outf,AjPFile errf)7597 AjBool ajPdbWriteRecordRaw(AjEPdbioMode mode, const AjPPdb pdb, ajint mod,
7598                            ajint chn, AjPFile outf, AjPFile errf)
7599 {
7600     /* Check args */
7601     if (!outf || !pdb)
7602         ajFatal("Invalid args passed to ajPdbWriteRecordRaw");
7603 
7604     switch (mode)
7605     {
7606         case ajEPdbioModeSeqResChain:
7607             if (!errf)
7608                 ajFatal("Invalid args passed to ajPdbWriteRecordRaw");
7609 
7610             if (!pdbioWriteSeqresChain(errf, outf, pdb, chn))
7611                 return ajFalse;
7612             break;
7613 
7614         case ajEPdbioModeAtomPdbChain:
7615             if (!pdbioWriteAtomChain(outf, pdb, mod, chn, ajEPdbModePdb))
7616                 return ajFalse;
7617             break;
7618 
7619         case ajEPdbioModeAtomIdxChain:
7620             if (!pdbioWriteAtomChain(outf, pdb, mod, chn, ajEPdbModeIdx))
7621                 return ajFalse;
7622             break;
7623 
7624         case ajEPdbioModeHeterogen:
7625             if (!pdbioWriteHeterogen(outf, pdb, mod))
7626                 return ajFalse;
7627             break;
7628 
7629         case ajEPdbioModeHeader:
7630             if (!pdbioWriteHeader(outf, pdb))
7631                 return ajFalse;
7632             break;
7633 
7634         case ajEPdbioModeTitle:
7635             if (!pdbioWriteTitle(outf, pdb))
7636                 return ajFalse;
7637             break;
7638 
7639         case ajEPdbioModeCompnd:
7640             if (!pdbioWriteCompnd(outf, pdb))
7641                 return ajFalse;
7642             break;
7643 
7644         case ajEPdbioModeSource:
7645             if (!pdbioWriteSource(outf, pdb))
7646                 return ajFalse;
7647             break;
7648 
7649         case ajEPdbioModeEmptyRemark:
7650             if (!pdbioWriteEmptyRemark(outf, pdb))
7651                 return ajFalse;
7652             break;
7653 
7654         case ajEPdbioModeResolution:
7655             if (!pdbioWriteResolution(outf, pdb))
7656                 return ajFalse;
7657             break;
7658 
7659         default:
7660             ajFatal("Invalid mode in ajPdbWriteRecordRaw");
7661     }
7662 
7663     return ajTrue;
7664 }
7665 
7666 
7667 
7668 
7669 /* @func ajPdbWriteAllRaw *****************************************************
7670 **
7671 ** Writes a pdb file for a protein.
7672 **
7673 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
7674 **                              or ajEPdbModeIdx if the original or
7675 **                              corrected residue number is to be used.
7676 ** @param [r] pdb  [const AjPPdb]  Pdb object
7677 ** @param [w] outf [AjPFile] Output file stream
7678 ** @param [w] errf [AjPFile] Output file stream for error messages
7679 **
7680 ** @return [AjBool] True on success
7681 **
7682 ** @release 2.9.0
7683 ** @@
7684 ******************************************************************************/
7685 
ajPdbWriteAllRaw(AjEPdbMode mode,const AjPPdb pdb,AjPFile outf,AjPFile errf)7686 AjBool ajPdbWriteAllRaw(AjEPdbMode mode, const AjPPdb pdb,
7687                         AjPFile outf, AjPFile errf)
7688 {
7689     ajuint i = 0U;
7690     ajuint j = 0U;
7691 
7692     /* Write bibliographic info. */
7693     ajPdbWriteRecordRaw(ajEPdbioModeHeader, pdb, 0, 0, outf, NULL);
7694     ajPdbWriteRecordRaw(ajEPdbioModeTitle, pdb, 0, 0, outf, NULL);
7695     ajPdbWriteRecordRaw(ajEPdbioModeCompnd, pdb, 0, 0, outf, NULL);
7696     ajPdbWriteRecordRaw(ajEPdbioModeSource, pdb, 0, 0, outf, NULL);
7697     ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7698     ajPdbWriteRecordRaw(ajEPdbioModeResolution, pdb, 0, 0, outf, NULL);
7699     ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7700 
7701     /* Write SEQRES records */
7702     for (i = 0U; i < pdb->Nchn; i++)
7703         if (!ajPdbWriteRecordRaw(
7704                 ajEPdbioModeSeqResChain, pdb, 0, i + 1, outf, errf))
7705         {
7706             ajWarn("Error writing file in ajPdbWriteAllRaw");
7707 
7708             return ajFalse;
7709         }
7710 
7711 
7712     /* Loop for each model */
7713     for (j = 0U; j < pdb->Nmod; j++)
7714     {
7715         /* Write the MODEL record */
7716         if (pdb->Method == ajEPdbMethodNmr)
7717             ajFmtPrintF(outf, "MODEL%9d%66s\n", j + 1, " ");
7718 
7719 
7720         /* Write ATOM/HETATM records */
7721         for (i = 0U; i < pdb->Nchn; i++)
7722         {
7723             switch (mode)
7724             {
7725                 case ajEPdbModePdb:
7726                     if (!ajPdbWriteRecordRaw(
7727                             ajEPdbioModeAtomPdbChain, pdb, j + 1, i + 1,
7728                             outf, NULL))
7729                     {
7730                         ajWarn("Error writing file in ajPdbWriteAllRaw");
7731 
7732                         return ajFalse;
7733                     }
7734                     break;
7735 
7736                 case ajEPdbModeIdx:
7737                     if (!ajPdbWriteRecordRaw(
7738                             ajEPdbioModeAtomIdxChain, pdb, j + 1, i + 1,
7739                             outf, NULL))
7740                     {
7741                         ajWarn("Error writing file in ajPdbWriteAllRaw");
7742 
7743                         return ajFalse;
7744                     }
7745                     break;
7746 
7747                 default:
7748                     ajFatal("Invalid mode in ajPdbWriteAllRaw");
7749             }
7750 
7751 
7752             if (!ajPdbWriteRecordRaw(
7753                     ajEPdbioModeHeterogen, pdb, j + 1, 0,
7754                     outf, NULL))
7755             {
7756                 ajWarn("Error writing file in ajPdbWriteAllRaw");
7757 
7758                 return ajFalse;
7759             }
7760 
7761 
7762             /* Write ENDMDL record */
7763             if (pdb->Method == ajEPdbMethodNmr)
7764                 ajFmtPrintF(outf, "%-80s\n", "ENDMDL");
7765         }
7766 
7767     }
7768 
7769     /* Write END record */
7770     ajFmtPrintF(outf, "%-80s\n", "END");
7771 
7772     return ajTrue;
7773 
7774 }
7775 
7776 
7777 
7778 
7779 /* @func ajPdbWriteDomainRaw **************************************************
7780 **
7781 ** Writes a pdb file for a SCOP domain. Where coordinates for multiple
7782 ** models (e.g. NMR structures) are given, data for model 1 are written.
7783 ** Coordinates are taken from a Pdb structure, domain definition is taken
7784 ** from a Scop structure.
7785 ** In the pdb file, the coordinates are presented as belonging to a single
7786 ** chain regardless of how many chains the domain comprised.
7787 ** Coordinates for heterogens are NOT written to file.
7788 **
7789 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
7790 **                              or ajEPdbModeIdx if the original or
7791 **                              corrected residue number is to be used.
7792 ** @param [r] pdb  [const AjPPdb]  Pdb object
7793 ** @param [r] scop [const AjPScop] Scop object
7794 ** @param [w] outf [AjPFile] Output file stream
7795 ** @param [w] errf [AjPFile] Output file stream for error messages
7796 **
7797 ** @return [AjBool] True on success
7798 **
7799 ** @release 2.9.0
7800 ** @@
7801 ******************************************************************************/
7802 
ajPdbWriteDomainRaw(AjEPdbMode mode,const AjPPdb pdb,const AjPScop scop,AjPFile outf,AjPFile errf)7803 AjBool ajPdbWriteDomainRaw(AjEPdbMode mode, const AjPPdb pdb,
7804                            const AjPScop scop,
7805                            AjPFile outf, AjPFile errf)
7806 {
7807     ajuint i = 0U;              /* A counter */
7808     ajuint chn = 0U;            /* No. of the chain in the pdb structure */
7809 
7810     if (!pdb || !scop || !outf || !errf)
7811         ajFatal("Bad args passed to ajPdbWriteDomainRaw");
7812 
7813     /* Check for errors in chain identifier and length */
7814     for (i = 0U; i < scop->Number; i++)
7815         if (!ajPdbChnidToNum(scop->Chain[i], pdb, &chn))
7816         {
7817             ajWarn("Chain incompatibility error in "
7818                    "ajPdbWriteDomainRaw");
7819             ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility error "
7820                         "in ajPdbWriteDomainRaw\n", scop->Entry);
7821 
7822             return ajFalse;
7823         }
7824         else if (pdb->Chains[chn - 1]->Nres == 0)
7825         {
7826             ajWarn("Chain length zero");
7827             ajFmtPrintF(errf, "//\n%S\nERROR Chain length zero\n",
7828                         scop->Entry);
7829 
7830             return ajFalse;
7831         }
7832 
7833 
7834 
7835     /* Write bibliographic info. */
7836     ajPdbWriteDomainRecordRaw(
7837         ajEPdbioModeHeaderDomain, NULL, 0, scop, outf, NULL);
7838 
7839     ajPdbWriteRecordRaw(ajEPdbioModeTitle, pdb, 0, 0, outf, NULL);
7840     ajPdbWriteRecordRaw(ajEPdbioModeCompnd, pdb, 0, 0, outf, NULL);
7841     ajPdbWriteRecordRaw(ajEPdbioModeSource, pdb, 0, 0, outf, NULL);
7842     ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7843     ajPdbWriteRecordRaw(ajEPdbioModeResolution, pdb, 0, 0, outf, NULL);
7844     ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7845 
7846 
7847     /* Write SEQRES records */
7848     if (!ajPdbWriteDomainRecordRaw(ajEPdbioModeSeqresDomain, pdb, 0, scop,
7849                                    outf, errf))
7850     {
7851         ajWarn("Error writing file in ajPdbWriteDomainRaw");
7852 
7853         return ajFalse;
7854     }
7855 
7856 
7857     /* Write MODEL record, if appropriate */
7858     if (pdb->Method == ajEPdbMethodNmr)
7859         ajFmtPrintF(outf, "MODEL%9d%66s\n", 1, " ");
7860 
7861 
7862     /* Write ATOM/HETATM records */
7863     if (!pdbioWriteAtomDomain(errf, outf, pdb, scop, 1, mode))
7864     {
7865         ajWarn("Error writing file in ajPdbWriteDomainRaw");
7866 
7867         return ajFalse;
7868     }
7869 
7870 
7871     /* Write END/ENDMDL records */
7872     if (pdb->Method == ajEPdbMethodNmr)
7873         ajFmtPrintF(outf, "%-80s\n", "ENDMDL");
7874 
7875     ajFmtPrintF(outf, "%-80s\n", "END");
7876 
7877     return ajTrue;
7878 }
7879