1 /* @source ajpdbio ************************************************************
2 **
3 ** Data structures and functions for reading and writing PDB format files.
4 ** Includes functions for writing a Pdb object (defined in ajpdb.h).
5 **
6 ** @author Copyright (c) 2004 Jon Ison
7 ** @version $Revision: 1.48 $
8 ** @modified $Date: 2012/12/07 10:16:59 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA 02110-1301, USA.
25 **
26 ******************************************************************************/
27
28 /* ======================================================================= */
29 /* ============================ include files ============================ */
30 /* ======================================================================= */
31
32 #include "ajlib.h"
33
34 #include "ajpdbio.h"
35 #include "ajbase.h"
36 #include "ajfileio.h"
37
38 #include <limits.h>
39 #include <math.h>
40
41
42
43
44 /* ======================================================================= */
45 /* ============================ private data ============================= */
46 /* ======================================================================= */
47
48
49
50
/* @enumstatic PdbfileELinetype ***********************************************
**
** AJAX PDB File Line Type enumeration.
**
** One value is held per line of a PDB file (see the linetype array of the
** Pdbfile object) to record what kind of record the line is.
**
** @value pdbfileELinetypeIgnore
** Line is not considered when parsing coordinates
** @value pdbfileELinetypeCoordinate
** Coordinate line
** @value pdbfileELinetypeHeterogen
** Coordinate line for non-protein atoms
** @value pdbfileELinetypeTER
** TER record (chain terminator)
** @value pdbfileELinetypeMODEL
** MODEL record (model number for multiple structures in a single
** coordinate entry)
** @value pdbfileELinetypeGroups
** Coordinate line for groups that could not be associated with a SEQRES chain
** @value pdbfileELinetypeWater
** Coordinate line for water
** @value pdbfileELinetypeENDMDL
** ENDMDL record (end-of-model record for multiple structures in a single
** coordinate entry)
** @@
******************************************************************************/

typedef enum PdbfileOLinetype
{
    pdbfileELinetypeIgnore     = 0,
    pdbfileELinetypeCoordinate = 1,
    pdbfileELinetypeHeterogen  = 2,
    pdbfileELinetypeTER        = 3,
    pdbfileELinetypeMODEL      = 4,
    pdbfileELinetypeGroups     = 5,
    pdbfileELinetypeWater      = 6,
    pdbfileELinetypeENDMDL     = 7
} PdbfileELinetype;
87
88
89
90
/* @datastatic AjPElement *****************************************************
**
** Nucleus Element object.
**
** Object for holding a single secondary structure element when parsing PDB.
**
** AjPElement is implemented as a pointer to a C data structure.
**
** @alias AjSElement
** @alias AjOElement
**
** @attr elementId [AjPStr] Element identifier (columns 12 - 14)
** @attr initResName [AjPStr] Name of first residue in each element (columns
**                            16 - 18 (HELIX & TURN) or 18 - 20 (SHEET) )
** @attr initSeqNum [AjPStr] Residue number (including insertion code)
**                           of first residue in each element
**                           (columns 22 - 26 (HELIX), 23 - 27 (SHEET)
**                           or 21 - 25 (TURN) )
** @attr endResName [AjPStr] Name of last residue in each element
**                           (columns 28 - 30 (HELIX), 29 - 31 (SHEET)
**                           or 27 - 29 (TURN) )
** @attr endSeqNum [AjPStr] Residue number (including insertion code) of
**                          last residue in each element
**                          (columns 34 - 38 (HELIX and SHEET)
**                          or 32 - 36 (TURN) )
** @attr helixClass [ajint] Classes of helices (columns 39 - 40),
**                          an int from 1-10 (see the URL and the table
**                          below)
** @attr elementNum [ajint] Serial number of the element (columns 8 - 10)
** @attr elementType [char] Element type COIL ('C'), HELIX ('H'),
**                          SHEET ('E') or TURN ('T')
** @attr chainId [char] Chain identifiers for chains containing the
**                      elements (column 20 (HELIX & TURN)
**                      or 22 (SHEET) )
** @attr Padding [char[6]] Padding to alignment boundary
**
** http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html<br>
**
** TYPE OF HELIX CLASS NUMBER (COLUMNS 39 - 40)<br>
** --------------------------------------------------------------<br>
** Right-handed alpha (default) 1<br>
** Right-handed omega 2<br>
** Right-handed pi 3<br>
** Right-handed gamma 4<br>
** Right-handed 310 5<br>
** Left-handed alpha 6<br>
** Left-handed omega 7<br>
** Left-handed gamma 8<br>
** 27 ribbon/helix 9<br>
** Polyproline 10<br>
**
** @@
******************************************************************************/

typedef struct AjSElement
{
    AjPStr elementId;
    AjPStr initResName;

    AjPStr initSeqNum;
    AjPStr endResName;
    AjPStr endSeqNum;
    ajint helixClass;
    ajint elementNum;

    char elementType;
    char chainId;
    char Padding[6];
} AjOElement;

/*
** NOTE: this is a textual macro, not a typedef, so "const AjPElement x"
** expands to "const AjOElement* x" (a pointer to a const object).
*/
#define AjPElement AjOElement*
161
162
163
164
165 /* @datastatic AjPElements ****************************************************
166 **
167 ** Nucleus Elements object.
168 **
169 ** Object for holding secondary structure elements from a PDB file
170 **
171 ** AjPElements is implemented as a pointer to a C data structure.
172 **
173 **
174 **
175 ** @alias AjSElements
176 ** @alias AjOElements
177 **
178 **
179 **
180 ** @attr elms [AjPElement*] Secondary structure element array
181 ** @attr n [ajuint] Total no. of secondary structure elements
182 ** (helices, strands or turns)
183 ** @attr Padding [char[4]] Padding to alignment boundary
184 ** @@
185 ******************************************************************************/
186
typedef struct AjSElements
{
    AjPElement *elms;   /* Secondary structure element array */
    ajuint n;           /* Total no. of elements (helices, strands or turns) */
    char Padding[4];    /* Padding to alignment boundary */
} AjOElements;

/*
** NOTE: this is a textual macro, not a typedef, so "const AjPElements x"
** expands to "const AjOElements* x" (a pointer to a const object).
*/
#define AjPElements AjOElements*
195
196
197
198
199 /* @datastatic AjPPdbfile *****************************************************
200 **
201 ** Nucleus Pdbfile object.
202 **
203 ** Holds a pdb file for parsing.
204 **
205 ** AjPPdbfile is implemented as a pointer to a C data structure.
206 **
207 ** @alias AjSPdbfile
208 ** @alias AjOPdbfile
209 **
210 ** @attr pdbid [AjPStr] 4 character pdb id code
211 ** @attr tercnt [ajuint] The number of TER records in the pdb file
212 ** @attr toofewter [AjBool] True if the file contained too few TER records
213 ** @attr modcnt [ajuint] The number of MODEL records in the pdb file
214 ** (does not count duplicate MODEL records
215 ** that are masked out)
216 ** @attr nomod [AjBool] True if the file contained no MODEL records
217 ** @attr compnd [AjPStr] Text from COMPND records
218 ** @attr source [AjPStr] Text from SOURCE records
219 ** @attr reso [float] Resolution of structure
220 ** @attr method [AjEPdbMethod] AJAX PDB Method enumeration
221 ** @attr gpid [AjPChar] Array of chain (group) id's for groups that
222 ** cannot be associated with a chain in the SEQRES
223 ** section
224 ** @attr idxfirst [ajuint] Index in <lines> of first ATOM, HETATM or MODEL
225 ** line
226 **
227 ** @attr nchains [ajuint] Number of chains (from SEQRES record)
228 ** for sizes of following attribute arrays
229 ** @attr seqres [AjPStr*] Array of sequences taken from the SEQRES records
230 ** @attr seqresful [AjPStr*] Array of sequences using 3 letter codes taken
231 ** from the SEQRES records
232 ** @attr nres [ajint*] Number of residues in each chain
233 ** @attr chainok [AjBool*] Array of flags which are True if a chain in the
234 ** SEQRES record contains >= minimum no. of amino
235 ** acids and has a unique chain identifier
236 ** @attr resn1ok [AjBool*] Bool's for each chain which are TRUE if resn1
237 ** was used to derive resni, i.e. gave correct
238 ** alignment to seqres sequence.
239 ** If False then resn2 was used.
240 ** @attr nligands [ajint*] Number of ligands for each chain. A ligand is a
241 ** non-protein group associated with a chain
242 ** in the SEQRES section.
243 ** @attr numHelices [ajint*] No. of helices in each chain
244 ** @attr numStrands [ajint*] No. of strands in each chain
245 ** @attr numSheets [ajint*] No. of sheets in each chain
246 ** @attr numTurns [ajint*] No. of turns in each chain
247 ** @attr chid [AjPChar] Array of chain id's for chains from SEQRES
248 ** records
249 **
250 ** @attr lines [AjPStr*] Array of lines in the pdb file
** @attr linetype [PdbfileELinetype*]
** Array of line types describing the lines, with values
** of pdbfileELinetypeIgnore (do not consider this line
** when parsing coordinates from the file),
** pdbfileELinetypeCoordinate (coordinate line
** (ATOM or HETATM record) for protein atoms),
** pdbfileELinetypeHeterogen (coordinate line for
** non-protein atoms), pdbfileELinetypeGroups
** (coordinate line for groups that could not be
** associated with a SEQRES chain),
** pdbfileELinetypeWater (coordinate line for
** water), pdbfileELinetypeTER (it is a TER record),
** pdbfileELinetypeMODEL (it is a MODEL record) or
** pdbfileELinetypeENDMDL (it is an ENDMDL record).
264 ** @attr chnn [ajint*] Array of chain numbers for each
265 ** pdbfileELinetypeCoordinate &
266 ** pdbfileELinetypeHeterogen line.
267 ** @attr gpn [ajint*] Array of group numbers for each line. Each group
268 ** (heterogen) is given a group number, that is
269 ** either relative to a chain or the whole file
270 ** (for groups that could not be associated with
271 ** a chain from the SEQRES records)
272 ** @attr modn [ajuint*] Array of model numbers for each
273 ** pdbfileELinetypeCoordinate line
274 **
275 ** @attr resni [ajint*] Residue numbers for each pdbfileELinetypeCoordinate
276 ** line. These give the correct index into the
277 ** 'seqres' sequences
278 ** @attr resn1 [ajint*] Array of residue numbers for each
279 ** pdbfileELinetypeCoordinate line. This is pdbn
280 ** converted to a sequential integer where
281 ** alternative residue numbering is presumed for
282 ** lines where line[26] is used (residues for which
283 ** oddnum == True are considered).
284 ** @attr resn2 [ajint*] Array of residue numbers for each
285 ** pdbfileELinetypeCoordinate line. This is pdbn
286 ** converted to a sequential integer where
287 ** heterogeneity is presumed for lines where
288 ** line[26] is used (residues where oddnum == True
289 ** are ignored).
290 ** @attr pdbn [AjPStr*] Array with a residue number for each line
291 ** for which 'coord' == ajTrue. This is the
292 ** original residue number string (including
293 ** insertion code) from the pdb file
294 ** @attr oddnum [AjBool*] Bool's for each line which are TRUE for
295 ** duplicate residues of heterogenous positions
296 ** (e.g. if 2 different residues are both numbered
297 ** '8' or one is '8' and the other '8A'
298 ** for example then <oddnum> would be set True for
299 ** the second residue. Heterogeneity is indicated
300 ** by a character in position lines[26] (the same
301 ** position used to indicate alternative residue
302 ** numbering schemes).
303 ** @attr atype [AjPStr*] Atom type for each line
304 ** @attr rtype [AjPStr*] Residue type for each line
305 ** @attr x [float*] x-coordinate for each line
306 ** @attr y [float*] y-coordinate for each line
307 ** @attr z [float*] z-coordinate for each line
308 ** @attr o [float*] occupancy for each line
309 ** @attr b [float*] thermal factor for each line
310 **
311 ** @attr elementNum [ajint*] Serial number of the secondary structure element
312 ** (columns 8 - 10)
313 ** @attr elementId [AjPStr*] Secondary structure element identifier (columns
314 ** 12 - 14)
315 ** @attr elementType [char*] Secondary structure element type COIL ('C'),
316 ** HELIX ('H'), SHEET ('E') or TURN ('T')
317 ** @attr helixClass [ajint*] Classes of helices (columns 39 - 40) from
318 ** http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
319 ** (see below). Has a value of 0 (printed out as
320 ** '.') for non-helical elements.
321 ** @attr nlines [ajuint] Number of lines in the pdb file and size of the
322 ** following arrays
** @attr ngroups [ajuint] Number of groups (non-protein groups that
** could not be associated with a chain in the
** SEQRES section)
326 ** @@
327 ******************************************************************************/
328
typedef struct AjSPdbfile
{
    /* Whole-file summary data */
    AjPStr pdbid;
    ajuint tercnt;
    AjBool toofewter;
    ajuint modcnt;
    AjBool nomod;
    AjPStr compnd;
    AjPStr source;
    float reso;
    AjEPdbMethod method;

    /* Ids of groups that could not be associated with a SEQRES chain */
    AjPChar gpid;
    /* Index in lines of the first ATOM, HETATM or MODEL line */
    ajuint idxfirst;

    /*
    ** Per-chain data: nchains is documented as the size of the arrays
    ** that follow it.
    */
    ajuint nchains;
    AjPStr *seqres;
    AjPStr *seqresful;
    ajint *nres;
    AjBool *chainok;
    AjBool *resn1ok;
    ajint *nligands;
    ajint *numHelices;
    ajint *numStrands;
    ajint *numSheets;
    ajint *numTurns;
    AjPChar chid;

    /*
    ** Per-line data, one entry per line of the file.
    ** NOTE(review): presumably every array below has nlines elements;
    ** confirm against the allocation in pdbioPdbfileNew.
    */
    AjPStr *lines;
    PdbfileELinetype *linetype;
    ajint *chnn;
    ajint *gpn;
    ajuint *modn;
    ajint *resni;
    ajint *resn1;
    ajint *resn2;
    AjPStr *pdbn;
    AjBool *oddnum;
    AjPStr *atype;
    AjPStr *rtype;
    float *x;
    float *y;
    float *z;
    float *o;
    float *b;
    ajint *elementNum;
    AjPStr *elementId;
    char *elementType;
    ajint *helixClass;

    /* Counts */
    ajuint nlines;
    ajuint ngroups;
} AjOPdbfile;

/*
** NOTE: this is a textual macro, not a typedef, so "const AjPPdbfile x"
** expands to "const AjOPdbfile* x" (a pointer to a const object).
*/
#define AjPPdbfile AjOPdbfile*
383
384
385
386
/*
** Position in ATOM line of chain id.
** NOTE(review): presumably a zero-based index into the line string
** (the PDB format places the chain id in column 22); confirm at use sites.
*/
#define POS_CHID 21

/*
** A number of residues may be missing from the N-terminus of the SEQRES
** records relative to the ATOM records (e.g. MET and ACE often do not
** appear). The parser will search and correct for such cases.
** MAXMISSNTERM is the maximum number of such missing residues that can be
** accounted for.
*/
#define MAXMISSNTERM 3

/* Diagnostic globals; compiled out because the guard is AJFALSE */
#if AJFALSE
/* DIAGNOSTIC */
AjPStr tempstr;
AjPFile tempfile;
#endif /* AJFALSE */
403
404
405
406
407 /* ======================================================================= */
408 /* ================= Prototypes for private functions ==================== */
409 /* ======================================================================= */
410
/* These functions are for parsing PDB files */
#if AJFALSE
/* DIAGNOSTIC (compiled out because the guard is AJFALSE) */
static void pdbioDiagnostic(AjPPdbfile pdbfile, ajint n);
#endif /* AJFALSE */
static void pdbioPdbfileDel(AjPPdbfile *Ppdbfile);
static AjPPdbfile pdbioReadLines(AjPFile inf);
static AjPPdbfile pdbioPdbfileNew(ajuint nlines, ajuint nchains);
static AjBool pdbioFirstPass(AjPPdbfile pdbfile, AjPFile flog,
                             AjPElements *elms, AjBool camask);
static AjBool pdbioCheckChains(AjPPdbfile pdbfile, AjPFile flog,
                               ajint min_chain_size);
static AjBool pdbioSeqresToSequence(const AjPStr seqres, AjPStr *seq,
                                    AjBool camask, ajuint *len);
static AjBool pdbioCheckTer(AjPPdbfile pdbfile, AjPFile flog);
static AjBool pdbioNumberChains(AjPPdbfile pdbfile, AjPFile flog);
static AjBool pdbioNoMoreAtoms(AjPPdbfile pdbfile, ajuint linen);
static AjBool pdbioMaskChains(AjPPdbfile pdbfile, AjPFile flog,
                              ajint min_chain_size, AjBool camask,
                              AjBool camask1, AjBool atommask);
static AjBool pdbioStandardiseNumbering(AjPPdbfile pdbfile,
                                        AjPFile flog);
static AjBool pdbioAlignNumbering(AjPPdbfile pdbfile, AjPFile flog,
                                  ajuint lim, ajuint lim2);
static AjBool pdbioPdbfileToPdb(AjPPdb *Ppdb, AjPPdbfile pdbfile);
static ajint pdbioPdbfileFindLine(const AjPPdbfile pdbfile, ajint chn,
                                  ajint which, ajint pos);

/* Functions for Elements object */
static AjPElements pdbioElementsNew(ajuint nelms);
static void pdbioElementsDel(AjPElements *Pelements);

/* Functions for Element object */
static AjPElement pdbioElementNew(void);
static void pdbioElementDel(AjPElement *Pelement);

/*
** The next two take a Pdbfile object rather than an Element.
** NOTE(review): judging by their signatures they look up a chain number
** from a chain id and write element data into the Pdbfile; confirm
** against their definitions.
*/
static AjBool pdbioPdbfileChain(char id, const AjPPdbfile pdb, ajint *chn);
static AjBool pdbioWriteElementData(AjPPdbfile pdbfile, AjPFile flog,
                                    const AjPElements elms);

/* These functions are called by ajPdbWriteDomainRecordRaw */
static AjBool pdbioWriteHeaderScop(AjPFile outf, const AjPScop scop);
static AjBool pdbioWriteSeqresDomain(AjPFile errf, AjPFile outf,
                                     const AjPPdb pdb, const AjPScop scop);
static AjBool pdbioWriteAtomDomainPdb(AjPFile errf, AjPFile outf,
                                      const AjPPdb pdb, const AjPScop scop,
                                      ajint mod);
static AjBool pdbioWriteAtomDomainIdx(AjPFile errf, AjPFile outf,
                                      const AjPPdb pdb, const AjPScop scop,
                                      ajint mod);
static AjBool pdbioWriteAtomDomain(AjPFile errf, AjPFile outf,
                                   const AjPPdb pdb,
                                   const AjPScop scop, ajuint mod,
                                   AjEPdbMode mode);

/* These functions are called by ajPdbWriteRecordRaw */
static AjBool pdbioWriteSeqresChain(AjPFile errf, AjPFile outf,
                                    const AjPPdb pdb, ajint chn);
static AjBool pdbioWriteAtomChain(AjPFile outf, const AjPPdb pdb, ajuint mod,
                                  ajuint chn, AjEPdbMode mode);
static AjBool pdbioWriteHeterogen(AjPFile outf, const AjPPdb pdb, ajuint mod);
static AjBool pdbioWriteHeader(AjPFile outf, const AjPPdb pdb);
static AjBool pdbioWriteTitle(AjPFile outf, const AjPPdb pdb);
static AjBool pdbioWriteCompnd(AjPFile outf, const AjPPdb pdb);
static AjBool pdbioWriteSource(AjPFile outf, const AjPPdb pdb);
static AjBool pdbioWriteEmptyRemark(AjPFile outf, const AjPPdb pdb);
static AjBool pdbioWriteResolution(AjPFile outf, const AjPPdb pdb);

/* Others */
static AjBool pdbioWriteText(AjPFile outf, const AjPStr str,
                             const char *prefix);
481
482
483
484
485 /* ======================================================================= */
486 /* ========================== private functions ========================== */
487 /* ======================================================================= */
488
489
490
491
492 /* @funcstatic pdbioWriteSeqresChain ******************************************
493 **
494 ** Writes sequence for a protein chain to an output file in pdb format
495 ** (SEQRES records). Sequence is taken from a Pdb structure.
496 **
497 ** @param [w] errf [AjPFile] Output file stream for error messages
498 ** @param [w] outf [AjPFile] Output file stream
499 ** @param [r] pdb [const AjPPdb] Pdb object
500 ** @param [r] chn [ajint] chain number, beginning at 1
501 **
502 ** @return [AjBool] True on success
503 **
504 ** @release 2.9.0
505 ** @@
506 ******************************************************************************/
507
pdbioWriteSeqresChain(AjPFile errf,AjPFile outf,const AjPPdb pdb,ajint chn)508 static AjBool pdbioWriteSeqresChain(AjPFile errf, AjPFile outf,
509 const AjPPdb pdb, ajint chn)
510 {
511 ajuint last_rn = 0U;
512 ajuint this_rn = 0U;
513 ajuint i = 0U;
514 ajuint j = 0U;
515 ajuint len = 0U;
516 const char *p;
517
518 AjPStr tmp1 = NULL;
519 AjPStr tmp2 = NULL;
520 AjIList iter = NULL;
521 AjPAtom atom = NULL;
522
523 tmp1 = ajStrNew();
524 tmp2 = ajStrNew();
525
526 iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
527
528 /* Iterate through list of atoms */
529 while ((atom = (AjPAtom) ajListIterGet(iter)))
530 {
531 /*
532 ** Hard-coded to work on model 1
533 ** Break if a non-protein atom is found or model no. !=1
534 ** Continue / break if a non-protein atom is found or model no. !=1
535 */
536 if (atom->Mod != 1)
537 break;
538
539 if (atom->Type != 'P')
540 continue;
541
542 /* If we are onto a new residue */
543 this_rn = atom->Idx;
544 if (this_rn != last_rn)
545 {
546 /* Assign sequence for residues missing from the linked list */
547 for (i = last_rn; i < this_rn - 1; i++)
548 {
549 /* Check that position i is in range for the sequence */
550 if (!ajResidueToTriplet(
551 ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, i),
552 &tmp2))
553 {
554 ajWarn("Index out of range in pdbioWriteSeqresChain");
555 ajFmtPrintF(errf, "//\n%S\nERROR Index out "
556 "of range in pdbioWriteSeqresChain\n",
557 pdb->Pdb);
558
559 ajStrDel(&tmp1);
560 ajStrDel(&tmp2);
561 ajListIterDel(&iter);
562
563 return ajFalse;
564 }
565 else
566 {
567 ajStrAppendS(&tmp1, tmp2);
568 ajStrAppendC(&tmp1, " ");
569 }
570 }
571
572 ajStrAppendS(&tmp1, atom->Id3);
573 ajStrAppendC(&tmp1, " ");
574
575 last_rn = this_rn;
576 }
577 }
578
579 /* Assign sequence for residues missing from end of linked list */
580 for (i = last_rn; i < pdb->Chains[chn - 1]->Nres; i++)
581 if (!ajResidueToTriplet(
582 ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, i),
583 &tmp2))
584 {
585 ajStrDel(&tmp1);
586 ajStrDel(&tmp2);
587 ajListIterDel(&iter);
588 ajWarn("Index out of range in pdbioWriteSeqresChain");
589 ajFmtPrintF(errf, "//\n%S\nERROR Index out of range "
590 "in pdbioWriteSeqresChain\n", pdb->Pdb);
591
592 return ajFalse;
593 }
594 else
595 {
596 ajStrAppendS(&tmp1, tmp2);
597 ajStrAppendC(&tmp1, " ");
598 }
599
600 /* Print out SEQRES records */
601 for (p = ajStrGetPtr(tmp1), len = ajStrGetLen(tmp1), i = 0U, j = 1U;
602 i < len;
603 i += 52, j++, p += 52)
604 ajFmtPrintF(outf, "SEQRES%4u %c%5u %-61.52s\n",
605 j,
606 pdb->Chains[chn - 1]->Id,
607 pdb->Chains[chn - 1]->Nres,
608 p);
609
610 /* Tidy up */
611 ajStrDel(&tmp1);
612 ajStrDel(&tmp2);
613 ajListIterDel(&iter);
614
615 return ajTrue;
616 }
617
618
619
620
621 /* @funcstatic pdbioWriteSeqresDomain *****************************************
622 **
623 ** Writes sequence for a SCOP domain to an output file in pdb format (SEQRES
624 ** records). Sequence is taken from a Pdb structure, domain definition is
625 ** taken from a Scop structure. Where coordinates for multiple models (e.g.
626 ** NMR structures) are given, data for model 1 are written.
627 **
628 ** @param [w] errf [AjPFile] Output file stream for error messages
629 ** @param [w] outf [AjPFile] Output file stream
630 ** @param [r] pdb [const AjPPdb] Pdb object
631 ** @param [r] scop [const AjPScop] Scop object
632 **
633 ** @return [AjBool] True on success
634 **
635 ** @release 2.9.0
636 ** @@
637 ******************************************************************************/
638
pdbioWriteSeqresDomain(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop)639 static AjBool pdbioWriteSeqresDomain(AjPFile errf, AjPFile outf,
640 const AjPPdb pdb, const AjPScop scop)
641 {
642 ajuint last_rn = 0U;
643 ajuint this_rn = 0U;
644 ajuint i = 0U;
645 ajuint j = 0U;
646 ajint rcnt = 0;
647 ajuint len = 0U;
648 ajuint chn = 0U;
649 const char *p = NULL;
650 char id = '\0';
651
652 AjPStr tmp1 = NULL;
653 AjPStr tmp2 = NULL;
654 AjBool found_start = ajFalse;
655 AjBool found_end = ajFalse;
656 AjBool nostart = ajFalse;
657 AjBool noend = ajFalse;
658 AjIList iter = NULL;
659 AjPAtom atom = NULL;
660 AjPStr tmpstr = NULL;
661 /* AjPResidue *resarr = NULL; */
662
663
664 /* Allocate strings etc */
665 tmp1 = ajStrNew();
666 tmp2 = ajStrNew();
667 tmpstr = ajStrNew();
668
669
670 /* Loop for each chain in the domain */
671 for (i = 0U;
672 i < scop->Number;
673 i++,
674 found_start = ajFalse,
675 found_end = ajFalse,
676 last_rn = 0)
677 {
678
679 /* Check for error in chain id */
680 if (!ajPdbChnidToNum(scop->Chain[i], pdb, &chn))
681 {
682 ajListIterDel(&iter);
683 ajStrDel(&tmp1);
684 ajStrDel(&tmp2);
685 ajStrDel(&tmpstr);
686
687 ajWarn("Chain incompatibility error in "
688 "pdbioWriteSeqresDomain");
689
690 ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility "
691 "error in pdbioWriteSeqresDomain\n",
692 scop->Entry);
693
694 return ajFalse;
695 }
696
697 #if AJFALSE
698 if (resarr)
699 AJFREE(resarr);
700 ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
701 #endif /* AJFALSE */
702
703 /* Initialise iterator for list of atoms */
704 iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
705
706 /* Start of chain not specified */
707 if (!ajStrCmpC(scop->Start[i], "."))
708 nostart = ajTrue;
709 else
710 nostart = ajFalse;
711
712 /* End of chain not specified */
713 if (!ajStrCmpC(scop->End[i], "."))
714 noend = ajTrue;
715 else
716 noend = ajFalse;
717
718 /* Iterate through list of atoms */
719 while ((atom = (AjPAtom) ajListIterGet(iter)))
720 {
721 /* Hard-coded to work on model 1 */
722 /*
723 ** Continue / break if a non-protein atom is found or
724 ** model no. != 1
725 */
726 if (atom->Mod != 1)
727 break;
728
729 if (atom->Type != 'P')
730 continue;
731
732
733 /* If there is a new residue */
734 this_rn = atom->Idx;
735
736 if (this_rn != last_rn)
737 {
738 /*
739 ** The start position was specified, but has not
740 ** been found yet
741 */
742 if (!found_start && !nostart)
743 {
744 ajStrAssignS(&tmpstr, scop->Start[i]);
745 ajStrAppendK(&tmpstr, '*');
746
747
748 /* Start position found */
749 /* if(!ajStrCmpCaseS(atom->Pdb, scop->Start[i])) */
750 if (ajStrMatchWildS(atom->Pdb, tmpstr))
751 /*
752 ** if(ajStrMatchWildS(resarr[atom->Idx-1]->Pdb,
753 ** tmpstr))
754 */
755 {
756 if (!ajStrMatchS(atom->Pdb, scop->Start[i]))
757 /* if(!ajStrMatchS(resarr[atom->Idx-1]->Pdb, *
758 * scop->Start[i])) */
759 {
760 ajWarn("Domain start found by wildcard match "
761 "only in pdbioWriteSeqresDomain");
762 ajFmtPrintF(errf, "//\n%S\nERROR Domain start "
763 "found "
764 "by wildcard match only in "
765 "pdbioWriteSeqresDomain\n",
766 scop->Entry);
767 }
768
769 last_rn = this_rn;
770 found_start = ajTrue;
771 }
772 else
773 {
774 last_rn = this_rn;
775 continue;
776 }
777
778 }
779
780
781 /*
782 ** Assign sequence for residues missing from the linked list
783 ** of atoms of known structure
784 */
785 for (j = last_rn; j < this_rn - 1; j++)
786 {
787 /* Check that position j is in range for the sequence */
788 if (!ajResidueToTriplet(
789 ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, j),
790 &tmp2))
791 {
792 ajListIterDel(&iter);
793 ajStrDel(&tmp1);
794 ajStrDel(&tmp2);
795 ajStrDel(&tmpstr);
796
797 ajWarn("Index out of range in "
798 "pdbioWriteSeqresDomain");
799 ajFmtPrintF(errf, "//\n%S\nERROR Index out of range "
800 "in pdbioWriteSeqresDomain\n",
801 scop->Entry);
802
803 return ajFalse;
804 }
805 else
806 {
807 ajStrAppendS(&tmp1, tmp2);
808 ajStrAppendC(&tmp1, " ");
809 rcnt++;
810 }
811 }
812
813 last_rn = this_rn;
814
815
816 /* Append the residue to the sequence */
817 ajStrAppendS(&tmp1, atom->Id3);
818 ajStrAppendC(&tmp1, " ");
819 rcnt++;
820
821
822 /* The end position was specified, but has not been found yet */
823 if (!found_end && !noend)
824 {
825 ajStrAssignS(&tmpstr, scop->End[i]);
826 ajStrAppendK(&tmpstr, '*');
827
828
829 /* End found */
830 /*
831 ** if(!ajStrCmpCaseS(atom->Pdb, scop->End[i]))
832 */
833 if (ajStrMatchWildS(atom->Pdb, tmpstr))
834 /*
835 ** if(ajStrMatchWildS(resarr[atom->Idx-1]->Pdb,
836 ** tmpstr))
837 */
838 {
839 if (!ajStrMatchS(atom->Pdb, scop->End[i]))
840 /*
841 ** if(!ajStrMatchS(resarr[atom->Idx-1]->Pdb,
842 ** scop->End[i]))
843 */
844 {
845 ajWarn("Domain end found by wildcard match only "
846 "in pdbioWriteSeqresDomain");
847 ajFmtPrintF(errf, "//\n%S\nERROR Domain end found "
848 "by wildcard match only in "
849 "pdbioWriteSeqresDomain\n",
850 scop->Entry);
851 }
852
853
854 found_end = ajTrue;
855 break;
856 }
857 }
858 }
859 }
860
861
862 /* Domain start specified but not found */
863 if (!found_start && !nostart)
864 {
865 ajListIterDel(&iter);
866 ajStrDel(&tmp1);
867 ajStrDel(&tmp2);
868 ajStrDel(&tmpstr);
869
870 ajWarn("Domain start not found in pdbioWriteSeqresDomain");
871 ajFmtPrintF(errf, "//\n%S\nERROR Domain start not found "
872 "in pdbioWriteSeqresDomain\n", scop->Entry);
873
874 return ajFalse;
875 }
876
877
878 /* Domain end specified but not found */
879 if (!found_end && !noend)
880 {
881 ajListIterDel(&iter);
882 ajStrDel(&tmp1);
883 ajStrDel(&tmp2);
884 ajStrDel(&tmpstr);
885
886 ajWarn("Domain end not found in pdbioWriteSeqresDomain");
887 ajFmtPrintF(errf, "//\n%S\nERROR Domain end not found "
888 "in pdbioWriteSeqresDomain\n", scop->Entry);
889
890 return ajFalse;
891 }
892
893 /*
894 ** Assign sequence for residues missing from end of linked list
895 ** Only needs to be done where the end of the domain is not specified
896 */
897 if (noend)
898 {
899 for (j = last_rn; j < pdb->Chains[chn - 1]->Nres; j++)
900 if (!ajResidueToTriplet(
901 ajStrGetCharPos(pdb->Chains[chn - 1]->Seq, j),
902 &tmp2))
903 {
904 ajStrDel(&tmp1);
905 ajStrDel(&tmp2);
906 ajStrDel(&tmpstr);
907
908 ajListIterDel(&iter);
909 ajWarn("Index out of range in pdbioWriteSeqresDomain");
910 ajFmtPrintF(errf, "//\n%S\nERROR Index out of "
911 "range in pdbioWriteSeqresDomain\n",
912 scop->Entry);
913
914 return ajFalse;
915 }
916 else
917 {
918 ajStrAppendS(&tmp1, tmp2);
919 ajStrAppendC(&tmp1, " ");
920 rcnt++;
921 }
922 }
923
924 ajListIterDel(&iter);
925 }
926
927 /*
928 ** If the domain was composed of more than once chain then a '.' is
929 ** given as the chain identifier
930 */
931 if (scop->Number > 1)
932 id = '.';
933 else
934 id = pdb->Chains[chn - 1]->Id;
935
936 /* Print out SEQRES records */
937 for (p = ajStrGetPtr(tmp1), len = ajStrGetLen(tmp1), i = 0U, j = 1;
938 i < len;
939 i += 52, j++, p += 52)
940 ajFmtPrintF(outf, "SEQRES%4u %c%5d %-61.52s\n",
941 j,
942 id,
943 rcnt,
944 p);
945
946 #if AJFALSE
947 if (resarr)
948 AJFREE(resarr);
949 #endif /* AJFALSE */
950 ajStrDel(&tmp1);
951 ajStrDel(&tmp2);
952 ajStrDel(&tmpstr);
953
954 return ajTrue;
955 }
956
957
958
959
960 /* @funcstatic pdbioWriteAtomChain ********************************************
961 **
962 ** Writes coordinates for a protein chain to an output file in pdb format
963 ** (ATOM records). Coordinates are taken from a Pdb structure. The model
964 ** number argument should have a value of 1 for x-ray structures.
965 **
966 ** @param [w] outf [AjPFile] Output file stream
967 ** @param [r] pdb [const AjPPdb] Pdb object
968 ** @param [r] mod [ajuint] Model number, beginning at 1
969 ** @param [r] chn [ajuint] Chain number, beginning at 1
970 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration. Either ajEPdbModePdb
971 ** or ajEPdbModeIdx if the original or
972 ** corrected residue number is to be used.
973 **
974 ** @return [AjBool] True on success
975 **
976 ** @release 2.9.0
977 ** @@
978 ******************************************************************************/
979
pdbioWriteAtomChain(AjPFile outf,const AjPPdb pdb,ajuint mod,ajuint chn,AjEPdbMode mode)980 static AjBool pdbioWriteAtomChain(AjPFile outf, const AjPPdb pdb,
981 ajuint mod, ajuint chn,
982 AjEPdbMode mode)
983 {
984 AjBool doneter = ajFalse;
985 AjIList iter = NULL;
986 AjPAtom atom1 = NULL;
987 AjPAtom atom2 = NULL;
988 ajint acnt = 0;
989 #if AJFALSE
990 AjPResidue *resarr = NULL;
991 #endif /* AJFALSE */
992
993 /* Check args are not NULL */
994 if (!outf || !pdb || mod < 1 || chn < 1)
995 return ajFalse;
996
997 #if AJFALSE
998 ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
999 #endif /* AJFALSE */
1000
1001 doneter = ajFalse;
1002 iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
1003
1004 while ((atom1 = (AjPAtom) ajListIterGet(iter)))
1005 if (atom1->Mod == mod)
1006 break;
1007
1008 for (acnt = 1; atom1; atom1 = (AjPAtom) ajListIterGet(iter))
1009 {
1010 /* Break if on a new model */
1011 if (atom1->Mod != mod)
1012 break;
1013
1014
1015 /* End of protein atoms - so write a TER record */
1016 if (atom1->Type != 'P' && (!doneter))
1017 {
1018 switch (mode)
1019 {
1020 case ajEPdbModeIdx:
1021 ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n",
1022 "TER",
1023 acnt++,
1024 atom2->Id3,
1025 pdb->Chains[chn - 1]->Id,
1026 atom2->Idx,
1027 " ");
1028 break;
1029
1030 case ajEPdbModePdb:
1031 ajFmtPrintF(outf, "%-6s%5d %-4S%c%4S%54s\n",
1032 "TER",
1033 acnt++,
1034 atom2->Id3,
1035 pdb->Chains[chn - 1]->Id,
1036 atom2->Pdb,
1037 /* resarr[atom2->Idx-1]->Pdb, */
1038 " ");
1039 break;
1040
1041 default:
1042 ajFatal("Invalid mode in pdbioWriteAtomChain");
1043 }
1044
1045 doneter = ajTrue;
1046 }
1047
1048
1049 /* Write out ATOM or HETATM line */
1050 if (atom1->Type == 'P')
1051 ajFmtPrintF(outf, "%-6s", "ATOM");
1052 else
1053 ajFmtPrintF(outf, "%-6s", "HETATM");
1054
1055 switch (mode)
1056 {
1057 case ajEPdbModeIdx:
1058 ajFmtPrintF(outf, "%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1059 "%6.2f%6.2f%11s%-3c\n",
1060 acnt++,
1061 atom1->Atm,
1062 atom1->Id3,
1063 pdb->Chains[chn - 1]->Id,
1064 atom1->Idx,
1065 atom1->X,
1066 atom1->Y,
1067 atom1->Z,
1068 atom1->O,
1069 atom1->B,
1070 " ",
1071 *ajStrGetPtr(atom1->Atm));
1072 break;
1073
1074 case ajEPdbModePdb:
1075 ajFmtPrintF(outf, "%5d %-4S%-4S%c%4S%12.3f%8.3f%8.3f"
1076 "%6.2f%6.2f%11s%-3c\n",
1077 acnt++,
1078 atom1->Atm,
1079 atom1->Id3,
1080 pdb->Chains[chn - 1]->Id,
1081 atom1->Pdb,
1082 /* resarr[atom1->Idx-1]->Pdb, */
1083 atom1->X,
1084 atom1->Y,
1085 atom1->Z,
1086 atom1->O,
1087 atom1->B,
1088 " ",
1089 *ajStrGetPtr(atom1->Atm));
1090 break;
1091
1092 default:
1093 ajFatal("Invalid mode in pdbioWriteAtomChain");
1094 }
1095
1096 atom2 = atom1;
1097 }
1098
1099 /* Write TER record if its not already done */
1100 if (!doneter)
1101 {
1102 ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n",
1103 "TER",
1104 acnt++,
1105 atom2->Id3,
1106 pdb->Chains[chn - 1]->Id,
1107 atom2->Idx,
1108 " ");
1109 doneter = ajTrue;
1110 }
1111 ajListIterDel(&iter);
1112
1113 #if AJFALSE
1114 if (resarr)
1115 AJFREE(resarr);
1116 #endif /* AJFALSE */
1117
1118 return ajTrue;
1119 }
1120
1121
1122
1123
1124 /* @funcstatic pdbioWriteAtomDomain *******************************************
1125 **
1126 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1127 ** records). Coordinates are taken from a Pdb structure, domain definition is
1128 ** taken from a Scop structure. The model number argument should have a value
1129 ** of 1 for x-ray structures. Coordinates for heterogens are NOT written to
1130 ** file. The corrected residue numbers are given (these give an index into
1131 ** the SEQRES sequence.
1132 **
1133 ** @param [w] errf [AjPFile] Output file stream for error messages
1134 ** @param [w] outf [AjPFile] Output file stream
1135 ** @param [r] pdb [const AjPPdb] Pdb object
1136 ** @param [r] scop [const AjPScop] Scop object
1137 ** @param [r] mod [ajuint] Model number, beginning at 1
1138 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
1139 ** or ajEPdbModeIdx if the original or corrected
1140 ** residue number is to be used.
1141 **
1142 ** @return [AjBool] True on success
1143 **
1144 ** @release 2.9.0
1145 ** @@
1146 ******************************************************************************/
1147
pdbioWriteAtomDomain(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajuint mod,AjEPdbMode mode)1148 static AjBool pdbioWriteAtomDomain(AjPFile errf, AjPFile outf,
1149 const AjPPdb pdb,
1150 const AjPScop scop, ajuint mod,
1151 AjEPdbMode mode)
1152 {
1153 /*
1154 ** rn_mod is a modifier to the residue number to give correct residue
1155 ** numbering for the domain
1156 */
1157 ajint acnt = 1;
1158 ajint rn_mod = 0;
1159 ajuint z = 0U;
1160 ajuint finalrn = 0U;
1161 ajuint chn = 0U;
1162 char id = '\0';
1163
1164 AjBool found_start = ajFalse;
1165 AjBool found_end = ajFalse;
1166 AjBool nostart = ajFalse;
1167 AjBool noend = ajFalse;
1168 AjIList iter = NULL;
1169 AjPAtom atom1 = NULL;
1170 AjPAtom atom2 = NULL;
1171 AjPStr tmpstr = NULL;
1172 #if AJFALSE
1173 AjPResidue *resarr = NULL;
1174 #endif /* AJFALSE */
1175
1176
1177 if (!errf || !outf || !pdb || !scop)
1178 ajFatal("Bad args. passed to pdbioWriteAtomDomain");
1179
1180
1181 /* Allocate strings etc */
1182 tmpstr = ajStrNew();
1183
1184
1185
1186 /* Loop for each chain in the domain */
1187 for (z = 0;
1188 z < scop->Number;
1189 z++, found_start = ajFalse, found_end = ajFalse)
1190 {
1191 /* Check for chain error */
1192 if (!ajPdbChnidToNum(scop->Chain[z], pdb, &chn))
1193 {
1194 ajListIterDel(&iter);
1195 ajWarn("Chain incompatibility error in "
1196 "pdbioWriteAtomDomain");
1197 ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility "
1198 "error in pdbioWriteAtomDomain\n",
1199 scop->Entry);
1200 ajStrDel(&tmpstr);
1201
1202 return ajFalse;
1203 }
1204
1205
1206 #if AJFALSE
1207 ajListToarray(pdb->Chains[chn - 1]->Residues, (void ***) &resarr);
1208 #endif /* AJFALSE */
1209
1210 /* Iterate up to the correct model */
1211 iter = ajListIterNewread(pdb->Chains[chn - 1]->Atoms);
1212
1213 while ((atom1 = (AjPAtom) ajListIterGet(iter)))
1214 if (atom1->Mod == mod)
1215 break;
1216
1217 if (!atom1)
1218 ajFatal("Unexpected error (atom1 == NULL) in "
1219 "pdbioWriteAtomDomain\n");
1220
1221 /* Increment res. counter from last chain if appropriate */
1222 if (noend)
1223 rn_mod += atom2->Idx;
1224 else
1225 rn_mod += finalrn;
1226
1227 /* Start of chain was not specified */
1228 if (!ajStrCmpC(scop->Start[z], "."))
1229 nostart = ajTrue;
1230 else
1231 nostart = ajFalse;
1232
1233 /* End of chain was not specified */
1234 if (!ajStrCmpC(scop->End[z], "."))
1235 noend = ajTrue;
1236 else
1237 noend = ajFalse;
1238
1239 /* If the domain was composed of more than once chain then a '.' is
1240 * given as the chain identifier */
1241 if (scop->Number > 1)
1242 id = '.';
1243 else
1244 id = pdb->Chains[chn - 1]->Id;
1245
1246 for (; atom1; atom1 = (AjPAtom) ajListIterGet(iter))
1247 {
1248 /*
1249 ** Continue / break if a non-protein atom is found or
1250 ** model no. is incorrect
1251 */
1252 if (atom1->Mod != mod)
1253 break;
1254
1255 if (atom1->Type != 'P')
1256 continue;
1257
1258 /*
1259 ** The start position was specified, but has not
1260 ** been found yet
1261 */
1262 if (!found_start && !nostart)
1263 {
1264 ajStrAssignS(&tmpstr, scop->Start[z]);
1265 ajStrAppendK(&tmpstr, '*');
1266
1267 /* Start position found */
1268 /*
1269 ** if(!ajStrCmpCaseS(atom1->Pdb, scop->Start[z]))
1270 */
1271 if (ajStrMatchWildS(atom1->Pdb, tmpstr))
1272 /*
1273 ** if(ajStrMatchWildS(resarr[atom1->Idx-1]->Pdb, tmpstr))
1274 */
1275 {
1276 if (!ajStrMatchS(atom1->Pdb, scop->Start[z]))
1277 /*
1278 ** if(!ajStrMatchS(resarr[atom1->Idx-1]->Pdb,
1279 ** scop->Start[z]))
1280 */
1281 {
1282 ajWarn("Domain start found by wildcard match only "
1283 "in pdbioWriteAtomDomain");
1284 ajFmtPrintF(errf, "//\n%S\nERROR Domain start found "
1285 "by wildcard match only in "
1286 "pdbioWriteAtomDomain\n", scop->Entry);
1287 }
1288
1289 rn_mod -= atom1->Idx - 1;
1290 found_start = ajTrue;
1291 }
1292 else
1293 continue;
1294 }
1295
1296 /* The end position was specified, but has not been found yet */
1297 if (!found_end && !noend)
1298 {
1299 ajStrAssignS(&tmpstr, scop->End[z]);
1300 ajStrAppendK(&tmpstr, '*');
1301
1302 /* End position found */
1303 /*
1304 ** if(!ajStrCmpCaseS(atom1->Pdb, scop->End[z]))
1305 */
1306 if (ajStrMatchWildS(atom1->Pdb, tmpstr))
1307 /*
1308 ** if(ajStrMatchWildS(resarr[atom1->Idx-1]->Pdb, tmpstr))
1309 */
1310 {
1311 if (!ajStrMatchS(atom1->Pdb, scop->End[z]))
1312 /*
1313 ** if(!ajStrMatchS(resarr[atom1->Idx-1]->Pdb,
1314 ** scop->End[z]))
1315 */
1316 {
1317 ajWarn("Domain end found by wildcard match only "
1318 "in pdbioWriteAtomDomain");
1319 ajFmtPrintF(errf, "//\n%S\nERROR Domain end found "
1320 "by wildcard match only in "
1321 "pdbioWriteAtomDomain\n", scop->Entry);
1322 }
1323
1324 found_end = ajTrue;
1325 finalrn = atom1->Idx;
1326 }
1327 }
1328 else if (atom1->Idx != finalrn && !noend)
1329 break;
1330
1331 /* Write out ATOM line to pdb file */
1332 switch (mode)
1333 {
1334 case ajEPdbModeIdx:
1335 ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f"
1336 "%8.3f%6.2f%6.2f%11s%-3c\n",
1337 "ATOM",
1338 acnt++,
1339 atom1->Atm,
1340 atom1->Id3,
1341 id,
1342 atom1->Idx + rn_mod,
1343 atom1->X,
1344 atom1->Y,
1345 atom1->Z,
1346 atom1->O,
1347 atom1->B,
1348 " ",
1349 *ajStrGetPtr(atom1->Atm));
1350 break;
1351
1352 case ajEPdbModePdb:
1353 ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4S%12.3f%8.3f"
1354 "%8.3f%6.2f%6.2f%11s%-3c\n",
1355 "ATOM",
1356 acnt++,
1357 atom1->Atm,
1358 atom1->Id3,
1359 id,
1360 atom1->Pdb,
1361 /* resarr[atom1->Idx-1]->Pdb, */
1362 atom1->X,
1363 atom1->Y,
1364 atom1->Z,
1365 atom1->O,
1366 atom1->B,
1367 " ",
1368 *ajStrGetPtr(atom1->Atm));
1369 break;
1370
1371 default:
1372 ajFatal("Invalid mode in pdbioWriteAtomDomain");
1373 }
1374
1375 /* Assign pointer for this chain */
1376 atom2 = atom1;
1377 }
1378
1379
1380 /* Diagnostic if start was specified but not found */
1381 if (!found_start && !nostart)
1382 {
1383 ajListIterDel(&iter);
1384 ajWarn("Domain start not found in pdbioWriteAtomDomain");
1385 ajFmtPrintF(errf, "//\n%S\nERROR Domain start not "
1386 "found in pdbioWriteAtomDomain\n", scop->Entry);
1387 ajStrDel(&tmpstr);
1388
1389 return ajFalse;
1390 }
1391
1392
1393 /* Diagnostic if end was specified but not found */
1394 if (!found_end && !noend)
1395 {
1396 ajListIterDel(&iter);
1397 ajWarn("Domain end not found in pdbioWriteAtomDomain");
1398 ajFmtPrintF(errf, "//\n%S\nERROR Domain end not "
1399 "found in pdbioWriteAtomDomain\n", scop->Entry);
1400 ajStrDel(&tmpstr);
1401
1402 return ajFalse;
1403 }
1404
1405
1406 ajListIterDel(&iter);
1407 }
1408
1409 if (!atom2)
1410 ajFatal("Unexpected error (atom2 == NULL) in pdbioWriteAtomDomain\n");
1411
1412 /* Write the TER record to the pdb file */
1413 ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n",
1414 "TER",
1415 acnt++,
1416 atom2->Id3,
1417 id,
1418 atom2->Idx + rn_mod,
1419 " ");
1420
1421 ajStrDel(&tmpstr);
1422
1423 return ajTrue;
1424 }
1425
1426
1427
1428
1429 /* @funcstatic pdbioWriteAtomDomainPdb ****************************************
1430 **
1431 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1432 ** records). Coordinates are taken from a Pdb structure, domain definition
1433 ** is taken from a Scop structure.
1434 ** Coordinates for heterogens are NOT written to
1435 ** file. The original (pdb) residue numbers are given (these do NOT give an
1436 ** index into the SEQRES sequence. Use pdbioWriteAtomDomainIdx if you
1437 ** need an index into the SEQRES sequence.
1438 **
1439 ** @param [w] errf [AjPFile] Output file stream for error messages
1440 ** @param [w] outf [AjPFile] Output file stream
1441 ** @param [r] pdb [const AjPPdb] Pdb object
1442 ** @param [r] scop [const AjPScop] Scop object
1443 ** @param [r] mod [ajint] Model number, beginning at 1
1444 **
1445 ** @return [AjBool] True on success
1446 **
1447 ** @release 2.9.0
1448 ** @@
1449 ******************************************************************************/
1450
pdbioWriteAtomDomainPdb(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajint mod)1451 static AjBool pdbioWriteAtomDomainPdb(AjPFile errf, AjPFile outf,
1452 const AjPPdb pdb,
1453 const AjPScop scop, ajint mod)
1454 {
1455 if (pdbioWriteAtomDomain(errf, outf, pdb, scop, mod, ajEPdbModePdb))
1456 return ajTrue;
1457
1458 return ajFalse;
1459 }
1460
1461
1462
1463
1464 /* @funcstatic pdbioWriteAtomDomainIdx ****************************************
1465 **
1466 ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM
1467 ** records). Coordinates are taken from a Pdb structure, domain definition
1468 ** is taken from a Scop structure. The model number argument should have a
1469 ** value of 1 for x-ray structures. Coordinates for heterogens are NOT
1470 ** written to file. The corrected residue numbers are given (these give an
1471 ** index into the SEQRES sequence). Use pdbioWriteAtomDomainPdb if you
1472 ** wish to maintain the original residue number.
1473 **
1474 ** @param [w] errf [AjPFile] Output file stream for error messages
1475 ** @param [w] outf [AjPFile] Output file stream
1476 ** @param [r] pdb [const AjPPdb] Pdb object
1477 ** @param [r] scop [const AjPScop] Scop object
1478 ** @param [r] mod [ajint] Model number, beginning at 1
1479 **
1480 ** @return [AjBool] True on success
1481 **
1482 ** @release 2.9.0
1483 ** @@
1484 ******************************************************************************/
1485
pdbioWriteAtomDomainIdx(AjPFile errf,AjPFile outf,const AjPPdb pdb,const AjPScop scop,ajint mod)1486 static AjBool pdbioWriteAtomDomainIdx(AjPFile errf, AjPFile outf,
1487 const AjPPdb pdb,
1488 const AjPScop scop, ajint mod)
1489 {
1490 if (pdbioWriteAtomDomain(errf, outf, pdb, scop, mod, ajEPdbModeIdx))
1491 return ajTrue;
1492
1493 return ajFalse;
1494 }
1495
1496
1497
1498
1499 /* @funcstatic pdbioWriteHeterogen ********************************************
1500 **
1501 ** Writes coordinates for heterogens that could not be uniquely associated
1502 ** with a chain to an output file in pdb format (HETATM records). Coordinates
1503 ** are taken from a Pdb structure. The model number argument should have a
1504 ** value of 1 for x-ray structures.
1505 **
1506 ** @param [w] outf [AjPFile] Output file stream
1507 ** @param [r] pdb [const AjPPdb] Pdb object
1508 ** @param [r] mod [ajuint] Model number, beginning at 1
1509 **
1510 ** @return [AjBool] True on success
1511 **
1512 ** @release 2.9.0
1513 ** @@
1514 ******************************************************************************/
1515
pdbioWriteHeterogen(AjPFile outf,const AjPPdb pdb,ajuint mod)1516 static AjBool pdbioWriteHeterogen(AjPFile outf, const AjPPdb pdb, ajuint mod)
1517 {
1518 AjIList iter = NULL;
1519 AjPAtom atom = NULL;
1520 ajint acnt;
1521
1522
1523 /* Check args are not NULL */
1524 if (!outf || !pdb || mod < 1)
1525 return ajFalse;
1526
1527
1528 iter = ajListIterNewread(pdb->Groups);
1529
1530 while ((atom = (AjPAtom) ajListIterGet(iter)))
1531 if (atom->Mod == mod)
1532 break;
1533
1534 for (acnt = 1; atom; atom = (AjPAtom) ajListIterGet(iter))
1535 {
1536 /* Break if on t0 a new model */
1537 if (atom->Mod != mod)
1538 break;
1539
1540 /* Write out HETATM line */
1541
1542 if (atom->Type == 'H')
1543 ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1544 "%6.2f%6.2f%11s%-3c\n",
1545 "HETATM",
1546 acnt++,
1547 atom->Atm,
1548 atom->Id3,
1549 ajChararrGet(pdb->gpid, atom->Gpn - 1),
1550 atom->Idx,
1551 atom->X,
1552 atom->Y,
1553 atom->Z,
1554 atom->O,
1555 atom->B,
1556 " ",
1557 *ajStrGetPtr(atom->Atm));
1558 else
1559 ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f"
1560 "%6.2f%6.2f%11s%-3c\n",
1561 "HETATM",
1562 acnt++,
1563 atom->Atm,
1564 atom->Id3,
1565 ' ',
1566 atom->Idx,
1567 atom->X,
1568 atom->Y,
1569 atom->Z,
1570 atom->O,
1571 atom->B,
1572 " ",
1573 *ajStrGetPtr(atom->Atm));
1574 /* atom2 = atom; Unused variable */
1575 }
1576
1577
1578 ajListIterDel(&iter);
1579
1580 return ajTrue;
1581 }
1582
1583
1584
1585
1586 /* @funcstatic pdbioWriteText *************************************************
1587 **
1588 ** Writes text to file in the format of pdb records
1589 **
1590 ** @param [w] outf [AjPFile] Output file stream
1591 ** @param [r] str [const AjPStr] Text to print out
1592 ** @param [r] prefix [const char *] pdb record (e.g. "HEADER")
1593 **
1594 **
1595 ** @return [AjBool] True on success
1596 **
1597 ** @release 2.9.0
1598 ** @@
1599 ******************************************************************************/
1600
pdbioWriteText(AjPFile outf,const AjPStr str,const char * prefix)1601 static AjBool pdbioWriteText(AjPFile outf, const AjPStr str,
1602 const char *prefix)
1603 {
1604 ajint n = 0;
1605 ajint l = 0;
1606 ajint c = 0;
1607
1608 AjPStrTok handle = NULL;
1609 AjPStr token = NULL;
1610 AjPStr tmp = NULL;
1611
1612 if (!outf)
1613 return ajFalse;
1614
1615
1616
1617 /* Initialise strings */
1618 token = ajStrNew();
1619 tmp = ajStrNewC("");
1620
1621
1622 handle = ajStrTokenNewC(str, " \t\r\n");
1623
1624 while (ajStrTokenNextParse(handle, &token))
1625 {
1626 if (!c)
1627 ajFmtPrintF(outf, "%-11s", prefix);
1628
1629 if ((l = n + ajStrGetLen(token)) < 68)
1630 {
1631 if (c++)
1632 ajStrAppendC(&tmp, " ");
1633 ajStrAppendS(&tmp, token);
1634 n = ++l;
1635 }
1636 else
1637 {
1638 ajFmtPrintF(outf, "%-*S\n", 69, tmp);
1639
1640 ajStrAssignS(&tmp, token);
1641 ajStrAppendC(&tmp, " ");
1642 n = ajStrGetLen(token);
1643 c = 0;
1644 }
1645 }
1646
1647 if (c)
1648 ajFmtPrintF(outf, "%-*S\n", 69, tmp);
1649
1650
1651 ajStrTokenDel(&handle);
1652 ajStrDel(&token);
1653 ajStrDel(&tmp);
1654
1655 return ajTrue;
1656 }
1657
1658
1659
1660
1661 /* @funcstatic pdbioWriteHeader ***********************************************
1662 **
1663 ** Writes the Pdb element of a Pdb structure to an output file in pdb format
1664 **
1665 ** @param [w] outf [AjPFile] Output file stream
1666 ** @param [r] pdb [const AjPPdb] Pdb object
1667 **
1668 ** @return [AjBool] True on success
1669 **
1670 ** @release 2.9.0
1671 ** @@
1672 ******************************************************************************/
1673
pdbioWriteHeader(AjPFile outf,const AjPPdb pdb)1674 static AjBool pdbioWriteHeader(AjPFile outf, const AjPPdb pdb)
1675 {
1676 if (pdb && outf)
1677 {
1678 ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR %-45S\n",
1679 "HEADER",
1680 pdb->Pdb);
1681
1682 return ajTrue;
1683 }
1684
1685 return ajFalse;
1686 }
1687
1688
1689
1690
1691 /* @funcstatic pdbioWriteHeaderScop *******************************************
1692 **
1693 ** Writes the Entry element of a Scop structure to an output file in pdb
1694 ** format
1695 **
1696 ** @param [w] outf [AjPFile] Output file stream
1697 ** @param [r] scop [const AjPScop] Scop object
1698 **
1699 ** @return [AjBool] True on success
1700 **
1701 ** @release 2.9.0
1702 ** @@
1703 ******************************************************************************/
1704
pdbioWriteHeaderScop(AjPFile outf,const AjPScop scop)1705 static AjBool pdbioWriteHeaderScop(AjPFile outf, const AjPScop scop)
1706 {
1707 if (scop && outf)
1708 {
1709 ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR SCOP DOMAIN %-33S\n",
1710 "HEADER",
1711 scop->Entry);
1712
1713 return ajTrue;
1714 }
1715
1716 return ajFalse;
1717 }
1718
1719
1720
1721
1722 /* @funcstatic pdbioWriteTitle ************************************************
1723 **
1724 ** Writes a TITLE record to an output file in pdb format
1725 ** The text is hard-coded.
1726 **
1727 ** @param [w] outf [AjPFile] Output file stream
1728 ** @param [r] pdb [const AjPPdb] Pdb object
1729 **
1730 ** @return [AjBool] True on success
1731 **
1732 ** @release 2.9.0
1733 ** @@
1734 ******************************************************************************/
1735
pdbioWriteTitle(AjPFile outf,const AjPPdb pdb)1736 static AjBool pdbioWriteTitle(AjPFile outf, const AjPPdb pdb)
1737 {
1738 if (pdb && outf)
1739 {
1740 ajFmtPrintF(outf, "%-11sTHIS FILE IS MISSING MOST RECORDS FROM THE "
1741 "ORIGINAL PDB FILE%9s\n",
1742 "TITLE", " ");
1743
1744 return ajTrue;
1745 }
1746
1747 return ajFalse;
1748 }
1749
1750
1751
1752
1753 /* @funcstatic pdbioWriteCompnd ***********************************************
1754 **
1755 ** Writes the Compnd element of a Pdb structure to an output file in pdb
1756 ** format
1757 **
1758 ** @param [w] outf [AjPFile] Output file stream
1759 ** @param [r] pdb [const AjPPdb] Pdb object
1760 **
1761 ** @return [AjBool] True on success
1762 **
1763 ** @release 2.9.0
1764 ** @@
1765 ******************************************************************************/
1766
pdbioWriteCompnd(AjPFile outf,const AjPPdb pdb)1767 static AjBool pdbioWriteCompnd(AjPFile outf, const AjPPdb pdb)
1768 {
1769 if (pdb && outf)
1770 {
1771 pdbioWriteText(outf, pdb->Compnd, "COMPND");
1772
1773 return ajTrue;
1774 }
1775
1776 return ajFalse;
1777 }
1778
1779
1780
1781
1782 /* @funcstatic pdbioWriteSource ***********************************************
1783 **
1784 ** Writes the Source element of a Pdb structure to an output file in pdb
1785 ** format
1786 **
1787 ** @param [w] outf [AjPFile] Output file stream
1788 ** @param [r] pdb [const AjPPdb] Pdb object
1789 **
1790 ** @return [AjBool] True on success
1791 **
1792 ** @release 2.9.0
1793 ** @@
1794 ******************************************************************************/
1795
pdbioWriteSource(AjPFile outf,const AjPPdb pdb)1796 static AjBool pdbioWriteSource(AjPFile outf, const AjPPdb pdb)
1797 {
1798 if (pdb && outf)
1799 {
1800 pdbioWriteText(outf, pdb->Source, "SOURCE");
1801
1802 return ajTrue;
1803 }
1804
1805 return ajFalse;
1806 }
1807
1808
1809
1810
1811 /* @funcstatic pdbioWriteEmptyRemark ******************************************
1812 **
1813 ** Writes an empty REMARK record to an output file in pdb format
1814 **
1815 ** @param [w] outf [AjPFile] Output file stream
1816 ** @param [r] pdb [const AjPPdb] Pdb object
1817 **
1818 ** @return [AjBool] True on success
1819 **
1820 ** @release 2.9.0
1821 ** @@
1822 ******************************************************************************/
1823
pdbioWriteEmptyRemark(AjPFile outf,const AjPPdb pdb)1824 static AjBool pdbioWriteEmptyRemark(AjPFile outf, const AjPPdb pdb)
1825 {
1826 if (pdb && outf)
1827 {
1828 ajFmtPrintF(outf, "%-11s%-69s\n", "REMARK", " ");
1829
1830 return ajTrue;
1831 }
1832
1833 return ajFalse;
1834 }
1835
1836
1837
1838
1839 /* @funcstatic pdbioWriteResolution *******************************************
1840 **
1841 ** Writes the Reso element of a Pdb structure to an output file in pdb
1842 ** format
1843 **
1844 ** @param [w] outf [AjPFile] Output file stream
1845 ** @param [r] pdb [const AjPPdb] Pdb object
1846 **
1847 ** @return [AjBool] True on success
1848 **
1849 ** @release 2.9.0
1850 ** @@
1851 ******************************************************************************/
1852
pdbioWriteResolution(AjPFile outf,const AjPPdb pdb)1853 static AjBool pdbioWriteResolution(AjPFile outf, const AjPPdb pdb)
1854 {
1855 if (pdb && outf)
1856 {
1857 ajFmtPrintF(outf, "%-11sRESOLUTION. %-6.2f%-51s\n",
1858 "REMARK", pdb->Reso, "ANGSTROMS.");
1859
1860 return ajTrue;
1861 }
1862
1863 return ajFalse;
1864 }
1865
1866
1867
1868
1869 /* @funcstatic pdbioReadLines *************************************************
1870 **
1871 ** Reads a pdb file and returns a pointer to a partially filled Pdbfile object.
1872 ** All of the lines from the pdb file are written to the <lines> array of the
1873 ** object and the <nlines> element is written.
1874 **
1875 ** Memory for the object itself and any arrays whose size is equal to the
1876 ** number of lines is allocated:
1877 ** lines, linetype, chnn, gpn, modn, resni, resn1, resn2, pdbn, oddnum, atype,
1878 ** rtype, x,y,z,o,b, elementNum, elementId, elementType & helixClass.
1879 **
1880 ** The following elements are written:
1881 ** nlines, lines.
1882 **
1883 ** The linetype array is set to default value of pdbfileELinetypeIgnore
1884 **
1885 ** @param [u] inf [AjPFile] Pointer to pdb file
1886 **
1887 ** @return [AjPPdbfile] Pdbfile object pointer, or NULL on failure.
1888 **
1889 ** @release 2.9.0
1890 ** @@
1891 ******************************************************************************/
1892
pdbioReadLines(AjPFile inf)1893 static AjPPdbfile pdbioReadLines(AjPFile inf)
1894 {
1895 AjPPdbfile pdbfile = NULL; /* pdbfile object to be returned */
1896 AjPList list = NULL; /* List of lines in pdb file */
1897 AjPStr line = NULL; /* A line from a pdb file */
1898 ajuint i = 0U;
1899
1900
1901 /* Check args */
1902 if (!inf)
1903 return NULL;
1904
1905
1906 /* Allocate list and pdbfile object */
1907 list = ajListstrNew();
1908 /* Don't know number of lines or chains yet */
1909 pdbfile = pdbioPdbfileNew(0, 0);
1910
1911
1912 /* Read pdb file and append lines to list */
1913 line = ajStrNew();
1914
1915 while (ajReadlineTrim(inf, &line))
1916 {
1917 ajListstrPushAppend(list, line);
1918 line = ajStrNew();
1919 }
1920
1921
1922 /* Convert list to array in pdbfile object */
1923 pdbfile->nlines = (ajuint) ajListstrToarray(list, &pdbfile->lines);
1924
1925 if (pdbfile->nlines == 0)
1926 {
1927 ajStrDel(&line);
1928 ajListstrFree(&list);
1929 pdbioPdbfileDel(&pdbfile);
1930
1931 return NULL;
1932 }
1933
1934
1935
1936 /* Allocate memory for x,y,z,o,b, modn, chnn, linetype, ok, coord and
1937 * pdbn arrays in pdbfile object */
1938 AJCNEW0(pdbfile->x, pdbfile->nlines);
1939 AJCNEW0(pdbfile->y, pdbfile->nlines);
1940 AJCNEW0(pdbfile->z, pdbfile->nlines);
1941 AJCNEW0(pdbfile->o, pdbfile->nlines);
1942 AJCNEW0(pdbfile->b, pdbfile->nlines);
1943
1944 AJCNEW0(pdbfile->elementNum, pdbfile->nlines);
1945 AJCNEW0(pdbfile->elementType, pdbfile->nlines);
1946 AJCNEW0(pdbfile->helixClass, pdbfile->nlines);
1947 AJCNEW0(pdbfile->resni, pdbfile->nlines);
1948 AJCNEW0(pdbfile->resn1, pdbfile->nlines);
1949 AJCNEW0(pdbfile->resn2, pdbfile->nlines);
1950 AJCNEW0(pdbfile->modn, pdbfile->nlines);
1951
1952 AJCNEW0(pdbfile->chnn, pdbfile->nlines);
1953 AJCNEW0(pdbfile->gpn, pdbfile->nlines);
1954
1955 AJCNEW0(pdbfile->linetype, pdbfile->nlines);
1956
1957 for (i = 0U; i < pdbfile->nlines; i++)
1958 pdbfile->linetype[i] = pdbfileELinetypeIgnore;
1959
1960 AJCNEW0(pdbfile->pdbn, pdbfile->nlines);
1961
1962 for (i = 0U; i < pdbfile->nlines; i++)
1963 pdbfile->pdbn[i] = ajStrNew();
1964
1965 AJCNEW0(pdbfile->elementId, pdbfile->nlines);
1966
1967 for (i = 0U; i < pdbfile->nlines; i++)
1968 pdbfile->elementId[i] = ajStrNew();
1969
1970 AJCNEW0(pdbfile->atype, pdbfile->nlines);
1971
1972 for (i = 0U; i < pdbfile->nlines; i++)
1973 pdbfile->atype[i] = ajStrNewRes(4);
1974
1975 AJCNEW0(pdbfile->rtype, pdbfile->nlines);
1976
1977 for (i = 0U; i < pdbfile->nlines; i++)
1978 pdbfile->rtype[i] = ajStrNewRes(4);
1979
1980 AJCNEW0(pdbfile->oddnum, pdbfile->nlines);
1981
1982 for (i = 0U; i < pdbfile->nlines; i++)
1983 pdbfile->oddnum[i] = ajFalse;
1984
1985 #if AJFALSE
1986 /* DIAGNOSTIC */
1987 for (i = 0U; i < pdbfile->nlines; i++)
1988 {
1989 ajFmtPrintF(tempfile, "%S\n", pdbfile->lines[i]);
1990 fflush(tempfile->fp);
1991 }
1992 #endif /* AJFALSE */
1993
1994 /* Tidy up and return */
1995 ajStrDel(&line);
1996 ajListstrFree(&list);
1997
1998 return pdbfile;
1999 }
2000
2001
2002
2003
2004 /* @funcstatic pdbioPdbfileDel ************************************************
2005 **
2006 ** Destructor for a PDB File object.
2007 **
2008 ** @param [d] Ppdbfile [AjPPdbfile*] PDB File address
2009 **
2010 ** @return [void]
2011 **
2012 ** @release 2.9.0
2013 ** @@
2014 ******************************************************************************/
2015
pdbioPdbfileDel(AjPPdbfile * Ppdbfile)2016 static void pdbioPdbfileDel(AjPPdbfile *Ppdbfile)
2017 {
2018 ajuint i = 0U;
2019
2020 /* Check args */
2021 if (!*Ppdbfile)
2022 {
2023 ajWarn("NULL arg passed to pdbioPdbfileDel.\n");
2024
2025 return;
2026 }
2027
2028 if ((*Ppdbfile)->resn1ok)
2029 AJFREE((*Ppdbfile)->resn1ok);
2030
2031 if ((*Ppdbfile)->nres)
2032 AJFREE((*Ppdbfile)->nres);
2033
2034 if ((*Ppdbfile)->nligands)
2035 AJFREE((*Ppdbfile)->nligands);
2036
2037 if ((*Ppdbfile)->chainok)
2038 AJFREE((*Ppdbfile)->chainok);
2039
2040 if ((*Ppdbfile)->pdbid)
2041 ajStrDel(&(*Ppdbfile)->pdbid);
2042
2043 if ((*Ppdbfile)->compnd)
2044 ajStrDel(&(*Ppdbfile)->compnd);
2045
2046 if ((*Ppdbfile)->source)
2047 ajStrDel(&(*Ppdbfile)->source);
2048
2049 if ((*Ppdbfile)->modn)
2050 AJFREE((*Ppdbfile)->modn);
2051
2052 if ((*Ppdbfile)->x)
2053 AJFREE((*Ppdbfile)->x);
2054
2055 if ((*Ppdbfile)->y)
2056 AJFREE((*Ppdbfile)->y);
2057
2058 if ((*Ppdbfile)->z)
2059 AJFREE((*Ppdbfile)->z);
2060
2061 if ((*Ppdbfile)->o)
2062 AJFREE((*Ppdbfile)->o);
2063
2064 if ((*Ppdbfile)->b)
2065 AJFREE((*Ppdbfile)->b);
2066
2067 if ((*Ppdbfile)->numHelices)
2068 AJFREE((*Ppdbfile)->numHelices);
2069
2070 if ((*Ppdbfile)->numStrands)
2071 AJFREE((*Ppdbfile)->numStrands);
2072
2073 if ((*Ppdbfile)->numSheets)
2074 AJFREE((*Ppdbfile)->numSheets);
2075
2076 if ((*Ppdbfile)->numTurns)
2077 AJFREE((*Ppdbfile)->numTurns);
2078
2079 if ((*Ppdbfile)->elementNum)
2080 AJFREE((*Ppdbfile)->elementNum);
2081
2082 if ((*Ppdbfile)->elementType)
2083 AJFREE((*Ppdbfile)->elementType);
2084
2085 if ((*Ppdbfile)->helixClass)
2086 AJFREE((*Ppdbfile)->helixClass);
2087
2088 if ((*Ppdbfile)->resni)
2089 AJFREE((*Ppdbfile)->resni);
2090
2091 if ((*Ppdbfile)->resn1)
2092 AJFREE((*Ppdbfile)->resn1);
2093
2094 if ((*Ppdbfile)->resn2)
2095 AJFREE((*Ppdbfile)->resn2);
2096
2097 if ((*Ppdbfile)->chnn)
2098 AJFREE((*Ppdbfile)->chnn);
2099
2100 if ((*Ppdbfile)->gpn)
2101 AJFREE((*Ppdbfile)->gpn);
2102
2103 if ((*Ppdbfile)->linetype)
2104 AJFREE((*Ppdbfile)->linetype);
2105
2106 if ((*Ppdbfile)->oddnum)
2107 AJFREE((*Ppdbfile)->oddnum);
2108
2109 if ((*Ppdbfile)->lines)
2110 {
2111 for (i = 0U; i < (*Ppdbfile)->nlines; i++)
2112 ajStrDel(&(*Ppdbfile)->lines[i]);
2113
2114 AJFREE((*Ppdbfile)->lines);
2115 }
2116
2117 if ((*Ppdbfile)->pdbn)
2118 {
2119 for (i = 0U; i < (*Ppdbfile)->nlines; i++)
2120 ajStrDel(&(*Ppdbfile)->pdbn[i]);
2121
2122 AJFREE((*Ppdbfile)->pdbn);
2123 }
2124
2125 if ((*Ppdbfile)->elementId)
2126 {
2127 for (i = 0U; i < (*Ppdbfile)->nlines; i++)
2128 ajStrDel(&(*Ppdbfile)->elementId[i]);
2129
2130 AJFREE((*Ppdbfile)->elementId);
2131 }
2132
2133 if ((*Ppdbfile)->atype)
2134 {
2135 for (i = 0U; i < (*Ppdbfile)->nlines; i++)
2136 ajStrDel(&(*Ppdbfile)->atype[i]);
2137
2138 AJFREE((*Ppdbfile)->atype);
2139 }
2140
2141 if ((*Ppdbfile)->rtype)
2142 {
2143 for (i = 0U; i < (*Ppdbfile)->nlines; i++)
2144 ajStrDel(&(*Ppdbfile)->rtype[i]);
2145
2146 AJFREE((*Ppdbfile)->rtype);
2147 }
2148
2149 if ((*Ppdbfile)->seqres)
2150 {
2151 for (i = 0U; i < (*Ppdbfile)->nchains; i++)
2152 ajStrDel(&(*Ppdbfile)->seqres[i]);
2153
2154 AJFREE((*Ppdbfile)->seqres);
2155 }
2156
2157 if ((*Ppdbfile)->seqresful)
2158 {
2159 for (i = 0U; i < (*Ppdbfile)->nchains; i++)
2160 ajStrDel(&(*Ppdbfile)->seqresful[i]);
2161
2162 AJFREE((*Ppdbfile)->seqresful);
2163 }
2164
2165 if ((*Ppdbfile)->chid)
2166 ajChararrDel(&(*Ppdbfile)->chid);
2167
2168 if ((*Ppdbfile)->gpid)
2169 ajChararrDel(&(*Ppdbfile)->gpid);
2170
2171 AJFREE(*Ppdbfile);
2172 *Ppdbfile = NULL;
2173
2174 return;
2175 }
2176
2177
2178
2179
2180 /* @funcstatic pdbioElementsNew ***********************************************
2181 **
2182 ** Constructor for Elements object.
2183 **
2184 ** @param [r] nelms [ajuint] Number of elements
2185 **
2186 ** @return [AjPElements] Pointer to Elements object, or NULL on failure.
2187 **
2188 ** @release 2.9.0
2189 ** @@
2190 ******************************************************************************/
2191
pdbioElementsNew(ajuint nelms)2192 static AjPElements pdbioElementsNew(ajuint nelms)
2193 {
2194 ajuint i = 0U;
2195
2196 AjPElements elements = NULL;
2197
2198 AJNEW0(elements);
2199
2200 elements->n = nelms;
2201
2202 if (nelms)
2203 {
2204 AJCNEW0(elements->elms, nelms);
2205
2206 for (i = 0U; i < nelms; i++)
2207 elements->elms[i] = pdbioElementNew();
2208 }
2209 #if AJFALSE
2210 else
2211 ajWarn("Value of zero passed to pdbioElementsNew");
2212 #endif /* AJFALSE */
2213
2214 return elements;
2215 }
2216
2217
2218
2219
2220 /* @funcstatic pdbioElementsDel **********************************************
2221 **
2222 ** Destructor for Elements object.
2223 **
2224 ** @param [d] Pelements [AjPElements*] Elements address
2225 **
2226 ** @return [void]
2227 **
2228 ** @release 2.9.0
2229 ** @@
2230 ******************************************************************************/
2231
pdbioElementsDel(AjPElements * Pelements)2232 static void pdbioElementsDel(AjPElements *Pelements)
2233 {
2234 ajuint i = 0U;
2235
2236 if (!*Pelements)
2237 {
2238 ajWarn("NULL arg passed to pdbioElementsDel");
2239
2240 return;
2241 }
2242
2243 if ((*Pelements)->elms)
2244 {
2245 for (i = 0U; i < (*Pelements)->n; i++)
2246 pdbioElementDel(&(*Pelements)->elms[i]);
2247
2248 AJFREE((*Pelements)->elms);
2249 }
2250
2251 AJFREE(*Pelements);
2252 *Pelements = NULL;
2253
2254 return;
2255 }
2256
2257
2258
2259
2260 /* @funcstatic pdbioElementNew ************************************************
2261 **
2262 ** Constructor for Element object.
2263 **
2264 ** @return [AjPElement] Element object or NULL
2265 **
2266 ** @release 2.9.0
2267 ** @@
2268 ******************************************************************************/
2269
pdbioElementNew(void)2270 static AjPElement pdbioElementNew(void)
2271 {
2272 AjPElement element = NULL;
2273
2274 AJNEW0(element);
2275
2276 element->elementId = ajStrNew();
2277 element->initResName = ajStrNew();
2278 element->initSeqNum = ajStrNew();
2279 element->endResName = ajStrNew();
2280 element->endSeqNum = ajStrNew();
2281
2282 element->elementNum = 0;
2283 element->elementType = ' ';
2284 element->helixClass = 0;
2285 element->chainId = ' ';
2286
2287 return element;
2288 }
2289
2290
2291
2292
2293 /* @funcstatic pdbioElementDel ************************************************
2294 **
2295 ** Destructor for Element object.
2296 **
2297 ** @param [d] Pelement [AjPElement*] Element object pointer
2298 **
2299 ** @return [void]
2300 **
2301 ** @release 2.9.0
2302 ** @@
2303 ******************************************************************************/
2304
pdbioElementDel(AjPElement * Pelement)2305 static void pdbioElementDel(AjPElement *Pelement)
2306 {
2307 ajStrDel(&((*Pelement)->elementId));
2308 ajStrDel(&((*Pelement)->initResName));
2309 ajStrDel(&((*Pelement)->initSeqNum));
2310 ajStrDel(&((*Pelement)->endResName));
2311 ajStrDel(&((*Pelement)->endSeqNum));
2312
2313 AJFREE(*Pelement);
2314 *Pelement = NULL;
2315
2316 return;
2317 }
2318
2319
2320
2321
2322 /* @funcstatic pdbioPdbfileNew ************************************************
2323 **
2324 ** Constructor for Pdbfile object.
2325 **
2326 ** @param [r] nlines [ajuint] No. of lines in pdb file
2327 ** @param [r] nchains [ajuint] No. of chains in pdb file
2328 **
2329 ** @return [AjPPdbfile] Pointer to pdbfile object, or NULL on failure.
2330 **
2331 ** @release 2.9.0
2332 ** @@
2333 ******************************************************************************/
2334
pdbioPdbfileNew(ajuint nlines,ajuint nchains)2335 static AjPPdbfile pdbioPdbfileNew(ajuint nlines, ajuint nchains)
2336 {
2337 ajuint i = 0U;
2338
2339 AjPPdbfile pdbfile = NULL;
2340
2341 AJNEW0(pdbfile);
2342
2343 pdbfile->pdbid = ajStrNew();
2344 pdbfile->compnd = ajStrNew();
2345 pdbfile->source = ajStrNew();
2346
2347 pdbfile->nomod = ajFalse;
2348 pdbfile->toofewter = ajFalse;
2349
2350 pdbfile->nchains = nchains;
2351
2352 if (nchains)
2353 {
2354 AJCNEW0(pdbfile->resn1ok, nchains);
2355
2356 for (i = 0U; i < nchains; i++)
2357 pdbfile->resn1ok[i] = ajTrue;
2358
2359 AJCNEW0(pdbfile->numHelices, nchains);
2360
2361 for (i = 0U; i < nchains; i++)
2362 pdbfile->numHelices[i] = ajTrue;
2363
2364 AJCNEW0(pdbfile->numStrands, nchains);
2365
2366 for (i = 0U; i < nchains; i++)
2367 pdbfile->numStrands[i] = ajTrue;
2368
2369 AJCNEW0(pdbfile->numSheets, nchains);
2370
2371 for (i = 0U; i < nchains; i++)
2372 pdbfile->numSheets[i] = ajTrue;
2373
2374 AJCNEW0(pdbfile->numTurns, nchains);
2375
2376 for (i = 0U; i < nchains; i++)
2377 pdbfile->numTurns[i] = ajTrue;
2378
2379 AJCNEW0(pdbfile->chainok, nchains);
2380
2381 for (i = 0U; i < nchains; i++)
2382 pdbfile->chainok[i] = ajTrue;
2383
2384 AJCNEW0(pdbfile->nres, nchains);
2385
2386 AJCNEW0(pdbfile->nligands, nchains);
2387
2388 AJCNEW0(pdbfile->seqres, nchains);
2389
2390 for (i = 0U; i < nchains; i++)
2391 pdbfile->seqres[i] = ajStrNew();
2392
2393 AJCNEW0(pdbfile->seqresful, nchains);
2394
2395 for (i = 0U; i < nchains; i++)
2396 pdbfile->seqresful[i] = ajStrNew();
2397
2398 pdbfile->chid = ajChararrNewRes(nchains);
2399 }
2400 else
2401 pdbfile->chid = ajChararrNew();
2402
2403 pdbfile->gpid = ajChararrNew();
2404
2405 pdbfile->nlines = nlines;
2406
2407 if (nlines)
2408 {
2409 AJCNEW0(pdbfile->x, nlines);
2410 AJCNEW0(pdbfile->y, nlines);
2411 AJCNEW0(pdbfile->z, nlines);
2412 AJCNEW0(pdbfile->o, nlines);
2413 AJCNEW0(pdbfile->b, nlines);
2414
2415 AJCNEW0(pdbfile->elementNum, nlines);
2416 AJCNEW0(pdbfile->elementType, nlines);
2417 AJCNEW0(pdbfile->helixClass, nlines);
2418
2419 AJCNEW0(pdbfile->resni, nlines);
2420 AJCNEW0(pdbfile->resn1, nlines);
2421 AJCNEW0(pdbfile->resn2, nlines);
2422 AJCNEW0(pdbfile->modn, nlines);
2423 AJCNEW0(pdbfile->chnn, nlines);
2424 AJCNEW0(pdbfile->gpn, nlines);
2425 AJCNEW0(pdbfile->linetype, nlines);
2426 AJCNEW0(pdbfile->oddnum, nlines);
2427 AJCNEW0(pdbfile->lines, nlines);
2428 AJCNEW0(pdbfile->pdbn, nlines);
2429 AJCNEW0(pdbfile->elementId, nlines);
2430 AJCNEW0(pdbfile->atype, nlines);
2431 AJCNEW0(pdbfile->rtype, nlines);
2432
2433 for (i = 0U; i < nlines; i++)
2434 {
2435 pdbfile->linetype[i] = pdbfileELinetypeIgnore;
2436 pdbfile->oddnum[i] = ajFalse;
2437 pdbfile->lines[i] = ajStrNew();
2438 pdbfile->pdbn[i] = ajStrNew();
2439 pdbfile->elementId[i] = ajStrNew();
2440 pdbfile->atype[i] = ajStrNewRes(4);
2441 pdbfile->rtype[i] = ajStrNewRes(4);
2442 }
2443 }
2444 #if AJFALSE
2445 else
2446 ajWarn("Zero sized arg passed to pdbioPdbfileNew.\n");
2447 #endif /* AJFALSE */
2448
2449 return pdbfile;
2450 }
2451
2452
2453
2454
2455 /* @funcstatic pdbioFirstPass *************************************************
2456 **
2457 ** The initial read of the pdb file as held in the <lines> array of a Pdbfile
2458 ** object. Bibliographic information is parsed, the number of chains
2459 ** determined and the sequences and chain ids from the SEQRES records are
2460 ** parsed. The line type (see below) of each line is determined, and for
2461 ** lines with coordinates, the residue type is parsed. A count of the number
2462 ** of TER records and the location of the first coordinate line is also
2463 ** determined.
2464 ** Secondary structure information is also parsed and an AjPElements
2465 ** object is written.
2466 **
2467 ** Memory for any arrays whose size is equal to the number of chains is
2468 ** allocated:
2469 ** seqres, seqresful, nres, chainok, resn1ok, nligands, numHelices,
2470 ** numStrands, numSheets, numTurns
2471 **
2472 ** The following arrays are written:
2473 ** seqres, seqresful, chid, pdbn, resn1 and resn2, rtype, linetype
2474 **
2475 ** The following elements are written:
2476 ** nchains, tercnt, modcnt, nomod, source, compnd, method, reso, idxfirst
2477 **
2478 ** The chainok & resn1ok arrays are set to default values of ajTrue
2479 **
2480 **
2481 ** Writing resn1/resn2 & pdbn arrays
2482 ** The pdbn array is the raw residue number (as a string) and is filled for
2483 ** lines for which <linetype> == pdbfileELinetypeCoordinate. The resn1/resn2 arrays are
2484 ** given initial values which at this stage are simply the integer component
2485 ** of pdbn. The values for resn1/resn2 are changed later in the program.
2486 **
2487 ** Writing modcnt and nomod elements
2488 ** modcnt is a count of the number of MODEL records (excluding duplicate
2489 ** records). However, if no MODEL records are found, modcnt is set to the
2490 ** minimum value of 1, and nomod is set to ajTrue.
2491 **
2492 ** Writing linetype array
2493 ** The linetype array is set as follows:
2494 **
2495 ** pdbfileELinetypeCoordinate for ATOM or HETATM records which contain both atom and
2496 ** residue identifier codes and which are not duplicate positions.
2497 ** Duplicate positions for (i) whole residues or (ii) individual atoms
2498 ** are presumed where a ATOM or HETATM record uses a value other than '1' or
2499 ** 'A' in the (i) residue alternate location indicator (column 17) or (ii)
2500 ** the first column of the atom name (column 13) respectively.
2501 **
2502 ** pdbfileELinetypeTER for TER records
2503 **
2504 ** pdbfileELinetypeMODEL for MODEL records
2505 **
2506 ** pdbfileELinetypeWater for HOH (should be HETATM records)
2507 **
2508 ** For all other lines, it is left as the default of pdbfileELinetypeIgnore (the value
2509 ** might change later in the program).
2510 **
2511 **
2512 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
2513 ** @param [u] flog [AjPFile] Log file (build diagnostics)
2514 ** @param [w] elms [AjPElements*] Elements object pointer
2515 ** @param [r] camask [AjBool] Whether to mask non-amino acid residues
2516 ** within protein chains which do not
2517 ** have a C-alpha atom.
2518 **
2519 ** @return [AjBool] True if file was parsed, False otherwise
2520 **
2521 ** @release 2.9.0
2522 ** @@
2523 ******************************************************************************/
2524
pdbioFirstPass(AjPPdbfile pdbfile,AjPFile flog,AjPElements * elms,AjBool camask)2525 static AjBool pdbioFirstPass(AjPPdbfile pdbfile, AjPFile flog, AjPElements *elms,
2526 AjBool camask)
2527 {
2528 ajuint i = 0U; /* Loop counter */
2529 ajuint j = 0U; /* Loop counter */
2530 ajuint k = 0U; /* Loop counter */
2531 AjBool donefirstatom = ajFalse; /* Flag for finding first ATOM or
2532 * HETATM line */
2533 char pdbn[6]; /* Residue number */
2534 AjBool resolfound = ajFalse;/* Flag for finding RESOLUTION record */
2535 AjBool seqresfound = ajFalse; /* Flag for finding SEQRES record */
2536 AjPStr tmpstr = NULL; /* A temp. string */
2537 AjPStr seqres = NULL; /* Sequence from SEQRES records */
2538 ajuint seqreslen = 0U; /* Indicated length of sequence from SEQRES
2539 * records */
2540 ajuint lenful = 0U; /* Length of SEQRES sequence including ACE,
2541 * FOR & NH2 groups that might be discarded
2542 * by the call to pdbioSeqresToSequence */
2543 char last_id = ' '; /* CHain id of last SEQRES line read */
2544 AjPStr tmpseq = NULL; /* A temp. string for a sequence */
2545 AjPStr tmpseqful = NULL; /* A temp. string for a sequence */
2546 AjPList listseqs = NULL; /* For list of sequences from SEQRES records */
2547 AjPList listseqsful = NULL; /* For list of sequences (using 3-letter
2548 * codes) from SEQRES records */
2549 AjBool done_msg = ajFalse; /* Flag for error messaging */
2550 AjPList listelms = NULL; /* Temp. list of secondary structure elements
2551 * (from HELIX, SHEET and TURN records) */
2552 AjPElement elm = NULL; /* Temp. Element object pointer */
2553 AjPElement FirstStrand = NULL; /* Temp. pointer to first strand of
2554 * each sheet */
2555 AjBool doneFirstStrand = ajFalse; /* Flag for parsing first strand of
2556 * each sheet */
2557 AjPStr LastSheetId = NULL; /* Sheet identifier of the last sheet read in */
2558
2559 /* Check args */
2560 if (!pdbfile || !flog || !(*elms))
2561 {
2562 ajWarn("Bad args passed to pdbioFirstPass\n");
2563
2564 return ajFalse;
2565 }
2566
2567 /* Allocate memory etc */
2568 tmpstr = ajStrNew();
2569 seqres = ajStrNew();
2570 LastSheetId = ajStrNew();
2571
2572 listseqs = ajListstrNew();
2573 listseqsful = ajListstrNew();
2574 listelms = ajListNew();
2575
2576 /* Start of main loop */
2577 for (i = 0U; i < pdbfile->nlines; i++)
2578 {
2579 if ((ajStrPrefixC(pdbfile->lines[i], "ATOM")) ||
2580 (ajStrPrefixC(pdbfile->lines[i], "HETATM")))
2581 {
2582 /* In instances where >1 residue positions are given, ignore all
2583 * but position 'A' & '1' In instances where >1 atom positions
2584 * are given, ignore all but position '1' */
2585 if (((ajStrGetCharPos(pdbfile->lines[i], 16) != ' ') &&
2586 ((ajStrGetCharPos(pdbfile->lines[i], 16) != 'A') &&
2587 (ajStrGetCharPos(pdbfile->lines[i], 16) != '1'))) ||
2588 ((ajStrGetCharPos(pdbfile->lines[i], 12) != ' ') &&
2589 ((ajStrGetCharPos(pdbfile->lines[i], 12) != '1') &&
2590 (isdigit((int) ajStrGetCharPos(pdbfile->lines[i], 12))))))
2591 {
2592 if (!done_msg)
2593 {
2594 ajFmtPrintF(flog, "%-15s%d\n", "DUPATOMRES", i + 1);
2595 done_msg = ajTrue;
2596 }
2597 }
2598 /* In instances where no atom or residue identity is given,
2599 * ignore line */
2600 else if ((ajStrGetCharPos(pdbfile->lines[i], 12) == ' ' &&
2601 ajStrGetCharPos(pdbfile->lines[i], 13) == ' ' &&
2602 ajStrGetCharPos(pdbfile->lines[i], 14) == ' ') ||
2603 (ajStrGetCharPos(pdbfile->lines[i], 17) == ' ' &&
2604 ajStrGetCharPos(pdbfile->lines[i], 19) == ' '))
2605 {
2606 ajFmtPrintF(flog, "%-15s%d\n", "NOATOMRESID", i + 1);
2607 }
2608 else
2609 {
2610 pdbfile->linetype[i] = pdbfileELinetypeCoordinate;
2611
2612 if (!donefirstatom)
2613 {
2614 donefirstatom = ajTrue;
2615 pdbfile->idxfirst = i;
2616 }
2617
2618 /* Write residue number for the line */
2619 for (k = 22, j = 0; k <= 26; k++)
2620 if ((isalnum((int) ajStrGetCharPos(pdbfile->lines[i], k)))
2621 ||
2622 ajStrGetCharPos(pdbfile->lines[i], k) == '-')
2623 pdbn[j++] = ajStrGetCharPos(pdbfile->lines[i], k);
2624
2625 pdbn[j] = '\0';
2626
2627 ajStrAssignC(&(pdbfile->pdbn[i]), pdbn);
2628
2629 if (!ajFmtScanS(pdbfile->pdbn[i], "%d",
2630 &(pdbfile->resn1[i])))
2631 {
2632 ajFmtPrintF(flog, "%-15s%d\n", "ATOMNONUM", i + 1);
2633 pdbfile->linetype[i] = pdbfileELinetypeIgnore;
2634 }
2635 else
2636 {
2637 pdbfile->resn2[i] = pdbfile->resn1[i];
2638
2639 /* Assign residue type */
2640 ajStrAssignSubS(&pdbfile->rtype[i],
2641 pdbfile->lines[i], 17, 19);
2642 ajStrRemoveWhite(&pdbfile->rtype[i]);
2643
2644 /* JONNEW */
2645 if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
2646 pdbfile->linetype[i] = pdbfileELinetypeWater;
2647 }
2648 }
2649 }
2650 else if (ajStrPrefixC(pdbfile->lines[i], "SEQRES"))
2651 {
2652 seqresfound = ajTrue;
2653
2654 /* Read first SEQRES line */
2655 ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18);
2656
2657 if (!ajFmtScanS(seqres, "%u", &seqreslen))
2658 ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESLEN",
2659 pdbfile->nchains,
2660 ajStrGetCharPos(pdbfile->lines[i], 11));
2661
2662
2663 ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70);
2664 /* Append a ' ' in case this is missing from the PDB file, e.g.
2665 * pdb1iie.ent */
2666 ajStrAppendK(&seqres, ' ');
2667
2668 ajChararrPut(&(pdbfile->chid), 0,
2669 (last_id = ajStrGetCharPos(pdbfile->lines[i], 11)));
2670
2671 pdbfile->nchains++;
2672
2673 /* Read subsequent SEQRES lines */
2674 for (i++; i < pdbfile->nlines; i++)
2675 if (ajStrPrefixC(pdbfile->lines[i], "SEQRES"))
2676 {
2677 /* Still on same chain */
2678 if (ajStrGetCharPos(pdbfile->lines[i], 11) == last_id)
2679 {
2680 ajStrAppendSubS(&seqres, pdbfile->lines[i], 19, 70);
2681 /* Append a ' ' in case this is missing from the PDB
2682 * file, e.g. pdb1iie.ent */
2683 ajStrAppendK(&seqres, ' ');
2684 }
2685 /* On new chain */
2686 else
2687 {
2688 tmpseq = ajStrNew();
2689
2690 /* Process last chain */
2691 if (!pdbioSeqresToSequence(seqres, &tmpseq, camask,
2692 &lenful))
2693 {
2694 ajWarn("Sequence conversion error in"
2695 " pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
2696 ajStrDel(&tmpseq);
2697 continue;
2698 }
2699
2700 tmpseqful = ajStrNew();
2701 ajStrAssignS(&tmpseqful, seqres);
2702
2703 /* Check length of sequence vs indicated length */
2704 if (lenful != seqreslen)
2705 ajFmtPrintF(flog, "%-15s%d (%c)\n",
2706 "SEQRESLENDIF",
2707 pdbfile->nchains,
2708 last_id);
2709
2710 /* Push sequences onto lists */
2711 ajListstrPushAppend(listseqs, tmpseq);
2712 ajListstrPushAppend(listseqsful, tmpseqful);
2713
2714
2715 /* Read first SEQRES line of new chain */
2716 ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18);
2717
2718 if (!ajFmtScanS(seqres, "%u", &seqreslen))
2719 ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESLEN",
2720 pdbfile->nchains,
2721 ajStrGetCharPos(pdbfile->lines[i],
2722 11));
2723
2724 ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70);
2725 /* Append a ' ' in case this is missing from the PDB
2726 * file, e.g. pdb1iie.ent */
2727 ajStrAppendK(&seqres, ' ');
2728
2729 ajChararrPut(&(pdbfile->chid), pdbfile->nchains,
2730 (last_id
2731 = ajStrGetCharPos(pdbfile->lines[i],
2732 11)));
2733
2734 #if AJFALSE
2735 if ((last_id = ajStrGetCharPos(pdbfile->lines[i],
2736 11)) == ' ')
2737 ajChararrPut(&(pdbfile->chid),
2738 pdbfile->nchains, '.');
2739 else
2740 ajChararrPut(&(pdbfile->chid),
2741 pdbfile->nchains, last_id);
2742 #endif /* AJFALSE */
2743
2744 pdbfile->nchains++;
2745 }
2746 }
2747 else
2748 {
2749 tmpseq = ajStrNew();
2750
2751 /* Process last chain */
2752 if (!pdbioSeqresToSequence(seqres, &tmpseq, camask,
2753 &lenful))
2754 {
2755 ajWarn("Sequence conversion error in "
2756 "pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
2757 ajStrDel(&tmpseq);
2758 continue;
2759 }
2760
2761 tmpseqful = ajStrNew();
2762 ajStrAssignS(&tmpseqful, seqres);
2763
2764
2765 /* Check length of sequence vs indicated length */
2766 if (lenful != seqreslen)
2767 {
2768 ajFmtPrintF(flog, "%-15s%d (%c)\n",
2769 "SEQRESLENDIF",
2770 pdbfile->nchains,
2771 last_id);
2772 }
2773
2774 /* Push sequences onto lists */
2775 ajListstrPushAppend(listseqs, tmpseq);
2776 ajListstrPushAppend(listseqsful, tmpseqful);
2777
2778 /* Convert lists to arrays in pdbfile object and delete
2779 * list */
2780 ajListstrToarray(listseqs, &(pdbfile->seqres));
2781 ajListFree(&listseqs);
2782
2783 ajListstrToarray(listseqsful, &(pdbfile->seqresful));
2784 ajListFree(&listseqsful);
2785
2786 /* i will get incremented in main loop */
2787 i--;
2788 break;
2789 }
2790 }
2791 else if (ajStrPrefixC(pdbfile->lines[i], "COMPND"))
2792 {
2793 /* Read first COMPND line */
2794 ajStrAssignSubS(&(pdbfile->compnd),
2795 pdbfile->lines[i], 10, 71);
2796
2797 /* Read subsequent COMPND lines */
2798 for (i++; i < pdbfile->nlines; i++)
2799 if (ajStrPrefixC(pdbfile->lines[i], "COMPND"))
2800 {
2801 ajStrAppendSubS(&(pdbfile->compnd),
2802 pdbfile->lines[i], 10, 71);
2803 }
2804 else
2805 {
2806 ajStrRemoveWhiteExcess(&(pdbfile->compnd));
2807 /* i will get incremented in main loop */
2808 i--;
2809 break;
2810 }
2811 }
2812 else if (ajStrPrefixC(pdbfile->lines[i], "SOURCE"))
2813 {
2814 /* Read first SOURCE line */
2815 ajStrAssignSubS(&(pdbfile->source),
2816 pdbfile->lines[i], 10, 71);
2817
2818 /* Read subsequent SOURCE lines */
2819 for (i++; i < pdbfile->nlines; i++)
2820 if (ajStrPrefixC(pdbfile->lines[i], "SOURCE"))
2821 {
2822 ajStrAppendSubS(&(pdbfile->source),
2823 pdbfile->lines[i], 10, 71);
2824 }
2825 else
2826 {
2827 ajStrRemoveWhiteExcess(&(pdbfile->source));
2828 /* i will get incremented in main loop */
2829 i--;
2830 break;
2831 }
2832 }
2833 else if (ajStrPrefixC(pdbfile->lines[i], "TER"))
2834 {
2835 /* By default ok == ajTrue */
2836 /* pdbfile->ok[i] = ajTrue; */
2837 pdbfile->tercnt++;
2838 pdbfile->linetype[i] = pdbfileELinetypeTER;
2839 }
2840 else if (ajStrPrefixC(pdbfile->lines[i], "MODEL"))
2841 {
2842 pdbfile->modcnt++;
2843 pdbfile->linetype[i] = pdbfileELinetypeMODEL;
2844
2845 if (!donefirstatom)
2846 {
2847 donefirstatom = ajTrue;
2848 pdbfile->idxfirst = i;
2849 }
2850 }
2851 else if (ajStrPrefixC(pdbfile->lines[i], "ENDMDL"))
2852 {
2853 pdbfile->linetype[i] = pdbfileELinetypeENDMDL;
2854 }
2855
2856 else if ((!resolfound) && (ajStrPrefixC(pdbfile->lines[i],
2857 "REMARK")))
2858 {
2859 /* Assign method and resolution */
2860 ajFmtScanS(pdbfile->lines[i], "%*s %*d %S", &tmpstr);
2861
2862 if (!ajStrCmpLenC(tmpstr, "RESOLUTION", 10))
2863 {
2864 resolfound = ajTrue;
2865
2866 if (isdigit((int) ajStrGetCharPos(pdbfile->lines[i], 23)))
2867 {
2868 if ((ajFmtScanS(pdbfile->lines[i],
2869 "%*s %*d %*s %f",
2870 &(pdbfile->reso))) != 1)
2871 ajFmtPrintF(flog, "%-15s\n", "RESOLUNK");
2872
2873 pdbfile->method = ajEPdbMethodXray;
2874 }
2875 else
2876 {
2877 pdbfile->reso = 0;
2878 pdbfile->method = ajEPdbMethodNmr;
2879 }
2880 }
2881
2882 }
2883 else if (ajStrPrefixC(pdbfile->lines[i], "HELIX"))
2884 {
2885 doneFirstStrand = ajFalse;
2886
2887 elm = pdbioElementNew();
2888
2889 ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
2890 ajStrRemoveWhite(&tmpstr);
2891 ajFmtScanS(tmpstr, "%d", &elm->elementNum);
2892
2893 ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
2894 ajStrRemoveWhite(&elm->elementId);
2895
2896 elm->elementType = 'H';
2897
2898 ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17);
2899 ajStrRemoveWhite(&elm->initResName);
2900
2901 ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 21, 25);
2902 ajStrRemoveWhite(&elm->initSeqNum);
2903
2904 ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 27, 29);
2905 ajStrRemoveWhite(&elm->endResName);
2906
2907 ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37);
2908 ajStrRemoveWhite(&elm->endSeqNum);
2909
2910 elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19);
2911
2912 if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 31))
2913 {
2914 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
2915 ajStrGetCharPos(pdbfile->lines[i], 31));
2916 pdbioElementDel(&elm);
2917 continue;
2918 }
2919
2920 ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 38, 39);
2921 ajStrRemoveWhite(&tmpstr);
2922 ajFmtScanS(tmpstr, "%d", &elm->helixClass);
2923
2924
2925 /* Check that all records are present and flag an error if
2926 * they're not */
2927 if (MAJSTRGETLEN(elm->initResName)
2928 && MAJSTRGETLEN(elm->initSeqNum)
2929 && MAJSTRGETLEN(elm->endResName)
2930 && MAJSTRGETLEN(elm->endSeqNum) &&
2931 MAJSTRGETLEN(elm->elementId))
2932 {
2933 ajListPushAppend(listelms, elm);
2934 }
2935 else
2936 {
2937 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
2938 pdbioElementDel(&elm);
2939 continue;
2940 }
2941
2942 }
2943 else if (ajStrPrefixC(pdbfile->lines[i], "SHEET"))
2944 {
2945 elm = pdbioElementNew();
2946
2947 ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
2948 ajStrRemoveWhite(&tmpstr);
2949 ajFmtScanS(tmpstr, "%d", &elm->elementNum);
2950
2951 ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
2952 ajStrRemoveWhite(&elm->elementId);
2953
2954 if (!ajStrMatchS(elm->elementId, LastSheetId))
2955 doneFirstStrand = ajFalse;
2956
2957 elm->elementType = 'E';
2958
2959 ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 17, 19);
2960 ajStrRemoveWhite(&elm->initResName);
2961
2962 ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 22, 26);
2963 ajStrRemoveWhite(&elm->initSeqNum);
2964
2965 ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 28, 30);
2966 ajStrRemoveWhite(&elm->endResName);
2967
2968 ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37);
2969 ajStrRemoveWhite(&elm->endSeqNum);
2970
2971 elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 21);
2972
2973 if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 32))
2974 {
2975 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
2976 ajStrGetCharPos(pdbfile->lines[i], 32));
2977 pdbioElementDel(&elm);
2978 continue;
2979 }
2980
2981
2982 /* Check for beta-barrels - where the first and last strands are
2983 * identical requiring us to ignore the last strand */
2984
2985 if (doneFirstStrand)
2986 {
2987 if (ajStrMatchS(elm->initResName, FirstStrand->initResName) &&
2988 ajStrMatchS(elm->endResName, FirstStrand->endResName) &&
2989 ajStrMatchS(elm->initSeqNum, FirstStrand->initSeqNum) &&
2990 ajStrMatchS(elm->endSeqNum, FirstStrand->endSeqNum))
2991 {
2992 pdbioElementDel(&elm);
2993 continue;
2994 }
2995 }
2996
2997 /* Check that all records are present and flag an error if
2998 * they're not */
2999 if (MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum)
3000 &&
3001 MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum)
3002 &&
3003 MAJSTRGETLEN(elm->elementId))
3004 {
3005 ajListPushAppend(listelms, elm);
3006 }
3007 else
3008 {
3009 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
3010 pdbioElementDel(&elm);
3011 continue;
3012 }
3013
3014 ajStrAssignS(&LastSheetId, elm->elementId);
3015 FirstStrand = elm;
3016 doneFirstStrand = ajTrue;
3017 }
3018 else if (ajStrPrefixC(pdbfile->lines[i], "TURN"))
3019 {
3020 doneFirstStrand = ajFalse;
3021
3022 elm = pdbioElementNew();
3023
3024 ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9);
3025 ajStrRemoveWhite(&tmpstr);
3026 ajFmtScanS(tmpstr, "%d", &elm->elementNum);
3027
3028 ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13);
3029 ajStrRemoveWhite(&elm->elementId);
3030
3031 elm->elementType = 'T';
3032
3033 ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17);
3034 ajStrRemoveWhite(&elm->initResName);
3035
3036 ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 20, 24);
3037 ajStrRemoveWhite(&elm->initSeqNum);
3038
3039 ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 26, 28);
3040 ajStrRemoveWhite(&elm->endResName);
3041
3042 ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 31, 35);
3043 ajStrRemoveWhite(&elm->endSeqNum);
3044
3045 elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19);
3046
3047 if (elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 30))
3048 {
3049 ajFmtPrintF(flog, "%-15s%c %c\n", "SECTWOCHN", elm->chainId,
3050 ajStrGetCharPos(pdbfile->lines[i], 30));
3051 pdbioElementDel(&elm);
3052 continue;
3053 }
3054
3055 /* Check that all records are present and flag an error if
3056 * they're not */
3057 if (MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum)
3058 && MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum)
3059 && MAJSTRGETLEN(elm->elementId))
3060 {
3061 ajListPushAppend(listelms, elm);
3062 }
3063 else
3064 {
3065 ajFmtPrintF(flog, "%-15s%d\n", "SECMISS", i + 1);
3066 pdbioElementDel(&elm);
3067 continue;
3068 }
3069 }
3070 }
3071
3072
3073 /* Write array in Elements structure */
3074 (*elms)->n = (ajuint) ajListToarray(listelms, (void ***) &(*elms)->elms);
3075
3076
3077 /* Generate diagnostics and set defaults */
3078 if ((ajStrGetLen(pdbfile->compnd) == 0))
3079 {
3080 ajStrAssignC(&pdbfile->compnd, ".");
3081 ajFmtPrintF(flog, "%-15s\n", "NOCOMPND");
3082 }
3083
3084 if ((ajStrGetLen(pdbfile->source) == 0))
3085 {
3086 ajStrAssignC(&pdbfile->source, ".");
3087 ajFmtPrintF(flog, "%-15s\n", "NOSOURCE");
3088 }
3089
3090 if ((pdbfile->method == ajEPdbMethodNmr) && (pdbfile->modcnt == 0))
3091 ajFmtPrintF(flog, "%-15s\n", "NOMODEL");
3092
3093 if (!E_FPZERO(pdbfile->reso, U_FEPS) && pdbfile->modcnt)
3094 {
3095 ajFmtPrintF(flog, "%-15s\n", "RESOLMOD");
3096 pdbfile->method = ajEPdbMethodNmr;
3097 }
3098
3099 /* Every pdb file is considered to have at least one model */
3100 if (pdbfile->modcnt == 0)
3101 {
3102 pdbfile->modcnt = 1;
3103 pdbfile->nomod = ajTrue;
3104 }
3105
3106 if (!resolfound)
3107 {
3108 pdbfile->reso = 0;
3109 pdbfile->method = ajEPdbMethodNmr;
3110 ajFmtPrintF(flog, "%-15s\n", "NORESOLUTION");
3111 }
3112
3113 if (!seqresfound)
3114 {
3115 ajWarn("No SEQRES record found in raw pdb file");
3116 ajFmtPrintF(flog, "%-15s\n", "NOSEQRES");
3117
3118 /* Free memory and return */
3119 ajListFree(&listelms);
3120 ajStrDel(&LastSheetId);
3121 ajStrDel(&tmpstr);
3122 ajStrDel(&seqres);
3123
3124 return ajFalse;
3125 }
3126
3127 if (!donefirstatom)
3128 {
3129 ajWarn("No ATOM record found in raw pdb file");
3130 ajFmtPrintF(flog, "%-15s\n", "NOATOM");
3131
3132 /* Free memory and return */
3133 ajListFree(&listelms);
3134 ajStrDel(&LastSheetId);
3135 ajStrDel(&tmpstr);
3136 ajStrDel(&seqres);
3137
3138 return ajFalse;
3139 }
3140
3141 AJCNEW0(pdbfile->nligands, pdbfile->nchains);
3142 AJCNEW0(pdbfile->nres, pdbfile->nchains);
3143 AJCNEW0(pdbfile->numHelices, pdbfile->nchains);
3144 AJCNEW0(pdbfile->numStrands, pdbfile->nchains);
3145 AJCNEW0(pdbfile->numSheets, pdbfile->nchains);
3146 AJCNEW0(pdbfile->numTurns, pdbfile->nchains);
3147 AJCNEW0(pdbfile->chainok, pdbfile->nchains);
3148 AJCNEW0(pdbfile->resn1ok, pdbfile->nchains);
3149
3150 for (i = 0U; i < pdbfile->nchains; i++)
3151 {
3152 pdbfile->chainok[i] = ajTrue;
3153 pdbfile->resn1ok[i] = ajTrue;
3154 }
3155
3156 /* Free memory and return */
3157 ajStrDel(&LastSheetId);
3158 ajListFree(&listelms);
3159
3160 ajStrDel(&tmpstr);
3161 ajStrDel(&seqres);
3162
3163 return ajTrue;
3164 }
3165
3166
3167
3168
3169 /* @funcstatic pdbioSeqresToSequence ******************************************
3170 **
3171 ** Reads a string containing a SEQRES sequence (e.g. "ALA ALA LEU" ) and
3172 ** writes a string containing a normal sequence (e.g. "AAL").
3173 **
3174 ** @param [r] seqres [const AjPStr] SEQRES sequence
3175 ** @param [w] seq [AjPStr *] Output sequence
3176 ** @param [r] camask [AjBool] Whether to ignore residues which do not
3177 ** have a C-alpha atom, these are defined as ACE, FOR and NH2 groups.
3178 ** @param [w] len [ajuint *] Length of sequence INCLUDING ACE, FOR and
3179 ** NH2 groups.
3180 **
3181 ** @return [AjBool] ajTrue on success, ajFalse otherwise.
3182 **
3183 ** @release 2.9.0
3184 ** @@
3185 ******************************************************************************/
3186
pdbioSeqresToSequence(const AjPStr seqres,AjPStr * seq,AjBool camask,ajuint * len)3187 static AjBool pdbioSeqresToSequence(const AjPStr seqres,
3188 AjPStr *seq, AjBool camask,
3189 ajuint *len)
3190 {
3191 const AjPStr aa3 = NULL;
3192 char aa1 = '\0';
3193 ajuint nrem = 0; /* No. 'residues' that were removed */
3194
3195 /* Check args */
3196 if (!seqres || !seq)
3197 {
3198 ajWarn("Bad args passed to pdbioSeqresToSequence\n");
3199
3200 return ajFalse;
3201 }
3202
3203 /* Allocate memory */
3204
3205 if ((aa3 = ajStrParseC(seqres, " \n")))
3206 {
3207 /* Parse seqres string */
3208 do
3209 {
3210 if (ajStrMatchC(aa3, "FOR") ||
3211 ajStrMatchC(aa3, "ACE") ||
3212 ajStrMatchC(aa3, "NH2"))
3213 {
3214 if (camask)
3215 {
3216 nrem++;
3217 continue;
3218 }
3219 }
3220
3221 ajResidueFromTriplet(aa3, &aa1);
3222 ajStrAppendK(seq, aa1);
3223 }
3224 while ((aa3 = ajStrParseC(NULL, " \n")));
3225 }
3226 else
3227 return ajFalse;
3228
3229 *len = ajStrGetLen(*seq) + nrem;
3230
3231 return ajTrue;
3232 }
3233
3234
3235
3236
3237 /* @funcstatic pdbioCheckChains ***********************************************
3238 **
3239 ** Reads a Pdbfile object and checks whether chains from the SEQRES records
3240 ** (i) use unique chain ids, (ii) do not use an id of a space (' ') alongside
3241 ** non-space chain ids and (iii) contain at least the user-defined threshold
3242 ** number of amino acid residues. If any of these conditions are not met then
3243 ** the chain is discarded (chainok array is set to ajFalse).
3244 **
3245 ** The chainok array is written.
3246 **
3247 **
3248 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3249 ** @param [u] flog [AjPFile] Pointer to log file (build
3250 ** diagnostics).
3251 ** @param [r] min_chain_size [ajint] Minimum number of amino acids in
3252 ** a chain.
3253 **
3254 ** @return [AjBool] True if SEQRES records contained at least one protein
3255 ** chain, False otherwise.
3256 **
3257 ** @release 2.9.0
3258 ** @@
3259 ******************************************************************************/
3260
pdbioCheckChains(AjPPdbfile pdbfile,AjPFile flog,ajint min_chain_size)3261 static AjBool pdbioCheckChains(AjPPdbfile pdbfile, AjPFile flog,
3262 ajint min_chain_size)
3263 {
3264 ajuint i = 0U; /* Loop counter */
3265 ajuint j = 0U; /* Loop counter */
3266 AjIStr iter = NULL; /* Iterator for sequence strings */
3267 ajint aacnt = 0; /* Counter for number of amino acids in
3268 * sequence strings */
3269
3270 AjBool ok = ajFalse; /* Flag which is True if amino acid chains
3271 * are found in the SEQRES records */
3272
3273 char id1 = ' '; /* Chain id */
3274 char id2 = ' '; /* Chain id */
3275 AjBool iderr = ajFalse; /* ajTrue if both a space and a character
3276 * are used as chain id's in the same file */
3277
3278 /* Check args */
3279 if (!pdbfile || !flog)
3280 {
3281 ajWarn("Bad args passed to pdbioCheckChains\n");
3282
3283 return ajFalse;
3284 }
3285
3286 /* Report problems with chain id's */
3287 for (i = 0U; i < pdbfile->nchains; i++)
3288 {
3289 id1 = ajChararrGet(pdbfile->chid, i);
3290
3291 for (j = i + 1U; j < pdbfile->nchains; j++)
3292 {
3293 if (id1 == (id2 = ajChararrGet(pdbfile->chid, j)))
3294 {
3295 ajFmtPrintF(flog, "%-15s%d (%c) %d (%c)\n", "CHAINIDS", i + 1,
3296 ajChararrGet(pdbfile->chid, i), j + 1,
3297 ajChararrGet(pdbfile->chid, j));
3298
3299 pdbfile->chainok[i] = ajFalse;
3300 pdbfile->chainok[j] = ajFalse;
3301 }
3302
3303 if ((((id1 == ' ') && (id2 != ' ')) || ((id2 == ' ') && (id1 != ' ')))
3304 && (!(iderr)))
3305 {
3306 ajFmtPrintF(flog, "%-15s\n", "CHAINIDSPC");
3307 iderr = ajTrue;
3308 }
3309
3310 }
3311 }
3312
3313
3314 /* Report problems with non-protein chains */
3315 for (i = 0U; i < pdbfile->nchains; i++)
3316 {
3317 if (!pdbfile->chainok[i])
3318 continue;
3319
3320
3321 aacnt = 0;
3322 iter = ajStrIterNew(pdbfile->seqres[i]);
3323
3324 if (toupper((int) ajStrIterGetK(iter)) != 'X')
3325 ++aacnt;
3326
3327 while (ajStrIterNext(iter))
3328 if (toupper((int) ajStrIterGetK(iter)) != 'X')
3329 if (++aacnt >= min_chain_size)
3330 break;
3331
3332 ajStrIterDel(&iter);
3333
3334
3335 if (aacnt == 0)
3336 {
3337 ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESNOAA", i + 1,
3338 ajChararrGet(pdbfile->chid, i));
3339 pdbfile->chainok[i] = ajFalse;
3340 }
3341 else if (aacnt < min_chain_size)
3342 {
3343 ajFmtPrintF(flog, "%-15s%d (%c)\n", "SEQRESFEWAA", i + 1,
3344 ajChararrGet(pdbfile->chid, i));
3345
3346 pdbfile->chainok[i] = ajFalse;
3347 }
3348 else
3349 {
3350 ok = ajTrue;
3351 }
3352 }
3353
3354 /* Return now if no protein chains are found */
3355 if (!ok)
3356 {
3357 ajWarn("No protein chains found in raw pdb file");
3358 ajFmtPrintF(flog, "%-15s\n", "NOPROTEINS");
3359
3360 return ajFalse;
3361 }
3362
3363
3364 return ajTrue;
3365 }
3366
3367
3368
3369
3370 /* @funcstatic pdbioCheckTer **************************************************
3371 **
3372 ** Reads a Pdbfile object and checks whether the expected number of TER
3373 ** and MODEL records are present. Any unwanted records (e.g. TER records that
3374 ** delimit fragments of chain digests and duplicate MODEL records) are
3375 ** discarded (the linetype array for the lines are set to pdbfileELinetypeIgnore).
3376 **
3377 ** The linetype array and modcnt variable may be modified. The toofewter
3378 ** element is written.
3379 ** The value of modcnt is reduced by 1 for each MODEL record that was masked
3380 ** but this is not done for tercnt.
3381 **
3382 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3383 ** @param [u] flog [AjPFile] Pointer to log file (build diagnostics)
3384 **
3385 ** @return [AjBool] True on success, False otherwise.
3386 **
3387 ** @release 2.9.0
3388 ** @@
3389 ******************************************************************************/
3390
pdbioCheckTer(AjPPdbfile pdbfile,AjPFile flog)3391 static AjBool pdbioCheckTer(AjPPdbfile pdbfile, AjPFile flog)
3392 {
3393 char aa = ' ';
3394 ajuint i = 0U; /* Loop counter */
3395 AjBool toomany = ajFalse;
3396 AjBool toofew = ajFalse;
3397 AjPStr aa1 = NULL;
3398 AjPStr aa2 = NULL;
3399
3400 /* Check args */
3401 if (!pdbfile || !flog)
3402 {
3403 ajWarn("Bad args passed to pdbioCheckTer\n");
3404
3405 return ajFalse;
3406 }
3407
3408 /* Allocate memory */
3409 aa1 = ajStrNew();
3410 aa2 = ajStrNew();
3411
3412 /* Report problems with TER records */
3413 if (!pdbfile->tercnt)
3414 ajFmtPrintF(flog, "%-15s\n", "TERNONE");
3415 else
3416 {
3417 if (pdbfile->method == ajEPdbMethodNmr)
3418 {
3419 if (pdbfile->tercnt > (pdbfile->nchains *
3420 pdbfile->modcnt))
3421 toomany = ajTrue;
3422 else if (pdbfile->tercnt < (pdbfile->nchains *
3423 pdbfile->modcnt))
3424 toofew = ajTrue;
3425 }
3426 else
3427 {
3428 if (pdbfile->tercnt > pdbfile->nchains)
3429 toomany = ajTrue;
3430 else if (pdbfile->tercnt < pdbfile->nchains)
3431 toofew = ajTrue;
3432 }
3433 }
3434
3435 /* Report diagnostics */
3436 if (toomany)
3437 ajFmtPrintF(flog, "%-15s\n", "TERTOOMANY");
3438 else if (toofew)
3439 {
3440 ajFmtPrintF(flog, "%-15s\n", "TERTOOFEW");
3441 pdbfile->toofewter = ajTrue;
3442 }
3443
3444 /* Mask out the extra TER records */
3445 if (toomany)
3446 {
3447 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
3448 {
3449 /* This is a TER record. Mask it out if it is flanked by ATOM or
3450 * HETATM records for AMINO ACIDS and with identical chain ids. */
3451 if (pdbfile->linetype[i] == pdbfileELinetypeTER)
3452 {
3453 if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
3454 (pdbfile->linetype[i + 1] == pdbfileELinetypeCoordinate) &&
3455 (ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)
3456 == ajStrGetCharPos(pdbfile->lines[i + 1], POS_CHID)))
3457 {
3458 ajStrAssignSubS(&aa1, pdbfile->lines[i - 1], 17, 19);
3459 ajStrAssignSubS(&aa2, pdbfile->lines[i + 1], 17, 19);
3460
3461 if ((ajResidueFromTriplet(aa1, &aa)) &&
3462 (ajResidueFromTriplet(aa2, &aa)))
3463 {
3464 pdbfile->linetype[i] = pdbfileELinetypeIgnore;
3465
3466 }
3467 }
3468 }
3469 }
3470 }
3471
3472 /* Check for duplicate MODEL records */
3473 for (i = pdbfile->idxfirst + 1U;
3474 i < pdbfile->nlines;
3475 i++)
3476 {
3477 if ((pdbfile->linetype[i - 1] == pdbfileELinetypeMODEL) &&
3478 (pdbfile->linetype[i] == pdbfileELinetypeMODEL))
3479 {
3480 pdbfile->linetype[i - 1] = pdbfileELinetypeIgnore;
3481 pdbfile->modcnt--;
3482 ajFmtPrintF(flog, "%-15s%d\n", "MODELDUP", i + 1);
3483 }
3484 }
3485
3486 /* Tidy up and return */
3487 ajStrDel(&aa1);
3488 ajStrDel(&aa2);
3489
3490 return ajTrue;
3491 }
3492
3493
3494
3495
3496 /* @funcstatic pdbioNumberChains **********************************************
3497 **
3498 ** Reads a Pdbfile object and assigns each ATOM or HETATM record to a specific
3499 ** chain and model. Lines containing coordinates for water molecules and
3500 ** other non-protein groups ("heterogens") are identified. Water molecules are
3501 ** uniquely associated with a whole model whereas other non-protein groups are
3502 ** associated with a unique chain if possible. If this is not possible they
3503 ** are assigned a unique group number.
3504 **
3505 ** The modn, chnn and gpn arrays are written. The linetype array is modified.
3506 ** The nligands array (count of groups associated with a chain), ngroups
3507 ** element (count of groups not associated with a chain) and gpid (identifiers
3508 ** of these later groups) are written.
3509 **
3510 ** linetype array
3511 ** The linetype array is written with a value of pdbfileELinetypeWater for
3512 ** lines containing coordinates for water, and to pdbfileELinetypeHeterogen or
3513 ** pdbfileELinetypeGroups for non-protein groups that, respectively, could or
3514 ** could not be uniquely associated with a chain.
3515 **
3516 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3517 ** @param [u] flog [AjPFile] Log file (build diagnostics)
3518 **
3519 ** @return [AjBool] True on success, False otherwise
3520 **
3521 ** @release 2.9.0
3522 ** @@
3523 ******************************************************************************/
3524
pdbioNumberChains(AjPPdbfile pdbfile,AjPFile flog)3525 static AjBool pdbioNumberChains(AjPPdbfile pdbfile, AjPFile flog)
3526 {
3527 char id = ' '; /* Chain id */
3528 ajuint i = 0U; /* Loop counter */
3529 ajuint j = 0U; /* Loop counter */
3530 ajint mod = 0; /* Model number */
3531
3532 AjBool done = ajFalse; /* True if we have assigned a chain id for
3533 * this line */
3534 ajuint this = 0U; /* Chain number of last line read in */
3535 ajuint chn = 0U; /* Chain number as index (starting from 0) */
3536 AjPInt gpns = NULL; /* Gives the correct group number for groups
3537 * that could not be identified as belonging
3538 * to a chain, in cases where a single chain
3539 * only is present in the file */
3540 ajint gpn = 0; /* Current group number */
3541 ajint offset = 0; /* Offset for finding correct value for gpns
3542 * (for use with files with a single chain
3543 * only */
3544 AjBool *chndone = NULL; /* Array whose elements are TRUE if we have
3545 * already read a line in belonging to the
3546 * appropriate chain for this model */
3547 AjPStr *htype = NULL; /* Array holding the residue type of the last
3548 * heterogen read in for the appropriate
3549 * chain */
3550
3551 /* Check args */
3552 if (!pdbfile || !flog)
3553 {
3554 ajWarn("Bad args passed to pdbioNumberChains\n");
3555
3556 return ajFalse;
3557 }
3558
3559 /* Allocate memory */
3560 gpns = ajIntNew();
3561
3562 AJCNEW0(htype, pdbfile->nchains);
3563
3564 for (i = 0U; i < pdbfile->nchains; i++)
3565 htype[i] = ajStrNew();
3566
3567 AJCNEW0(chndone, pdbfile->nchains);
3568
3569 for (i = 0U; i < pdbfile->nchains; i++)
3570 chndone[i] = ajFalse;
3571
3572 if (((pdbfile->method == ajEPdbMethodNmr) && pdbfile->nomod) ||
3573 (pdbfile->method == ajEPdbMethodXray))
3574 mod = 1;
3575
3576 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
3577 {
3578 /* JONNEW Lines may already have been assigned to pdbfileELinetypeWater
3579 * in pdbioFirstPass function, so we need to check here to ensure model
3580 * number gets assigned */
3581 if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
3582 (pdbfile->linetype[i] == pdbfileELinetypeWater))
3583 {
3584
3585 pdbfile->modn[i] = mod;
3586
3587 /* Assign appropriate chain number to any ATOM or HETATM line
3588 * which has an id exhibited in the SEQRES records. */
3589 done = ajFalse;
3590 id = ajStrGetCharPos(pdbfile->lines[i], POS_CHID);
3591
3592 for (j = 0U; j < pdbfile->nchains; j++)
3593 {
3594 if (ajChararrGet(pdbfile->chid, j) == id)
3595 {
3596 pdbfile->chnn[i] = j + 1;
3597 chn = pdbfile->chnn[i] - 1;
3598 this = j + 1;
3599
3600 if (chndone[this - 1])
3601 {
3602 /* Mark up water coordinates */
3603 if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3604 pdbfile->linetype[i] = pdbfileELinetypeWater;
3605 else
3606 {
3607 /* Mark up ligand coordinates */
3608 pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3609
3610 /* New heterogen */
3611 if (!ajStrMatchS(htype[chn], pdbfile->rtype[i]))
3612 {
3613 offset++;
3614 pdbfile->nligands[chn]++;
3615 pdbfile->gpn[i] = pdbfile->nligands[chn];
3616 ajStrAssignS(&htype[chn],
3617 pdbfile->rtype[i]);
3618 }
3619 /* More atoms of the same heterogen */
3620 else
3621 pdbfile->gpn[i] = pdbfile->nligands[chn];
3622 }
3623 }
3624 #if AJFALSE
3625 /* Unused */
3626 else
3627 doneter = ajFalse;
3628 #endif /* AJFALSE */
3629
3630 done = ajTrue;
3631 break;
3632 }
3633 }
3634
3635
3636 if (!done)
3637 {
3638 /* Any ATOM or HETATM record with a whitespace as chain id
3639 * and which has not already been assigned belongs to the
3640 * chain immediately preceding it. Assign these lines as
3641 * NON_PROTEIN_CHAIN lines. */
3642
3643 if (id == ' ')
3644 {
3645 /* This won't be set until we've read in at least one
3646 * coordinate line for protein chain, so if this==0, set
3647 * it to 1 (first chain). This prevents problems for
3648 * 1qjh.pxyz */
3649
3650 if (this == 0)
3651 this = 1;
3652
3653 pdbfile->chnn[i] = this;
3654 chn = pdbfile->chnn[i] - 1;
3655
3656 /* Mark up water coordinates */
3657 if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3658 pdbfile->linetype[i] = pdbfileELinetypeWater;
3659 else
3660 {
3661 /* Mark up ligand coordinates */
3662
3663 pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3664
3665 /* New ligand */
3666 if (!ajStrMatchS(htype[chn], pdbfile->rtype[i]))
3667 {
3668
3669 offset++;
3670 pdbfile->nligands[chn]++;
3671 pdbfile->gpn[i] = pdbfile->nligands[chn];
3672 ajStrAssignS(&htype[chn], pdbfile->rtype[i]);
3673 }
3674 /* More atoms of the same heterogen */
3675 else
3676 {
3677 pdbfile->gpn[i] = pdbfile->nligands[chn];
3678 }
3679 }
3680 }
3681 else
3682 /* Assign any ATOM or HETATM records with a non-
3683 * whitespace chain id that does not appear in the SEQRES
3684 * records as a NON_PROTEIN_CHAIN. Record the chain id's
3685 * used and assign a GROUP NUMBER as appropriate. */
3686 {
3687 /* Mark up water coordinates */
3688 if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3689 pdbfile->linetype[i] = pdbfileELinetypeWater;
3690 else
3691 {
3692 /* If there is a single chain only then the group is
3693 * of course associated with that chain */
3694 if (pdbfile->nchains == 1)
3695 {
3696 /* Mark up ligand coordinates */
3697 pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3698 pdbfile->chnn[i] = 1;
3699
3700 for (done = ajFalse, j = 0U;
3701 j < pdbfile->ngroups;
3702 j++)
3703 if (ajChararrGet(pdbfile->gpid, j) == id)
3704 {
3705 pdbfile->gpn[i] = ajIntGet(gpns, j);
3706 done = ajTrue;
3707 break;
3708 }
3709
3710 if (!done)
3711 {
3712 ajIntPut(&gpns, j, (gpn = j + 1 + offset));
3713 pdbfile->gpn[i] = gpn;
3714
3715 /* NO - this code would be used only if the
3716 * group could not be associated with a
3717 * chain. ajChararrPut(&(pdbfile->gpid),
3718 * pdbfile->ngroups, id); pdbfile->ngroups++; */
3719
3720 /* Increment number of ligands and reset last
3721 * ligand type read in */
3722 pdbfile->nligands[0]++;
3723 ajStrAssignC(&htype[0], " ");
3724 }
3725 }
3726 else
3727 {
3728 /* Mark up ligand coordinates */
3729 pdbfile->linetype[i] = pdbfileELinetypeGroups;
3730
3731 for (done = ajFalse, j = 0U;
3732 j < pdbfile->ngroups;
3733 j++)
3734 if (ajChararrGet(pdbfile->gpid, j) == id)
3735 {
3736 pdbfile->gpn[i] = j + 1;
3737 done = ajTrue;
3738 break;
3739 }
3740
3741 if (!done)
3742 {
3743 ajChararrPut(&(pdbfile->gpid),
3744 pdbfile->ngroups, id);
3745 pdbfile->ngroups++;
3746 pdbfile->gpn[i] = j + 1;
3747 }
3748 }
3749 }
3750 }
3751 }
3752 }
3753 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
3754 {
3755 mod++;
3756
3757 /* doneter=ajFalse; Unused variable */
3758 for (j = 0U; j < pdbfile->nchains; j++)
3759 chndone[j] = ajFalse;
3760
3761 /* doneoneter=ajFalse; Unused variable */
3762 }
3763 else if (pdbfile->linetype[i] == pdbfileELinetypeTER)
3764 {
3765 chndone[this - 1] = ajTrue;
3766
3767
3768 /* prev = this; Unused variable */
3769 /* doneoneter=ajTrue; Unused variable */
3770 /* doneter=ajTrue; Unused variable */
3771 }
3772
3773
3774
3775 /*
3776 ** Check for missing TER records.
3777 ** Where chains are not separated by TER records
3778 ** (the chain id changes from line to line without an intervening TER
3779 ** record and both chain id's are not whitespace).
3780 ** Where ATOM and HETATM groups are not separated
3781 ** by TER records (a chain id is given on one line, a whitespace chain
3782 ** id is given on the next line, and there is no intervening TER
3783 ** record).
3784 **
3785 ** This code is identical to a fragment from pdbioCheckTer.
3786 **
3787 ** Note that chndone only is modified. If the code in the function
3788 ** was made to use prev, doneoneter, doneter then the code below would
3789 ** also have to be modified
3790 **
3791 ** Only do this now where there aren't enough TER records in the file.
3792 ** Without this check, it was failing for cases where the order of
3793 ** chains is inconsistent (see around line 4095 of pdb1cm4.ent)
3794 */
3795
3796 if (pdbfile->toofewter)
3797 {
3798 if (i > pdbfile->idxfirst)
3799 if (pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate &&
3800 ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
3801 (pdbfile->linetype[i] == pdbfileELinetypeHeterogen)) &&
3802 ((ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)) !=
3803 ajStrGetCharPos(pdbfile->lines[i], POS_CHID)))
3804 {
3805 for (j = 0; j < pdbfile->nchains; j++)
3806 if (ajChararrGet(pdbfile->chid, j)
3807 == ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID))
3808 {
3809 chndone[j] = ajTrue;
3810 break;
3811 }
3812 }
3813 }
3814 }
3815
3816
3817 /*
3818 ** The above code cannot cope for cases where the ATOM and HETATM records
3819 ** use the same (or no) chain identifier and are not separated by a TER
3820 ** record (e.g. 1rbp)
3821 **
3822 ** For files with less than the expected number of TER records,
3823 ** check again for COORDHET lines, which are identified as
3824 ** (i) a line beginning with a HETATM record with the same chain
3825 ** identifier but lower residue number than the preceding line, or
3826 ** JONNEW
3827 ** (ii) a line beginning with a HETATM record which is not followed
3828 ** anywhere in the file by an ATOM record with the same chain identifier
3829 ** (from the PDB record) or number (assigned by parser)
3830 */
3831
3832 for (i = 0; i < pdbfile->nchains; i++)
3833 ajStrAssignC(&htype[i], "\0");
3834
3835
3836 if (pdbfile->tercnt < (pdbfile->nchains * pdbfile->modcnt))
3837 for (i = pdbfile->idxfirst + 1; i < pdbfile->nlines; i++)
3838 {
3839 chn = pdbfile->chnn[i] - 1;
3840
3841 if (pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate &&
3842 pdbfile->linetype[i] == pdbfileELinetypeCoordinate &&
3843 ((ajStrGetCharPos(pdbfile->lines[i - 1], POS_CHID)) ==
3844 ajStrGetCharPos(pdbfile->lines[i], POS_CHID)))
3845 if (ajStrPrefixC(pdbfile->lines[i], "HETATM"))
3846 if ((pdbfile->resn1[i] < pdbfile->resn1[i - 1]) ||
3847 pdbioNoMoreAtoms(pdbfile, i))
3848 /* if(pdbfile->resn1[i]<pdbfile->resn1[i-1]) */
3849 while ((ajStrPrefixC(pdbfile->lines[i], "HETATM")))
3850 {
3851 if (ajStrMatchC(pdbfile->rtype[i], "HOH"))
3852 pdbfile->linetype[i] = pdbfileELinetypeWater;
3853 else
3854 {
3855 pdbfile->linetype[i] = pdbfileELinetypeHeterogen;
3856
3857 /* New heterogen */
3858 if (!ajStrMatchS(htype[chn],
3859 pdbfile->rtype[i]))
3860 {
3861 offset++;
3862 pdbfile->nligands[chn]++;
3863 pdbfile->gpn[i]
3864 = pdbfile->nligands[chn];
3865 ajStrAssignS(&htype[chn],
3866 pdbfile->rtype[i]);
3867 }
3868 /* More atoms of the same heterogen */
3869 else
3870 {
3871 pdbfile->gpn[i]
3872 = pdbfile->nligands[chn];
3873 }
3874 }
3875 i++;
3876 }
3877 }
3878
3879 /* For files with a single chain only, set the number of groups that
3880 * could not be associated with a chain to zero */
3881 if (pdbfile->nchains == 1)
3882 {
3883 /* We might need to add ngroups to nligands[0] */
3884 if (pdbfile->ngroups != 0)
3885 ajFatal("Must check ngroups versus nligands in the file");
3886 pdbfile->ngroups = 0;
3887 }
3888
3889
3890
3891 /* Tidy up and return */
3892 for (i = 0; i < pdbfile->nchains; i++)
3893 ajStrDel(&htype[i]);
3894
3895 AJFREE(htype);
3896 ajIntDel(&gpns);
3897 AJFREE(chndone);
3898
3899 return ajTrue;
3900 }
3901
3902
3903
3904
3905 /* @funcstatic pdbioNoMoreAtoms ***********************************************
3906 **
3907 ** This function is called by function <pdbioNumberChains> to identify ligands
3908 ** (COORDHET lines) in files with less than the expected number of TER
3909 ** records.
3910 ** These are identified here by a line beginning with a HETATM record which
3911 ** is not followed by an ATOM record with the same chain identifier (from
3912 ** the PDB record) or number (assigned by parser). The function returns if
3913 ** a line of a different chain is found or at the first non-ATOM/HETAM line.
3914 ** Additional processing is done in <pdbioNumberChains> itself.
3915 **
3916 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3917 ** @param [r] linen [ajuint] Line number
3918 **
3919 ** @return [AjBool] True (no more atoms), False otherwise
3920 **
3921 ** @release 3.0.0
3922 ** @@
3923 ******************************************************************************/
3924
pdbioNoMoreAtoms(AjPPdbfile pdbfile,ajuint linen)3925 static AjBool pdbioNoMoreAtoms(AjPPdbfile pdbfile, ajuint linen)
3926 {
3927 ajuint i = 0U;
3928
3929 for (i = linen + 1; i < pdbfile->nlines; i++)
3930 {
3931 if ((ajStrPrefixC(pdbfile->lines[i], "ATOM")))
3932 {
3933 /* Same chain */
3934 if ((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) ==
3935 ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) ||
3936 (pdbfile->chnn[linen] == pdbfile->chnn[i]))
3937 return ajFalse;
3938 else
3939 /* Different chain */
3940 return ajTrue;
3941 }
3942 else if ((ajStrPrefixC(pdbfile->lines[i], "HETATM")))
3943 {
3944 /* Different chain */
3945 if ((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) !=
3946 ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) ||
3947 (pdbfile->chnn[linen] != pdbfile->chnn[i]))
3948 return ajTrue;
3949 }
3950 else
3951 /* Different chain or near EOF */
3952 return ajTrue;
3953 }
3954
3955 return ajTrue;
3956 }
3957
3958
3959
3960
3961 /* @funcstatic pdbioMaskChains ************************************************
3962 **
3963 ** Reads a Pdbfile object and checks to see whether the ATOM records for
3964 ** each chain contain sufficient amino acids. Any chains with insufficient
3965 ** amino acids either in the SEQRES or ATOM records, or with ambiguous chain
3966 ** id's are discarded. Optionally, amino acid residues and non-amino
3967 ** acid groups (e.g. ACE, NH2 etc) in protein chains with no CA atom are also
3968 ** discarded (the linetype array for the lines are set to pdbfileELinetypeIgnore).
3969 ** For non-amino acid groups, the corresponding characters are removed from
3970 ** the sequence derived from the SEQRES records. Coordinate data and atom
3971 ** type are parsed for each atom. Optionally, amino acids or groups in
3972 ** protein chains with a single atom only are also discarded.
3973 **
3974 **
3975 ** Checks whether chains from the ATOM records contain at least the
3976 ** user-defined threshold number of amino acid residues. If not then the chain
3977 ** is discarded (chainok array is set to ajFalse). If NO chains with
3978 ** sufficient residues are found, a "NOPROTEINS" error is generated and
3979 ** ajFalse is returned.
3980 **
3981 ** Writes the x,y,z,o,b and atype elements of a Pdbfile object. The linetype,
3982 ** and possibly seqres, seqresful and nres arrays are modified.
3983 **
3984 ** linetype array
3985 ** Coordinate data are extracted for lines of linetype pdbfileELinetypeCoordinate,
3986 ** pdbfileELinetypeHeterogen and pdbfileELinetypeGroups.
3987 **
3988 ** seqres & seqresful arrays
3989 ** Three-letter codes of any groups that are (i) not standard amino acids and
3990 ** (ii) which do not contain a CA atom are removed from the seqres sequence
3991 ** if the <camask> is set. The seqresful array is an intermediate array to
3992 ** achieve this.
3993 **
3994 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
3995 ** @param [u] flog [AjPFile] Log file (build diagnostics)
3996 ** @param [r] min_chain_size [ajint] Min. no. of amino acids in a chain
3997 ** @param [r] camask [AjBool] Whether to mask non-amino acid
3998 ** residues within protein chains which
3999 ** do not have a C-alpha atom (remove them
4000 ** from the seqres sequence and set the
4001 ** linetype array for the lines
4002 ** to pdbfileELinetypeIgnore).
4003 ** @param [r] camask1 [AjBool] Whether to mask amino acid residues
4004 ** within protein chains which do not have
4005 ** a C-alpha atom (set the linetype
4006 ** array for the lines to pdbfileELinetypeIgnore).
4007 ** @param [r] atommask [AjBool] Whether to mask residues or groups
4008 ** with a single atom only.
4009 **
4010 ** @return [AjBool] True on success, False otherwise
4011 **
4012 ** @release 2.9.0
4013 ** @@
4014 ******************************************************************************/
4015
pdbioMaskChains(AjPPdbfile pdbfile,AjPFile flog,ajint min_chain_size,AjBool camask,AjBool camask1,AjBool atommask)4016 static AjBool pdbioMaskChains(AjPPdbfile pdbfile, AjPFile flog,
4017 ajint min_chain_size,
4018 AjBool camask, AjBool camask1,
4019 AjBool atommask)
4020 {
4021 ajuint i = 0U; /* Loop counter */
4022 ajuint j = 0U; /* Loop counter */
4023 AjPStr aa3 = NULL; /* Amino acid */
4024 ajint rcnt = 0; /* Residue count */
4025 ajint acnt = 0; /* Atom count */
4026 ajint modcnt = 0; /* Count of MODEL records */
4027 ajuint lastatom = 0U; /* Line number of last coordinate line read
4028 * in */
4029 ajuint firstatom = 0U; /* Line number of coordinate line for first
4030 * atom of residue */
4031 AjBool noca = ajFalse; /* True if this residue does not contain a CA
4032 * atom */
4033 ajint lastchn = 0; /* Chain number of last line read in */
4034 AjBool *chainok; /* Array of flags which are True if a chain
4035 * in the SEQRES records is found in the ATOM
4036 * records */
4037 char aa1 = ' '; /* Amino acid id */
4038 AjPStr lastrn = NULL; /* Number of last residue read in */
4039 AjBool msgdone = ajFalse; /* Flag for message reporting */
4040 AjPStr sub = NULL;
4041 AjPStr tmpseq = NULL;
4042 ajuint lenful = 0U; /* Length of SEQRES sequence including ACE,
4043 * FOR & NH2 groups that might be discarded
4044 * by the call to pdbioSeqresToSequence */
4045 ajuint ipos = 0U;
4046 char tmp = ' ';
4047 AjBool odd = ajFalse; /* Whether the current residue / group is of
4048 * unknown type */
4049 AjBool ok = ajFalse; /* True if the file, after processing by this
4050 * function, is found to contain at least one
4051 * chain for which chainok == ajTrue */
4052
4053 /* Check args */
4054 if (!pdbfile || !flog)
4055 {
4056 ajWarn("Bad args passed to pdbioMaskChains\n");
4057
4058 return ajFalse;
4059 }
4060
4061 /* Allocate memory */
4062 AJCNEW0(chainok, pdbfile->nchains);
4063
4064 for (i = 0U; i < pdbfile->nchains; i++)
4065 chainok[i] = ajFalse;
4066
4067 aa3 = ajStrNew();
4068 lastrn = ajStrNew();
4069 sub = ajStrNew();
4070 ajStrAssignClear(&sub);
4071
4072 firstatom = lastatom = pdbfile->idxfirst;
4073
4074 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4075 {
4076 if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
4077 (pdbfile->linetype[i] == pdbfileELinetypeHeterogen) ||
4078 (pdbfile->linetype[i] == pdbfileELinetypeGroups) ||
4079 (pdbfile->linetype[i] == pdbfileELinetypeWater))
4080 {
4081 /* Assign x, y, z, o, b data */
4082 /* Replace this with code so that internals of structure are not
4083 * probed (when code becomes available) */
4084 if ((sscanf(&(pdbfile->lines[i]->Ptr[28]), "%f %f %f %f",
4085 &(pdbfile->x[i]),
4086 &(pdbfile->y[i]),
4087 &(pdbfile->z[i]),
4088 &(pdbfile->o[i]))) != 4)
4089 ajFatal("Scan error in pdbioMaskChains\n"
4090 "Email jison@hgmp.mrc.ac.uk");
4091
4092 if (!sscanf(&(pdbfile->lines[i]->Ptr[60]), "%f",
4093 &(pdbfile->b[i])))
4094 ajFatal("Scan error in pdbioMaskChains\n"
4095 "Email jison@hgmp.mrc.ac.uk");
4096
4097 /* Usually position 12 is used for the alternative position
4098 * indicator (taken in the code below to be indicated by a
4099 * number) for atoms, but occasionally can be incorrectly used
4100 * for the atom type itself (indicated in the code below by a
4101 * character in pos 12). This code copes for both cases */
4102 /* Assign atom type */
4103 if (isalpha((int) pdbfile->lines[i]->Ptr[12]))
4104 {
4105 ajStrAssignSubS(&pdbfile->atype[i],
4106 pdbfile->lines[i], 12, 15);
4107 ajStrRemoveWhite(&pdbfile->atype[i]);
4108 if (!msgdone)
4109 {
4110 ajFmtPrintF(flog, "%-15s%d\n", "ATOMCOL12", i + 1);
4111 msgdone = ajTrue;
4112 }
4113 }
4114 else
4115 {
4116 ajStrAssignSubS(&pdbfile->atype[i],
4117 pdbfile->lines[i], 13, 15);
4118 ajStrRemoveWhite(&pdbfile->atype[i]);
4119 }
4120
4121 }
4122
4123 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4124 {
4125 /* Check whether CA atom is present */
4126 if (!(ajStrCmpC(pdbfile->atype[i], "CA")))
4127 noca = ajFalse;
4128
4129 if (pdbfile->chnn[i] < lastchn)
4130 ajFmtPrintF(flog, "%-15s%d\n", "CHAINORDER", i + 1);
4131
4132 if (pdbfile->chnn[i] != lastchn)
4133 {
4134 rcnt = 0;
4135 lastchn = pdbfile->chnn[i];
4136 }
4137
4138
4139 /* If this is a new residue */
4140 if (!ajStrMatchS(pdbfile->pdbn[i], lastrn))
4141 {
4142 /* Mask coordinate lines where there are only a single atom */
4143 if (acnt == 1)
4144 {
4145 ajFmtPrintF(flog, "%-15s%d\n", "ATOMONEONLY", lastatom + 1);
4146
4147 if (atommask)
4148 pdbfile->linetype[lastatom] = pdbfileELinetypeIgnore;
4149 }
4150
4151 /* Mask coordinate lines for residues lacking a CA atom */
4152 if (noca)
4153 {
4154 odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatom - 1],
4155 &tmp)));
4156
4157 if ((camask && odd) ||
4158 (camask1 && !odd))
4159 for (j = firstatom; j <= lastatom; j++)
4160 pdbfile->linetype[j] = pdbfileELinetypeIgnore;
4161
4162 /* Remove residues from SEQRES records */
4163 if ((camask && odd))
4164 {
4165 ipos = pdbfile->chnn[firstatom] - 1;
4166 ajStrExchangeSS(&pdbfile->seqresful[ipos],
4167 pdbfile->rtype[firstatom], sub);
4168 }
4169
4170 if (firstatom == lastatom)
4171 ajFmtPrintF(flog, "%-15s%d\n", "ATOMNOCA",
4172 firstatom + 1);
4173 else
4174 ajFmtPrintF(flog, "%-15s%d %d\n", "ATOMNOCA",
4175 firstatom + 1, lastatom + 1);
4176 }
4177
4178
4179 /* Increment the residue counter if the code is recognised */
4180 if (ajResidueFromTriplet(pdbfile->rtype[i], &aa1))
4181 rcnt++;
4182
4183 if (rcnt >= min_chain_size)
4184 chainok[pdbfile->chnn[i] - 1] = ajTrue;
4185
4186 ajStrAssignS(&lastrn, pdbfile->pdbn[i]);
4187
4188 /* Set count of atoms to zero, set the position of the first
4189 * atom and set flag for recognising CA atom */
4190 acnt = 1;
4191 firstatom = i;
4192
4193 if (!(ajStrCmpC(pdbfile->atype[i], "CA")))
4194 noca = ajFalse;
4195 else
4196 noca = ajTrue;
4197 }
4198
4199
4200 /* Set the position for the last atom read in */
4201 lastatom = i;
4202
4203
4204 /* Increment the atom counter */
4205 acnt++;
4206 }
4207 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4208 {
4209 rcnt = 0;
4210 lastchn = 0;
4211
4212 modcnt++;
4213
4214 if (modcnt != 1)
4215 {
4216 for (j = 0; j < pdbfile->nchains; j++)
4217 {
4218 /* Only bother reporting error messages if a message
4219 * about the SEQRES records not containing enough aa's
4220 * has not already been reported */
4221 /* If pdbfile->chainok is False, leave it so */
4222 if (!pdbfile->chainok[j])
4223 continue;
4224 else
4225 {
4226 if (!chainok[j])
4227 {
4228 pdbfile->chainok[j] = chainok[j];
4229 ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
4230 "ATOMFEWAA", j + 1,
4231 (ajChararrGet(pdbfile->chid, j)),
4232 modcnt);
4233
4234 }
4235 }
4236 }
4237 }
4238
4239 }
4240 }
4241
4242 /* Ensure that C-terminal residues are masked if necessary */
4243 /*
4244 ** else if((pdbfile->linetype[i] == pdbfileELinetypeTER) ||
4245 ** (pdbfile->linetype[i] == pdbfileELinetypeENDMDL))
4246 ** {
4247 */
4248 if (noca)
4249 {
4250 odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatom - 1], &tmp)));
4251
4252
4253 if ((camask && odd) ||
4254 (camask1 && !odd))
4255 for (j = firstatom; j <= lastatom; j++)
4256 pdbfile->linetype[j] = pdbfileELinetypeIgnore;
4257
4258 /* Remove residues from SEQRES records */
4259 if ((camask && odd))
4260 {
4261 ipos = pdbfile->chnn[firstatom] - 1;
4262 ajStrExchangeSS(&pdbfile->seqresful[ipos],
4263 pdbfile->rtype[firstatom], sub);
4264 }
4265
4266 if (firstatom == lastatom)
4267 {
4268 ajFmtPrintF(flog, "%-15s%d\n", "ATOMNOCA", firstatom + 1);
4269 }
4270
4271 else
4272 ajFmtPrintF(flog, "%-15s%d %d\n", "ATOMNOCA",
4273 firstatom + 1, lastatom + 1);
4274 }
4275
4276 /*
4277 ** }
4278 */
4279
4280 /* Write the new (masked) seqres sequences if necessary */
4281 if (camask)
4282 {
4283 for (i = 0; i < pdbfile->nchains; i++)
4284 {
4285 tmpseq = ajStrNew();
4286
4287 if (!pdbioSeqresToSequence(pdbfile->seqresful[i],
4288 &tmpseq, camask, &lenful))
4289 ajFatal("Sequence conversion error in "
4290 "pdbioFirstPass\nEmail jison@hgmp.mrc.ac.uk\n");
4291
4292 ajStrAssignS(&pdbfile->seqres[i], tmpseq);
4293 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
4294
4295 ajStrDel(&tmpseq);
4296 }
4297 }
4298
4299
4300
4301 /* Set modcnt to 1 for xray structures */
4302 if (!modcnt)
4303 modcnt = 1;
4304
4305
4306 /* Check for xray structures or last model of nmr structures */
4307 for (i = 0; i < pdbfile->nchains; i++)
4308 {
4309 /* Only bother reporting error messages if a message about the SEQRES
4310 * records not containing enough aa's has not already been reported */
4311 /* If pdbfile->chainok is False, leave it so */
4312 if (!pdbfile->chainok[i])
4313 continue;
4314 else
4315 {
4316 if (!chainok[i])
4317 {
4318 pdbfile->chainok[i] = chainok[i];
4319 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "ATOMFEWAA",
4320 i + 1, (ajChararrGet(pdbfile->chid, i)), modcnt);
4321
4322 }
4323 }
4324 }
4325
4326
4327 /* Mask out any chains with insufficient amino acids either in the SEQRES
4328 * or ATOM records */
4329 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4330 if (((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) ||
4331 (pdbfile->linetype[i] == pdbfileELinetypeHeterogen)) &&
4332 (!pdbfile->chainok[pdbfile->chnn[i] - 1]))
4333 {
4334 pdbfile->linetype[i] = pdbfileELinetypeIgnore;
4335 }
4336
4337
4338
4339 /* Check for missing TER records. Where chains are not separated by TER
4340 * records (the chain id changes from line to line without an intervening
4341 * TER record and both chain id's are not whitespace). Where ATOM and
4342 * HETATM groups are not separated by TER records (a chain id is given on
4343 * one line, a whitespace chain id is given on the next line, and there
4344 * is no intervening TER record) */
4345
4346
4347 for (i = pdbfile->idxfirst + 1;
4348 i < pdbfile->nlines; i++)
4349 {
4350 if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
4351 (pdbfile->linetype[i] == pdbfileELinetypeCoordinate) &&
4352 pdbfile->chnn[i - 1] != pdbfile->chnn[i])
4353 ajFmtPrintF(flog, "%-15s%d %d\n", "TERMISSCHN", i, i + 1);
4354 else if ((pdbfile->linetype[i - 1] == pdbfileELinetypeCoordinate) &&
4355 ((pdbfile->linetype[i] == pdbfileELinetypeHeterogen) ||
4356 (pdbfile->linetype[i] == pdbfileELinetypeWater)) &&
4357 pdbfile->chnn[i - 1] == pdbfile->chnn[i])
4358 ajFmtPrintF(flog, "%-15s%d %d\n", "TERMISSHET", i, i + 1);
4359 }
4360
4361
4362 /* Tidy up */
4363 AJFREE(chainok);
4364 ajStrDel(&aa3);
4365 ajStrDel(&lastrn);
4366 ajStrDel(&sub);
4367
4368
4369 /* Report problems with non-protein chains */
4370 for (i = 0; i < pdbfile->nchains; i++)
4371 if (pdbfile->chainok[i])
4372 {
4373 ok = ajTrue;
4374 break;
4375 }
4376
4377 /* Return now if no protein chains are found */
4378 if (!ok)
4379 {
4380 ajWarn("No protein chains found in raw pdb file");
4381 ajFmtPrintF(flog, "%-15s\n", "NOPROTEINS");
4382 return ajFalse;
4383 }
4384
4385 return ajTrue;
4386 }
4387
4388
4389
4390
4391 /* @funcstatic pdbioStandardiseNumbering **************************************
4392 **
4393 ** Reads a Pdbfile object and standardises the two sets of residue numbers
4394 ** (resn1 & resn2 arrays) derived from the raw residue numbers. The residue
4395 ** numbering is corrected for zero or negative residue numbers, non-standard
4396 ** numbering schemes and any other cases of non-sequentiality (e.g. where the
4397 ** next residue number is lower than the previous one, see 1pca).
4398 ** resn1 gives the sequence presuming an alternative numbering scheme, resn2
4399 ** gives the sequence presuming heterogeneity. Heterogeneity is indicated by
4400 ** a character in position lines[26] (the same position used to indicate
4401 ** alternative residue numbering schemes).
4402 **
4403 ** The resn1 & resn2 arrays of a Pdbfile object are modified. The oddnum
4404 ** array is written.
4405 **
4406 ** oddnum array
** This is an array of Bool's which are True for duplicate residues at
** heterogeneous positions (e.g. if 2 different residues are both numbered
** '8', or one is '8' and the other '8A', then <oddnum> would be set True
** for the second residue).
4411 **
4412 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
4413 ** @param [u] flog [AjPFile] Pointer to log file (build diagnostics)
4414 **
4415 ** @return [AjBool] True on success, False otherwise
4416 **
4417 ** @release 2.9.0
4418 ** @@
4419 ******************************************************************************/
4420
pdbioStandardiseNumbering(AjPPdbfile pdbfile,AjPFile flog)4421 static AjBool pdbioStandardiseNumbering(AjPPdbfile pdbfile, AjPFile flog)
4422 {
4423 ajuint i = 0U;
4424 ajint lastchn = -1; /* Chain number of last line read in */
4425 AjBool first = ajFalse; /* True if we have processed the first
4426 * residue in a chain */
4427 ajint first_num = 0; /* Number of first residue in chain */
4428 ajint modrn = 0; /* Corrected first residue number */
4429 AjBool neg = ajFalse; /* True if first residue number is negative */
4430 AjBool zer = ajFalse; /* True if first residue number is zero */
4431 AjBool report_neg = ajFalse;/* True if we have reported an error that a
4432 * residue number is negative for this chain */
4433 AjBool report_zer = ajFalse;/* True if we have reported an error that a
4434 * residue number is zero for this chain */
4435 ajint add = 0; /* An amount to add to the residue numbers to
4436 * correct them */
4437 AjBool ignore = ajFalse;
4438 AjBool odd = ajFalse;
4439 ajint rn = 0; /* Current residue number */
4440 ajint last_rn = 0; /* Last residue number read in */
4441 ajint this_rn = 0; /* Current residue number read in */
4442 char last = ' '; /* Chain id of last chain */
4443 char curr = ' '; /* Chain id of current chain */
4444 AjPStr last_rt = NULL; /* Type of previous residue */
4445 AjPStr this_rt = NULL; /* Type of current residue */
4446 AjBool report_nonstd = ajFalse; /* True if we have reported an error
4447 * that a non-standard residue
4448 * numbering scheme is used for this
4449 * chain */
4450 AjBool report_nonsqt = ajFalse; /* True if we have reported an error
4451 * that any other cases of
4452 * non-sequential numbering are found
4453 * for this this chain */
4454 char aa1 = ' '; /* Amino acid single character code */
4455
4456 ajuint ipos = 0;
4457
4458
4459 last_rt = ajStrNew();
4460 this_rt = ajStrNew();
4461
4462
4463 /* Check args */
4464 if (!pdbfile || !flog)
4465 {
4466 ajWarn("Bad args passed to pdbioStandardiseNumbering\n");
4467
4468 return ajFalse;
4469 }
4470
4471
4472 /* Check whether the integer part of the original pdb numbering (at this
4473 * point in code held in resn1 and resn2) gives the correct index into
4474 * the SEQRES sequence */
4475 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4476 {
4477 /* If model number has gone past 1 then we must have checked all the
4478 * chains, so break */
4479 if (pdbfile->modn[i] > 1)
4480 break;
4481
4482 if (pdbfile->linetype[i] != pdbfileELinetypeCoordinate)
4483 continue;
4484
4485
4486 /* If residue number is not negative, zero, or greater then the
4487 * length of the SEQRES sequence and if the residue matches then
4488 * continue */
4489
4490 if ((pdbfile->resn1[i] <=
4491 pdbfile->nres[pdbfile->chnn[i] - 1]) ||
4492 (pdbfile->resn1[i] >= 1))
4493 {
4494 ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
4495 ipos = pdbfile->chnn[i] - 1;
4496
4497 if (aa1 == ajStrGetCharPos(pdbfile->seqres[ipos],
4498 pdbfile->resn1[i] - 1))
4499 continue;
4500 }
4501
4502 /* Otherwise flag an error for this chain and move to the end of the
4503 * chain */
4504
4505 ajFmtPrintF(flog, "%-15s%d (%c)\n", "BADINDEX",
4506 pdbfile->chnn[i],
4507 ajChararrGet(pdbfile->chid, pdbfile->chnn[i] - 1));
4508
4509
4510 for (lastchn = pdbfile->chnn[i]; i < pdbfile->nlines; i++)
4511 {
4512 if (pdbfile->linetype[i] != pdbfileELinetypeCoordinate)
4513 continue;
4514
4515 if (pdbfile->modn[i] > 1)
4516 break;
4517
4518 if (pdbfile->chnn[i] != lastchn)
4519 {
4520 i--;
4521 break;
4522 }
4523 }
4524 }
4525
4526 /* Fix for zero or negative residue numbers. This is done for both resn1
4527 * and resn2 arrays of a Pdbfile object */
4528
4529 for (first = ajFalse, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4530 {
4531 /* Coordinate line */
4532 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4533 {
4534 /* New chain */
4535 if (pdbfile->chnn[i] != lastchn)
4536 {
4537 neg = ajFalse;
4538 zer = ajFalse;
4539 report_neg = ajFalse;
4540 report_zer = ajFalse;
4541 first = ajFalse;
4542 lastchn = pdbfile->chnn[i];
4543 }
4544
4545 rn = pdbfile->resn1[i];
4546
4547 if (!first)
4548 {
4549 first_num = rn;
4550
4551 if (first_num > 0)
4552 {
4553 /* Advance counter to next chain */
4554 for (; i < pdbfile->nlines; i++)
4555 if (((pdbfile->linetype[i] ==
4556 pdbfileELinetypeCoordinate) && (pdbfile->chnn[i] != lastchn))
4557 || pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4558 {
4559 neg = ajFalse;
4560 zer = ajFalse;
4561 report_neg = ajFalse;
4562 report_zer = ajFalse;
4563 first = ajFalse;
4564 lastchn = -1;
4565
4566 break;
4567 }
4568
4569 /* i will get incremented in main loop above */
4570 i--;
4571 continue;
4572 }
4573
4574 first = ajTrue;
4575 }
4576 if (rn < 0)
4577 {
4578 neg = ajTrue;
4579
4580 if (zer)
4581 modrn = rn - (first_num - 1);
4582 else
4583 /* if(neg && !zer) */
4584 modrn = rn - (first_num - 1);
4585
4586 if (!report_neg)
4587 {
4588 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "NEGNUM",
4589 pdbfile->chnn[i],
4590 ajChararrGet(pdbfile->chid,
4591 (pdbfile->chnn[i] - 1)), i + 1);
4592 report_neg = ajTrue;
4593 }
4594 }
4595 else if (rn == 0)
4596 {
4597 zer = ajTrue;
4598
4599 if (neg)
4600 modrn = rn - (first_num - 1);
4601 else
4602 /* if(!neg) */
4603 modrn = rn + 1;
4604
4605 if (!report_zer)
4606 {
4607 ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
4608 "ZERNUM", pdbfile->chnn[i],
4609 ajChararrGet(pdbfile->chid,
4610 (pdbfile->chnn[i] - 1)), i + 1);
4611 report_zer = ajTrue;
4612 }
4613 }
4614 else
4615 /* rn is (+ve) */
4616 {
4617 if (!neg && zer)
4618 modrn = rn + 1;
4619 else if (neg && zer)
4620 modrn = rn - (first_num - 1);
4621 else
4622 /* if(neg && !zer) */
4623 modrn = rn - (first_num);
4624 }
4625
4626 pdbfile->resn1[i] = modrn;
4627 pdbfile->resn2[i] = modrn;
4628 }
4629 /* New model */
4630 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4631 {
4632 neg = ajFalse;
4633 zer = ajFalse;
4634 report_neg = ajFalse;
4635 report_zer = ajFalse;
4636 first = ajFalse;
4637 lastchn = -1;
4638 }
4639 }
4640
4641
4642
4643 /* Fix non-standard residue numbering scheme. This is done for resn1
4644 * array of a Pdbfile object only. */
4645 for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4646 {
4647 /* Coordinate line */
4648 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4649 {
4650 /* New chain */
4651 if (pdbfile->chnn[i] != lastchn)
4652 {
4653 add = 0;
4654 first = ajFalse;
4655 ignore = ajFalse;
4656 lastchn = pdbfile->chnn[i];
4657 report_nonstd = ajFalse;
4658 }
4659
4660 if (!first)
4661 {
4662 /* Remove the chmyotrypsin numbering code */
4663 last_rn = pdbfile->resn1[i];
4664 last = ajStrGetCharPos(pdbfile->lines[i], 26);
4665 first = ajTrue;
4666 continue;
4667 }
4668
4669 rn = pdbfile->resn1[i];
4670 curr = ajStrGetCharPos(pdbfile->lines[i], 26);
4671
4672 if (curr != last)
4673 {
4674 if (rn == last_rn)
4675 {
4676 add++;
4677 ignore = ajTrue;
4678 }
4679 }
4680
4681 if (rn != last_rn)
4682 {
4683 ignore = ajFalse;
4684 }
4685
4686
4687
4688 last = curr;
4689 last_rn = rn;
4690
4691 pdbfile->resn1[i] = rn + add;
4692
4693 if (ignore)
4694 {
4695 pdbfile->oddnum[i] = ajTrue;
4696
4697 if (!report_nonstd)
4698 {
4699 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "ODDNUM",
4700 pdbfile->chnn[i],
4701 ajChararrGet(pdbfile->chid,
4702 (pdbfile->chnn[i] - 1)), i + 1);
4703 report_nonstd = ajTrue;
4704 }
4705
4706 }
4707
4708 }
4709 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4710 {
4711 add = 0;
4712 first = ajFalse;
4713 ignore = ajFalse;
4714 lastchn = -1;
4715 report_nonstd = ajFalse;
4716 }
4717 }
4718
4719
4720
4721
4722
4723
4724
4725 /* Fix remaining non-sequential residue numbering in resn1 array of
4726 * Pdbfile object */
4727 for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4728 {
4729 /* Coordinate line */
4730 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
4731 {
4732 /* New chain */
4733 if (pdbfile->chnn[i] != lastchn)
4734 {
4735 add = 0;
4736 first = ajFalse;
4737 ignore = ajFalse;
4738 lastchn = pdbfile->chnn[i];
4739 report_nonsqt = ajFalse;
4740 }
4741
4742 if (!first)
4743 {
4744 last_rn = pdbfile->resn1[i];
4745 ajStrAssignS(&last_rt, pdbfile->rtype[i]);
4746
4747 first = ajTrue;
4748 continue;
4749 }
4750
4751 this_rn = pdbfile->resn1[i];
4752 ajStrAssignS(&this_rt, pdbfile->rtype[i]);
4753
4754
4755 /* A new residue is indicated if this ATOM is 'N' or if this is a
4756 * different residue type */
4757 if (!(ajStrCmpC(pdbfile->atype[i], "N")) ||
4758 !(ajStrMatchS(this_rt, last_rt)))
4759 {
4760 /* Check for duplicate residue numbers */
4761 if (this_rn == last_rn)
4762 {
4763 add++;
4764 ignore = ajTrue;
4765 odd = ajTrue;
4766 }
4767
4768 /* Check for drops in residue numbers, see 1pca */
4769 if (this_rn < last_rn)
4770 {
4771 add += (last_rn - this_rn + 1);
4772 ignore = ajTrue;
4773 odd = ajFalse;
4774 }
4775 }
4776
4777
4778 if (this_rn > last_rn)
4779 ignore = ajFalse;
4780
4781 pdbfile->resn1[i] = this_rn + add;
4782
4783
4784 ajStrAssignS(&last_rt, this_rt);
4785 last_rn = this_rn;
4786
4787
4788 if (ignore)
4789 {
4790 if (odd)
4791 pdbfile->oddnum[i] = ajTrue;
4792
4793 if (!report_nonsqt)
4794 {
4795 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "NONSQNTL",
4796 pdbfile->chnn[i],
4797 ajChararrGet(pdbfile->chid,
4798 (pdbfile->chnn[i] - 1)), i + 1);
4799 report_nonsqt = ajTrue;
4800 }
4801 }
4802 }
4803
4804 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4805 {
4806 add = 0;
4807 first = ajFalse;
4808 ignore = ajFalse;
4809 lastchn = -1;
4810 report_nonsqt = ajFalse;
4811 }
4812 }
4813
4814
4815
4816 /* Fix remaining non-sequential residue numbering in resn2 array of
4817 * Pdbfile object (duplicate lines for presumed heterogenous residues
4818 * positions are ignored) */
4819 for (lastchn = -1, i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
4820 {
4821 /* Coordinate line */
4822 if ((pdbfile->linetype[i] == pdbfileELinetypeCoordinate) &&
4823 (!pdbfile->oddnum[i]))
4824 {
4825 /* New chain */
4826 if (pdbfile->chnn[i] != lastchn)
4827 {
4828 add = 0;
4829 first = ajFalse;
4830 lastchn = pdbfile->chnn[i];
4831 }
4832
4833 if (!first)
4834 {
4835 last_rn = pdbfile->resn2[i];
4836 ajStrAssignS(&last_rt, pdbfile->rtype[i]);
4837
4838 first = ajTrue;
4839 continue;
4840 }
4841
4842 this_rn = pdbfile->resn2[i];
4843 ajStrAssignS(&this_rt, pdbfile->rtype[i]);
4844
4845
4846 /* A new residue is indicated if this ATOM is 'N' or if this is a
4847 * different residue type */
4848 if (!(ajStrCmpC(pdbfile->atype[i], "N")) ||
4849 !(ajStrMatchS(this_rt, last_rt)))
4850 {
4851 /* Check for duplicate residue numbers */
4852 if (this_rn == last_rn)
4853 add++;
4854
4855 /* Check for drops in residue numbers, see 1pca */
4856 if (this_rn < last_rn)
4857 add += (last_rn - this_rn + 1);
4858 }
4859
4860 pdbfile->resn2[i] = this_rn + add;
4861 ajStrAssignS(&last_rt, this_rt);
4862 last_rn = this_rn;
4863 }
4864 else if (pdbfile->linetype[i] == pdbfileELinetypeMODEL)
4865 {
4866 add = 0;
4867 first = ajFalse;
4868 lastchn = -1;
4869 }
4870 }
4871
4872
4873 /* Tidy up and return */
4874 ajStrDel(&last_rt);
4875 ajStrDel(&this_rt);
4876
4877 return ajTrue;
4878 }
4879
4880
4881
4882
4883 /* @funcstatic pdbioAlignNumbering ********************************************
4884 **
4885 ** Reads a Pdbfile object and determines for each chain a set of residue
4886 ** numbers (the resni array) that give the correct index into the full length
4887 ** (SEQRES) sequence for residues listed in the ATOM records.
4888 **
4889 ** The resni and resn1ok arrays of a Pdbfile object are written.
4890 ** The seqres and nres elements may be modified for any missing N-terminal
4891 ** residues.
4892 **
4893 ** resn1ok array
4894 ** This array contains Bool's for each chain which are TRUE if resn1 was
4895 ** used to derive resni, i.e. gave correct alignment to the full-length
4896 ** (SEQRES) sequence. If False then resn2 was used ( resn1 gives the
4897 ** sequence presuming an alternative numbering scheme, resn2 gives the
4898 ** sequence presuming heterogeneity).
4899 **
4900 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
4901 ** @param [u] flog [AjPFile] Pointer to log file (build diagnostics)
4902 ** @param [r] lim [ajuint] Max. no. permissible mismatches between
4903 ** the ATOM & SEQRES sequences.
4904 ** @param [r] lim2 [ajuint] Max. no. residues to trim when checking
4905 ** for missing C-terminal SEQRES residues.
4906 **
4907 ** @return [AjBool] True on success, False otherwise
4908 **
4909 ** @release 2.9.0
4910 ** @@
4911 ** Must comment what diagnostics this writes!
4912 ** This now writes its diagnostics to flog, rather than tempfile.
4913 ******************************************************************************/
4914
pdbioAlignNumbering(AjPPdbfile pdbfile,AjPFile flog,ajuint lim,ajuint lim2)4915 static AjBool pdbioAlignNumbering(AjPPdbfile pdbfile, AjPFile flog, ajuint lim,
4916 ajuint lim2)
4917 {
4918 /* Sequence and residue number data are given for each unique chain (data
4919 * for the first model only is considered when assigning residue numbers) */
4920
4921
4922 /* Rather than use, e.g. seq1, seq2 & seq, we could use a single 2
4923 * dimensional array, but we would need new code for 2d arrays of
4924 * characters */
4925
4926
4927 ajuint a = 0U; /* Loop counter */
4928 ajuint b = 0U; /* Loop counter */
4929 ajuint maxb = 0U; /* Max value of b */
4930 ajuint i = 0U; /* Loop counter */
4931 ajint j = 0; /* Loop counter */
4932 ajint k = 0; /* Loop counter */
4933 ajint x = 0; /* Loop counter */
4934 ajint y = 0; /* Loop counter */
4935 ajint z = 0; /* Loop counter */
4936
4937
4938 AjPStr *seq1 = NULL; /* Sequences of residues from ATOM records
4939 * (all residues) */
4940 AjPStr *seq2 = NULL; /* Sequences of residues from ATOM records
4941 * (excluding residues for which oddnum array
4942 * in Pdbfile object is True) */
4943 AjPStr *seq = NULL; /* Pointer to seq1 or seq2 */
4944 AjPStr seqbit = NULL; /* Subsequence of seq (real copy) */
4945 ajlong lenseqbit = 0; /* Length of seqbit */
4946
4947
4948 ajint *nres1 = NULL; /* No. residues for seq1/arr1 */
4949 ajint *nres2 = NULL; /* No. residues for seq2/arr2 */
4950 ajint *nres = NULL; /* Pointer to nres1 or nres2 */
4951
4952
4953 AjPInt *num1 = NULL; /* Residue numbers for seq1 (from resn1
4954 * element of the Pdbfile object) */
4955 AjPInt *num2 = NULL; /* Residue numbers for seq2 (from resn2
4956 * element of the Pdbfile object) */
4957 AjPInt *num = NULL; /* Pointer to num1 or num2 */
4958
4959 AjPInt *idx = NULL; /* Gives correct index into seqres sequence
4960 * (from Pdbfile object) for the current
4961 * sequence. These are residue numbers and
4962 * therefore idx would have a value of 1 for
4963 * the first seqres residue. */
4964 AjPInt *idx_full = NULL; /* As idx but with empty array elements
4965 * replacing missing residues so that we can
4966 * index into idx_full using residue numbers
4967 * from num */
4968
4969
4970 ajint last1 = -1000; /* Number of last residue for seq1/arr1 */
4971 ajint last2 = -1000; /* Number of last residue for seq2/arr2 */
4972
4973 char aa1 = ' '; /* Amino acid single character code */
4974 ajint c = 0; /* No. of current chain */
4975
4976 AjBool done = ajFalse; /* True if we have found the correct residue
4977 * numbering */
4978
4979 char *insert = NULL; /* String from N-terminus of ATOM sequence to
4980 * insert at N-terminus of SEQRES sequence in
4981 * case of the later missing residues */
4982 AjPStr tmpseqres = NULL; /* Temp. string for seqres sequence from
4983 * Pdbfile object */
4984 AjPStr bit = NULL; /* Temp. string for a bit of sequence */
4985 ajuint nmismatches = 0; /* No. of mismatches between ATOM and SEQRES
4986 * sequence */
4987 ajlong loc = 0L; /* Location of ATOM sequence in SEQRES
4988 * sequence (if applicable) */
4989 ajint len = 0; /* Length of seqres sequence from Pdbfile
4990 * object */
4991 AjBool err = ajFalse; /* True if a residue number from the ATOM
4992 * records would cause an array boundary
4993 * error in the seqres sequence */
4994 ajint siz_substr = 0; /* Size of substring for alignment of ATOM
4995 * and SEQRES sequences */
4996 const char *atom_ptr = NULL;/* Pointer to ATOM sequence */
4997 const char *seqres_ptr = NULL; /* Pointer to SEQRES sequence */
4998 const char *loc_ptr = NULL; /* Pointer for location of match of substring
4999 * to SEQRES sequence */
5000 AjPStr substr = NULL; /* Substring of ATOM sequence */
5001 AjPStr substr2 = NULL; /* Substring of ATOM sequence */
5002 ajint atom_idx = 0; /* Index into ATOM sequence */
5003 ajint seqres_idx = 0; /* Index into SEQRES sequence */
5004 ajint seqres_idx_last = 0; /* Index into SEQRES sequence for C-terminal
5005 * residue of substring */
5006 char aa_last = ' '; /* Amino acid residue code of C-terminal
5007 * residue of substring */
5008 AjBool fixed = ajFalse; /* Whether the mismatch residue of the
5009 * substring was later aligned correctly */
5010 AjBool done_end = ajFalse; /* True if we have aligned the terminus of
5011 * the ATOM sequence */
5012 AjBool founderr = ajFalse; /* Match of substring of ATOM sequence to
5013 * SEQRES found with potential mismatched
5014 * residue */
5015 AjPStr msgstr = NULL; /* A string to hold a message */
5016 AjPStr msgbit = NULL; /* A temp. string to hold part of a message */
5017 ajint idx_misfit_atom = 0; /* Index into ATOM sequence (seq) for first
5018 * residue that does not match SEQRES
5019 * sequence */
5020
5021 ajint idx_misfit_seqres = 0;/* Index into SEQRES sequence for first
5022 * residue that does not match ATOM sequence */
5023 AjPStr aa_misfit = NULL; /* Original (PDB) residue number for first
5024 * residue mismatch between ATOM and SEQRES
5025 * sequences */
5026 ajint this_num = 0; /* Current residue number */
5027 /*DIAGNOSTIC*/
5028 #if AJFALSE
5029 ajint max = 0; /* Used in diagnostics code */
5030 #endif /* AJFALSE */
5031
5032 /* Check args */
5033 if (!pdbfile || !flog)
5034 {
5035 ajWarn("Bad args passed to pdbioAlignNumbering\n");
5036
5037 return ajFalse;
5038 }
5039
5040 /* Allocate memory for arrays etc */
5041 aa_misfit = ajStrNew();
5042 msgstr = ajStrNew();
5043 msgbit = ajStrNew();
5044 seqbit = ajStrNew();
5045
5046 insert = ajCharNewRes(MAXMISSNTERM);
5047 tmpseqres = ajStrNew();
5048 bit = ajStrNew();
5049 substr = ajStrNew();
5050 substr2 = ajStrNew();
5051
5052 AJCNEW0(seq1, pdbfile->nchains);
5053 AJCNEW0(seq2, pdbfile->nchains);
5054
5055
5056 AJCNEW0(num1, pdbfile->nchains);
5057 AJCNEW0(num2, pdbfile->nchains);
5058
5059 AJCNEW0(idx, pdbfile->nchains);
5060 AJCNEW0(idx_full, pdbfile->nchains);
5061
5062 AJCNEW0(nres1, pdbfile->nchains);
5063 AJCNEW0(nres2, pdbfile->nchains);
5064
5065 for (i = 0U; i < pdbfile->nchains; i++)
5066 {
5067 if (!pdbfile->chainok[i])
5068 continue;
5069
5070 seq1[i] = ajStrNew();
5071 seq2[i] = ajStrNew();
5072
5073 num1[i] = ajIntNew();
5074 num2[i] = ajIntNew();
5075 }
5076
5077 /* Assign arrays */
5078 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
5079 {
5080 /* Coordinate line */
5081 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
5082 {
5083 /* Break if we are no longer in the first model */
5084 if (pdbfile->modn[i] != 1)
5085 break;
5086 else
5087 c = pdbfile->chnn[i] - 1;
5088
5089 #if AJFALSE
5090 ajFmtPrint("%S\n", pdbfile->lines[i]);
5091 #endif /* AJFALSE */
5092 if (pdbfile->resn1[i] != last1)
5093 {
5094 ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
5095 ajStrAppendK(&seq1[c], aa1);
5096
5097 ajIntPut(&num1[c], nres1[c], pdbfile->resn1[i]);
5098 last1 = pdbfile->resn1[i];
5099 nres1[c]++;
5100 }
5101
5102 if ((pdbfile->resn2[i] != last2) && (!pdbfile->oddnum[i]))
5103 {
5104 ajResidueFromTriplet(pdbfile->rtype[i], &aa1);
5105 ajStrAppendK(&seq2[c], aa1);
5106
5107 ajIntPut(&num2[c], nres2[c], pdbfile->resn2[i]);
5108 last2 = pdbfile->resn2[i];
5109 nres2[c]++;
5110 }
5111 }
5112 }
5113
5114
5115
5116 /* Allocate memory for arrays of residue numbers */
5117 for (i = 0; i < pdbfile->nchains; i++)
5118 {
5119 if (!pdbfile->chainok[i])
5120 continue;
5121
5122
5123 /* Array must be big enough to cope with either sequence */
5124 if (nres1[i] > nres2[i])
5125 idx[i] = ajIntNewRes(nres1[i]);
5126 else
5127 idx[i] = ajIntNewRes(nres2[i]);
5128
5129
5130
5131 /* Array must be big enough to cope with highest the residue number
5132 * from either array */
5133 if (ajIntGet(num1[i], nres1[i] - 1) > ajIntGet(num2[i], nres2[i] - 1))
5134 idx_full[i] = ajIntNewRes(ajIntGet(num1[i], nres1[i] - 1) + 1);
5135 else
5136 idx_full[i] = ajIntNewRes(ajIntGet(num2[i], nres2[i] - 1) + 1);
5137 }
5138
5139
5140
5141
5142 /* Loop for each chain */
5143 for (i = 0; i < pdbfile->nchains; i++)
5144 {
5145 /* Skip this chain if necessary */
5146 if (!(pdbfile->chainok[i]))
5147 continue;
5148 else
5149 ajStrAssignS(&tmpseqres, pdbfile->seqres[i]);
5150
5151 /* Loop for checking for missing residues from N-term of SEQRES
5152 * sequence */
5153 for (done = ajFalse, j = 0; j < MAXMISSNTERM + 1; j++)
5154 {
5155 /* Loop for the 2 sequences derived from the ATOM records */
5156 for (x = 0; x < 2; x++)
5157 {
5158 if (x == 0)
5159 {
5160 seq = seq1;
5161 nres = nres1;
5162 num = num1;
5163 }
5164 else
5165 {
5166 seq = seq2;
5167 nres = nres2;
5168 num = num2;
5169 }
5170
5171 /* Restore the original seqres sequence */
5172 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5173 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5174
5175
5176 /* Modify the seqres sequence in the Pdbfile object by adding
5177 * the first j residues from the N-terminus of the ATOM
5178 * sequence to the N-terminus of <seqres>. */
5179
5180 for (k = 0; (k < j) && (k < nres[i]); k++)
5181 insert[k] = ajStrGetCharPos(seq[i], k);
5182
5183 insert[k] = '\0';
5184
5185 ajStrInsertC(&(pdbfile->seqres[i]), 0, insert);
5186 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5187
5188 #if AJFALSE
5189 /* DIAGNOSTIC CODE */
5190 ajFmtPrintF(flog, "\nChainMod %d"
5191 " (seq %d)\n%S\n%S\n\n\n",
5192 i + 1, x + 1,
5193 seq[i],
5194 pdbfile->seqres[i]);
5195 #endif /* AJFALSE */
5196
5197 /***********************************************/
5198 /******************* STEP 1 ********************/
5199 /***********************************************/
5200 #if AJFALSE
5201 /* DIAGNOSTIC */
5202 ajFmtPrintF(flog, "STEP1 tmpseqres: %S\n", tmpseqres);
5203
5204 ajFmtPrintF(flog, "chnn : %d\n"
5205 "seq1 : %S\n"
5206 "seq2 : %S\n"
5207 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5208 pdbfile->seqres[i]);
5209 ajFmtPrintF(flog, "\n");
5210 if (ajStrMatchS(seq1[i], seq2[i]))
5211 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5212 else
5213 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5214
5215 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5216 ajFmtPrintF(flog, "seq1 and seqres match\n");
5217 else
5218 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5219 ajFmtPrintF(flog, "\n");
5220 #endif /* AJFALSE */
5221
5222 /* Check whether sequences are identical length */
5223 if (nres[i] == pdbfile->nres[i])
5224 {
5225 /* Sequences are identical - assign residue numbers 'by
5226 * hand' */
5227 if (ajStrMatchS(seq[i], pdbfile->seqres[i]))
5228 {
5229 for (k = 0; k < nres[i]; k++)
5230 ajIntPut(&idx[i], k, k + 1);
5231
5232 if (x == 0)
5233 pdbfile->resn1ok[i] = ajTrue;
5234 else
5235 pdbfile->resn1ok[i] = ajFalse;
5236
5237 done = ajTrue;
5238
5239 #if AJFALSE
5240 /* DIAGNOSTIC */
5241 ajFmtPrintF(flog, "STEP1 OK\n");
5242 #endif /* AJFALSE */
5243 break;
5244 }
5245 /* Sequence are same length but contain mismatches */
5246 else
5247 {
5248 for (ajStrAssignClear(&msgstr), nmismatches = 0, k = 0;
5249 k < nres[i]; k++)
5250 if (ajStrGetCharPos(seq[i], k) !=
5251 ajStrGetCharPos(pdbfile->seqres[i], k))
5252 {
5253 nmismatches++;
5254 /* Correct the seqres sequence. Replace this
5255 * with appropriate library call once
5256 * available so we don't have to probe the
5257 * internals of the structure */
5258
5259
5260 /* a will give the number of the first
5261 * coordinate line for the mismatch residue
5262 * from the ATOM records */
5263 a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
5264 ajIntGet(num[i], k));
5265
5266
5267 /* Get the id of the mismatch residue in the
5268 * SEQRES sequence. */
5269 ajResidueToTriplet(pdbfile->seqres[i]->Ptr[k],
5270 &aa_misfit);
5271
5272 /* To give correct index into SEQRES records
5273 * in original PDB file, subtract j to
5274 * account for modifications to the
5275 * N-terminus that were made for missing
5276 * residues relative to ATOM sequence. A
5277 * further 1 is added to give a number
5278 * starting from 1 (rather than 0) */
5279
5280 ajFmtPrintS(&msgbit, "%S%S:%S%d. ",
5281 pdbfile->rtype[a],
5282 pdbfile->pdbn[a],
5283 aa_misfit, k - j + 1);
5284
5285
5286 ajStrAppendS(&msgstr, msgbit);
5287
5288
5289 pdbfile->seqres[i]->Ptr[k] =
5290 ajStrGetCharPos(seq[i], k);
5291
5292 }
5293
5294
5295 /* Sequences are same length (acceptable number of
5296 * mismatches) */
5297 if (nmismatches <= lim)
5298 {
5299 if (nmismatches)
5300 ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n",
5301 "MISMATCH", i + 1,
5302 ajChararrGet(pdbfile->chid, i),
5303 nmismatches, msgstr);
5304
5305
5306 for (k = 0; k < nres[i]; k++)
5307 ajIntPut(&idx[i], k, k + 1);
5308
5309 if (x == 0)
5310 pdbfile->resn1ok[i] = ajTrue;
5311 else
5312 pdbfile->resn1ok[i] = ajFalse;
5313
5314 done = ajTrue;
5315 #if AJFALSE
5316 /* DIAGNOSTIC */
5317 ajFmtPrintF(flog, "STEP1 OK %d mismatches\n",
5318 nmismatches);
5319 #endif /* AJFALSE */
5320
5321 break;
5322 }
5323 else
5324 {
5325 /* Otherwise, sequences are same length
5326 * (unacceptable number of mismatches) Restore
5327 * the original seqres sequence */
5328 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5329 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5330 #if AJFALSE
5331 /* DIAGNOSTIC */
5332 ajFmtPrintF(flog,
5333 "STEP1 **NOT** OK %d mismatches\n",
5334 nmismatches);
5335 #endif /* AJFALSE */
5336 }
5337 }
5338 }
5339
5340 /***********************************************/
5341 /******************* STEP 2 ********************/
5342 /***********************************************/
5343 #if AJFALSE
5344 /* DIAGNOSTIC */
5345 ajFmtPrintF(flog, "STEP2 tmpseqres: %S\n", tmpseqres);
5346
5347 ajFmtPrintF(flog, "chnn : %d\n"
5348 "seq1 : %S\n"
5349 "seq2 : %S\n"
5350 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5351 pdbfile->seqres[i]);
5352 ajFmtPrintF(flog, "\n");
5353 if (ajStrMatchS(seq1[i], seq2[i]))
5354 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5355 else
5356 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5357
5358 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5359 ajFmtPrintF(flog, "seq1 and seqres match\n");
5360 else
5361 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5362 ajFmtPrintF(flog, "\n");
5363 #endif /* AJFALSE */
5364
5365 /* JONNEW The code below replaces 'ORIGINAL' code block */
5366 maxb = lim2;
5367
5368 if (maxb >= MAJSTRGETLEN(seq[i]))
5369 maxb = MAJSTRGETLEN(seq[i]) - 1;
5370
5371 #if AJFALSE
5372 /* DIAGNOSTIC */
5373 ajFmtPrintF(flog, "maxb = %d\nlim2 = %d\n");
5374 ajDebug("strlen: %d\n",
5375 maxb, lim2, MAJSTRGETLEN(seq[i]));
5376 #endif /* AJFALSE */
5377
5378 /* First pass through loop is full-length sequence */
5379 for (b = 0; b < maxb + 1; b++)
5380 {
5381 lenseqbit = MAJSTRGETLEN(seq[i]) - b;
5382 ajStrAssignSubS(&seqbit, seq[i], 0, (lenseqbit - 1));
5383 ajStrAssignSubS(&bit, seq[i], (lenseqbit), -1);
5384
5385 #if AJFALSE
5386 /* DIAGNOSTIC */
5387 ajFmtPrintF(flog, "Trying ATOM substring %S\n"
5388 "versus SEQRES %S\n",
5389 seqbit, pdbfile->seqres[i]);
5390 #endif /* AJFALSE */
5391
5392 /* Check whether ATOM is substring of SEQRES sequence */
5393 if ((loc = ajStrFindS(pdbfile->seqres[i], seqbit)) != -1)
5394 {
5395 /* Check to ensure that the last substring residue is
5396 * aligned to the last residue of the SEQRES residue,
5397 * otherwise, problems would arise in cases where
5398 * SEQRES sequence had C-terminal residues that were
5399 * absent from the ATOM (& therefore also substring)
5400 * sequence. */
5401 if ((loc + lenseqbit) != pdbfile->nres[i])
5402 break;
5403
5404 /* ATOM is substring of SEQRES sequence - assign
5405 * residue numbers 'by hand' */
5406
5407 for (k = 0; k < nres[i]; k++)
5408 ajIntPut(&idx[i], k, k + (ajint) loc + 1);
5409
5410 if (x == 0)
5411 pdbfile->resn1ok[i] = ajTrue;
5412 else
5413 pdbfile->resn1ok[i] = ajFalse;
5414
5415
5416 /* SEQRES sequence is missing C-terminal ATOM
5417 * residues */
5418 if (b)
5419 {
5420 ajFmtPrintF(flog, "%-15s%d (%c) %d\n",
5421 "MISSCTERM", i + 1,
5422 ajChararrGet(pdbfile->chid, i), b);
5423
5424 ajStrAppendS(&(pdbfile->seqres[i]), bit);
5425 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5426 }
5427
5428 done = ajTrue;
5429 #if AJFALSE
5430 /* DIAGNOSTIC */
5431 ajFmtPrintF(flog, "STEP2 OK\n");
5432 #endif /* AJFALSE */
5433 break;
5434 }
5435 }
5436
5437 if (done)
5438 break;
5439 else
5440 {
5441 /* Otherwise, ATOM is NOT a substring of SEQRES sequence */
5442 #if AJFALSE
5443 /* DIAGNOSTIC */
5444 ajFmtPrintF(flog, "STEP2 **NOT** OK\n");
5445 #endif /* AJFALSE */
5446 }
5447
5448 #if AJFALSE
5449 /* ORIGINAL */
5450 if ((loc = ajStrFindS(pdbfile->seqres[i], seq[i])) != -1)
5451 {
5452 for (k = 0; k < nres[i]; k++)
5453 ajIntPut(&idx[i], k, k + loc + 1);
5454
5455 if (x == 0)
5456 pdbfile->resn1ok[i] = ajTrue;
5457 else
5458 pdbfile->resn1ok[i] = ajFalse;
5459
5460 done = ajTrue;
5461 ajFmtPrintF(flog, "STEP2 OK\n");
5462 break;
5463 }
5464 #endif /* AJFALSE */
5465
5466
5467 /***********************************************/
5468 /******************* STEP 3 ********************/
5469 /***********************************************/
5470 #if AJFALSE
5471 /* DIAGNOSTIC */
5472 ajFmtPrintF(flog, "STEP3 tmpseqres: %S\n", tmpseqres);
5473
5474 ajFmtPrintF(flog, "chnn : %d\n"
5475 "seq1 : %S\n"
5476 "seq2 : %S\n"
5477 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5478 pdbfile->seqres[i]);
5479 ajFmtPrintF(flog, "\n");
5480 if (ajStrMatchS(seq1[i], seq2[i]))
5481 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5482 else
5483 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5484
5485 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5486 ajFmtPrintF(flog, "seq1 and seqres match\n");
5487 else
5488 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5489 ajFmtPrintF(flog, "\n");
5490 #endif /* AJFALSE */
5491
5492
5493 /* Check whether SEQRES is substring of ATOM sequence */
5494 /* This will only find omissions from the SEQRES sequence
5495 * where the ATOM sequence would align to it without gaps,
5496 * and where the SEQRES sequence does not have extra
5497 * N-terminal residues relative to ATOM (such cases are
5498 * caught in STEP 2) */
5499 if ((loc = ajStrFindS(seq[i], pdbfile->seqres[i])) != -1)
5500 {
5501 /* SEQRES is substring of ATOM sequence - correct for
5502 * residues missing from SEQRES sequence and assign
5503 * residue numbers 'by hand' */
5504
5505 /* N-terminal insertion needed */
5506 if (loc != 0)
5507 {
5508 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSNTERM",
5509 i + 1, ajChararrGet(pdbfile->chid, i),
5510 loc);
5511
5512
5513 ajStrAssignSubS(&bit, seq[i], 0, loc - 1);
5514 ajStrInsertS(&(pdbfile->seqres[i]), 0, bit);
5515 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5516 }
5517
5518 /* C-terminal insertion needed */
5519 if (pdbfile->nres[i] != nres[i])
5520 {
5521 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSCTERM",
5522 i + 1, ajChararrGet(pdbfile->chid, i),
5523 (nres[i] - pdbfile->nres[i]));
5524
5525
5526 ajStrAssignSubS(&bit, seq[i], pdbfile->nres[i],
5527 nres[i] - 1);
5528 ajStrAppendS(&(pdbfile->seqres[i]), bit);
5529 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5530 }
5531
5532 for (k = 0; k < nres[i]; k++)
5533 ajIntPut(&idx[i], k, k + 1);
5534
5535 if (x == 0)
5536 pdbfile->resn1ok[i] = ajTrue;
5537 else
5538 pdbfile->resn1ok[i] = ajFalse;
5539
5540 #if AJFALSE
5541 /* DIAGNOSTIC */
5542 ajFmtPrintF(flog, "STEP3 OK\n");
5543 */
5544 #endif /* AJFALSE */
5545 done = ajTrue;
5546 break;
5547 }
5548
5549 /* Otherwise, SEQRES is NOT a substring of the ATOM sequence */
5550 #if AJFALSE
5551 /* DIAGNOSTIC */
5552 ajFmtPrintF(flog, "STEP3 **NOT** OK\n");
5553 */
5554 #endif /* AJFALSE */
5555
5556 /***********************************************/
5557 /******************* STEP 4 ********************/
5558 /***********************************************/
5559 #if AJFALSE
5560 /* DIAGNOSTIC */
5561 ajFmtPrintF(flog, "STEP4.1 tmpseqres: %S\n", tmpseqres);
5562
5563 ajFmtPrintF(flog, "chnn : %d\n"
5564 "seq1 : %S\n"
5565 "seq2 : %S\n"
5566 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5567 pdbfile->seqres[i]);
5568 ajFmtPrintF(flog, "\n");
5569 if (ajStrMatchS(seq1[i], seq2[i]))
5570 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5571 else
5572 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5573
5574 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5575 ajFmtPrintF(flog, "seq1 and seqres match\n");
5576 else
5577 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5578 ajFmtPrintF(flog, "\n");
5579
5580 ajFmtPrintF(flog, "STEP4.2 tmpseqres: %S\n", tmpseqres);
5581 #endif /* AJFALSE */
5582
5583 /* Check whether residue numbering is correct (and count the
5584 * number of mismatches) */
5585 for (err = ajFalse, ajStrAssignClear(&msgstr), nmismatches = 0,
5586 k = 0; k < nres[i]; k++)
5587 {
5588 this_num = ajIntGet(num[i], k);
5589
5590 /* Check to prevent array boundary error */
5591 if (this_num <= pdbfile->nres[i])
5592 {
5593 if (ajStrGetCharPos(seq[i], k) !=
5594 ajStrGetCharPos(pdbfile->seqres[i], this_num - 1))
5595 {
5596 nmismatches++;
5597 /* Correct the seqres sequence. Replace this with
5598 * appropriate library call once available so we
5599 * don't have to probe the internals of the
5600 * structure */
5601
5602
5603 /* a will give the number of the first coordinate
5604 * line for the mismatch residue from the ATOM
5605 * records */
5606 a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
5607 ajIntGet(num[i], k));
5608
5609 /* Get the id of the mismatch residue in the
5610 * SEQRES sequence. */
5611
5612 ajResidueToTriplet(
5613 pdbfile->seqres[i]->Ptr[this_num - 1],
5614 &aa_misfit);
5615
5616 ajFmtPrintS(&msgbit, "%S%S:%S%d. ",
5617 pdbfile->rtype[a], pdbfile->pdbn[a],
5618 aa_misfit, this_num - j);
5619
5620 ajStrAppendS(&msgstr, msgbit);
5621
5622 pdbfile->seqres[i]->Ptr[this_num - 1] =
5623 ajStrGetCharPos(seq[i], k);
5624 }
5625 }
5626 else
5627 {
5628 err = ajTrue;
5629 break;
5630 }
5631 }
5632
5633 #if AJFALSE
5634 /* DIAGNOSTIC */
5635 ajFmtPrintF(flog, "STEP4.3 tmpseqres: %S\n", tmpseqres);
5636 #endif /* AJFALSE */
5637 if (!err)
5638 {
5639 /* Residue numbering is correct (no or acceptable number
5640 * of mismatches) */
5641 if (nmismatches <= lim)
5642 {
5643 if (nmismatches)
5644 ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n",
5645 "MISMATCH",
5646 i + 1,
5647 ajChararrGet(pdbfile->chid, i),
5648 nmismatches, msgstr);
5649
5650
5651
5652
5653 for (k = 0; k < nres[i]; k++)
5654 ajIntPut(&idx[i], k, ajIntGet(num[i], k));
5655
5656 if (x == 0)
5657 pdbfile->resn1ok[i] = ajTrue;
5658 else
5659 pdbfile->resn1ok[i] = ajFalse;
5660
5661 done = ajTrue;
5662
5663 #if AJFALSE
5664 /* DIAGNOSTIC */
5665 ajFmtPrintF(flog, "STEP4 OK %d mismatches\n",
5666 nmismatches);
5667 #endif /* AJFALSE */
5668
5669 break;
5670 }
5671 else
5672 {
5673 /* Otherwise, residue numbering is incorrect
5674 * (unacceptable number of mismatches) Restore the
5675 * original seqres sequence */
5676 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5677 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5678
5679 #if AJFALSE
5680 /* DIAGNOSTIC */
5681 ajFmtPrintF(flog, "STEP4 **NOT** OK %d mismatches\n",
5682 nmismatches);
5683 #endif /* AJFALSE */
5684 }
5685 }
5686 else
5687 {
5688 /* Otherwise, residue numbering is incorrect (residue
5689 * number is out of range) Restore the original seqres
5690 * sequence */
5691 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
5692 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
5693
5694
5695 #if AJFALSE
5696 /* DIAGNOSTIC */
5697 ajFmtPrintF(flog, "STEP4 **NOT** OK out_of_range\n");
5698 #endif /* AJFALSE */
5699 }
5700
5701
5702 /***********************************************/
5703 /******************* STEP 5 ********************/
5704 /***********************************************/
5705 #if AJFALSE
5706 /* DIAGNOSTIC */
5707 ajFmtPrintF(flog, "STEP5 tmpseqres: %S\n", tmpseqres);
5708
5709 ajFmtPrintF(flog, "chnn : %d\n"
5710 "seq1 : %S\n"
5711 "seq2 : %S\n"
5712 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5713 pdbfile->seqres[i]);
5714 ajFmtPrintF(flog, "\n");
5715 if (ajStrMatchS(seq1[i], seq2[i]))
5716 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5717 else
5718 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5719
5720 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5721 ajFmtPrintF(flog, "seq1 and seqres match\n");
5722 else
5723 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5724 ajFmtPrintF(flog, "\n");
5725 #endif /* AJFALSE */
5726
5727 /* None of the measures above could find the correct residue
5728 * numbering so try by alignment.
5729 *
5730 * Align the SEQRES sequence to the ATOM sequence by taking
5731 * progressively decreasing sized substrings from the ATOM
5732 * sequence. It returns 0 if any of the alignments would not
5733 * leave enough space to be able to align the rest of the
5734 * ATOM sequence to the SEQRES sequence - i.e. alignments
5735 * giving an overspill of the ATOM sequence past the
5736 * C-terminus of the SEQRES sequence are NOT allowed.
5737 *
5738 * NO Mismatches are allowed at this stage */
5739
5740
5741 for (done_end = ajFalse,
5742 len = pdbfile->nres[i],
5743 siz_substr = nres[i],
5744 atom_ptr = ajStrGetPtr(seq[i]),
5745 seqres_ptr = ajStrGetPtr(pdbfile->seqres[i]);
5746 siz_substr > 0;)
5747 {
5748 ajStrAssignSubC(&substr, atom_ptr, 0, siz_substr - 1);
5749
5750 if ((loc_ptr = strstr(seqres_ptr,
5751 ajStrGetPtr(substr))) == NULL)
5752 {
5753 siz_substr--;
5754 continue;
5755 }
5756 else
5757 {
5758 atom_idx = (int) ((atom_ptr - ajStrGetPtr(seq[i]))
5759 / sizeof (char));
5760 seqres_idx = (int)
5761 ((loc_ptr - ajStrGetPtr(pdbfile->seqres[i]))
5762 / sizeof (char));
5763
5764
5765
5766
5767 /* CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINER
5768 * OF THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN
5769 * THIS ALIGNMENT */
5770 if ((nres[i] - atom_idx) > (len - seqres_idx))
5771 break;
5772
5773 for (k = 0, y = atom_idx, z = seqres_idx; k < siz_substr; k++,
5774 y++, z++)
5775 ajIntPut(&idx[i], y, z + 1);
5776
5777
5778
5779 /* Mark up last SEQRES residue as having been done */
5780 if (y == nres[i])
5781 done_end = ajTrue;
5782 }
5783
5784 atom_ptr += siz_substr;
5785 seqres_ptr = loc_ptr + siz_substr;
5786 siz_substr = nres[i] - (atom_idx + siz_substr);
5787 }
5788
5789
5790 /* Check to ensure that position for last residue has been
5791 * worked out */
5792 if (done_end)
5793 {
5794 #if AJFALSE
5795 /* DIAGNOSTIC */
5796 ajFmtPrintF(flog, "chnn : %d\n"
5797 "seq1 : %S\n"
5798 "seq2 : %S\n"
5799 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5800 pdbfile->seqres[i]);
5801 ajFmtPrintF(flog, "\n");
5802 if (ajStrMatchS(seq1[i], seq2[i]))
5803 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5804 else
5805 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5806
5807 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5808 ajFmtPrintF(flog, "seq1 and seqres match\n");
5809 else
5810 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5811 ajFmtPrintF(flog, "\n");
5812 #endif /* AJFALSE */
5813
5814 /* Residue numbering is correct after alignment (no
5815 * mismatches) */
5816 if (x == 0)
5817 pdbfile->resn1ok[i] = ajTrue;
5818 else
5819 pdbfile->resn1ok[i] = ajFalse;
5820
5821 ajFmtPrintF(flog, "%-15s%d (%c)\n", "GAPPEDOK", i + 1,
5822 ajChararrGet(pdbfile->chid, i));
5823
5824
5825 #if AJFALSE
5826 /* DIAGNOSTIC */
5827 ajFmtPrintF(flog, "STEP5 OK\n");
5828 #endif /* AJFALSE */
5829
5830 done = ajTrue;
5831 break;
5832 }
5833
5834 #if AJFALSE
5835 /* DIAGNOSTIC */
5836 ajFmtPrintF(flog, "STEP5 **NOT** OK\n");
5837 #endif /* AJFALSE */
5838
5839 /* Otherwise, agreement could not be found */
5840
5841 /* array might contain junk values now but this should not
5842 * matter as the array should be overwritten */
5843
5844 /***********************************************/
5845 /******************* STEP 6 ********************/
5846 /***********************************************/
5847 #if AJFALSE
5848 /* DIAGNOSTIC */
5849 ajFmtPrintF(flog, "STEP6 tmpseqres: %S\n",
5850 tmpseqres);
5851
5852 ajFmtPrintF(flog, "chnn : %d\n"
5853 "seq1 : %S\n"
5854 "seq2 : %S\n"
5855 "seqr : %S\n", i + 1, seq1[i], seq2[i],
5856 pdbfile->seqres[i]);
5857 ajFmtPrintF(flog, "\n");
5858 if (ajStrMatchS(seq1[i], seq2[i]))
5859 ajFmtPrintF(flog, "seq1 and seq2 match\n");
5860 else
5861 ajFmtPrintF(flog, "seq1 and seq2 DO NOT match\n");
5862
5863 if (ajStrMatchS(seq1[i], pdbfile->seqres[i]))
5864 ajFmtPrintF(flog, "seq1 and seqres match\n");
5865 else
5866 ajFmtPrintF(flog, "seq1 and seqres DO NOT match\n");
5867 ajFmtPrintF(flog, "\n");
5868 #endif /* AJFALSE */
5869
5870
5871 /* Try again by alignment. Mismatches ARE allowed at this
5872 * stage Must change: */
5873
5874
5875 for (ajStrAssignClear(&msgstr),
5876 nmismatches = 0,
5877 done_end = ajFalse,
5878 len = pdbfile->nres[i],
5879 siz_substr = nres[i],
5880 atom_ptr = ajStrGetPtr(seq[i]),
5881 seqres_idx_last = -1,
5882 seqres_ptr = ajStrGetPtr(pdbfile->seqres[i]);
5883 siz_substr > 0;)
5884 {
5885 /* On the first pass, atom_ptr and seqres_ptr point to
5886 * the start of the ATOM and SEQRES sequences
5887 * respectively. */
5888
5889
5890 founderr = ajFalse;
5891
5892 /* Copy block (of progressively decreasing size) from
5893 * ATOM sequence to substring1 */
5894 ajStrAssignSubC(&substr, atom_ptr, 0, siz_substr - 1);
5895
5896 #if AJFALSE
5897 /* DIAGNOSTIC */
5898 ajFmtPrintF(flog, "\n***\n%12s\n%s\n%12s\n%s\n",
5899 "Aligning ", ajStrGetPtr(substr), "To ", seqres_ptr);
5900 #endif /* AJFALSE */
5901
5902
5903 /* Set loc_ptr to point to the first occurrence of
5904 * substring1 in SEQRES sequence */
5905 /* If substring1 does not occur in SEQRES sequence */
5906 if ((loc_ptr = strstr(seqres_ptr,
5907 ajStrGetPtr(substr))) == NULL)
5908 {
5909 /* See if there is an error in the residue id at the
5910 * start of <substr> Create a new substring from the
5911 * ATOM records but omit the first character. There
5912 * is a special case if substr is only 1 character
5913 * long. */
5914
5915 if (siz_substr == 1)
5916 {
5917 loc_ptr = seqres_ptr;
5918 nmismatches++;
5919 founderr = ajTrue;
5920 }
5921 else
5922 {
5923 /* Copy substring1 to substring2 but omit the
5924 * first character */
5925 ajStrAssignSubS(&substr2, substr, 1, -1);
5926
5927
5928 #if AJFALSE
5929 /* DIAGNOSTIC */
5930 ajFmtPrintF(flog,
5931 "\n!!!\n%12s\n%s\n%12s\n%s\n",
5932 "Aligning ", ajStrGetPtr(substr2), "To ",
5933 seqres_ptr);
5934 #endif /* AJFALSE */
5935
5936 /* Set loc_ptr to point to the first occurrence
5937 * of substring1 in SEQRES sequence */
5938 /* If substring2 does not occur in the SEQRES
5939 * sequence, continue with a smaller substring */
5940 loc_ptr = strstr(seqres_ptr,
5941 ajStrGetPtr(substr2));
5942 if (!loc_ptr)
5943 {
5944 siz_substr--;
5945 continue;
5946 }
5947 /* substring2 is found in the SEQRES sequence */
5948 else
5949 {
5950 /* If there is not enough space to
5951 * accommodate the 'missing' residue continue
5952 * (with a smaller substring) */
5953 if (loc_ptr == seqres_ptr)
5954 {
5955 siz_substr--;
5956 continue;
5957 }
5958
5959 #if AJFALSE
5960 /* DIAGNOSTIC */
5961 ajFmtPrintF(flog, "\n\n\n");
5962 ajFmtPrintF(flog, "nmismatches = "
5963 "%d\n%12s%s\n%12s%s\n%12s%s\n%12s%s\n",
5964 nmismatches,
5965 "atom_ptr:",
5966 atom_ptr,
5967 "seqres_ptr:",
5968 seqres_ptr,
5969 "substr:",
5970 substr,
5971 "substr2",
5972 substr2);
5973
5974 /* DIAGNOSTIC */
5975 ajFmtPrintF(flog,
5976 "MISMATCH FOUND OK\n");
5977 #endif /* AJFALSE */
5978
5979 /* There is enough space to accommodate
5980 * substring2 and the 'missing' (mismatch)
5981 * residue */
5982 nmismatches++;
5983 founderr = ajTrue;
5984 }
5985 }
5986 }
5987 #if AJFALSE
5988 /* DIAGNOSTIC */
5989 else
5990 ajFmtPrintF(flog, "ALIGNMENT FOUND OK\n");
5991 #endif /* AJFALSE */
5992
5993 /* atom_idx and seqres_idx are set to give the index into
5994 * ATOM and SEQRES sequences respectively for the
5995 * position of match of N-terminal residue of substring
5996 * (if founderr is True this will be the position of the
5997 * N-terminal mismatch residue) */
5998 atom_idx = (int) ((atom_ptr - ajStrGetPtr(seq[i])) / sizeof (char));
5999
6000 if (founderr)
6001 seqres_idx = (int) (((loc_ptr - 1) -
6002 ajStrGetPtr(pdbfile->seqres[i]))
6003 / sizeof (char));
6004 else
6005 seqres_idx = (int) ((loc_ptr -
6006 ajStrGetPtr(pdbfile->seqres[i]))
6007 / sizeof (char));
6008 #if AJFALSE
6009 /* DIAGNOSTIC */
6010 ajFmtPrintF(flog, "seqres_idx : %d\n", seqres_idx);
6011 #endif /* AJFALSE */
6012
6013 /* If there was a mismatch residue, idx_misfit_atom and
6014 * idx_misfit_seqres will give the index into the ATOM
6015 * and SEQRES sequences respectively for its position */
6016 if (founderr)
6017 {
6018 idx_misfit_atom = atom_idx;
6019 idx_misfit_seqres = seqres_idx;
6020 }
6021
6022
6023
6024 /* CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINDER OF
6025 * THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN THIS
6026 * ALIGNMENT */
6027 if ((nres[i] - atom_idx) > (len - seqres_idx))
6028 break;
6029
6030 /**************************************************/
6031 /* This will have to change for 1st residue */
6032 /**************************************************/
6033
6034 /* Try and find an exact match within the gap for the
6035 * mismatch residue */
6036 fixed = ajFalse;
6037
6038 if (founderr)
6039 {
6040 #if AJFALSE
6041 /* DIAGNOSTIC */
6042 ajFmtPrintF(flog,
6043 "About to try (seqres_idx_last: %d, "
6044 "seqres_idx: %d) ...\n", seqres_idx_last,
6045 seqres_idx);
6046 #endif /* AJFALSE */
6047
6048
6049 aa_last = ajStrGetCharFirst(substr);
6050
6051 for (z = seqres_idx_last + 1; z < seqres_idx; z++)
6052 {
6053 #if AJFALSE
6054 /* DIAGNOSTIC */
6055 ajFmtPrintF(flog, "Trying ATOM:SEQRES %c:%c\n",
6056 aa_last, pdbfile->seqres[i]->Ptr[z]);
6057 #endif /* AJFALSE */
6058
6059 if (pdbfile->seqres[i]->Ptr[z] == aa_last)
6060 {
6061 nmismatches--;
6062 founderr = ajFalse;
6063 fixed = ajTrue;
6064
6065
6066 /* Assign residue number */
6067 ajIntPut(&idx[i], atom_idx, z + 1);
6068
6069 for (k = 0, y = atom_idx + 1, z = seqres_idx + 1;
6070 k < siz_substr - 1; k++, y++, z++)
6071 ajIntPut(&idx[i], y, z + 1);
6072
6073 break;
6074 }
6075 }
6076 }
6077
6078 if (!fixed)
6079 {
6080 #if AJFALSE
6081 /* DIAGNOSTIC */
6082 ajFmtPrintF(flog, "FAILED TO FIX\n");
6083 #endif /* AJFALSE */
6084 /* Assign residue number */
6085 for (k = 0, y = atom_idx, z = seqres_idx; k < siz_substr;
6086 k++, y++, z++)
6087 ajIntPut(&idx[i], y, z + 1);
6088
6089 }
6090
6091
6092
6093 /* Mark up last SEQRES residue as having been done */
6094 if (y == nres[i])
6095 done_end = ajTrue;
6096
6097
6098
6099
6100
6101 /* If the substring matched but with a residue mismatch
6102 * for the 1st residue */
6103 /**************************************************/
6104 /* This block should only be called if we */
6105 /* can't fit the mismatch residue in somewhere. */
6106 /**************************************************/
6107 if (founderr)
6108 {
6109 /* a will give the number of the first coordinate
6110 * line for the mismatch residue from the ATOM
6111 * records */
6112
6113 a = pdbioPdbfileFindLine(pdbfile, i + 1, x,
6114 ajIntGet(num[i],
6115 idx_misfit_atom));
6116
6117 /* Get the id of the mismatch residue in the SEQRES
6118 * sequence. */
6119 ajResidueToTriplet(
6120 pdbfile->seqres[i]->Ptr[idx_misfit_seqres],
6121 &aa_misfit);
6122
6123
6124 /* To give correct index into SEQRES records in
6125 * original PDB file, subtract j to account for
6126 * modifications to the N-terminus that were made for
6127 * missing residues relative to ATOM sequence. A
6128 * further 1 is added to give a number starting from
6129 * 1 (rather than 0) */
6130
6131 #if AJFALSE
6132 /* DIAGNOSTIC */
6133 ajFmtPrintF(flog, "a : %d\n".a);
6134 ajFmtPrintF(flog, "pdbfile->rtype[a] : %S\n", pdbfile->rtype[a]);
6135 ajFmtPrintF(flog, "pdbfile->pdbn[a] : %S\n",
6136 pdbfile->pdbn[a]);
6137 #endif /* AJFALSE */
6138
6139 ajFmtPrintS(&msgbit, "%S %S %S %d; ",
6140 pdbfile->rtype[a], pdbfile->pdbn[a],
6141 aa_misfit, idx_misfit_seqres - j + 1);
6142
6143 #if AJFALSE
6144 /* DIAGNOSTIC */
6145 ajFmtPrintS(&msgbit, "ATOM residue %d (%c) vs "
6146 "SEQRES residue %d (%c). ",
6147 ajIntGet(num[i], atom_idx), ajStrGetCharFirst(substr),
6148 seqres_idx + 1, pdbfile->seqres[i]->Ptr[seqres_idx]);
6149 #endif /* AJFALSE */
6150
6151 ajStrAppendS(&msgstr, msgbit);
6152
6153
6154 pdbfile->seqres[i]->Ptr[seqres_idx] =
6155 ajStrGetCharFirst(substr);
6156 }
6157
6158
6159 /* atom_ptr and seqres_ptr now point to 1 residue past
6160 * the end of the match of the substring in the ATOM and
6161 * SEQRES sequences respectively. */
6162
6163 atom_ptr += siz_substr;
6164
6165 if (founderr)
6166 seqres_ptr = (loc_ptr - 1) + siz_substr;
6167 else
6168 seqres_ptr = loc_ptr + siz_substr;
6169
6170 siz_substr = nres[i] - (atom_idx + siz_substr);
6171
6172
6173
6174 /**************************************************/
6175 /* Must assign index into SEQRES for */
6176 /* C-terminal residue of substring */
6177 /**************************************************/
6178
6179
6180 /* seqres_idx_last is set to give the index into SEQRES
6181 * sequence for the position of match of C-terminal
6182 * residue of substring + 1 */
6183 seqres_idx_last = (int) z - 1;
6184 }
6185
6186 /* Check to ensure that position for last residue has been
6187 * worked out */
6188 if ((done_end) && (nmismatches <= lim))
6189 {
6190 if (nmismatches)
6191 ajFmtPrintF(flog, "%-15s%d (%c) %d %S\n", "GAPPED",
6192 i + 1,
6193 ajChararrGet(pdbfile->chid, i),
6194 nmismatches, msgstr);
6195 else
6196 ajFmtPrintF(flog, "%-15s%d (%c)\n", "GAPPEDOK", i + 1,
6197 ajChararrGet(pdbfile->chid, i));
6198
6199
6200 /* Residue numbering is correct after alignment
6201 * (acceptable number of mismatches) */
6202 if (x == 0)
6203 pdbfile->resn1ok[i] = ajTrue;
6204 else
6205 pdbfile->resn1ok[i] = ajFalse;
6206
6207 #if AJFALSE
6208 /* DIAGNOSTIC */
6209 ajFmtPrintF(flog,
6210 "STEP6 OK %d mismatches\n", nmismatches);
6211 #endif /* AJFALSE */
6212
6213 done = ajTrue;
6214 break;
6215 }
6216
6217 #if AJFALSE
6218 /* DIAGNOSTIC */
6219 ajFmtPrintF(flog, "STEP6 **NOT** OK %d mismatches\n",
6220 nmismatches);
6221 #endif /* AJFALSE */
6222
6223
6224 /* Otherwise, agreement could not be found - unacceptable
6225 * number of mismatches. Restore the original seqres sequence */
6226 ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres);
6227 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
6228
6229 } /* for(x = 0; x < 2; x++) */
6230
6231 if (done)
6232 break;
6233 }
6234
6235 /* After trying 6 different alignment steps with (potentially
6236 * modified) seqres sequences and 2 (possibly different) sequences
6237 * derived from the ATOM records an alignment with agreement in
6238 * residue numbering still cannot be found. Use data from ATOM
6239 * records only - use seq1 (all residues) and presume there are no
6240 * missing residues. */
6241 if (!done)
6242 {
6243 ajFmtPrintF(flog, "%-15s%d (%c)\n", "NOMATCH", i + 1,
6244 ajChararrGet(pdbfile->chid, i));
6245
6246 ajStrAssignS(&(pdbfile->seqres[i]), seq1[i]);
6247 pdbfile->nres[i] = ajStrGetLen(pdbfile->seqres[i]);
6248
6249 for (k = 0; k < nres1[i]; k++)
6250 ajIntPut(&idx[i], k, k + 1);
6251
6252 pdbfile->resn1ok[i] = ajTrue;
6253 }
6254 else
6255 {
6256 if (j)
6257 ajFmtPrintF(flog, "%-15s%d (%c) %d\n", "MISSNTERM", i + 1,
6258 ajChararrGet(pdbfile->chid, i), j);
6259
6260 if (!ajStrMatchS(seq1[i], seq2[i]))
6261 {
6262 if (x == 0)
6263 ajFmtPrintF(flog, "%-15s%d\n", "ALTERNOK", i + 1);
6264 else
6265 ajFmtPrintF(flog, "%-15s%d\n", "HETEROK", i + 1);
6266
6267 }
6268 }
6269 }
6270
6271
6272 /* Write the index arrays */
6273 for (i = 0; i < pdbfile->nchains; i++)
6274 {
6275 if (!pdbfile->chainok[i])
6276 continue;
6277
6278
6279 if (pdbfile->resn1ok[i])
6280 for (j = 0; j < nres1[i]; j++)
6281 ajIntPut(&idx_full[i], ajIntGet(num1[i], j),
6282 ajIntGet(idx[i], j));
6283 else
6284 for (j = 0; j < nres2[i]; j++)
6285 ajIntPut(&idx_full[i], ajIntGet(num2[i], j),
6286 ajIntGet(idx[i], j));
6287 }
6288
6289 /* Write the resni element of the Pdbfile object. These are the residue
6290 * numbers that give the correct index into the finalised seqres sequence */
6291
6292 for (i = pdbfile->idxfirst; i < pdbfile->nlines; i++)
6293 {
6294 if (pdbfile->linetype[i] == pdbfileELinetypeCoordinate)
6295 {
6296 if (pdbfile->resn1ok[pdbfile->chnn[i] - 1])
6297 {
6298 pdbfile->resni[i] =
6299 ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6300 pdbfile->resn1[i]);
6301 #if AJFALSE
6302 /* DIAGNOSTIC */
6303 ajFmtPrintF(flog, "Got position %d (%d) ok\n",
6304 pdbfile->resn1[i],
6305 ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6306 pdbfile->resn1[i]));
6307 #endif /* AJFALSE */
6308 }
6309
6310 else
6311 {
6312 pdbfile->resni[i] =
6313 ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6314 pdbfile->resn2[i]);
6315 #if AJFALSE
6316 /* DIAGNOSTIC */
6317 ajFmtPrintF(flog, "Got position %d (%d) ok\n",
6318 pdbfile->resn2[i],
6319 ajIntGet(idx_full[pdbfile->chnn[i] - 1],
6320 pdbfile->resn2[i]));
6321 #endif /* AJFALSE */
6322 }
6323 }
6324 }
6325
6326 #if AJFALSE
6327 /* DIAGNOSTIC */
6328
6329 ajFmtPrintF(flog, "\n\n\n");
6330
6331 seq = seq1;
6332
6333 for (i = 0; i < pdbfile->nchains; i++)
6334 {
6335 if (!pdbfile->chainok[i])
6336 {
6337 ajFmtPrintF(flog,
6338 "Chain %d\nSEQRES %S\nCHAIN NOT OK\n\n\n",
6339 i + 1,
6340 pdbfile->seqres[i]);
6341
6342 continue;
6343 }
6344
6345 ajFmtPrintF(flog,
6346 "Chain %d\nSEQRES %S\nSEQ__1 %S\nSEQ__2 %S\n\n\n",
6347 i + 1,
6348 pdbfile->seqres[i],
6349 seq1[i],
6350 seq2[i]);
6351
6352 }
6353
6354
6355 for (i = 0; i < pdbfile->nchains; i++)
6356 {
6357 if (!pdbfile->chainok[i])
6358 continue;
6359
6360 if (nres1[i] > nres2[i])
6361 max = nres1[i];
6362 else
6363 max = nres2[i];
6364
6365 ajFmtPrintF(flog, "CHAIN %d\n", i + 1);
6366 ajFmtPrintF(flog, "seqres %S\n", pdbfile->seqres[i]);
6367
6368
6369 ajFmtPrintF(flog, "%-6s%-6s%-6s%-6s%-6s%-6s%-6s\n",
6370 "RES", "NUM", "SEQ1", "RESN1", "SEQ2", "RESN2", "IDX");
6371
6372 for (j = 0; j < max; j++)
6373 {
6374 if (j < nres1[i] && j < nres2[i])
6375 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6d%-6c%-6d%-6d\n",
6376 "RES",
6377 j + 1,
6378 ajStrGetCharPos(seq1[i], j),
6379 ajIntGet(num1[i], j),
6380 ajStrGetCharPos(seq2[i], j),
6381 ajIntGet(num2[i], j),
6382 ajIntGet(idx[i], j));
6383 else if (j < nres1[i])
6384 {
6385 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6d%-6c%-6c",
6386 "RES",
6387 j + 1,
6388 ajStrGetCharPos(seq1[i], j),
6389 ajIntGet(num1[i], j),
6390 '.', '.');
6391
6392
6393 if (pdbfile->resn1ok[i])
6394 ajFmtPrintF(flog, "%-6d\n", ajIntGet(idx[i], j));
6395 else
6396 ajFmtPrintF(flog, "%-6c\n", '.');
6397
6398 }
6399 else
6400 {
6401 ajFmtPrintF(flog, "%-6s%-6d%-6c%-6c%-6c%-6d",
6402 "RES",
6403 j + 1,
6404 '.', '.',
6405 ajStrGetCharPos(seq2[i], j),
6406 ajIntGet(num2[i], j));
6407
6408 if (!pdbfile->resn1ok[i])
6409 ajFmtPrintF(flog, "%-6d\n", ajIntGet(idx[i], j));
6410 else
6411 ajFmtPrintF(flog, "%-6c\n", '.');
6412 }
6413 }
6414 }
6415 #endif /* AJFALSE */
6416
6417 /* Tidy up and return */
6418 ajStrDel(&aa_misfit);
6419 ajStrDel(&seqbit);
6420 ajStrDel(&msgstr);
6421 ajStrDel(&msgbit);
6422 AJFREE(insert);
6423 ajStrDel(&tmpseqres);
6424 ajStrDel(&bit);
6425 ajStrDel(&substr);
6426 ajStrDel(&substr2);
6427
6428 for (i = 0U; i < pdbfile->nchains; i++)
6429 {
6430 if (!pdbfile->chainok[i])
6431 continue;
6432
6433 ajStrDel(&seq1[i]);
6434 ajStrDel(&seq2[i]);
6435
6436 ajIntDel(&num1[i]);
6437 ajIntDel(&num2[i]);
6438
6439 ajIntDel(&idx[i]);
6440 ajIntDel(&idx_full[i]);
6441
6442 }
6443
6444 AJFREE(seq1);
6445 AJFREE(seq2);
6446
6447 AJFREE(num1);
6448 AJFREE(num2);
6449
6450 AJFREE(idx);
6451 AJFREE(idx_full);
6452
6453 AJFREE(nres1);
6454 AJFREE(nres2);
6455
6456 return ajTrue;
6457 }
6458
6459
6460
6461
6462 /* #funcstatic pdbioDiagnostic ************************************************
6463 **
6464 ** For printing out diagnostics for pdbparse build
6465 **
6466 **
6467 ** #param [r] pdbfile [AjPPdbfile] Pdbfile object
6468 ** #param [r] n [ajint] Flag for controlling output
6469 **
** #return [void]
6471 ** ##
6472 ******************************************************************************/
6473
6474 #if AJFALSE
6475 /*THIS_DIAGNOSTIC*/
/* Diagnostic dump of a Pdbfile object to <tempfile>.
 *
 * pdbfile : object whose fields are printed (read only here).
 * n       : 0 prints the per-file summary (chains, SEQRES sequences,
 *           compound, source, resolution, method);
 *           1 prints one row per parsed line;
 *           any other value prints nothing.
 *
 * NOTE(review): <tempfile> is not declared inside this block; presumably it
 * is a file-scope AjPFile - confirm it exists before re-enabling this code. */
static void pdbioDiagnostic(AjPPdbfile pdbfile, ajint n)
{
    ajuint i = 0U;

    if (n == 0)
    {
        /* NOTE(review): nchains is compared against an ajuint counter below;
         * if it is itself unsigned, "%d" here should become "%u" - confirm
         * against the AjPPdbfile declaration. */
        ajFmtPrintF(tempfile, "nchains: %d\n", pdbfile->nchains);

        for (i = 0U; i < pdbfile->nchains; i++)
            ajFmtPrintF(tempfile, "chid: %c\n",
                        ajChararrGet(pdbfile->chid, i));

        /* i is an ajuint, so it must be printed with the unsigned
         * conversion rather than "%d" */
        for (i = 0U; i < pdbfile->nchains; i++)
            ajFmtPrintF(tempfile, "seqres %u: %S\n", i, pdbfile->seqres[i]);

        ajFmtPrintF(tempfile, "tercnt: %d\n", pdbfile->tercnt);
        ajFmtPrintF(tempfile, "COMPND: %S\n", pdbfile->compnd);
        ajFmtPrintF(tempfile, "SOURCE: %S\n", pdbfile->source);
        ajFmtPrintF(tempfile, "reso: %f\n", pdbfile->reso);

        /* Every method other than X-ray is reported as NMR */
        if (pdbfile->method == ajEPdbMethodXray)
            ajFmtPrintF(tempfile, "method: ajEPdbMethodXray\n");
        else
            ajFmtPrintF(tempfile, "method: ajEPdbMethodNmr\n");
    }
    else if (n == 1)
    {
        for (i = 0U; i < pdbfile->nlines; i++)
        {
            ajFmtPrintF(tempfile, "%-5S", pdbfile->pdbid);

            /* Fixed-width (10 character) line type column */
            switch (pdbfile->linetype[i])
            {
                case pdbfileELinetypeIgnore:
                    ajFmtPrintF(tempfile, "%-10s", "IGNORE");
                    break;
                case pdbfileELinetypeCoordinate:
                    ajFmtPrintF(tempfile, "%-10s", "COORD");
                    break;
                case pdbfileELinetypeHeterogen:
                    ajFmtPrintF(tempfile, "%-10s", "COORDHET");
                    break;
                case pdbfileELinetypeGroups:
                    ajFmtPrintF(tempfile, "%-10s", "COORDGP");
                    break;
                case pdbfileELinetypeWater:
                    ajFmtPrintF(tempfile, "%-10s", "COORDWAT");
                    break;
                case pdbfileELinetypeTER:
                    ajFmtPrintF(tempfile, "%-10s", "TER");
                    break;
                case pdbfileELinetypeMODEL:
                    ajFmtPrintF(tempfile, "%-10s", "MODEL");
                    break;
                default:
                    /* Still emit the column for an unlisted line type so
                     * that the remaining fields of the row stay aligned */
                    ajFmtPrintF(tempfile, "%-10s", "UNKNOWN");
                    break;
            }

            /* Model no., chain no., PDB residue id, the two residue
             * numberings, odd-number flag, atom type, residue type,
             * x/y/z coordinates, then the o and b fields */
            ajFmtPrintF(tempfile,
                        "M%-2dC%-2d%-6S%-5d%-5d%-4B%-4S%-4S%-7.3f%-7.3f"
                        "%-7.3f%-6.3f%-6.3f\n",
                        pdbfile->modn[i],
                        pdbfile->chnn[i],
                        pdbfile->pdbn[i],
                        pdbfile->resn1[i],
                        pdbfile->resn2[i],
                        pdbfile->oddnum[i],
                        pdbfile->atype[i],
                        pdbfile->rtype[i],
                        pdbfile->x[i],
                        pdbfile->y[i],
                        pdbfile->z[i],
                        pdbfile->o[i],
                        pdbfile->b[i]);
        }
    }

    return;
}
6550 #endif /* AJFALSE */
6551
6552
6553
6554
6555 /* @funcstatic pdbioPdbfileToPdb **********************************************
6556 **
6557 ** Reads data from a Pdbfile object and writes a Pdb object.
6558 ** Chains that did not contain at least the user-defined threshold number of
6559 ** amino acid residues are discarded, i.e. are NOT copied and will NOT appear
6560 ** in the output file that is eventually generated.
6561 **
6562 **
6563 ** @param [w] Ppdb [AjPPdb *] Pdb object pointer
6564 ** @param [u] pdbfile [AjPPdbfile] Pdbfile object
6565 **
6566 ** @return [AjBool] True on success, False otherwise
6567 **
6568 ** @release 2.9.0
6569 ** @@
6570 ******************************************************************************/
6571
pdbioPdbfileToPdb(AjPPdb * Ppdb,AjPPdbfile pdbfile)6572 static AjBool pdbioPdbfileToPdb(AjPPdb *Ppdb, AjPPdbfile pdbfile)
6573 {
6574 ajuint i = 0U; /* Loop counter */
6575 ajint idx = 0; /* Index into chain array */
6576 ajuint j = 0U; /* Loop counter */
6577 AjPAtom atom = NULL; /* Atom object */
6578 AjPResidue residue = NULL; /* Residue object */
6579 ajuint nchn = 0U; /* No. chains that have min. no. of aa's */
6580 AjPInt lookup; /* Array of chain numbers for chains in ret
6581 * for all chains in pdb.A '0' is given for
6582 * chains with < threshold no. of aa's */
6583 ajint chn = 0;
6584 ajuint rn_last = UINT_MAX;
6585 ajuint mn_last = UINT_MAX;
6586
6587 ajint eNum;
6588 AjPStr eId = NULL;
6589 char eType;
6590 ajint eClass;
6591
6592 if (!Ppdb || !pdbfile)
6593 {
6594 ajWarn("Bad args passed to pdbioPdbfileToPdb");
6595
6596 return ajFalse;
6597 }
6598
6599 if (*Ppdb)
6600 {
6601 ajWarn("Bad args passed to pdbioPdbfileToPdb - PDB object exists");
6602
6603 return ajFalse;
6604 }
6605
6606 eId = ajStrNew();
6607
6608 lookup = ajIntNewRes(pdbfile->nchains);
6609 ajIntPut(&lookup, pdbfile->nchains - 1, 0);
6610
6611 for (nchn = 0U, i = 0U; i < pdbfile->nchains; i++)
6612 if (pdbfile->chainok[i])
6613 {
6614 nchn++;
6615 ajIntPut(&lookup, i, nchn);
6616 }
6617
6618 *Ppdb = ajPdbNew(nchn);
6619 (*Ppdb)->Nchn = nchn;
6620
6621 ajStrAssignS(&((*Ppdb)->Pdb), pdbfile->pdbid);
6622 ajStrAssignS(&((*Ppdb)->Compnd), pdbfile->compnd);
6623 ajStrAssignS(&((*Ppdb)->Source), pdbfile->source);
6624 (*Ppdb)->Method = pdbfile->method;
6625 (*Ppdb)->Reso = pdbfile->reso;
6626 (*Ppdb)->Nmod = pdbfile->modcnt;
6627 (*Ppdb)->Ngp = pdbfile->ngroups;
6628 /* (*Ppdb)->Nchn = pdbfile->nchains; */
6629
6630 for (i = 0U; i < pdbfile->ngroups; i++)
6631 ajChararrPut(&((*Ppdb)->gpid), i, ajChararrGet(pdbfile->gpid, i));
6632
6633 for (idx = -1, i = 0U; i < pdbfile->nchains; i++)
6634 {
6635 if (pdbfile->chainok[i])
6636 idx++;
6637 else
6638 continue;
6639
6640 (*Ppdb)->Chains[idx]->Id = ajChararrGet(pdbfile->chid, i);
6641
6642
6643 /* These counts are no longer made from the PDB records. They are
6644 * only made if the file is annotated with stride secondary structure
6645 * info by using pdbstride */
6646 #if AJFALSE
6647 (*Ppdb)->Chains[idx]->numHelices = pdbfile->numHelices[i];
6648 (*Ppdb)->Chains[idx]->numStrands = pdbfile->numStrands[i];
6649 (*Ppdb)->Chains[idx]->numSheets = pdbfile->numSheets[i];
6650 (*Ppdb)->Chains[idx]->numTurns = pdbfile->numTurns[i];
6651 #endif /* AJFALSE */
6652 (*Ppdb)->Chains[idx]->Nres = pdbfile->nres[i];
6653 (*Ppdb)->Chains[idx]->Nlig = pdbfile->nligands[i];
6654 ajStrAssignS(&((*Ppdb)->Chains[idx]->Seq), pdbfile->seqres[i]);
6655 }
6656
6657
6658 for (j = pdbfile->idxfirst; j < pdbfile->nlines; j++)
6659 {
6660 if ((pdbfile->linetype[j] == pdbfileELinetypeCoordinate) ||
6661 (pdbfile->linetype[j] == pdbfileELinetypeHeterogen) ||
6662 (pdbfile->linetype[j] == pdbfileELinetypeGroups) ||
6663 (pdbfile->linetype[j] == pdbfileELinetypeWater))
6664 {
6665 /* Skip this line if it for a heterogenous (duplicate) position */
6666 if ((!pdbfile->resn1ok[pdbfile->chnn[j] - 1]) && pdbfile->oddnum[j])
6667 continue;
6668
6669 atom = ajAtomNew();
6670
6671 atom->Mod = pdbfile->modn[j];
6672 /* atom->Chn = pdbfile->chnn[j]; */
6673 atom->Chn = ajIntGet(lookup, pdbfile->chnn[j] - 1);
6674
6675 atom->Gpn = pdbfile->gpn[j];
6676
6677 switch (pdbfile->linetype[j])
6678 {
6679 case pdbfileELinetypeHeterogen:
6680 atom->Type = 'H';
6681 break;
6682 case pdbfileELinetypeGroups:
6683 atom->Type = 'H';
6684 break;
6685 case pdbfileELinetypeCoordinate:
6686 atom->Type = 'P';
6687 break;
6688 case pdbfileELinetypeWater:
6689 atom->Type = 'W';
6690 break;
6691 default:
6692 break;
6693 }
6694
6695 atom->Idx = pdbfile->resni[j];
6696
6697 ajStrAssignS(&atom->Pdb, pdbfile->pdbn[j]);
6698
6699 if ((pdbfile->linetype[j] == pdbfileELinetypeHeterogen) ||
6700 (pdbfile->linetype[j] == pdbfileELinetypeGroups) ||
6701 (pdbfile->linetype[j] == pdbfileELinetypeWater))
6702 atom->Id1 = '.';
6703 else
6704 ajResidueFromTriplet(pdbfile->rtype[j], &atom->Id1);
6705
6706 ajStrAssignS(&atom->Id3, pdbfile->rtype[j]);
6707 ajStrAssignS(&atom->Atm, pdbfile->atype[j]);
6708 atom->X = pdbfile->x[j];
6709 atom->Y = pdbfile->y[j];
6710 atom->Z = pdbfile->z[j];
6711 atom->O = pdbfile->o[j];
6712 atom->B = pdbfile->b[j];
6713
6714
6715 ajStrAssignS(&eId, pdbfile->elementId[j]);
6716 eNum = pdbfile->elementNum[j];
6717 eType = pdbfile->elementType[j];
6718 eClass = pdbfile->helixClass[j];
6719
6720
6721 if (pdbfile->linetype[j] == pdbfileELinetypeGroups)
6722 ajListPushAppend((*Ppdb)->Groups, atom);
6723 else if (pdbfile->linetype[j] == pdbfileELinetypeWater)
6724 ajListPushAppend((*Ppdb)->Water, atom);
6725 else
6726 {
6727 if (pdbfile->chainok[pdbfile->chnn[j] - 1])
6728 {
6729 #if AJFALSE
6730 ajListPushAppend((*Ppdb)->Chains[pdbfile->chnn[j] - 1]->Atoms,
6731 atom);
6732 ajListPushAppend((*Ppdb)->Chains[ajIntGet(lookup,
6733 pdbfile->chnn[j] - 1) - 1]->Atoms, atom);
6734 #endif /* AJFALSE */
6735 chn = ajIntGet(lookup, pdbfile->chnn[j] - 1) - 1;
6736
6737 ajListPushAppend((*Ppdb)->Chains[chn]->Atoms, atom);
6738
6739 /* Write residue object */
6740 if (atom->Type == 'P')
6741 {
6742 /* New model */
6743 if (atom->Mod != mn_last)
6744 {
6745 rn_last = UINT_MAX;
6746 mn_last = atom->Mod;
6747 }
6748
6749 /* New residue */
6750 if (atom->Idx != rn_last)
6751 {
6752 residue = ajResidueNew();
6753
6754 residue->Mod = atom->Mod;
6755 residue->Chn = atom->Chn;
6756 residue->Idx = atom->Idx;
6757 ajStrAssignS(&residue->Pdb, atom->Pdb);
6758 residue->Id1 = atom->Id1;
6759 ajStrAssignS(&residue->Id3, atom->Id3);
6760
6761 residue->eNum = eNum;
6762 ajStrAssignS(&residue->eId, eId);
6763 residue->eType = eType;
6764 residue->eClass = eClass;
6765
6766 ajListPushAppend((*Ppdb)->Chains[chn]->Residues,
6767 (void *) residue);
6768 rn_last = atom->Idx;
6769 }
6770 }
6771 }
6772 else
6773 ajAtomDel(&atom);
6774 }
6775 }
6776 else
6777 continue;
6778 }
6779
6780 ajIntDel(&lookup);
6781 ajStrDel(&eId);
6782
6783 return ajTrue;
6784 }
6785
6786
6787
6788
6789 /* @funcstatic pdbioPdbfileFindLine *******************************************
6790 **
6791 ** Returns the line number of the first instance of a line with a specified
6792 ** residue and chain number.
6793 **
6794 ** @param [r] pdbfile [const AjPPdbfile] Pdbfile object pointer
6795 ** @param [r] chn [ajint] Chain number
6796 ** @param [r] which [ajint] 0 or 1, refer to resn1 or resn2 residue
6797 ** @param [r] pos [ajint] Residue number
6798 **
6799 ** @return [ajint] Line number (index, i.e. starts from 0).
6800 **
6801 ** @release 2.9.0
6802 ** @@
6803 ******************************************************************************/
6804
pdbioPdbfileFindLine(const AjPPdbfile pdbfile,ajint chn,ajint which,ajint pos)6805 static ajint pdbioPdbfileFindLine(const AjPPdbfile pdbfile, ajint chn,
6806 ajint which, ajint pos)
6807 {
6808 ajuint a = 0U;
6809 /* a will give the number of the first coordinate line for the mismatch
6810 * residue from the ATOM records */
6811
6812 for (a = pdbfile->idxfirst; a < pdbfile->nlines; a++)
6813 if (pdbfile->linetype[a] == pdbfileELinetypeCoordinate &&
6814 pdbfile->chnn[a] == chn)
6815 /* First sequence (all residues) derived for atom records */
6816 /* OR Second sequence (excluding certain residues) derived for
6817 * atom records */
6818 if (((which == 0) && (pos == pdbfile->resn1[a])) ||
6819 ((which == 1) && (pos == pdbfile->resn2[a])))
6820 break;
6821
6822 if (a == pdbfile->nlines)
6823 ajFatal("Unexpected loop failure in pdbioPdbfileFindLine. "
6824 "Email jison@hgmp.mrc.ac.uk\n");
6825
6826 return a;
6827 }
6828
6829
6830
6831
6832 /* @funcstatic pdbioPdbfileChain **********************************************
6833 **
6834 ** Finds the chain number for a given chain identifier in a pdbfile structure
6835 **
6836 ** @param [r] id [char] Chain identifier
6837 ** @param [r] pdbfile [const AjPPdbfile] Pdbfile object
6838 ** @param [w] chn [ajint *] Chain number
6839 **
6840 ** @return [AjBool] True on success
6841 **
6842 ** @release 2.9.0
6843 ** @@
6844 ******************************************************************************/
6845
pdbioPdbfileChain(char id,const AjPPdbfile pdbfile,ajint * chn)6846 static AjBool pdbioPdbfileChain(char id, const AjPPdbfile pdbfile, ajint *chn)
6847 {
6848 ajuint a = 0U;
6849
6850 for (a = 0U; a < pdbfile->nchains; a++)
6851 {
6852 if (toupper((int) ajChararrGet(pdbfile->chid, a)) == toupper((int) id))
6853 {
6854 *chn = a + 1;
6855
6856 return ajTrue;
6857 }
6858
6859 /* Cope with chain id's of ' ' (which might be given as '.' in the
6860 * Pdbfile object) */
6861 if ((id == ' ') && (ajChararrGet(pdbfile->chid, a) == '.'))
6862 {
6863 *chn = a + 1;
6864
6865 return ajTrue;
6866 }
6867 }
6868
6869 /* A '.' may be given as the id for domains comprising more than one
6870 * chain */
6871 if (id == '.')
6872 {
6873 *chn = 1;
6874
6875 return ajTrue;
6876 }
6877
6878
6879 return ajFalse;
6880 }
6881
6882
6883
6884
6885 /* @funcstatic pdbioWriteElementData ******************************************
6886 **
6887 ** Reads the secondary structure information from an Elements object
6888 ** and writes equivalent variables in an Pdbfile object.
6889 **
6890 ** @param [w] pdbfile [AjPPdbfile] Pdbfile object
6891 ** @param [u] flog [AjPFile] Pointer to log file (build diagnostics)
6892 ** @param [r] elms [const AjPElements] Elements object pointer
6893 **
6894 ** @return [AjBool] True on success, False otherwise
6895 **
6896 ** @release 2.9.0
6897 ** @@
6898 ******************************************************************************/
6899
pdbioWriteElementData(AjPPdbfile pdbfile,AjPFile flog,const AjPElements elms)6900 static AjBool pdbioWriteElementData(AjPPdbfile pdbfile, AjPFile flog,
6901 const AjPElements elms)
6902 {
6903 ajuint x = 0U;
6904 ajint y = 0;
6905 ajint z = 0;
6906 ajuint modn = 0U; /* Model number */
6907
6908 ajuint idx = 0U; /* idx into lines in Pdbfile object */
6909 ajuint idx_start = 0u; /* Line index of start of element */
6910 ajuint idx_end = 0U; /* Line index of end of element */
6911 ajuint idx_last = 0U; /* Line index of last line to try */
6912 ajuint idx_tmp = 0U; /* Temp. line index */
6913
6914 ajint chn = 0; /* Chain id of current element as integer */
6915
6916 AjPInt nsheets = NULL; /* Number of sheets in each chain */
6917 AjPStr *lastids = NULL; /* Last sheet identifier read in for each
6918 * chain */
6919 AjBool found_start = ajFalse; /* Whether start residue of the
6920 * current element has been found yet */
6921 AjBool found_end = ajFalse; /* Whether the end residue of the current
6922 * element has been found yet */
6923 ajint n_unknown = 0; /* No. of unknown chain ids */
6924 AjPChar unknowns = NULL; /* Unknown chain ids */
6925 AjBool found = ajFalse; /* True if we have already reported an error
6926 * message for the unknown chain id */
6927
6928 /* Check args */
6929 if (!pdbfile || !(elms) || !(flog))
6930 {
6931 ajWarn("NULL arg passed to pdbioWriteElementData");
6932
6933 return ajFalse;
6934 }
6935
6936 /* Assign default values for secondary structure fields in Pdbfile object */
6937 for (x = 0U; x < pdbfile->nlines; x++)
6938 {
6939 pdbfile->elementType[x] = '.';
6940 ajStrAssignC(&pdbfile->elementId[x], ".");
6941 }
6942
6943 /* Allocate memory */
6944 nsheets = ajIntNewRes(pdbfile->nchains);
6945 ajIntPut(&nsheets, pdbfile->nchains, 0);
6946
6947 unknowns = ajChararrNew();
6948
6949 AJCNEW0(lastids, pdbfile->nchains);
6950
6951 for (x = 0U; x < pdbfile->nchains; x++)
6952 {
6953 lastids[x] = ajStrNew();
6954 /* Assign a silly value for starters */
6955 ajStrAssignC(&lastids[x], "?????");
6956 }
6957
6958 #if AJFALSE
6959 ajFmtPrint("LOOK HERE x:%d elms->n:%d\n", x, elms->n);
6960 #endif /* AJFALSE */
6961
6962 for (modn = 1U; modn <= pdbfile->modcnt; modn++)
6963 {
6964 /* Loop for each element. Set the current line to the first line in
6965 * the file */
6966 for (idx = pdbfile->idxfirst,
6967 x = 0U; x < elms->n; x++)
6968 {
6969 /* Find the chain number of the current element */
6970 if (!pdbioPdbfileChain(elms->elms[x]->chainId, pdbfile, &chn))
6971 {
6972 /* Only report errors once for each unknown id */
6973 if (modn == 1)
6974 {
6975 for (found = ajFalse, y = 0; y < n_unknown; y++)
6976 if (ajChararrGet(unknowns, y) == elms->elms[x]->chainId)
6977 {
6978 found = ajTrue;
6979 break;
6980 }
6981
6982 if (!found)
6983 {
6984 ajFmtPrintF(flog, "%-15s%c %u\n", "SECCHAIN",
6985 elms->elms[x]->chainId, idx);
6986 ajChararrPut(&unknowns, n_unknown,
6987 elms->elms[x]->chainId);
6988 n_unknown++;
6989 }
6990 }
6991 continue;
6992 }
6993
6994 /* Only want to do this once (for the first model ) */
6995 if (modn == 1)
6996 {
6997 /* Make a count of the number of beta sheets */
6998 if (elms->elms[x]->elementType == 'E')
6999 if (!ajStrMatchS(lastids[chn - 1], elms->elms[x]->elementId))
7000 {
7001 ajIntInc(&nsheets, chn - 1);
7002 ajStrAssignS(&lastids[chn - 1], elms->elms[x]->elementId);
7003 }
7004 }
7005
7006 /* Loop for two passes. z is for efficiency, if z == 0 it will
7007 * check from the current position up to the last coordinate
7008 * line, if z == 1 it will check from the first coordinate line up
7009 * to the last position checked */
7010 for (found_start = ajFalse, found_end = ajFalse,
7011 z = 0; z < 2; z++)
7012 {
7013 if (z == 0)
7014 idx_last = pdbfile->nlines;
7015 else
7016 {
7017 idx = pdbfile->idxfirst;
7018 idx_last = idx_tmp;
7019 }
7020
7021 /* Find the start and end of the current element (as an index
7022 * into the line array) */
7023 for (; idx < idx_last; idx++)
7024 {
7025 /* Find the correct chain and skip lines that are not for
7026 * amino acids */
7027 if ((pdbfile->chnn[idx] != chn) ||
7028 (pdbfile->linetype[idx] != pdbfileELinetypeCoordinate) ||
7029 (pdbfile->modn[idx] != modn))
7030 continue;
7031
7032 /* We have not found the start residue yet */
7033 if (!found_start)
7034 if (ajStrMatchS(elms->elms[x]->initSeqNum,
7035 pdbfile->pdbn[idx]))
7036 if (ajStrMatchS(elms->elms[x]->initResName,
7037 pdbfile->rtype[idx]))
7038 {
7039 /* Residue number for start found and residue
7040 * type matches */
7041 idx_start = idx;
7042 /* printf("found_start !\n"); */
7043
7044 found_start = ajTrue;
7045 }
7046
7047 if (ajStrMatchS(elms->elms[x]->endSeqNum,
7048 pdbfile->pdbn[idx]))
7049 if (ajStrMatchS(elms->elms[x]->endResName,
7050 pdbfile->rtype[idx]))
7051 {
7052 /* Residue number for end found and residue type
7053 * matches */
7054 idx_end = idx;
7055
7056 /* printf("idx_end: %d ...", idx_end); */
7057
7058 /* Set the index to the LAST atom of the residue */
7059
7060 for (; idx_end < pdbfile->nlines; idx_end++)
7061 {
7062 if (pdbfile->linetype[idx_end] !=
7063 pdbfileELinetypeCoordinate)
7064 continue;
7065
7066 if (!ajStrMatchS(elms->elms[x]->endSeqNum,
7067 pdbfile->pdbn[idx_end]) ||
7068 !ajStrMatchS(elms->elms[x]->endResName,
7069 pdbfile->rtype[idx_end]) ||
7070 pdbfile->chnn[idx_end] != chn ||
7071 pdbfile->modn[idx_end] != modn)
7072 break;
7073 }
7074
7075 idx_end--;
7076
7077 #if AJFALSE
7078 printf(" %d\n", idx_end);
7079
7080 ajFmtPrint("found_end !\n"
7081 "elms->elms[x]->endSeqNum "
7082 ": pdbfile->pdbn[idx_end] === %S : %S\n"
7083 "elms->elms[x]->endResName "
7084 ": pdbfile->rtype[idx_end] === %S : %S\n"
7085 "pdbfile->chnn[idx_end] "
7086 ": chn === %d : %d\n"
7087 "pdbfile->modn[idx_end] "
7088 ": modn === %d : %d\n",
7089 elms->elms[x]->endSeqNum,
7090 pdbfile->pdbn[idx_end + 1],
7091 elms->elms[x]->endResName,
7092 pdbfile->rtype[idx_end + 1],
7093 pdbfile->chnn[idx_end + 1],
7094 chn,
7095 pdbfile->modn[idx_end + 1],
7096 modn);
7097 #endif /* AJFALSE */
7098
7099 found_end = ajTrue;
7100 idx_tmp = idx;
7101 break;
7102 }
7103 }
7104
7105 if (found_start && found_end)
7106 break;
7107 }
7108
7109 if (!found_start || !found_end)
7110 {
7111 if (!found_start && !found_end)
7112 ajFmtPrintF(flog, "%-15s%d %d %S %S %S %S\n", "SECBOTH",
7113 chn, modn, elms->elms[x]->initResName,
7114 elms->elms[x]->initSeqNum,
7115 elms->elms[x]->endResName,
7116 elms->elms[x]->endSeqNum);
7117 else if (!found_start)
7118 ajFmtPrintF(flog, "%-15s%d %d %S %S\n", "SECSTART",
7119 chn, modn, elms->elms[x]->initResName,
7120 elms->elms[x]->initSeqNum);
7121 else if (!found_end)
7122 ajFmtPrintF(flog, "%-15s%d %d %S %S\n", "SECEND", chn,
7123 modn, elms->elms[x]->endResName,
7124 elms->elms[x]->endSeqNum);
7125 }
7126
7127 /* Assign secondary structure fields in Pdbfile object */
7128 for (idx = idx_start; idx <= idx_end; idx++)
7129 {
7130 pdbfile->elementNum[idx] = elms->elms[x]->elementNum;
7131 pdbfile->elementType[idx] = elms->elms[x]->elementType;
7132
7133 if (elms->elms[x]->elementType == 'H')
7134 pdbfile->helixClass[idx] = elms->elms[x]->helixClass;
7135
7136 ajStrAssignS(&pdbfile->elementId[idx],
7137 elms->elms[x]->elementId);
7138 }
7139
7140 /* Only want to do this once */
7141 if (modn == 1)
7142 {
7143 if (elms->elms[x]->elementType == 'H')
7144 pdbfile->numHelices[chn - 1]++;
7145 else if (elms->elms[x]->elementType == 'E')
7146 pdbfile->numStrands[chn - 1]++;
7147 else if (elms->elms[x]->elementType == 'T')
7148 pdbfile->numTurns[chn - 1]++;
7149 }
7150 }
7151 }
7152
7153 /* Assign number of sheets */
7154 if (modn == 1)
7155 for (x = 0; x < pdbfile->nchains; x++)
7156 pdbfile->numSheets[x] = ajIntGet(nsheets, x);
7157
7158 /* Tidy up and return */
7159 ajIntDel(&nsheets);
7160
7161 for (x = 0; x < pdbfile->nchains; x++)
7162 ajStrDel(&lastids[x]);
7163
7164 AJFREE(lastids);
7165
7166 ajChararrDel(&unknowns);
7167
7168 return ajTrue;
7169 }
7170
7171
7172
7173
7174 /* ======================================================================= */
7175 /* =========================== constructors ============================== */
7176 /* ======================================================================= */
7177
7178
7179
7180
7181 /* @section Constructors ******************************************************
7182 **
7183 ** All constructors return a pointer to a new instance. It is the
7184 ** responsibility of the user to first destroy any previous instance. The
7185 ** target pointer does not need to be initialised to NULL, but it is good
7186 ** programming practice to do so anyway.
7187 **
7188 ******************************************************************************/
7189
7190
7191
7192
7193 /* @func ajPdbReadRawNew *****************************************************
7194 **
7195 ** Reads a pdb file and returns a pointer to a filled Pdb object.
7196 **
7197 ** The pdb id is derived from the file name and extension of the pdb file
7198 ** (these are passed in by argument).
7199 **
7200 ** @param [u] inf [AjPFile] Pointer to pdb file
7201 ** @param [r] pdbid [const AjPStr] PDB id code of pdb file
7202 ** @param [r] min_chain_size [ajint] Minimum number of amino acids in a chain
7203 ** @param [r] max_mismatch [ajint] Maximum number of permissible mismatches
7204 ** between the ATOM and SEQRES sequences
7205 ** @param [r] max_trim [ajint] Max. no. residues to trim when checking
7206 ** for missing N- or C-terminal ATOM or
7207 ** SEQRES sequences.
7208 ** @param [r] camask [AjBool] Whether to mask non-amino acid groups
7209 ** within protein chains which do not have a C-alpha atom.
7210 ** @param [r] camask1 [AjBool] Whether to mask amino acid residues
7211 ** within protein chains which do not have a C-alpha atom.
7212 ** @param [r] atommask [AjBool] Whether to mask residues or groups
7213 ** in protein chains with a single atom only.
7214 ** @param [u] flog [AjPFile] Pointer to log file (build diagnostics)
7215 **
7216 ** @return [AjPPdb] pdb object pointer, or NULL on failure.
7217 **
7218 ** @release 2.9.0
7219 ** @@
7220 ******************************************************************************/
7221
ajPdbReadRawNew(AjPFile inf,const AjPStr pdbid,ajint min_chain_size,ajint max_mismatch,ajint max_trim,AjBool camask,AjBool camask1,AjBool atommask,AjPFile flog)7222 AjPPdb ajPdbReadRawNew(AjPFile inf, const AjPStr pdbid, ajint min_chain_size,
7223 ajint max_mismatch, ajint max_trim, AjBool camask,
7224 AjBool camask1, AjBool atommask, AjPFile flog)
7225 {
7226 AjPPdbfile pdbfile = NULL; /* Pdbfile structure (for raw data) */
7227 AjPPdb ret = NULL; /* Pdb structure (for parsed data) */
7228 AjPElements elms = NULL; /* Elements structure (for parsed data) */
7229
7230
7231 if (!inf || !flog)
7232 {
7233 ajWarn("Null arg passed to ajPdbReadRawNew");
7234
7235 return NULL;
7236 }
7237
7238
7239 /* Write pdbfile structure */
7240 if (!(pdbfile = pdbioReadLines(inf)))
7241 return NULL;
7242
7243 /* Allocate Elements object */
7244 elms = pdbioElementsNew(0);
7245
7246
7247 ajStrAssignS(&(pdbfile->pdbid), pdbid);
7248 ajStrFmtLower(&(pdbfile->pdbid));
7249
7250
7251 /* Initial read of pdb file, read sequences for chains from SEQRES
7252 * records, mark lines up to ignore or as coordinate lines, assigning
7253 * initial residue numbers, read bibliographic information etc. */
7254 if (!pdbioFirstPass(pdbfile, flog, &elms, camask))
7255 {
7256 pdbioPdbfileDel(&pdbfile);
7257 pdbioElementsDel(&elms);
7258
7259 return NULL;
7260 }
7261
7262 #if AJFALSE
7263 /* DIAGNOSTIC */
7264 pdbioDiagnostic(&pdbfile, 0);
7265 pdbioDiagnostic(&pdbfile, 1);
7266 #endif /* AJFALSE */
7267
7268
7269 /* Check that SEQRES records contain protein chains. Check that chain
7270 * id's are unique */
7271 if (!pdbioCheckChains(pdbfile, flog, min_chain_size))
7272 {
7273 pdbioPdbfileDel(&pdbfile);
7274 pdbioElementsDel(&elms);
7275
7276 return NULL;
7277 }
7278
7279
7280 /* Check for correct number of TER records. Mask unwanted TER records */
7281 if (!pdbioCheckTer(pdbfile, flog))
7282 {
7283 pdbioPdbfileDel(&pdbfile);
7284 pdbioElementsDel(&elms);
7285
7286 return NULL;
7287 }
7288
7289
7290 /* Assign model and chain number to each coordinate line. Mark up
7291 * non-protein coordinates */
7292 if (!pdbioNumberChains(pdbfile, flog))
7293 {
7294 pdbioPdbfileDel(&pdbfile);
7295 pdbioElementsDel(&elms);
7296
7297 return NULL;
7298 }
7299
7300 /* Mask any ATOM or HETATM records with chain id's of chains of
7301 * non-proteins or chains that have non-unique id's (chainok==ajFalse).
7302 * Check that ATOM records contain protein chains. */
7303 if (!pdbioMaskChains(pdbfile, flog, min_chain_size, camask,
7304 camask1, atommask))
7305 {
7306 pdbioElementsDel(&elms);
7307 pdbioPdbfileDel(&pdbfile);
7308
7309 return NULL;
7310
7311 }
7312
7313 #if AJFALSE
7314 /* DIAGNOSTIC */
7315 pdbioDiagnostic(pdbfile, 0);
7316 pdbioDiagnostic(pdbfile, 1);
7317 #endif /* AJFALSE */
7318
7319 /* Standardise residue numbering */
7320 if (!pdbioStandardiseNumbering(pdbfile, flog))
7321 {
7322 pdbioPdbfileDel(&pdbfile);
7323 pdbioElementsDel(&elms);
7324
7325 return NULL;
7326 }
7327
7328
7329 /* Find correct residue numbering */
7330 if (!pdbioAlignNumbering(pdbfile, flog, max_mismatch, max_trim))
7331 {
7332 pdbioPdbfileDel(&pdbfile);
7333 pdbioElementsDel(&elms);
7334 return NULL;
7335 }
7336
7337 if (!pdbioWriteElementData(pdbfile, flog, elms))
7338 {
7339 pdbioPdbfileDel(&pdbfile);
7340 pdbioElementsDel(&elms);
7341
7342 return NULL;
7343 }
7344
7345 pdbioElementsDel(&elms);
7346
7347 /* Copy data from Pdbfile object to Pdb object. pdbioPdbfileToPdb creates the
7348 * Pdb object (ret) */
7349 if (!pdbioPdbfileToPdb(&ret, pdbfile))
7350 {
7351 pdbioPdbfileDel(&pdbfile);
7352 ajPdbDel(&ret);
7353
7354 return NULL;
7355 }
7356
7357
7358 /* Tidy up and return */
7359 pdbioPdbfileDel(&pdbfile);
7360
7361 return ret;
7362 }
7363
7364
7365
7366
7367 /* ======================================================================= */
7368 /* =========================== destructors =============================== */
7369 /* ======================================================================= */
7370
7371
7372
7373
7374 /* @section Structure Destructors *********************************************
7375 **
7376 ** All destructor functions receive the address of the instance to be
7377 ** deleted. The original pointer is set to NULL so is ready for re-use.
7378 **
7379 ******************************************************************************/
7380
7381
7382
7383
7384 /* ======================================================================= */
7385 /* ============================ Assignments ============================== */
7386 /* ======================================================================= */
7387
7388
7389
7390
7391 /* @section Assignments *******************************************************
7392 **
7393 ** These functions overwrite the instance provided as the first argument
7394 ** A NULL value is always acceptable so these functions are often used to
7395 ** create a new instance by assignment.
7396 **
7397 ******************************************************************************/
7398
7399
7400
7401
7402 /* ======================================================================= */
7403 /* ============================= Modifiers =============================== */
7404 /* ======================================================================= */
7405
7406
7407
7408
7409 /* @section Modifiers *********************************************************
7410 **
7411 ** These functions use the contents of an instance and update them.
7412 **
7413 ******************************************************************************/
7414
7415
7416
7417
7418 /* ======================================================================= */
7419 /* ========================== Operators ===================================*/
7420 /* ======================================================================= */
7421
7422
7423
7424
7425 /* @section Operators *********************************************************
7426 **
7427 ** These functions use the contents of an instance but do not make any
7428 ** changes.
7429 **
7430 ******************************************************************************/
7431
7432
7433
7434
7435 /* ======================================================================= */
7436 /* ============================== Casts ================================== */
7437 /* ======================================================================= */
7438
7439
7440
7441
7442 /* @section Casts *************************************************************
7443 **
7444 ** These functions examine the contents of an instance and return some
7445 ** derived information. Some of them provide access to the internal
7446 ** components of an instance. They are provided for programming convenience
7447 ** but should be used with caution.
7448 **
7449 ******************************************************************************/
7450
7451
7452
7453
7454 /* ======================================================================= */
7455 /* =========================== Reporters ==================================*/
7456 /* ======================================================================= */
7457
7458
7459
7460
7461 /* @section Reporters *********************************************************
7462 **
7463 ** These functions return the contents of an instance but do not make any
7464 ** changes.
7465 **
7466 ******************************************************************************/
7467
7468
7469
7470
7471 /* ======================================================================= */
7472 /* ========================== Input & Output ============================= */
7473 /* ======================================================================= */
7474
7475
7476
7477
7478 /* @section Input & output ****************************************************
7479 **
7480 ** These functions are used for formatted input and output to file.
7481 **
7482 ******************************************************************************/
7483
7484
7485
7486
7487 /* @func ajPdbWriteDomainRecordRaw ********************************************
7488 **
7489 ** Writes lines to a PDB file. What is written depends upon the mode:
7490 ** ajEPdbioModeHeaderDomain Header line for domain PDB file.
7491 ** ajEPdbioModeSeqresDomain SEQRES records for domain.
7492 ** ajEPdbioModeAtomPdbDomain ATOM records for domain using original residue
7493 ** numbers.
7494 ** ajEPdbioModeAtomIdxDomain ATOM records for domain using residues numbers
7495 ** that give correct index into SEQRES sequence.
7496 **
7497 ** @param [u] mode [AjEPdbioMode] Mode that controls what is printed: one of
7498 ** ajEPdbioModeHeaderDomain, ajEPdbioModeSeqresDomain,
7499 ** ajEPdbioModeAtomPdbDomain, ajEPdbioModeAtomIdxDomain
7500 **
7501 ** @param [r] pdb [const AjPPdb] Pdb object
7502 ** @param [r] mod [ajint] Model number
7503 ** @param [r] scop [const AjPScop] Scop object for domain
7504 ** @param [w] outf [AjPFile] Output file stream
7505 ** @param [w] errf [AjPFile] Output file stream for error messages
7506 **
7507 ** @return [AjBool] True on success
7508 **
7509 ** @release 2.9.0
7510 ** @@
7511 ******************************************************************************/
7512
ajPdbWriteDomainRecordRaw(AjEPdbioMode mode,const AjPPdb pdb,ajint mod,const AjPScop scop,AjPFile outf,AjPFile errf)7513 AjBool ajPdbWriteDomainRecordRaw(AjEPdbioMode mode, const AjPPdb pdb,
7514 ajint mod, const AjPScop scop,
7515 AjPFile outf, AjPFile errf)
7516 {
7517 /* Check args */
7518 if (!outf || !scop)
7519 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7520
7521 switch (mode)
7522 {
7523 case ajEPdbioModeHeaderDomain:
7524 if (!pdbioWriteHeaderScop(outf, scop))
7525 return ajFalse;
7526 break;
7527
7528 case ajEPdbioModeSeqresDomain:
7529 if (!errf || !pdb)
7530 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7531
7532 if (!pdbioWriteSeqresDomain(errf, outf, pdb, scop))
7533 return ajFalse;
7534 break;
7535
7536 case ajEPdbioModeAtomPdbDomain:
7537 if (!errf || !pdb)
7538 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7539
7540 if (!pdbioWriteAtomDomainPdb(errf, outf, pdb, scop, mod))
7541 return ajFalse;
7542 break;
7543
7544 case ajEPdbioModeAtomIdxDomain:
7545 if (!errf || !pdb)
7546 ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw");
7547
7548 if (!pdbioWriteAtomDomainIdx(errf, outf, pdb, scop, mod))
7549 return ajFalse;
7550 break;
7551
7552 default:
7553 ajFatal("Invalid mode in ajPdbWriteDomainRecordRaw");
7554 }
7555
7556 return ajTrue;
7557 }
7558
7559
7560
7561
7562 /* @func ajPdbWriteRecordRaw **************************************************
7563 **
7564 ** Writes lines in pdb format to a PDB file. What is written depends upon
7565 ** the mode:
7566 ** ajEPdbioModeSeqResChain SEQRES records for a chain.
7567 ** ajEPdbioModeAtomPdbChain ATOM records for chain using original residue
7568 ** numbers.
7569 ** ajEPdbioModeAtomIdxChain ATOM records for domain using residues numbers that
7570 ** give correct index into SEQRES sequence.
7571 ** ajEPdbioModeHeterogen ATOM line for a heterogen (small ligand).
7572 ** ajEPdbioModeHeader Header line.
7573 ** ajEPdbioModeTitle Title line.
7574 ** ajEPdbioModeCompnd COMPND records (info. on compound)
7575 ** ajEPdbioModeSource SOURCE records (info. on protein source)
7576 ** ajEPdbioModeEmptyRemark An empty REMARK record.
7577 ** ajEPdbioModeResolution Record with resolution of the structure.
7578 **
7579 ** @param [u] mode [AjEPdbioMode] Mode that controls what is printed: one of
7580 ** ajEPdbioModeSeqResChain, ajEPdbioModeAtomPdbChain,
7581 ** ajEPdbioModeAtomIdxChain, ajEPdbioModeHeterogen,
7582 ** ajEPdbioModeHeader, ajEPdbioModeTitle,
7583 ** ajEPdbioModeCompnd, ajEPdbioModeSource,
7584 ** ajEPdbioModeEmptyRemark, ajEPdbioModeResolution.
7585 ** @param [r] pdb [const AjPPdb] Pdb object
7586 ** @param [r] mod [ajint] Model number.
7587 ** @param [r] chn [ajint] Chain number.
7588 ** @param [w] outf [AjPFile] Output file stream
7589 ** @param [w] errf [AjPFile] Output file stream for error messages
7590 **
7591 ** @return [AjBool] True on success
7592 **
7593 ** @release 2.9.0
7594 ** @@
7595 ******************************************************************************/
7596
ajPdbWriteRecordRaw(AjEPdbioMode mode,const AjPPdb pdb,ajint mod,ajint chn,AjPFile outf,AjPFile errf)7597 AjBool ajPdbWriteRecordRaw(AjEPdbioMode mode, const AjPPdb pdb, ajint mod,
7598 ajint chn, AjPFile outf, AjPFile errf)
7599 {
7600 /* Check args */
7601 if (!outf || !pdb)
7602 ajFatal("Invalid args passed to ajPdbWriteRecordRaw");
7603
7604 switch (mode)
7605 {
7606 case ajEPdbioModeSeqResChain:
7607 if (!errf)
7608 ajFatal("Invalid args passed to ajPdbWriteRecordRaw");
7609
7610 if (!pdbioWriteSeqresChain(errf, outf, pdb, chn))
7611 return ajFalse;
7612 break;
7613
7614 case ajEPdbioModeAtomPdbChain:
7615 if (!pdbioWriteAtomChain(outf, pdb, mod, chn, ajEPdbModePdb))
7616 return ajFalse;
7617 break;
7618
7619 case ajEPdbioModeAtomIdxChain:
7620 if (!pdbioWriteAtomChain(outf, pdb, mod, chn, ajEPdbModeIdx))
7621 return ajFalse;
7622 break;
7623
7624 case ajEPdbioModeHeterogen:
7625 if (!pdbioWriteHeterogen(outf, pdb, mod))
7626 return ajFalse;
7627 break;
7628
7629 case ajEPdbioModeHeader:
7630 if (!pdbioWriteHeader(outf, pdb))
7631 return ajFalse;
7632 break;
7633
7634 case ajEPdbioModeTitle:
7635 if (!pdbioWriteTitle(outf, pdb))
7636 return ajFalse;
7637 break;
7638
7639 case ajEPdbioModeCompnd:
7640 if (!pdbioWriteCompnd(outf, pdb))
7641 return ajFalse;
7642 break;
7643
7644 case ajEPdbioModeSource:
7645 if (!pdbioWriteSource(outf, pdb))
7646 return ajFalse;
7647 break;
7648
7649 case ajEPdbioModeEmptyRemark:
7650 if (!pdbioWriteEmptyRemark(outf, pdb))
7651 return ajFalse;
7652 break;
7653
7654 case ajEPdbioModeResolution:
7655 if (!pdbioWriteResolution(outf, pdb))
7656 return ajFalse;
7657 break;
7658
7659 default:
7660 ajFatal("Invalid mode in ajPdbWriteRecordRaw");
7661 }
7662
7663 return ajTrue;
7664 }
7665
7666
7667
7668
7669 /* @func ajPdbWriteAllRaw *****************************************************
7670 **
7671 ** Writes a pdb file for a protein.
7672 **
7673 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
7674 ** or ajEPdbModeIdx if the original or
7675 ** corrected residue number is to be used.
7676 ** @param [r] pdb [const AjPPdb] Pdb object
7677 ** @param [w] outf [AjPFile] Output file stream
7678 ** @param [w] errf [AjPFile] Output file stream for error messages
7679 **
7680 ** @return [AjBool] True on success
7681 **
7682 ** @release 2.9.0
7683 ** @@
7684 ******************************************************************************/
7685
ajPdbWriteAllRaw(AjEPdbMode mode,const AjPPdb pdb,AjPFile outf,AjPFile errf)7686 AjBool ajPdbWriteAllRaw(AjEPdbMode mode, const AjPPdb pdb,
7687 AjPFile outf, AjPFile errf)
7688 {
7689 ajuint i = 0U;
7690 ajuint j = 0U;
7691
7692 /* Write bibliographic info. */
7693 ajPdbWriteRecordRaw(ajEPdbioModeHeader, pdb, 0, 0, outf, NULL);
7694 ajPdbWriteRecordRaw(ajEPdbioModeTitle, pdb, 0, 0, outf, NULL);
7695 ajPdbWriteRecordRaw(ajEPdbioModeCompnd, pdb, 0, 0, outf, NULL);
7696 ajPdbWriteRecordRaw(ajEPdbioModeSource, pdb, 0, 0, outf, NULL);
7697 ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7698 ajPdbWriteRecordRaw(ajEPdbioModeResolution, pdb, 0, 0, outf, NULL);
7699 ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7700
7701 /* Write SEQRES records */
7702 for (i = 0U; i < pdb->Nchn; i++)
7703 if (!ajPdbWriteRecordRaw(
7704 ajEPdbioModeSeqResChain, pdb, 0, i + 1, outf, errf))
7705 {
7706 ajWarn("Error writing file in ajPdbWriteAllRaw");
7707
7708 return ajFalse;
7709 }
7710
7711
7712 /* Loop for each model */
7713 for (j = 0U; j < pdb->Nmod; j++)
7714 {
7715 /* Write the MODEL record */
7716 if (pdb->Method == ajEPdbMethodNmr)
7717 ajFmtPrintF(outf, "MODEL%9d%66s\n", j + 1, " ");
7718
7719
7720 /* Write ATOM/HETATM records */
7721 for (i = 0U; i < pdb->Nchn; i++)
7722 {
7723 switch (mode)
7724 {
7725 case ajEPdbModePdb:
7726 if (!ajPdbWriteRecordRaw(
7727 ajEPdbioModeAtomPdbChain, pdb, j + 1, i + 1,
7728 outf, NULL))
7729 {
7730 ajWarn("Error writing file in ajPdbWriteAllRaw");
7731
7732 return ajFalse;
7733 }
7734 break;
7735
7736 case ajEPdbModeIdx:
7737 if (!ajPdbWriteRecordRaw(
7738 ajEPdbioModeAtomIdxChain, pdb, j + 1, i + 1,
7739 outf, NULL))
7740 {
7741 ajWarn("Error writing file in ajPdbWriteAllRaw");
7742
7743 return ajFalse;
7744 }
7745 break;
7746
7747 default:
7748 ajFatal("Invalid mode in ajPdbWriteAllRaw");
7749 }
7750
7751
7752 if (!ajPdbWriteRecordRaw(
7753 ajEPdbioModeHeterogen, pdb, j + 1, 0,
7754 outf, NULL))
7755 {
7756 ajWarn("Error writing file in ajPdbWriteAllRaw");
7757
7758 return ajFalse;
7759 }
7760
7761
7762 /* Write ENDMDL record */
7763 if (pdb->Method == ajEPdbMethodNmr)
7764 ajFmtPrintF(outf, "%-80s\n", "ENDMDL");
7765 }
7766
7767 }
7768
7769 /* Write END record */
7770 ajFmtPrintF(outf, "%-80s\n", "END");
7771
7772 return ajTrue;
7773
7774 }
7775
7776
7777
7778
7779 /* @func ajPdbWriteDomainRaw **************************************************
7780 **
7781 ** Writes a pdb file for a SCOP domain. Where coordinates for multiple
7782 ** models (e.g. NMR structures) are given, data for model 1 are written.
7783 ** Coordinates are taken from a Pdb structure, domain definition is taken
7784 ** from a Scop structure.
7785 ** In the pdb file, the coordinates are presented as belonging to a single
7786 ** chain regardless of how many chains the domain comprised.
7787 ** Coordinates for heterogens are NOT written to file.
7788 **
7789 ** @param [u] mode [AjEPdbMode] AJAX PDB Mode enumeration, either ajEPdbModePdb
7790 ** or ajEPdbModeIdx if the original or
7791 ** corrected residue number is to be used.
7792 ** @param [r] pdb [const AjPPdb] Pdb object
7793 ** @param [r] scop [const AjPScop] Scop object
7794 ** @param [w] outf [AjPFile] Output file stream
7795 ** @param [w] errf [AjPFile] Output file stream for error messages
7796 **
7797 ** @return [AjBool] True on success
7798 **
7799 ** @release 2.9.0
7800 ** @@
7801 ******************************************************************************/
7802
ajPdbWriteDomainRaw(AjEPdbMode mode,const AjPPdb pdb,const AjPScop scop,AjPFile outf,AjPFile errf)7803 AjBool ajPdbWriteDomainRaw(AjEPdbMode mode, const AjPPdb pdb,
7804 const AjPScop scop,
7805 AjPFile outf, AjPFile errf)
7806 {
7807 ajuint i = 0U; /* A counter */
7808 ajuint chn = 0U; /* No. of the chain in the pdb structure */
7809
7810 if (!pdb || !scop || !outf || !errf)
7811 ajFatal("Bad args passed to ajPdbWriteDomainRaw");
7812
7813 /* Check for errors in chain identifier and length */
7814 for (i = 0U; i < scop->Number; i++)
7815 if (!ajPdbChnidToNum(scop->Chain[i], pdb, &chn))
7816 {
7817 ajWarn("Chain incompatibility error in "
7818 "ajPdbWriteDomainRaw");
7819 ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility error "
7820 "in ajPdbWriteDomainRaw\n", scop->Entry);
7821
7822 return ajFalse;
7823 }
7824 else if (pdb->Chains[chn - 1]->Nres == 0)
7825 {
7826 ajWarn("Chain length zero");
7827 ajFmtPrintF(errf, "//\n%S\nERROR Chain length zero\n",
7828 scop->Entry);
7829
7830 return ajFalse;
7831 }
7832
7833
7834
7835 /* Write bibliographic info. */
7836 ajPdbWriteDomainRecordRaw(
7837 ajEPdbioModeHeaderDomain, NULL, 0, scop, outf, NULL);
7838
7839 ajPdbWriteRecordRaw(ajEPdbioModeTitle, pdb, 0, 0, outf, NULL);
7840 ajPdbWriteRecordRaw(ajEPdbioModeCompnd, pdb, 0, 0, outf, NULL);
7841 ajPdbWriteRecordRaw(ajEPdbioModeSource, pdb, 0, 0, outf, NULL);
7842 ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7843 ajPdbWriteRecordRaw(ajEPdbioModeResolution, pdb, 0, 0, outf, NULL);
7844 ajPdbWriteRecordRaw(ajEPdbioModeEmptyRemark, pdb, 0, 0, outf, NULL);
7845
7846
7847 /* Write SEQRES records */
7848 if (!ajPdbWriteDomainRecordRaw(ajEPdbioModeSeqresDomain, pdb, 0, scop,
7849 outf, errf))
7850 {
7851 ajWarn("Error writing file in ajPdbWriteDomainRaw");
7852
7853 return ajFalse;
7854 }
7855
7856
7857 /* Write MODEL record, if appropriate */
7858 if (pdb->Method == ajEPdbMethodNmr)
7859 ajFmtPrintF(outf, "MODEL%9d%66s\n", 1, " ");
7860
7861
7862 /* Write ATOM/HETATM records */
7863 if (!pdbioWriteAtomDomain(errf, outf, pdb, scop, 1, mode))
7864 {
7865 ajWarn("Error writing file in ajPdbWriteDomainRaw");
7866
7867 return ajFalse;
7868 }
7869
7870
7871 /* Write END/ENDMDL records */
7872 if (pdb->Method == ajEPdbMethodNmr)
7873 ajFmtPrintF(outf, "%-80s\n", "ENDMDL");
7874
7875 ajFmtPrintF(outf, "%-80s\n", "END");
7876
7877 return ajTrue;
7878 }
7879