1 // SEQBUILD.CPP
2
3 // Copyright (C) 1998 Tommi Hassinen, Geoff Hutchison.
4
5 // This package is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
9
10 // This package is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14
15 // You should have received a copy of the GNU General Public License
16 // along with this package; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
19 /*################################################################################################*/
20
21 #include "libghemicalconfig2.h"
22 #include "seqbuild.h"
23
24 #include "v3d.h"
25
26 #include "local_i18n.h"
27 #include "notice.h"
28
29 #include <cstring>
30 #include <iomanip>
31 #include <iostream>
32 #include <sstream>
33 using namespace std;
34
35 /*################################################################################################*/
36
sb_chain_descriptor(bool m1l)37 sb_chain_descriptor::sb_chain_descriptor(bool m1l)
38 {
39 mode_1_letter = m1l;
40
41 seq1 = NULL;
42 seq3 = NULL;
43
44 curr_res = NOT_DEFINED;
45 c_crd_BGN = c_crd_END = NOT_DEFINED;
46 c_tor_BGN = c_tor_END = NOT_DEFINED;
47
48 if (mode_1_letter) seq1 = new vector<char>;
49 else seq3 = new vector<char *>;
50 }
51
~sb_chain_descriptor(void)52 sb_chain_descriptor::~sb_chain_descriptor(void)
53 {
54 if (seq1 != NULL)
55 {
56 delete seq1;
57 seq1 = NULL;
58 }
59
60 if (seq3 != NULL)
61 {
62 for (int n1 = 0;n1 < (int) seq3->size();n1++)
63 {
64 char * tmps = seq3->operator[](n1);
65 delete[] tmps;
66 }
67
68 delete seq3;
69 seq3 = NULL;
70 }
71 }
72
AddRes1(char r1l)73 int sb_chain_descriptor::AddRes1(char r1l)
74 {
75 seq1->push_back(r1l);
76 return (int) seq1->size();
77 }
78
AddRes3(const char * r3l)79 int sb_chain_descriptor::AddRes3(const char * r3l)
80 {
81 if (strlen(r3l) != 3)
82 {
83 assertion_failed(__FILE__, __LINE__, "bad input");
84 }
85
86 char * tmps = new char[4];
87 strcpy(tmps, r3l);
88
89 seq3->push_back(tmps);
90 return (int) seq3->size();
91 }
92
93 // add an XYZ constraint for an atom in a residue which was last added.
94 // USE THIS WITH GREAT CARE! ; this has been added for a special purpose.
95 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96 // the constraints are forced to be added in sequence order...
97
AddCRD(int atm_id,float x,float y,float z)98 void sb_chain_descriptor::AddCRD(int atm_id, float x, float y, float z)
99 {
100 int lastres = ((int) (mode_1_letter ? seq1->size() : seq3->size())) - 1;
101
102 sb_constraint_crd newcrd;
103 newcrd.pos = lastres;
104
105 newcrd.atm_id = atm_id;
106 newcrd.crdX = x; newcrd.crdY = y; newcrd.crdZ = z;
107
108 c_crd_v.push_back(newcrd);
109 }
110
111 // add a torsional constraint for a residue which was last added.
112 // USE THIS WITH GREAT CARE! ; this has been added for a special purpose.
113 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
114 // the constraints are forced to be added in sequence order...
115
AddTOR(int tor_ind,float tor_val)116 void sb_chain_descriptor::AddTOR(int tor_ind, float tor_val)
117 {
118 int lastres = ((int) (mode_1_letter ? seq1->size() : seq3->size())) - 1;
119
120 sb_constraint_tor newtor;
121 newtor.pos = lastres;
122
123 newtor.tor_ind = tor_ind;
124 newtor.tor_val = tor_val;
125
126 c_tor_v.push_back(newtor);
127 }
128
129 /*################################################################################################*/
130
sequencebuilder(chn_info::chn_type ct,const char * filename)131 sequencebuilder::sequencebuilder(chn_info::chn_type ct, const char * filename)
132 {
133 type = ct;
134
135 ifstream file;
136 file.unsetf(ios::dec | ios::oct | ios::hex);
137
138 model::OpenLibDataFile(file, false, filename);
139
140 // read information about the main chain...
141
142 while (file.peek() != 'M') file.getline(buffer, sizeof(buffer));
143 file.getline(buffer, sizeof(buffer));
144
145 while (file.peek() != 'E')
146 {
147 if (file.peek() == 'T')
148 {
149 sb_data_td newtd; file >> newtd;
150 file.getline(buffer, sizeof(buffer));
151 td_mc_store.push_back(newtd);
152 continue;
153 }
154
155 if (file.peek() == 'A')
156 {
157 sb_data_atm newatm; file >> newatm;
158 while (file.peek() != '(') file.get();
159 newatm.tr = new typerule(& file, & cout);
160 file.getline(buffer, sizeof(buffer));
161 main_vector.push_back(newatm);
162 continue;
163 }
164
165 file.getline(buffer, sizeof(buffer));
166 }
167
168 while (file.peek() != 'C') file.getline(buffer, sizeof(buffer));
169 file.getline(buffer, sizeof(buffer));
170
171 while (file.peek() != 'E')
172 {
173 if (file.peek() == 'T')
174 {
175 sb_data_td newtd; file >> newtd;
176 file.getline(buffer, sizeof(buffer));
177 td_mc_store.push_back(newtd);
178 continue;
179 }
180
181 if (file.peek() == 'A')
182 {
183 sb_data_atm newatm; file >> newatm;
184 while (file.peek() != '(') file.get();
185 newatm.tr = new typerule(& file, & cout);
186 file.getline(buffer, sizeof(buffer));
187 conn_vector.push_back(newatm);
188 continue;
189 }
190
191 file.getline(buffer, sizeof(buffer));
192 }
193
194 // read descriptions about the chain initiation...
195
196 while (file.peek() != 'H') file.getline(buffer, sizeof(buffer));
197 file.getline(buffer, sizeof(buffer));
198
199 while (file.peek() != 'E')
200 {
201 typerule newrule = typerule(& file, & cout);
202 file.getline(buffer, sizeof(buffer));
203 head_vector.push_back(newrule);
204 }
205
206 // read descriptions about the chain termination...
207
208 while (file.peek() != 'T') file.getline(buffer, sizeof(buffer));
209 file.getline(buffer, sizeof(buffer));
210
211 while (file.peek() != 'E')
212 {
213 typerule newrule = typerule(& file, & cout);
214 file.getline(buffer, sizeof(buffer));
215 tail_vector.push_back(newrule);
216 }
217
218 // read the default modifications for residues...
219
220 while (file.peek() != 'B') file.getline(buffer, sizeof(buffer)); // default...
221 file.getline(buffer, sizeof(buffer)); mod[0] = new sb_data_res; mod[0]->ReadModification(file);
222 if (mod[0]->td_vector.size() > 0)
223 {
224 assertion_failed(__FILE__, __LINE__, "BODY_MOD should not have TORDEF lines!");
225 }
226
227 while (file.peek() != 'H') file.getline(buffer, sizeof(buffer)); // head-mods
228 file.getline(buffer, sizeof(buffer)); mod[1] = new sb_data_res; mod[1]->ReadModification(file);
229 if (mod[1]->td_vector.size() > 0)
230 {
231 assertion_failed(__FILE__, __LINE__, "HEAD_MOD should not have TORDEF lines!");
232 }
233
234 while (file.peek() != 'T') file.getline(buffer, sizeof(buffer)); // tail-mods
235 file.getline(buffer, sizeof(buffer)); mod[2] = new sb_data_res; mod[2]->ReadModification(file);
236 if (mod[2]->td_vector.size() > 0)
237 {
238 assertion_failed(__FILE__, __LINE__, "TAIL_MOD should not have TORDEF lines!");
239 }
240
241 // read descriptions for the residues...
242
243 while (file.peek() != 'E')
244 {
245 if (file.peek() == 'R')
246 {
247 sb_data_res newres; file >> newres;
248 resi_vector.push_back(newres);
249 }
250 else file.getline(buffer, sizeof(buffer));
251 }
252
253 // ready!!!
254
255 file.close();
256 }
257
~sequencebuilder(void)258 sequencebuilder::~sequencebuilder(void)
259 {
260 delete mod[0];
261 delete mod[1];
262 delete mod[2];
263 }
264
265 // here we use only the first coordinate set, no matter how many there are...
266
Build(model * mdl,sb_chain_descriptor * chde,bool do_centering)267 void sequencebuilder::Build(model * mdl, sb_chain_descriptor * chde, bool do_centering)
268 {
269 bool bad_chde = false;
270 if (chde->seq1 == NULL && chde->seq3 == NULL) bad_chde = true;
271 if (chde->mode_1_letter)
272 {
273 if (chde->seq1 == NULL) bad_chde = true;
274 }
275 else
276 {
277 if (chde->seq3 == NULL) bad_chde = true;
278 }
279
280 if (bad_chde)
281 {
282 assertion_failed(__FILE__, __LINE__, "bad input");
283 }
284
285 const int num_res = (chde->mode_1_letter ? chde->seq1->size() : chde->seq3->size());
286 const int num_cs = mdl->GetCRDSetCount();
287
288 fGL b1crd[3] = { 1.0, 1.0, 0.0 }; // these should be const...
289 fGL b2crd[3] = { 1.0, 0.0, 0.0 };
290 fGL b3crd[3] = { 0.0, 0.0, 0.0 };
291
292 atom base[3] =
293 {
294 atom(element(), b1crd, num_cs), // these should be const...
295 atom(element(), b2crd, num_cs),
296 atom(element(), b3crd, num_cs)
297 };
298
299 id_vector.resize(0);
300 atom_vector.resize(0);
301 allatm_vector.resize(0);
302
303 id_vector.push_back(main_vector[0].prev[0]); atom_vector.push_back(& base[0]);
304 id_vector.push_back(main_vector[0].prev[1]); atom_vector.push_back(& base[1]);
305 id_vector.push_back(main_vector[0].prev[2]); atom_vector.push_back(& base[2]);
306
307 for (i32u n1 = 0;n1 < num_res;n1++)
308 {
309 bool first_res = (n1 == 0);
310 bool last_res = (n1 == (num_res - 1));
311
312 chde->curr_res = n1;
313
314 chde->c_crd_BGN = chde->c_crd_END = NOT_DEFINED;
315 for (i32u n2 = 0;n2 < chde->c_crd_v.size();n2++)
316 {
317 if (chde->c_crd_BGN < 0 && chde->c_crd_v[n2].pos == n1) chde->c_crd_BGN = n2;
318 if (chde->c_crd_BGN >= 0 && chde->c_crd_END < 0 && chde->c_crd_v[n2].pos > n1) chde->c_crd_END = n2;
319 } if (chde->c_crd_BGN >= 0 && chde->c_crd_END < 0) chde->c_crd_END = chde->c_crd_v.size();
320
321 chde->c_tor_BGN = chde->c_tor_END = NOT_DEFINED;
322 for (i32u n2 = 0;n2 < chde->c_tor_v.size();n2++)
323 {
324 if (chde->c_tor_BGN < 0 && chde->c_tor_v[n2].pos == n1) chde->c_tor_BGN = n2;
325 if (chde->c_tor_BGN >= 0 && chde->c_tor_END < 0 && chde->c_tor_v[n2].pos > n1) chde->c_tor_END = n2;
326 } if (chde->c_tor_BGN >= 0 && chde->c_tor_END < 0) chde->c_tor_END = chde->c_tor_v.size();
327
328 td_v.resize(0);
329 for (i32u n2 = 0;n2 < td_mc_store.size();n2++)
330 {
331 td_v.push_back(td_mc_store[n2]);
332 }
333
334 // build main chain.
335
336 for (i32u n2 = 0;n2 < main_vector.size();n2++)
337 {
338 fGL crd[3];
339 Convert(chde, & main_vector[n2], crd);
340
341 id_vector.push_back(main_vector[n2].id);
342 atom newatom(main_vector[n2].el, crd, num_cs);
343
344 newatom.formal_charge = main_vector[n2].f_chrg;
345
346 mdl->AddAtom_lg(newatom);
347 atom_vector.push_back(& mdl->GetLastAtom());
348 allatm_vector.push_back(& mdl->GetLastAtom());
349
350 if (!first_res || n2 != 0)
351 {
352 i32s p_ind = 0;
353 while (id_vector[p_ind] != main_vector[n2].prev[0]) p_ind++;
354 atom * prev = atom_vector[p_ind];
355
356 bond newbond(prev, & mdl->GetLastAtom(), main_vector[n2].bt);
357 mdl->AddBond(newbond);
358 }
359 }
360
361 // build connection (if needed).
362
363 if (!last_res)
364 {
365 for (i32u n2 = 0;n2 < conn_vector.size();n2++)
366 {
367 fGL crd[3];
368 Convert(chde, & conn_vector[n2], crd);
369
370 id_vector.push_back(conn_vector[n2].id);
371 atom newatom(conn_vector[n2].el, crd, num_cs);
372
373 newatom.formal_charge = conn_vector[n2].f_chrg;
374
375 mdl->AddAtom_lg(newatom);
376 atom_vector.push_back(& mdl->GetLastAtom());
377 allatm_vector.push_back(& mdl->GetLastAtom());
378
379 i32s p_ind = 0;
380 while (id_vector[p_ind] != conn_vector[n2].prev[0]) p_ind++;
381 atom * prev = atom_vector[p_ind];
382
383 bond newbond(prev, & mdl->GetLastAtom(), conn_vector[n2].bt);
384 mdl->AddBond(newbond);
385 }
386 }
387
388 if (first_res) // head-mods
389 {
390 BuildResidue(chde, mdl, mod[1]);
391
392 // a hardcoded fix implemented:
393 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
394 // amino : need to set amino-terminal N formal_charge.
395
396 if (type == chn_info::amino_acid)
397 {
398 i32u index = 0;
399 while (index < id_vector.size())
400 {
401 if (id_vector[index] == 0x0000) break;
402 else index++;
403 }
404
405 if (index >= id_vector.size())
406 {
407 assertion_failed(__FILE__, __LINE__, "amino-terminal N missing?");
408 }
409
410 atom_vector[index]->formal_charge = +1;
411 }
412 }
413 else if (last_res) // tail-mods
414 {
415 BuildResidue(chde, mdl, mod[2]);
416 }
417 else // default...
418 {
419 BuildResidue(chde, mdl, mod[0]);
420 }
421
422 i32u res = 0;
423 while (true)
424 {
425 if (chde->mode_1_letter)
426 {
427 if (resi_vector[res].symbol1 == chde->seq1->operator[](n1)) break;
428 }
429 else
430 {
431 const char * seq3str = chde->seq3->operator[](n1);
432 if (!strcmp(resi_vector[res].symbol3, seq3str)) break;
433 }
434
435 if (res != resi_vector.size()) res++;
436 else break;
437 }
438
439 if (res != resi_vector.size())
440 {
441 for (i32u n2 = 0;n2 < resi_vector[res].td_vector.size();n2++)
442 {
443 td_v.push_back(resi_vector[res].td_vector[n2]);
444 }
445
446 BuildResidue(chde, mdl, & resi_vector[res]);
447 }
448 else
449 {
450 ostringstream str;
451 str << _("unknown residue ");
452
453 if (chde->mode_1_letter)
454 {
455 str << chde->seq1->operator[](n1);
456 }
457 else
458 {
459 str << chde->seq3->operator[](n1);
460 }
461
462 str << endl << ends;
463 if (mdl->verbosity >= 2) mdl->PrintToLog(str.str().c_str());
464 }
465
466 vector<i32s> tmpv1; atmr_vector tmpv2;
467 for (i32u n2 = 0;n2 < id_vector.size();n2++)
468 {
469 if (id_vector[n2] & 0xFF00) continue;
470
471 tmpv1.push_back(id_vector[n2]);
472 tmpv2.push_back(atom_vector[n2]);
473 }
474
475 id_vector.resize(0);
476 atom_vector.resize(0);
477
478 for (i32u n2 = 0;n2 < tmpv1.size();n2++)
479 {
480 id_vector.push_back(tmpv1[n2] + 0xFF00);
481 atom_vector.push_back(tmpv2[n2]);
482 }
483 }
484
485 id_vector.resize(0);
486 atom_vector.resize(0);
487
488 // add hydrogens for a relevant range of atoms...
489
490 for (i32u n1 = 0;n1 < allatm_vector.size();n1++)
491 {
492 mdl->AddHydrogens(allatm_vector[n1]);
493 }
494
495 allatm_vector.resize(0);
496
497 // do centering (if asked). can't affect current_eng object
498 // since above many atoms/bonds added -> current_eng deleted!
499
500 if (do_centering) mdl->CenterCRDSet(0, true);
501 }
502
503 // here we will identify the sequences (if there is any), write down the sequences
504 // into model::ref_civ and update the /mol/chn/res-numbering...
505
506 // only model::UpdateChains() should call this???
507 // only model::UpdateChains() should call this???
508 // only model::UpdateChains() should call this???
509
Identify(model * mdl)510 void sequencebuilder::Identify(model * mdl)
511 {
512 if (!mdl->IsGroupsClean())
513 {
514 assertion_failed(__FILE__, __LINE__, "!mdl->IsGroupsClean()");
515 }
516
517 if (mdl->ref_civ == NULL)
518 {
519 assertion_failed(__FILE__, __LINE__, "mdl->ref_civ == NULL");
520 }
521
522 // here we will find all possible chains and identify them.
523
524 for (i32s n1 = 0;n1 < mdl->nmol;n1++)
525 {
526 iter_al range[2];
527 mdl->GetRange(0, n1, range);
528
529 vector<atom *> head_atoms;
530 vector<atom *> tail_atoms;
531
532 for (iter_al it1 = range[0];it1 != range[1];it1++)
533 {
534 i32s tmp1 = (* it1).el.GetAtomicNumber();
535
536 if (tmp1 == main_vector.front().el.GetAtomicNumber()) // look for heads...
537 {
538 for (i32u n2 = 0;n2 < head_vector.size();n2++)
539 {
540 bool flag = head_vector[n2].Check(mdl, & (* it1), 0);
541 if (!flag) continue; head_atoms.push_back(& (* it1)); break;
542 }
543 }
544
545 if (tmp1 == main_vector.back().el.GetAtomicNumber()) // look for tails...
546 {
547 for (i32u n2 = 0;n2 < tail_vector.size();n2++)
548 {
549 bool flag = tail_vector[n2].Check(mdl, & (* it1), 0);
550 if (!flag) continue; tail_atoms.push_back(& (* it1)); break;
551 }
552 }
553 }
554
555 // now we have all possible head/tail atoms. next we have to check
556 // all possible paths between them to find all possible main chains...
557
558 path_vector.resize(0);
559 for (i32u n2 = 0;n2 < head_atoms.size();n2++)
560 {
561 for (i32u n3 = 0;n3 < tail_atoms.size();n3++)
562 {
563 FindPath(mdl, head_atoms[n2], tail_atoms[n3]);
564 }
565 }
566
567 if (path_vector.size())
568 {
569 ostringstream str;
570 str << path_vector.size() << _(" chains:") << endl << ends;
571 if (mdl->verbosity >= 3) mdl->PrintToLog(str.str().c_str());
572 }
573
574 for (i32s n2 = 0;n2 < (i32s) path_vector.size();n2++)
575 {
576 for (i32s n3 = 0;n3 < ((i32s) path_vector[n2].size()) - 1;n3++) // tag the main-chain bonds...
577 {
578 iter_cl it1 = path_vector[n2][n3]->GetConnRecsBegin();
579 while ((* it1).atmr != path_vector[n2][n3 + 1]) it1++;
580 (* it1).bndr->flags[0] = true;
581 }
582
583 vector<char> tmpseq1;
584 vector<char *> tmpseq3;
585
586 i32u acount = 0; // atom counter
587 i32u rcount = 0; // residue counter
588
589 while (acount < path_vector[n2].size())
590 {
591 // rsize is the residue size (how many atoms it will chop away from the
592 // path_vector). we use it to determine whether we have reached the last
593 // residue, and to increment acount.
594
595 int head_el = NOT_DEFINED;
596 int tail_el = NOT_DEFINED;
597
598 bool is_first = !rcount;
599 if (is_first && type == chn_info::amino_acid)
600 {
601 // an extra test for an ACE-like block group.
602 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
603 // -NH2 or -NH3 group -> -NH-X- group
604
605 atom * refA = path_vector[n2][acount + 0];
606 atom * refB = path_vector[n2][acount + 1];
607 atom * refX = NULL;
608
609 for (iter_cl itX = refA->GetConnRecsBegin();itX != refA->GetConnRecsEnd();itX++)
610 {
611 if ((* itX).atmr == refB) continue;
612 if ((* itX).atmr->el.GetAtomicNumber() <= 1) continue;
613
614 refX = (* itX).atmr;
615 }
616
617 if (refX != NULL)
618 {
619 cout << _("found an ACE-like-block!") << endl;
620 //exit(EXIT_FAILURE);
621
622 head_el = refX->el.GetAtomicNumber();
623 is_first = false;
624 }
625 }
626
627 bool is_last = (acount + main_vector.size()) >= path_vector[n2].size();
628 if (is_last && type == chn_info::amino_acid)
629 {
630 // an extra test for an NME-like block group.
631 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
632 // O=C-O group -> O=C-X group (where X != O)
633
634 atom * refA = path_vector[n2][acount + 2];
635 atom * refB = path_vector[n2][acount + 1];
636 atom * refX = NULL;
637
638 for (iter_cl itX = refA->GetConnRecsBegin();itX != refA->GetConnRecsEnd();itX++)
639 {
640 if ((* itX).atmr == refB) continue;
641 if ((* itX).bndr->bt.GetValue() != 1) continue;
642 if ((* itX).atmr->el.GetAtomicNumber() == 8) continue;
643
644 refX = (* itX).atmr;
645 }
646
647 if (refX != NULL)
648 {
649 cout << _("found an NME-like-block!") << endl;
650 //exit(EXIT_FAILURE);
651
652 tail_el = refX->el.GetAtomicNumber();
653 is_last = false;
654 }
655 }
656
657 i32u rsize = main_vector.size();
658 if (!is_last) rsize += conn_vector.size();
659
660 // now we start identifying the residues.
661
662 // we make a simple template for each of them, compare those to
663 // the residue descriptions we have, and pick the closest match.
664 // there may not be any missing atoms, and the closest match is
665 // the one with largest number of correct atoms identified.
666
667 i32s tmp1[2] = { NOT_DEFINED, NOT_DEFINED };
668 for (i32s n4 = 0;n4 < (i32s) resi_vector.size();n4++)
669 {
670 vector<sb_tdata> tdata;
671 BuildTemplate(tdata, n4, is_first, is_last);
672
673 // do the template mods if blockgroups!!!
674 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
675
676 if (head_el != NOT_DEFINED)
677 {
678 if (is_first)
679 {
680 assertion_failed(__FILE__, __LINE__, "bad head_el");
681 }
682
683 // ACE needs no modifications...
684 }
685
686 if (tail_el != NOT_DEFINED)
687 {
688 if (is_last)
689 {
690 assertion_failed(__FILE__, __LINE__, "bad tail_el");
691 }
692
693 for (i32s n5 = 0;n5 < (i32s) tdata.size();n5++)
694 {
695 if (tdata[n5].id[0] == 0x11)
696 {
697 tdata[n5].el = element(tail_el);
698 }
699 }
700 }
701
702 // set the main chain stuff...
703
704 for (i32s n5 = 0;n5 < (i32s) main_vector.size();n5++)
705 {
706 tdata[n5].ref = path_vector[n2][acount + n5];
707 }
708
709 // set the connection stuff...
710
711 if (!is_last)
712 {
713 const int mvsz = main_vector.size();
714 for (i32s n5 = 0;n5 < (i32s) conn_vector.size();n5++)
715 {
716 tdata[mvsz + n5].ref = path_vector[n2][acount + mvsz + n5];
717 }
718 }
719
720 bool result = CheckTemplate(tdata, 0);
721 ///////////////////////////////////////////////////////////////////////////////////////////
722 //cout << "checktemplate " << n4 << " " << is_first << " " << is_last << " ; ";
723 //cout << "result = " << result << endl; int zzz; cin >> zzz;
724 ///////////////////////////////////////////////////////////////////////////////////////////
725 if (result && (tmp1[0] == NOT_DEFINED || ((i32s) tdata.size()) > tmp1[1]))
726 {
727 tmp1[0] = n4;
728 tmp1[1] = (i32s) tdata.size();
729 }
730 }
731
732 if (tmp1[0] == NOT_DEFINED)
733 {
734 ostringstream str;
735 str << _("WARNING : residue ") << rcount << _(" was of unknown type!!!") << endl << ends;
736 if (mdl->verbosity >= 2) mdl->PrintToLog(str.str().c_str());
737
738 tmpseq1.push_back('?'); // handle the sequence...
739
740 char * tmps = new char[4];
741 strcpy(tmps, "???");
742
743 tmpseq3.push_back(tmps); // handle the sequence...
744
745 for (i32u n3 = 0;n3 < main_vector.size();n3++) // handle the numbers...
746 {
747 path_vector[n2][acount + n3]->builder_res_id = NOT_DEFINED; // WHAT HERE???
748
749 path_vector[n2][acount + n3]->id[1] = mdl->ref_civ->size();
750 path_vector[n2][acount + n3]->id[2] = rcount;
751 }
752 }
753 else
754 {
755 tmpseq1.push_back(resi_vector[tmp1[0]].symbol1);
756
757 char * tmps = new char[4];
758 strcpy(tmps, resi_vector[tmp1[0]].symbol3);
759
760 tmpseq3.push_back(tmps);
761
762 // rebuild the best matching template and use that to
763 // set up the atom ID numbers...
764
765 vector<sb_tdata> tdata;
766 BuildTemplate(tdata, tmp1[0], is_first, is_last);
767
768 for (i32s n4 = 0;n4 < (i32s) main_vector.size();n4++)
769 {
770 tdata[n4].ref = path_vector[n2][acount + n4];
771 }
772
773 if (!is_last)
774 {
775 const int mvsz = main_vector.size();
776 for (i32s n5 = 0;n5 < (i32s) conn_vector.size();n5++)
777 {
778 tdata[mvsz + n5].ref = path_vector[n2][acount + mvsz + n5];
779 }
780 }
781
782 CheckTemplate(tdata, 0);
783 for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
784 {
785 tdata[n4].ref->builder_res_id = (resi_vector[tmp1[0]].id << 8) + tdata[n4].id[0];
786
787 tdata[n4].ref->id[1] = mdl->ref_civ->size();
788 tdata[n4].ref->id[2] = rcount;
789 }
790
791 /////////////////////////////////////////////////////////////////////////// exceptions
792 /////////////////////////////////////////////////////////////////////////// exceptions
793
794 // a hardcoded fix implemented:
795 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
796 // amino : fix the unambiguous LEU residues.
797
798 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'L')
799 {
800 atom * ref20 = NULL;
801 atom * ref21 = NULL;
802 atom * ref22 = NULL;
803 atom * ref23 = NULL;
804
805 for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
806 {
807 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x20) ref20 = tdata[n4].ref;
808 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x21) ref21 = tdata[n4].ref;
809 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x22) ref22 = tdata[n4].ref;
810 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x23) ref23 = tdata[n4].ref;
811 }
812
813 if (!ref20) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref20 not found!");
814 if (!ref21) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref21 not found!");
815 if (!ref22) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref22 not found!");
816 if (!ref23) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref23 not found!");
817
818 v3d<fGL> v1 = v3d<fGL>(ref21->GetCRD(0), ref20->GetCRD(0));
819 v3d<fGL> v2 = v3d<fGL>(ref21->GetCRD(0), ref22->GetCRD(0));
820 v3d<fGL> v3 = v3d<fGL>(ref22->GetCRD(0), ref23->GetCRD(0));
821 fGL tor = v1.tor(v2, v3);
822
823 if (tor < 0.0) // should be +/- 120 deg (about +/- 2.1 rad).
824 {
825 cout << _("LEU-fix!!!") << endl;
826
827 // ok, this seems to be "wrong" way...
828 // fix by exhanging 22<->23.
829
830 int tmpid;
831
832 tmpid = ref22->builder_res_id;
833 ref22->builder_res_id = ref23->builder_res_id;
834 ref23->builder_res_id = tmpid;
835 }
836 }
837
838 // a hardcoded fix implemented:
839 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
840 // amino : fix the unambiguous VAL residues.
841
842 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'V')
843 {
844 atom * ref01 = NULL;
845 atom * ref20 = NULL;
846 atom * ref21 = NULL;
847 atom * ref22 = NULL;
848
849 for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
850 {
851 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x01) ref01 = tdata[n4].ref;
852 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x20) ref20 = tdata[n4].ref;
853 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x21) ref21 = tdata[n4].ref;
854 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x22) ref22 = tdata[n4].ref;
855 }
856
857 if (!ref01) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref01 not found!");
858 if (!ref20) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref20 not found!");
859 if (!ref21) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref21 not found!");
860 if (!ref22) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref22 not found!");
861
862 v3d<fGL> v1 = v3d<fGL>(ref20->GetCRD(0), ref01->GetCRD(0));
863 v3d<fGL> v2 = v3d<fGL>(ref20->GetCRD(0), ref21->GetCRD(0));
864 v3d<fGL> v3 = v3d<fGL>(ref21->GetCRD(0), ref22->GetCRD(0));
865 fGL tor = v1.tor(v2, v3);
866
867 if (tor < 0.0) // should be +/- 120 deg (about +/- 2.1 rad).
868 {
869 cout << _("VAL-fix!!!") << endl;
870
871 // ok, this seems to be "wrong" way...
872 // fix by exhanging 21<->22.
873
874 int tmpid;
875
876 tmpid = ref21->builder_res_id;
877 ref21->builder_res_id = ref22->builder_res_id;
878 ref22->builder_res_id = tmpid;
879 }
880 }
881
882 // a hardcoded fix implemented:
883 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
884 // amino : fix the unambiguous TRP residues.
885
886 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'W')
887 {
888 atom * ref23 = NULL;
889 atom * ref25 = NULL;
890 atom * ref26 = NULL;
891 atom * ref28 = NULL;
892 atom * ref29 = NULL;
893
894 for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
895 {
896 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x23) ref23 = tdata[n4].ref;
897 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x25) ref25 = tdata[n4].ref;
898 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x26) ref26 = tdata[n4].ref;
899 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x28) ref28 = tdata[n4].ref;
900 if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x29) ref29 = tdata[n4].ref;
901 }
902
903 if (!ref23) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref23 not found!");
904 if (!ref25) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref25 not found!");
905 if (!ref26) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref26 not found!");
906 if (!ref28) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref28 not found!");
907 if (!ref29) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref29 not found!");
908
909 bond b1 = bond(ref23, ref25, bondtype('S'));
910 iter_bl itb1 = find(mdl->GetBondsBegin(), mdl->GetBondsEnd(), b1);
911 bool b1exists = (itb1 != mdl->GetBondsEnd());
912
913 bond b2 = bond(ref23, ref29, bondtype('S'));
914 iter_bl itb2 = find(mdl->GetBondsBegin(), mdl->GetBondsEnd(), b2);
915 bool b2exists = (itb2 != mdl->GetBondsEnd());
916
917 if (b1exists == b2exists) assertion_failed(__FILE__, __LINE__, "b1exists == b2exists");
918
919 if (b1exists && !b2exists)
920 {
921 cout << _("TRP-fix!!!") << endl;
922
923 // ok, this seems to be "wrong" way...
924 // fix by exhanging 25<->29 and 26<->28.
925
926 int tmpid;
927
928 tmpid = ref25->builder_res_id;
929 ref25->builder_res_id = ref29->builder_res_id;
930 ref29->builder_res_id = tmpid;
931
932 tmpid = ref26->builder_res_id;
933 ref26->builder_res_id = ref28->builder_res_id;
934 ref28->builder_res_id = tmpid;
935 }
936 }
937
938 /////////////////////////////////////////////////////////////////////////// exceptions
939 /////////////////////////////////////////////////////////////////////////// exceptions
940
941 }
942
943 rcount++;
944 acount += rsize;
945 }
946
947 // next we will write down the sequence...
948
949 if (tmpseq1.size() != tmpseq3.size())
950 {
951 assertion_failed(__FILE__, __LINE__, "tmpseq1.size() != tmpseq3.size()");
952 }
953
954 chn_info newinfo(type, tmpseq1.size());
955
956 newinfo.id_mol = n1;
957 newinfo.id_chn = mdl->ref_civ->size();
958
959 for (i32u n3 = 0;n3 < tmpseq1.size();n3++)
960 {
961 newinfo.sequence1[n3] = tmpseq1[n3]; // a char...
962
963 newinfo.sequence3[n3] = new char[strlen(tmpseq3[n3]) + 1]; // an array...
964 strcpy(newinfo.sequence3[n3], tmpseq3[n3]); // an array...
965 }
966
967 mdl->ref_civ->push_back(newinfo);
968
969 // and make some output...
970
971 ostringstream str;
972 str << _("chain ") << newinfo.id_chn;
973 str << _(", length ") << newinfo.length << ":" << endl;
974
975 for (i32u n3 = 0;n3 < tmpseq1.size();n3++)
976 {
977 str << tmpseq3[n3] << " ";
978
979 bool is_break = !((n3 + 1) % 10);
980 bool is_end = ((n3 + 1) == tmpseq1.size());
981
982 if (is_break || is_end) str << endl;
983
984 // now free the text memory blocks...
985
986 delete[] tmpseq3[n3];
987 tmpseq3[n3] = NULL;
988 }
989
990 str << ends;
991 if (mdl->verbosity >= 3) mdl->PrintToLog(str.str().c_str());
992
993 // finally un-tag the main-chain bonds...
994
995 for (i32s n3 = 0;n3 < ((i32s) path_vector[n2].size()) - 1;n3++)
996 {
997 iter_cl it1 = path_vector[n2][n3]->GetConnRecsBegin();
998 while ((* it1).atmr != path_vector[n2][n3 + 1]) it1++;
999 (* it1).bndr->flags[0] = false;
1000 }
1001 }
1002
1003 // now check the HYDROGENS that are handed by model::AddHydrogens() and not
1004 // (except in some cases) by the sequence builder ; for hydrogens copy the res/atm
1005 // ID's from the heavy atom and set a special builder_res_id lower part (0xFF).
1006
1007 // now for example a "select residue" operation will not skip the hydroges...
1008
1009 for (iter_al it1 = range[0];it1 != range[1];it1++)
1010 {
1011 if ((* it1).el.GetAtomicNumber() != 1) continue;
1012
1013 if ((* it1).GetConnRecCount() != 1)
1014 {
1015 cout << _("WARNING : seqbuild : H atom with abnormal connectivity found.") << endl;
1016 continue;
1017 }
1018
1019 bool already_has_id = true;
1020 if ((* it1).id[1] < 0) already_has_id = false;
1021 if ((* it1).id[2] < 0) already_has_id = false;
1022
1023 if (already_has_id != true)
1024 {
1025 atom * heavy = (* it1).GetFirstConnRec().atmr;
1026 if (heavy->id[1] < 0) continue; // id < 0 means NOT_DEFINED...
1027 if (heavy->id[2] < 0) continue; // id < 0 means NOT_DEFINED...
1028
1029 (* it1).builder_res_id = (heavy->builder_res_id | 0xFF);
1030
1031 (* it1).id[1] = heavy->id[1]; // chn id
1032 (* it1).id[2] = heavy->id[2]; // res id
1033 }
1034 }
1035 }
1036
1037 path_vector.resize(0);
1038
1039 // traditionally, we sorted the atom list here to make the newly assigned chains in nice order.
1040 // nowadays, this is done at model::UpdateChains() since there might be many different seqbuilder
1041 // objects (like for amino/nucleic acid variants) and we like to use them all...
1042 }
1043
BuildResidue(sb_chain_descriptor * chde,model * mdl,sb_data_res * res)1044 void sequencebuilder::BuildResidue(sb_chain_descriptor * chde, model * mdl, sb_data_res * res)
1045 {
1046 const int num_cs = mdl->GetCRDSetCount();
1047
1048 for (i32u n1 = 0;n1 < res->atm_vector.size();n1++)
1049 {
1050 fGL crd[3];
1051 Convert(chde, & res->atm_vector[n1], crd);
1052
1053 id_vector.push_back(res->atm_vector[n1].id);
1054 atom newatom(res->atm_vector[n1].el, crd, num_cs);
1055
1056 newatom.formal_charge = res->atm_vector[n1].f_chrg;
1057
1058 mdl->AddAtom_lg(newatom);
1059 atom_vector.push_back(& mdl->GetLastAtom());
1060 allatm_vector.push_back(& mdl->GetLastAtom());
1061
1062 i32s p_ind = 0;
1063 while (id_vector[p_ind] != res->atm_vector[n1].prev[0]) p_ind++;
1064 atom * prev = atom_vector[p_ind];
1065
1066 bond newbond(prev, & mdl->GetLastAtom(), res->atm_vector[n1].bt);
1067 mdl->AddBond(newbond);
1068 }
1069
1070 for (i32u n1 = 0;n1 < res->bnd_vector.size();n1++)
1071 {
1072 atom * tmp1[2]; i32s tmp2;
1073 tmp2 = 0; while (id_vector[tmp2] != res->bnd_vector[n1].atm[0]) tmp2++; tmp1[0] = atom_vector[tmp2];
1074 tmp2 = 0; while (id_vector[tmp2] != res->bnd_vector[n1].atm[1]) tmp2++; tmp1[1] = atom_vector[tmp2];
1075
1076 bond newbond(tmp1[0], tmp1[1], res->bnd_vector[n1].bt);
1077 mdl->AddBond(newbond);
1078 }
1079 }
1080
Convert(sb_chain_descriptor * chde,sb_data_atm * adata,fGL * crd)1081 void sequencebuilder::Convert(sb_chain_descriptor * chde, sb_data_atm * adata, fGL * crd)
1082 {
1083 // cout << "Convert() begins..." << endl;
1084 // for (i32u n1 = 0;n1 < id_vector.size();n1++)
1085 // {
1086 // cout << "id_vector[" << n1 << "] ";
1087 // cout << "0x" << hex << id_vector[n1] << " ";
1088 // cout << atom_vector[n1] << endl;
1089 // } int pause ; cin >> pause;
1090
1091 ///////////////////////////////////////////////////////////////////////////
1092
1093 atom * prev[3]; f64 ic[3];
1094 for (i32s n1 = 0;n1 < 3;n1++)
1095 {
1096 i32u n2 = 0;
1097 while (n2 < id_vector.size())
1098 {
1099 if (id_vector[n2] == adata->prev[n1]) break;
1100 else n2++;
1101 }
1102
1103 if (n2 >= id_vector.size())
1104 {
1105 assertion_failed(__FILE__, __LINE__, "prev not found!");
1106 }
1107
1108 prev[n1] = atom_vector[n2];
1109 ic[n1] = adata->ic1[n1];
1110 }
1111
1112 const int tor_i = adata->ic2;
1113 ///////////////////////////////////////////////////////////
1114 // int tor_i = adata->ic2; // for debug only...
1115 ///////////////////////////////////////////////////////////
1116
1117 bool store = false;
1118 i32s p0i = NOT_DEFINED;
1119 i32s p1i = NOT_DEFINED;
1120 i32s p2i = NOT_DEFINED;
1121
1122 if (tor_i > -1)
1123 {
1124 if (tor_i >= td_v.size())
1125 {
1126 ostringstream msg;
1127 msg << "td_v is incomplete ; " << tor_i << ends;
1128 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1129 }
1130
1131 if (!td_v[tor_i].flag)
1132 {
1133 if (adata->id != td_v[tor_i].id[0])
1134 {
1135 ostringstream msg;
1136 msg << "id mismatch ; ";
1137 msg << hex << adata->id << " != ";
1138 msg << hex << td_v[tor_i].id[0] << ends;
1139 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1140 }
1141
1142 td_v[tor_i].flag = true;
1143 store = true;
1144
1145 if (fabs(ic[2]) > 0.0001)
1146 {
1147 assertion_failed(__FILE__, __LINE__, "tor error");
1148 }
1149 }
1150
1151 if (type == chn_info::amino_acid)
1152 {
1153 if (adata->id == 0x10) goto skip_checks; // a known issue...
1154 if (adata->id == 0x11) goto skip_checks; // a known issue...
1155 }
1156
1157 if (type == chn_info::nucleic_acid)
1158 {
1159 if (adata->id == 0x10) goto skip_checks; // a known issue...
1160 if (adata->id == 0x11) goto skip_checks; // a known issue...
1161 }
1162
1163 // check prev0
1164
1165 p0i = 0; while (p0i < (i32s) id_vector.size())
1166 {
1167 if (id_vector[p0i] == td_v[tor_i].id[1]) break;
1168 else p0i++;
1169 }
1170
1171 if (p0i >= (i32s) id_vector.size())
1172 {
1173 assertion_failed(__FILE__, __LINE__, "p0i not found");
1174 }
1175 else if (atom_vector[p0i] != prev[0])
1176 {
1177 ostringstream msg;
1178 msg << "p0i mismatch ; ";
1179 msg << "0x" << hex << id_vector[p0i] << " " << prev[0] << endl;
1180 msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1181 msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1182 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1183 }
1184
1185 // check prev1
1186
1187 p1i = 0; while (p1i < (i32s) id_vector.size())
1188 {
1189 if (id_vector[p1i] == td_v[tor_i].id[2]) break;
1190 else p1i++;
1191 }
1192
1193 if (p1i >= (i32s) id_vector.size())
1194 {
1195 assertion_failed(__FILE__, __LINE__, "p1i not found");
1196 }
1197 else if (atom_vector[p1i] != prev[1])
1198 {
1199 ostringstream msg;
1200 msg << "p1i mismatch ; ";
1201 msg << "0x" << hex << id_vector[p1i] << " " << prev[1] << endl;
1202 msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1203 msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1204 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1205 }
1206
1207 // check prev2
1208
1209 p2i = 0; while (p2i < (i32s) id_vector.size())
1210 {
1211 if (id_vector[p2i] == td_v[tor_i].id[3]) break;
1212 else p2i++;
1213 }
1214
1215 if (p2i >= (i32s) id_vector.size())
1216 {
1217 assertion_failed(__FILE__, __LINE__, "p2i not found");
1218 }
1219 else if (atom_vector[p2i] != prev[2])
1220 {
1221 ostringstream msg;
1222 msg << "p2i mismatch ; ";
1223 msg << "0x" << hex << id_vector[p2i] << " " << prev[2] << endl;
1224 msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1225 msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1226 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1227 }
1228 }
1229 else
1230 {
1231 /* for (i32u debug = 0;debug < td_v.size();debug++) // for debug only...
1232 {
1233 bool all_match = true;
1234 tor_i = debug;
1235
1236 // check prev0
1237 p0i = 0; while (p0i < (i32s) id_vector.size())
1238 {
1239 if (id_vector[p0i] == td_v[tor_i].id[1]) break;
1240 else p0i++;
1241 }
1242
1243 if (p0i >= (i32s) id_vector.size() || atom_vector[p0i] != prev[0]) all_match = false;
1244
1245 // check prev1
1246 p1i = 0; while (p1i < (i32s) id_vector.size())
1247 {
1248 if (id_vector[p1i] == td_v[tor_i].id[2]) break;
1249 else p1i++;
1250 }
1251
1252 if (p1i >= (i32s) id_vector.size() || atom_vector[p1i] != prev[1]) all_match = false;
1253
1254 // check prev2
1255 p2i = 0; while (p2i < (i32s) id_vector.size())
1256 {
1257 if (id_vector[p2i] == td_v[tor_i].id[3]) break;
1258 else p2i++;
1259 }
1260
1261 if (p2i >= (i32s) id_vector.size() || atom_vector[p2i] != prev[2]) all_match = false;
1262
1263 if (all_match)
1264 {
1265 cout << "DEBUG : all_match ; tor_i = " << tor_i << endl;
1266 exit(EXIT_FAILURE);
1267 }
1268 }
1269
1270 tor_i = NOT_DEFINED; */
1271 }
1272
1273 skip_checks:
1274
1275 int c_crd_index = chde->c_crd_BGN;
1276 while (c_crd_index < chde->c_crd_END)
1277 {
1278 if (adata->id == chde->c_crd_v[c_crd_index].atm_id) break;
1279 else c_crd_index++;
1280 }
1281
1282 if (c_crd_index < chde->c_crd_END)
1283 {
1284 // take the coordinates from constraints...
1285 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1286
1287 crd[0] = chde->c_crd_v[c_crd_index].crdX;
1288 crd[1] = chde->c_crd_v[c_crd_index].crdY;
1289 crd[2] = chde->c_crd_v[c_crd_index].crdZ;
1290
1291 if (tor_i > -1 && store)
1292 {
1293 v3d<fGL> v1 = v3d<fGL>(prev[0]->GetCRD(0), crd);
1294 v3d<fGL> v2 = v3d<fGL>(prev[0]->GetCRD(0), prev[1]->GetCRD(0));
1295 v3d<fGL> v3 = v3d<fGL>(prev[1]->GetCRD(0), prev[2]->GetCRD(0));
1296 fGL tor = v1.tor(v2, v3);
1297
1298 if (tor_i >= chde->def_tor.size())
1299 {
1300 assertion_failed(__FILE__, __LINE__, "cannot store a torsion.");
1301 }
1302 else
1303 {
1304 chde->def_tor[tor_i] = tor;
1305 }
1306 }
1307 }
1308 else
1309 {
1310 // calculate the coordinates from internal coordinates...
1311 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1312
1313 if (tor_i > -1)
1314 {
1315 int c_tor_index = chde->c_tor_BGN;
1316 while (c_tor_index < chde->c_tor_END)
1317 {
1318 if (tor_i == chde->c_tor_v[c_tor_index].tor_ind) break;
1319 else c_tor_index++;
1320 }
1321
1322 if (c_tor_index < chde->c_tor_END)
1323 {
1324 // use the constraint value, if available.
1325
1326 ic[2] += chde->c_tor_v[c_tor_index].tor_val;
1327 }
1328 else if (tor_i < chde->def_tor.size())
1329 {
1330 // use the default table value, if available.
1331
1332 ic[2] += chde->def_tor[tor_i];
1333 }
1334 else
1335 {
1336 // set a default value if the above fails;
1337 // the default geometry is the extended one,
1338 // EXCEPT for proline (15.0 deg and 0.0 deg).
1339
1340 bool is_pro = false;
1341 if (chde->mode_1_letter)
1342 {
1343 if (chde->seq1->operator[](chde->curr_res) == 'P') is_pro = true;
1344 }
1345 else
1346 {
1347 if (!strcmp(chde->seq3->operator[](chde->curr_res), "PRO")) is_pro = true;
1348 }
1349
1350 if (is_pro)
1351 {
1352 switch (tor_i)
1353 {
1354 case +3:
1355 ic[2] += 15.0 * M_PI / 180.0;
1356 break;
1357
1358 case +4:
1359 ic[2] += 0.0 * M_PI / 180.0;
1360 break;
1361
1362 default:
1363 ic[2] += 180.0 * M_PI / 180.0;
1364 }
1365 }
1366 else
1367 {
1368 ic[2] += 180.0 * M_PI / 180.0;
1369 }
1370 }
1371 }
1372
1373 v3d<fGL> v1 = v3d<fGL>(prev[0]->GetCRD(0), prev[1]->GetCRD(0));
1374
1375 v3d<fGL> tmp1 = v3d<fGL>(prev[1]->GetCRD(0), prev[2]->GetCRD(0));
1376 v3d<fGL> tmp2 = v1 * (tmp1.spr(v1) / v1.spr(v1));
1377 v3d<fGL> v2 = tmp1 - tmp2;
1378
1379 v3d<fGL> v3 = v2.vpr(v1);
1380
1381 v1 = v1 * (ic[0] * cos(ic[1]) / v1.len());
1382 v2 = v2 * (ic[0] * sin(ic[1]) * cos(ic[2]) / v2.len());
1383 v3 = v3 * (ic[0] * sin(ic[1]) * sin(ic[2]) / v3.len());
1384
1385 v3d<fGL> tmp3 = v3d<fGL>(prev[0]->GetCRD(0)); tmp3 = tmp3 + (v1 + (v2 + v3));
1386 for (i32s n1 = 0;n1 < 3;n1++) crd[n1] = tmp3.data[n1];
1387 }
1388 }
1389
BuildTemplate(vector<sb_tdata> & tdata,i32s res,bool is_first,bool is_last)1390 void sequencebuilder::BuildTemplate(vector<sb_tdata> & tdata, i32s res, bool is_first, bool is_last)
1391 {
1392 BuildPartialT(tdata, main_vector);
1393 if (!is_last) BuildPartialT(tdata, conn_vector);
1394
1395 if (is_first) BuildPartialT(tdata, mod[1]->atm_vector); // head-mods
1396 else if (is_last) BuildPartialT(tdata, mod[2]->atm_vector); // tail-mods
1397 else BuildPartialT(tdata, mod[0]->atm_vector); // default...
1398
1399 BuildPartialT(tdata, resi_vector[res].atm_vector);
1400 }
1401
BuildPartialT(vector<sb_tdata> & tdata,vector<sb_data_atm> & adata)1402 void sequencebuilder::BuildPartialT(vector<sb_tdata> & tdata, vector<sb_data_atm> & adata)
1403 {
1404 for (i32u n1 = 0;n1 < adata.size();n1++)
1405 {
1406 sb_tdata newdata;
1407 newdata.id[0] = adata[n1].id;
1408
1409 if (adata[n1].prev[0] & 0xFF00) newdata.id[1] = NOT_DEFINED;
1410 else newdata.id[1] = adata[n1].prev[0];
1411
1412 newdata.el = adata[n1].el;
1413 newdata.bt = adata[n1].bt;
1414 newdata.ref = NULL;
1415
1416 tdata.push_back(newdata);
1417 }
1418 }
1419
1420 // idea is that this path-searching function should be used only inside a molecule.
1421 // this way we are sure to find always at least one path. we collect references to
1422 // atoms for all paths and collect this way all possible paths.
1423
1424 // different flags should be used in type checking and in this path-searching...
1425
FindPath(model * mdl,atom * ref1,atom * ref2,i32u index)1426 void sequencebuilder::FindPath(model * mdl, atom * ref1, atom * ref2, i32u index)
1427 {
1428 if (index >= main_vector.size() + conn_vector.size()) index = 0;
1429
1430 if (index < main_vector.size())
1431 {
1432 if (ref1->el.GetAtomicNumber() != main_vector[index].el.GetAtomicNumber()) return;
1433 if (!main_vector[index].tr->Check(mdl, ref1, 0)) return;
1434 }
1435 else
1436 {
1437 i32u index2 = index - main_vector.size();
1438
1439 if (ref1->el.GetAtomicNumber() != conn_vector[index2].el.GetAtomicNumber()) return;
1440 if (!conn_vector[index2].tr->Check(mdl, ref1, 0)) return;
1441 }
1442
1443 tmpatm_vector.push_back(ref1);
1444
1445 //for (i32u debug = 0;debug < tmpatm_vector.size();debug++)
1446 //{
1447 // cout << tmpatm_vector[debug]->el.GetSymbol();
1448 //} cout << endl;
1449
1450 if (ref1 == ref2) path_vector.push_back(tmpatm_vector);
1451 else
1452 {
1453 for (iter_cl it1 = ref1->GetConnRecsBegin();it1 != ref1->GetConnRecsEnd();it1++)
1454 {
1455 if ((* it1).bndr->flags[2]) continue;
1456
1457 (* it1).bndr->flags[2] = true;
1458 FindPath(mdl, (* it1).atmr, ref2, (index + 1));
1459 (* it1).bndr->flags[2] = false;
1460 }
1461 }
1462
1463 tmpatm_vector.pop_back();
1464 }
1465
CheckTemplate(vector<sb_tdata> & tdata,i32s flag)1466 bool sequencebuilder::CheckTemplate(vector<sb_tdata> & tdata, i32s flag)
1467 {
1468 vector<i32s> tmpv1;
1469
1470 // look for a suitable starting point to start matching in the template.
1471 // if there is no such point we assume the template fits. however, the template
1472 // could match since it is too small (for exaple, the template for glycine will
1473 // match for all residues). therefore we have to test all templates and choose
1474 // the biggest matching one!!!
1475
1476 i32u index = 0;
1477 while (index < tdata.size())
1478 {
1479 if (tdata[index].ref != NULL)
1480 {
1481 tmpv1.resize(0);
1482 for (i32u n1 = 0;n1 < tdata.size();n1++)
1483 {
1484 bool test1 = (tdata[n1].ref == NULL);
1485 bool test2 = (tdata[n1].id[1] == tdata[index].id[0]);
1486 if (test1 && test2) tmpv1.push_back(n1);
1487 }
1488
1489 if (tmpv1.size()) break;
1490 }
1491
1492 index++;
1493 }
1494
1495 // if there is no suitable starting point, we have tested all atoms and
1496 // none of them is missing -> template fits -> TRUE!!!
1497
1498 if (index == tdata.size()) return true;
1499
1500 // find all possible bonds that we can use to continue testing...
1501
1502 vector<crec> tmpv2; iter_cl it1;
1503 for (it1 = tdata[index].ref->GetConnRecsBegin();it1 != tdata[index].ref->GetConnRecsEnd();it1++)
1504 {
1505 bool test1 = (* it1).bndr->flags[flag];
1506 if (!test1) tmpv2.push_back((* it1));
1507 }
1508
1509 // if there are less bonds than we need, the template can't match -> FALSE!!!
1510
1511 if (tmpv2.size() < tmpv1.size()) return false;
1512
1513 vector<i32s> tmpv3; tmpv3.resize(tmpv2.size());
1514 for (i32u n1 = 0;n1 < tmpv3.size();n1++) tmpv3[n1] = n1;
1515
1516 // then we will check all bonds against the template in all possible permutations.
1517 // if some combination is a match then leave the ID-numbers untouched, clean the
1518 // bond flags and leave -> TRUE!!!
1519
1520 while (true)
1521 {
1522 bool test = true;
1523 for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1524 {
1525 i32s el = tdata[tmpv1[n1]].el.GetAtomicNumber();
1526 if (el != NOT_DEFINED && el != tmpv2[tmpv3[n1]].atmr->el.GetAtomicNumber()) test = false;
1527
1528 i32s bt = tdata[tmpv1[n1]].bt.GetValue();
1529 if (bt != NOT_DEFINED && bt != tmpv2[tmpv3[n1]].bndr->bt.GetValue()) test = false;
1530 }
1531
1532 if (test)
1533 {
1534 for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1535 {
1536 tmpv2[tmpv3[n1]].bndr->flags[flag] = true;
1537 tdata[tmpv1[n1]].ref = tmpv2[tmpv3[n1]].atmr;
1538 }
1539
1540 bool result = CheckTemplate(tdata, flag);
1541
1542 for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1543 {
1544 tmpv2[tmpv3[n1]].bndr->flags[flag] = false;
1545 if (!result) tdata[tmpv1[n1]].ref = NULL;
1546 }
1547
1548 if (result) return true;
1549 }
1550
1551 if (!next_permutation(tmpv3.begin(), tmpv3.end())) break;
1552 }
1553
1554 // none of those combinations matched -> the template doesn't fit -> FALSE!!!
1555
1556 return false;
1557 }
1558
1559 /*################################################################################################*/
1560
sb_data_td(void)1561 sb_data_td::sb_data_td(void)
1562 {
1563 id[0] = NOT_DEFINED;
1564 id[1] = NOT_DEFINED;
1565 id[2] = NOT_DEFINED;
1566 id[3] = NOT_DEFINED;
1567
1568 flag = false;
1569 }
1570
~sb_data_td(void)1571 sb_data_td::~sb_data_td(void)
1572 {
1573 }
1574
operator >>(istream & p1,sb_data_td & p2)1575 istream & operator>>(istream & p1, sb_data_td & p2)
1576 {
1577 char buffer[256];
1578 while (p1.get() != 'F');
1579
1580 p1 >> p2.id[0];
1581 p1 >> p2.id[1];
1582 p1 >> p2.id[2];
1583 p1 >> p2.id[3];
1584
1585 p2.flag = false;
1586
1587 return p1;
1588 }
1589
1590 /*################################################################################################*/
1591
sb_data_atm(void)1592 sb_data_atm::sb_data_atm(void)
1593 {
1594 id = NOT_DEFINED;
1595
1596 el = element(1);
1597 f_chrg = +0;
1598
1599 bt = bondtype(1);
1600
1601 ic2 = NOT_DEFINED;
1602 for (i32s n1 = 0;n1 < 3;n1++)
1603 {
1604 prev[n1] = NOT_DEFINED;
1605 ic1[n1] = 0.0;
1606 }
1607
1608 tr = NULL;
1609 }
1610
sb_data_atm(const sb_data_atm & p1)1611 sb_data_atm::sb_data_atm(const sb_data_atm & p1)
1612 {
1613 id = p1.id;
1614
1615 el = p1.el;
1616 f_chrg = p1.f_chrg;
1617
1618 bt = p1.bt;
1619
1620 ic2 = p1.ic2;
1621 for (i32s n1 = 0;n1 < 3;n1++)
1622 {
1623 prev[n1] = p1.prev[n1];
1624 ic1[n1] = p1.ic1[n1];
1625 }
1626
1627 if (p1.tr != NULL) tr = new typerule(* p1.tr);
1628 else tr = NULL;
1629 }
1630
~sb_data_atm(void)1631 sb_data_atm::~sb_data_atm(void)
1632 {
1633 if (tr != NULL) delete tr;
1634 }
1635
operator >>(istream & p1,sb_data_atm & p2)1636 istream & operator>>(istream & p1, sb_data_atm & p2)
1637 {
1638 char buffer[256];
1639 while (p1.get() != 'M');
1640
1641 p1 >> p2.id;
1642 p1 >> buffer; p2.el = element(buffer);
1643 p1 >> p2.f_chrg;
1644
1645 p1 >> p2.prev[0] >> p2.prev[1] >> p2.prev[2];
1646 p1 >> p2.ic1[0] >> p2.ic1[1] >> p2.ic2 >> p2.ic1[2];
1647
1648 p2.ic1[1] = M_PI * p2.ic1[1] / 180.0;
1649 p2.ic1[2] = M_PI * p2.ic1[2] / 180.0;
1650
1651 p1 >> buffer;
1652 p2.bt = bondtype(buffer[0]);
1653
1654 //////////////////////////////////////////////////////////////////////////
1655 if (p2.bt.GetValue() == 0) assertion_failed(__FILE__, __LINE__, "bad bondtype A");
1656 //////////////////////////////////////////////////////////////////////////
1657
1658 return p1;
1659 }
1660
1661 /*################################################################################################*/
1662
sb_data_bnd(void)1663 sb_data_bnd::sb_data_bnd(void)
1664 {
1665 atm[0] = NOT_DEFINED;
1666 atm[1] = NOT_DEFINED;
1667 bt = bondtype(1);
1668 }
1669
~sb_data_bnd(void)1670 sb_data_bnd::~sb_data_bnd(void)
1671 {
1672 }
1673
operator >>(istream & p1,sb_data_bnd & p2)1674 istream & operator>>(istream & p1, sb_data_bnd & p2)
1675 {
1676 char buffer[256];
1677 while (p1.get() != 'D');
1678
1679 p1 >> p2.atm[0] >> p2.atm[1];
1680 p1 >> buffer; p2.bt = bondtype(buffer[0]);
1681
1682 //////////////////////////////////////////////////////////////////////////
1683 if (p2.bt.GetValue() == 0) assertion_failed(__FILE__, __LINE__, "bad bondtype B");
1684 //////////////////////////////////////////////////////////////////////////
1685
1686 return p1;
1687 }
1688
1689 /*################################################################################################*/
1690
sb_data_res(void)1691 sb_data_res::sb_data_res(void)
1692 {
1693 symbol1 = '?';
1694 strcpy(symbol3, "???");
1695
1696 description = NULL;
1697 }
1698
sb_data_res(const sb_data_res & p1)1699 sb_data_res::sb_data_res(const sb_data_res & p1)
1700 {
1701 id = p1.id;
1702
1703 symbol1 = p1.symbol1;
1704 strcpy(symbol3, p1.symbol3);
1705
1706 if (p1.description != NULL)
1707 {
1708 description = new char[strlen(p1.description) + 1];
1709 strcpy(description, p1.description);
1710 }
1711 else description = NULL;
1712
1713 td_vector = p1.td_vector;
1714 atm_vector = p1.atm_vector;
1715 bnd_vector = p1.bnd_vector;
1716 }
1717
~sb_data_res(void)1718 sb_data_res::~sb_data_res(void)
1719 {
1720 if (description != NULL) delete[] description;
1721 }
1722
ReadModification(istream & p1)1723 void sb_data_res::ReadModification(istream & p1)
1724 {
1725 char buffer[256];
1726 while (p1.peek() != 'E')
1727 {
1728 if (p1.peek() == 'T')
1729 {
1730 assertion_failed(__FILE__, __LINE__, "xxxx_MOD should not have TORDEF lines!");
1731 }
1732
1733 if (p1.peek() == 'A')
1734 {
1735 sb_data_atm newatm; p1 >> newatm;
1736 p1.getline(buffer, sizeof(buffer));
1737 atm_vector.push_back(newatm);
1738 continue;
1739 }
1740
1741 if (p1.peek() == 'B')
1742 {
1743 sb_data_bnd newbnd; p1 >> newbnd;
1744 p1.getline(buffer, sizeof(buffer));
1745 bnd_vector.push_back(newbnd);
1746 continue;
1747 }
1748
1749 p1.getline(buffer, sizeof(buffer));
1750 }
1751
1752 p1.getline(buffer, sizeof(buffer));
1753 }
1754
operator >>(istream & p1,sb_data_res & p2)1755 istream & operator>>(istream & p1, sb_data_res & p2)
1756 {
1757 char buffer[256];
1758
1759 while (p1.get() != 'S');
1760
1761 p1 >> p2.id;
1762
1763 while (p1.get() != ':');
1764
1765 p1 >> buffer;
1766 if (strlen(buffer) != 3)
1767 {
1768 ostringstream msg;
1769 msg << "bad symbol3 : " << buffer << ends;
1770 assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1771 }
1772
1773 strcpy(p2.symbol3, buffer);
1774 p1 >> p2.symbol1;
1775
1776 while (p1.get() != '"');
1777 p1.getline(buffer, sizeof(buffer), '"');
1778
1779 p2.description = new char[strlen(buffer) + 1];
1780 strcpy(p2.description, buffer);
1781
1782 p1.getline(buffer, sizeof(buffer));
1783
1784 while (p1.peek() != 'E')
1785 {
1786 if (p1.peek() == 'T')
1787 {
1788 sb_data_td newtd; p1 >> newtd;
1789 p1.getline(buffer, sizeof(buffer));
1790 p2.td_vector.push_back(newtd);
1791 continue;
1792 }
1793
1794 if (p1.peek() == 'A')
1795 {
1796 sb_data_atm newatm; p1 >> newatm;
1797 p1.getline(buffer, sizeof(buffer));
1798 p2.atm_vector.push_back(newatm);
1799 continue;
1800 }
1801
1802 if (p1.peek() == 'B')
1803 {
1804 sb_data_bnd newbnd; p1 >> newbnd;
1805 p1.getline(buffer, sizeof(buffer));
1806 p2.bnd_vector.push_back(newbnd);
1807 continue;
1808 }
1809
1810 p1.getline(buffer, sizeof(buffer));
1811 }
1812
1813 p1.getline(buffer, sizeof(buffer));
1814 return p1;
1815 }
1816
1817 /*################################################################################################*/
1818
1819 // eof
1820