1 // SEQBUILD.CPP
2 
3 // Copyright (C) 1998 Tommi Hassinen, Geoff Hutchison.
4 
5 // This package is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
9 
10 // This package is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 
15 // You should have received a copy of the GNU General Public License
16 // along with this package; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 
19 /*################################################################################################*/
20 
21 #include "libghemicalconfig2.h"
22 #include "seqbuild.h"
23 
24 #include "v3d.h"
25 
26 #include "local_i18n.h"
27 #include "notice.h"
28 
29 #include <cstring>
30 #include <iomanip>
31 #include <iostream>
32 #include <sstream>
33 using namespace std;
34 
35 /*################################################################################################*/
36 
sb_chain_descriptor(bool m1l)37 sb_chain_descriptor::sb_chain_descriptor(bool m1l)
38 {
39 	mode_1_letter = m1l;
40 
41 	seq1 = NULL;
42 	seq3 = NULL;
43 
44 	curr_res = NOT_DEFINED;
45 	c_crd_BGN = c_crd_END = NOT_DEFINED;
46 	c_tor_BGN = c_tor_END = NOT_DEFINED;
47 
48 	if (mode_1_letter) seq1 = new vector<char>;
49 	else seq3 = new vector<char *>;
50 }
51 
~sb_chain_descriptor(void)52 sb_chain_descriptor::~sb_chain_descriptor(void)
53 {
54 	if (seq1 != NULL)
55 	{
56 		delete seq1;
57 		seq1 = NULL;
58 	}
59 
60 	if (seq3 != NULL)
61 	{
62 		for (int n1 = 0;n1 < (int) seq3->size();n1++)
63 		{
64 			char * tmps = seq3->operator[](n1);
65 			delete[] tmps;
66 		}
67 
68 		delete seq3;
69 		seq3 = NULL;
70 	}
71 }
72 
AddRes1(char r1l)73 int sb_chain_descriptor::AddRes1(char r1l)
74 {
75 	seq1->push_back(r1l);
76 	return (int) seq1->size();
77 }
78 
AddRes3(const char * r3l)79 int sb_chain_descriptor::AddRes3(const char * r3l)
80 {
81 	if (strlen(r3l) != 3)
82 	{
83 		assertion_failed(__FILE__, __LINE__, "bad input");
84 	}
85 
86 	char * tmps = new char[4];
87 	strcpy(tmps, r3l);
88 
89 	seq3->push_back(tmps);
90 	return (int) seq3->size();
91 }
92 
93 // add an XYZ constraint for an atom in a residue which was last added.
94 // USE THIS WITH GREAT CARE! ; this has been added for a special purpose.
95 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96 // the constraints are forced to be added in sequence order...
97 
AddCRD(int atm_id,float x,float y,float z)98 void sb_chain_descriptor::AddCRD(int atm_id, float x, float y, float z)
99 {
100 	int lastres = ((int) (mode_1_letter ? seq1->size() : seq3->size())) - 1;
101 
102 	sb_constraint_crd newcrd;
103 	newcrd.pos = lastres;
104 
105 	newcrd.atm_id = atm_id;
106 	newcrd.crdX = x; newcrd.crdY = y; newcrd.crdZ = z;
107 
108 	c_crd_v.push_back(newcrd);
109 }
110 
111 // add a torsional constraint for a residue which was last added.
112 // USE THIS WITH GREAT CARE! ; this has been added for a special purpose.
113 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
114 // the constraints are forced to be added in sequence order...
115 
AddTOR(int tor_ind,float tor_val)116 void sb_chain_descriptor::AddTOR(int tor_ind, float tor_val)
117 {
118 	int lastres = ((int) (mode_1_letter ? seq1->size() : seq3->size())) - 1;
119 
120 	sb_constraint_tor newtor;
121 	newtor.pos = lastres;
122 
123 	newtor.tor_ind = tor_ind;
124 	newtor.tor_val = tor_val;
125 
126 	c_tor_v.push_back(newtor);
127 }
128 
129 /*################################################################################################*/
130 
sequencebuilder(chn_info::chn_type ct,const char * filename)131 sequencebuilder::sequencebuilder(chn_info::chn_type ct, const char * filename)
132 {
133 	type = ct;
134 
135 	ifstream file;
136 	file.unsetf(ios::dec | ios::oct | ios::hex);
137 
138 	model::OpenLibDataFile(file, false, filename);
139 
140 	// read information about the main chain...
141 
142 	while (file.peek() != 'M') file.getline(buffer, sizeof(buffer));
143 	file.getline(buffer, sizeof(buffer));
144 
145 	while (file.peek() != 'E')
146 	{
147 		if (file.peek() == 'T')
148 		{
149 			sb_data_td newtd; file >> newtd;
150 			file.getline(buffer, sizeof(buffer));
151 			td_mc_store.push_back(newtd);
152 			continue;
153 		}
154 
155 		if (file.peek() == 'A')
156 		{
157 			sb_data_atm newatm; file >> newatm;
158 			while (file.peek() != '(') file.get();
159 			newatm.tr = new typerule(& file, & cout);
160 			file.getline(buffer, sizeof(buffer));
161 			main_vector.push_back(newatm);
162 			continue;
163 		}
164 
165 		file.getline(buffer, sizeof(buffer));
166 	}
167 
168 	while (file.peek() != 'C') file.getline(buffer, sizeof(buffer));
169 	file.getline(buffer, sizeof(buffer));
170 
171 	while (file.peek() != 'E')
172 	{
173 		if (file.peek() == 'T')
174 		{
175 			sb_data_td newtd; file >> newtd;
176 			file.getline(buffer, sizeof(buffer));
177 			td_mc_store.push_back(newtd);
178 			continue;
179 		}
180 
181 		if (file.peek() == 'A')
182 		{
183 			sb_data_atm newatm; file >> newatm;
184 			while (file.peek() != '(') file.get();
185 			newatm.tr = new typerule(& file, & cout);
186 			file.getline(buffer, sizeof(buffer));
187 			conn_vector.push_back(newatm);
188 			continue;
189 		}
190 
191 		file.getline(buffer, sizeof(buffer));
192 	}
193 
194 	// read descriptions about the chain initiation...
195 
196 	while (file.peek() != 'H') file.getline(buffer, sizeof(buffer));
197 	file.getline(buffer, sizeof(buffer));
198 
199 	while (file.peek() != 'E')
200 	{
201 		typerule newrule = typerule(& file, & cout);
202 		file.getline(buffer, sizeof(buffer));
203 		head_vector.push_back(newrule);
204 	}
205 
206 	// read descriptions about the chain termination...
207 
208 	while (file.peek() != 'T') file.getline(buffer, sizeof(buffer));
209 	file.getline(buffer, sizeof(buffer));
210 
211 	while (file.peek() != 'E')
212 	{
213 		typerule newrule = typerule(& file, & cout);
214 		file.getline(buffer, sizeof(buffer));
215 		tail_vector.push_back(newrule);
216 	}
217 
218 	// read the default modifications for residues...
219 
220 	while (file.peek() != 'B') file.getline(buffer, sizeof(buffer));	// default...
221 	file.getline(buffer, sizeof(buffer)); mod[0] = new sb_data_res; mod[0]->ReadModification(file);
222 	if (mod[0]->td_vector.size() > 0)
223 	{
224 		assertion_failed(__FILE__, __LINE__, "BODY_MOD should not have TORDEF lines!");
225 	}
226 
227 	while (file.peek() != 'H') file.getline(buffer, sizeof(buffer));	// head-mods
228 	file.getline(buffer, sizeof(buffer)); mod[1] = new sb_data_res; mod[1]->ReadModification(file);
229 	if (mod[1]->td_vector.size() > 0)
230 	{
231 		assertion_failed(__FILE__, __LINE__, "HEAD_MOD should not have TORDEF lines!");
232 	}
233 
234 	while (file.peek() != 'T') file.getline(buffer, sizeof(buffer));	// tail-mods
235 	file.getline(buffer, sizeof(buffer)); mod[2] = new sb_data_res; mod[2]->ReadModification(file);
236 	if (mod[2]->td_vector.size() > 0)
237 	{
238 		assertion_failed(__FILE__, __LINE__, "TAIL_MOD should not have TORDEF lines!");
239 	}
240 
241 	// read descriptions for the residues...
242 
243 	while (file.peek() != 'E')
244 	{
245 		if (file.peek() == 'R')
246 		{
247 			sb_data_res newres; file >> newres;
248 			resi_vector.push_back(newres);
249 		}
250 		else file.getline(buffer, sizeof(buffer));
251 	}
252 
253 	// ready!!!
254 
255 	file.close();
256 }
257 
~sequencebuilder(void)258 sequencebuilder::~sequencebuilder(void)
259 {
260 	delete mod[0];
261 	delete mod[1];
262 	delete mod[2];
263 }
264 
265 // here we use only the first coordinate set, no matter how many there are...
266 
Build(model * mdl,sb_chain_descriptor * chde,bool do_centering)267 void sequencebuilder::Build(model * mdl, sb_chain_descriptor * chde, bool do_centering)
268 {
269 	bool bad_chde = false;
270 	if (chde->seq1 == NULL && chde->seq3 == NULL) bad_chde = true;
271 	if (chde->mode_1_letter)
272 	{
273 		if (chde->seq1 == NULL) bad_chde = true;
274 	}
275 	else
276 	{
277 		if (chde->seq3 == NULL) bad_chde = true;
278 	}
279 
280 	if (bad_chde)
281 	{
282 		assertion_failed(__FILE__, __LINE__, "bad input");
283 	}
284 
285 	const int num_res = (chde->mode_1_letter ? chde->seq1->size() : chde->seq3->size());
286 	const int num_cs = mdl->GetCRDSetCount();
287 
288 	fGL b1crd[3] = { 1.0, 1.0, 0.0 };		// these should be const...
289 	fGL b2crd[3] = { 1.0, 0.0, 0.0 };
290 	fGL b3crd[3] = { 0.0, 0.0, 0.0 };
291 
292 	atom base[3] =
293 	{
294 		atom(element(), b1crd, num_cs),		// these should be const...
295 		atom(element(), b2crd, num_cs),
296 		atom(element(), b3crd, num_cs)
297 	};
298 
299 	id_vector.resize(0);
300 	atom_vector.resize(0);
301 	allatm_vector.resize(0);
302 
303 	id_vector.push_back(main_vector[0].prev[0]); atom_vector.push_back(& base[0]);
304 	id_vector.push_back(main_vector[0].prev[1]); atom_vector.push_back(& base[1]);
305 	id_vector.push_back(main_vector[0].prev[2]); atom_vector.push_back(& base[2]);
306 
307 	for (i32u n1 = 0;n1 < num_res;n1++)
308 	{
309 		bool first_res = (n1 == 0);
310 		bool last_res = (n1 == (num_res - 1));
311 
312 		chde->curr_res = n1;
313 
314 		chde->c_crd_BGN = chde->c_crd_END = NOT_DEFINED;
315 		for (i32u n2 = 0;n2 < chde->c_crd_v.size();n2++)
316 		{
317 			if (chde->c_crd_BGN < 0 && chde->c_crd_v[n2].pos == n1) chde->c_crd_BGN = n2;
318 			if (chde->c_crd_BGN >= 0 && chde->c_crd_END < 0 && chde->c_crd_v[n2].pos > n1) chde->c_crd_END = n2;
319 		}	if (chde->c_crd_BGN >= 0 && chde->c_crd_END < 0) chde->c_crd_END = chde->c_crd_v.size();
320 
321 		chde->c_tor_BGN = chde->c_tor_END = NOT_DEFINED;
322 		for (i32u n2 = 0;n2 < chde->c_tor_v.size();n2++)
323 		{
324 			if (chde->c_tor_BGN < 0 && chde->c_tor_v[n2].pos == n1) chde->c_tor_BGN = n2;
325 			if (chde->c_tor_BGN >= 0 && chde->c_tor_END < 0 && chde->c_tor_v[n2].pos > n1) chde->c_tor_END = n2;
326 		}	if (chde->c_tor_BGN >= 0 && chde->c_tor_END < 0) chde->c_tor_END = chde->c_tor_v.size();
327 
328 		td_v.resize(0);
329 		for (i32u n2 = 0;n2 < td_mc_store.size();n2++)
330 		{
331 			td_v.push_back(td_mc_store[n2]);
332 		}
333 
334 		// build main chain.
335 
336 		for (i32u n2 = 0;n2 < main_vector.size();n2++)
337 		{
338 			fGL crd[3];
339 			Convert(chde, & main_vector[n2], crd);
340 
341 			id_vector.push_back(main_vector[n2].id);
342 			atom newatom(main_vector[n2].el, crd, num_cs);
343 
344 			newatom.formal_charge = main_vector[n2].f_chrg;
345 
346 			mdl->AddAtom_lg(newatom);
347 			atom_vector.push_back(& mdl->GetLastAtom());
348 			allatm_vector.push_back(& mdl->GetLastAtom());
349 
350 			if (!first_res || n2 != 0)
351 			{
352 				i32s p_ind = 0;
353 				while (id_vector[p_ind] != main_vector[n2].prev[0]) p_ind++;
354 				atom * prev = atom_vector[p_ind];
355 
356 				bond newbond(prev, & mdl->GetLastAtom(), main_vector[n2].bt);
357 				mdl->AddBond(newbond);
358 			}
359 		}
360 
361 		// build connection (if needed).
362 
363 		if (!last_res)
364 		{
365 			for (i32u n2 = 0;n2 < conn_vector.size();n2++)
366 			{
367 				fGL crd[3];
368 				Convert(chde, & conn_vector[n2], crd);
369 
370 				id_vector.push_back(conn_vector[n2].id);
371 				atom newatom(conn_vector[n2].el, crd, num_cs);
372 
373 				newatom.formal_charge = conn_vector[n2].f_chrg;
374 
375 				mdl->AddAtom_lg(newatom);
376 				atom_vector.push_back(& mdl->GetLastAtom());
377 				allatm_vector.push_back(& mdl->GetLastAtom());
378 
379 				i32s p_ind = 0;
380 				while (id_vector[p_ind] != conn_vector[n2].prev[0]) p_ind++;
381 				atom * prev = atom_vector[p_ind];
382 
383 				bond newbond(prev, & mdl->GetLastAtom(), conn_vector[n2].bt);
384 				mdl->AddBond(newbond);
385 			}
386 		}
387 
388 		if (first_res)		// head-mods
389 		{
390 			BuildResidue(chde, mdl, mod[1]);
391 
392 			// a hardcoded fix implemented:
393 			// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
394 			// amino : need to set amino-terminal N formal_charge.
395 
396 			if (type == chn_info::amino_acid)
397 			{
398 				i32u index = 0;
399 				while (index < id_vector.size())
400 				{
401 					if (id_vector[index] == 0x0000) break;
402 					else index++;
403 				}
404 
405 				if (index >= id_vector.size())
406 				{
407 					assertion_failed(__FILE__, __LINE__, "amino-terminal N missing?");
408 				}
409 
410 				atom_vector[index]->formal_charge = +1;
411 			}
412 		}
413 		else if (last_res)	// tail-mods
414 		{
415 			BuildResidue(chde, mdl, mod[2]);
416 		}
417 		else			// default...
418 		{
419 			BuildResidue(chde, mdl, mod[0]);
420 		}
421 
422 		i32u res = 0;
423 		while (true)
424 		{
425 			if (chde->mode_1_letter)
426 			{
427 				if (resi_vector[res].symbol1 == chde->seq1->operator[](n1)) break;
428 			}
429 			else
430 			{
431 				const char * seq3str = chde->seq3->operator[](n1);
432 				if (!strcmp(resi_vector[res].symbol3, seq3str)) break;
433 			}
434 
435 			if (res != resi_vector.size()) res++;
436 			else break;
437 		}
438 
439 		if (res != resi_vector.size())
440 		{
441 			for (i32u n2 = 0;n2 < resi_vector[res].td_vector.size();n2++)
442 			{
443 				td_v.push_back(resi_vector[res].td_vector[n2]);
444 			}
445 
446 			BuildResidue(chde, mdl, & resi_vector[res]);
447 		}
448 		else
449 		{
450 			ostringstream str;
451 			str << _("unknown residue ");
452 
453 			if (chde->mode_1_letter)
454 			{
455 				str << chde->seq1->operator[](n1);
456 			}
457 			else
458 			{
459 				str << chde->seq3->operator[](n1);
460 			}
461 
462 			str << endl << ends;
463 			if (mdl->verbosity >= 2) mdl->PrintToLog(str.str().c_str());
464 		}
465 
466 		vector<i32s> tmpv1; atmr_vector tmpv2;
467 		for (i32u n2 = 0;n2 < id_vector.size();n2++)
468 		{
469 			if (id_vector[n2] & 0xFF00) continue;
470 
471 			tmpv1.push_back(id_vector[n2]);
472 			tmpv2.push_back(atom_vector[n2]);
473 		}
474 
475 		id_vector.resize(0);
476 		atom_vector.resize(0);
477 
478 		for (i32u n2 = 0;n2 < tmpv1.size();n2++)
479 		{
480 			id_vector.push_back(tmpv1[n2] + 0xFF00);
481 			atom_vector.push_back(tmpv2[n2]);
482 		}
483 	}
484 
485 	id_vector.resize(0);
486 	atom_vector.resize(0);
487 
488 	// add hydrogens for a relevant range of atoms...
489 
490 	for (i32u n1 = 0;n1 < allatm_vector.size();n1++)
491 	{
492 		mdl->AddHydrogens(allatm_vector[n1]);
493 	}
494 
495 	allatm_vector.resize(0);
496 
497 	// do centering (if asked). can't affect current_eng object
498 	// since above many atoms/bonds added -> current_eng deleted!
499 
500 	if (do_centering) mdl->CenterCRDSet(0, true);
501 }
502 
503 // here we will identify the sequences (if there is any), write down the sequences
504 // into model::ref_civ and update the /mol/chn/res-numbering...
505 
506 // only model::UpdateChains() should call this???
507 // only model::UpdateChains() should call this???
508 // only model::UpdateChains() should call this???
509 
Identify(model * mdl)510 void sequencebuilder::Identify(model * mdl)
511 {
512 	if (!mdl->IsGroupsClean())
513 	{
514 		assertion_failed(__FILE__, __LINE__, "!mdl->IsGroupsClean()");
515 	}
516 
517 	if (mdl->ref_civ == NULL)
518 	{
519 		assertion_failed(__FILE__, __LINE__, "mdl->ref_civ == NULL");
520 	}
521 
522 	// here we will find all possible chains and identify them.
523 
524 	for (i32s n1 = 0;n1 < mdl->nmol;n1++)
525 	{
526 		iter_al range[2];
527 		mdl->GetRange(0, n1, range);
528 
529 		vector<atom *> head_atoms;
530 		vector<atom *> tail_atoms;
531 
532 		for (iter_al it1 = range[0];it1 != range[1];it1++)
533 		{
534 			i32s tmp1 = (* it1).el.GetAtomicNumber();
535 
536 			if (tmp1 == main_vector.front().el.GetAtomicNumber())		// look for heads...
537 			{
538 				for (i32u n2 = 0;n2 < head_vector.size();n2++)
539 				{
540 					bool flag = head_vector[n2].Check(mdl, & (* it1), 0);
541 					if (!flag) continue; head_atoms.push_back(& (* it1)); break;
542 				}
543 			}
544 
545 			if (tmp1 == main_vector.back().el.GetAtomicNumber())		// look for tails...
546 			{
547 				for (i32u n2 = 0;n2 < tail_vector.size();n2++)
548 				{
549 					bool flag = tail_vector[n2].Check(mdl, & (* it1), 0);
550 					if (!flag) continue; tail_atoms.push_back(& (* it1)); break;
551 				}
552 			}
553 		}
554 
555 		// now we have all possible head/tail atoms. next we have to check
556 		// all possible paths between them to find all possible main chains...
557 
558 		path_vector.resize(0);
559 		for (i32u n2 = 0;n2 < head_atoms.size();n2++)
560 		{
561 			for (i32u n3 = 0;n3 < tail_atoms.size();n3++)
562 			{
563 				FindPath(mdl, head_atoms[n2], tail_atoms[n3]);
564 			}
565 		}
566 
567 		if (path_vector.size())
568 		{
569 			ostringstream str;
570 			str << path_vector.size() << _(" chains:") << endl << ends;
571 			if (mdl->verbosity >= 3) mdl->PrintToLog(str.str().c_str());
572 		}
573 
574 		for (i32s n2 = 0;n2 < (i32s) path_vector.size();n2++)
575 		{
576 			for (i32s n3 = 0;n3 < ((i32s) path_vector[n2].size()) - 1;n3++)		// tag the main-chain bonds...
577 			{
578 				iter_cl it1 = path_vector[n2][n3]->GetConnRecsBegin();
579 				while ((* it1).atmr != path_vector[n2][n3 + 1]) it1++;
580 				(* it1).bndr->flags[0] = true;
581 			}
582 
583 			vector<char> tmpseq1;
584 			vector<char *> tmpseq3;
585 
586 			i32u acount = 0;	// atom counter
587 			i32u rcount = 0;	// residue counter
588 
589 			while (acount < path_vector[n2].size())
590 			{
591 				// rsize is the residue size (how many atoms it will chop away from the
592 				// path_vector). we use it to determine whether we have reached the last
593 				// residue, and to increment acount.
594 
595 				int head_el = NOT_DEFINED;
596 				int tail_el = NOT_DEFINED;
597 
598 				bool is_first = !rcount;
599 				if (is_first && type == chn_info::amino_acid)
600 				{
601 					// an extra test for an ACE-like block group.
602 					// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
603 					// -NH2 or -NH3 group -> -NH-X- group
604 
605 					atom * refA = path_vector[n2][acount + 0];
606 					atom * refB = path_vector[n2][acount + 1];
607 					atom * refX = NULL;
608 
609 					for (iter_cl itX = refA->GetConnRecsBegin();itX != refA->GetConnRecsEnd();itX++)
610 					{
611 						if ((* itX).atmr == refB) continue;
612 						if ((* itX).atmr->el.GetAtomicNumber() <= 1) continue;
613 
614 						refX = (* itX).atmr;
615 					}
616 
617 					if (refX != NULL)
618 					{
619 						cout << _("found an ACE-like-block!") << endl;
620 						//exit(EXIT_FAILURE);
621 
622 						head_el = refX->el.GetAtomicNumber();
623 						is_first = false;
624 					}
625 				}
626 
627 				bool is_last = (acount + main_vector.size()) >= path_vector[n2].size();
628 				if (is_last && type == chn_info::amino_acid)
629 				{
630 					// an extra test for an NME-like block group.
631 					// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
632 					// O=C-O group -> O=C-X group (where X != O)
633 
634 					atom * refA = path_vector[n2][acount + 2];
635 					atom * refB = path_vector[n2][acount + 1];
636 					atom * refX = NULL;
637 
638 					for (iter_cl itX = refA->GetConnRecsBegin();itX != refA->GetConnRecsEnd();itX++)
639 					{
640 						if ((* itX).atmr == refB) continue;
641 						if ((* itX).bndr->bt.GetValue() != 1) continue;
642 						if ((* itX).atmr->el.GetAtomicNumber() == 8) continue;
643 
644 						refX = (* itX).atmr;
645 					}
646 
647 					if (refX != NULL)
648 					{
649 						cout << _("found an NME-like-block!") << endl;
650 						//exit(EXIT_FAILURE);
651 
652 						tail_el = refX->el.GetAtomicNumber();
653 						is_last = false;
654 					}
655 				}
656 
657 				i32u rsize = main_vector.size();
658 				if (!is_last) rsize += conn_vector.size();
659 
660 				// now we start identifying the residues.
661 
662 				// we make a simple template for each of them, compare those to
663 				// the residue descriptions we have, and pick the closest match.
664 				// there may not be any missing atoms, and the closest match is
665 				// the one with largest number of correct atoms identified.
666 
667 				i32s tmp1[2] = { NOT_DEFINED, NOT_DEFINED };
668 				for (i32s n4 = 0;n4 < (i32s) resi_vector.size();n4++)
669 				{
670 					vector<sb_tdata> tdata;
671 					BuildTemplate(tdata, n4, is_first, is_last);
672 
673 					// do the template mods if blockgroups!!!
674 					// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
675 
676 					if (head_el != NOT_DEFINED)
677 					{
678 						if (is_first)
679 						{
680 							assertion_failed(__FILE__, __LINE__, "bad head_el");
681 						}
682 
683 						// ACE needs no modifications...
684 					}
685 
686 					if (tail_el != NOT_DEFINED)
687 					{
688 						if (is_last)
689 						{
690 							assertion_failed(__FILE__, __LINE__, "bad tail_el");
691 						}
692 
693 						for (i32s n5 = 0;n5 < (i32s) tdata.size();n5++)
694 						{
695 							if (tdata[n5].id[0] == 0x11)
696 							{
697 								tdata[n5].el = element(tail_el);
698 							}
699 						}
700 					}
701 
702 					// set the main chain stuff...
703 
704 					for (i32s n5 = 0;n5 < (i32s) main_vector.size();n5++)
705 					{
706 						tdata[n5].ref = path_vector[n2][acount + n5];
707 					}
708 
709 					// set the connection stuff...
710 
711 					if (!is_last)
712 					{
713 						const int mvsz = main_vector.size();
714 						for (i32s n5 = 0;n5 < (i32s) conn_vector.size();n5++)
715 						{
716 							tdata[mvsz + n5].ref = path_vector[n2][acount + mvsz + n5];
717 						}
718 					}
719 
720 					bool result = CheckTemplate(tdata, 0);
721 ///////////////////////////////////////////////////////////////////////////////////////////
722 //cout << "checktemplate " << n4 << " " << is_first << " " << is_last << " ; ";
723 //cout << "result = " << result << endl; int zzz; cin >> zzz;
724 ///////////////////////////////////////////////////////////////////////////////////////////
725 					if (result && (tmp1[0] == NOT_DEFINED || ((i32s) tdata.size()) > tmp1[1]))
726 					{
727 						tmp1[0] = n4;
728 						tmp1[1] = (i32s) tdata.size();
729 					}
730 				}
731 
732 				if (tmp1[0] == NOT_DEFINED)
733 				{
734 					ostringstream str;
735 					str << _("WARNING : residue ") << rcount << _(" was of unknown type!!!") << endl << ends;
736 					if (mdl->verbosity >= 2) mdl->PrintToLog(str.str().c_str());
737 
738 					tmpseq1.push_back('?');		// handle the sequence...
739 
740 					char * tmps = new char[4];
741 					strcpy(tmps, "???");
742 
743 					tmpseq3.push_back(tmps);	// handle the sequence...
744 
745 					for (i32u n3 = 0;n3 < main_vector.size();n3++)		// handle the numbers...
746 					{
747 						path_vector[n2][acount + n3]->builder_res_id = NOT_DEFINED;	// WHAT HERE???
748 
749 						path_vector[n2][acount + n3]->id[1] = mdl->ref_civ->size();
750 						path_vector[n2][acount + n3]->id[2] = rcount;
751 					}
752 				}
753 				else
754 				{
755 					tmpseq1.push_back(resi_vector[tmp1[0]].symbol1);
756 
757 					char * tmps = new char[4];
758 					strcpy(tmps, resi_vector[tmp1[0]].symbol3);
759 
760 					tmpseq3.push_back(tmps);
761 
762 					// rebuild the best matching template and use that to
763 					// set up the atom ID numbers...
764 
765 					vector<sb_tdata> tdata;
766 					BuildTemplate(tdata, tmp1[0], is_first, is_last);
767 
768 					for (i32s n4 = 0;n4 < (i32s) main_vector.size();n4++)
769 					{
770 						tdata[n4].ref = path_vector[n2][acount + n4];
771 					}
772 
773 					if (!is_last)
774 					{
775 						const int mvsz = main_vector.size();
776 						for (i32s n5 = 0;n5 < (i32s) conn_vector.size();n5++)
777 						{
778 							tdata[mvsz + n5].ref = path_vector[n2][acount + mvsz + n5];
779 						}
780 					}
781 
782 					CheckTemplate(tdata, 0);
783 					for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
784 					{
785 						tdata[n4].ref->builder_res_id = (resi_vector[tmp1[0]].id << 8) + tdata[n4].id[0];
786 
787 						tdata[n4].ref->id[1] = mdl->ref_civ->size();
788 						tdata[n4].ref->id[2] = rcount;
789 					}
790 
791 /////////////////////////////////////////////////////////////////////////// exceptions
792 /////////////////////////////////////////////////////////////////////////// exceptions
793 
794 // a hardcoded fix implemented:
795 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
796 // amino : fix the unambiguous LEU residues.
797 
798 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'L')
799 {
800 	atom * ref20 = NULL;
801 	atom * ref21 = NULL;
802 	atom * ref22 = NULL;
803 	atom * ref23 = NULL;
804 
805 	for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
806 	{
807 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x20) ref20 = tdata[n4].ref;
808 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x21) ref21 = tdata[n4].ref;
809 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x22) ref22 = tdata[n4].ref;
810 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x23) ref23 = tdata[n4].ref;
811 	}
812 
813 	if (!ref20) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref20 not found!");
814 	if (!ref21) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref21 not found!");
815 	if (!ref22) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref22 not found!");
816 	if (!ref23) assertion_failed(__FILE__, __LINE__, "LEU-fix : ref23 not found!");
817 
818 	v3d<fGL> v1 = v3d<fGL>(ref21->GetCRD(0), ref20->GetCRD(0));
819 	v3d<fGL> v2 = v3d<fGL>(ref21->GetCRD(0), ref22->GetCRD(0));
820 	v3d<fGL> v3 = v3d<fGL>(ref22->GetCRD(0), ref23->GetCRD(0));
821 	fGL tor = v1.tor(v2, v3);
822 
823 	if (tor < 0.0)	// should be +/- 120 deg (about +/- 2.1 rad).
824 	{
825 		cout << _("LEU-fix!!!") << endl;
826 
827 		// ok, this seems to be "wrong" way...
828 		// fix by exhanging 22<->23.
829 
830 		int tmpid;
831 
832 		tmpid = ref22->builder_res_id;
833 		ref22->builder_res_id = ref23->builder_res_id;
834 		ref23->builder_res_id = tmpid;
835 	}
836 }
837 
838 // a hardcoded fix implemented:
839 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
840 // amino : fix the unambiguous VAL residues.
841 
842 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'V')
843 {
844 	atom * ref01 = NULL;
845 	atom * ref20 = NULL;
846 	atom * ref21 = NULL;
847 	atom * ref22 = NULL;
848 
849 	for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
850 	{
851 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x01) ref01 = tdata[n4].ref;
852 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x20) ref20 = tdata[n4].ref;
853 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x21) ref21 = tdata[n4].ref;
854 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x22) ref22 = tdata[n4].ref;
855 	}
856 
857 	if (!ref01) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref01 not found!");
858 	if (!ref20) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref20 not found!");
859 	if (!ref21) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref21 not found!");
860 	if (!ref22) assertion_failed(__FILE__, __LINE__, "VAL-fix : ref22 not found!");
861 
862 	v3d<fGL> v1 = v3d<fGL>(ref20->GetCRD(0), ref01->GetCRD(0));
863 	v3d<fGL> v2 = v3d<fGL>(ref20->GetCRD(0), ref21->GetCRD(0));
864 	v3d<fGL> v3 = v3d<fGL>(ref21->GetCRD(0), ref22->GetCRD(0));
865 	fGL tor = v1.tor(v2, v3);
866 
867 	if (tor < 0.0)	// should be +/- 120 deg (about +/- 2.1 rad).
868 	{
869 		cout << _("VAL-fix!!!") << endl;
870 
871 		// ok, this seems to be "wrong" way...
872 		// fix by exhanging 21<->22.
873 
874 		int tmpid;
875 
876 		tmpid = ref21->builder_res_id;
877 		ref21->builder_res_id = ref22->builder_res_id;
878 		ref22->builder_res_id = tmpid;
879 	}
880 }
881 
882 // a hardcoded fix implemented:
883 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
884 // amino : fix the unambiguous TRP residues.
885 
886 if (type == chn_info::amino_acid && resi_vector[tmp1[0]].symbol1 == 'W')
887 {
888 	atom * ref23 = NULL;
889 	atom * ref25 = NULL;
890 	atom * ref26 = NULL;
891 	atom * ref28 = NULL;
892 	atom * ref29 = NULL;
893 
894 	for (i32s n4 = 0;n4 < (i32s) tdata.size();n4++)
895 	{
896 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x23) ref23 = tdata[n4].ref;
897 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x25) ref25 = tdata[n4].ref;
898 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x26) ref26 = tdata[n4].ref;
899 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x28) ref28 = tdata[n4].ref;
900 		if ((tdata[n4].ref->builder_res_id & 0xFF) == 0x29) ref29 = tdata[n4].ref;
901 	}
902 
903 	if (!ref23) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref23 not found!");
904 	if (!ref25) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref25 not found!");
905 	if (!ref26) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref26 not found!");
906 	if (!ref28) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref28 not found!");
907 	if (!ref29) assertion_failed(__FILE__, __LINE__, "TRP-fix : ref29 not found!");
908 
909 	bond b1 = bond(ref23, ref25, bondtype('S'));
910 	iter_bl itb1 = find(mdl->GetBondsBegin(), mdl->GetBondsEnd(), b1);
911 	bool b1exists = (itb1 != mdl->GetBondsEnd());
912 
913 	bond b2 = bond(ref23, ref29, bondtype('S'));
914 	iter_bl itb2 = find(mdl->GetBondsBegin(), mdl->GetBondsEnd(), b2);
915 	bool b2exists = (itb2 != mdl->GetBondsEnd());
916 
917 	if (b1exists == b2exists) assertion_failed(__FILE__, __LINE__, "b1exists == b2exists");
918 
919 	if (b1exists && !b2exists)
920 	{
921 		cout << _("TRP-fix!!!") << endl;
922 
923 		// ok, this seems to be "wrong" way...
924 		// fix by exhanging 25<->29 and 26<->28.
925 
926 		int tmpid;
927 
928 		tmpid = ref25->builder_res_id;
929 		ref25->builder_res_id = ref29->builder_res_id;
930 		ref29->builder_res_id = tmpid;
931 
932 		tmpid = ref26->builder_res_id;
933 		ref26->builder_res_id = ref28->builder_res_id;
934 		ref28->builder_res_id = tmpid;
935 	}
936 }
937 
938 /////////////////////////////////////////////////////////////////////////// exceptions
939 /////////////////////////////////////////////////////////////////////////// exceptions
940 
941 				}
942 
943 				rcount++;
944 				acount += rsize;
945 			}
946 
947 			// next we will write down the sequence...
948 
949 			if (tmpseq1.size() != tmpseq3.size())
950 			{
951 				assertion_failed(__FILE__, __LINE__, "tmpseq1.size() != tmpseq3.size()");
952 			}
953 
954 			chn_info newinfo(type, tmpseq1.size());
955 
956 			newinfo.id_mol = n1;
957 			newinfo.id_chn = mdl->ref_civ->size();
958 
959 			for (i32u n3 = 0;n3 < tmpseq1.size();n3++)
960 			{
961 				newinfo.sequence1[n3] = tmpseq1[n3];				// a char...
962 
963 				newinfo.sequence3[n3] = new char[strlen(tmpseq3[n3]) + 1];	// an array...
964 				strcpy(newinfo.sequence3[n3], tmpseq3[n3]);			// an array...
965 			}
966 
967 			mdl->ref_civ->push_back(newinfo);
968 
969 			// and make some output...
970 
971 			ostringstream str;
972 			str << _("chain ") << newinfo.id_chn;
973 			str << _(", length ") << newinfo.length << ":" << endl;
974 
975 			for (i32u n3 = 0;n3 < tmpseq1.size();n3++)
976 			{
977 				str << tmpseq3[n3] << " ";
978 
979 				bool is_break = !((n3 + 1) % 10);
980 				bool is_end = ((n3 + 1) == tmpseq1.size());
981 
982 				if (is_break || is_end) str << endl;
983 
984 				// now free the text memory blocks...
985 
986 				delete[] tmpseq3[n3];
987 				tmpseq3[n3] = NULL;
988 			}
989 
990 			str << ends;
991 			if (mdl->verbosity >= 3) mdl->PrintToLog(str.str().c_str());
992 
993 			// finally un-tag the main-chain bonds...
994 
995 			for (i32s n3 = 0;n3 < ((i32s) path_vector[n2].size()) - 1;n3++)
996 			{
997 				iter_cl it1 = path_vector[n2][n3]->GetConnRecsBegin();
998 				while ((* it1).atmr != path_vector[n2][n3 + 1]) it1++;
999 				(* it1).bndr->flags[0] = false;
1000 			}
1001 		}
1002 
1003 		// now check the HYDROGENS that are handed by model::AddHydrogens() and not
1004 		// (except in some cases) by the sequence builder ; for hydrogens copy the res/atm
1005 		// ID's from the heavy atom and set a special builder_res_id lower part (0xFF).
1006 
1007 		// now for example a "select residue" operation will not skip the hydroges...
1008 
1009 		for (iter_al it1 = range[0];it1 != range[1];it1++)
1010 		{
1011 			if ((* it1).el.GetAtomicNumber() != 1) continue;
1012 
1013 			if ((* it1).GetConnRecCount() != 1)
1014 			{
1015 				cout << _("WARNING : seqbuild : H atom with abnormal connectivity found.") << endl;
1016 				continue;
1017 			}
1018 
1019 			bool already_has_id = true;
1020 			if ((* it1).id[1] < 0) already_has_id = false;
1021 			if ((* it1).id[2] < 0) already_has_id = false;
1022 
1023 			if (already_has_id != true)
1024 			{
1025 				atom * heavy = (* it1).GetFirstConnRec().atmr;
1026 				if (heavy->id[1] < 0) continue;		// id < 0 means NOT_DEFINED...
1027 				if (heavy->id[2] < 0) continue;		// id < 0 means NOT_DEFINED...
1028 
1029 				(* it1).builder_res_id = (heavy->builder_res_id | 0xFF);
1030 
1031 				(* it1).id[1] = heavy->id[1];		// chn id
1032 				(* it1).id[2] = heavy->id[2];		// res id
1033 			}
1034 		}
1035 	}
1036 
1037 	path_vector.resize(0);
1038 
1039 	// traditionally, we sorted the atom list here to make the newly assigned chains in nice order.
1040 	// nowadays, this is done at model::UpdateChains() since there might be many different seqbuilder
1041 	// objects (like for amino/nucleic acid variants) and we like to use them all...
1042 }
1043 
BuildResidue(sb_chain_descriptor * chde,model * mdl,sb_data_res * res)1044 void sequencebuilder::BuildResidue(sb_chain_descriptor * chde, model * mdl, sb_data_res * res)
1045 {
1046 	const int num_cs = mdl->GetCRDSetCount();
1047 
1048 	for (i32u n1 = 0;n1 < res->atm_vector.size();n1++)
1049 	{
1050 		fGL crd[3];
1051 		Convert(chde, & res->atm_vector[n1], crd);
1052 
1053 		id_vector.push_back(res->atm_vector[n1].id);
1054 		atom newatom(res->atm_vector[n1].el, crd, num_cs);
1055 
1056 		newatom.formal_charge = res->atm_vector[n1].f_chrg;
1057 
1058 		mdl->AddAtom_lg(newatom);
1059 		atom_vector.push_back(& mdl->GetLastAtom());
1060 		allatm_vector.push_back(& mdl->GetLastAtom());
1061 
1062 		i32s p_ind = 0;
1063 		while (id_vector[p_ind] != res->atm_vector[n1].prev[0]) p_ind++;
1064 		atom * prev = atom_vector[p_ind];
1065 
1066 		bond newbond(prev, & mdl->GetLastAtom(), res->atm_vector[n1].bt);
1067 		mdl->AddBond(newbond);
1068 	}
1069 
1070 	for (i32u n1 = 0;n1 < res->bnd_vector.size();n1++)
1071 	{
1072 		atom * tmp1[2]; i32s tmp2;
1073 		tmp2 = 0; while (id_vector[tmp2] != res->bnd_vector[n1].atm[0]) tmp2++; tmp1[0] = atom_vector[tmp2];
1074 		tmp2 = 0; while (id_vector[tmp2] != res->bnd_vector[n1].atm[1]) tmp2++; tmp1[1] = atom_vector[tmp2];
1075 
1076 		bond newbond(tmp1[0], tmp1[1], res->bnd_vector[n1].bt);
1077 		mdl->AddBond(newbond);
1078 	}
1079 }
1080 
Convert(sb_chain_descriptor * chde,sb_data_atm * adata,fGL * crd)1081 void sequencebuilder::Convert(sb_chain_descriptor * chde, sb_data_atm * adata, fGL * crd)
1082 {
1083 //	cout << "Convert() begins..." << endl;
1084 //	for (i32u n1 = 0;n1 < id_vector.size();n1++)
1085 //	{
1086 //		cout << "id_vector[" << n1 << "] ";
1087 //		cout << "0x" << hex << id_vector[n1] << " ";
1088 //		cout << atom_vector[n1] << endl;
1089 //	}	int pause ; cin >> pause;
1090 
1091 ///////////////////////////////////////////////////////////////////////////
1092 
1093 	atom * prev[3]; f64 ic[3];
1094 	for (i32s n1 = 0;n1 < 3;n1++)
1095 	{
1096 		i32u n2 = 0;
1097 		while (n2 < id_vector.size())
1098 		{
1099 			if (id_vector[n2] == adata->prev[n1]) break;
1100 			else n2++;
1101 		}
1102 
1103 		if (n2 >= id_vector.size())
1104 		{
1105 			assertion_failed(__FILE__, __LINE__, "prev not found!");
1106 		}
1107 
1108 		prev[n1] = atom_vector[n2];
1109 		ic[n1] = adata->ic1[n1];
1110 	}
1111 
1112 	const int tor_i = adata->ic2;
1113 ///////////////////////////////////////////////////////////
1114 //	int tor_i = adata->ic2;		// for debug only...
1115 ///////////////////////////////////////////////////////////
1116 
1117 	bool store = false;
1118 	i32s p0i = NOT_DEFINED;
1119 	i32s p1i = NOT_DEFINED;
1120 	i32s p2i = NOT_DEFINED;
1121 
1122 	if (tor_i > -1)
1123 	{
1124 		if (tor_i >= td_v.size())
1125 		{
1126 			ostringstream msg;
1127 			msg << "td_v is incomplete ; " << tor_i << ends;
1128 			assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1129 		}
1130 
1131 		if (!td_v[tor_i].flag)
1132 		{
1133 			if (adata->id != td_v[tor_i].id[0])
1134 			{
1135 				ostringstream msg;
1136 				msg << "id mismatch ; ";
1137 				msg << hex << adata->id << " != ";
1138 				msg << hex << td_v[tor_i].id[0] << ends;
1139 				assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1140 			}
1141 
1142 			td_v[tor_i].flag = true;
1143 			store = true;
1144 
1145 			if (fabs(ic[2]) > 0.0001)
1146 			{
1147 				assertion_failed(__FILE__, __LINE__, "tor error");
1148 			}
1149 		}
1150 
1151 		if (type == chn_info::amino_acid)
1152 		{
1153 			if (adata->id == 0x10) goto skip_checks;	// a known issue...
1154 			if (adata->id == 0x11) goto skip_checks;	// a known issue...
1155 		}
1156 
1157 		if (type == chn_info::nucleic_acid)
1158 		{
1159 			if (adata->id == 0x10) goto skip_checks;	// a known issue...
1160 			if (adata->id == 0x11) goto skip_checks;	// a known issue...
1161 		}
1162 
1163 		// check prev0
1164 
1165 		p0i = 0; while (p0i < (i32s) id_vector.size())
1166 		{
1167 			if (id_vector[p0i] == td_v[tor_i].id[1]) break;
1168 			else p0i++;
1169 		}
1170 
1171 		if (p0i >= (i32s) id_vector.size())
1172 		{
1173 			assertion_failed(__FILE__, __LINE__, "p0i not found");
1174 		}
1175 		else if (atom_vector[p0i] != prev[0])
1176 		{
1177 			ostringstream msg;
1178 			msg << "p0i mismatch ; ";
1179 			msg << "0x" << hex << id_vector[p0i] << " " << prev[0] << endl;
1180 			msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1181 			msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1182 			assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1183 		}
1184 
1185 		// check prev1
1186 
1187 		p1i = 0; while (p1i < (i32s) id_vector.size())
1188 		{
1189 			if (id_vector[p1i] == td_v[tor_i].id[2]) break;
1190 			else p1i++;
1191 		}
1192 
1193 		if (p1i >= (i32s) id_vector.size())
1194 		{
1195 			assertion_failed(__FILE__, __LINE__, "p1i not found");
1196 		}
1197 		else if (atom_vector[p1i] != prev[1])
1198 		{
1199 			ostringstream msg;
1200 			msg << "p1i mismatch ; ";
1201 			msg << "0x" << hex << id_vector[p1i] << " " << prev[1] << endl;
1202 			msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1203 			msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1204 			assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1205 		}
1206 
1207 		// check prev2
1208 
1209 		p2i = 0; while (p2i < (i32s) id_vector.size())
1210 		{
1211 			if (id_vector[p2i] == td_v[tor_i].id[3]) break;
1212 			else p2i++;
1213 		}
1214 
1215 		if (p2i >= (i32s) id_vector.size())
1216 		{
1217 			assertion_failed(__FILE__, __LINE__, "p2i not found");
1218 		}
1219 		else if (atom_vector[p2i] != prev[2])
1220 		{
1221 			ostringstream msg;
1222 			msg << "p2i mismatch ; ";
1223 			msg << "0x" << hex << id_vector[p2i] << " " << prev[2] << endl;
1224 			msg << "tordef " << hex << td_v[tor_i].id[0] << " " << hex << td_v[tor_i].id[1] << " ";
1225 			msg << hex << td_v[tor_i].id[2] << " " << hex << td_v[tor_i].id[3] << ends;
1226 			assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1227 		}
1228 	}
1229 	else
1230 	{
1231 	/*	for (i32u debug = 0;debug < td_v.size();debug++)	// for debug only...
1232 		{
1233 			bool all_match = true;
1234 			tor_i = debug;
1235 
1236 			// check prev0
1237 			p0i = 0; while (p0i < (i32s) id_vector.size())
1238 			{
1239 				if (id_vector[p0i] == td_v[tor_i].id[1]) break;
1240 				else p0i++;
1241 			}
1242 
1243 		if (p0i >= (i32s) id_vector.size() || atom_vector[p0i] != prev[0]) all_match = false;
1244 
1245 			// check prev1
1246 			p1i = 0; while (p1i < (i32s) id_vector.size())
1247 			{
1248 				if (id_vector[p1i] == td_v[tor_i].id[2]) break;
1249 				else p1i++;
1250 			}
1251 
1252 		if (p1i >= (i32s) id_vector.size() || atom_vector[p1i] != prev[1]) all_match = false;
1253 
1254 			// check prev2
1255 			p2i = 0; while (p2i < (i32s) id_vector.size())
1256 			{
1257 				if (id_vector[p2i] == td_v[tor_i].id[3]) break;
1258 				else p2i++;
1259 			}
1260 
1261 		if (p2i >= (i32s) id_vector.size() || atom_vector[p2i] != prev[2]) all_match = false;
1262 
1263 			if (all_match)
1264 			{
1265 				cout << "DEBUG : all_match ; tor_i = " << tor_i << endl;
1266 				exit(EXIT_FAILURE);
1267 			}
1268 		}
1269 
1270 		tor_i = NOT_DEFINED;	*/
1271 	}
1272 
1273 	skip_checks:
1274 
1275 	int c_crd_index = chde->c_crd_BGN;
1276 	while (c_crd_index < chde->c_crd_END)
1277 	{
1278 		if (adata->id == chde->c_crd_v[c_crd_index].atm_id) break;
1279 		else c_crd_index++;
1280 	}
1281 
1282 	if (c_crd_index < chde->c_crd_END)
1283 	{
1284 		// take the coordinates from constraints...
1285 		// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1286 
1287 		crd[0] = chde->c_crd_v[c_crd_index].crdX;
1288 		crd[1] = chde->c_crd_v[c_crd_index].crdY;
1289 		crd[2] = chde->c_crd_v[c_crd_index].crdZ;
1290 
1291 		if (tor_i > -1 && store)
1292 		{
1293 			v3d<fGL> v1 = v3d<fGL>(prev[0]->GetCRD(0), crd);
1294 			v3d<fGL> v2 = v3d<fGL>(prev[0]->GetCRD(0), prev[1]->GetCRD(0));
1295 			v3d<fGL> v3 = v3d<fGL>(prev[1]->GetCRD(0), prev[2]->GetCRD(0));
1296 			fGL tor = v1.tor(v2, v3);
1297 
1298 			if (tor_i >= chde->def_tor.size())
1299 			{
1300 				assertion_failed(__FILE__, __LINE__, "cannot store a torsion.");
1301 			}
1302 			else
1303 			{
1304 				chde->def_tor[tor_i] = tor;
1305 			}
1306 		}
1307 	}
1308 	else
1309 	{
1310 		// calculate the coordinates from internal coordinates...
1311 		// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1312 
1313 		if (tor_i > -1)
1314 		{
1315 			int c_tor_index = chde->c_tor_BGN;
1316 			while (c_tor_index < chde->c_tor_END)
1317 			{
1318 				if (tor_i == chde->c_tor_v[c_tor_index].tor_ind) break;
1319 				else c_tor_index++;
1320 			}
1321 
1322 			if (c_tor_index < chde->c_tor_END)
1323 			{
1324 				// use the constraint value, if available.
1325 
1326 				ic[2] += chde->c_tor_v[c_tor_index].tor_val;
1327 			}
1328 			else if (tor_i < chde->def_tor.size())
1329 			{
1330 				// use the default table value, if available.
1331 
1332 				ic[2] += chde->def_tor[tor_i];
1333 			}
1334 			else
1335 			{
1336 				// set a default value if the above fails;
1337 				// the default geometry is the extended one,
1338 				// EXCEPT for proline (15.0 deg and 0.0 deg).
1339 
1340 				bool is_pro = false;
1341 				if (chde->mode_1_letter)
1342 				{
1343 					if (chde->seq1->operator[](chde->curr_res) == 'P') is_pro = true;
1344 				}
1345 				else
1346 				{
1347 					if (!strcmp(chde->seq3->operator[](chde->curr_res), "PRO")) is_pro = true;
1348 				}
1349 
1350 				if (is_pro)
1351 				{
1352 					switch (tor_i)
1353 					{
1354 						case +3:
1355 						ic[2] += 15.0 * M_PI / 180.0;
1356 						break;
1357 
1358 						case +4:
1359 						ic[2] += 0.0 * M_PI / 180.0;
1360 						break;
1361 
1362 						default:
1363 						ic[2] += 180.0 * M_PI / 180.0;
1364 					}
1365 				}
1366 				else
1367 				{
1368 					ic[2] += 180.0 * M_PI / 180.0;
1369 				}
1370 			}
1371 		}
1372 
1373 		v3d<fGL> v1 = v3d<fGL>(prev[0]->GetCRD(0), prev[1]->GetCRD(0));
1374 
1375 		v3d<fGL> tmp1 = v3d<fGL>(prev[1]->GetCRD(0), prev[2]->GetCRD(0));
1376 		v3d<fGL> tmp2 = v1 * (tmp1.spr(v1) / v1.spr(v1));
1377 		v3d<fGL> v2 = tmp1 - tmp2;
1378 
1379 		v3d<fGL> v3 = v2.vpr(v1);
1380 
1381 		v1 = v1 * (ic[0] * cos(ic[1]) / v1.len());
1382 		v2 = v2 * (ic[0] * sin(ic[1]) * cos(ic[2]) / v2.len());
1383 		v3 = v3 * (ic[0] * sin(ic[1]) * sin(ic[2]) / v3.len());
1384 
1385 		v3d<fGL> tmp3 = v3d<fGL>(prev[0]->GetCRD(0)); tmp3 = tmp3 + (v1 + (v2 + v3));
1386 		for (i32s n1 = 0;n1 < 3;n1++) crd[n1] = tmp3.data[n1];
1387 	}
1388 }
1389 
BuildTemplate(vector<sb_tdata> & tdata,i32s res,bool is_first,bool is_last)1390 void sequencebuilder::BuildTemplate(vector<sb_tdata> & tdata, i32s res, bool is_first, bool is_last)
1391 {
1392 	BuildPartialT(tdata, main_vector);
1393 	if (!is_last) BuildPartialT(tdata, conn_vector);
1394 
1395 	if (is_first) BuildPartialT(tdata, mod[1]->atm_vector);		// head-mods
1396 	else if (is_last) BuildPartialT(tdata, mod[2]->atm_vector);	// tail-mods
1397 	else BuildPartialT(tdata, mod[0]->atm_vector);			// default...
1398 
1399 	BuildPartialT(tdata, resi_vector[res].atm_vector);
1400 }
1401 
BuildPartialT(vector<sb_tdata> & tdata,vector<sb_data_atm> & adata)1402 void sequencebuilder::BuildPartialT(vector<sb_tdata> & tdata, vector<sb_data_atm> & adata)
1403 {
1404 	for (i32u n1 = 0;n1 < adata.size();n1++)
1405 	{
1406 		sb_tdata newdata;
1407 		newdata.id[0] = adata[n1].id;
1408 
1409 		if (adata[n1].prev[0] & 0xFF00) newdata.id[1] = NOT_DEFINED;
1410 		else newdata.id[1] = adata[n1].prev[0];
1411 
1412 		newdata.el = adata[n1].el;
1413 		newdata.bt = adata[n1].bt;
1414 		newdata.ref = NULL;
1415 
1416 		tdata.push_back(newdata);
1417 	}
1418 }
1419 
1420 // idea is that this path-searching function should be used only inside a molecule.
1421 // this way we are sure to find always at least one path. we collect references to
1422 // atoms for all paths and collect this way all possible paths.
1423 
1424 // different flags should be used in type checking and in this path-searching...
1425 
FindPath(model * mdl,atom * ref1,atom * ref2,i32u index)1426 void sequencebuilder::FindPath(model * mdl, atom * ref1, atom * ref2, i32u index)
1427 {
1428 	if (index >= main_vector.size() + conn_vector.size()) index = 0;
1429 
1430 	if (index < main_vector.size())
1431 	{
1432 		if (ref1->el.GetAtomicNumber() != main_vector[index].el.GetAtomicNumber()) return;
1433 		if (!main_vector[index].tr->Check(mdl, ref1, 0)) return;
1434 	}
1435 	else
1436 	{
1437 		i32u index2 = index - main_vector.size();
1438 
1439 		if (ref1->el.GetAtomicNumber() != conn_vector[index2].el.GetAtomicNumber()) return;
1440 		if (!conn_vector[index2].tr->Check(mdl, ref1, 0)) return;
1441 	}
1442 
1443 	tmpatm_vector.push_back(ref1);
1444 
1445 //for (i32u debug = 0;debug < tmpatm_vector.size();debug++)
1446 //{
1447 //	cout << tmpatm_vector[debug]->el.GetSymbol();
1448 //}	cout << endl;
1449 
1450 	if (ref1 == ref2) path_vector.push_back(tmpatm_vector);
1451 	else
1452 	{
1453 		for (iter_cl it1 = ref1->GetConnRecsBegin();it1 != ref1->GetConnRecsEnd();it1++)
1454 		{
1455 			if ((* it1).bndr->flags[2]) continue;
1456 
1457 			(* it1).bndr->flags[2] = true;
1458 			FindPath(mdl, (* it1).atmr, ref2, (index + 1));
1459 			(* it1).bndr->flags[2] = false;
1460 		}
1461 	}
1462 
1463 	tmpatm_vector.pop_back();
1464 }
1465 
CheckTemplate(vector<sb_tdata> & tdata,i32s flag)1466 bool sequencebuilder::CheckTemplate(vector<sb_tdata> & tdata, i32s flag)
1467 {
1468 	vector<i32s> tmpv1;
1469 
1470 	// look for a suitable starting point to start matching in the template.
1471 	// if there is no such point we assume the template fits. however, the template
1472 	// could match since it is too small (for exaple, the template for glycine will
1473 	// match for all residues). therefore we have to test all templates and choose
1474 	// the biggest matching one!!!
1475 
1476 	i32u index = 0;
1477 	while (index < tdata.size())
1478 	{
1479 		if (tdata[index].ref != NULL)
1480 		{
1481 			tmpv1.resize(0);
1482 			for (i32u n1 = 0;n1 < tdata.size();n1++)
1483 			{
1484 				bool test1 = (tdata[n1].ref == NULL);
1485 				bool test2 = (tdata[n1].id[1] == tdata[index].id[0]);
1486 				if (test1 && test2) tmpv1.push_back(n1);
1487 			}
1488 
1489 			if (tmpv1.size()) break;
1490 		}
1491 
1492 		index++;
1493 	}
1494 
1495 	// if there is no suitable starting point, we have tested all atoms and
1496 	// none of them is missing -> template fits -> TRUE!!!
1497 
1498 	if (index == tdata.size()) return true;
1499 
1500 	// find all possible bonds that we can use to continue testing...
1501 
1502 	vector<crec> tmpv2; iter_cl it1;
1503 	for (it1 = tdata[index].ref->GetConnRecsBegin();it1 != tdata[index].ref->GetConnRecsEnd();it1++)
1504 	{
1505 		bool test1 = (* it1).bndr->flags[flag];
1506 		if (!test1) tmpv2.push_back((* it1));
1507 	}
1508 
1509 	// if there are less bonds than we need, the template can't match -> FALSE!!!
1510 
1511 	if (tmpv2.size() < tmpv1.size()) return false;
1512 
1513 	vector<i32s> tmpv3; tmpv3.resize(tmpv2.size());
1514 	for (i32u n1 = 0;n1 < tmpv3.size();n1++) tmpv3[n1] = n1;
1515 
1516 	// then we will check all bonds against the template in all possible permutations.
1517 	// if some combination is a match then leave the ID-numbers untouched, clean the
1518 	// bond flags and leave -> TRUE!!!
1519 
1520 	while (true)
1521 	{
1522 		bool test = true;
1523 		for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1524 		{
1525 			i32s el = tdata[tmpv1[n1]].el.GetAtomicNumber();
1526 			if (el != NOT_DEFINED && el != tmpv2[tmpv3[n1]].atmr->el.GetAtomicNumber()) test = false;
1527 
1528 			i32s bt = tdata[tmpv1[n1]].bt.GetValue();
1529 			if (bt != NOT_DEFINED && bt != tmpv2[tmpv3[n1]].bndr->bt.GetValue()) test = false;
1530 		}
1531 
1532 		if (test)
1533 		{
1534 			for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1535 			{
1536 				tmpv2[tmpv3[n1]].bndr->flags[flag] = true;
1537 				tdata[tmpv1[n1]].ref = tmpv2[tmpv3[n1]].atmr;
1538 			}
1539 
1540 			bool result = CheckTemplate(tdata, flag);
1541 
1542 			for (i32u n1 = 0;n1 < tmpv1.size();n1++)
1543 			{
1544 				tmpv2[tmpv3[n1]].bndr->flags[flag] = false;
1545 				if (!result) tdata[tmpv1[n1]].ref = NULL;
1546 			}
1547 
1548 			if (result) return true;
1549 		}
1550 
1551 		if (!next_permutation(tmpv3.begin(), tmpv3.end())) break;
1552 	}
1553 
1554 	// none of those combinations matched -> the template doesn't fit -> FALSE!!!
1555 
1556 	return false;
1557 }
1558 
1559 /*################################################################################################*/
1560 
sb_data_td(void)1561 sb_data_td::sb_data_td(void)
1562 {
1563 	id[0] = NOT_DEFINED;
1564 	id[1] = NOT_DEFINED;
1565 	id[2] = NOT_DEFINED;
1566 	id[3] = NOT_DEFINED;
1567 
1568 	flag = false;
1569 }
1570 
~sb_data_td(void)1571 sb_data_td::~sb_data_td(void)
1572 {
1573 }
1574 
operator >>(istream & p1,sb_data_td & p2)1575 istream & operator>>(istream & p1, sb_data_td & p2)
1576 {
1577 	char buffer[256];
1578 	while (p1.get() != 'F');
1579 
1580 	p1 >> p2.id[0];
1581 	p1 >> p2.id[1];
1582 	p1 >> p2.id[2];
1583 	p1 >> p2.id[3];
1584 
1585 	p2.flag = false;
1586 
1587 	return p1;
1588 }
1589 
1590 /*################################################################################################*/
1591 
sb_data_atm(void)1592 sb_data_atm::sb_data_atm(void)
1593 {
1594 	id = NOT_DEFINED;
1595 
1596 	el = element(1);
1597 	f_chrg = +0;
1598 
1599 	bt = bondtype(1);
1600 
1601 	ic2 = NOT_DEFINED;
1602 	for (i32s n1 = 0;n1 < 3;n1++)
1603 	{
1604 		prev[n1] = NOT_DEFINED;
1605 		ic1[n1] = 0.0;
1606 	}
1607 
1608 	tr = NULL;
1609 }
1610 
sb_data_atm(const sb_data_atm & p1)1611 sb_data_atm::sb_data_atm(const sb_data_atm & p1)
1612 {
1613 	id = p1.id;
1614 
1615 	el = p1.el;
1616 	f_chrg = p1.f_chrg;
1617 
1618 	bt = p1.bt;
1619 
1620 	ic2 = p1.ic2;
1621 	for (i32s n1 = 0;n1 < 3;n1++)
1622 	{
1623 		prev[n1] = p1.prev[n1];
1624 		ic1[n1] = p1.ic1[n1];
1625 	}
1626 
1627 	if (p1.tr != NULL) tr = new typerule(* p1.tr);
1628 	else tr = NULL;
1629 }
1630 
~sb_data_atm(void)1631 sb_data_atm::~sb_data_atm(void)
1632 {
1633 	if (tr != NULL) delete tr;
1634 }
1635 
operator >>(istream & p1,sb_data_atm & p2)1636 istream & operator>>(istream & p1, sb_data_atm & p2)
1637 {
1638 	char buffer[256];
1639 	while (p1.get() != 'M');
1640 
1641 	p1 >> p2.id;
1642 	p1 >> buffer; p2.el = element(buffer);
1643 	p1 >> p2.f_chrg;
1644 
1645 	p1 >> p2.prev[0] >> p2.prev[1] >> p2.prev[2];
1646 	p1 >> p2.ic1[0] >> p2.ic1[1] >> p2.ic2 >> p2.ic1[2];
1647 
1648 	p2.ic1[1] = M_PI * p2.ic1[1] / 180.0;
1649 	p2.ic1[2] = M_PI * p2.ic1[2] / 180.0;
1650 
1651 	p1 >> buffer;
1652 	p2.bt = bondtype(buffer[0]);
1653 
1654 //////////////////////////////////////////////////////////////////////////
1655 if (p2.bt.GetValue() == 0) assertion_failed(__FILE__, __LINE__, "bad bondtype A");
1656 //////////////////////////////////////////////////////////////////////////
1657 
1658 	return p1;
1659 }
1660 
1661 /*################################################################################################*/
1662 
sb_data_bnd(void)1663 sb_data_bnd::sb_data_bnd(void)
1664 {
1665 	atm[0] = NOT_DEFINED;
1666 	atm[1] = NOT_DEFINED;
1667 	bt = bondtype(1);
1668 }
1669 
~sb_data_bnd(void)1670 sb_data_bnd::~sb_data_bnd(void)
1671 {
1672 }
1673 
operator >>(istream & p1,sb_data_bnd & p2)1674 istream & operator>>(istream & p1, sb_data_bnd & p2)
1675 {
1676 	char buffer[256];
1677 	while (p1.get() != 'D');
1678 
1679 	p1 >> p2.atm[0] >> p2.atm[1];
1680 	p1 >> buffer; p2.bt = bondtype(buffer[0]);
1681 
1682 //////////////////////////////////////////////////////////////////////////
1683 if (p2.bt.GetValue() == 0) assertion_failed(__FILE__, __LINE__, "bad bondtype B");
1684 //////////////////////////////////////////////////////////////////////////
1685 
1686 	return p1;
1687 }
1688 
1689 /*################################################################################################*/
1690 
sb_data_res(void)1691 sb_data_res::sb_data_res(void)
1692 {
1693 	symbol1 = '?';
1694 	strcpy(symbol3, "???");
1695 
1696 	description = NULL;
1697 }
1698 
sb_data_res(const sb_data_res & p1)1699 sb_data_res::sb_data_res(const sb_data_res & p1)
1700 {
1701 	id = p1.id;
1702 
1703 	symbol1 = p1.symbol1;
1704 	strcpy(symbol3, p1.symbol3);
1705 
1706 	if (p1.description != NULL)
1707 	{
1708 		description = new char[strlen(p1.description) + 1];
1709 		strcpy(description, p1.description);
1710 	}
1711 	else description = NULL;
1712 
1713 	td_vector = p1.td_vector;
1714 	atm_vector = p1.atm_vector;
1715 	bnd_vector = p1.bnd_vector;
1716 }
1717 
~sb_data_res(void)1718 sb_data_res::~sb_data_res(void)
1719 {
1720 	if (description != NULL) delete[] description;
1721 }
1722 
ReadModification(istream & p1)1723 void sb_data_res::ReadModification(istream & p1)
1724 {
1725 	char buffer[256];
1726 	while (p1.peek() != 'E')
1727 	{
1728 		if (p1.peek() == 'T')
1729 		{
1730 			assertion_failed(__FILE__, __LINE__, "xxxx_MOD should not have TORDEF lines!");
1731 		}
1732 
1733 		if (p1.peek() == 'A')
1734 		{
1735 			sb_data_atm newatm; p1 >> newatm;
1736 			p1.getline(buffer, sizeof(buffer));
1737 			atm_vector.push_back(newatm);
1738 			continue;
1739 		}
1740 
1741 		if (p1.peek() == 'B')
1742 		{
1743 			sb_data_bnd newbnd; p1 >> newbnd;
1744 			p1.getline(buffer, sizeof(buffer));
1745 			bnd_vector.push_back(newbnd);
1746 			continue;
1747 		}
1748 
1749 		p1.getline(buffer, sizeof(buffer));
1750 	}
1751 
1752 	p1.getline(buffer, sizeof(buffer));
1753 }
1754 
operator >>(istream & p1,sb_data_res & p2)1755 istream & operator>>(istream & p1, sb_data_res & p2)
1756 {
1757 	char buffer[256];
1758 
1759 	while (p1.get() != 'S');
1760 
1761 	p1 >> p2.id;
1762 
1763 	while (p1.get() != ':');
1764 
1765 	p1 >> buffer;
1766 	if (strlen(buffer) != 3)
1767 	{
1768 		ostringstream msg;
1769 		msg << "bad symbol3 : " << buffer << ends;
1770 		assertion_failed(__FILE__, __LINE__, msg.str().c_str());
1771 	}
1772 
1773 	strcpy(p2.symbol3, buffer);
1774 	p1 >> p2.symbol1;
1775 
1776 	while (p1.get() != '"');
1777 	p1.getline(buffer, sizeof(buffer), '"');
1778 
1779 	p2.description = new char[strlen(buffer) + 1];
1780 	strcpy(p2.description, buffer);
1781 
1782 	p1.getline(buffer, sizeof(buffer));
1783 
1784 	while (p1.peek() != 'E')
1785 	{
1786 		if (p1.peek() == 'T')
1787 		{
1788 			sb_data_td newtd; p1 >> newtd;
1789 			p1.getline(buffer, sizeof(buffer));
1790 			p2.td_vector.push_back(newtd);
1791 			continue;
1792 		}
1793 
1794 		if (p1.peek() == 'A')
1795 		{
1796 			sb_data_atm newatm; p1 >> newatm;
1797 			p1.getline(buffer, sizeof(buffer));
1798 			p2.atm_vector.push_back(newatm);
1799 			continue;
1800 		}
1801 
1802 		if (p1.peek() == 'B')
1803 		{
1804 			sb_data_bnd newbnd; p1 >> newbnd;
1805 			p1.getline(buffer, sizeof(buffer));
1806 			p2.bnd_vector.push_back(newbnd);
1807 			continue;
1808 		}
1809 
1810 		p1.getline(buffer, sizeof(buffer));
1811 	}
1812 
1813 	p1.getline(buffer, sizeof(buffer));
1814 	return p1;
1815 }
1816 
1817 /*################################################################################################*/
1818 
1819 // eof
1820