1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39 
40 #include "pdb2top.h"
41 
42 #include <cctype>
43 #include <cmath>
44 #include <cstdio>
45 #include <cstring>
46 
47 #include <algorithm>
48 #include <string>
49 #include <vector>
50 
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/gmxpreprocess/add_par.h"
53 #include "gromacs/gmxpreprocess/fflibutil.h"
54 #include "gromacs/gmxpreprocess/gen_ad.h"
55 #include "gromacs/gmxpreprocess/gen_vsite.h"
56 #include "gromacs/gmxpreprocess/grompp_impl.h"
57 #include "gromacs/gmxpreprocess/h_db.h"
58 #include "gromacs/gmxpreprocess/notset.h"
59 #include "gromacs/gmxpreprocess/pgutil.h"
60 #include "gromacs/gmxpreprocess/specbond.h"
61 #include "gromacs/gmxpreprocess/topdirs.h"
62 #include "gromacs/gmxpreprocess/topio.h"
63 #include "gromacs/gmxpreprocess/toputil.h"
64 #include "gromacs/math/functions.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/topology/residuetypes.h"
67 #include "gromacs/topology/symtab.h"
68 #include "gromacs/utility/binaryinformation.h"
69 #include "gromacs/utility/cstringutil.h"
70 #include "gromacs/utility/dir_separator.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/futil.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/niceheader.h"
76 #include "gromacs/utility/path.h"
77 #include "gromacs/utility/programcontext.h"
78 #include "gromacs/utility/smalloc.h"
79 #include "gromacs/utility/strconvert.h"
80 #include "gromacs/utility/strdb.h"
81 #include "gromacs/utility/stringutil.h"
82 #include "gromacs/utility/textwriter.h"
83 
84 #include "hackblock.h"
85 #include "resall.h"
86 
87 /* this must correspond to enum in pdb2top.h */
88 const char* hh[ehisNR] = { "HISD", "HISE", "HISH", "HIS1" };
89 
missing_atoms(const PreprocessResidue * rp,int resind,t_atoms * at,int i0,int i,const gmx::MDLogger & logger)90 static int missing_atoms(const PreprocessResidue* rp, int resind, t_atoms* at, int i0, int i, const gmx::MDLogger& logger)
91 {
92     int nmiss = 0;
93     for (int j = 0; j < rp->natom(); j++)
94     {
95         const char* name   = *(rp->atomname[j]);
96         bool        bFound = false;
97         for (int k = i0; k < i; k++)
98         {
99             bFound = (bFound || (gmx_strcasecmp(*(at->atomname[k]), name) == 0));
100         }
101         if (!bFound)
102         {
103             nmiss++;
104             GMX_LOG(logger.warning)
105                     .asParagraph()
106                     .appendTextFormatted("atom %s is missing in residue %s %d in the pdb file",
107                                          name, *(at->resinfo[resind].name), at->resinfo[resind].nr);
108             if (name[0] == 'H' || name[0] == 'h')
109             {
110                 GMX_LOG(logger.warning)
111                         .asParagraph()
112                         .appendTextFormatted(
113                                 "You might need to add atom %s to the hydrogen database of "
114                                 "building block %s "
115                                 "in the file %s.hdb (see the manual)",
116                                 name, *(at->resinfo[resind].rtp), rp->filebase.c_str());
117             }
118         }
119     }
120 
121     return nmiss;
122 }
123 
is_int(double x)124 bool is_int(double x)
125 {
126     const double tol = 1e-4;
127     int          ix;
128 
129     if (x < 0)
130     {
131         x = -x;
132     }
133     ix = gmx::roundToInt(x);
134 
135     return (fabs(x - ix) < tol);
136 }
137 
choose_ff_impl(const char * ffsel,char * forcefield,int ff_maxlen,char * ffdir,int ffdir_maxlen,const gmx::MDLogger & logger)138 static void choose_ff_impl(const char*          ffsel,
139                            char*                forcefield,
140                            int                  ff_maxlen,
141                            char*                ffdir,
142                            int                  ffdir_maxlen,
143                            const gmx::MDLogger& logger)
144 {
145     std::vector<gmx::DataFileInfo> ffdirs = fflib_enumerate_forcefields();
146     const int                      nff    = ssize(ffdirs);
147 
148     /* Replace with unix path separators */
149 #if DIR_SEPARATOR != '/'
150     for (int i = 0; i < nff; ++i)
151     {
152         std::replace(ffdirs[i].dir.begin(), ffdirs[i].dir.end(), DIR_SEPARATOR, '/');
153     }
154 #endif
155 
156     /* Store the force field names in ffs */
157     std::vector<std::string> ffs;
158     ffs.reserve(ffdirs.size());
159     for (int i = 0; i < nff; ++i)
160     {
161         ffs.push_back(gmx::stripSuffixIfPresent(ffdirs[i].name, fflib_forcefield_dir_ext()));
162     }
163 
164     int sel;
165     if (ffsel != nullptr)
166     {
167         sel        = -1;
168         int cwdsel = -1;
169         int nfound = 0;
170         for (int i = 0; i < nff; ++i)
171         {
172             if (ffs[i] == ffsel)
173             {
174                 /* Matching ff name */
175                 sel = i;
176                 nfound++;
177 
178                 if (ffdirs[i].dir == ".")
179                 {
180                     cwdsel = i;
181                 }
182             }
183         }
184 
185         if (cwdsel != -1)
186         {
187             sel = cwdsel;
188         }
189 
190         if (nfound > 1)
191         {
192             if (cwdsel != -1)
193             {
194                 GMX_LOG(logger.warning)
195                         .asParagraph()
196                         .appendTextFormatted(
197                                 "Force field '%s' occurs in %d places. pdb2gmx is using the one in "
198                                 "the current directory. Use interactive selection "
199                                 "(not the -ff option) if you would prefer a different one.",
200                                 ffsel, nfound);
201             }
202             else
203             {
204                 std::string message = gmx::formatString(
205                         "Force field '%s' occurs in %d places, but not in "
206                         "the current directory.\n"
207                         "Run without the -ff switch and select the force "
208                         "field interactively.",
209                         ffsel, nfound);
210                 GMX_THROW(gmx::InconsistentInputError(message));
211             }
212         }
213         else if (nfound == 0)
214         {
215             std::string message = gmx::formatString(
216                     "Could not find force field '%s' in current directory, "
217                     "install tree or GMXLIB path.",
218                     ffsel);
219             GMX_THROW(gmx::InconsistentInputError(message));
220         }
221     }
222     else if (nff > 1)
223     {
224         std::vector<std::string> desc;
225         desc.reserve(ffdirs.size());
226         for (int i = 0; i < nff; ++i)
227         {
228             std::string docFileName(gmx::Path::join(ffdirs[i].dir, ffdirs[i].name, fflib_forcefield_doc()));
229             // TODO: Just try to open the file with a method that does not
230             // throw/bail out with a fatal error instead of multiple checks.
231             if (gmx::File::exists(docFileName, gmx::File::returnFalseOnError))
232             {
233                 // TODO: Use a C++ API without such an intermediate/fixed-length buffer.
234                 char buf[STRLEN];
235                 /* We don't use fflib_open, because we don't want printf's */
236                 FILE* fp = gmx_ffopen(docFileName, "r");
237                 get_a_line(fp, buf, STRLEN);
238                 gmx_ffclose(fp);
239                 desc.emplace_back(buf);
240             }
241             else
242             {
243                 desc.push_back(ffs[i]);
244             }
245         }
246         /* Order force fields from the same dir alphabetically
247          * and put deprecated force fields at the end.
248          */
249         for (int i = 0; i < nff; ++i)
250         {
251             for (int j = i + 1; j < nff; ++j)
252             {
253                 if (ffdirs[i].dir == ffdirs[j].dir
254                     && ((desc[i][0] == '[' && desc[j][0] != '[')
255                         || ((desc[i][0] == '[' || desc[j][0] != '[')
256                             && gmx_strcasecmp(desc[i].c_str(), desc[j].c_str()) > 0)))
257                 {
258                     std::swap(ffdirs[i].name, ffdirs[j].name);
259                     std::swap(ffs[i], ffs[j]);
260                     std::swap(desc[i], desc[j]);
261                 }
262             }
263         }
264 
265         GMX_LOG(logger.info).asParagraph().appendTextFormatted("Select the Force Field:");
266         for (int i = 0; i < nff; ++i)
267         {
268             if (i == 0 || ffdirs[i - 1].dir != ffdirs[i].dir)
269             {
270                 if (ffdirs[i].dir == ".")
271                 {
272                     GMX_LOG(logger.info)
273                             .asParagraph()
274                             .appendTextFormatted("From current directory:");
275                 }
276                 else
277                 {
278                     GMX_LOG(logger.info)
279                             .asParagraph()
280                             .appendTextFormatted("From '%s':", ffdirs[i].dir.c_str());
281                 }
282             }
283             GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", i + 1, desc[i].c_str());
284         }
285 
286         sel = -1;
287         // TODO: Add a C++ API for this.
288         char  buf[STRLEN];
289         char* pret;
290         do
291         {
292             pret = fgets(buf, STRLEN, stdin);
293 
294             if (pret != nullptr)
295             {
296                 sel = strtol(buf, nullptr, 10);
297                 sel--;
298             }
299         } while (pret == nullptr || (sel < 0) || (sel >= nff));
300 
301         /* Check for a current limitation of the fflib code.
302          * It will always read from the first ff directory in the list.
303          * This check assumes that the order of ffs matches the order
304          * in which fflib_open searches ff library files.
305          */
306         for (int i = 0; i < sel; i++)
307         {
308             if (ffs[i] == ffs[sel])
309             {
310                 std::string message = gmx::formatString(
311                         "Can only select the first of multiple force "
312                         "field entries with directory name '%s%s' in "
313                         "the list. If you want to use the next entry, "
314                         "run pdb2gmx in a different directory, set GMXLIB "
315                         "to point to the desired force field first, and/or "
316                         "rename or move the force field directory present "
317                         "in the current working directory.",
318                         ffs[sel].c_str(), fflib_forcefield_dir_ext());
319                 GMX_THROW(gmx::NotImplementedError(message));
320             }
321         }
322     }
323     else
324     {
325         sel = 0;
326     }
327 
328     if (ffs[sel].length() >= static_cast<size_t>(ff_maxlen))
329     {
330         std::string message = gmx::formatString("Length of force field name (%d) >= maxlen (%d)",
331                                                 static_cast<int>(ffs[sel].length()), ff_maxlen);
332         GMX_THROW(gmx::InvalidInputError(message));
333     }
334     strcpy(forcefield, ffs[sel].c_str());
335 
336     std::string ffpath;
337     if (ffdirs[sel].bFromDefaultDir)
338     {
339         ffpath = ffdirs[sel].name;
340     }
341     else
342     {
343         ffpath = gmx::Path::join(ffdirs[sel].dir, ffdirs[sel].name);
344     }
345     if (ffpath.length() >= static_cast<size_t>(ffdir_maxlen))
346     {
347         std::string message = gmx::formatString("Length of force field dir (%d) >= maxlen (%d)",
348                                                 static_cast<int>(ffpath.length()), ffdir_maxlen);
349         GMX_THROW(gmx::InvalidInputError(message));
350     }
351     strcpy(ffdir, ffpath.c_str());
352 }
353 
choose_ff(const char * ffsel,char * forcefield,int ff_maxlen,char * ffdir,int ffdir_maxlen,const gmx::MDLogger & logger)354 void choose_ff(const char* ffsel, char* forcefield, int ff_maxlen, char* ffdir, int ffdir_maxlen, const gmx::MDLogger& logger)
355 {
356     try
357     {
358         choose_ff_impl(ffsel, forcefield, ff_maxlen, ffdir, ffdir_maxlen, logger);
359     }
360     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
361 }
362 
choose_watermodel(const char * wmsel,const char * ffdir,char ** watermodel,const gmx::MDLogger & logger)363 void choose_watermodel(const char* wmsel, const char* ffdir, char** watermodel, const gmx::MDLogger& logger)
364 {
365     const char* fn_watermodels = "watermodels.dat";
366     FILE*       fp;
367     char        buf[STRLEN];
368     int         nwm, sel, i;
369     char**      model;
370     char*       pret;
371 
372     if (strcmp(wmsel, "none") == 0)
373     {
374         *watermodel = nullptr;
375 
376         return;
377     }
378     else if (strcmp(wmsel, "select") != 0)
379     {
380         *watermodel = gmx_strdup(wmsel);
381 
382         return;
383     }
384 
385     std::string filename = gmx::Path::join(ffdir, fn_watermodels);
386     if (!fflib_fexist(filename))
387     {
388         GMX_LOG(logger.warning)
389                 .asParagraph()
390                 .appendTextFormatted("No file '%s' found, will not include a water model", fn_watermodels);
391         *watermodel = nullptr;
392 
393         return;
394     }
395 
396     fp = fflib_open(filename);
397     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Select the Water Model:");
398     nwm   = 0;
399     model = nullptr;
400     while (get_a_line(fp, buf, STRLEN))
401     {
402         srenew(model, nwm + 1);
403         snew(model[nwm], STRLEN);
404         sscanf(buf, "%s%n", model[nwm], &i);
405         if (i > 0)
406         {
407             ltrim(buf + i);
408             GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", nwm + 1, buf + i);
409             nwm++;
410         }
411         else
412         {
413             sfree(model[nwm]);
414         }
415     }
416     gmx_ffclose(fp);
417     GMX_LOG(logger.info).asParagraph().appendTextFormatted("%2d: %s", nwm + 1, "None");
418 
419     sel = -1;
420     do
421     {
422         pret = fgets(buf, STRLEN, stdin);
423 
424         if (pret != nullptr)
425         {
426             sel = strtol(buf, nullptr, 10);
427             sel--;
428         }
429     } while (pret == nullptr || sel < 0 || sel > nwm);
430 
431     if (sel == nwm)
432     {
433         *watermodel = nullptr;
434     }
435     else
436     {
437         *watermodel = gmx_strdup(model[sel]);
438     }
439 
440     for (i = 0; i < nwm; i++)
441     {
442         sfree(model[i]);
443     }
444     sfree(model);
445 }
446 
name2type(t_atoms * at,int ** cgnr,gmx::ArrayRef<const PreprocessResidue> usedPpResidues,ResidueType * rt,const gmx::MDLogger & logger)447 static int name2type(t_atoms*                               at,
448                      int**                                  cgnr,
449                      gmx::ArrayRef<const PreprocessResidue> usedPpResidues,
450                      ResidueType*                           rt,
451                      const gmx::MDLogger&                   logger)
452 {
453     int    i, j, prevresind, i0, prevcg, cg, curcg;
454     char*  name;
455     bool   bNterm;
456     double qt;
457     int    nmissat;
458 
459     nmissat = 0;
460 
461     int resind = -1;
462     bNterm     = false;
463     i0         = 0;
464     snew(*cgnr, at->nr);
465     qt    = 0;
466     curcg = 0;
467     cg    = -1;
468 
469     for (i = 0; (i < at->nr); i++)
470     {
471         prevresind = resind;
472         if (at->atom[i].resind != resind)
473         {
474             resind     = at->atom[i].resind;
475             bool bProt = rt->namedResidueHasType(*(at->resinfo[resind].name), "Protein");
476             bNterm     = bProt && (resind == 0);
477             if (resind > 0)
478             {
479                 nmissat += missing_atoms(&usedPpResidues[prevresind], prevresind, at, i0, i, logger);
480             }
481             i0 = i;
482         }
483         if (at->atom[i].m == 0)
484         {
485             qt               = 0;
486             prevcg           = cg;
487             name             = *(at->atomname[i]);
488             j                = search_jtype(usedPpResidues[resind], name, bNterm);
489             at->atom[i].type = usedPpResidues[resind].atom[j].type;
490             at->atom[i].q    = usedPpResidues[resind].atom[j].q;
491             at->atom[i].m    = usedPpResidues[resind].atom[j].m;
492             cg               = usedPpResidues[resind].cgnr[j];
493             /* A charge group number -1 signals a separate charge group
494              * for this atom.
495              */
496             if ((cg == -1) || (cg != prevcg) || (resind != prevresind))
497             {
498                 curcg++;
499             }
500         }
501         else
502         {
503             cg = -1;
504             if (is_int(qt))
505             {
506                 qt = 0;
507                 curcg++;
508             }
509             qt += at->atom[i].q;
510         }
511         (*cgnr)[i]        = curcg;
512         at->atom[i].typeB = at->atom[i].type;
513         at->atom[i].qB    = at->atom[i].q;
514         at->atom[i].mB    = at->atom[i].m;
515     }
516     nmissat += missing_atoms(&usedPpResidues[resind], resind, at, i0, i, logger);
517 
518     return nmissat;
519 }
520 
print_top_heavy_H(FILE * out,real mHmult)521 static void print_top_heavy_H(FILE* out, real mHmult)
522 {
523     if (mHmult == 2.0)
524     {
525         fprintf(out, "; Using deuterium instead of hydrogen\n\n");
526     }
527     else if (mHmult == 4.0)
528     {
529         fprintf(out, "#define HEAVY_H\n\n");
530     }
531     else if (mHmult != 1.0)
532     {
533         fprintf(stderr,
534                 "WARNING: unsupported proton mass multiplier (%g) "
535                 "in pdb2top\n",
536                 mHmult);
537     }
538 }
539 
print_top_comment(FILE * out,const char * filename,const char * ffdir,bool bITP)540 void print_top_comment(FILE* out, const char* filename, const char* ffdir, bool bITP)
541 {
542     char  ffdir_parent[STRLEN];
543     char* p;
544 
545     try
546     {
547         gmx::TextWriter writer(out);
548         gmx::niceHeader(&writer, filename, ';');
549         writer.writeLine(gmx::formatString(";\tThis is a %s topology file", bITP ? "include" : "standalone"));
550         writer.writeLine(";");
551 
552         gmx::BinaryInformationSettings settings;
553         settings.generatedByHeader(true);
554         settings.linePrefix(";\t");
555         gmx::printBinaryInformation(&writer, gmx::getProgramContext(), settings);
556     }
557     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
558 
559     if (strchr(ffdir, '/') == nullptr)
560     {
561         fprintf(out, ";\tForce field was read from the standard GROMACS share directory.\n;\n\n");
562     }
563     else if (ffdir[0] == '.')
564     {
565         fprintf(out,
566                 ";\tForce field was read from current directory or a relative path - path "
567                 "added.\n;\n\n");
568     }
569     else
570     {
571         strncpy(ffdir_parent, ffdir, STRLEN - 1);
572         ffdir_parent[STRLEN - 1] = '\0'; /*make sure it is 0-terminated even for long string*/
573         p                        = strrchr(ffdir_parent, '/');
574 
575         *p = '\0';
576 
577         fprintf(out,
578                 ";\tForce field data was read from:\n"
579                 ";\t%s\n"
580                 ";\n"
581                 ";\tNote:\n"
582                 ";\tThis might be a non-standard force field location. When you use this topology, "
583                 "the\n"
584                 ";\tforce field must either be present in the current directory, or the location\n"
585                 ";\tspecified in the GMXLIB path variable or with the 'include' mdp file "
586                 "option.\n;\n\n",
587                 ffdir_parent);
588     }
589 }
590 
print_top_header(FILE * out,const char * filename,bool bITP,const char * ffdir,real mHmult)591 void print_top_header(FILE* out, const char* filename, bool bITP, const char* ffdir, real mHmult)
592 {
593     const char* p;
594 
595     print_top_comment(out, filename, ffdir, bITP);
596 
597     print_top_heavy_H(out, mHmult);
598     fprintf(out, "; Include forcefield parameters\n");
599 
600     p = strrchr(ffdir, '/');
601     p = (ffdir[0] == '.' || p == nullptr) ? ffdir : p + 1;
602 
603     fprintf(out, "#include \"%s/%s\"\n\n", p, fflib_forcefield_itp());
604 }
605 
print_top_posre(FILE * out,const char * pr)606 static void print_top_posre(FILE* out, const char* pr)
607 {
608     fprintf(out, "; Include Position restraint file\n");
609     fprintf(out, "#ifdef POSRES\n");
610     fprintf(out, "#include \"%s\"\n", pr);
611     fprintf(out, "#endif\n\n");
612 }
613 
print_top_water(FILE * out,const char * ffdir,const char * water)614 static void print_top_water(FILE* out, const char* ffdir, const char* water)
615 {
616     const char* p;
617     char        buf[STRLEN];
618 
619     fprintf(out, "; Include water topology\n");
620 
621     p = strrchr(ffdir, '/');
622     p = (ffdir[0] == '.' || p == nullptr) ? ffdir : p + 1;
623     fprintf(out, "#include \"%s/%s.itp\"\n", p, water);
624 
625     fprintf(out, "\n");
626     fprintf(out, "#ifdef POSRES_WATER\n");
627     fprintf(out, "; Position restraint for each water oxygen\n");
628     fprintf(out, "[ position_restraints ]\n");
629     fprintf(out, ";%3s %5s %9s %10s %10s\n", "i", "funct", "fcx", "fcy", "fcz");
630     fprintf(out, "%4d %4d %10g %10g %10g\n", 1, 1, 1000.0, 1000.0, 1000.0);
631     fprintf(out, "#endif\n");
632     fprintf(out, "\n");
633 
634     sprintf(buf, "%s/ions.itp", p);
635 
636     if (fflib_fexist(buf))
637     {
638         fprintf(out, "; Include topology for ions\n");
639         fprintf(out, "#include \"%s\"\n", buf);
640         fprintf(out, "\n");
641     }
642 }
643 
print_top_system(FILE * out,const char * title)644 static void print_top_system(FILE* out, const char* title)
645 {
646     fprintf(out, "[ %s ]\n", dir2str(Directive::d_system));
647     fprintf(out, "; Name\n");
648     fprintf(out, "%s\n\n", title[0] ? title : "Protein");
649 }
650 
print_top_mols(FILE * out,const char * title,const char * ffdir,const char * water,gmx::ArrayRef<const std::string> incls,gmx::ArrayRef<const t_mols> mols)651 void print_top_mols(FILE*                            out,
652                     const char*                      title,
653                     const char*                      ffdir,
654                     const char*                      water,
655                     gmx::ArrayRef<const std::string> incls,
656                     gmx::ArrayRef<const t_mols>      mols)
657 {
658 
659     if (!incls.empty())
660     {
661         fprintf(out, "; Include chain topologies\n");
662         for (const auto& incl : incls)
663         {
664             fprintf(out, "#include \"%s\"\n", gmx::Path::getFilename(incl).c_str());
665         }
666         fprintf(out, "\n");
667     }
668 
669     if (water)
670     {
671         print_top_water(out, ffdir, water);
672     }
673     print_top_system(out, title);
674 
675     if (!mols.empty())
676     {
677         fprintf(out, "[ %s ]\n", dir2str(Directive::d_molecules));
678         fprintf(out, "; %-15s %5s\n", "Compound", "#mols");
679         for (const auto& mol : mols)
680         {
681             fprintf(out, "%-15s %5d\n", mol.name.c_str(), mol.nr);
682         }
683     }
684 }
685 
write_top(FILE * out,const char * pr,const char * molname,t_atoms * at,bool bRTPresname,int bts[],gmx::ArrayRef<const InteractionsOfType> plist,t_excls excls[],PreprocessingAtomTypes * atype,int * cgnr,int nrexcl)686 void write_top(FILE*                                   out,
687                const char*                             pr,
688                const char*                             molname,
689                t_atoms*                                at,
690                bool                                    bRTPresname,
691                int                                     bts[],
692                gmx::ArrayRef<const InteractionsOfType> plist,
693                t_excls                                 excls[],
694                PreprocessingAtomTypes*                 atype,
695                int*                                    cgnr,
696                int                                     nrexcl)
697 /* NOTE: nrexcl is not the size of *excl! */
698 {
699     if (at && atype && cgnr)
700     {
701         fprintf(out, "[ %s ]\n", dir2str(Directive::d_moleculetype));
702         fprintf(out, "; %-15s %5s\n", "Name", "nrexcl");
703         fprintf(out, "%-15s %5d\n\n", molname ? molname : "Protein", nrexcl);
704 
705         print_atoms(out, atype, at, cgnr, bRTPresname);
706         print_bondeds(out, at->nr, Directive::d_bonds, F_BONDS, bts[ebtsBONDS], plist);
707         print_bondeds(out, at->nr, Directive::d_constraints, F_CONSTR, 0, plist);
708         print_bondeds(out, at->nr, Directive::d_constraints, F_CONSTRNC, 0, plist);
709         print_bondeds(out, at->nr, Directive::d_pairs, F_LJ14, 0, plist);
710         print_excl(out, at->nr, excls);
711         print_bondeds(out, at->nr, Directive::d_angles, F_ANGLES, bts[ebtsANGLES], plist);
712         print_bondeds(out, at->nr, Directive::d_dihedrals, F_PDIHS, bts[ebtsPDIHS], plist);
713         print_bondeds(out, at->nr, Directive::d_dihedrals, F_IDIHS, bts[ebtsIDIHS], plist);
714         print_bondeds(out, at->nr, Directive::d_cmap, F_CMAP, bts[ebtsCMAP], plist);
715         print_bondeds(out, at->nr, Directive::d_polarization, F_POLARIZATION, 0, plist);
716         print_bondeds(out, at->nr, Directive::d_thole_polarization, F_THOLE_POL, 0, plist);
717         print_bondeds(out, at->nr, Directive::d_vsites2, F_VSITE2, 0, plist);
718         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3, 0, plist);
719         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3FD, 0, plist);
720         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3FAD, 0, plist);
721         print_bondeds(out, at->nr, Directive::d_vsites3, F_VSITE3OUT, 0, plist);
722         print_bondeds(out, at->nr, Directive::d_vsites4, F_VSITE4FD, 0, plist);
723         print_bondeds(out, at->nr, Directive::d_vsites4, F_VSITE4FDN, 0, plist);
724 
725         if (pr)
726         {
727             print_top_posre(out, pr);
728         }
729     }
730 }
731 
732 
do_ssbonds(InteractionsOfType * ps,t_atoms * atoms,gmx::ArrayRef<const DisulfideBond> ssbonds,bool bAllowMissing)733 static void do_ssbonds(InteractionsOfType*                ps,
734                        t_atoms*                           atoms,
735                        gmx::ArrayRef<const DisulfideBond> ssbonds,
736                        bool                               bAllowMissing)
737 {
738     for (const auto& bond : ssbonds)
739     {
740         int ri = bond.firstResidue;
741         int rj = bond.secondResidue;
742         int ai = search_res_atom(bond.firstAtom.c_str(), ri, atoms, "special bond", bAllowMissing);
743         int aj = search_res_atom(bond.secondAtom.c_str(), rj, atoms, "special bond", bAllowMissing);
744         if ((ai == -1) || (aj == -1))
745         {
746             gmx_fatal(FARGS, "Trying to make impossible special bond (%s-%s)!",
747                       bond.firstAtom.c_str(), bond.secondAtom.c_str());
748         }
749         add_param(ps, ai, aj, {}, nullptr);
750     }
751 }
752 
at2bonds(InteractionsOfType * psb,gmx::ArrayRef<MoleculePatchDatabase> globalPatches,t_atoms * atoms,gmx::ArrayRef<const gmx::RVec> x,real long_bond_dist,real short_bond_dist,gmx::ArrayRef<const int> cyclicBondsIndex,const gmx::MDLogger & logger)753 static void at2bonds(InteractionsOfType*                  psb,
754                      gmx::ArrayRef<MoleculePatchDatabase> globalPatches,
755                      t_atoms*                             atoms,
756                      gmx::ArrayRef<const gmx::RVec>       x,
757                      real                                 long_bond_dist,
758                      real                                 short_bond_dist,
759                      gmx::ArrayRef<const int>             cyclicBondsIndex,
760                      const gmx::MDLogger&                 logger)
761 {
762     real        long_bond_dist2, short_bond_dist2;
763     const char* ptr;
764 
765     long_bond_dist2  = gmx::square(long_bond_dist);
766     short_bond_dist2 = gmx::square(short_bond_dist);
767 
768     if (debug)
769     {
770         ptr = "bond";
771     }
772     else
773     {
774         ptr = "check";
775     }
776 
777     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Making bonds...");
778     int i = 0;
779     for (int resind = 0; (resind < atoms->nres) && (i < atoms->nr); resind++)
780     {
781         /* add bonds from list of bonded interactions */
782         for (const auto& patch : globalPatches[resind].rb[ebtsBONDS].b)
783         {
784             /* Unfortunately we can not issue errors or warnings
785              * for missing atoms in bonds, as the hydrogens and terminal atoms
786              * have not been added yet.
787              */
788             int ai = search_atom(patch.ai().c_str(), i, atoms, ptr, TRUE, cyclicBondsIndex);
789             int aj = search_atom(patch.aj().c_str(), i, atoms, ptr, TRUE, cyclicBondsIndex);
790             if (ai != -1 && aj != -1)
791             {
792                 real dist2 = distance2(x[ai], x[aj]);
793                 if (dist2 > long_bond_dist2)
794 
795                 {
796                     GMX_LOG(logger.warning)
797                             .asParagraph()
798                             .appendTextFormatted("Long Bond (%d-%d = %g nm)", ai + 1, aj + 1,
799                                                  std::sqrt(dist2));
800                 }
801                 else if (dist2 < short_bond_dist2)
802                 {
803                     GMX_LOG(logger.warning)
804                             .asParagraph()
805                             .appendTextFormatted("Short Bond (%d-%d = %g nm)", ai + 1, aj + 1,
806                                                  std::sqrt(dist2));
807                 }
808                 add_param(psb, ai, aj, {}, patch.s.c_str());
809             }
810         }
811         /* add bonds from list of hacks (each added atom gets a bond) */
812         while ((i < atoms->nr) && (atoms->atom[i].resind == resind))
813         {
814             for (const auto& patch : globalPatches[resind].hack)
815             {
816                 if ((patch.tp > 0 || patch.type() == MoleculePatchType::Add)
817                     && patch.a[0] == *(atoms->atomname[i]))
818                 {
819                     switch (patch.tp)
820                     {
821                         case 9:                                        /* COOH terminus */
822                             add_param(psb, i, i + 1, {}, nullptr);     /* C-O  */
823                             add_param(psb, i, i + 2, {}, nullptr);     /* C-OA */
824                             add_param(psb, i + 2, i + 3, {}, nullptr); /* OA-H */
825                             break;
826                         default:
827                             for (int k = 0; (k < patch.nr); k++)
828                             {
829                                 add_param(psb, i, i + k + 1, {}, nullptr);
830                             }
831                     }
832                 }
833             }
834             i++;
835         }
836         /* we're now at the start of the next residue */
837     }
838 }
839 
pcompar(const InteractionOfType & a,const InteractionOfType & b)840 static bool pcompar(const InteractionOfType& a, const InteractionOfType& b)
841 {
842     int d;
843 
844     if (((d = a.ai() - b.ai()) != 0) || ((d = a.aj() - b.aj()) != 0))
845     {
846         return d < 0;
847     }
848     else
849     {
850         return a.interactionTypeName().length() > b.interactionTypeName().length();
851     }
852 }
853 
clean_bonds(InteractionsOfType * ps,const gmx::MDLogger & logger)854 static void clean_bonds(InteractionsOfType* ps, const gmx::MDLogger& logger)
855 {
856     if (ps->size() > 0)
857     {
858         /* Sort bonds */
859         for (auto& bond : ps->interactionTypes)
860         {
861             bond.sortAtomIds();
862         }
863         std::sort(ps->interactionTypes.begin(), ps->interactionTypes.end(), pcompar);
864 
865         /* remove doubles, keep the first one always. */
866         int oldNumber = ps->size();
867         for (auto parm = ps->interactionTypes.begin() + 1; parm != ps->interactionTypes.end();)
868         {
869             auto prev = parm - 1;
870             if (parm->ai() == prev->ai() && parm->aj() == prev->aj())
871             {
872                 parm = ps->interactionTypes.erase(parm);
873             }
874             else
875             {
876                 ++parm;
877             }
878         }
879         GMX_LOG(logger.info)
880                 .asParagraph()
881                 .appendTextFormatted("Number of bonds was %d, now %zu", oldNumber, ps->size());
882     }
883     else
884     {
885         GMX_LOG(logger.info).asParagraph().appendTextFormatted("No bonds");
886     }
887 }
888 
print_sums(const t_atoms * atoms,bool bSystem,const gmx::MDLogger & logger)889 void print_sums(const t_atoms* atoms, bool bSystem, const gmx::MDLogger& logger)
890 {
891     double      m, qtot;
892     int         i;
893     const char* where;
894 
895     if (bSystem)
896     {
897         where = " in system";
898     }
899     else
900     {
901         where = "";
902     }
903 
904     m    = 0;
905     qtot = 0;
906     for (i = 0; (i < atoms->nr); i++)
907     {
908         m += atoms->atom[i].m;
909         qtot += atoms->atom[i].q;
910     }
911 
912     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Total mass%s %.3f a.m.u.", where, m);
913     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Total charge%s %.3f e", where, qtot);
914 }
915 
check_restp_type(const char * name,int t1,int t2)916 static void check_restp_type(const char* name, int t1, int t2)
917 {
918     if (t1 != t2)
919     {
920         gmx_fatal(FARGS, "Residues in one molecule have a different '%s' type: %d and %d", name, t1, t2);
921     }
922 }
923 
check_restp_types(const PreprocessResidue & r0,const PreprocessResidue & r1)924 static void check_restp_types(const PreprocessResidue& r0, const PreprocessResidue& r1)
925 {
926     check_restp_type("all dihedrals", static_cast<int>(r0.bKeepAllGeneratedDihedrals),
927                      static_cast<int>(r1.bKeepAllGeneratedDihedrals));
928     check_restp_type("nrexcl", r0.nrexcl, r1.nrexcl);
929     check_restp_type("HH14", static_cast<int>(r0.bGenerateHH14Interactions),
930                      static_cast<int>(r1.bGenerateHH14Interactions));
931     check_restp_type("remove dihedrals", static_cast<int>(r0.bRemoveDihedralIfWithImproper),
932                      static_cast<int>(r1.bRemoveDihedralIfWithImproper));
933 
934     for (int i = 0; i < ebtsNR; i++)
935     {
936         check_restp_type(btsNames[i], r0.rb[i].type, r1.rb[i].type);
937     }
938 }
939 
add_atom_to_restp(PreprocessResidue * usedPpResidues,t_symtab * symtab,int at_start,const MoleculePatch * patch)940 static void add_atom_to_restp(PreprocessResidue*   usedPpResidues,
941                               t_symtab*            symtab,
942                               int                  at_start,
943                               const MoleculePatch* patch)
944 {
945     /* now add them */
946     for (int k = 0; k < patch->nr; k++)
947     {
948         /* set counter in atomname */
949         std::string buf = patch->nname;
950         if (patch->nr > 1)
951         {
952             buf.append(gmx::formatString("%d", k + 1));
953         }
954         usedPpResidues->atomname.insert(usedPpResidues->atomname.begin() + at_start + 1 + k,
955                                         put_symtab(symtab, buf.c_str()));
956         usedPpResidues->atom.insert(usedPpResidues->atom.begin() + at_start + 1 + k, patch->atom.back());
957         if (patch->cgnr != NOTSET)
958         {
959             usedPpResidues->cgnr.insert(usedPpResidues->cgnr.begin() + at_start + 1 + k, patch->cgnr);
960         }
961         else
962         {
963             usedPpResidues->cgnr.insert(usedPpResidues->cgnr.begin() + at_start + 1 + k,
964                                         usedPpResidues->cgnr[at_start]);
965         }
966     }
967 }
968 
get_hackblocks_rtp(std::vector<MoleculePatchDatabase> * globalPatches,std::vector<PreprocessResidue> * usedPpResidues,gmx::ArrayRef<const PreprocessResidue> rtpFFDB,int nres,t_resinfo * resinfo,int nterpairs,t_symtab * symtab,gmx::ArrayRef<MoleculePatchDatabase * > ntdb,gmx::ArrayRef<MoleculePatchDatabase * > ctdb,gmx::ArrayRef<const int> rn,gmx::ArrayRef<const int> rc,bool bAllowMissing,const gmx::MDLogger & logger)969 void get_hackblocks_rtp(std::vector<MoleculePatchDatabase>*    globalPatches,
970                         std::vector<PreprocessResidue>*        usedPpResidues,
971                         gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
972                         int                                    nres,
973                         t_resinfo*                             resinfo,
974                         int                                    nterpairs,
975                         t_symtab*                              symtab,
976                         gmx::ArrayRef<MoleculePatchDatabase*>  ntdb,
977                         gmx::ArrayRef<MoleculePatchDatabase*>  ctdb,
978                         gmx::ArrayRef<const int>               rn,
979                         gmx::ArrayRef<const int>               rc,
980                         bool                                   bAllowMissing,
981                         const gmx::MDLogger&                   logger)
982 {
983     char* key;
984     bool  bRM;
985 
986     globalPatches->resize(nres);
987     usedPpResidues->clear();
988     /* first the termini */
989     for (int i = 0; i < nterpairs; i++)
990     {
991         if (rn[i] >= 0 && ntdb[i] != nullptr)
992         {
993             copyModificationBlocks(*ntdb[i], &globalPatches->at(rn[i]));
994         }
995         if (rc[i] >= 0 && ctdb[i] != nullptr)
996         {
997             mergeAtomAndBondModifications(*ctdb[i], &globalPatches->at(rc[i]));
998         }
999     }
1000 
1001     /* then the whole rtp */
1002     for (int i = 0; i < nres; i++)
1003     {
1004         /* Here we allow a mismatch of one character when looking for the rtp entry.
1005          * For such a mismatch there should be only one mismatching name.
1006          * This is mainly useful for small molecules such as ions.
1007          * Note that this will usually not work for protein, DNA and RNA,
1008          * since there the residue names should be listed in residuetypes.dat
1009          * and an error will have been generated earlier in the process.
1010          */
1011         key = *resinfo[i].rtp;
1012 
1013         resinfo[i].rtp = put_symtab(symtab, searchResidueDatabase(key, rtpFFDB, logger).c_str());
1014         auto res       = getDatabaseEntry(*resinfo[i].rtp, rtpFFDB);
1015         usedPpResidues->push_back(PreprocessResidue());
1016         PreprocessResidue* newentry = &usedPpResidues->back();
1017         copyPreprocessResidues(*res, newentry, symtab);
1018 
1019         /* Check that we do not have different bonded types in one molecule */
1020         check_restp_types(usedPpResidues->front(), *newentry);
1021 
1022         int tern = -1;
1023         for (int j = 0; j < nterpairs && tern == -1; j++)
1024         {
1025             if (i == rn[j])
1026             {
1027                 tern = j;
1028             }
1029         }
1030         int terc = -1;
1031         for (int j = 0; j < nterpairs && terc == -1; j++)
1032         {
1033             if (i == rc[j])
1034             {
1035                 terc = j;
1036             }
1037         }
1038         bRM = mergeBondedInteractionList(res->rb, globalPatches->at(i).rb, tern >= 0, terc >= 0);
1039 
1040         if (bRM && ((tern >= 0 && ntdb[tern] == nullptr) || (terc >= 0 && ctdb[terc] == nullptr)))
1041         {
1042             const char* errString =
1043                     "There is a dangling bond at at least one of the terminal ends and the force "
1044                     "field does not provide terminal entries or files. Fix your terminal "
1045                     "residues so that they match the residue database (.rtp) entries, or provide "
1046                     "terminal database entries (.tdb).";
1047             if (bAllowMissing)
1048             {
1049                 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("%s", errString);
1050             }
1051             else
1052             {
1053                 gmx_fatal(FARGS, "%s", errString);
1054             }
1055         }
1056         else if (bRM && ((tern >= 0 && ntdb[tern]->nhack() == 0) || (terc >= 0 && ctdb[terc]->nhack() == 0)))
1057         {
1058             const char* errString =
1059                     "There is a dangling bond at at least one of the terminal ends. Fix your "
1060                     "coordinate file, add a new terminal database entry (.tdb), or select the "
1061                     "proper existing terminal entry.";
1062             if (bAllowMissing)
1063             {
1064                 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("%s", errString);
1065             }
1066             else
1067             {
1068                 gmx_fatal(FARGS, "%s", errString);
1069             }
1070         }
1071     }
1072 
1073     /* Apply patchs to t_restp entries
1074        i.e. add's and deletes from termini database will be
1075        added to/removed from residue topology
1076        we'll do this on one big dirty loop, so it won't make easy reading! */
1077     for (auto modifiedResidue = globalPatches->begin(); modifiedResidue != globalPatches->end();
1078          modifiedResidue++)
1079     {
1080         const int          pos    = std::distance(globalPatches->begin(), modifiedResidue);
1081         PreprocessResidue* posres = &usedPpResidues->at(pos);
1082         for (auto patch = modifiedResidue->hack.begin(); patch != modifiedResidue->hack.end(); patch++)
1083         {
1084             if (patch->nr != 0)
1085             {
1086                 /* find atom in restp */
1087                 auto found = std::find_if(posres->atomname.begin(), posres->atomname.end(),
1088                                           [&patch](char** name) {
1089                                               return (patch->oname.empty() && patch->a[0] == *name)
1090                                                      || (patch->oname == *name);
1091                                           });
1092 
1093                 if (found == posres->atomname.end())
1094                 {
1095                     /* If we are doing an atom rename only, we don't need
1096                      * to generate a fatal error if the old name is not found
1097                      * in the rtp.
1098                      */
1099                     /* Deleting can happen also only on the input atoms,
1100                      * not necessarily always on the rtp entry.
1101                      */
1102                     if (patch->type() == MoleculePatchType::Add)
1103                     {
1104                         gmx_fatal(FARGS,
1105                                   "atom %s not found in buiding block %d%s "
1106                                   "while combining tdb and rtp",
1107                                   patch->oname.empty() ? patch->a[0].c_str() : patch->oname.c_str(),
1108                                   pos + 1, *resinfo[pos].rtp);
1109                     }
1110                 }
1111                 else
1112                 {
1113                     int l = std::distance(posres->atomname.begin(), found);
1114                     switch (patch->type())
1115                     {
1116                         case MoleculePatchType::Add:
1117                         {
1118                             /* we're adding: */
1119                             add_atom_to_restp(posres, symtab, l, &(*patch));
1120                             break;
1121                         }
1122                         case MoleculePatchType::Delete:
1123                         { /* we're deleting */
1124                             posres->atom.erase(posres->atom.begin() + l);
1125                             posres->atomname.erase(posres->atomname.begin() + l);
1126                             posres->cgnr.erase(posres->cgnr.begin() + l);
1127                             break;
1128                         }
1129                         case MoleculePatchType::Replace:
1130                         {
1131                             /* we're replacing */
1132                             posres->atom[l]     = patch->atom.back();
1133                             posres->atomname[l] = put_symtab(symtab, patch->nname.c_str());
1134                             if (patch->cgnr != NOTSET)
1135                             {
1136                                 posres->cgnr[l] = patch->cgnr;
1137                             }
1138                             break;
1139                         }
1140                     }
1141                 }
1142             }
1143         }
1144     }
1145 }
1146 
atomname_cmp_nr(const char * anm,const MoleculePatch * patch,int * nr)1147 static bool atomname_cmp_nr(const char* anm, const MoleculePatch* patch, int* nr)
1148 {
1149 
1150     if (patch->nr == 1)
1151     {
1152         *nr = 0;
1153 
1154         return (gmx::equalCaseInsensitive(anm, patch->nname));
1155     }
1156     else
1157     {
1158         if (isdigit(anm[strlen(anm) - 1]))
1159         {
1160             *nr = anm[strlen(anm) - 1] - '0';
1161         }
1162         else
1163         {
1164             *nr = 0;
1165         }
1166         if (*nr <= 0 || *nr > patch->nr)
1167         {
1168             return FALSE;
1169         }
1170         else
1171         {
1172             return (strlen(anm) == patch->nname.length() + 1
1173                     && gmx_strncasecmp(anm, patch->nname.c_str(), patch->nname.length()) == 0);
1174         }
1175     }
1176 }
1177 
match_atomnames_with_rtp_atom(t_atoms * pdba,gmx::ArrayRef<gmx::RVec> x,t_symtab * symtab,int atind,PreprocessResidue * localPpResidue,const MoleculePatchDatabase & singlePatch,bool bVerbose,const gmx::MDLogger & logger)1178 static bool match_atomnames_with_rtp_atom(t_atoms*                     pdba,
1179                                           gmx::ArrayRef<gmx::RVec>     x,
1180                                           t_symtab*                    symtab,
1181                                           int                          atind,
1182                                           PreprocessResidue*           localPpResidue,
1183                                           const MoleculePatchDatabase& singlePatch,
1184                                           bool                         bVerbose,
1185                                           const gmx::MDLogger&         logger)
1186 {
1187     int   resnr;
1188     char* oldnm;
1189     int   anmnr;
1190     bool  bDeleted;
1191 
1192     oldnm = *pdba->atomname[atind];
1193     resnr = pdba->resinfo[pdba->atom[atind].resind].nr;
1194 
1195     bDeleted = FALSE;
1196     for (auto patch = singlePatch.hack.begin(); patch != singlePatch.hack.end(); patch++)
1197     {
1198         if (patch->type() == MoleculePatchType::Replace && gmx::equalCaseInsensitive(oldnm, patch->oname))
1199         {
1200             /* This is a replace entry. */
1201             /* Check if we are not replacing a replaced atom. */
1202             bool bReplaceReplace = false;
1203             for (auto selfPatch = singlePatch.hack.begin(); selfPatch != singlePatch.hack.end(); selfPatch++)
1204             {
1205                 if (patch != selfPatch && selfPatch->type() == MoleculePatchType::Replace
1206                     && gmx::equalCaseInsensitive(selfPatch->nname, patch->oname))
1207                 {
1208                     /* The replace in patch replaces an atom that
1209                      * was already replaced in selfPatch, we do not want
1210                      * second or higher level replaces at this stage.
1211                      */
1212                     bReplaceReplace = true;
1213                 }
1214             }
1215             if (bReplaceReplace)
1216             {
1217                 /* Skip this replace. */
1218                 continue;
1219             }
1220 
1221             /* This atom still has the old name, rename it */
1222             std::string newnm = patch->nname;
1223             auto        found = std::find_if(
1224                     localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
1225                     [&newnm](char** name) { return gmx::equalCaseInsensitive(newnm, *name); });
1226             if (found == localPpResidue->atomname.end())
1227             {
1228                 /* The new name is not present in the rtp.
1229                  * We need to apply the replace also to the rtp entry.
1230                  */
1231 
1232                 /* We need to find the add hack that can add this atom
1233                  * to find out after which atom it should be added.
1234                  */
1235                 bool bFoundInAdd = false;
1236                 for (auto rtpModification = singlePatch.hack.begin();
1237                      rtpModification != singlePatch.hack.end(); rtpModification++)
1238                 {
1239                     int         k = std::distance(localPpResidue->atomname.begin(), found);
1240                     std::string start_at;
1241                     if (rtpModification->type() == MoleculePatchType::Add
1242                         && atomname_cmp_nr(newnm.c_str(), &(*rtpModification), &anmnr))
1243                     {
1244                         if (anmnr <= 1)
1245                         {
1246                             start_at = singlePatch.hack[k].a[0];
1247                         }
1248                         else
1249                         {
1250                             start_at = gmx::formatString("%s%d", singlePatch.hack[k].nname.c_str(),
1251                                                          anmnr - 1);
1252                         }
1253                         auto found2 =
1254                                 std::find_if(localPpResidue->atomname.begin(),
1255                                              localPpResidue->atomname.end(), [&start_at](char** name) {
1256                                                  return gmx::equalCaseInsensitive(start_at, *name);
1257                                              });
1258                         if (found2 == localPpResidue->atomname.end())
1259                         {
1260                             gmx_fatal(FARGS,
1261                                       "Could not find atom '%s' in residue building block '%s' to "
1262                                       "add atom '%s' to",
1263                                       start_at.c_str(), localPpResidue->resname.c_str(), newnm.c_str());
1264                         }
1265                         /* We can add the atom after atom start_nr */
1266                         add_atom_to_restp(localPpResidue, symtab,
1267                                           std::distance(localPpResidue->atomname.begin(), found2),
1268                                           &(*patch));
1269 
1270                         bFoundInAdd = true;
1271                     }
1272                 }
1273 
1274                 if (!bFoundInAdd)
1275                 {
1276                     gmx_fatal(FARGS,
1277                               "Could not find an 'add' entry for atom named '%s' corresponding to "
1278                               "the 'replace' entry from atom name '%s' to '%s' for tdb or hdb "
1279                               "database of residue type '%s'",
1280                               newnm.c_str(), patch->oname.c_str(), patch->nname.c_str(),
1281                               localPpResidue->resname.c_str());
1282                 }
1283             }
1284 
1285             if (bVerbose)
1286             {
1287                 GMX_LOG(logger.info)
1288                         .asParagraph()
1289                         .appendTextFormatted("Renaming atom '%s' in residue '%s' %d to '%s'", oldnm,
1290                                              localPpResidue->resname.c_str(), resnr, newnm.c_str());
1291             }
1292             /* Rename the atom in pdba */
1293             pdba->atomname[atind] = put_symtab(symtab, newnm.c_str());
1294         }
1295         else if (patch->type() == MoleculePatchType::Delete
1296                  && gmx::equalCaseInsensitive(oldnm, patch->oname))
1297         {
1298             /* This is a delete entry, check if this atom is present
1299              * in the rtp entry of this residue.
1300              */
1301             auto found3 = std::find_if(
1302                     localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
1303                     [&oldnm](char** name) { return gmx::equalCaseInsensitive(oldnm, *name); });
1304             if (found3 == localPpResidue->atomname.end())
1305             {
1306                 /* This atom is not present in the rtp entry,
1307                  * delete is now from the input pdba.
1308                  */
1309                 if (bVerbose)
1310                 {
1311                     GMX_LOG(logger.info)
1312                             .asParagraph()
1313                             .appendTextFormatted("Deleting atom '%s' in residue '%s' %d", oldnm,
1314                                                  localPpResidue->resname.c_str(), resnr);
1315                 }
1316                 /* We should free the atom name,
1317                  * but it might be used multiple times in the symtab.
1318                  * sfree(pdba->atomname[atind]);
1319                  */
1320                 for (int k = atind + 1; k < pdba->nr; k++)
1321                 {
1322                     pdba->atom[k - 1]     = pdba->atom[k];
1323                     pdba->atomname[k - 1] = pdba->atomname[k];
1324                     copy_rvec(x[k], x[k - 1]);
1325                 }
1326                 pdba->nr--;
1327                 bDeleted = true;
1328             }
1329         }
1330     }
1331 
1332     return bDeleted;
1333 }
1334 
match_atomnames_with_rtp(gmx::ArrayRef<PreprocessResidue> usedPpResidues,gmx::ArrayRef<MoleculePatchDatabase> globalPatches,t_atoms * pdba,t_symtab * symtab,gmx::ArrayRef<gmx::RVec> x,bool bVerbose,const gmx::MDLogger & logger)1335 void match_atomnames_with_rtp(gmx::ArrayRef<PreprocessResidue>     usedPpResidues,
1336                               gmx::ArrayRef<MoleculePatchDatabase> globalPatches,
1337                               t_atoms*                             pdba,
1338                               t_symtab*                            symtab,
1339                               gmx::ArrayRef<gmx::RVec>             x,
1340                               bool                                 bVerbose,
1341                               const gmx::MDLogger&                 logger)
1342 {
1343     for (int i = 0; i < pdba->nr; i++)
1344     {
1345         const char*        oldnm          = *pdba->atomname[i];
1346         PreprocessResidue* localPpResidue = &usedPpResidues[pdba->atom[i].resind];
1347         auto               found          = std::find_if(
1348                 localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
1349                 [&oldnm](char** name) { return gmx::equalCaseInsensitive(oldnm, *name); });
1350         if (found == localPpResidue->atomname.end())
1351         {
1352             /* Not found yet, check if we have to rename this atom */
1353             if (match_atomnames_with_rtp_atom(pdba, x, symtab, i, localPpResidue,
1354                                               globalPatches[pdba->atom[i].resind], bVerbose, logger))
1355             {
1356                 /* We deleted this atom, decrease the atom counter by 1. */
1357                 i--;
1358             }
1359         }
1360     }
1361 }
1362 
1363 #define NUM_CMAP_ATOMS 5
gen_cmap(InteractionsOfType * psb,gmx::ArrayRef<const PreprocessResidue> usedPpResidues,t_atoms * atoms,gmx::ArrayRef<const int> cyclicBondsIndex,const gmx::MDLogger & logger)1364 static void gen_cmap(InteractionsOfType*                    psb,
1365                      gmx::ArrayRef<const PreprocessResidue> usedPpResidues,
1366                      t_atoms*                               atoms,
1367                      gmx::ArrayRef<const int>               cyclicBondsIndex,
1368                      const gmx::MDLogger&                   logger)
1369 {
1370     int         residx;
1371     const char* ptr;
1372     t_resinfo*  resinfo = atoms->resinfo;
1373     int         nres    = atoms->nres;
1374     int         cmap_atomid[NUM_CMAP_ATOMS];
1375     int         cmap_chainnum = -1;
1376 
1377     if (debug)
1378     {
1379         ptr = "cmap";
1380     }
1381     else
1382     {
1383         ptr = "check";
1384     }
1385 
1386     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Making cmap torsions...");
1387     int i = 0;
1388     /* Most cmap entries use the N atom from the next residue, so the last
1389      * residue should not have its CMAP entry in that case, but for things like
1390      * dipeptides we sometimes define a complete CMAP entry inside a residue,
1391      * and in this case we need to process everything through the last residue.
1392      */
1393     for (residx = 0; residx < nres; residx++)
1394     {
1395         /* Add CMAP terms from the list of CMAP interactions */
1396         for (const auto& b : usedPpResidues[residx].rb[ebtsCMAP].b)
1397         {
1398             bool bAddCMAP = true;
1399             /* Loop over atoms in a candidate CMAP interaction and
1400              * check that they exist, are from the same chain and are
1401              * from residues labelled as protein. */
1402             for (int k = 0; k < NUM_CMAP_ATOMS && bAddCMAP; k++)
1403             {
1404                 /* Assign the pointer to the name of the next reference atom.
1405                  * This can use -/+ labels to refer to previous/next residue.
1406                  */
1407                 const char* pname = b.a[k].c_str();
1408                 /* Skip this CMAP entry if it refers to residues before the
1409                  * first or after the last residue.
1410                  */
1411                 if ((cyclicBondsIndex.empty() && ((strchr(pname, '-') != nullptr) && (residx == 0)))
1412                     || ((strchr(pname, '+') != nullptr) && (residx == nres - 1)))
1413                 {
1414                     bAddCMAP = false;
1415                     break;
1416                 }
1417 
1418                 cmap_atomid[k] = search_atom(pname, i, atoms, ptr, TRUE, cyclicBondsIndex);
1419                 bAddCMAP       = bAddCMAP && (cmap_atomid[k] != -1);
1420                 if (!bAddCMAP)
1421                 {
1422                     /* This break is necessary, because cmap_atomid[k]
1423                      * == -1 cannot be safely used as an index
1424                      * into the atom array. */
1425                     break;
1426                 }
1427                 int this_residue_index = atoms->atom[cmap_atomid[k]].resind;
1428                 if (0 == k)
1429                 {
1430                     cmap_chainnum = resinfo[this_residue_index].chainnum;
1431                 }
1432                 else
1433                 {
1434                     /* Does the residue for this atom have the same
1435                      * chain number as the residues for previous
1436                      * atoms? */
1437                     bAddCMAP = bAddCMAP && cmap_chainnum == resinfo[this_residue_index].chainnum;
1438                 }
1439                 /* Here we used to check that the residuetype was protein and
1440                  * disable bAddCMAP if that was not the case. However, some
1441                  * special residues (say, alanine dipeptides) might not adhere
1442                  * to standard naming, and if we start calling them normal
1443                  * protein residues the user will be bugged to select termini.
1444                  *
1445                  * Instead, I believe that the right course of action is to
1446                  * keep the CMAP interaction if it is present in the RTP file
1447                  * and we correctly identified all atoms (which is the case
1448                  * if we got here).
1449                  */
1450             }
1451 
1452             if (bAddCMAP)
1453             {
1454                 add_cmap_param(psb, cmap_atomid[0], cmap_atomid[1], cmap_atomid[2], cmap_atomid[3],
1455                                cmap_atomid[4], b.s.c_str());
1456             }
1457         }
1458 
1459         if (residx < nres - 1)
1460         {
1461             while (atoms->atom[i].resind < residx + 1)
1462             {
1463                 i++;
1464             }
1465         }
1466     }
1467     /* Start the next residue */
1468 }
1469 
scrub_charge_groups(int * cgnr,int natoms)1470 static void scrub_charge_groups(int* cgnr, int natoms)
1471 {
1472     int i;
1473 
1474     for (i = 0; i < natoms; i++)
1475     {
1476         cgnr[i] = i + 1;
1477     }
1478 }
1479 
1480 
pdb2top(FILE * top_file,const char * posre_fn,const char * molname,t_atoms * atoms,std::vector<gmx::RVec> * x,PreprocessingAtomTypes * atype,t_symtab * tab,gmx::ArrayRef<const PreprocessResidue> rtpFFDB,gmx::ArrayRef<PreprocessResidue> usedPpResidues,gmx::ArrayRef<MoleculePatchDatabase> globalPatches,bool bAllowMissing,bool bVsites,bool bVsiteAromatics,const char * ffdir,real mHmult,gmx::ArrayRef<const DisulfideBond> ssbonds,real long_bond_dist,real short_bond_dist,bool bDeuterate,bool bChargeGroups,bool bCmap,bool bRenumRes,bool bRTPresname,gmx::ArrayRef<const int> cyclicBondsIndex,const gmx::MDLogger & logger)1481 void pdb2top(FILE*                                  top_file,
1482              const char*                            posre_fn,
1483              const char*                            molname,
1484              t_atoms*                               atoms,
1485              std::vector<gmx::RVec>*                x,
1486              PreprocessingAtomTypes*                atype,
1487              t_symtab*                              tab,
1488              gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1489              gmx::ArrayRef<PreprocessResidue>       usedPpResidues,
1490              gmx::ArrayRef<MoleculePatchDatabase>   globalPatches,
1491              bool                                   bAllowMissing,
1492              bool                                   bVsites,
1493              bool                                   bVsiteAromatics,
1494              const char*                            ffdir,
1495              real                                   mHmult,
1496              gmx::ArrayRef<const DisulfideBond>     ssbonds,
1497              real                                   long_bond_dist,
1498              real                                   short_bond_dist,
1499              bool                                   bDeuterate,
1500              bool                                   bChargeGroups,
1501              bool                                   bCmap,
1502              bool                                   bRenumRes,
1503              bool                                   bRTPresname,
1504              gmx::ArrayRef<const int>               cyclicBondsIndex,
1505              const gmx::MDLogger&                   logger)
1506 {
1507     std::array<InteractionsOfType, F_NRE> plist;
1508     t_excls*                              excls;
1509     int*                                  cgnr;
1510     int*                                  vsite_type;
1511     int                                   i, nmissat;
1512     int                                   bts[ebtsNR];
1513 
1514     ResidueType rt;
1515 
1516     /* Make bonds */
1517     at2bonds(&(plist[F_BONDS]), globalPatches, atoms, *x, long_bond_dist, short_bond_dist,
1518              cyclicBondsIndex, logger);
1519 
1520     /* specbonds: disulphide bonds & heme-his */
1521     do_ssbonds(&(plist[F_BONDS]), atoms, ssbonds, bAllowMissing);
1522 
1523     nmissat = name2type(atoms, &cgnr, usedPpResidues, &rt, logger);
1524     if (nmissat)
1525     {
1526         if (bAllowMissing)
1527         {
1528             GMX_LOG(logger.warning)
1529                     .asParagraph()
1530                     .appendTextFormatted("There were %d missing atoms in molecule %s", nmissat, molname);
1531         }
1532         else
1533         {
1534             gmx_fatal(FARGS,
1535                       "There were %d missing atoms in molecule %s, if you want to use this "
1536                       "incomplete topology anyhow, use the option -missing",
1537                       nmissat, molname);
1538         }
1539     }
1540 
1541     /* Cleanup bonds (sort and rm doubles) */
1542     clean_bonds(&(plist[F_BONDS]), logger);
1543 
1544     snew(vsite_type, atoms->nr);
1545     for (i = 0; i < atoms->nr; i++)
1546     {
1547         vsite_type[i] = NOTSET;
1548     }
1549     if (bVsites)
1550     {
1551         if (bVsiteAromatics)
1552         {
1553             GMX_LOG(logger.info)
1554                     .asParagraph()
1555                     .appendTextFormatted(
1556                             "The conversion of aromatic rings into virtual sites is deprecated "
1557                             "and may be removed in a future version of GROMACS");
1558         }
1559         /* determine which atoms will be vsites and add dummy masses
1560            also renumber atom numbers in plist[0..F_NRE]! */
1561         do_vsites(rtpFFDB, atype, atoms, tab, x, plist, &vsite_type, &cgnr, mHmult, bVsiteAromatics, ffdir);
1562     }
1563 
1564     /* Make Angles and Dihedrals */
1565     GMX_LOG(logger.info)
1566             .asParagraph()
1567             .appendTextFormatted("Generating angles, dihedrals and pairs...");
1568     snew(excls, atoms->nr);
1569     gen_pad(atoms, usedPpResidues, plist, excls, globalPatches, bAllowMissing, cyclicBondsIndex);
1570 
1571     /* Make CMAP */
1572     if (bCmap)
1573     {
1574         gen_cmap(&(plist[F_CMAP]), usedPpResidues, atoms, cyclicBondsIndex, logger);
1575         if (plist[F_CMAP].size() > 0)
1576         {
1577             GMX_LOG(logger.info)
1578                     .asParagraph()
1579                     .appendTextFormatted("There are %4zu cmap torsion pairs", plist[F_CMAP].size());
1580         }
1581     }
1582 
1583     /* set mass of all remaining hydrogen atoms */
1584     if (mHmult != 1.0)
1585     {
1586         do_h_mass(&(plist[F_BONDS]), vsite_type, atoms, mHmult, bDeuterate);
1587     }
1588     sfree(vsite_type);
1589 
1590     /* Cleanup bonds (sort and rm doubles) */
1591     /* clean_bonds(&(plist[F_BONDS]));*/
1592 
1593     GMX_LOG(logger.info)
1594             .asParagraph()
1595             .appendTextFormatted(
1596                     "There are %4zu dihedrals, %4zu impropers, %4zu angles\n"
1597                     "          %4zu pairs,     %4zu bonds and  %4zu virtual sites",
1598                     plist[F_PDIHS].size(), plist[F_IDIHS].size(), plist[F_ANGLES].size(),
1599                     plist[F_LJ14].size(), plist[F_BONDS].size(),
1600                     plist[F_VSITE2].size() + plist[F_VSITE3].size() + plist[F_VSITE3FD].size()
1601                             + plist[F_VSITE3FAD].size() + plist[F_VSITE3OUT].size()
1602                             + plist[F_VSITE4FD].size() + plist[F_VSITE4FDN].size());
1603 
1604     print_sums(atoms, FALSE, logger);
1605 
1606     if (!bChargeGroups)
1607     {
1608         scrub_charge_groups(cgnr, atoms->nr);
1609     }
1610 
1611     if (bRenumRes)
1612     {
1613         for (i = 0; i < atoms->nres; i++)
1614         {
1615             atoms->resinfo[i].nr = i + 1;
1616             atoms->resinfo[i].ic = ' ';
1617         }
1618     }
1619 
1620     if (top_file)
1621     {
1622         GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing topology");
1623         /* We can copy the bonded types from the first restp,
1624          * since the types have to be identical for all residues in one molecule.
1625          */
1626         for (i = 0; i < ebtsNR; i++)
1627         {
1628             bts[i] = usedPpResidues[0].rb[i].type;
1629         }
1630         write_top(top_file, posre_fn, molname, atoms, bRTPresname, bts, plist, excls, atype, cgnr,
1631                   usedPpResidues[0].nrexcl);
1632     }
1633 
1634 
1635     /* we should clean up hb and restp here, but that is a *L*O*T* of work! */
1636     sfree(cgnr);
1637     for (i = 0; i < atoms->nr; i++)
1638     {
1639         sfree(excls[i].e);
1640     }
1641     sfree(excls);
1642 }
1643