1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The authors of this code are: Gerardo Tauriello
6 //
7 // The code is heavily based on traverse.c written by Julien Ferte from the
8 // mmtf_c project
9 //
10 // *************************************************************************
11 
12 #include <mmtf.hpp>
13 
14 // C-style libraries used here to keep it close to traverse.c
15 #include <stdio.h>
16 #include <string.h>
17 #include <assert.h>
18 
19 // *************************************************************************
20 // JSON-style
21 // *************************************************************************
22 
23 // printf for strings
24 template<typename T>
printval(const char * pff,const T & val)25 void printval(const char* pff, const T& val) {
26     printf(pff, val);
27 }
28 template<>
printval(const char * pff,const std::string & val)29 void printval(const char* pff, const std::string& val) {
30     printf(pff, val.c_str());
31 }
32 
33 // print vector only
34 template<typename T>
printvec(const char * pff,const T * v,const size_t N)35 void printvec(const char* pff, const T* v, const size_t N) {
36     printf("[");
37     for (size_t i = 0; i < N; ++i) {
38         if (i > 0) printf(", ");
39         printval(pff, v[i]);
40     }
41     printf("]");
42 }
43 
44 // writing with no check if it's set or not
45 template<typename T>
printreq(const char * label,const char * pff,const T * v,const size_t N,bool last=false)46 void printreq(const char* label, const char* pff,
47               const T* v, const size_t N, bool last = false) {
48     printf("%s: ", label);
49     printvec(pff, v, N);
50     printf("%s\n", (last) ? "" : ",");
51 }
52 
53 template<typename T>
printreq(const char * label,const char * pff,const std::vector<T> & v,bool last=false)54 void printreq(const char* label, const char* pff,
55               const std::vector<T>& v, bool last = false) {
56     if (v.empty()) {
57         printf("%s: []%s\n", label, (last) ? "" : ",");
58     } else {
59         printreq(label, pff, &v[0], v.size(), last);
60     }
61 }
62 
printreq(const char * label,const std::string & s,bool last=false)63 void printreq(const char* label, const std::string& s, bool last = false) {
64     printf("%s: \"%s\"%s\n", label, s.c_str(), (last) ? "" : ",");
65 }
66 
printreq(const char * label,const float & f,bool last=false)67 void printreq(const char* label, const float& f, bool last = false) {
68     printf("%s: %g%s\n", label, f, (last) ? "" : ",");
69 }
70 
71 // write with check if set
72 template<typename T>
printopt(const char * label,const T & val,bool last=false)73 void printopt(const char* label, const T& val, bool last = false) {
74     if (!mmtf::isDefaultValue(val)) {
75         printreq(label, val, last);
76     } else {
77         printf("%s: null%s\n", label, (last) ? "" : ",");
78     }
79 }
80 
81 template<typename T>
printopt(const char * label,const char * pff,const std::vector<T> & vec,bool last=false)82 void printopt(const char* label, const char* pff,
83               const std::vector<T>& vec, bool last = false) {
84     if (!mmtf::isDefaultValue(vec)) {
85         printreq(label, pff, vec, last);
86     } else {
87         printf("%s: null%s\n", label, (last) ? "" : ",");
88     }
89 }
90 
91 // JSON style printing of parts (not very elegant hack here)
92 template<typename T>
93 void json_print(const T& t);
94 
95 template<typename T>
json_print_list(const char * label,const T * v,const size_t N,int indent,bool last=false)96 void json_print_list(const char* label, const T* v, const size_t N,
97                      int indent, bool last = false) {
98     printf("%s: [", label);
99     for (size_t i = 0; i < N; ++i) {
100         if (i > 0) printf(",\n");
101         else       printf("\n");
102         json_print(v[i]);
103     }
104     printf("\n%*s%s\n", indent+1, "]", (last) ? "" : ",");
105 }
106 
107 template<typename T>
json_print_list(const char * label,const std::vector<T> & v,int indent,bool last=false)108 void json_print_list(const char* label, const std::vector<T>& v,
109                      int indent, bool last = false) {
110     json_print_list(label, &v[0], v.size(), indent, last);
111 }
112 
113 // for optional vectors
114 template<typename T>
json_print_opt(const char * label,const std::vector<T> & v,int indent,bool last=false)115 void json_print_opt(const char* label, const std::vector<T>& v,
116                     int indent, bool last = false) {
117     if (!mmtf::isDefaultValue(v)) {
118         json_print_list(label, &v[0], v.size(), indent, last);
119     } else {
120         printf("%s: null%s\n", label, (last) ? "" : ",");
121     }
122 }
123 
124 // just aimed for ncsOperatorList
125 template<>
json_print(const std::vector<float> & list)126 void json_print(const std::vector<float>& list) {
127     printf("  ");
128     printvec("%g", &list[0], list.size());
129 }
130 
131 template<>
json_print(const mmtf::Transform & transform)132 void json_print(const mmtf::Transform& transform) {
133     printf("    {\n");
134     printreq("     \"chainIndexList\"", "%d", transform.chainIndexList);
135     printreq("     \"matrix\"", "%g", transform.matrix, 16, true);
136     printf("    }");
137 }
138 
139 template<>
json_print(const mmtf::BioAssembly & ba)140 void json_print(const mmtf::BioAssembly& ba) {
141     printf("  {\n");
142     printreq("   \"name\"", ba.name);
143     json_print_list("   \"transformList\"", ba.transformList, 3, true);
144     printf("  }");
145 }
146 
147 template<>
json_print(const mmtf::Entity & ent)148 void json_print(const mmtf::Entity& ent) {
149     printf("  {\n");
150     printreq("   \"chainIndexList\"", "%d", ent.chainIndexList);
151     printreq("   \"description\"", ent.description);
152     printreq("   \"type\"", ent.type);
153     printreq("   \"sequence\"", ent.sequence, true);
154     printf("  }");
155 }
156 
157 template<>
json_print(const mmtf::GroupType & group)158 void json_print(const mmtf::GroupType& group) {
159     printf("  {\n");
160     printreq("   \"formalChargeList\"", "%d", group.formalChargeList);
161     printreq("   \"atomNameList\"", "\"%s\"", group.atomNameList);
162     printreq("   \"bondAtomList\"", "%d", group.bondAtomList);
163     printreq("   \"bondOrderList\"", "%d", group.bondOrderList);
164     printreq("   \"groupName\"", group.groupName);
165     printf("   \"singleLetterCode\": \"%c\",\n", group.singleLetterCode);
166     printreq("   \"chemCompType\"", group.chemCompType, true);
167     printf("  }");
168 }
169 
170 /**
171  * @brief Raw output of the whole thing json style.
172  */
173 template<>
json_print(const mmtf::StructureData & example)174 void json_print(const mmtf::StructureData& example) {
175     printf("{\n");
176     // generic info
177     printreq(" \"mmtfVersion\"", example.mmtfVersion);
178     printreq(" \"mmtfProducer\"", example.mmtfProducer);
179     printopt(" \"unitCell\"", "%g", example.unitCell);
180     printopt(" \"spaceGroup\"", example.spaceGroup);
181     printopt(" \"structureId\"", example.structureId);
182     printopt(" \"title\"", example.title);
183     printopt(" \"depositionDate\"", example.depositionDate);
184     printopt(" \"releaseDate\"", example.releaseDate);
185 
186     // json_print_opt(" \"ncsOperatorList\"", example.ncsOperatorList, 1);
187     json_print_opt(" \"bioAssemblyList\"", example.bioAssemblyList, 1);
188     json_print_opt(" \"entityList\"", example.entityList, 1);
189     printopt(" \"experimentalMethods\"", "\"%s\"", example.experimentalMethods);
190 
191     printopt(" \"resolution\"", example.resolution);
192     printopt(" \"rFree\"", example.rFree);
193     printopt(" \"rWork\"", example.rWork);
194 
195     printf(" \"numBonds\": %d,\n", example.numBonds);
196     printf(" \"numAtoms\": %d,\n", example.numAtoms);
197     printf(" \"numGroups\": %d,\n", example.numGroups);
198     printf(" \"numChains\": %d,\n", example.numChains);
199     printf(" \"numModels\": %d,\n", example.numModels);
200 
201     json_print_list(" \"groupList\"", example.groupList, 1);
202 
203     printopt(" \"bondAtomList\"", "%d", example.bondAtomList);
204     printopt(" \"bondOrderList\"", "%d", example.bondOrderList);
205 
206     printreq(" \"xCoordList\"", "%g", example.xCoordList);
207     printreq(" \"yCoordList\"", "%g", example.yCoordList);
208     printreq(" \"zCoordList\"", "%g", example.zCoordList);
209     printopt(" \"bFactorList\"", "%g", example.bFactorList);
210     printopt(" \"atomIdList\"", "%d", example.atomIdList);
211     printopt(" \"altLocList\"", "%d", example.altLocList);
212     printopt(" \"occupancyList\"", "%g", example.occupancyList);
213 
214     printreq(" \"groupIdList\"", "%d", example.groupIdList);
215     printreq(" \"groupTypeList\"", "%d", example.groupTypeList);
216     printopt(" \"secStructList\"", "%d", example.secStructList);
217     printopt(" \"insCodeList\"", "%d", example.insCodeList);
218     printopt(" \"sequenceIndexList\"", "%d", example.sequenceIndexList);
219 
220     printreq(" \"chainIdList\"", "\"%s\"", example.chainIdList);
221     printopt(" \"chainNameList\"", "\"%s\"", example.chainNameList);
222     printreq(" \"groupsPerChain\"", "%d", example.groupsPerChain);
223     printreq(" \"chainsPerModel\"", "%d", example.chainsPerModel, true);
224 
225     printf("}\n");
226 }
227 
228 // *************************************************************************
229 // Verbose style
230 // *************************************************************************
231 
232 /**
233  * @brief If any value from
234  * \link mmtf::StructureData::insCodeList insCodeList \endlink or
235  * \link mmtf::StructureData::altLocList altLocList \endlink is empty,
236  * it would would replace the character to a dot
237  * @param c character which needs to be checked
238  * @return The c character if it is not empty otherwise a dot
239  */
safechar(char c)240 char safechar(char c) {
241     return (c < ' ') ? '.' : c;
242 }
243 
244 // helper for optional entries (printval(pff, vec[i]) only if vec given)
245 template<typename T>
printvalo(const char * pff,const std::vector<T> & vec,size_t i)246 void printvalo(const char* pff, const std::vector<T>& vec, size_t i) {
247     if (!mmtf::isDefaultValue(vec)) printval(pff, vec[i]);
248 }
249 // helper for char entries (do safeprint of vec[i])
250 template<>
printvalo(const char * pff,const std::vector<char> & vec,size_t i)251 void printvalo(const char* pff, const std::vector<char>& vec, size_t i) {
252     if (!mmtf::isDefaultValue(vec)) printval(pff, safechar(vec[i]));
253 }
254 
255 /**
256  * @brief This is the main traversal function to read out all the contents of
257  *        mmtf::StructureData
258  * @param example Any existing mmtf::StructureData which you want to read
259  * @return void
260  */
traverse_main(const mmtf::StructureData & example)261 void traverse_main(const mmtf::StructureData& example) {
262     // generic info
263     printreq("mmtfVersion", example.mmtfVersion, true);
264     printreq("mmtfProducer", example.mmtfProducer, true);
265     printopt("unitCell", "%g", example.unitCell, true);
266     printopt("spaceGroup", example.spaceGroup, true);
267     printopt("structureId", example.structureId, true);
268     printopt("title", example.title, true);
269     printopt("depositionDate", example.depositionDate, true);
270     printopt("releaseDate", example.releaseDate, true);
271 
272     for (size_t i = 0; i < example.ncsOperatorList.size(); ++i) {
273         printf("ncsOperatorList[%d]", int(i));
274         printreq("", "%g", example.ncsOperatorList[i], true);
275     }
276 
277     for (size_t i = 0; i < example.bioAssemblyList.size(); ++i) {
278         printf("bioAssemblyIndex: %d\n", int(i));
279         const mmtf::BioAssembly& ba = example.bioAssemblyList[i];
280         printreq(" name", ba.name, true);
281         for (size_t j = 0; j < ba.transformList.size(); ++j) {
282             printf(" bioAssemblyTransformIndex: %d\n", int(j));
283             const mmtf::Transform & transform = ba.transformList[j];
284             printreq("  chainIndexList", "%d", transform.chainIndexList, true);
285             printreq("  matrix", "%g", transform.matrix, 16, true);
286         }
287     }
288 
289     for (size_t i = 0; i < example.entityList.size(); ++i) {
290         printf("entityIndex: %d\n", int(i));
291         const mmtf::Entity& ent = example.entityList[i];
292         printreq(" chainIndexList", "%d", ent.chainIndexList, true);
293         printreq(" description", ent.description, true);
294         printreq(" type", ent.type, true);
295         printreq(" sequence", ent.sequence, true);
296     }
297 
298     for (size_t i = 0; i < example.experimentalMethods.size(); ++i) {
299         printf("experimentalMethod %d: %s\n", int(i),
300                example.experimentalMethods[i].c_str());
301     }
302 
303     printopt("resolution", example.resolution, true);
304     printopt("rFree", example.rFree, true);
305     printopt("rWork", example.rWork, true);
306 
307     printf("numBonds: %d\n", example.numBonds);
308     printf("numAtoms: %d\n", example.numAtoms);
309     printf("numGroups: %d\n", example.numGroups);
310     printf("numChains: %d\n", example.numChains);
311     printf("numModels: %d\n", example.numModels);
312 
313     //  # initialize index counters
314     int modelIndex = 0;
315     int chainIndex = 0;
316     int groupIndex = 0;
317     int atomIndex = 0;
318     //# traverse models
319     int i;
320     for (i = 0; i < example.numModels; i++) {
321         int modelChainCount = example.chainsPerModel[i];
322         printf("modelIndex: %d\n", modelIndex);
323         //    # traverse chains
324         int j;
325         for (j = 0; j < modelChainCount; j++) {
326             printf(" chainIndex : %d\n", chainIndex);
327             printval("  Chain id: %s\n", example.chainIdList[chainIndex]);
328             printvalo("  Chain name: %s\n", example.chainNameList, chainIndex);
329             int chainGroupCount = example.groupsPerChain[chainIndex];
330             //        # traverse groups
331             int k;
332             for (k = 0; k < chainGroupCount; k++) {
333                 printf("  groupIndex: %d\n", groupIndex);
334                 printf("   groupId: %d\n", example.groupIdList[groupIndex]);
335                 printvalo("   insCodeList: %c\n", example.insCodeList,
336                           groupIndex);
337                 printvalo("   secStruc: %d\n", example.secStructList,
338                           groupIndex);
339                 printvalo("   seqIndex: %i\n", example.sequenceIndexList,
340                           groupIndex);
341                 printf("   groupType: %d\n", example.groupTypeList[groupIndex]);
342                 const mmtf::GroupType& group =
343                         example.groupList[example.groupTypeList[groupIndex]];
344                 printval("    Group name: %s\n", group.groupName);
345                 printf("    Single letter code: %c\n", group.singleLetterCode);
346                 printval("    Chem comp type: %s\n", group.chemCompType);
347                 int atomOffset = atomIndex;
348 
349                 int l;
350                 for (l = 0; l < group.bondOrderList.size(); l++) {
351                     printf("    Atom id One: %d\n",
352                            (atomOffset + group.bondAtomList[l * 2]));
353                     printf("    Atom id Two: %d\n",
354                            (atomOffset + group.bondAtomList[l * 2 + 1]));
355                     printf("    Bond order: %d\n", group.bondOrderList[l]);
356                 }
357                 int groupAtomCount = group.atomNameList.size();
358                 for (l = 0; l < groupAtomCount; l++) {
359                     printf("    atomIndex: %d\n", atomIndex);
360                     printf("     x coord: %.3f\n",
361                            example.xCoordList[atomIndex]);
362                     printf("     y coord: %.3f\n",
363                            example.yCoordList[atomIndex]);
364                     printf("     z coord: %.3f\n",
365                            example.zCoordList[atomIndex]);
366                     printvalo("     b factor: %.2f\n", example.bFactorList,
367                               atomIndex);
368                     printvalo("     atom id: %d\n", example.atomIdList,
369                               atomIndex);
370                     printvalo("     altLocList: %c\n", example.altLocList,
371                               atomIndex);
372                     printvalo("     occupancy: %.2f\n", example.occupancyList,
373                               atomIndex);
374                     printf("     charge: %d\n", group.formalChargeList[l]);
375                     printval("     atom name: %s\n", group.atomNameList[l]);
376                     printval("     element: %s\n", group.elementList[l]);
377                     atomIndex++;
378                 }
379                 groupIndex++;
380             }
381             chainIndex++;
382         }
383         modelIndex++;
384     }
385     printf("Number of inter group bonds: %d\n",
386            (int) example.bondOrderList.size());
387     for (i = 0; i < example.bondOrderList.size(); i++) {
388         printf(" Atom One: %d\n", example.bondAtomList[i * 2]);
389         printf(" Atom Two: %d\n", example.bondAtomList[i * 2 + 1]);
390         printf(" Bond order: %d\n", example.bondOrderList[i]);
391     }
392 }
393 
394 // *************************************************************************
395 // PDB style
396 // *************************************************************************
397 
398 /**
399  * @brief This function tells if the group atoms belong to HETATM
400  * @param type The group of type
401  *             \link mmtf::GroupType::chemCompType chemCompType \endlink
402  * that needs to be checked.
403  * @return 0 or 1
404  */
is_hetatm(const char * type)405 int is_hetatm(const char* type) {
406     const char* hetatm_type[] = {
407         "D-SACCHARIDE",
408         "D-SACCHARIDE 1,4 AND 1,4 LINKING",
409         "D-SACCHARIDE 1,4 AND 1,6 LINKING",
410         "L-SACCHARIDE",
411         "L-SACCHARIDE 1,4 AND 1,4 LINKING",
412         "L-SACCHARIDE 1,4 AND 1,6 LINKING",
413         "SACCHARIDE",
414         "OTHER",
415         "NON-POLYMER",
416         0 };
417     for (int i=0; hetatm_type[i]; ++i) {
418         if (strcmp(type,hetatm_type[i]) == 0)
419             return 1;
420     }
421     return 0;
422 }
423 
424 /**
425  * @brief Read out the contents of mmtf::StructureData in a PDB-like fashion
426  * Columns are in order:
427  * ATOM/HETATM AtomId Element AtomName AltLoc GroupId GroupType
428  * InsCode ChainName x y z B-factor Occupancy Charge
429  * @param example Any existing mmtf::StructureData which you want to read
430  */
traverse_pdb_like(const mmtf::StructureData & example)431 void traverse_pdb_like(const mmtf::StructureData& example) {
432 
433 
434     int modelIndex = 0;
435     int chainIndex = 0;
436     int groupIndex = 0;
437     int atomIndex = 0;
438 
439     //# traverse models
440     for (int i = 0; i < example.numModels; i++, modelIndex++) {
441         //    # traverse chains
442         for (int j = 0; j < example.chainsPerModel[modelIndex]; j++, chainIndex++) {
443             //        # traverse groups
444             for (int k = 0; k < example.groupsPerChain[chainIndex]; k++, groupIndex++) {
445                 const mmtf::GroupType& group =
446                         example.groupList[example.groupTypeList[groupIndex]];
447                 int groupAtomCount = group.atomNameList.size();
448 
449                 for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
450                     // ATOM or HETATM
451                     if (is_hetatm(group.chemCompType.c_str()))
452                         printf("HETATM ");
453                     else
454                         printf("ATOM ");
455                     // Atom serial
456                     printvalo("%d ", example.atomIdList, atomIndex);
457                     // Atom name
458                     printval("%s ", group.atomNameList[l]);
459                     // Alternate location
460                     printvalo("%c ", example.altLocList, atomIndex);
461                     // Group name
462                     printval("%s ", group.groupName);
463                     // Chain
464                     printvalo("%s ", example.chainNameList, chainIndex);
465                     // Group serial
466                     printf("%d ", example.groupIdList[groupIndex]);
467                     // Insertion code
468                     printvalo("%c ", example.insCodeList, groupIndex);
469                     // x, y, z
470                     printf("%.3f ", example.xCoordList[atomIndex]);
471                     printf("%.3f ", example.yCoordList[atomIndex]);
472                     printf("%.3f ", example.zCoordList[atomIndex]);
473                     // B-factor
474                     printvalo("%.2f ", example.bFactorList, atomIndex);
475                     // Occupancy
476                     printvalo("%.2f ", example.occupancyList, atomIndex);
477                     // Element
478                     printval("%s ", group.elementList[l]);
479                     // Charge
480                     printf("%d \n", group.formalChargeList[l]);
481                 }
482             }
483         }
484     }
485 }
486 
main(int argc,char ** argv)487 int main(int argc, char** argv) {
488     // check arguments
489     if (argc < 2) {
490       printf("USAGE: traverse <mmtffile> [<style>]\n");
491       printf("-> if style ommited, we output a PDB-like output\n");
492       printf("-> if style == 'json', we output a json-dump of all data\n");
493       printf("-> if style == 'print', we output a built-in tab delimited format\n");
494       printf("-> else, we output all the data in a verbose, readable form\n");
495       return 1;
496     }
497 
498     // decode MMTF file
499     mmtf::StructureData example;
500     mmtf::decodeFromFile(example, argv[1]);
501 
502     // traverse it according to user specified style
503     if (argc > 2) {
504         if (strcmp(argv[2], "json") == 0) {
505             json_print(example);
506         } else if (strcmp(argv[2], "print") == 0) {
507             std::cout << example.print() << std::endl;
508         } else {
509             traverse_main(example);
510         }
511     } else {
512       traverse_pdb_like(example);
513     }
514     return 0;
515 }
516