1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The authors of this code are: Gerardo Tauriello
6 //
7 // The code is heavily based on traverse.c written by Julien Ferte from the
8 // mmtf_c project
9 //
10 // *************************************************************************
11
12 #include <mmtf.hpp>
13
14 // C-style libraries used here to keep it close to traverse.c
15 #include <stdio.h>
16 #include <string.h>
17 #include <assert.h>
18
19 // *************************************************************************
20 // JSON-style
21 // *************************************************************************
22
23 // printf for strings
24 template<typename T>
printval(const char * pff,const T & val)25 void printval(const char* pff, const T& val) {
26 printf(pff, val);
27 }
28 template<>
printval(const char * pff,const std::string & val)29 void printval(const char* pff, const std::string& val) {
30 printf(pff, val.c_str());
31 }
32
33 // print vector only
34 template<typename T>
printvec(const char * pff,const T * v,const size_t N)35 void printvec(const char* pff, const T* v, const size_t N) {
36 printf("[");
37 for (size_t i = 0; i < N; ++i) {
38 if (i > 0) printf(", ");
39 printval(pff, v[i]);
40 }
41 printf("]");
42 }
43
44 // writing with no check if it's set or not
45 template<typename T>
printreq(const char * label,const char * pff,const T * v,const size_t N,bool last=false)46 void printreq(const char* label, const char* pff,
47 const T* v, const size_t N, bool last = false) {
48 printf("%s: ", label);
49 printvec(pff, v, N);
50 printf("%s\n", (last) ? "" : ",");
51 }
52
53 template<typename T>
printreq(const char * label,const char * pff,const std::vector<T> & v,bool last=false)54 void printreq(const char* label, const char* pff,
55 const std::vector<T>& v, bool last = false) {
56 if (v.empty()) {
57 printf("%s: []%s\n", label, (last) ? "" : ",");
58 } else {
59 printreq(label, pff, &v[0], v.size(), last);
60 }
61 }
62
printreq(const char * label,const std::string & s,bool last=false)63 void printreq(const char* label, const std::string& s, bool last = false) {
64 printf("%s: \"%s\"%s\n", label, s.c_str(), (last) ? "" : ",");
65 }
66
printreq(const char * label,const float & f,bool last=false)67 void printreq(const char* label, const float& f, bool last = false) {
68 printf("%s: %g%s\n", label, f, (last) ? "" : ",");
69 }
70
71 // write with check if set
72 template<typename T>
printopt(const char * label,const T & val,bool last=false)73 void printopt(const char* label, const T& val, bool last = false) {
74 if (!mmtf::isDefaultValue(val)) {
75 printreq(label, val, last);
76 } else {
77 printf("%s: null%s\n", label, (last) ? "" : ",");
78 }
79 }
80
81 template<typename T>
printopt(const char * label,const char * pff,const std::vector<T> & vec,bool last=false)82 void printopt(const char* label, const char* pff,
83 const std::vector<T>& vec, bool last = false) {
84 if (!mmtf::isDefaultValue(vec)) {
85 printreq(label, pff, vec, last);
86 } else {
87 printf("%s: null%s\n", label, (last) ? "" : ",");
88 }
89 }
90
91 // JSON style printing of parts (not very elegant hack here)
92 template<typename T>
93 void json_print(const T& t);
94
95 template<typename T>
json_print_list(const char * label,const T * v,const size_t N,int indent,bool last=false)96 void json_print_list(const char* label, const T* v, const size_t N,
97 int indent, bool last = false) {
98 printf("%s: [", label);
99 for (size_t i = 0; i < N; ++i) {
100 if (i > 0) printf(",\n");
101 else printf("\n");
102 json_print(v[i]);
103 }
104 printf("\n%*s%s\n", indent+1, "]", (last) ? "" : ",");
105 }
106
107 template<typename T>
json_print_list(const char * label,const std::vector<T> & v,int indent,bool last=false)108 void json_print_list(const char* label, const std::vector<T>& v,
109 int indent, bool last = false) {
110 json_print_list(label, &v[0], v.size(), indent, last);
111 }
112
113 // for optional vectors
114 template<typename T>
json_print_opt(const char * label,const std::vector<T> & v,int indent,bool last=false)115 void json_print_opt(const char* label, const std::vector<T>& v,
116 int indent, bool last = false) {
117 if (!mmtf::isDefaultValue(v)) {
118 json_print_list(label, &v[0], v.size(), indent, last);
119 } else {
120 printf("%s: null%s\n", label, (last) ? "" : ",");
121 }
122 }
123
124 // just aimed for ncsOperatorList
125 template<>
json_print(const std::vector<float> & list)126 void json_print(const std::vector<float>& list) {
127 printf(" ");
128 printvec("%g", &list[0], list.size());
129 }
130
131 template<>
json_print(const mmtf::Transform & transform)132 void json_print(const mmtf::Transform& transform) {
133 printf(" {\n");
134 printreq(" \"chainIndexList\"", "%d", transform.chainIndexList);
135 printreq(" \"matrix\"", "%g", transform.matrix, 16, true);
136 printf(" }");
137 }
138
139 template<>
json_print(const mmtf::BioAssembly & ba)140 void json_print(const mmtf::BioAssembly& ba) {
141 printf(" {\n");
142 printreq(" \"name\"", ba.name);
143 json_print_list(" \"transformList\"", ba.transformList, 3, true);
144 printf(" }");
145 }
146
147 template<>
json_print(const mmtf::Entity & ent)148 void json_print(const mmtf::Entity& ent) {
149 printf(" {\n");
150 printreq(" \"chainIndexList\"", "%d", ent.chainIndexList);
151 printreq(" \"description\"", ent.description);
152 printreq(" \"type\"", ent.type);
153 printreq(" \"sequence\"", ent.sequence, true);
154 printf(" }");
155 }
156
157 template<>
json_print(const mmtf::GroupType & group)158 void json_print(const mmtf::GroupType& group) {
159 printf(" {\n");
160 printreq(" \"formalChargeList\"", "%d", group.formalChargeList);
161 printreq(" \"atomNameList\"", "\"%s\"", group.atomNameList);
162 printreq(" \"bondAtomList\"", "%d", group.bondAtomList);
163 printreq(" \"bondOrderList\"", "%d", group.bondOrderList);
164 printreq(" \"groupName\"", group.groupName);
165 printf(" \"singleLetterCode\": \"%c\",\n", group.singleLetterCode);
166 printreq(" \"chemCompType\"", group.chemCompType, true);
167 printf(" }");
168 }
169
170 /**
171 * @brief Raw output of the whole thing json style.
172 */
173 template<>
json_print(const mmtf::StructureData & example)174 void json_print(const mmtf::StructureData& example) {
175 printf("{\n");
176 // generic info
177 printreq(" \"mmtfVersion\"", example.mmtfVersion);
178 printreq(" \"mmtfProducer\"", example.mmtfProducer);
179 printopt(" \"unitCell\"", "%g", example.unitCell);
180 printopt(" \"spaceGroup\"", example.spaceGroup);
181 printopt(" \"structureId\"", example.structureId);
182 printopt(" \"title\"", example.title);
183 printopt(" \"depositionDate\"", example.depositionDate);
184 printopt(" \"releaseDate\"", example.releaseDate);
185
186 // json_print_opt(" \"ncsOperatorList\"", example.ncsOperatorList, 1);
187 json_print_opt(" \"bioAssemblyList\"", example.bioAssemblyList, 1);
188 json_print_opt(" \"entityList\"", example.entityList, 1);
189 printopt(" \"experimentalMethods\"", "\"%s\"", example.experimentalMethods);
190
191 printopt(" \"resolution\"", example.resolution);
192 printopt(" \"rFree\"", example.rFree);
193 printopt(" \"rWork\"", example.rWork);
194
195 printf(" \"numBonds\": %d,\n", example.numBonds);
196 printf(" \"numAtoms\": %d,\n", example.numAtoms);
197 printf(" \"numGroups\": %d,\n", example.numGroups);
198 printf(" \"numChains\": %d,\n", example.numChains);
199 printf(" \"numModels\": %d,\n", example.numModels);
200
201 json_print_list(" \"groupList\"", example.groupList, 1);
202
203 printopt(" \"bondAtomList\"", "%d", example.bondAtomList);
204 printopt(" \"bondOrderList\"", "%d", example.bondOrderList);
205
206 printreq(" \"xCoordList\"", "%g", example.xCoordList);
207 printreq(" \"yCoordList\"", "%g", example.yCoordList);
208 printreq(" \"zCoordList\"", "%g", example.zCoordList);
209 printopt(" \"bFactorList\"", "%g", example.bFactorList);
210 printopt(" \"atomIdList\"", "%d", example.atomIdList);
211 printopt(" \"altLocList\"", "%d", example.altLocList);
212 printopt(" \"occupancyList\"", "%g", example.occupancyList);
213
214 printreq(" \"groupIdList\"", "%d", example.groupIdList);
215 printreq(" \"groupTypeList\"", "%d", example.groupTypeList);
216 printopt(" \"secStructList\"", "%d", example.secStructList);
217 printopt(" \"insCodeList\"", "%d", example.insCodeList);
218 printopt(" \"sequenceIndexList\"", "%d", example.sequenceIndexList);
219
220 printreq(" \"chainIdList\"", "\"%s\"", example.chainIdList);
221 printopt(" \"chainNameList\"", "\"%s\"", example.chainNameList);
222 printreq(" \"groupsPerChain\"", "%d", example.groupsPerChain);
223 printreq(" \"chainsPerModel\"", "%d", example.chainsPerModel, true);
224
225 printf("}\n");
226 }
227
228 // *************************************************************************
229 // Verbose style
230 // *************************************************************************
231
232 /**
233 * @brief If any value from
234 * \link mmtf::StructureData::insCodeList insCodeList \endlink or
235 * \link mmtf::StructureData::altLocList altLocList \endlink is empty,
236 * it would would replace the character to a dot
237 * @param c character which needs to be checked
238 * @return The c character if it is not empty otherwise a dot
239 */
safechar(char c)240 char safechar(char c) {
241 return (c < ' ') ? '.' : c;
242 }
243
244 // helper for optional entries (printval(pff, vec[i]) only if vec given)
245 template<typename T>
printvalo(const char * pff,const std::vector<T> & vec,size_t i)246 void printvalo(const char* pff, const std::vector<T>& vec, size_t i) {
247 if (!mmtf::isDefaultValue(vec)) printval(pff, vec[i]);
248 }
249 // helper for char entries (do safeprint of vec[i])
250 template<>
printvalo(const char * pff,const std::vector<char> & vec,size_t i)251 void printvalo(const char* pff, const std::vector<char>& vec, size_t i) {
252 if (!mmtf::isDefaultValue(vec)) printval(pff, safechar(vec[i]));
253 }
254
255 /**
256 * @brief This is the main traversal function to read out all the contents of
257 * mmtf::StructureData
258 * @param example Any existing mmtf::StructureData which you want to read
259 * @return void
260 */
traverse_main(const mmtf::StructureData & example)261 void traverse_main(const mmtf::StructureData& example) {
262 // generic info
263 printreq("mmtfVersion", example.mmtfVersion, true);
264 printreq("mmtfProducer", example.mmtfProducer, true);
265 printopt("unitCell", "%g", example.unitCell, true);
266 printopt("spaceGroup", example.spaceGroup, true);
267 printopt("structureId", example.structureId, true);
268 printopt("title", example.title, true);
269 printopt("depositionDate", example.depositionDate, true);
270 printopt("releaseDate", example.releaseDate, true);
271
272 for (size_t i = 0; i < example.ncsOperatorList.size(); ++i) {
273 printf("ncsOperatorList[%d]", int(i));
274 printreq("", "%g", example.ncsOperatorList[i], true);
275 }
276
277 for (size_t i = 0; i < example.bioAssemblyList.size(); ++i) {
278 printf("bioAssemblyIndex: %d\n", int(i));
279 const mmtf::BioAssembly& ba = example.bioAssemblyList[i];
280 printreq(" name", ba.name, true);
281 for (size_t j = 0; j < ba.transformList.size(); ++j) {
282 printf(" bioAssemblyTransformIndex: %d\n", int(j));
283 const mmtf::Transform & transform = ba.transformList[j];
284 printreq(" chainIndexList", "%d", transform.chainIndexList, true);
285 printreq(" matrix", "%g", transform.matrix, 16, true);
286 }
287 }
288
289 for (size_t i = 0; i < example.entityList.size(); ++i) {
290 printf("entityIndex: %d\n", int(i));
291 const mmtf::Entity& ent = example.entityList[i];
292 printreq(" chainIndexList", "%d", ent.chainIndexList, true);
293 printreq(" description", ent.description, true);
294 printreq(" type", ent.type, true);
295 printreq(" sequence", ent.sequence, true);
296 }
297
298 for (size_t i = 0; i < example.experimentalMethods.size(); ++i) {
299 printf("experimentalMethod %d: %s\n", int(i),
300 example.experimentalMethods[i].c_str());
301 }
302
303 printopt("resolution", example.resolution, true);
304 printopt("rFree", example.rFree, true);
305 printopt("rWork", example.rWork, true);
306
307 printf("numBonds: %d\n", example.numBonds);
308 printf("numAtoms: %d\n", example.numAtoms);
309 printf("numGroups: %d\n", example.numGroups);
310 printf("numChains: %d\n", example.numChains);
311 printf("numModels: %d\n", example.numModels);
312
313 // # initialize index counters
314 int modelIndex = 0;
315 int chainIndex = 0;
316 int groupIndex = 0;
317 int atomIndex = 0;
318 //# traverse models
319 int i;
320 for (i = 0; i < example.numModels; i++) {
321 int modelChainCount = example.chainsPerModel[i];
322 printf("modelIndex: %d\n", modelIndex);
323 // # traverse chains
324 int j;
325 for (j = 0; j < modelChainCount; j++) {
326 printf(" chainIndex : %d\n", chainIndex);
327 printval(" Chain id: %s\n", example.chainIdList[chainIndex]);
328 printvalo(" Chain name: %s\n", example.chainNameList, chainIndex);
329 int chainGroupCount = example.groupsPerChain[chainIndex];
330 // # traverse groups
331 int k;
332 for (k = 0; k < chainGroupCount; k++) {
333 printf(" groupIndex: %d\n", groupIndex);
334 printf(" groupId: %d\n", example.groupIdList[groupIndex]);
335 printvalo(" insCodeList: %c\n", example.insCodeList,
336 groupIndex);
337 printvalo(" secStruc: %d\n", example.secStructList,
338 groupIndex);
339 printvalo(" seqIndex: %i\n", example.sequenceIndexList,
340 groupIndex);
341 printf(" groupType: %d\n", example.groupTypeList[groupIndex]);
342 const mmtf::GroupType& group =
343 example.groupList[example.groupTypeList[groupIndex]];
344 printval(" Group name: %s\n", group.groupName);
345 printf(" Single letter code: %c\n", group.singleLetterCode);
346 printval(" Chem comp type: %s\n", group.chemCompType);
347 int atomOffset = atomIndex;
348
349 int l;
350 for (l = 0; l < group.bondOrderList.size(); l++) {
351 printf(" Atom id One: %d\n",
352 (atomOffset + group.bondAtomList[l * 2]));
353 printf(" Atom id Two: %d\n",
354 (atomOffset + group.bondAtomList[l * 2 + 1]));
355 printf(" Bond order: %d\n", group.bondOrderList[l]);
356 }
357 int groupAtomCount = group.atomNameList.size();
358 for (l = 0; l < groupAtomCount; l++) {
359 printf(" atomIndex: %d\n", atomIndex);
360 printf(" x coord: %.3f\n",
361 example.xCoordList[atomIndex]);
362 printf(" y coord: %.3f\n",
363 example.yCoordList[atomIndex]);
364 printf(" z coord: %.3f\n",
365 example.zCoordList[atomIndex]);
366 printvalo(" b factor: %.2f\n", example.bFactorList,
367 atomIndex);
368 printvalo(" atom id: %d\n", example.atomIdList,
369 atomIndex);
370 printvalo(" altLocList: %c\n", example.altLocList,
371 atomIndex);
372 printvalo(" occupancy: %.2f\n", example.occupancyList,
373 atomIndex);
374 printf(" charge: %d\n", group.formalChargeList[l]);
375 printval(" atom name: %s\n", group.atomNameList[l]);
376 printval(" element: %s\n", group.elementList[l]);
377 atomIndex++;
378 }
379 groupIndex++;
380 }
381 chainIndex++;
382 }
383 modelIndex++;
384 }
385 printf("Number of inter group bonds: %d\n",
386 (int) example.bondOrderList.size());
387 for (i = 0; i < example.bondOrderList.size(); i++) {
388 printf(" Atom One: %d\n", example.bondAtomList[i * 2]);
389 printf(" Atom Two: %d\n", example.bondAtomList[i * 2 + 1]);
390 printf(" Bond order: %d\n", example.bondOrderList[i]);
391 }
392 }
393
394 // *************************************************************************
395 // PDB style
396 // *************************************************************************
397
398 /**
399 * @brief This function tells if the group atoms belong to HETATM
400 * @param type The group of type
401 * \link mmtf::GroupType::chemCompType chemCompType \endlink
402 * that needs to be checked.
403 * @return 0 or 1
404 */
is_hetatm(const char * type)405 int is_hetatm(const char* type) {
406 const char* hetatm_type[] = {
407 "D-SACCHARIDE",
408 "D-SACCHARIDE 1,4 AND 1,4 LINKING",
409 "D-SACCHARIDE 1,4 AND 1,6 LINKING",
410 "L-SACCHARIDE",
411 "L-SACCHARIDE 1,4 AND 1,4 LINKING",
412 "L-SACCHARIDE 1,4 AND 1,6 LINKING",
413 "SACCHARIDE",
414 "OTHER",
415 "NON-POLYMER",
416 0 };
417 for (int i=0; hetatm_type[i]; ++i) {
418 if (strcmp(type,hetatm_type[i]) == 0)
419 return 1;
420 }
421 return 0;
422 }
423
424 /**
425 * @brief Read out the contents of mmtf::StructureData in a PDB-like fashion
426 * Columns are in order:
427 * ATOM/HETATM AtomId Element AtomName AltLoc GroupId GroupType
428 * InsCode ChainName x y z B-factor Occupancy Charge
429 * @param example Any existing mmtf::StructureData which you want to read
430 */
traverse_pdb_like(const mmtf::StructureData & example)431 void traverse_pdb_like(const mmtf::StructureData& example) {
432
433
434 int modelIndex = 0;
435 int chainIndex = 0;
436 int groupIndex = 0;
437 int atomIndex = 0;
438
439 //# traverse models
440 for (int i = 0; i < example.numModels; i++, modelIndex++) {
441 // # traverse chains
442 for (int j = 0; j < example.chainsPerModel[modelIndex]; j++, chainIndex++) {
443 // # traverse groups
444 for (int k = 0; k < example.groupsPerChain[chainIndex]; k++, groupIndex++) {
445 const mmtf::GroupType& group =
446 example.groupList[example.groupTypeList[groupIndex]];
447 int groupAtomCount = group.atomNameList.size();
448
449 for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
450 // ATOM or HETATM
451 if (is_hetatm(group.chemCompType.c_str()))
452 printf("HETATM ");
453 else
454 printf("ATOM ");
455 // Atom serial
456 printvalo("%d ", example.atomIdList, atomIndex);
457 // Atom name
458 printval("%s ", group.atomNameList[l]);
459 // Alternate location
460 printvalo("%c ", example.altLocList, atomIndex);
461 // Group name
462 printval("%s ", group.groupName);
463 // Chain
464 printvalo("%s ", example.chainNameList, chainIndex);
465 // Group serial
466 printf("%d ", example.groupIdList[groupIndex]);
467 // Insertion code
468 printvalo("%c ", example.insCodeList, groupIndex);
469 // x, y, z
470 printf("%.3f ", example.xCoordList[atomIndex]);
471 printf("%.3f ", example.yCoordList[atomIndex]);
472 printf("%.3f ", example.zCoordList[atomIndex]);
473 // B-factor
474 printvalo("%.2f ", example.bFactorList, atomIndex);
475 // Occupancy
476 printvalo("%.2f ", example.occupancyList, atomIndex);
477 // Element
478 printval("%s ", group.elementList[l]);
479 // Charge
480 printf("%d \n", group.formalChargeList[l]);
481 }
482 }
483 }
484 }
485 }
486
main(int argc,char ** argv)487 int main(int argc, char** argv) {
488 // check arguments
489 if (argc < 2) {
490 printf("USAGE: traverse <mmtffile> [<style>]\n");
491 printf("-> if style ommited, we output a PDB-like output\n");
492 printf("-> if style == 'json', we output a json-dump of all data\n");
493 printf("-> if style == 'print', we output a built-in tab delimited format\n");
494 printf("-> else, we output all the data in a verbose, readable form\n");
495 return 1;
496 }
497
498 // decode MMTF file
499 mmtf::StructureData example;
500 mmtf::decodeFromFile(example, argv[1]);
501
502 // traverse it according to user specified style
503 if (argc > 2) {
504 if (strcmp(argv[2], "json") == 0) {
505 json_print(example);
506 } else if (strcmp(argv[2], "print") == 0) {
507 std::cout << example.print() << std::endl;
508 } else {
509 traverse_main(example);
510 }
511 } else {
512 traverse_pdb_like(example);
513 }
514 return 0;
515 }
516