1 // clipper CNS->MTZ utility
2 /* (C) 2007 Kevin Cowtan */
3 
4 #include <iostream>
5 #include <fstream>
6 #include <set>
7 #include <stdlib.h>
8 
9 #include "cns2mtz.h"
10 
main(int argc,char ** argv)11 int main( int argc, char** argv )
12 {
13   CCP4Program prog( "cns2mtz", "0.2", "$Date: 2004/14/01" );
14 
15   // defaults
16   clipper::String title;
17   clipper::String ipfile = "NONE";
18   clipper::String opfile = "NONE";
19   clipper::String ccell = "NONE";
20   clipper::String cspgr = "NONE";
21   clipper::String ipfilepdb = "NONE";
22   bool complete = true;
23   int verbose = 0;
24 
25   // command input
26   CCP4CommandInput args( argc, argv, true );
27   int arg = 0;
28   while ( ++arg < args.size() ) {
29     if ( args[arg] == "-title" ) {
30       if ( ++arg < args.size() ) title = args[arg];
31     } else if ( args[arg] == "-cnsin" ) {
32       if ( ++arg < args.size() ) ipfile = args[arg];
33     } else if ( args[arg] == "-mtzout" ) {
34       if ( ++arg < args.size() ) opfile = args[arg];
35     } else if ( args[arg] == "-cell" ) {
36       if ( ++arg < args.size() ) ccell = args[arg];
37     } else if ( args[arg] == "-spacegroup" ) {
38       if ( ++arg < args.size() ) cspgr = args[arg];
39     } else if ( args[arg] == "-pdbin" ) {
40       if ( ++arg < args.size() ) ipfilepdb = args[arg];
41     } else if ( args[arg] == "-no-complete" ) {
42       complete = false;
43     } else if ( args[arg] == "-verbose" ) {
44       if ( ++arg < args.size() ) verbose = clipper::String(args[arg]).i();
45     } else {
46       std::cout << "Unrecognized:\t" << args[arg] << "\n";
47       args.clear();
48     }
49   }
50   if ( args.size() <= 1 ) {
51     std::cout << "Usage: cns2mtz\n\t-cnsin <filename> [COMPULSORY]\n\t-mtzout <filename>\n\t-cell a,b,c,a,b,g\n\t-spacegroup <spacegroup>\n\t-pdbin <filename>\n\t-no-complete\nConvert CNS reflection file to MTZ.\nCell and spacegroup may be specified directly or by providing a PDB file.\n";
52     exit(1);
53   }
54 
55   if ( ipfilepdb == "NONE" && ccell == "NONE" )
56     { std::cout << "Missing cell.";       exit(1); }
57   if ( ipfilepdb == "NONE" && cspgr == "NONE" )
58     { std::cout << "Missing spacegroup."; exit(1); }
59 
60   // set defaults
61   if ( opfile == "NONE" ) opfile = ipfile + ".mtz";
62 
63   // set crystal info
64   clipper::Cell cell;
65   clipper::Spacegroup spgr;
66   clipper::Resolution reso;
67   if ( ipfilepdb != "NONE" ) {
68     clipper::MMDBManager mmdb;
69     mmdb.ReadCoorFile( (char*)ipfilepdb.c_str() );
70     spgr = mmdb.spacegroup();
71     cell = mmdb.cell();
72   } else {
73     double cd[] = { 100.0, 100.0, 100.0, 90.0, 90.0, 90.0 };
74     std::vector<clipper::String> cellx = ccell.split( ", " );
75     for ( int i = 0; i < cellx.size(); i++ ) cd[i] = cellx[i].f();
76     clipper::Cell_descr celld( cd[0], cd[1], cd[2], cd[3], cd[4], cd[5] );
77     clipper::Spgr_descr spgrd( cspgr );
78     cell = clipper::Cell( celld );
79     spgr = clipper::Spacegroup( spgrd );
80   }
81 
82   // read input file
83   std::vector<clipper::String> words;
84   std::ifstream file( ipfile.c_str() );
85   int lvl = 0;
86   std::string word;
87   while ( file.good() ) {
88     char c;
89     file.get( c );
90     if ( c == '{' ) lvl++;
91     if ( lvl == 0 ) {
92       if ( isspace(c) || c == '=' ) {
93 	if ( word.length() != 0 ) {
94 	  words.push_back( word );
95 	  word = "";
96 	}
97       } else {
98 	word = word + c;
99       }
100     }
101     if ( c == '}' ) lvl--;
102   }
103   file.close();
104 
105   // file info
106   int nref = 0;
107   bool anom = false;
108   std::vector<std::string> cols, typs;
109   std::vector<std::pair<std::string,std::vector<std::string> > > grps;
110   cols.push_back( "H" ); cols.push_back( "K" ); cols.push_back( "L" );
111   typs.push_back( "H" ); typs.push_back( "H" ); typs.push_back( "H" );
112   std::vector<float> vals;
113 
114   // loop over words
115   int pos = 0;
116   // header section
117   while ( pos < words.size() ) {
118     if        ( words[pos].substr(0,4) == "NREF" ) {
119       if ( ++pos < words.size() ) nref = clipper::String(words[pos]).i();
120     } else if ( words[pos].substr(0,4) == "ANOM" ) {
121       if ( ++pos < words.size() ) anom = ( words[pos][0] == 'T' );
122     } else if ( words[pos].substr(0,4) == "HERM" ) {
123       if ( ++pos < words.size() ) anom = ( words[pos][0] == 'F' );
124     } else if ( words[pos].substr(0,4) == "DECL" ) {
125       // COLUMN DECLARATIONS
126       ++pos;
127       std::string col, dom, typ;
128       while ( pos < words.size() ) {
129 	if ( words[pos] == "END" ) {
130 	  if ( dom.substr(0,4) == "RECI" ) {
131 	    if ( typ.substr(0,4) == "INTE" ) {
132 	      cols.push_back( col );
133 	      typs.push_back( "I" );
134 	    } else if ( typ.substr(0,4) == "REAL" ) {
135 	      cols.push_back( col );
136 	      typs.push_back( "R" );
137 	    } else if ( typ.substr(0,4) == "COMP" ) {
138 	      cols.push_back( col );
139 	      typs.push_back( "F" );
140 	      cols.push_back( col+".phase" );
141 	      typs.push_back( "P" );
142 	    }
143 	  }
144 	  break;
145 	} else if ( words[pos].substr(0,4) == "NAME" ) {
146 	  if ( ++pos < words.size() ) col = words[pos];
147 	} else if ( words[pos].substr(0,4) == "DOMA" ) {
148 	  if ( ++pos < words.size() ) dom = words[pos];
149 	} else if ( words[pos].substr(0,4) == "TYPE" ) {
150 	  if ( ++pos < words.size() ) typ = words[pos];
151 	}
152 	++pos;
153       }
154     } else if ( words[pos].substr(0,4) == "GROU" ) {
155       // GROUP DECLARATIONS
156       ++pos;
157       std::string gtyp;
158       std::vector<std::string> gcol;
159       while ( pos < words.size() ) {
160 	if ( words[pos] == "END" ) {
161 	  grps.push_back( std::pair<std::string,std::vector<std::string> >( gtyp, gcol ) );
162 	  break;
163 	} else if ( words[pos].substr(0,4) == "TYPE" ) {
164 	  if ( ++pos < words.size() ) gtyp = words[pos];
165 	} else if ( words[pos].substr(0,4) == "OBJE" ) {
166 	  if ( ++pos < words.size() ) gcol.push_back( words[pos] );
167 	}
168 	++pos;
169       }
170     } else {
171       if ( words[pos].find_first_not_of("0123456789.-") == std::string::npos ) {
172 	vals.push_back( clipper::String(words[pos]).f() );
173       }
174     }
175     ++pos;
176   }
177 
178   // Now do group assignments
179   char hls[][2] = {"A","B","C","D"};
180   for ( int g = 0; g < grps.size(); g++ ) {
181     if ( grps[g].first == "HL" ) {
182       for ( int c = 0; c < grps[g].second.size(); c++ ) {
183 	for ( int i = 0; i < cols.size(); i++ ) {
184 	  if ( cols[i] == grps[g].second[c] )
185 	    typs[i] = std::string(hls[c]);
186 	}
187       }
188     }
189   }
190 
191   // column assignment heuristics
192   int ncol = cols.size();
193   for ( int i = 3; i < ncol; i++ ) {
194     if ( typs[i] == "R" ) {
195       if      ( cols[i].find("SIG") != std::string::npos )
196 	typs[i] = "Q";
197       else if ( cols[i].find("FOM") != std::string::npos )
198 	typs[i] = "W";
199       else if ( toupper( cols[i][0] ) == 'F' )
200 	typs[i] = "F";
201       else if ( toupper( cols[i][0] ) == 'E' )
202 	typs[i] = "E";
203       else if ( toupper( cols[i][0] ) == 'I' )
204 	typs[i] = "J";
205       else if ( toupper( cols[i][0] ) == 'P' )
206 	typs[i] = "P";
207       else if ( toupper( cols[i][0] ) == 'S' )
208 	typs[i] = "Q";
209       else if ( toupper( cols[i][0] ) == 'W' )
210 	typs[i] = "W";
211       else if ( toupper( cols[i][0] ) == 'M' )
212 	typs[i] = "W";
213     }
214   }
215 
216   // post-assignment diagnostics
217   std::cout << std::endl << "Columns found:" << std::endl;
218   for ( int i = 0; i < cols.size(); i++ ) {
219     std::cout << i << " " << cols[i] << " " << typs[i] << "\n";
220   }
221   std::cout << std::endl;
222 
223   // check data length
224   if ( ncol*nref != vals.size() ) {
225     std::cout << "Error: data length does not match number of reflections.\n";
226     exit(1);
227   }
228 
229   // get reflections
230   std::set<clipper::HKL,HKLlessthan> hkl_set;
231   for ( int i = 0; i < nref; i++ ) {
232     clipper::HKL hkl( Util::intr(vals[i*ncol  ]),
233 		      Util::intr(vals[i*ncol+1]),
234 		      Util::intr(vals[i*ncol+2]) );
235     clipper::HKL asu( 0, 0, 0 );
236     for ( int s = 0; s < spgr.num_primops(); s++ ) {
237       asu = hkl.transform( spgr.symop(s) );
238       if ( spgr.recip_asu( asu ) ) break;
239       asu = -asu;
240       if ( spgr.recip_asu( asu ) ) break;
241     }
242     hkl_set.insert( asu );
243   }
244   std::vector<clipper::HKL> hkl_list( hkl_set.begin(), hkl_set.end() );
245   // get resolution
246   double smax = 0.0;
247   for ( int i = 0; i < hkl_list.size(); i++ ) {
248     double s = hkl_list[i].invresolsq( cell );
249     if ( s > smax ) smax = s;
250   }
251   reso = clipper::Resolution( 1.0/sqrt(smax) );
252 
253   // prepare data arrays
254   clipper::HKL_info hkls( spgr, cell, reso );
255   if ( complete ) hkls.generate_hkl_list();
256   else            hkls.add_hkl_list( hkl_list );
257   std::vector<clipper::HKL_data<dataI> > datai;
258   std::vector<clipper::HKL_data<dataF> > dataf;
259   std::vector<clipper::HKL_data<dataE> > datae;
260   std::vector<clipper::HKL_data<dataSig> > datasig;
261   std::vector<clipper::HKL_data<dataPhi> > dataphi;
262   std::vector<clipper::HKL_data<dataFom> > datafom;
263   std::vector<clipper::HKL_data<dataABCD> > dataabcd;
264   std::vector<clipper::HKL_data<dataFlag> > dataflag;
265   std::vector<clipper::HKL_data<dataIano> > dataiano;
266   std::vector<clipper::HKL_data<dataFano> > datafano;
267   std::vector<clipper::HKL_data<dataSigIano> > datasigiano;
268   std::vector<clipper::HKL_data<dataSigFano> > datasigfano;
269   clipper::HKL_data<clipper::data32::Flag> datafree(hkls);
270 
271   // fill data arrays and export
272   clipper::CCP4MTZfile mtzout;
273   mtzout.open_write( opfile );
274   mtzout.export_hkl_info( hkls );
275 
276   // loop over the columns
277   bool friedel;
278   int isym;
279   for ( int col = 3; col < ncol; col++ ) {
280     // I or Iano
281     if ( typs[col] == "J" ) {
282       if ( !anom ) {
283 	clipper::HKL_data<dataI> data( hkls );
284 	for ( int ref = 0; ref < nref; ref++ ) {
285 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
286 			    Util::intr(vals[ref*ncol+1]),
287 			    Util::intr(vals[ref*ncol+2]) );
288 	  dataI dat( vals[ref*ncol+col] );
289 	  data.set_data( hkl, dat );
290 	}
291 	datai.push_back( data );
292 	mtzout.export_hkl_data( datai.back(), "/*/*/["+cols[col]+"]" );
293       } else {
294 	clipper::HKL_data<dataIano> data( hkls );
295 	for ( int ref = 0; ref < nref; ref++ ) {
296 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
297 			    Util::intr(vals[ref*ncol+1]),
298 			    Util::intr(vals[ref*ncol+2]) );
299 	  clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
300 	  int index = hkls.index_of( hkl1 );
301 	  float v = vals[ref*ncol+col];
302 	  if ( friedel ) data[index] = dataIano( data[index].I_pl(), v );
303           else           data[index] = dataIano( v, data[index].I_mi() );
304 	}
305 	dataiano.push_back( data );
306 	mtzout.export_hkl_data( dataiano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
307       }
308     }
309     // F or Fano
310     if ( typs[col] == "F" ) {
311       if ( !anom ) {
312 	clipper::HKL_data<dataF> data( hkls );
313 	for ( int ref = 0; ref < nref; ref++ ) {
314 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
315 			    Util::intr(vals[ref*ncol+1]),
316 			    Util::intr(vals[ref*ncol+2]) );
317 	  dataF dat( vals[ref*ncol+col] );
318 	  data.set_data( hkl, dat );
319 	}
320 	dataf.push_back( data );
321 	mtzout.export_hkl_data( dataf.back(), "/*/*/["+cols[col]+"]" );
322       } else {
323 	clipper::HKL_data<dataFano> data( hkls );
324 	for ( int ref = 0; ref < nref; ref++ ) {
325 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
326 			    Util::intr(vals[ref*ncol+1]),
327 			    Util::intr(vals[ref*ncol+2]) );
328 	  clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
329 	  int index = hkls.index_of( hkl1 );
330 	  float v = vals[ref*ncol+col];
331 	  if ( friedel ) data[index] = dataFano( data[index].F_pl(), v );
332           else           data[index] = dataFano( v, data[index].F_mi() );
333 	}
334 	datafano.push_back( data );
335 	mtzout.export_hkl_data( datafano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
336       }
337     }
338     // sigF or sigFano
339     if ( typs[col] == "Q" ) {
340       if ( !anom ) {
341 	clipper::HKL_data<dataSig> data( hkls );
342 	for ( int ref = 0; ref < nref; ref++ ) {
343 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
344 			    Util::intr(vals[ref*ncol+1]),
345 			    Util::intr(vals[ref*ncol+2]) );
346 	  dataSig dat( vals[ref*ncol+col] );
347 	  data.set_data( hkl, dat );
348 	}
349 	datasig.push_back( data );
350 	mtzout.export_hkl_data( datasig.back(), "/*/*/["+cols[col]+"]" );
351       } else {
352 	clipper::HKL_data<dataSigFano> data( hkls );
353 	for ( int ref = 0; ref < nref; ref++ ) {
354 	  clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
355 			    Util::intr(vals[ref*ncol+1]),
356 			    Util::intr(vals[ref*ncol+2]) );
357 	  clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
358 	  int index = hkls.index_of( hkl1 );
359 	  float v = vals[ref*ncol+col];
360 	  if ( friedel ) data[index] = dataSigFano( data[index].sigF_pl(), v );
361           else           data[index] = dataSigFano( v, data[index].sigF_mi() );
362 	}
363 	datasigfano.push_back( data );
364 	mtzout.export_hkl_data( datasigfano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
365       }
366     }
367     // Phase
368     if ( typs[col] == "P" ) {
369       clipper::HKL_data<dataPhi> data( hkls );
370       for ( int ref = 0; ref < nref; ref++ ) {
371 	clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
372 			  Util::intr(vals[ref*ncol+1]),
373 			  Util::intr(vals[ref*ncol+2]) );
374 	dataPhi dat( Util::d2rad(vals[ref*ncol+col]) );
375 	data.set_data( hkl, dat );
376       }
377       dataphi.push_back( data );
378       mtzout.export_hkl_data( dataphi.back(), "/*/*/["+cols[col]+"]" );
379     }
380     // FOM
381     if ( typs[col] == "W" ) {
382       clipper::HKL_data<dataFom> data( hkls );
383       for ( int ref = 0; ref < nref; ref++ ) {
384 	clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
385 			  Util::intr(vals[ref*ncol+1]),
386 			  Util::intr(vals[ref*ncol+2]) );
387 	dataFom dat( vals[ref*ncol+col] );
388 	data.set_data( hkl, dat );
389       }
390       datafom.push_back( data );
391       mtzout.export_hkl_data( datafom.back(), "/*/*/["+cols[col]+"]" );
392     }
393     // Flag
394     if ( typs[col] == "I" ) {
395       clipper::HKL_data<dataFlag> data( hkls );
396       for ( int ref = 0; ref < nref; ref++ ) {
397 	clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
398 			  Util::intr(vals[ref*ncol+1]),
399 			  Util::intr(vals[ref*ncol+2]) );
400 	dataFlag dat( Util::intr(vals[ref*ncol+col]) );
401 	data.set_data( hkl, dat );
402       }
403       dataflag.push_back( data );
404       mtzout.export_hkl_data( dataflag.back(), "/*/*/["+cols[col]+"]" );
405     }
406     // HL coeffs
407     if ( typs[col] == "A" && col < ncol-3 ) {
408       clipper::HKL_data<dataABCD> data( hkls );
409       for ( int ref = 0; ref < nref; ref++ ) {
410 	clipper::HKL hkl( Util::intr(vals[ref*ncol  ]),
411 			  Util::intr(vals[ref*ncol+1]),
412 			  Util::intr(vals[ref*ncol+2]) );
413 	dataABCD dat( vals[ref*ncol+col  ], vals[ref*ncol+col+1],
414 		      vals[ref*ncol+col+2], vals[ref*ncol+col+3] );
415 	data.set_data( hkl, dat );
416       }
417       dataabcd.push_back( data );
418       mtzout.export_hkl_data( dataabcd.back(), "/*/*/["+cols[col]+","+cols[col+1]+","+cols[col+2]+","+cols[col+3]+"]" );
419       col += 3;
420     }
421   }
422 
423   // Free-R flag conversion
424   typedef clipper::HKL_info::HKL_reference_index HRI;
425   if ( dataflag.size() > 0 ) {
426     int n, n0, n1, fl, nfl;
427     n = n0 = n1 = 0;
428     for ( HRI ih = dataflag[0].first(); !ih.last(); ih.next() ) {
429       if ( dataflag[0][ih].flag() >= 0 ) n++;
430       if ( dataflag[0][ih].flag() == 0 ) n0++;
431       if ( dataflag[0][ih].flag() == 1 ) n1++;
432     }
433     fl = 1; nfl = n1;  // conventional CNS flag definition
434     if ( n0 < n1/2 ) { fl = 0; nfl = n0; }  // check for inverted CNS flags
435     // generate CCP4 free flags
436     int nfree = Util::intr( double(n) / double(nfl) );
437     for ( HRI ih = dataflag[0].first(); !ih.last(); ih.next() )
438       if      ( dataflag[0][ih].flag() == fl )  // free: add to free set
439 	datafree[ih] = dataFlag( 0 );
440       else                           // missing or work: pick a working set
441 	datafree[ih] = dataFlag( random()%(nfree-1) + 1 );
442   } else {
443     int nfree = nref / 1000;
444     if ( nfree < 10 ) nfree = 10;
445     if ( nfree > 20 ) nfree = 20;
446     for ( HRI ih = datafree.first(); !ih.last(); ih.next() )
447       datafree[ih] = dataFlag( random()%nfree );
448   }
449   mtzout.export_hkl_data( datafree, "/*/*/[FreeF_flag]" );
450 
451   mtzout.close_write();
452 
453   std::cout << ( anom ? "Merging HKL and -HKL" : "Not merging HKL and -HKL" ) << std::endl;
454   std::cout << ( complete ? "Inserting missing reflections" : "Not inserting missing reflections" ) << std::endl;
455   std::cout << "Number of reflections input : " << nref << std::endl;
456   std::cout << "Number of reflections output: " << hkls.num_reflections() << std::endl;
457 }
458