1 // clipper CNS->MTZ utility
2 /* (C) 2007 Kevin Cowtan */
3
4 #include <iostream>
5 #include <fstream>
6 #include <set>
7 #include <stdlib.h>
8
9 #include "cns2mtz.h"
10
main(int argc,char ** argv)11 int main( int argc, char** argv )
12 {
13 CCP4Program prog( "cns2mtz", "0.2", "$Date: 2004/14/01" );
14
15 // defaults
16 clipper::String title;
17 clipper::String ipfile = "NONE";
18 clipper::String opfile = "NONE";
19 clipper::String ccell = "NONE";
20 clipper::String cspgr = "NONE";
21 clipper::String ipfilepdb = "NONE";
22 bool complete = true;
23 int verbose = 0;
24
25 // command input
26 CCP4CommandInput args( argc, argv, true );
27 int arg = 0;
28 while ( ++arg < args.size() ) {
29 if ( args[arg] == "-title" ) {
30 if ( ++arg < args.size() ) title = args[arg];
31 } else if ( args[arg] == "-cnsin" ) {
32 if ( ++arg < args.size() ) ipfile = args[arg];
33 } else if ( args[arg] == "-mtzout" ) {
34 if ( ++arg < args.size() ) opfile = args[arg];
35 } else if ( args[arg] == "-cell" ) {
36 if ( ++arg < args.size() ) ccell = args[arg];
37 } else if ( args[arg] == "-spacegroup" ) {
38 if ( ++arg < args.size() ) cspgr = args[arg];
39 } else if ( args[arg] == "-pdbin" ) {
40 if ( ++arg < args.size() ) ipfilepdb = args[arg];
41 } else if ( args[arg] == "-no-complete" ) {
42 complete = false;
43 } else if ( args[arg] == "-verbose" ) {
44 if ( ++arg < args.size() ) verbose = clipper::String(args[arg]).i();
45 } else {
46 std::cout << "Unrecognized:\t" << args[arg] << "\n";
47 args.clear();
48 }
49 }
50 if ( args.size() <= 1 ) {
51 std::cout << "Usage: cns2mtz\n\t-cnsin <filename> [COMPULSORY]\n\t-mtzout <filename>\n\t-cell a,b,c,a,b,g\n\t-spacegroup <spacegroup>\n\t-pdbin <filename>\n\t-no-complete\nConvert CNS reflection file to MTZ.\nCell and spacegroup may be specified directly or by providing a PDB file.\n";
52 exit(1);
53 }
54
55 if ( ipfilepdb == "NONE" && ccell == "NONE" )
56 { std::cout << "Missing cell."; exit(1); }
57 if ( ipfilepdb == "NONE" && cspgr == "NONE" )
58 { std::cout << "Missing spacegroup."; exit(1); }
59
60 // set defaults
61 if ( opfile == "NONE" ) opfile = ipfile + ".mtz";
62
63 // set crystal info
64 clipper::Cell cell;
65 clipper::Spacegroup spgr;
66 clipper::Resolution reso;
67 if ( ipfilepdb != "NONE" ) {
68 clipper::MMDBManager mmdb;
69 mmdb.ReadCoorFile( (char*)ipfilepdb.c_str() );
70 spgr = mmdb.spacegroup();
71 cell = mmdb.cell();
72 } else {
73 double cd[] = { 100.0, 100.0, 100.0, 90.0, 90.0, 90.0 };
74 std::vector<clipper::String> cellx = ccell.split( ", " );
75 for ( int i = 0; i < cellx.size(); i++ ) cd[i] = cellx[i].f();
76 clipper::Cell_descr celld( cd[0], cd[1], cd[2], cd[3], cd[4], cd[5] );
77 clipper::Spgr_descr spgrd( cspgr );
78 cell = clipper::Cell( celld );
79 spgr = clipper::Spacegroup( spgrd );
80 }
81
82 // read input file
83 std::vector<clipper::String> words;
84 std::ifstream file( ipfile.c_str() );
85 int lvl = 0;
86 std::string word;
87 while ( file.good() ) {
88 char c;
89 file.get( c );
90 if ( c == '{' ) lvl++;
91 if ( lvl == 0 ) {
92 if ( isspace(c) || c == '=' ) {
93 if ( word.length() != 0 ) {
94 words.push_back( word );
95 word = "";
96 }
97 } else {
98 word = word + c;
99 }
100 }
101 if ( c == '}' ) lvl--;
102 }
103 file.close();
104
105 // file info
106 int nref = 0;
107 bool anom = false;
108 std::vector<std::string> cols, typs;
109 std::vector<std::pair<std::string,std::vector<std::string> > > grps;
110 cols.push_back( "H" ); cols.push_back( "K" ); cols.push_back( "L" );
111 typs.push_back( "H" ); typs.push_back( "H" ); typs.push_back( "H" );
112 std::vector<float> vals;
113
114 // loop over words
115 int pos = 0;
116 // header section
117 while ( pos < words.size() ) {
118 if ( words[pos].substr(0,4) == "NREF" ) {
119 if ( ++pos < words.size() ) nref = clipper::String(words[pos]).i();
120 } else if ( words[pos].substr(0,4) == "ANOM" ) {
121 if ( ++pos < words.size() ) anom = ( words[pos][0] == 'T' );
122 } else if ( words[pos].substr(0,4) == "HERM" ) {
123 if ( ++pos < words.size() ) anom = ( words[pos][0] == 'F' );
124 } else if ( words[pos].substr(0,4) == "DECL" ) {
125 // COLUMN DECLARATIONS
126 ++pos;
127 std::string col, dom, typ;
128 while ( pos < words.size() ) {
129 if ( words[pos] == "END" ) {
130 if ( dom.substr(0,4) == "RECI" ) {
131 if ( typ.substr(0,4) == "INTE" ) {
132 cols.push_back( col );
133 typs.push_back( "I" );
134 } else if ( typ.substr(0,4) == "REAL" ) {
135 cols.push_back( col );
136 typs.push_back( "R" );
137 } else if ( typ.substr(0,4) == "COMP" ) {
138 cols.push_back( col );
139 typs.push_back( "F" );
140 cols.push_back( col+".phase" );
141 typs.push_back( "P" );
142 }
143 }
144 break;
145 } else if ( words[pos].substr(0,4) == "NAME" ) {
146 if ( ++pos < words.size() ) col = words[pos];
147 } else if ( words[pos].substr(0,4) == "DOMA" ) {
148 if ( ++pos < words.size() ) dom = words[pos];
149 } else if ( words[pos].substr(0,4) == "TYPE" ) {
150 if ( ++pos < words.size() ) typ = words[pos];
151 }
152 ++pos;
153 }
154 } else if ( words[pos].substr(0,4) == "GROU" ) {
155 // GROUP DECLARATIONS
156 ++pos;
157 std::string gtyp;
158 std::vector<std::string> gcol;
159 while ( pos < words.size() ) {
160 if ( words[pos] == "END" ) {
161 grps.push_back( std::pair<std::string,std::vector<std::string> >( gtyp, gcol ) );
162 break;
163 } else if ( words[pos].substr(0,4) == "TYPE" ) {
164 if ( ++pos < words.size() ) gtyp = words[pos];
165 } else if ( words[pos].substr(0,4) == "OBJE" ) {
166 if ( ++pos < words.size() ) gcol.push_back( words[pos] );
167 }
168 ++pos;
169 }
170 } else {
171 if ( words[pos].find_first_not_of("0123456789.-") == std::string::npos ) {
172 vals.push_back( clipper::String(words[pos]).f() );
173 }
174 }
175 ++pos;
176 }
177
178 // Now do group assignments
179 char hls[][2] = {"A","B","C","D"};
180 for ( int g = 0; g < grps.size(); g++ ) {
181 if ( grps[g].first == "HL" ) {
182 for ( int c = 0; c < grps[g].second.size(); c++ ) {
183 for ( int i = 0; i < cols.size(); i++ ) {
184 if ( cols[i] == grps[g].second[c] )
185 typs[i] = std::string(hls[c]);
186 }
187 }
188 }
189 }
190
191 // column assignment heuristics
192 int ncol = cols.size();
193 for ( int i = 3; i < ncol; i++ ) {
194 if ( typs[i] == "R" ) {
195 if ( cols[i].find("SIG") != std::string::npos )
196 typs[i] = "Q";
197 else if ( cols[i].find("FOM") != std::string::npos )
198 typs[i] = "W";
199 else if ( toupper( cols[i][0] ) == 'F' )
200 typs[i] = "F";
201 else if ( toupper( cols[i][0] ) == 'E' )
202 typs[i] = "E";
203 else if ( toupper( cols[i][0] ) == 'I' )
204 typs[i] = "J";
205 else if ( toupper( cols[i][0] ) == 'P' )
206 typs[i] = "P";
207 else if ( toupper( cols[i][0] ) == 'S' )
208 typs[i] = "Q";
209 else if ( toupper( cols[i][0] ) == 'W' )
210 typs[i] = "W";
211 else if ( toupper( cols[i][0] ) == 'M' )
212 typs[i] = "W";
213 }
214 }
215
216 // post-assignment diagnostics
217 std::cout << std::endl << "Columns found:" << std::endl;
218 for ( int i = 0; i < cols.size(); i++ ) {
219 std::cout << i << " " << cols[i] << " " << typs[i] << "\n";
220 }
221 std::cout << std::endl;
222
223 // check data length
224 if ( ncol*nref != vals.size() ) {
225 std::cout << "Error: data length does not match number of reflections.\n";
226 exit(1);
227 }
228
229 // get reflections
230 std::set<clipper::HKL,HKLlessthan> hkl_set;
231 for ( int i = 0; i < nref; i++ ) {
232 clipper::HKL hkl( Util::intr(vals[i*ncol ]),
233 Util::intr(vals[i*ncol+1]),
234 Util::intr(vals[i*ncol+2]) );
235 clipper::HKL asu( 0, 0, 0 );
236 for ( int s = 0; s < spgr.num_primops(); s++ ) {
237 asu = hkl.transform( spgr.symop(s) );
238 if ( spgr.recip_asu( asu ) ) break;
239 asu = -asu;
240 if ( spgr.recip_asu( asu ) ) break;
241 }
242 hkl_set.insert( asu );
243 }
244 std::vector<clipper::HKL> hkl_list( hkl_set.begin(), hkl_set.end() );
245 // get resolution
246 double smax = 0.0;
247 for ( int i = 0; i < hkl_list.size(); i++ ) {
248 double s = hkl_list[i].invresolsq( cell );
249 if ( s > smax ) smax = s;
250 }
251 reso = clipper::Resolution( 1.0/sqrt(smax) );
252
253 // prepare data arrays
254 clipper::HKL_info hkls( spgr, cell, reso );
255 if ( complete ) hkls.generate_hkl_list();
256 else hkls.add_hkl_list( hkl_list );
257 std::vector<clipper::HKL_data<dataI> > datai;
258 std::vector<clipper::HKL_data<dataF> > dataf;
259 std::vector<clipper::HKL_data<dataE> > datae;
260 std::vector<clipper::HKL_data<dataSig> > datasig;
261 std::vector<clipper::HKL_data<dataPhi> > dataphi;
262 std::vector<clipper::HKL_data<dataFom> > datafom;
263 std::vector<clipper::HKL_data<dataABCD> > dataabcd;
264 std::vector<clipper::HKL_data<dataFlag> > dataflag;
265 std::vector<clipper::HKL_data<dataIano> > dataiano;
266 std::vector<clipper::HKL_data<dataFano> > datafano;
267 std::vector<clipper::HKL_data<dataSigIano> > datasigiano;
268 std::vector<clipper::HKL_data<dataSigFano> > datasigfano;
269 clipper::HKL_data<clipper::data32::Flag> datafree(hkls);
270
271 // fill data arrays and export
272 clipper::CCP4MTZfile mtzout;
273 mtzout.open_write( opfile );
274 mtzout.export_hkl_info( hkls );
275
276 // loop over the columns
277 bool friedel;
278 int isym;
279 for ( int col = 3; col < ncol; col++ ) {
280 // I or Iano
281 if ( typs[col] == "J" ) {
282 if ( !anom ) {
283 clipper::HKL_data<dataI> data( hkls );
284 for ( int ref = 0; ref < nref; ref++ ) {
285 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
286 Util::intr(vals[ref*ncol+1]),
287 Util::intr(vals[ref*ncol+2]) );
288 dataI dat( vals[ref*ncol+col] );
289 data.set_data( hkl, dat );
290 }
291 datai.push_back( data );
292 mtzout.export_hkl_data( datai.back(), "/*/*/["+cols[col]+"]" );
293 } else {
294 clipper::HKL_data<dataIano> data( hkls );
295 for ( int ref = 0; ref < nref; ref++ ) {
296 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
297 Util::intr(vals[ref*ncol+1]),
298 Util::intr(vals[ref*ncol+2]) );
299 clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
300 int index = hkls.index_of( hkl1 );
301 float v = vals[ref*ncol+col];
302 if ( friedel ) data[index] = dataIano( data[index].I_pl(), v );
303 else data[index] = dataIano( v, data[index].I_mi() );
304 }
305 dataiano.push_back( data );
306 mtzout.export_hkl_data( dataiano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
307 }
308 }
309 // F or Fano
310 if ( typs[col] == "F" ) {
311 if ( !anom ) {
312 clipper::HKL_data<dataF> data( hkls );
313 for ( int ref = 0; ref < nref; ref++ ) {
314 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
315 Util::intr(vals[ref*ncol+1]),
316 Util::intr(vals[ref*ncol+2]) );
317 dataF dat( vals[ref*ncol+col] );
318 data.set_data( hkl, dat );
319 }
320 dataf.push_back( data );
321 mtzout.export_hkl_data( dataf.back(), "/*/*/["+cols[col]+"]" );
322 } else {
323 clipper::HKL_data<dataFano> data( hkls );
324 for ( int ref = 0; ref < nref; ref++ ) {
325 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
326 Util::intr(vals[ref*ncol+1]),
327 Util::intr(vals[ref*ncol+2]) );
328 clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
329 int index = hkls.index_of( hkl1 );
330 float v = vals[ref*ncol+col];
331 if ( friedel ) data[index] = dataFano( data[index].F_pl(), v );
332 else data[index] = dataFano( v, data[index].F_mi() );
333 }
334 datafano.push_back( data );
335 mtzout.export_hkl_data( datafano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
336 }
337 }
338 // sigF or sigFano
339 if ( typs[col] == "Q" ) {
340 if ( !anom ) {
341 clipper::HKL_data<dataSig> data( hkls );
342 for ( int ref = 0; ref < nref; ref++ ) {
343 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
344 Util::intr(vals[ref*ncol+1]),
345 Util::intr(vals[ref*ncol+2]) );
346 dataSig dat( vals[ref*ncol+col] );
347 data.set_data( hkl, dat );
348 }
349 datasig.push_back( data );
350 mtzout.export_hkl_data( datasig.back(), "/*/*/["+cols[col]+"]" );
351 } else {
352 clipper::HKL_data<dataSigFano> data( hkls );
353 for ( int ref = 0; ref < nref; ref++ ) {
354 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
355 Util::intr(vals[ref*ncol+1]),
356 Util::intr(vals[ref*ncol+2]) );
357 clipper::HKL hkl1 = hkls.find_sym( hkl, isym, friedel );
358 int index = hkls.index_of( hkl1 );
359 float v = vals[ref*ncol+col];
360 if ( friedel ) data[index] = dataSigFano( data[index].sigF_pl(), v );
361 else data[index] = dataSigFano( v, data[index].sigF_mi() );
362 }
363 datasigfano.push_back( data );
364 mtzout.export_hkl_data( datasigfano.back(), "/*/*/["+cols[col]+"+,"+cols[col]+"-]" );
365 }
366 }
367 // Phase
368 if ( typs[col] == "P" ) {
369 clipper::HKL_data<dataPhi> data( hkls );
370 for ( int ref = 0; ref < nref; ref++ ) {
371 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
372 Util::intr(vals[ref*ncol+1]),
373 Util::intr(vals[ref*ncol+2]) );
374 dataPhi dat( Util::d2rad(vals[ref*ncol+col]) );
375 data.set_data( hkl, dat );
376 }
377 dataphi.push_back( data );
378 mtzout.export_hkl_data( dataphi.back(), "/*/*/["+cols[col]+"]" );
379 }
380 // FOM
381 if ( typs[col] == "W" ) {
382 clipper::HKL_data<dataFom> data( hkls );
383 for ( int ref = 0; ref < nref; ref++ ) {
384 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
385 Util::intr(vals[ref*ncol+1]),
386 Util::intr(vals[ref*ncol+2]) );
387 dataFom dat( vals[ref*ncol+col] );
388 data.set_data( hkl, dat );
389 }
390 datafom.push_back( data );
391 mtzout.export_hkl_data( datafom.back(), "/*/*/["+cols[col]+"]" );
392 }
393 // Flag
394 if ( typs[col] == "I" ) {
395 clipper::HKL_data<dataFlag> data( hkls );
396 for ( int ref = 0; ref < nref; ref++ ) {
397 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
398 Util::intr(vals[ref*ncol+1]),
399 Util::intr(vals[ref*ncol+2]) );
400 dataFlag dat( Util::intr(vals[ref*ncol+col]) );
401 data.set_data( hkl, dat );
402 }
403 dataflag.push_back( data );
404 mtzout.export_hkl_data( dataflag.back(), "/*/*/["+cols[col]+"]" );
405 }
406 // HL coeffs
407 if ( typs[col] == "A" && col < ncol-3 ) {
408 clipper::HKL_data<dataABCD> data( hkls );
409 for ( int ref = 0; ref < nref; ref++ ) {
410 clipper::HKL hkl( Util::intr(vals[ref*ncol ]),
411 Util::intr(vals[ref*ncol+1]),
412 Util::intr(vals[ref*ncol+2]) );
413 dataABCD dat( vals[ref*ncol+col ], vals[ref*ncol+col+1],
414 vals[ref*ncol+col+2], vals[ref*ncol+col+3] );
415 data.set_data( hkl, dat );
416 }
417 dataabcd.push_back( data );
418 mtzout.export_hkl_data( dataabcd.back(), "/*/*/["+cols[col]+","+cols[col+1]+","+cols[col+2]+","+cols[col+3]+"]" );
419 col += 3;
420 }
421 }
422
423 // Free-R flag conversion
424 typedef clipper::HKL_info::HKL_reference_index HRI;
425 if ( dataflag.size() > 0 ) {
426 int n, n0, n1, fl, nfl;
427 n = n0 = n1 = 0;
428 for ( HRI ih = dataflag[0].first(); !ih.last(); ih.next() ) {
429 if ( dataflag[0][ih].flag() >= 0 ) n++;
430 if ( dataflag[0][ih].flag() == 0 ) n0++;
431 if ( dataflag[0][ih].flag() == 1 ) n1++;
432 }
433 fl = 1; nfl = n1; // conventional CNS flag definition
434 if ( n0 < n1/2 ) { fl = 0; nfl = n0; } // check for inverted CNS flags
435 // generate CCP4 free flags
436 int nfree = Util::intr( double(n) / double(nfl) );
437 for ( HRI ih = dataflag[0].first(); !ih.last(); ih.next() )
438 if ( dataflag[0][ih].flag() == fl ) // free: add to free set
439 datafree[ih] = dataFlag( 0 );
440 else // missing or work: pick a working set
441 datafree[ih] = dataFlag( random()%(nfree-1) + 1 );
442 } else {
443 int nfree = nref / 1000;
444 if ( nfree < 10 ) nfree = 10;
445 if ( nfree > 20 ) nfree = 20;
446 for ( HRI ih = datafree.first(); !ih.last(); ih.next() )
447 datafree[ih] = dataFlag( random()%nfree );
448 }
449 mtzout.export_hkl_data( datafree, "/*/*/[FreeF_flag]" );
450
451 mtzout.close_write();
452
453 std::cout << ( anom ? "Merging HKL and -HKL" : "Not merging HKL and -HKL" ) << std::endl;
454 std::cout << ( complete ? "Inserting missing reflections" : "Not inserting missing reflections" ) << std::endl;
455 std::cout << "Number of reflections input : " << nref << std::endl;
456 std::cout << "Number of reflections output: " << hkls.num_reflections() << std::endl;
457 }
458