1 /*-------------------------------------------------------------------
2 Copyright 2012 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <core/Util.h>
21 #include <core/Thread.h>
22 #include <core/ManagedMemory.h>
23 #include <core/GpuUtil.h>
24 #include <cmath>
25 #include <csignal>
26 #include <list>
27 #include <algorithm>
28 #include <getopt.h>
29 #include <commands/parser.h>
30 
31 #ifdef GPU_ENABLED
32 #include <core/GpuUtil.h>
33 #endif
34 
35 #include <config.h> //This file is generated during build based on Git hash etc.
36 
InitParams(const char * description,class Everything * e)37 InitParams::InitParams(const char* description, class Everything* e)
38 : description(description), e(e), packageName(0), versionString(0), versionHash(0)
39 {
40 }
41 
42 //Program banner
printVersionBanner(const InitParams * ip)43 void printVersionBanner(const InitParams* ip)
44 {	string deco(15, '*');
45 	string prefix;
46 	logPrintf("\n");
47 	if(ip && ip->packageName)
48 	{	//Print other package's information (JDFTx is only a helper)
49 		logPrintf("%s %s %s %s %s\n", deco.c_str(), ip->packageName, ip->versionString,
50 			(strlen(ip->versionHash) ? ("(git hash " + string(ip->versionHash) + ")").c_str() : ""), deco.c_str());
51 		deco.assign(15, '+');
52 		prefix = "Linked to ";
53 	}
54 	logPrintf("%s %s" PACKAGE_NAME " " VERSION_MAJOR_MINOR_PATCH " %s %s\n",
55 		deco.c_str(), prefix.c_str(), (strlen(VERSION_HASH) ? "(git hash " VERSION_HASH ")" : ""), deco.c_str());
56 	logPrintf("\n"); logFlush();
57 }
58 
59 //Print usage information
printUsage(const char * name,const InitParams & ip)60 void printUsage(const char *name, const InitParams& ip)
61 {	printVersionBanner(&ip);
62 	logPrintf("Usage: %s [options]\n",name);
63 	logPrintf("\n\t%s\n\n", ip.description);
64 	logPrintf("options:\n\n");
65 	logPrintf("\t-h --help               help (this output)\n");
66 	logPrintf("\t-v --version            version\n");
67 	logPrintf("\t-i --input <filename>   specify command input file, default = stdin\n");
68 	logPrintf("\t-o --output <filename>  specify output log file, default = stdout\n");
69 	logPrintf("\t-d --no-append          overwrite output file instead of appending\n");
70 	logPrintf("\t-t --template           print an input file template\n");
71 	logPrintf("\t-m --mpi-debug-log      write output from secondary MPI processes to jdftx.<proc>.mpiDebugLog (instead of /dev/null)\n");
72 	logPrintf("\t-n --dry-run            quit after initialization (to verify commands and other input files)\n");
73 	logPrintf("\t-c --cores              number of cores per process (ignored when launched using SLURM)\n");
74 	logPrintf("\t-G --nGroups            number of MPI process groups (default or 0 => each process in own group of size 1)\n");
75 	logPrintf("\t-s --skip-defaults      skip printing status of default commands issued automatically.\n");
76 	logPrintf("\n");
77 }
78 
79 //Signal handlers:
80 bool killFlag = false; //!< The signal handlers set this flag to suggest clean termination
81 void sigIntHandler(int sig); //!< Handle Ctrl+C (quit cleanly, abruptly or not at all - based on user input)
82 void sigQuitHandler(int sig); //!< Handle Ctrl+\ (cleanly quit after current iteration, without prompting)
83 void sigErrorHandler(int sig); //!< Exit with a stack trace on segfaults and aborts (to ease debugging)
resetHandlers()84 void resetHandlers()
85 {	signal(SIGINT, SIG_IGN);
86 	signal(SIGQUIT, SIG_IGN);
87 }
registerHandlers()88 void registerHandlers()
89 {	signal(SIGINT, sigIntHandler);
90 	signal(SIGQUIT, sigQuitHandler);
91 	signal(SIGSEGV, sigErrorHandler);
92 	signal(SIGABRT, sigErrorHandler);
93 }
sigIntHandler(int sig)94 void sigIntHandler(int sig)
95 {	if(feof(stdin)) mpiWorld->exit(0);
96 	resetHandlers();
97 	printf(
98 		"\n---------------------------------------------\n"
99 		"Received SIGINT (Ctrl+C), do you want to:\n"
100 		"\t[Q] Quit rightaway?\n"
101 		"\t[A] Quit cleanly after current iteration?\n"
102 		"\t[I] Ignore and continue normally?\n");
103 	while(1)
104 	{
105 		printf("Enter [Q/A/I]: "); fflush(stdout);
106 		char c = getchar();
107 		switch(c)
108 		{	case 'q': case 'Q': printf("Quitting now ...\n"); mpiWorld->exit(0);
109 			case 'a': case 'A':
110 				printf("Will quit after current iteration ...\n");
111 				killFlag = true; registerHandlers(); return;
112 			case 'i': case 'I':
113 				printf("Ignoring and continuing normally ...\n");
114 				registerHandlers(); return;
115 			default:
116 				printf("Unrecognized input.\n");
117 		}
118 	}
119 }
sigQuitHandler(int sig)120 void sigQuitHandler(int sig)
121 {	resetHandlers();
122 	logPrintf("\n"
123 		"+---------------------------------------------------------------+\n"
124 		"|  Received SIGQUIT, will quit cleanly after current iteration  |\n"
125 		"+---------------------------------------------------------------+\n\n");
126 	logFlush();
127 	killFlag = true;
128 	registerHandlers();
129 }
sigErrorHandler(int sig)130 void sigErrorHandler(int sig)
131 {	fprintf(stderr, sig==SIGSEGV ? "Segmentation Fault.\n" : "Aborted.\n");
132 	stackTraceExit(1);
133 }
134 
135 string inputBasename("stdin"); //!< Basename of input file or "stdin" that can be used as a default run-name
136 FILE* globalLog = stdout; // this might be replaced by a file pointer in main before calling initSystem()
137 FILE* globalLogOrig;
138 FILE* nullLog = 0;
139 
logSuspend()140 void logSuspend()
141 {	if(nullLog) globalLog = nullLog;
142 }
143 
logResume()144 void logResume()
145 {	globalLog = globalLogOrig;
146 }
147 
148 int nProcessGroups = 0;
149 MPIUtil* mpiWorld = 0;
150 MPIUtil* mpiGroup = 0;
151 MPIUtil* mpiGroupHead = 0;
152 bool mpiDebugLog = false;
153 bool manualThreadCount = false;
154 size_t mempoolSize = 0;
155 static double startTime_us; //Time at which system was initialized in microseconds
156 const char* argv0 = 0;
157 uint32_t crc32(const string& s); //CRC32 checksum for a string (implemented below)
158 
159 //Print process index distribution given communicators:
printProcessDistribution(string header,string label,const MPIUtil * mpiUtil,const MPIUtil * mpiUtilHead)160 void printProcessDistribution(string header, string label, const MPIUtil* mpiUtil, const MPIUtil* mpiUtilHead)
161 {	if(mpiUtil->isHead())
162 	{	ostringstream oss;
163 		oss << label << " (";
164 		int jStart=mpiWorld->iProcess(), jStop=jStart; //range yet to be printed
165 		for(int jProc=1; jProc<mpiUtil->nProcesses(); jProc++)
166 		{	int jCur; //rank of jProc in world
167 			mpiUtil->recv(jCur, jProc, 0);
168 			if(jCur == jStop+1)
169 				jStop = jCur; //contiguous with previous range
170 			else
171 			{	oss << jStart; if(jStop>jStart) oss << '-' << jStop; oss << ','; //flush previous range
172 				jStart = jStop = jCur; //start new range
173 			}
174 		}
175 		oss << jStart; if(jStop>jStart) oss << '-' << jStop; oss << ')'; //flush pending range
176 		//send to world head to print:
177 		if(mpiUtilHead->isHead()) //note that mpiUtilHead->head == mpiWorld->head
178 		{	logPrintf("%s (process indices):  %s", header.c_str(), oss.str().c_str());
179 			for(int jHead=1; jHead<mpiUtilHead->nProcesses(); jHead++)
180 			{	string buf;
181 				mpiUtilHead->recv(buf, jHead, 0);
182 				logPrintf("  %s", buf.c_str());
183 			}
184 			logPrintf("\n");
185 		}
186 		else mpiUtilHead->send(oss.str(), 0, 0);
187 	}
188 	else mpiUtil->send(mpiWorld->iProcess(), 0, 0);
189 }
190 
initSystem(int argc,char ** argv,const InitParams * ip)191 void initSystem(int argc, char** argv, const InitParams* ip)
192 {
193 	argv0 = argv[0]; //remember how the executable was issued (for stack traces)
194 
195 	if(!mpiWorld) mpiWorld = new MPIUtil(argc, argv);
196 	nullLog = fopen("/dev/null", "w");
197 	if(!mpiWorld->isHead())
198 	{	if(mpiDebugLog)
199 		{	char fname[256]; sprintf(fname, "jdftx.%d.mpiDebugLog", mpiWorld->iProcess());
200 			globalLog = fopen(fname, "w");
201 		}
202 		else globalLog = nullLog;
203 	}
204 	globalLogOrig = globalLog;
205 
206 	//Star time and commandline:
207 	printVersionBanner(ip);
208 	time_t startTime = time(0);
209 	startTime_us = clock_us();
210 	logPrintf("Start date and time: %s", ctime(&startTime)); //note ctime output has a "\n" at the end
211 	//---- commandline
212 	logPrintf("Executable %s with ", argv[0]);
213 	if(argc>1)
214 	{	logPrintf("command-line:");
215 		for(int i=1; i<argc; i++) logPrintf(" %s", argv[i]);
216 		logPrintf("\n");
217 	}
218 	else logPrintf("empty command-line (run with -h or --help for command-line options).\n");
219 	registerHandlers();
220 
221 	//Determine and print distribution of processes on hosts:
222 	//--- get current hostname and checksum:
223 	string hostname;
224 	{	char hostnameTmp[256];
225 		gethostname(hostnameTmp, 256);
226 		hostname = string(hostnameTmp);
227 	}
228 	int hostsum = abs(int(crc32(hostname))); //ensure positive for MPI_split below
229 	//---- create MPI group for each host:
230 	MPIUtil mpiHost(0,0, MPIUtil::ProcDivision(mpiWorld, 0, hostsum));
231 	MPIUtil mpiHostGpu(0,0, MPIUtil::ProcDivision(mpiWorld, 0, isGpuEnabled() ? hostsum : 0)); //for grouping processes with GPUs
232 	MPIUtil mpiHostHead(0,0, MPIUtil::ProcDivision(mpiWorld, 0, mpiHost.iProcess())); //communicator between similar rank within each host
233 	printProcessDistribution("Running on hosts", hostname, &mpiHost, &mpiHostHead);
234 
235 	//Initialize process groups:
236 	if(nProcessGroups <= 0) nProcessGroups = mpiWorld->nProcesses(); //default: one group per process
237 	mpiGroup = new MPIUtil(0,0, MPIUtil::ProcDivision(mpiWorld, nProcessGroups));
238 	mpiGroupHead = new MPIUtil(0,0, MPIUtil::ProcDivision(mpiWorld, 0, mpiGroup->iProcess())); //communicator between similar rank within each group
239 	{	ostringstream oss; oss << mpiGroup->procDivision.iGroup;
240 		printProcessDistribution("Divided in process groups", oss.str(), mpiGroup, mpiGroupHead);
241 	}
242 
243 	double nGPUs = 0.;
244 	#ifdef GPU_ENABLED
245 	if(!gpuInit(globalLog, &mpiHostGpu, &nGPUs)) die_alone("gpuInit() failed\n\n")
246 	#endif
247 
248 	//Override available cores per node if specified:
249 	const char* envCpusPerNode = getenv("JDFTX_CPUS_PER_NODE");
250 	if(envCpusPerNode)
251 	{	int nCpusPerNode;
252 		if(sscanf(envCpusPerNode, "%d", &nCpusPerNode)==1)
253 			nProcsAvailable = nCpusPerNode; //Override found, update available processor count (Thread.h)
254 		else
255 			logPrintf("Could not determine total core count from JDFTX_CPUS_PER_NODE=\"%s\".\n", envCpusPerNode);
256 	}
257 
258 	//Divide up available cores between all MPI processes on a given node:
259 	if(!manualThreadCount) //skip if number of cores per process has been set with -c
260 	{	int nSiblings = mpiHost.nProcesses();
261 		int iSibling = mpiHost.iProcess();
262 		nProcsAvailable = std::max(1, (nProcsAvailable * (iSibling+1))/nSiblings - (nProcsAvailable*iSibling)/nSiblings);
263 	}
264 
265 	//Limit thread count if running within SLURM:
266 	const char* slurmCpusPerTask = getenv("SLURM_CPUS_PER_TASK");
267 	if(slurmCpusPerTask)
268 	{	int nThreadsMax;
269 		if(sscanf(slurmCpusPerTask, "%d", &nThreadsMax)==1)
270 			nProcsAvailable = nThreadsMax; //Slurm spec found, update available processor count (Thread.h)
271 		else
272 			logPrintf("Could not determine thread count from SLURM_CPUS_PER_TASK=\"%s\".\n", slurmCpusPerTask);
273 	}
274 	resumeOperatorThreading(); //if necessary, this informs MKL of the thread count
275 
276 	//Print total resources used by run:
277 	{	int nProcsTot = nProcsAvailable; mpiWorld->allReduce(nProcsTot, MPIUtil::ReduceSum);
278 		double nGPUsTot = nGPUs; mpiWorld->allReduce(nGPUsTot, MPIUtil::ReduceSum);
279 		logPrintf("Resource initialization completed at t[s]: %9.2lf\n", clock_sec());
280 		logPrintf("Run totals: %d processes, %d threads, %lg GPUs\n", mpiWorld->nProcesses(), nProcsTot, nGPUsTot);
281 	}
282 
283 	//Memory pool size:
284 	const char* mempoolSizeStr = getenv("JDFTX_MEMPOOL_SIZE");
285 	if(mempoolSizeStr)
286 	{	int mempoolSizeMB;
287 		if(sscanf(mempoolSizeStr, "%d", &mempoolSizeMB)==1 && mempoolSizeMB>=0)
288 		{	mempoolSize = ((size_t)mempoolSizeMB) << 20; //convert to bytes
289 			logPrintf("Memory pool size: %d MB (per process)\n", mempoolSizeMB);
290 		}
291 		else
292 			logPrintf("Could not determine memory pool size from JDFTX_MEMPOOL_SIZE=\"%s\".\n", mempoolSizeStr);
293 	}
294 
295 	//Add citations to the code for all calculations:
296 	Citations::add("Software package",
297 		"R. Sundararaman, K. Letchworth-Weaver, K.A. Schwarz, D. Gunceler, Y. Ozhabes and T.A. Arias, "
298 		"'JDFTx: software for joint density-functional theory', SoftwareX 6, 278 (2017)");
299 }
300 
initSystemCmdline(int argc,char ** argv,InitParams & ip)301 void initSystemCmdline(int argc, char** argv, InitParams& ip)
302 {
303 	mpiWorld = new MPIUtil(argc, argv);
304 
305 	//Parse command line:
306 	string logFilename; bool appendOutput=true;
307 	ip.dryRun=false; ip.printDefaults=true;
308 	option long_options[] =
309 		{	{"help", no_argument, 0, 'h'},
310 			{"version", no_argument, 0, 'v'},
311 			{"input",  required_argument, 0, 'i'},
312 			{"output", required_argument, 0, 'o'},
313 			{"no-append", no_argument, 0, 'd'},
314 			{"template", no_argument, 0, 't'},
315 			{"mpi-debug-log", no_argument, 0, 'm'},
316 			{"dry-run", no_argument, 0, 'n'},
317 			{"cores", required_argument, 0, 'c'},
318 			{"nGroups", required_argument, 0, 'G'},
319 			{"skip-defaults", no_argument, 0, 's'},
320 			{"write-manual", required_argument, 0, 'w'},
321 			{0, 0, 0, 0}
322 		};
323 	while (1)
324 	{	int c = getopt_long(argc, argv, "hvi:o:dtmnc:G:sw:", long_options, 0);
325 		if (c == -1) break; //end of options
326 		#define RUN_HEAD(code) if(mpiWorld->isHead()) { code } delete mpiWorld;
327 		switch (c)
328 		{	case 'v': RUN_HEAD( printVersionBanner(&ip); ) exit(0);
329 			case 'h': RUN_HEAD( printUsage(argv[0], ip); ) exit(0);
330 			case 'i': ip.inputFilename.assign(optarg); break;
331 			case 'o': logFilename.assign(optarg); break;
332 			case 'd': appendOutput=false; break;
333 			case 't': RUN_HEAD( if(ip.e) printDefaultTemplate(*ip.e); ) exit(0);
334 			case 'm': mpiDebugLog=true; break;
335 			case 'n': ip.dryRun=true; break;
336 			case 'c':
337 			{	int nCores = 0;
338 				if(sscanf(optarg, "%d", &nCores)==1 && nCores>0)
339 				{	nProcsAvailable=nCores;
340 					manualThreadCount =true;
341 				}
342 				break;
343 			}
344 			case 'G':
345 			{	if(!(sscanf(optarg, "%d", &nProcessGroups)==1 && nProcessGroups>=0))
346 				{	RUN_HEAD(
347 						printf("\nOption -G (--nGroups) must be a non-negative integer.\n");
348 						printUsage(argv[0], ip);
349 					)
350 					exit(1);
351 				}
352 				break;
353 			}
354 			case 's': ip.printDefaults=false; break;
355 			case 'w': RUN_HEAD( if(ip.e) writeCommandManual(*ip.e, optarg); ) exit(0);
356 			default: RUN_HEAD( printUsage(argv[0], ip); ) exit(1);
357 		}
358 		#undef RUN_HEAD
359 	}
360 
361 	//Open the logfile (if any):
362 	if(logFilename.length())
363 	{	globalLog = fopen(logFilename.c_str(), appendOutput ? "a" : "w");
364 		if(!globalLog)
365 		{	globalLog = stdout;
366 			logPrintf("WARNING: Could not open log file '%s' for writing, using standard output.\n", logFilename.c_str());
367 		}
368 	}
369 
370 	//Set input base name if necessary:
371 	if(ip.inputFilename.length())
372 	{	inputBasename = ip.inputFilename;
373 		//Remove extension:
374 		size_t lastDot = inputBasename.find_last_of(".");
375 		if(lastDot != string::npos)
376 			inputBasename = inputBasename.substr(0, lastDot); //Remove extension
377 		//Remove leading path:
378 		size_t lastSlash = inputBasename.find_last_of("\\/");
379 		if(lastSlash != string::npos)
380 			inputBasename = inputBasename.substr(lastSlash+1);
381 	}
382 
383 	//Print banners, setup threads, GPUs and signal handlers
384 	initSystem(argc, argv, &ip);
385 }
386 
387 #ifdef ENABLE_PROFILING
stopWatchManager(const StopWatch * addWatch=0,const string * watchName=0)388 void stopWatchManager(const StopWatch* addWatch=0, const string* watchName=0)
389 {	static std::multimap<string, const StopWatch*> watches; //static array of all watches
390 	if(addWatch) watches.insert(std::make_pair(*watchName,addWatch));
391 	else //print timings:
392 	{	logPrintf("\n");
393 		for(const auto& wPair: watches) wPair.second->print();
394 	}
395 }
396 #endif // ENABLE_PROFILING
397 
398 
finalizeSystem(bool successful)399 void finalizeSystem(bool successful)
400 {
401 	time_t endTime = time(0);
402 	char* endTimeString = ctime(&endTime);
403 	endTimeString[strlen(endTimeString)-1] = 0; //get rid of the newline in output of ctime
404 	double durationSec = 1e-6*(clock_us() - startTime_us);
405 	int durationDays = floor(durationSec/86400.); durationSec -= 86400.*durationDays;
406 	int durationHrs  = floor(durationSec/3600.);  durationSec -= 3600.*durationHrs;
407 	int durationMin  = floor(durationSec/60.);    durationSec -= 60.*durationMin;
408 	logPrintf("End date and time: %s  (Duration: %d-%d:%02d:%05.2lf)\n",
409 		endTimeString, durationDays, durationHrs, durationMin, durationSec);
410 
411 	if(successful) logPrintf("Done!\n");
412 	else
413 	{	logPrintf("Failed.\n");
414 		if(mpiWorld->isHead() && globalLog != stdout)
415 			fprintf(stderr, "Failed.\n");
416 	}
417 
418 	#ifdef ENABLE_PROFILING
419 	stopWatchManager();
420 	logPrintf("\n");
421 	ManagedMemoryBase::reportUsage();
422 	#endif
423 
424 	if(!mpiWorld->isHead())
425 	{	if(mpiDebugLog) fclose(globalLog);
426 		globalLog = 0;
427 	}
428 	fclose(nullLog);
429 	if(globalLog && globalLog != stdout)
430 		fclose(globalLog);
431 	delete mpiGroupHead;
432 	delete mpiGroup;
433 	delete mpiWorld;
434 }
435 
436 
437 //------------ Timing helpers ----------------
438 
clock_us_epoch()439 inline double clock_us_epoch() //time in microseconds (since standard POSIX specified epoch)
440 {	timeval tv;
441 	gettimeofday(&tv,NULL);
442 	return ((double)tv.tv_sec) * 1e6 + tv.tv_usec;
443 }
clock_us()444 double clock_us()
445 {	static double tStart = clock_us_epoch();
446 	return clock_us_epoch() - tStart;
447 }
clock_sec()448 double clock_sec()
449 {	return 1e-6*clock_us();
450 }
451 
452 
453 #ifdef ENABLE_PROFILING
StopWatch(string name)454 StopWatch::StopWatch(string name) : Ttot(0), TsqTot(0), nT(0), name(name) { stopWatchManager(this, &name); }
start()455 void StopWatch::start()
456 {
457 	#ifdef GPU_ENABLED
458 	cudaDeviceSynchronize();
459 	#endif
460 	tPrev = clock_us();
461 }
stop()462 void StopWatch::stop()
463 {
464 	#ifdef GPU_ENABLED
465 	cudaDeviceSynchronize();
466 	#endif
467 	double T = clock_us()-tPrev;
468 	Ttot+=T; TsqTot+=T*T; nT++;
469 }
print() const470 void StopWatch::print() const
471 {	if(nT)
472 	{	double meanT = Ttot/nT;
473 		double sigmaT = sqrt(TsqTot/nT - meanT*meanT);
474 		logPrintf("PROFILER: %30s %12.6lf +/- %12.6lf s, %4d calls, %13.6lf s total\n",
475 			name.c_str(), meanT*1e-6, sigmaT*1e-6, nT, Ttot*1e-6);
476 	}
477 }
478 #endif //ENABLE_PROFILING
479 
480 
481 // Print a minimal stack trace (convenient for debugging)
printStack(bool detailedStackScript)482 void printStack(bool detailedStackScript)
483 {	const int maxStackLength = 1024;
484 	void* tracePtrs[maxStackLength];
485 	int count = backtrace(tracePtrs, maxStackLength);
486 	char** funcNames = backtrace_symbols(tracePtrs, count);
487 	logPrintf("\nStack trace:\n");
488 	for(int i=0; i<count; i++)
489 		logPrintf("\t%2d: %s\n", i, funcNames[i]);
490 	if(detailedStackScript)
491 	{	logPrintf("Writing 'jdftx-stacktrace' (for use with script printStackTrace): "); logFlush();
492 		FILE* fp = fopen("jdftx-stacktrace", "w");
493 		if(fp)
494 		{	for(int i=0; i<count; i++)
495 				fprintf(fp, "%s\n", funcNames[i]);
496 			fclose(fp);
497 			logPrintf("done.\n");
498 		}
499 		else logPrintf("could not open file for writing.\n");
500 	}
501 	free(funcNames);
502 }
503 
504 //--------------- Miscellaneous ------------------
505 
506 // Get the size of a file
fileSize(const char * filename)507 off_t fileSize(const char *filename)
508 {	struct stat st;
509 	if(stat(filename, &st) == 0) return st.st_size;
510     return -1;
511 }
512 
isLittleEndian()513 bool isLittleEndian()
514 {	static bool isLE = false, initializedLE = false;
515 	if(!initializedLE)
516 	{	//To be absolutely sure, check with an 8-byte type:
517 		uint64_t testData = 0x0001020304050607;
518 		uint8_t* testPtr = (uint8_t*)(&testData);
519 		bool littleCheck = true;
520 		bool bigCheck = true;
521 		for(int j=0; j<8; j++)
522 		{	littleCheck &= (testPtr[j]==7-j);
523 			bigCheck &= (testPtr[j]==j);
524 		}
525 		if(littleCheck) isLE = true; //little endian
526 		else if(bigCheck) isLE = false; //big endian
527 		else //neither:
528 			die("Binary I/O not yet supported on mixed-endian CPUs.\n")
529 	}
530 	return isLE;
531 }
532 
533 //Convert data from operating endianness to little-endian
convertToLE(void * ptr,size_t size,size_t nmemb)534 void convertToLE(void* ptr, size_t size, size_t nmemb)
535 {	static StopWatch watch("endianSwap");
536 	if(isLittleEndian() || size==1) return; //nothing to do on little-endian systems, or on byte-wise data
537 	watch.start();
538 	//Determine chunk size over which byte-swapping must occur:
539 	size_t chunkSize = 0, nChunks = 0;
540 	switch(size)
541 	{	case 2:
542 		case 4:
543 		case 8:
544 			chunkSize = size;
545 			nChunks = nmemb;
546 			break;
547 		default:
548 			if(size % 8 == 0)
549 			{	//assume composite data set of doubles eg. complex
550 				chunkSize = 8;
551 				nChunks = nmemb * (size/8);
552 			}
553 			else
554 				die("Unsupported size '%zu' for binary I/O on big-endian systems.\n", size)
555 	}
556 	//Apply byte-swapping:
557 	char* bytes = (char*)ptr;
558 	for(size_t iChunk=0; iChunk<nChunks; iChunk++)
559 	{	for(size_t iByte=0; iByte<chunkSize/2; iByte++)
560 		{	std::swap(bytes[iByte], bytes[chunkSize-1-iByte]);
561 		}
562 		bytes += chunkSize; //move to next chunk
563 	}
564 	watch.stop();
565 }
566 
567 //Convert data from little-endian to operating endianness
convertFromLE(void * ptr,size_t size,size_t nmemb)568 void convertFromLE(void* ptr, size_t size, size_t nmemb)
569 {	convertToLE(ptr, size, nmemb); //swapping between little and big endian is its own inverse
570 }
571 
572 //Read from a little-endian binary file, regardless of operating endianness
freadLE(void * ptr,size_t size,size_t nmemb,FILE * fp)573 size_t freadLE(void *ptr, size_t size, size_t nmemb, FILE* fp)
574 {	size_t result = fread(ptr, size, nmemb, fp); //byte-by-byte read
575 	convertFromLE(ptr, size, nmemb); //modify byte-order in place, if necessary
576 	return result;
577 }
578 
579 //Write to a little-endian binary file, regardless of operating endianness
fwriteLE(const void * ptr,size_t size,size_t nmemb,FILE * fp)580 size_t fwriteLE(const void *ptr, size_t size, size_t nmemb, FILE *fp)
581 {	convertToLE((void*)ptr, size, nmemb); //modify byte-order in place, if necessary
582 	size_t result = fwrite((void*)ptr, size, nmemb, fp); //byte-by-byte write
583 	convertFromLE((void*)ptr, size, nmemb); //restore byte-order (since data should not be logically modified)
584 	return result;
585 }
586 
587 
588 // Exit on error with a more in-depth stack trace
stackTraceExit(int code)589 void stackTraceExit(int code)
590 {	printStack(true);
591 	mpiWorld->exit(code);
592 }
593 
594 // Stack trace for failed assertions
assertStackTraceExit(const char * expr,const char * function,const char * file,long line)595 int assertStackTraceExit(const char* expr, const char* function, const char* file, long line)
596 {	fprintf(stderr, "%s:%ld: %s:\n\tAssertion '%s' failed", file, line, function, expr);
597 	stackTraceExit(1);
598 	return 0;
599 }
600 
601 
602 namespace Citations
603 {
604 	//Single function whose static variable contains the list of citations
605 	//(Done this way, so that add may be called during static initialization safely)
606 	//If addCitation is non-null, enter the pair into the list
607 	//If getCitationList is non-null, retrieve the list
manage(std::pair<string,string> * addCitation=0,std::list<std::pair<string,string>> * getCitationList=0)608 	void manage(std::pair<string,string>* addCitation=0, std::list<std::pair<string,string>>* getCitationList=0)
609 	{	static std::list<std::pair<string,string>> citationList; //pair.first = paper, pair.second = reason
610 		if(addCitation)
611 		{	auto iter=citationList.begin();
612 			bool foundPrev = false, duplicate = false;
613 			for(; iter!=citationList.end(); iter++)
614 			{	if(foundPrev && iter->second!=addCitation->second)
615 					break; //End of chain of citations with current paper
616 				if(iter->second==addCitation->second)
617 				{	foundPrev = true;
618 					if(iter->first==addCitation->first)
619 					{	duplicate = true;
620 						break;
621 					}
622 				}
623 			}
624 			if(!duplicate) citationList.insert(iter, *addCitation);
625 		}
626 		if(getCitationList) *getCitationList = citationList;
627 	}
628 
add(string reason,string paper)629 	void add(string reason, string paper)
630 	{	std::pair<string,string> citation(paper, reason);
631 		manage(&citation, 0);
632 	}
633 
print(FILE * fp)634 	void print(FILE* fp)
635 	{	fprintf(fp, "\n---- Citations for features of the code used in this run ----\n\n");
636 		//Get the citation map:
637 		std::list<std::pair<string,string>> citationList;
638 		manage(0, &citationList);
639 		//Print entries:
640 		for(auto iter=citationList.begin(); iter!=citationList.end();)
641 		{	string paper = iter->first;
642 			for(;;iter++)
643 			{	if(iter==citationList.end() || iter->first != paper)
644 				{	//End the current chain of reasons for this paper
645 					istringstream iss(paper);
646 					while(!iss.eof())
647 					{	string line; getline(iss, line);
648 						fprintf(fp, "      %s\n", line.c_str());
649 					}
650 					fprintf(fp, "\n");
651 					break;
652 				}
653 				fprintf(fp, "   %s:\n", iter->second.c_str());
654 			}
655 		}
656 		fprintf(fp,
657 			"This list may not be complete. Please suggest additional citations or\n"
658 			"report any other bugs at https://github.com/shankar1729/jdftx/issues\n\n");
659 		fflush(fp);
660 	}
661 }
662 
663 
664 //Calculate CRC32 checksum for a buffer
crc32(const CharIterator begin,const CharIterator end)665 template<typename CharIterator> uint32_t crc32(const CharIterator begin, const CharIterator end)
666 {	//--- Code based on CRC32 snippet from c.snippets.org ---
667 	static uint32_t table[] = {
668 		0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
669 		0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
670 		0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
671 		0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
672 		0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
673 		0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
674 		0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
675 		0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
676 		0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
677 		0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
678 		0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
679 		0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
680 		0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
681 		0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
682 		0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
683 		0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
684 		0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
685 		0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
686 		0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
687 		0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
688 		0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
689 		0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d };
690 	//--- calculate and return checksum:
691 	uint32_t result = 0xFFFFFFFF;
692 	for(CharIterator ptr=begin; ptr<end; ptr++)
693 		result = (table[(result^(*ptr)) & 0xff] ^ (result >> 8));
694 	return ~result;
695 }
crc32(const string & s)696 uint32_t crc32(const string& s)
697 {	return crc32(s.begin(), s.end());
698 }
699