1 /* $Id: fileutil.cpp 532212 2017-04-03 13:24:13Z gouriano $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description:
29 * Some file utilities functions/classes.
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistre.hpp>
34 #include <corelib/ncbiutil.hpp>
35 #include <corelib/ncbifile.hpp>
36 #include "fileutil.hpp"
37 #include "srcutil.hpp"
38 #include <serial/error_codes.hpp>
39 #include <set>
40
41
42 #define NCBI_USE_ERRCODE_X Serial_Util
43
44 BEGIN_NCBI_SCOPE
45
// Size, in bytes, of the chunk buffer used by CDelayedOfstream::equals()
// when comparing buffered output against the file already on disk.
static const int BUFFER_SIZE = 4096;
47
SourceFile(const string & name,bool binary)48 SourceFile::SourceFile(const string& name, bool binary)
49 : m_StreamPtr(0), m_Open(false)
50 {
51 if ( name == "stdin" || name == "-" ) {
52 m_StreamPtr = &NcbiCin;
53 }
54 else {
55 if ( !x_Open(name, binary) )
56 ERR_POST_X(1, Fatal << "cannot open file " << name);
57 }
58 }
59
SourceFile(const string & name,const list<string> & dirs,bool binary)60 SourceFile::SourceFile(const string& name, const list<string>& dirs,
61 bool binary)
62 {
63 if ( name == "stdin" || name == "-" ) {
64 m_StreamPtr = &NcbiCin;
65 } else if ( !x_Open(name, binary) ) {
66 ITERATE(list<string>, dir, dirs) {
67 if ( x_Open(Path(*dir, name), binary) ) {
68 return;
69 }
70 }
71 ERR_POST_X(2, Fatal << "cannot open file " << name);
72 }
73 }
74
~SourceFile(void)75 SourceFile::~SourceFile(void)
76 {
77 if ( m_Open ) {
78 delete m_StreamPtr;
79 m_StreamPtr = 0;
80 m_Open = false;
81 }
82 }
83
84
GetType(void) const85 SourceFile::EType SourceFile::GetType(void) const
86 {
87 CDirEntry entry(m_Name);
88 string ext(entry.GetExt());
89 if (NStr::CompareNocase(ext,".asn") == 0) {
90 return eASN;
91 } else if (NStr::CompareNocase(ext,".dtd") == 0) {
92 return eDTD;
93 } else if (NStr::CompareNocase(ext,".xsd") == 0) {
94 return eXSD;
95 } else if (NStr::CompareNocase(ext,".wsdl") == 0) {
96 return eWSDL;
97 } else if (NStr::CompareNocase(ext,".json") == 0) {
98 return eJSON;
99 } else if (NStr::CompareNocase(ext,".jsd") == 0) {
100 return eJSON;
101 }
102 return eUnknown;
103 }
104
105
x_Open(const string & name,bool binary)106 bool SourceFile::x_Open(const string& name, bool binary)
107 {
108 m_Name = name;
109 m_StreamPtr = new CNcbiIfstream(name.c_str(),
110 binary?
111 IOS_BASE::in | IOS_BASE::binary:
112 IOS_BASE::in);
113 m_Open = m_StreamPtr->good();
114 if ( !m_Open ) {
115 delete m_StreamPtr;
116 m_StreamPtr = 0;
117 }
118 return m_Open;
119 }
120
DestinationFile(const string & name,bool binary)121 DestinationFile::DestinationFile(const string& name, bool binary)
122 {
123 if ( name == "stdout" || name == "-" ) {
124 m_StreamPtr = &NcbiCout;
125 m_Open = false;
126 }
127 else {
128 m_StreamPtr = new CNcbiOfstream(name.c_str(),
129 binary?
130 IOS_BASE::out | IOS_BASE::binary:
131 IOS_BASE::out);
132 if ( !*m_StreamPtr ) {
133 delete m_StreamPtr;
134 m_StreamPtr = 0;
135 ERR_POST_X(3, Fatal << "cannot open file " << name);
136 }
137 m_Open = true;
138 }
139 }
140
~DestinationFile(void)141 DestinationFile::~DestinationFile(void)
142 {
143 if ( m_Open ) {
144 delete m_StreamPtr;
145 }
146 }
147
// Path separator configuration used by the helpers below.
// Any definitions of the separator macros inherited from included headers
// are discarded first, so the values chosen here are the ones in effect.
#undef DIR_SEPARATOR_CHAR
#undef DIR_SEPARATOR_CHAR2
#undef DISK_SEPARATOR_CHAR
#undef ALL_SEPARATOR_CHARS
#define PARENT_DIR ".."

#ifdef NCBI_OS_MSWIN
// MS Windows: backslash is the primary separator, slash the alternative
// one, and ':' separates the disk name from the rest of the path.
#  define DIR_SEPARATOR_CHAR '\\'
#  define DIR_SEPARATOR_CHAR2 '/'
#  define DISK_SEPARATOR_CHAR ':'
#  define ALL_SEPARATOR_CHARS ":/\\"
#else
// Everywhere else: a single separator, no alternative and no disk names.
#  define DIR_SEPARATOR_CHAR '/'
#  define ALL_SEPARATOR_CHARS DIR_SEPARATOR_CHAR
#endif
169
// Tell whether 'c' is the disk separator character.
// Always false when DISK_SEPARATOR_CHAR is not defined for the platform.
inline
bool IsDiskSeparator(char c)
{
#ifdef DISK_SEPARATOR_CHAR
    return c == DISK_SEPARATOR_CHAR;
#else
    (void)c; // no disk separator on this platform
    return false;
#endif
}
183
184 inline
IsDirSeparator(char c)185 bool IsDirSeparator(char c)
186 {
187 #ifdef DISK_SEPARATOR_CHAR
188 if ( c == DISK_SEPARATOR_CHAR )
189 return true;
190 #endif
191 #ifdef DIR_SEPARATOR_CHAR2
192 if ( c == DIR_SEPARATOR_CHAR2 )
193 return true;
194 #endif
195 return c == DIR_SEPARATOR_CHAR;
196 }
197
IsLocalPath(const string & path)198 bool IsLocalPath(const string& path)
199 {
200 // determine if path is local to current directory
201 // exclude pathes like:
202 // "../xxx" everywhere
203 // "xxx/../yyy" everywhere
204 // "/xxx/yyy" on unix
205 // "d:xxx" on windows
206 // "HD:folder" on Mac
207 if ( path.empty() )
208 return false;
209
210 if ( IsDirSeparator(path[0]) )
211 return false;
212
213 SIZE_TYPE pos;
214 #ifdef PARENT_DIR
215 SIZE_TYPE parentDirLength = strlen(PARENT_DIR);
216 pos = 0;
217 while ( (pos = path.find(PARENT_DIR, pos)) != NPOS ) {
218 if ( pos == 0 || IsDirSeparator(path[pos - 1]) )
219 return false;
220 SIZE_TYPE end = pos + parentDirLength;
221 if ( end == path.size() || IsDirSeparator(path[end]) )
222 return false;
223 pos = end + 1;
224 }
225 #endif
226 #ifdef DISK_SEPARATOR_CHAR
227 if ( path.find(DISK_SEPARATOR_CHAR) != NPOS )
228 return false;
229 #endif
230 return true;
231 }
232
// Return 'path' as an absolute path.
// An empty path, or one CDirEntry already reports as absolute, is returned
// unchanged; otherwise the path is appended to the current working
// directory and the result is passed through CDirEntry::NormalizePath().
string MakeAbsolutePath(const string& path)
{
    if ( path.empty() || CDirEntry::IsAbsolutePath(path) ) {
        return path;
    }
    const string joined = Path(CDir::GetCwd(), path);
    return CDirEntry::NormalizePath(joined);
}
242
// Join directory 'dir' and 'file' into one path with exactly one
// separator at the joint.
//   - empty 'dir': 'file' is returned as is;
//   - both sides supply a separator: the trailing one of 'dir' is dropped;
//   - neither side supplies one: DIR_SEPARATOR_CHAR is inserted;
//   - otherwise the two parts are simply concatenated.
// An empty 'file' is only traced; the join still takes place.
string Path(const string& dir, const string& file)
{
    if ( dir.empty() ) {
        return file;
    }
    if ( file.empty() ) {
        _TRACE("Path(\"" << dir << "\", \"" << file << "\")");
    }

    const bool dirEndsWithSep = IsDirSeparator(dir[dir.size() - 1]);
    // For an empty 'file' this reads the terminating '\0' (const access).
    const bool fileStartsWithSep = IsDirSeparator(file[0]);

    if ( dirEndsWithSep && fileStartsWithSep ) {
        // Avoid duplicate dir separators
        return dir.substr(0, dir.size() - 1) + file;
    }
    if ( !dirEndsWithSep && !fileStartsWithSep ) {
        return dir + DIR_SEPARATOR_CHAR + file;
    }
    return dir + file;
}
261
// Return the file name part of 'path' with its extension removed:
// everything after the last separator character, cut at the last '.'.
string BaseName(const string& path)
{
    const SIZE_TYPE lastSep = path.find_last_of(ALL_SEPARATOR_CHARS);
    string base = (lastSep == NPOS) ? path : path.substr(lastSep + 1);

    const SIZE_TYPE dot = base.rfind('.');
    if ( dot != NPOS ) {
        base.erase(dot);
    }
    return base;
}
275
// Return the directory part of 'path': everything before the last
// separator character, or an empty string if there is no separator.
// The separator itself is kept when it is the first character of the
// path (root directory) or a disk separator.
string DirName(const string& path)
{
    SIZE_TYPE cut = path.find_last_of(ALL_SEPARATOR_CHARS);
    if ( cut == NPOS ) {
        return NcbiEmptyString;
    }

    const bool keepSeparator = (cut == 0) || IsDiskSeparator(path[cut]);
    if ( keepSeparator ) {
        ++cut; // include separator
    }
    return path.substr(0, cut);
}
290
// Convert 'path' to the 'standard' form that uses '/' as the separator.
// A leading "http://" or "https://" (any letter case) is left untouched.
// In the remainder every native separator becomes '/', then "//" is
// replaced with "/" and "/./" with "/" (one NStr::Replace pass each).
// On MS Windows a disk separator at index 1 (as in "c:") is preserved.
string GetStdPath(const string& path)
{
    // Index of the first character that may be rewritten
    SIZE_TYPE start = 0;
    if ( NStr::StartsWith(path, "http://", NStr::eNocase) ) {
        start = 7;
    }
    else if ( NStr::StartsWith(path, "https://", NStr::eNocase) ) {
        start = 8;
    }

    string result(path);
    for ( SIZE_TYPE i = start; i < result.size(); ++i ) {
#ifdef NCBI_OS_MSWIN
        if ( i == 1 && IsDiskSeparator(result[i]) ) {
            continue;
        }
#endif
        if ( IsDirSeparator(result[i]) ) {
            result[i] = '/';
        }
    }

    const string collapsed = NStr::Replace(result, "//", "/", start);
    return NStr::Replace(collapsed, "/./", "/", start);
}
310
311
// A piece of a string together with its position ('order') among the
// other pieces, plus the two orderings MakeFileName() sorts pieces by.
class SSubString
{
public:
    SSubString(const string& val, size_t ord)
        : value(val), order(ord)
    {
    }

    // Orders pieces by their original position.
    struct ByOrder {
        bool operator()(const SSubString& lhs, const SSubString& rhs) const
        {
            return lhs.order < rhs.order;
        }
    };

    // Orders pieces longest first; pieces of equal length keep their
    // original relative position.
    struct ByLength {
        bool operator()(const SSubString& lhs, const SSubString& rhs) const
        {
            const size_t lhsLen = lhs.value.size();
            const size_t rhsLen = rhs.value.size();
            return lhsLen != rhsLen ? lhsLen > rhsLen
                                    : lhs.order < rhs.order;
        }
    };

    string value;   // text of the piece
    size_t order;   // position of the piece among its siblings
};
339
// Build a file name from 'fname' that, together with 'addLength' more
// characters to be appended by the caller, fits MAX_FILE_NAME_LENGTH.
//   fname     - source name; it is first passed through Identifier().
//   addLength - number of characters reserved for the caller's additions.
// A name that already fits is returned as is. Otherwise the name is split
// at '_' and the longest elements are shortened, one character at a time,
// until enough characters are removed; the elements are then rejoined in
// their original order. If the elements cannot give up that many
// characters, or too little would remain of them, the name is simply
// truncated instead.
// NOTE(review): the truncation length MAX_FILE_NAME_LENGTH - addLength
// assumes addLength does not exceed MAX_FILE_NAME_LENGTH -- confirm
// against callers.
string MakeFileName(const string& fname, size_t addLength)
{
    string name = Identifier(fname);
    size_t fullLength = name.size() + addLength;
    if ( fullLength <= MAX_FILE_NAME_LENGTH )
        return name;
    size_t remove = fullLength - MAX_FILE_NAME_LENGTH;
    // we'll have to truncate very long filename

    _TRACE("MakeFileName(\""<<fname<<"\", "<<addLength<<") remove="<<remove);
    // 1st step: parse name dividing by '_' sorting elements by their size
    SIZE_TYPE removable = 0; // removable part of string
    typedef set<SSubString, SSubString::ByLength> TByLength;
    TByLength byLength;
    {
        SIZE_TYPE curr = 0; // current element position in string
        size_t order = 0; // current element order
        for (;;) {
            SIZE_TYPE und = name.find('_', curr);
            if ( und == NPOS ) {
                // end of string
                break;
            }
            _TRACE("MakeFileName: \""<<name.substr(curr, und - curr)<<"\"");
            removable += (und - curr);
            byLength.insert(SSubString(name.substr(curr, und - curr), order));
            curr = und + 1;
            ++order;
        }
        _TRACE("MakeFileName: \""<<name.substr(curr)<<"\"");
        removable += name.size() - curr;
        byLength.insert(SSubString(name.substr(curr), order));
    }
    _TRACE("MakeFileName: removable="<<removable);

    // if removable part of string too small...
    // (checking 'removable < remove' first keeps the unsigned subtraction
    // from wrapping around, which would wrongly skip this fallback and let
    // the loop below try to shorten already empty elements)
    if ( removable < remove ||
         removable - remove < size_t(MAX_FILE_NAME_LENGTH - addLength) / 2 ) {
        // we'll do plain truncate
        _TRACE("MakeFileName: return \""<<name.substr(0, MAX_FILE_NAME_LENGTH - addLength)<<"\"");
        return name.substr(0, MAX_FILE_NAME_LENGTH - addLength);
    }

    // 2nd step: shorten elements beginning with longest
    // (remove <= removable here, so the longest element is never empty)
    while ( remove > 0 ) {
        // extract the longest element
        SSubString s = *byLength.begin();
        _TRACE("MakeFileName: shorten \""<<s.value<<"\"");
        byLength.erase(byLength.begin());
        // shorten it by one symbol
        s.value = s.value.substr(0, s.value.size() - 1);
        // insert it back
        byLength.insert(s);
        // decrement progress counter
        remove--;
    }
    // 3rd step: reorder elements by their relative order in original string
    typedef set<SSubString, SSubString::ByOrder> TByOrder;
    TByOrder byOrder;
    {
        ITERATE ( TByLength, i, byLength ) {
            byOrder.insert(*i);
        }
    }
    // 4th step: join elements in resulting string
    // (no '_' is emitted while the result is still empty)
    name.erase();
    {
        ITERATE ( TByOrder, i, byOrder ) {
            if ( !name.empty() )
                name += '_';
            name += i->value;
        }
    }
    _TRACE("MakeFileName: return \""<<name<<"\"");
    return name;
}
415
CDelayedOfstream(const string & fileName)416 CDelayedOfstream::CDelayedOfstream(const string& fileName)
417 {
418 open(fileName);
419 }
420
~CDelayedOfstream(void)421 CDelayedOfstream::~CDelayedOfstream(void)
422 {
423 close();
424 }
425
open(const string & fileName)426 void CDelayedOfstream::open(const string& fileName)
427 {
428 close();
429 clear();
430 seekp(0, IOS_BASE::beg);
431 clear(); // eof set?
432 m_FileName = MakeAbsolutePath(fileName);
433 m_Istream.reset(new CNcbiIfstream(m_FileName.c_str()));
434 if ( !*m_Istream ) {
435 _TRACE("cannot open " << m_FileName);
436 m_Istream.reset(0);
437 m_Ostream.reset(new CNcbiOfstream(m_FileName.c_str()));
438 if ( !*m_Ostream ) {
439 _TRACE("cannot create " << m_FileName);
440 setstate(m_Ostream->rdstate());
441 m_Ostream.reset(0);
442 m_FileName.erase();
443 }
444 }
445 }
446
close(void)447 void CDelayedOfstream::close(void)
448 {
449 if ( !is_open() )
450 return;
451 if ( !equals() ) {
452 if ( !rewrite() )
453 setstate(m_Ostream->rdstate());
454 m_Ostream.reset(0);
455 }
456 m_Istream.reset(0);
457 m_FileName.erase();
458 }
459
equals(void)460 bool CDelayedOfstream::equals(void)
461 {
462 if ( !m_Istream.get() )
463 return false;
464 string s = CNcbiOstrstreamToString(*this);
465 size_t count = s.size();
466 const char* ptr = s.data();
467 while ( count > 0 ) {
468 char buffer[BUFFER_SIZE];
469 size_t c = count;
470 if ( c > BUFFER_SIZE )
471 c = BUFFER_SIZE;
472 if ( !m_Istream->read(buffer, c) ) {
473 _TRACE("read fault " << m_FileName <<
474 " need: " << c << " was: " << m_Istream->gcount());
475 return false;
476 }
477 if ( memcmp(buffer, ptr, c) != 0 ) {
478 _TRACE("file differs " << m_FileName);
479 return false;
480 }
481 ptr += c;
482 count -= c;
483 }
484 if ( m_Istream->get() != -1 ) {
485 _TRACE("file too long " << m_FileName);
486 return false;
487 }
488 return true;
489 }
490
rewrite(void)491 bool CDelayedOfstream::rewrite(void)
492 {
493 if ( !m_Ostream.get() ) {
494 m_Ostream.reset(new CNcbiOfstream(m_FileName.c_str()));
495 if ( !*m_Ostream ) {
496 _TRACE("rewrite fault " << m_FileName);
497 return false;
498 }
499 }
500 string s = CNcbiOstrstreamToString(*this);
501 if ( !m_Ostream->write(s.data(), s.size()) ) {
502 _TRACE("write fault " << m_FileName);
503 return false;
504 }
505 m_Ostream->close();
506 if ( !*m_Ostream ) {
507 _TRACE("close fault " << m_FileName);
508 return false;
509 }
510 return true;
511 }
512
Discard(void)513 void CDelayedOfstream::Discard(void)
514 {
515 if ( is_open() ) {
516 m_Ostream.reset(0);
517 m_Istream.reset(0);
518 CFile(m_FileName).Remove();
519 m_FileName.clear();
520 }
521 }
522
Empty(const CNcbiOstrstream & src)523 bool Empty(const CNcbiOstrstream& src)
524 {
525 return IsOssEmpty(const_cast<CNcbiOstrstream&>(src));
526 }
527
Write(CNcbiOstream & out,const CNcbiOstrstream & src)528 CNcbiOstream& Write(CNcbiOstream& out, const CNcbiOstrstream& src)
529 {
530 CNcbiOstrstream& source = const_cast<CNcbiOstrstream&>(src);
531 size_t size = (size_t)GetOssSize(source);
532 if ( size != 0 ) {
533 string str = CNcbiOstrstreamToString(source);
534 out.write(str.data(), size);
535 }
536 return out;
537 }
538
WriteTabbed(CNcbiOstream & out,const CNcbiOstrstream & code,const char * tab)539 CNcbiOstream& WriteTabbed(CNcbiOstream& out, const CNcbiOstrstream& code,
540 const char* tab)
541 {
542 CNcbiOstrstream& source = const_cast<CNcbiOstrstream&>(code);
543 size_t size = (size_t)GetOssSize(source);
544 if ( size != 0 ) {
545 if ( !tab )
546 tab = " ";
547 string str = CNcbiOstrstreamToString(source);
548 const char* ptr = str.data();
549 while ( size > 0 ) {
550 out << tab;
551 const char* endl =
552 reinterpret_cast<const char*>(memchr(ptr, '\n', size));
553 if ( !endl ) { // no more '\n'
554 out.write(ptr, size) << '\n';
555 break;
556 }
557 ++endl; // skip '\n'
558 size_t lineSize = endl - ptr;
559 out.write(ptr, lineSize);
560 ptr = endl;
561 size -= lineSize;
562 }
563 }
564 return out;
565 }
566
567 END_NCBI_SCOPE
568