1 /*  $Id: fileutil.cpp 532212 2017-04-03 13:24:13Z gouriano $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description:
29 *   Some file utilities functions/classes.
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistre.hpp>
34 #include <corelib/ncbiutil.hpp>
35 #include <corelib/ncbifile.hpp>
36 #include "fileutil.hpp"
37 #include "srcutil.hpp"
38 #include <serial/error_codes.hpp>
39 #include <set>
40 
41 
42 #define NCBI_USE_ERRCODE_X   Serial_Util
43 
44 BEGIN_NCBI_SCOPE
45 
// Size, in bytes, of the scratch buffer used by CDelayedOfstream::equals()
// when reading back an existing file chunk by chunk.
static const int BUFFER_SIZE = 4 * 1024;
47 
SourceFile(const string & name,bool binary)48 SourceFile::SourceFile(const string& name, bool binary)
49     : m_StreamPtr(0), m_Open(false)
50 {
51     if ( name == "stdin" || name == "-" ) {
52         m_StreamPtr = &NcbiCin;
53     }
54     else {
55         if ( !x_Open(name, binary) )
56             ERR_POST_X(1, Fatal << "cannot open file " << name);
57     }
58 }
59 
SourceFile(const string & name,const list<string> & dirs,bool binary)60 SourceFile::SourceFile(const string& name, const list<string>& dirs,
61                        bool binary)
62 {
63     if ( name == "stdin" || name == "-" ) {
64         m_StreamPtr = &NcbiCin;
65     } else if ( !x_Open(name, binary) ) {
66         ITERATE(list<string>, dir, dirs) {
67             if ( x_Open(Path(*dir, name), binary) ) {
68                 return;
69             }
70         }
71         ERR_POST_X(2, Fatal << "cannot open file " << name);
72     }
73 }
74 
~SourceFile(void)75 SourceFile::~SourceFile(void)
76 {
77     if ( m_Open ) {
78         delete m_StreamPtr;
79         m_StreamPtr = 0;
80         m_Open = false;
81     }
82 }
83 
84 
GetType(void) const85 SourceFile::EType SourceFile::GetType(void) const
86 {
87     CDirEntry entry(m_Name);
88     string ext(entry.GetExt());
89     if (NStr::CompareNocase(ext,".asn") == 0) {
90         return eASN;
91     } else if (NStr::CompareNocase(ext,".dtd") == 0) {
92         return eDTD;
93     } else if (NStr::CompareNocase(ext,".xsd") == 0) {
94         return eXSD;
95     } else if (NStr::CompareNocase(ext,".wsdl") == 0) {
96         return eWSDL;
97     } else if (NStr::CompareNocase(ext,".json") == 0) {
98         return eJSON;
99     } else if (NStr::CompareNocase(ext,".jsd") == 0) {
100         return eJSON;
101     }
102     return eUnknown;
103 }
104 
105 
x_Open(const string & name,bool binary)106 bool SourceFile::x_Open(const string& name, bool binary)
107 {
108     m_Name = name;
109     m_StreamPtr = new CNcbiIfstream(name.c_str(),
110                                     binary?
111                                         IOS_BASE::in | IOS_BASE::binary:
112                                         IOS_BASE::in);
113     m_Open = m_StreamPtr->good();
114     if ( !m_Open ) {
115         delete m_StreamPtr;
116         m_StreamPtr = 0;
117     }
118     return m_Open;
119 }
120 
DestinationFile(const string & name,bool binary)121 DestinationFile::DestinationFile(const string& name, bool binary)
122 {
123     if ( name == "stdout" || name == "-" ) {
124         m_StreamPtr = &NcbiCout;
125         m_Open = false;
126     }
127     else {
128         m_StreamPtr = new CNcbiOfstream(name.c_str(),
129                                         binary?
130                                             IOS_BASE::out | IOS_BASE::binary:
131                                             IOS_BASE::out);
132         if ( !*m_StreamPtr ) {
133             delete m_StreamPtr;
134             m_StreamPtr = 0;
135             ERR_POST_X(3, Fatal << "cannot open file " << name);
136         }
137         m_Open = true;
138     }
139 }
140 
~DestinationFile(void)141 DestinationFile::~DestinationFile(void)
142 {
143     if ( m_Open ) {
144         delete m_StreamPtr;
145     }
146 }
147 
// Path syntax parameters.
// Any earlier definitions of the separator macros are dropped first, so
// the values below are the only ones in effect for the rest of this file.
#undef DIR_SEPARATOR_CHAR
#undef DIR_SEPARATOR_CHAR2
#undef DISK_SEPARATOR_CHAR
#undef ALL_SEPARATOR_CHARS

// Name of the parent-directory path component.
#define PARENT_DIR ".."

#if defined(NCBI_OS_MSWIN)
// Windows: backslash and slash separate directories, colon ends a disk name.
#  define DIR_SEPARATOR_CHAR '\\'
#  define DIR_SEPARATOR_CHAR2 '/'
#  define DISK_SEPARATOR_CHAR ':'
#  define ALL_SEPARATOR_CHARS ":/\\"
#else
// Elsewhere: a single directory separator, no alternative and no disk
// separator (DIR_SEPARATOR_CHAR2 and DISK_SEPARATOR_CHAR stay undefined).
#  define DIR_SEPARATOR_CHAR '/'
#  define ALL_SEPARATOR_CHARS DIR_SEPARATOR_CHAR
#endif
169 
// Tell whether 'c' is the disk separator character.
// When DISK_SEPARATOR_CHAR is not defined there is no such character and
// the answer is always false.
inline
bool IsDiskSeparator(char c)
{
#ifdef DISK_SEPARATOR_CHAR
    return c == DISK_SEPARATOR_CHAR;
#else
    static_cast<void>(c); // argument is irrelevant without a disk separator
    return false;
#endif
}
183 
184 inline
IsDirSeparator(char c)185 bool IsDirSeparator(char c)
186 {
187 #ifdef DISK_SEPARATOR_CHAR
188     if ( c == DISK_SEPARATOR_CHAR )
189         return true;
190 #endif
191 #ifdef DIR_SEPARATOR_CHAR2
192     if ( c == DIR_SEPARATOR_CHAR2 )
193         return true;
194 #endif
195     return c == DIR_SEPARATOR_CHAR;
196 }
197 
IsLocalPath(const string & path)198 bool IsLocalPath(const string& path)
199 {
200     // determine if path is local to current directory
201     // exclude pathes like:
202     // "../xxx" everywhere
203     // "xxx/../yyy" everywhere
204     // "/xxx/yyy"  on unix
205     // "d:xxx" on windows
206     // "HD:folder" on Mac
207     if ( path.empty() )
208         return false;
209 
210     if ( IsDirSeparator(path[0]) )
211         return false;
212 
213     SIZE_TYPE pos;
214 #ifdef PARENT_DIR
215     SIZE_TYPE parentDirLength = strlen(PARENT_DIR);
216     pos = 0;
217     while ( (pos = path.find(PARENT_DIR, pos)) != NPOS ) {
218         if ( pos == 0 || IsDirSeparator(path[pos - 1]) )
219             return false;
220         SIZE_TYPE end = pos + parentDirLength;
221         if ( end == path.size() || IsDirSeparator(path[end]) )
222             return false;
223         pos = end + 1;
224     }
225 #endif
226 #ifdef DISK_SEPARATOR_CHAR
227     if ( path.find(DISK_SEPARATOR_CHAR) != NPOS )
228         return false;
229 #endif
230     return true;
231 }
232 
// Return 'path' as an absolute path.
// An empty path, or one that CDirEntry::IsAbsolutePath() already accepts,
// is returned unchanged; otherwise the path is joined to the current
// working directory and passed through CDirEntry::NormalizePath().
string MakeAbsolutePath(const string& path)
{
    if ( path.empty() || CDirEntry::IsAbsolutePath(path) ) {
        return path;
    }
    const string joined = Path(CDir::GetCwd(), path);
    return CDirEntry::NormalizePath(joined);
}
242 
// Join directory 'dir' and file name 'file' into one path, leaving
// exactly one separator at the junction:
//   - empty 'dir'                       -> 'file' unchanged;
//   - separator on both sides           -> the one ending 'dir' is dropped;
//   - separator on neither side         -> DIR_SEPARATOR_CHAR is inserted;
//   - separator on exactly one side     -> plain concatenation.
// An empty 'file' is only traced; it is then treated as not starting with
// a separator.
string Path(const string& dir, const string& file)
{
    if ( dir.empty() ) {
        return file;
    }
    if ( file.empty() ) {
        _TRACE("Path(\"" << dir << "\", \"" << file << "\")");
    }

    const bool dirEndsWithSep    = IsDirSeparator(dir[dir.size() - 1]);
    const bool fileStartsWithSep = IsDirSeparator(file[0]);

    if ( dirEndsWithSep && fileStartsWithSep ) {
        // avoid a doubled separator
        return dir.substr(0, dir.size()-1) + file;
    }
    if ( !dirEndsWithSep && !fileStartsWithSep ) {
        return dir + DIR_SEPARATOR_CHAR + file;
    }
    return dir + file;
}
261 
// Return the file name part of 'path' with its directory and extension
// removed: everything up to and including the last separator is dropped,
// then everything from the last '.' of what remains.
string BaseName(const string& path)
{
    const SIZE_TYPE lastSep = path.find_last_of(ALL_SEPARATOR_CHARS);
    string base = ( lastSep == NPOS ) ? path : path.substr(lastSep + 1);

    const SIZE_TYPE dot = base.rfind('.');
    if ( dot != NPOS ) {
        base.erase(dot);
    }
    return base;
}
275 
// Return the directory part of 'path': everything before its last
// separator.  The separator itself is kept only when it is the first
// character of the path (root directory) or a disk separator.
// A path without any separator yields an empty string.
string DirName(const string& path)
{
    const SIZE_TYPE sepPos = path.find_last_of(ALL_SEPARATOR_CHARS);
    if ( sepPos == NPOS ) {
        return NcbiEmptyString;
    }
    const bool keepSeparator =
        ( sepPos == 0 ) || IsDiskSeparator(path[sepPos]);
    return path.substr(0, keepSeparator ? sepPos + 1 : sepPos);
}
290 
// Convert 'path' to the 'standard' form that uses '/' as the separator.
// Every native separator is replaced with '/', then runs of "//" and
// "/./" sequences are collapsed to a single '/'.
// A leading "http://" or "https://" (any letter case) is left untouched:
// processing starts right after it.  On Windows a disk separator in
// position 1 ("c:...") is preserved as well.
string GetStdPath(const string& path)
{
    string stdpath = path;

    // Offset of the first character that may be rewritten.
    SIZE_TYPE ibeg = 0;
    if ( NStr::StartsWith(path, "http://", NStr::eNocase) ) {
        ibeg = 7;
    }
    else if ( NStr::StartsWith(path, "https://", NStr::eNocase) ) {
        ibeg = 8;
    }

    // Replace each native separator character with the 'standard' one.
    for (SIZE_TYPE i=ibeg ; i < stdpath.size(); i++) {
#ifdef NCBI_OS_MSWIN
        if ( i==1 && IsDiskSeparator(stdpath[i]) ) {
            continue;
        }
#endif
        if ( IsDirSeparator(stdpath[i]) )
            stdpath[i] = '/';
    }

    // A single replacement pass does not rescan its own output, so "///"
    // would be left as "//" and "/././" as "/./".  Repeat until no match
    // remains; every pass shortens the string, so the loops terminate.
    while ( stdpath.find("//", ibeg) != NPOS ) {
        stdpath = NStr::Replace(stdpath, "//", "/", ibeg);
    }
    while ( stdpath.find("/./", ibeg) != NPOS ) {
        stdpath = NStr::Replace(stdpath, "/./", "/", ibeg);
    }
    return stdpath;
}
310 
311 
// One '_'-separated element of a name: its text plus its ordinal position
// in the original string.  Used by MakeFileName() to shorten the longest
// elements first and then put them back in their original order.
class SSubString
{
public:
    SSubString(const string& val, size_t ord)
        : value(val), order(ord)
        {
        }

    // Strict weak ordering by original position only.
    struct ByOrder {
        bool operator()(const SSubString& lhs, const SSubString& rhs) const
            {
                return lhs.order < rhs.order;
            }
    };
    // Strict weak ordering that puts longer values first; values of equal
    // length are ordered by original position.
    struct ByLength {
        bool operator()(const SSubString& lhs, const SSubString& rhs) const
            {
                const size_t lhsLength = lhs.value.size();
                const size_t rhsLength = rhs.value.size();
                if ( lhsLength != rhsLength )
                    return lhsLength > rhsLength;
                return lhs.order < rhs.order;
            }
    };

    string value; // element text
    size_t order; // position of the element in the original string
};
339 
MakeFileName(const string & fname,size_t addLength)340 string MakeFileName(const string& fname, size_t addLength)
341 {
342     string name = Identifier(fname);
343     size_t fullLength = name.size() + addLength;
344     if ( fullLength <= MAX_FILE_NAME_LENGTH )
345         return name;
346     size_t remove = fullLength - MAX_FILE_NAME_LENGTH;
347     // we'll have to truncate very long filename
348 
349     _TRACE("MakeFileName(\""<<fname<<"\", "<<addLength<<") remove="<<remove);
350     // 1st step: parse name dividing by '_' sorting elements by their size
351     SIZE_TYPE removable = 0; // removable part of string
352     typedef set<SSubString, SSubString::ByLength> TByLength;
353     TByLength byLength;
354     {
355         SIZE_TYPE curr = 0; // current element position in string
356         size_t order = 0; // current element order
357         for (;;) {
358             SIZE_TYPE und = name.find('_', curr);
359             if ( und == NPOS ) {
360                 // end of string
361                 break;
362             }
363             _TRACE("MakeFileName: \""<<name.substr(curr, und - curr)<<"\"");
364             removable += (und - curr);
365             byLength.insert(SSubString(name.substr(curr, und - curr), order));
366             curr = und + 1;
367             ++order;
368         }
369         _TRACE("MakeFileName: \""<<name.substr(curr)<<"\"");
370         removable += name.size() - curr;
371         byLength.insert(SSubString(name.substr(curr), order));
372     }
373     _TRACE("MakeFileName: removable="<<removable);
374 
375     // if removable part of string too small...
376     if ( removable - remove < size_t(MAX_FILE_NAME_LENGTH - addLength) / 2 ) {
377         // we'll do plain truncate
378         _TRACE("MakeFileName: return \""<<name.substr(0, MAX_FILE_NAME_LENGTH - addLength)<<"\"");
379         return name.substr(0, MAX_FILE_NAME_LENGTH - addLength);
380     }
381 
382     // 2nd step: shorten elementes beginning with longest
383     while ( remove > 0 ) {
384         // extract most long element
385         SSubString s = *byLength.begin();
386         _TRACE("MakeFileName: shorten \""<<s.value<<"\"");
387         byLength.erase(byLength.begin());
388         // shorten it by one symbol
389         s.value = s.value.substr(0, s.value.size() - 1);
390         // insert it back
391         byLength.insert(s);
392         // decrement progress counter
393         remove--;
394     }
395     // 3rd step: reorder elements by their relative order in original string
396     typedef set<SSubString, SSubString::ByOrder> TByOrder;
397     TByOrder byOrder;
398     {
399         ITERATE ( TByLength, i, byLength ) {
400             byOrder.insert(*i);
401         }
402     }
403     // 4th step: join elements in resulting string
404     name.erase();
405     {
406         ITERATE ( TByOrder, i, byOrder ) {
407             if ( !name.empty() )
408                 name += '_';
409             name += i->value;
410         }
411     }
412     _TRACE("MakeFileName: return \""<<name<<"\"");
413     return name;
414 }
415 
CDelayedOfstream(const string & fileName)416 CDelayedOfstream::CDelayedOfstream(const string& fileName)
417 {
418     open(fileName);
419 }
420 
~CDelayedOfstream(void)421 CDelayedOfstream::~CDelayedOfstream(void)
422 {
423     close();
424 }
425 
open(const string & fileName)426 void CDelayedOfstream::open(const string& fileName)
427 {
428     close();
429     clear();
430     seekp(0, IOS_BASE::beg);
431     clear(); // eof set?
432     m_FileName = MakeAbsolutePath(fileName);
433     m_Istream.reset(new CNcbiIfstream(m_FileName.c_str()));
434     if ( !*m_Istream ) {
435         _TRACE("cannot open " << m_FileName);
436         m_Istream.reset(0);
437         m_Ostream.reset(new CNcbiOfstream(m_FileName.c_str()));
438         if ( !*m_Ostream ) {
439             _TRACE("cannot create " << m_FileName);
440             setstate(m_Ostream->rdstate());
441             m_Ostream.reset(0);
442             m_FileName.erase();
443         }
444     }
445 }
446 
close(void)447 void CDelayedOfstream::close(void)
448 {
449     if ( !is_open() )
450         return;
451     if ( !equals() ) {
452         if ( !rewrite() )
453             setstate(m_Ostream->rdstate());
454         m_Ostream.reset(0);
455     }
456     m_Istream.reset(0);
457     m_FileName.erase();
458 }
459 
equals(void)460 bool CDelayedOfstream::equals(void)
461 {
462     if ( !m_Istream.get() )
463         return false;
464     string s = CNcbiOstrstreamToString(*this);
465     size_t count = s.size();
466     const char* ptr = s.data();
467     while ( count > 0 ) {
468         char buffer[BUFFER_SIZE];
469         size_t c = count;
470         if ( c > BUFFER_SIZE )
471             c = BUFFER_SIZE;
472         if ( !m_Istream->read(buffer, c) ) {
473             _TRACE("read fault " << m_FileName <<
474                    " need: " << c << " was: " << m_Istream->gcount());
475             return false;
476         }
477         if ( memcmp(buffer, ptr, c) != 0 ) {
478             _TRACE("file differs " << m_FileName);
479             return false;
480         }
481         ptr += c;
482         count -= c;
483     }
484     if ( m_Istream->get() != -1 ) {
485         _TRACE("file too long " << m_FileName);
486         return false;
487     }
488     return true;
489 }
490 
rewrite(void)491 bool CDelayedOfstream::rewrite(void)
492 {
493     if ( !m_Ostream.get() ) {
494         m_Ostream.reset(new CNcbiOfstream(m_FileName.c_str()));
495         if ( !*m_Ostream ) {
496             _TRACE("rewrite fault " << m_FileName);
497             return false;
498         }
499     }
500     string s = CNcbiOstrstreamToString(*this);
501     if ( !m_Ostream->write(s.data(), s.size()) ) {
502         _TRACE("write fault " << m_FileName);
503         return false;
504     }
505     m_Ostream->close();
506     if ( !*m_Ostream ) {
507         _TRACE("close fault " << m_FileName);
508         return false;
509     }
510     return true;
511 }
512 
Discard(void)513 void CDelayedOfstream::Discard(void)
514 {
515     if ( is_open() ) {
516         m_Ostream.reset(0);
517         m_Istream.reset(0);
518         CFile(m_FileName).Remove();
519         m_FileName.clear();
520     }
521 }
522 
Empty(const CNcbiOstrstream & src)523 bool Empty(const CNcbiOstrstream& src)
524 {
525     return IsOssEmpty(const_cast<CNcbiOstrstream&>(src));
526 }
527 
Write(CNcbiOstream & out,const CNcbiOstrstream & src)528 CNcbiOstream& Write(CNcbiOstream& out, const CNcbiOstrstream& src)
529 {
530     CNcbiOstrstream& source = const_cast<CNcbiOstrstream&>(src);
531     size_t size = (size_t)GetOssSize(source);
532     if ( size != 0 ) {
533         string str = CNcbiOstrstreamToString(source);
534         out.write(str.data(), size);
535     }
536     return out;
537 }
538 
WriteTabbed(CNcbiOstream & out,const CNcbiOstrstream & code,const char * tab)539 CNcbiOstream& WriteTabbed(CNcbiOstream& out, const CNcbiOstrstream& code,
540                           const char* tab)
541 {
542     CNcbiOstrstream& source = const_cast<CNcbiOstrstream&>(code);
543     size_t size = (size_t)GetOssSize(source);
544     if ( size != 0 ) {
545         if ( !tab )
546             tab = "    ";
547         string str = CNcbiOstrstreamToString(source);
548         const char* ptr = str.data();
549         while ( size > 0 ) {
550             out << tab;
551             const char* endl =
552                 reinterpret_cast<const char*>(memchr(ptr, '\n', size));
553             if ( !endl ) { // no more '\n'
554                 out.write(ptr, size) << '\n';
555                 break;
556             }
557             ++endl; // skip '\n'
558             size_t lineSize = endl - ptr;
559             out.write(ptr, lineSize);
560             ptr = endl;
561             size -= lineSize;
562         }
563     }
564     return out;
565 }
566 
567 END_NCBI_SCOPE
568