1 /* $Id: CoinFileIO.cpp 2083 2019-01-06 19:38:09Z unxusr $ */
2 // Copyright (C) 2005, COIN-OR.  All Rights Reserved.
3 // This code is licensed under the terms of the Eclipse Public License (EPL).
4 
5 #if defined(_MSC_VER)
6 // Turn off compiler warning about long names
7 #pragma warning(disable : 4786)
8 #endif
9 
10 #include "CoinUtilsConfig.h"
11 #include "CoinFileIO.hpp"
12 
13 #include "CoinError.hpp"
14 #include "CoinHelperFunctions.hpp"
15 
16 #include <vector>
17 #include <cstring>
18 
19 // ------ CoinFileIOBase -------
20 
CoinFileIOBase(const std::string & fileName)21 CoinFileIOBase::CoinFileIOBase(const std::string &fileName)
22   : fileName_(fileName)
23 {
24 }
25 
~CoinFileIOBase()26 CoinFileIOBase::~CoinFileIOBase()
27 {
28 }
29 
getFileName() const30 const char *CoinFileIOBase::getFileName() const
31 {
32   return fileName_.c_str();
33 }
34 
35 // ------------------------------------------------------
36 //   next we implement some subclasses of CoinFileInput
37 //   for plain text and compressed files
38 // ------------------------------------------------------
39 
40 // ------ Input for plain text ------
41 
42 #include <stdio.h>
43 
44 // This reads plain text files
CoinPlainFileInput(const std::string & fileName)45 CoinPlainFileInput::CoinPlainFileInput(const std::string &fileName)
46   : CoinFileInput(fileName)
47   , f_(0)
48 {
49   readType_ = "plain";
50   if (fileName != "stdin") {
51     f_ = fopen(fileName.c_str(), "r");
52     if (f_ == 0)
53       throw CoinError("Could not open file for reading!",
54         "CoinPlainFileInput",
55         "CoinPlainFileInput");
56   } else {
57     f_ = stdin;
58   }
59 }
60 /// When already opened
CoinPlainFileInput(FILE * fp)61 CoinPlainFileInput::CoinPlainFileInput(FILE *fp)
62   : CoinFileInput("")
63   , f_(fp)
64 {
65   readType_ = "plain";
66 }
67 
~CoinPlainFileInput()68 CoinPlainFileInput::~CoinPlainFileInput()
69 {
70   if (f_ != 0)
71     fclose(f_);
72 }
73 
read(void * buffer,int size)74 int CoinPlainFileInput::read(void *buffer, int size)
75 {
76   return static_cast< int >(fread(buffer, 1, size, f_));
77 }
78 
gets(char * buffer,int size)79 char *CoinPlainFileInput::gets(char *buffer, int size)
80 {
81   return fgets(buffer, size, f_);
82 }
83 
84 // ------ helper class supporting buffered gets -------
85 
86 // This is a CoinFileInput class to handle cases, where the gets method
87 // is not easy to implement (i.e. bzlib has no equivalent to gets, and
88 // zlib's gzgets is extremely slow). It's subclasses only have to implement
89 // the readRaw method, while the read and gets methods are handled by this
90 // class using an internal buffer.
91 class CoinGetslessFileInput : public CoinFileInput {
92 public:
CoinGetslessFileInput(const std::string & fileName)93   CoinGetslessFileInput(const std::string &fileName)
94     : CoinFileInput(fileName)
95     , dataBuffer_(8 * 1024)
96     , dataStart_(&dataBuffer_[0])
97     , dataEnd_(&dataBuffer_[0])
98   {
99   }
100 
~CoinGetslessFileInput()101   virtual ~CoinGetslessFileInput() {}
102 
read(void * buffer,int size)103   virtual int read(void *buffer, int size)
104   {
105     if (size <= 0)
106       return 0;
107 
108     // return value
109     int r = 0;
110 
111     // treat destination as char *
112     char *dest = static_cast< char * >(buffer);
113 
114     // First consume data from buffer if available.
115     if (dataStart_ < dataEnd_) {
116       int amount = static_cast< int >(dataEnd_ - dataStart_);
117       if (amount > size)
118         amount = size;
119 
120       CoinMemcpyN(dataStart_, amount, dest);
121 
122       dest += amount;
123       size -= amount;
124 
125       dataStart_ += amount;
126 
127       r = amount;
128     }
129 
130     // If we require more data, use readRaw.
131     // We don't use the buffer here, as readRaw is ecpected to be efficient.
132     if (size > 0)
133       r += readRaw(dest, size);
134 
135     return r;
136   }
137 
gets(char * buffer,int size)138   virtual char *gets(char *buffer, int size)
139   {
140     if (size <= 1)
141       return 0;
142 
143     char *dest = buffer;
144     char *destLast = dest + size - 2; // last position allowed to be written
145 
146     bool initiallyEmpty = (dataStart_ == dataEnd_);
147 
148     for (;;) {
149       // refill dataBuffer if needed
150       if (dataStart_ == dataEnd_) {
151         dataStart_ = dataEnd_ = &dataBuffer_[0];
152         int count = readRaw(dataStart_, static_cast< int >(dataBuffer_.size()));
153 
154         // at EOF?
155         if (count <= 0) {
156           *dest = 0;
157           // if it was initially empty we had nothing written and should
158           // return 0, otherwise at least the buffer contents were
159           // transfered and buffer has to be returned.
160           return initiallyEmpty ? 0 : buffer;
161         }
162 
163         dataEnd_ = dataStart_ + count;
164       }
165 
166       // copy character from buffer
167       *dest = *dataStart_++;
168 
169       // terminate, if character was \n or bufferEnd was reached
170       if (*dest == '\n' || dest == destLast) {
171         *++dest = 0;
172         return buffer;
173       }
174 
175       ++dest;
176     }
177 
178     // we should never reach this place
179     throw CoinError("Reached unreachable code!",
180       "gets",
181       "CoinGetslessFileInput");
182   }
183 
184 protected:
185   // This should be implemented by the subclasses. It essentially behaves
186   // like fread: the location pointed to by buffer should be filled with
187   // size bytes. Return value is the number of bytes written (0 indicates EOF).
188   virtual int readRaw(void *buffer, int size) = 0;
189 
190 private:
191   std::vector< char > dataBuffer_; // memory used for buffering
192   char *dataStart_; // pointer to currently buffered data
193   char *dataEnd_; // pointer to "one behind last data element"
194 };
195 
196 // -------- input for gzip compressed files -------
197 
198 #ifdef COIN_HAS_ZLIB
199 
200 #include <zlib.h>
201 
202 // This class handles gzip'ed files using libz.
203 // While zlib offers the gzread and gzgets functions which do all we want,
204 // the gzgets is _very_ slow as it gets single bytes via the complex gzread.
205 // So we use the CoinGetslessFileInput as base.
206 class CoinGzipFileInput : public CoinGetslessFileInput {
207 public:
CoinGzipFileInput(const std::string & fileName)208   CoinGzipFileInput(const std::string &fileName)
209     : CoinGetslessFileInput(fileName)
210     , gzf_(0)
211   {
212     readType_ = "zlib";
213     gzf_ = gzopen(fileName.c_str(), "r");
214     if (gzf_ == 0)
215       throw CoinError("Could not open file for reading!",
216         "CoinGzipFileInput",
217         "CoinGzipFileInput");
218   }
219 
~CoinGzipFileInput()220   virtual ~CoinGzipFileInput()
221   {
222     if (gzf_ != 0)
223       gzclose(gzf_);
224   }
225 
226 protected:
readRaw(void * buffer,int size)227   virtual int readRaw(void *buffer, int size)
228   {
229     return gzread(gzf_, buffer, size);
230   }
231 
232 private:
233   gzFile gzf_;
234 };
235 
236 #endif // COIN_HAS_ZLIB
237 
238 // ------- input for bzip2 compressed files ------
239 
240 #ifdef COIN_HAS_BZLIB
241 
242 #include <bzlib.h>
243 
244 // This class handles files compressed by bzip2 using libbz.
245 // As bzlib has no builtin gets, we use the CoinGetslessFileInput.
246 class CoinBzip2FileInput : public CoinGetslessFileInput {
247 public:
CoinBzip2FileInput(const std::string & fileName)248   CoinBzip2FileInput(const std::string &fileName)
249     : CoinGetslessFileInput(fileName)
250     , f_(0)
251     , bzf_(0)
252   {
253     int bzError = BZ_OK;
254     readType_ = "bzlib";
255 
256     f_ = fopen(fileName.c_str(), "r");
257 
258     if (f_ != 0)
259       bzf_ = BZ2_bzReadOpen(&bzError, f_, 0, 0, 0, 0);
260 
261     if (f_ == 0 || bzError != BZ_OK || bzf_ == 0)
262       throw CoinError("Could not open file for reading!",
263         "CoinBzip2FileInput",
264         "CoinBzip2FileInput");
265   }
266 
~CoinBzip2FileInput()267   virtual ~CoinBzip2FileInput()
268   {
269     int bzError = BZ_OK;
270     if (bzf_ != 0)
271       BZ2_bzReadClose(&bzError, bzf_);
272 
273     if (f_ != 0)
274       fclose(f_);
275   }
276 
277 protected:
readRaw(void * buffer,int size)278   virtual int readRaw(void *buffer, int size)
279   {
280     int bzError = BZ_OK;
281     int count = BZ2_bzRead(&bzError, bzf_, buffer, size);
282 
283     if (bzError == BZ_OK || bzError == BZ_STREAM_END)
284       return count;
285 
286     // Error?
287     return 0;
288   }
289 
290 private:
291   FILE *f_;
292   BZFILE *bzf_;
293 };
294 
295 #endif // COIN_HAS_BZLIB
296 
297 // ----- implementation of CoinFileInput's methods
298 
299 /// indicates whether CoinFileInput supports gzip'ed files
haveGzipSupport()300 bool CoinFileInput::haveGzipSupport()
301 {
302 #ifdef COIN_HAS_ZLIB
303   return true;
304 #else
305   return false;
306 #endif
307 }
308 
309 /// indicates whether CoinFileInput supports bzip2'ed files
haveBzip2Support()310 bool CoinFileInput::haveBzip2Support()
311 {
312 #ifdef COIN_HAS_BZLIB
313   return true;
314 #else
315   return false;
316 #endif
317 }
318 
create(const std::string & fileName)319 CoinFileInput *CoinFileInput::create(const std::string &fileName)
320 {
321   // first try to open file, and read first bytes
322   unsigned char header[4];
323   size_t count; // So stdin will be plain file
324   if (fileName != "stdin") {
325     FILE *f = fopen(fileName.c_str(), "r");
326 
327     if (f == 0)
328       throw CoinError("Could not open file for reading!",
329         "create",
330         "CoinFileInput");
331     count = fread(header, 1, 4, f);
332     fclose(f);
333   } else {
334     // Reading from stdin - for moment not compressed
335     count = 0; // So stdin will be plain file
336   }
337   // gzip files start with the magic numbers 0x1f 0x8b
338   if (count >= 2 && header[0] == 0x1f && header[1] == 0x8b) {
339 #ifdef COIN_HAS_ZLIB
340     return new CoinGzipFileInput(fileName);
341 #else
342     throw CoinError("Cannot read gzip'ed file because zlib was "
343                     "not compiled into COIN!",
344       "create",
345       "CoinFileInput");
346 #endif
347   }
348 
349   // bzip2 files start with the string "BZh"
350   if (count >= 3 && header[0] == 'B' && header[1] == 'Z' && header[2] == 'h') {
351 #ifdef COIN_HAS_BZLIB
352     return new CoinBzip2FileInput(fileName);
353 #else
354     throw CoinError("Cannot read bzip2'ed file because bzlib was "
355                     "not compiled into COIN!",
356       "create",
357       "CoinFileInput");
358 #endif
359   }
360 
361   // fallback: probably plain text file
362   return new CoinPlainFileInput(fileName);
363 }
364 
CoinFileInput(const std::string & fileName)365 CoinFileInput::CoinFileInput(const std::string &fileName)
366   : CoinFileIOBase(fileName)
367 {
368 }
369 
~CoinFileInput()370 CoinFileInput::~CoinFileInput()
371 {
372 }
373 
374 // ------------------------------------------------------
375 //   Some subclasses of CoinFileOutput
376 //   for plain text and compressed files
377 // ------------------------------------------------------
378 
379 // -------- CoinPlainFileOutput ---------
380 
381 // Class to handle output to text files without compression.
382 class CoinPlainFileOutput : public CoinFileOutput {
383 public:
CoinPlainFileOutput(const std::string & fileName)384   CoinPlainFileOutput(const std::string &fileName)
385     : CoinFileOutput(fileName)
386     , f_(0)
387   {
388     if (fileName == "-" || fileName == "stdout") {
389       f_ = stdout;
390     } else {
391       f_ = fopen(fileName.c_str(), "w");
392       if (f_ == 0)
393         throw CoinError("Could not open file for writing!",
394           "CoinPlainFileOutput",
395           "CoinPlainFileOutput");
396     }
397   }
398 
~CoinPlainFileOutput()399   virtual ~CoinPlainFileOutput()
400   {
401     if (f_ != 0 && f_ != stdout)
402       fclose(f_);
403   }
404 
write(const void * buffer,int size)405   virtual int write(const void *buffer, int size)
406   {
407     return static_cast< int >(fwrite(buffer, 1, size, f_));
408   }
409 
410   // we have something better than the default implementation
puts(const char * s)411   virtual bool puts(const char *s)
412   {
413     return fputs(s, f_) >= 0;
414   }
415 
416 private:
417   FILE *f_;
418 };
419 
420 // ------- CoinGzipFileOutput ---------
421 
422 #ifdef COIN_HAS_ZLIB
423 
424 // no need to include the header, as this was done for the input class
425 
426 // Handle output with gzip compression
427 class CoinGzipFileOutput : public CoinFileOutput {
428 public:
CoinGzipFileOutput(const std::string & fileName)429   CoinGzipFileOutput(const std::string &fileName)
430     : CoinFileOutput(fileName)
431     , gzf_(0)
432   {
433     gzf_ = gzopen(fileName.c_str(), "w");
434     if (gzf_ == 0)
435       throw CoinError("Could not open file for writing!",
436         "CoinGzipFileOutput",
437         "CoinGzipFileOutput");
438   }
439 
~CoinGzipFileOutput()440   virtual ~CoinGzipFileOutput()
441   {
442     if (gzf_ != 0)
443       gzclose(gzf_);
444   }
445 
write(const void * buffer,int size)446   virtual int write(const void *buffer, int size)
447   {
448     return gzwrite(gzf_, const_cast< void * >(buffer), size);
449   }
450 
451   // as zlib's gzputs is no more clever than our own, there's
452   // no need to replace the default.
453 
454 private:
455   gzFile gzf_;
456 };
457 
458 #endif // COIN_HAS_ZLIB
459 
460 // ------- CoinBzip2FileOutput -------
461 
462 #ifdef COIN_HAS_BZLIB
463 
464 // no need to include the header, as this was done for the input class
465 
466 // Output to bzip2 compressed file
467 class CoinBzip2FileOutput : public CoinFileOutput {
468 public:
CoinBzip2FileOutput(const std::string & fileName)469   CoinBzip2FileOutput(const std::string &fileName)
470     : CoinFileOutput(fileName)
471     , f_(0)
472     , bzf_(0)
473   {
474     int bzError = BZ_OK;
475 
476     f_ = fopen(fileName.c_str(), "w");
477 
478     if (f_ != 0)
479       bzf_ = BZ2_bzWriteOpen(&bzError, f_,
480         9, /* Number of 100k blocks used for compression.
481 				    Must be between 1 and 9 inclusive. As 9
482 				    gives best compression and I guess we can
483 				    spend some memory, we use it. */
484         0, /* verbosity */
485         30 /* suggested by bzlib manual */);
486 
487     if (f_ == 0 || bzError != BZ_OK || bzf_ == 0)
488       throw CoinError("Could not open file for writing!",
489         "CoinBzip2FileOutput",
490         "CoinBzip2FileOutput");
491   }
492 
~CoinBzip2FileOutput()493   virtual ~CoinBzip2FileOutput()
494   {
495     int bzError = BZ_OK;
496     if (bzf_ != 0)
497       BZ2_bzWriteClose(&bzError, bzf_, 0, 0, 0);
498 
499     if (f_ != 0)
500       fclose(f_);
501   }
502 
write(const void * buffer,int size)503   virtual int write(const void *buffer, int size)
504   {
505     int bzError = BZ_OK;
506     BZ2_bzWrite(&bzError, bzf_, const_cast< void * >(buffer), size);
507     return (bzError == BZ_OK) ? size : 0;
508   }
509 
510 private:
511   FILE *f_;
512   BZFILE *bzf_;
513 };
514 
515 #endif // COIN_HAS_BZLIB
516 
517 // ------- implementation of CoinFileOutput's methods
518 
compressionSupported(Compression compression)519 bool CoinFileOutput::compressionSupported(Compression compression)
520 {
521   switch (compression) {
522   case COMPRESS_NONE:
523     return true;
524 
525   case COMPRESS_GZIP:
526 #ifdef COIN_HAS_ZLIB
527     return true;
528 #else
529     return false;
530 #endif
531 
532   case COMPRESS_BZIP2:
533 #ifdef COIN_HAS_BZLIB
534     return true;
535 #else
536     return false;
537 #endif
538 
539   default:
540     return false;
541   }
542 }
543 
create(const std::string & fileName,Compression compression)544 CoinFileOutput *CoinFileOutput::create(const std::string &fileName,
545   Compression compression)
546 {
547   switch (compression) {
548   case COMPRESS_NONE:
549     return new CoinPlainFileOutput(fileName);
550 
551   case COMPRESS_GZIP:
552 #ifdef COIN_HAS_ZLIB
553     return new CoinGzipFileOutput(fileName);
554 #endif
555     break;
556 
557   case COMPRESS_BZIP2:
558 #ifdef COIN_HAS_BZLIB
559     return new CoinBzip2FileOutput(fileName);
560 #endif
561     break;
562 
563   default:
564     break;
565   }
566 
567   throw CoinError("Unsupported compression selected!",
568     "create",
569     "CoinFileOutput");
570 }
571 
CoinFileOutput(const std::string & fileName)572 CoinFileOutput::CoinFileOutput(const std::string &fileName)
573   : CoinFileIOBase(fileName)
574 {
575 }
576 
~CoinFileOutput()577 CoinFileOutput::~CoinFileOutput()
578 {
579 }
580 
puts(const char * s)581 bool CoinFileOutput::puts(const char *s)
582 {
583   int len = static_cast< int >(strlen(s));
584   if (len == 0)
585     return true;
586 
587   return write(s, len) == len;
588 }
589 
590 /*
591   Tests if the given string looks like an absolute path to a file.
592     - unix:	string begins with `/'
593     - windows:	string begins with `\' or `drv:', where drv is a drive
594 		designator.
595 */
fileAbsPath(const std::string & path)596 bool fileAbsPath(const std::string &path)
597 {
598   const char dirsep = CoinFindDirSeparator();
599 
600   // If the first two chars are drive designators then treat it as absolute
601   // path (noone in their right mind would create a file named 'Z:' on unix,
602   // right?...)
603   const size_t len = path.length();
604   if (len >= 2 && path[1] == ':') {
605     const char ch = path[0];
606     if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
607       return true;
608     }
609   }
610 
611   return path[0] == dirsep;
612 }
613 
614 /*
615    Tests if file readable and may change name to add
616    compression extension.  Here to get ZLIB etc in one place
617 
618    stdin goes by unmolested by all the fussing with file names. We shouldn't
619    close it, either.
620 */
fileCoinReadable(std::string & fileName,const std::string & dfltPrefix)621 bool fileCoinReadable(std::string &fileName, const std::string &dfltPrefix)
622 {
623   if (fileName != "stdin") {
624     const char dirsep = CoinFindDirSeparator();
625     std::string directory;
626     if (dfltPrefix == "") {
627       directory = (dirsep == '/' ? "./" : ".\\");
628     } else {
629       directory = dfltPrefix;
630       if (directory[directory.length() - 1] != dirsep) {
631         directory += dirsep;
632       }
633     }
634 
635     bool absolutePath = fileAbsPath(fileName);
636     std::string field = fileName;
637 
638     if (absolutePath) {
639       // nothing to do
640     } else if (field[0] == '~') {
641       char *home_dir = getenv("HOME");
642       if (home_dir) {
643         std::string home(home_dir);
644         field = field.erase(0, 1);
645         fileName = home + field;
646       } else {
647         fileName = field;
648       }
649     } else {
650       fileName = directory + field;
651     }
652   }
653   // I am opening it to make sure not odd
654   FILE *fp;
655   if (strcmp(fileName.c_str(), "stdin")) {
656     fp = fopen(fileName.c_str(), "r");
657   } else {
658     fp = stdin;
659   }
660 #ifdef COIN_HAS_ZLIB
661   if (!fp) {
662     std::string fname = fileName;
663     fname += ".gz";
664     fp = fopen(fname.c_str(), "r");
665     if (fp)
666       fileName = fname;
667   }
668 #endif
669 #ifdef COIN_HAS_BZLIB
670   if (!fp) {
671     std::string fname = fileName;
672     fname += ".bz2";
673     fp = fopen(fname.c_str(), "r");
674     if (fp)
675       fileName = fname;
676   }
677 #endif
678   if (!fp) {
679     return false;
680   } else {
681     if (fp != stdin) {
682       fclose(fp);
683     }
684     return true;
685   }
686 }
687 
688 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
689 */
690