1 /*
2  file_utils.cpp     MindForger thinking notebook
3 
4  Copyright (C) 2016-2020 Martin Dvorak <martin.dvorak@mindforger.com>
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License
8  as published by the Free Software Foundation; either version 2
9  of the License, or (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program. If not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "file_utils.h"
20 
21 using namespace std;
22 
23 namespace m8r {
24 
pathToDirectoryAndFile(const std::string & path,std::string & directory,std::string & file)25 void pathToDirectoryAndFile(const std::string& path, std::string& directory, std::string& file)
26 {
27     if(!path.empty()) {
28         size_t found;
29         // IMPROVE complete the code
30         found=path.find_last_of(FILE_PATH_SEPARATOR);
31         if(found == string::npos) {
32             directory = ".";
33         } else {
34             directory = path.substr(0,found);
35         }
36 
37         file = path.substr(found+1);
38     }
39 }
40 
/**
 * Copy a path while turning every backslash into a forward slash.
 *
 * @param path       path to convert; an empty path leaves linuxPath untouched.
 * @param linuxPath  receives the converted copy of path.
 */
void pathToLinuxDelimiters(const std::string& path, std::string& linuxPath)
{
    if(path.empty()) {
        return;
    }

    linuxPath = path;
    for(char& c : linuxPath) {
        if(c == '\\') {
            c = '/';
        }
    }
}
48 
/**
 * Split text into lines and append each one to lines as a heap-allocated string.
 *
 * The appended strings are allocated with new; this function never frees them.
 *
 * @param text   text to split; may be null.
 * @param lines  vector the new line strings are appended to.
 * @return false when text is null or empty (lines is not modified), true otherwise.
 */
bool stringToLines(const std::string* text, std::vector<std::string*>& lines)
{
    if(text == nullptr || text->empty()) {
        return false;
    }

    std::istringstream reader(*text);
    for(std::string current; std::getline(reader, current); ) {
        // IMPROVE heap allocation possibly expensive
        lines.push_back(new std::string(current));
    }

    return true;
}
63 
/**
 * Read a text file line by line, appending each line to lines as a
 * heap-allocated string.
 *
 * The appended strings are allocated with new; this function never frees them.
 *
 * @param filename  path of the file to read; a null pointer reads nothing.
 * @param lines     vector the new line strings are appended to.
 * @param fileSize  incremented by (line length + 1) for every line read; it is
 *                  NOT reset here, so pass in 0 to get the size of this file alone.
 * @return false for a null filename, otherwise whether fileSize is greater than
 *         zero after reading.
 */
bool fileToLines(const std::string* filename, std::vector<std::string*>& lines, size_t &fileSize)
{
    // consistent with stringToLines(): a missing argument is not an error, just "nothing read"
    if(filename == nullptr) {
        return false;
    }

    std::ifstream infile(*filename);
    std::string line;
    while(std::getline(infile, line)) {
        fileSize += line.size() + 1;
        // IMPROVE heap allocation possibly expensive
        lines.push_back(new std::string{line});
    }

    return fileSize > 0;
}
76 
/**
 * Load the whole content of a file into a newly allocated string.
 *
 * @param filename  path of the file to read.
 * @return string allocated with new (never null, never freed here); it is empty
 *         when the file cannot be opened or read.
 */
std::string* fileToString(const std::string& filename)
{
    std::string* content = new std::string{};

    std::ifstream is(filename);
    if(!is.is_open()) {
        return content;
    }

    // pre-size the string from the file length; tellg() yields -1 on failure and
    // must not reach reserve(), where it would become a huge unsigned value
    is.seekg(0, std::ios::end);
    const std::streamoff length = is.tellg();
    is.seekg(0, std::ios::beg);
    if(is.fail()) {
        // seeking failed: forget the size hint and still try to read the stream
        is.clear();
    } else if(length > 0
              && static_cast<std::string::size_type>(length) < content->max_size())
    {
        content->reserve(static_cast<std::string::size_type>(length));
    }

    content->assign((std::istreambuf_iterator<char>(is)), std::istreambuf_iterator<char>());

    return content;
}
89 
/**
 * Write content to a file, replacing whatever the file held before.
 *
 * Failures to open or write are not reported.
 *
 * @param filename  path of the file to (re)create.
 * @param content   text to store.
 */
void stringToFile(const std::string& filename, const std::string& content)
{
    std::ofstream target;
    target.open(filename);
    target << content;
    // the stream is flushed and closed when target goes out of scope
}
96 
/**
 * Get the time a file's content was last modified.
 *
 * @param filename  path of the file to inspect; may be null.
 * @return modification time in seconds since the Unix epoch, or 0 when filename
 *         is null or the file cannot be inspected.
 */
time_t fileModificationTime(const std::string* filename)
{
    if(filename == nullptr) {
        return 0;
    }

#ifdef _WIN32
    time_t tMod = 0;
    FILETIME ft;
    HANDLE hFile;
    ULARGE_INTEGER ull;
    hFile = CreateFileA(filename->c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr);
    if(hFile != INVALID_HANDLE_VALUE) {
        if(GetFileTime(hFile, nullptr, nullptr, &ft)) {
            // FILETIME counts 100ns ticks since 1601-01-01: scale to seconds,
            // then shift by the 11644473600 seconds between 1601 and 1970
            ull.LowPart = ft.dwLowDateTime;
            ull.HighPart = ft.dwHighDateTime;
            tMod = ull.QuadPart / 10000000ULL - 11644473600ULL;
        }
        CloseHandle(hFile);
    }
    return tMod;
#else
    // the buffer is only read after stat() succeeded, so it needs no initializer
    struct stat attributes;
    if(stat(filename->c_str(), &attributes) != 0) {
        return 0;
    }
    // modification time ~ file content modification; st_ctime ~ file metadata change (more sensitive)
    return attributes.st_mtime;
#endif
}
129 
/**
 * Copy the content of one file to another, byte for byte.
 *
 * An existing destination is overwritten. The destination is not created when
 * the source cannot be opened.
 *
 * @param from  path of the file to read.
 * @param to    path of the file to (re)create.
 * @return true when the copy succeeded (including the copy of an empty file),
 *         false when the source or destination cannot be opened or writing fails.
 */
bool copyFile(const std::string &from, const std::string &to)
{
    std::ifstream src(from, std::ios::binary);
    if(!src.is_open()) {
        return false;
    }

    std::ofstream dst(to, std::ios::binary);
    if(!dst.is_open()) {
        return false;
    }

    // inserting an empty stream buffer sets failbit on the target, which would
    // make a successful copy of an empty file look like a failure - skip it
    if(src.peek() != std::ifstream::traits_type::eof()) {
        dst << src.rdbuf();
    }

    // close explicitly so that flush errors are visible in the stream state
    dst.close();
    return !dst.fail();
}
137 
moveFile(const string & from,const string & to)138 bool moveFile(const string &from, const string &to)
139 {
140     copyFile(from,to);
141     if(remove(from.c_str())) {
142       return false;
143     } else {
144         return true;
145     }
146 }
147 
#define BUFSIZE 4096

/**
 * Resolve a path to an absolute path.
 *
 * On failure an error is written to cerr and the input path is passed through
 * unchanged.
 *
 * @param path                  path to resolve.
 * @param resolvedAbsolutePath  receives the resolved path, or a copy of path
 *                              when it cannot be resolved.
 */
void resolvePath(const std::string& path, std::string& resolvedAbsolutePath)
{
#ifdef _WIN32
    char buffer[BUFSIZE] = "";
    // GetFullPathNameA returns 0 on error and the REQUIRED size (>= BUFSIZE)
    // when the buffer is too small - only a shorter length means buffer is valid
    const DWORD length = GetFullPathNameA(path.c_str(), BUFSIZE, buffer, nullptr);
    if(length > 0 && length < BUFSIZE) {
        resolvedAbsolutePath.assign(buffer);
    } else {
        std::cerr << "Error: unable to resolve path '" << path << "'" << std::endl;
        resolvedAbsolutePath.assign(path);
    }
#else
    // output buffer MUST be set to null so that realpath allocates it (check realpath manpage)
    char* resolved = realpath(path.c_str(), nullptr);
    if(resolved == nullptr) {
        std::cerr << "Error: unable to resolve path '" << path << "'" << std::endl;
        resolvedAbsolutePath.assign(path);
    } else {
        resolvedAbsolutePath.assign(resolved);
        // realpath allocated the result with malloc
        free(resolved);
    }
#endif //_WIN32
}
175 
/**
 * Check whether anything exists at the given path.
 *
 * @param path  path to probe.
 * @return true when stat() succeeds for path, false otherwise.
 */
bool isDirectoryOrFileExists(const char* path)
{
    struct stat attributes;
    return stat(path, &attributes) == 0;
}
185 
/**
 * Check whether the given path is a directory.
 *
 * @param path  path to probe.
 * @return true when stat() succeeds and reports a directory, false otherwise.
 */
bool isDirectory(const char* path)
{
    struct stat attributes;
    const bool known = (stat(path, &attributes) == 0);
    return known && S_ISDIR(attributes.st_mode);
}
197 
/**
 * Check whether the given path exists and is not a directory.
 *
 * @param path  path to probe.
 * @return true when stat() succeeds and the entry is anything but a directory,
 *         false otherwise.
 */
bool isFile(const char* path)
{
    struct stat attributes;
    if(stat(path, &attributes) != 0) {
        return false;
    }

    return !S_ISDIR(attributes.st_mode);
}
209 
/**
 * Check whether a path is relative, i.e. does not start at the root '/'.
 *
 * NOTE(review): this function used to return the opposite answer (true for
 * paths starting with '/'); callers that compensated for that must be checked.
 *
 * @param path  path to classify.
 * @return true when path is empty or does not begin with '/', false otherwise.
 */
bool isPathRelative(const std::string& path)
{
    // IMPROVE relative vs. absolute is platform specific (remind Windows; new C++ standards have methods for this)
    return path.empty() || path[0] != '/';
}
219 
/**
 * Create a single directory.
 *
 * On POSIX the directory is readable, writable and searchable by its owner only.
 * On failure a message with the errno value and its description goes to cerr.
 *
 * @param path  directory to create; its parent must already exist.
 * @return true when the directory was created, false otherwise.
 */
bool createDirectory(const std::string& path) {
#ifdef _WIN32
    const int rc = _mkdir(path.c_str());
#else
    const int rc = mkdir(path.c_str(), S_IRUSR | S_IWUSR | S_IXUSR);
#endif

    if(rc == 0) {
        return true;
    }

    // mkdir only signals failure with -1; the reason is in errno - capture it
    // before any stream operation has a chance to overwrite it
    const int reason = errno;
    std::cerr << "Failed to create directory '" << path << "' with error "
              << reason << " (" << strerror(reason) << ")" << std::endl;
    return false;
}
234 
makeTempDirectory(char * dirNamePrefix)235 char* makeTempDirectory(char* dirNamePrefix)
236 {
237 #ifdef _WIN32
238     char *ret = nullptr;
239     char  *tempPathBuffer = new char[MAX_PATH];
240     UUID uuid;
241     RPC_CSTR uuidStr;
242     GetTempPathA(MAX_PATH, tempPathBuffer);
243     strcat(tempPathBuffer, FILE_PATH_SEPARATOR);
244     if (strlen(tempPathBuffer) + strlen(dirNamePrefix) < MAX_PATH) {
245         strcat(tempPathBuffer, dirNamePrefix);
246         UuidCreate(&uuid);
247         UuidToStringA(&uuid, &uuidStr);
248         if (strlen(tempPathBuffer) + strlen((char*)uuidStr) < MAX_PATH) {
249              strcat(tempPathBuffer, (char*)uuidStr);
250              if (CreateDirectoryA(tempPathBuffer, nullptr)) {
251                  ret = tempPathBuffer;
252              }
253         }
254         RpcStringFreeA(&uuidStr);
255     }
256     if (ret == nullptr) {
257         delete [] tempPathBuffer;
258     }
259     return ret;
260 #else
261     char *tmpl = new char[100];
262     tmpl[0] = 0;
263     strcat(tmpl, SYSTEM_TEMP_DIRECTORY);
264     strcat(tmpl, FILE_PATH_SEPARATOR);
265     strcat(tmpl, dirNamePrefix);
266     strcat(tmpl, "XXXXXX");
267     return mkdtemp(tmpl);
268 #endif
269 }
270 
/**
 * Remove a directory together with everything inside it.
 *
 * Removal stops at the first entry that cannot be removed. Outside Windows,
 * symbolic links are removed as links; the directories they point to are
 * never entered.
 *
 * @param path  directory to remove.
 * @return 0 on success, a non-zero value when path is null, cannot be opened as
 *         a directory, or any entry (or the directory itself) cannot be removed.
 */
int removeDirectoryRecursively(const char* path)
{
    if(path == nullptr) {
        return -1;
    }

    DIR* dir = opendir(path);
    if(dir == nullptr) {
        return -1;
    }

    int result = 0;
    struct dirent* entry;
    while(result == 0 && (entry = readdir(dir)) != nullptr) {
        // skip the names "." and ".." as I don't want to recurse on them
        if(!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
            continue;
        }

        std::string child(path);
        child += '/';
        child += entry->d_name;

        struct stat attributes;
#ifdef _WIN32
        const int statResult = stat(child.c_str(), &attributes);
#else
        // lstat: a link to a directory must be unlinked, not followed - otherwise
        // content living outside of the removed tree would be deleted
        const int statResult = lstat(child.c_str(), &attributes);
#endif
        if(statResult != 0) {
            result = -1;
        } else if(S_ISDIR(attributes.st_mode)) {
            result = removeDirectoryRecursively(child.c_str());
        } else {
            result = unlink(child.c_str());
        }
    }
    closedir(dir);

    // the directory itself can go only after all of its entries are gone
    if(result == 0) {
#ifdef _WIN32
        result = _rmdir(path);
#else
        result = rmdir(path);
#endif
    }

    return result;
}
316 
317 
318 
319 
320 
321 #ifdef GZIP_DEFLATE_VIA_ZIP_LIBRARY
322 
unzip(const char * srcFile,const char * dstFile)323 void unzip(const char* srcFile, const char* dstFile)
324 {
325     int err = 0;
326     zip* z = zip_open(srcFile, 0, &err);
327 
328     struct zip_stat zipStat;
329     zip_stat_init(&zipStat);
330     zip_stat(z, dstFile, 0, &zipStat);
331 
332     char* contents = new char[zipStat.size];
333 
334     zip_file* f = zip_fopen(z, dstFile, 0);
335     const zip_int64_t did_read = zip_fread(f, contents, zipStat.size);
336     if(did_read > 0) {
337         zip_fclose(f);
338     }
339 
340     zip_close(z);
341 
342     delete[] contents;
343 }
344 
345 #endif
346 
347 #ifdef GZIP_DEFLATE_VIA_ZLIB_PIPE_
348 
ungzipFile(FILE * source,FILE * dest)349 int ungzipFile(FILE* source, FILE* dest)
350 {
351     #define CHUNK 16384
352 
353     /* avoid end-of-line conversions */
354     SET_BINARY_MODE(source);
355     SET_BINARY_MODE(dest);
356 
357     int ret;
358     unsigned have;
359     z_stream strm;
360     unsigned char in[CHUNK];
361     unsigned char out[CHUNK];
362 
363     /* allocate inflate state */
364     strm.zalloc = Z_NULL;
365     strm.zfree = Z_NULL;
366     strm.opaque = Z_NULL;
367     strm.avail_in = 0;
368     strm.next_in = Z_NULL;
369     ret = inflateInit(&strm);
370     if (ret != Z_OK)
371         return ret;
372 
373     /* decompress until deflate stream ends or end of file */
374     do {
375         strm.avail_in = fread(in, 1, CHUNK, source);
376         if (ferror(source)) {
377             (void)inflateEnd(&strm);
378             return Z_ERRNO;
379         }
380         if (strm.avail_in == 0)
381             break;
382         strm.next_in = in;
383 
384         /* run inflate() on input until output buffer not full */
385         do {
386             strm.avail_out = CHUNK;
387             strm.next_out = out;
388             ret = inflate(&strm, Z_NO_FLUSH);
389 
390 
391             // IMPROVE don't want to crash assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
392             if(ret == Z_STREAM_ERROR) return Z_STREAM_ERROR;
393 
394 
395             switch (ret) {
396             case Z_NEED_DICT:
397                 ret = Z_DATA_ERROR;     /* and fall through */
398             case Z_DATA_ERROR:
399             case Z_MEM_ERROR:
400                 (void)inflateEnd(&strm);
401                 return ret;
402             }
403             have = CHUNK - strm.avail_out;
404             if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
405                 (void)inflateEnd(&strm);
406                 return Z_ERRNO;
407             }
408         } while (strm.avail_out == 0);
409 
410         /* done when inflate() says it's done */
411     } while (ret != Z_STREAM_END);
412 
413     /* clean up and return */
414     (void)inflateEnd(&strm);
415     return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
416 }
417 
ungzip(const char * srcFile,const char * dstFile)418 int ungzip(const char* srcFile, const char* dstFile)
419 {
420     if(isDirectoryOrFileExists(srcFile) && !isDirectoryOrFileExists(dstFile)) {
421         FILE* srcFILE = fopen(srcFile, "r");
422         FILE* dstFILE = fopen(dstFile, "r+");
423         int r = ungzipFile(srcFILE, dstFILE);
424 
425         // IMPROVE polish error messages - debug/cerr
426         if(r != Z_OK) {
427             fputs("zpipe: ", stderr);
428             switch (r) {
429             case Z_ERRNO:
430                 if (ferror(stdin))
431                     fputs("error reading stdin\n", stderr);
432                 if (ferror(stdout))
433                     fputs("error writing stdout\n", stderr);
434                 break;
435             case Z_STREAM_ERROR:
436                 fputs("invalid compression level\n", stderr);
437                 break;
438             case Z_DATA_ERROR:
439                 fputs("invalid or incomplete deflate data\n", stderr);
440                 break;
441             case Z_MEM_ERROR:
442                 fputs("out of memory\n", stderr);
443                 break;
444             case Z_VERSION_ERROR:
445                 fputs("zlib version mismatch!\n", stderr);
446             }
447         }
448         return r;
449     }
450 
451     return Z_STREAM_ERROR;
452 }
453 
454 #endif
455 
456 // TODO this code must be completely rewritten
457 #ifdef __cplusplus
458 extern "C" {
459 #endif
460 
/* The following macro calls a zlib routine and checks the return
   value. If the return value ("status") is not OK, it prints an error
   message and exits the program. Zlib's error statuses are all less
   than zero.

   The body is wrapped in do { } while (0) so that the macro behaves as
   one statement: it needs a trailing semicolon and is safe in an
   unbraced if/else. */

#define GZIP_CALL_ZLIB(x) do {                                          \
        const int gzip_call_zlib_status_ = (x);                         \
        if (gzip_call_zlib_status_ < 0) {                               \
            fprintf (stderr,                                            \
                     "%s:%d: %s returned a bad status of %d.\n",        \
                     __FILE__, __LINE__, #x, gzip_call_zlib_status_);   \
            exit (EXIT_FAILURE);                                        \
        }                                                               \
    } while (0)
476 
/* If "test" is true, release the inflate state, print an error message
   and halt execution.

   The macro expects two names in the scope where it is used: "strm"
   (the z_stream passed to inflateEnd) and "srcFile" (the file name that
   is printed). "message" must be a string literal, it is pasted into
   the format string.

   The body is wrapped in do { } while (0) so that the macro behaves as
   one statement: it needs a trailing semicolon and is safe in an
   unbraced if/else. */

#define GZIP_FAIL(test,message) do {                     \
        if (test) {                                      \
            inflateEnd (& strm);                         \
            fprintf (stderr, "%s:%d: " message           \
                     " file '%s' failed: %s\n",          \
                     __FILE__, __LINE__, srcFile,        \
                     strerror (errno));                  \
            exit (EXIT_FAILURE);                         \
        }                                                \
    } while (0)
489 
490 
ungzip(const char * srcFile,const char * dstFile)491 int ungzip(const char* srcFile, const char* dstFile)
492 {
493     UNUSED_ARG(dstFile);
494 
495     // IMPROVE make these variables
496 #define CHUNK 0x4000
497 #define windowBits 15
498 #define ENABLE_ZLIB_GZIP 32
499 
500     FILE* srcFILE;
501     FILE* dstFILE;
502     z_stream strm = {nullptr,0,0,nullptr,0,0,nullptr,nullptr,nullptr,nullptr,nullptr,0,0,0};
503     unsigned char in[CHUNK];
504     unsigned char out[CHUNK];
505 
506     strm.zalloc = nullptr;
507     strm.zfree = nullptr;
508     strm.opaque = nullptr;
509     strm.next_in = in;
510     strm.avail_in = 0;
511     GZIP_CALL_ZLIB (inflateInit2 (& strm, windowBits | ENABLE_ZLIB_GZIP));
512 
513     // IMPROVE error reporting
514     if(!isDirectoryOrFileExists(srcFile) || isDirectoryOrFileExists(dstFile)) {
515         return Z_STREAM_ERROR;
516     }
517 
518     // open src file
519     srcFILE = fopen (srcFile, "rb");
520     GZIP_FAIL (! srcFILE, "open");
521 
522     // open dst file
523     dstFILE = fopen(dstFile, "wb");
524     GZIP_FAIL (! dstFILE, "open");
525 
526     while (1) {
527         size_t bytes_read;
528         int zlib_status;
529 
530         bytes_read = fread (in, sizeof (char), sizeof (in), srcFILE);
531         GZIP_FAIL (ferror (srcFILE), "read");
532         strm.avail_in = static_cast<uInt>(bytes_read);
533         strm.next_in = in;
534         do {
535             unsigned have;
536             strm.avail_out = CHUNK;
537             strm.next_out = out;
538             zlib_status = inflate (& strm, Z_NO_FLUSH);
539 
540             switch (zlib_status) {
541             case Z_OK:
542             case Z_STREAM_END:
543             case Z_BUF_ERROR:
544                 break;
545             default:
546                 inflateEnd (& strm);
547                 fprintf (stderr, "Gzip error %d in '%s'.\n", zlib_status, srcFile);
548                 return -1;
549             }
550 
551             have = CHUNK - strm.avail_out;
552 
553             // write deflated data
554             fwrite(out, sizeof (unsigned char), have, dstFILE);
555         } while (strm.avail_out == 0);
556 
557         if (feof (srcFILE)) {
558             inflateEnd (& strm);
559             break;
560         }
561     }
562     GZIP_FAIL (fclose (srcFILE), "close");
563     GZIP_FAIL (fclose (dstFILE), "close");
564     return 0;
565 }
566 
567 #ifdef __cplusplus
568 }
569 #endif
570 
571 // IMPROVE error handling to be fixed
copyDirectoryRecursively(const char * srcPath,const char * dstPath,bool extractGz)572 int copyDirectoryRecursively(const char* srcPath, const char* dstPath, bool extractGz)
573 {
574     DIR *d = opendir(srcPath);
575     size_t srcPathLen = strlen(srcPath);
576     size_t dstPathLen = strlen(dstPath);
577     int r = -1;
578     if(d) {
579         MF_DEBUG("DIR: " << dstPath << endl);
580         if(!isDirectoryOrFileExists(dstPath)) createDirectory(string{dstPath});
581 
582         struct dirent *p;
583         r = 0;
584         while(!r && (p=readdir(d))) {
585             int r2 = -1;
586             char *srcBuf, *dstBuf;
587             size_t srcLen, dstLen;
588             // skip the names "." and ".." as I don't want to recurse on them
589             if(!strcmp(p->d_name, ".") || !strcmp(p->d_name, "..")) {
590                 continue;
591             }
592             srcLen = srcPathLen + strlen(p->d_name) + 2;
593             dstLen = dstPathLen + strlen(p->d_name) + 2;
594             srcBuf = new char[srcLen];
595             dstBuf = new char[dstLen];
596             if(srcBuf) {
597                 struct stat statbuf;
598                 // IMPROVE MF_DEBUG
599                 snprintf(srcBuf, srcLen, "%s/%s", srcPath, p->d_name);
600                 snprintf(dstBuf, dstLen, "%s/%s", dstPath, p->d_name);
601                 if(!stat(srcBuf, &statbuf)) {
602                     if(S_ISDIR(statbuf.st_mode)) {
603                         r2 = copyDirectoryRecursively(srcBuf, dstBuf, extractGz);
604                     } else {
605                         MF_DEBUG("FILE: " << dstBuf << endl);
606                         if(!isDirectoryOrFileExists(dstBuf)) {
607                             copyFile(string{srcBuf}, string{dstBuf});
608                             if(extractGz) {
609                                 if(stringEndsWith(dstBuf,".gz") && strlen(dstBuf)>3) {
610                                     char* dstExt = new char[strlen(dstBuf)+1];
611                                     strcpy(dstExt, dstBuf);
612                                     if(strlen(dstExt)>3) {
613                                         dstExt[strlen(dstExt)-3] = 0;
614                                         MF_DEBUG("  ungzip: '" << dstBuf << "' > '" << dstExt << "'" << endl);
615                                         ungzip(dstBuf, dstExt);
616                                     }
617                                     delete[] dstExt;
618                                 }
619                             }
620                         }
621                         r2 = 0;
622                     }
623                 }
624                 delete[] srcBuf;
625                 delete[] dstBuf;
626             }
627             r = r2;
628         }
629         closedir(d);
630     }
631 
632     return r;
633 }
634 
/**
 * Get the path of the running executable.
 *
 * @return pointer to a static buffer (must not be freed; overwritten by the
 *         next call) holding the path, or an empty string when the path cannot
 *         be determined; nullptr on platforms other than macOS and Windows.
 */
char* getExecutablePath() {
#ifdef __APPLE__
    static char exePath[2048];
    uint32_t len = sizeof(exePath);
    if(_NSGetExecutablePath(exePath, &len) != 0) {
        // buffer too small
        exePath[0] = '\0';
    } else {
        // resolve symlinks, ., .. if possible
        char* canonicalPath = realpath(exePath, nullptr);
        if(canonicalPath != nullptr) {
            // strncpy does not terminate the target when the source fills it
            strncpy(exePath, canonicalPath, sizeof(exePath) - 1);
            exePath[sizeof(exePath) - 1] = '\0';
            free(canonicalPath);
        }
    }
    return exePath;
#elif defined(_WIN32)
    // one spare byte (static storage is zero-initialized) keeps the buffer terminated
    static char exePath[MAX_PATH+1];
    if(GetModuleFileNameA(nullptr, exePath, MAX_PATH) == 0) {
        // never return a stale or undefined path on failure
        exePath[0] = '\0';
    }
    return exePath;
#else
    return nullptr;
#endif

}
660 
661 } // m8r namespace
662