1 /**
2  * Author......: See docs/credits.txt
3  * License.....: MIT
4  */
5 
6 #include "common.h"
7 #include "types.h"
8 #include "memory.h"
9 #include "shared.h"
10 #include "filehandling.h"
11 
12 #include <Alloc.h>
13 #include <7zCrc.h>
14 #include <7zFile.h>
15 #include <Xz.h>
16 #include <XzCrc64.h>
17 
/*
 * Compile-time guard: this file assigns and compares size_t and SizeT values
 * interchangeably, so both types must have the same size.
 * If the assertion fires, check whether _LZMA_NO_SYSTEM_SIZE_T is defined.
 * NOTE(review): SizeT presumably comes from the bundled 7-Zip/LZMA headers
 * included above - confirm.
 */
#if defined (__clang__) || defined (__GNUC__)
#include <assert.h>
_Static_assert(sizeof (size_t) == sizeof (SizeT), "Check why sizeof(size_t) != sizeof(SizeT)");
#endif
23 
/*
 * Size in bytes of the input buffer used for gzip and xz streams.
 * May be overridden at build time. The value is parenthesized so that the
 * macro expands as a single operand inside larger expressions.
 */
#ifndef HCFILE_BUFFER_SIZE
#define HCFILE_BUFFER_SIZE (256 * 1024)
#endif

/*
 * Largest number of bytes handed to a single fread()/fwrite() call on
 * Windows builds; bigger requests are split into chunks of this size.
 * May be overridden at build time.
 */
#ifndef HCFILE_CHUNK_SIZE
#define HCFILE_CHUNK_SIZE (4 * 1024 * 1024)
#endif
31 
/* Set to true after the CRC, CRC64 and SHA-256 helpers of the xz decoder
 * have been prepared; checked by the xz branch of hc_fopen(). */
static bool xz_initialized = false;

/* Base allocator built from hc_lzma_alloc / hc_lzma_free; installed as the
 * baseAlloc of every per-file aligned allocator. */
static const ISzAlloc xz_alloc = { hc_lzma_alloc, hc_lzma_free };
35 
/*
 * Per-file state for reading an xz compressed file.
 * Allocated and filled by hc_fopen(), released by hc_fclose().
 */
struct xzfile
{
  CAlignOffsetAlloc  alloc;         /* aligned allocator wrapper around xz_alloc */
  UInt64             inBlocks;      /* block count as reported by Xzs_GetNumBlocks() */
  Byte              *inBuf;         /* compressed input buffer, HCFILE_BUFFER_SIZE bytes */
  bool               inEof;         /* set once a refill failed or returned 0 bytes */
  SizeT              inLen;         /* number of valid bytes currently in inBuf */
  SizeT              inPos;         /* offset of the next unconsumed byte in inBuf */
  Int64              inProcessed;   /* running total of compressed bytes consumed */
  CFileInStream      inStream;      /* file stream the compressed data is read from */
  Int64              outProcessed;  /* running total of decoded bytes; reported by hc_ftell() */
  UInt64             outSize;       /* size as reported by Xzs_GetUnpackSize(); used by hc_fstat() */
  CXzUnpacker        state;         /* decoder state passed to XzUnpacker_Code() */
  CXzs               streams;       /* stream index filled by Xzs_ReadBackward() */
};
51 
52 #if defined (__CYGWIN__)
53 // workaround for zlib with cygwin build
/**
 * Forward a _wopen() call to open().
 *
 * The optional third argument (the creation mode) is only read when
 * O_CREAT is part of `oflag`; it is extracted with va_arg() and passed on
 * as an integer. Passing the va_list object itself to open() - as done
 * before - hands open() an unrelated value as the mode.
 *
 * Returns the result of open(): a descriptor, or -1 on failure.
 */
int _wopen (const char *path, int oflag, ...)
{
  int mode = 0;

  if (oflag & O_CREAT)
  {
    va_list ap;

    va_start (ap, oflag);
    mode = va_arg (ap, int);
    va_end (ap);
  }

  return open (path, oflag, mode);
}
62 #endif
63 
hc_fopen(HCFILE * fp,const char * path,const char * mode)64 bool hc_fopen (HCFILE *fp, const char *path, const char *mode)
65 {
66   if (fp == NULL || path == NULL || mode == NULL) return false;
67 
68   /* cleanup */
69   fp->fd       = -1;
70   fp->pfp      = NULL;
71   fp->gfp      = NULL;
72   fp->ufp      = NULL;
73   fp->xfp      = NULL;
74   fp->bom_size = 0;
75   fp->path     = NULL;
76   fp->mode     = NULL;
77 
78   int oflag = -1;
79 
80   int fmode = S_IRUSR|S_IWUSR;
81 
82   if (strncmp (mode, "a", 1) == 0 || strncmp (mode, "ab", 2) == 0)
83   {
84     oflag = O_WRONLY | O_CREAT | O_APPEND;
85 
86     #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
87     if (strncmp (mode, "ab", 2) == 0) oflag |= O_BINARY;
88     #endif
89   }
90   else if (strncmp (mode, "r", 1) == 0 || strncmp (mode, "rb", 2) == 0)
91   {
92     oflag = O_RDONLY;
93     fmode = -1;
94 
95     #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
96     if (strncmp (mode, "rb", 2) == 0) oflag |= O_BINARY;
97     #endif
98   }
99   else if (strncmp (mode, "w", 1) == 0 || strncmp (mode, "wb", 2) == 0)
100   {
101     oflag = O_WRONLY | O_CREAT | O_TRUNC;
102 
103     #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
104     if (strncmp (mode, "wb", 2) == 0) oflag |= O_BINARY;
105     #endif
106   }
107   else
108   {
109     // ADD more strncmp to handle more "mode"
110     return false;
111   }
112 
113   unsigned char check[8] = { 0 };
114 
115   bool is_gzip = false;
116   bool is_zip  = false;
117   bool is_xz   = false;
118 
119   int fd_tmp = open (path, O_RDONLY);
120 
121   if (fd_tmp != -1)
122   {
123     lseek (fd_tmp, 0, SEEK_SET);
124 
125     if (read (fd_tmp, check, sizeof (check)) > 0)
126     {
127       if (check[0] == 0x1f && check[1] == 0x8b && check[2] == 0x08)                     is_gzip = true;
128       if (check[0] == 0x50 && check[1] == 0x4b && check[2] == 0x03 && check[3] == 0x04) is_zip  = true;
129       if (memcmp(check, XZ_SIG, XZ_SIG_SIZE) == 0)                                      is_xz   = true;
130 
131       // compressed files with BOM will be undetected!
132 
133       if (is_gzip == false && is_zip == false && is_xz == false)
134       {
135         fp->bom_size = hc_string_bom_size (check);
136       }
137     }
138 
139     close (fd_tmp);
140   }
141 
142   if (fmode == -1)
143   {
144     fp->fd = open (path, oflag);
145   }
146   else
147   {
148     fp->fd = open (path, oflag, fmode);
149   }
150 
151   if (fp->fd == -1) return false;
152 
153   if (is_gzip)
154   {
155     if ((fp->gfp = gzdopen (fp->fd, mode)) == NULL) return false;
156 
157     gzbuffer (fp->gfp, HCFILE_BUFFER_SIZE);
158   }
159   else if (is_zip)
160   {
161     if ((fp->ufp = unzOpen64 (path)) == NULL) return false;
162 
163     if (unzOpenCurrentFile (fp->ufp) != UNZ_OK) return false;
164   }
165   else if (is_xz)
166   {
167     /* thread safe on little endian */
168     if (xz_initialized == false)
169     {
170       CrcGenerateTable ();
171       Crc64GenerateTable ();
172       Sha256Prepare ();
173       xz_initialized = true;
174     }
175 
176     xzfile_t *xfp = (xzfile_t *) hccalloc (1, sizeof (*xfp));
177     if (xfp == NULL) return false;
178 
179     /* prepare cache line aligned memory allocator */
180     AlignOffsetAlloc_CreateVTable (&xfp->alloc);
181     xfp->alloc.numAlignBits = 7;
182     xfp->alloc.baseAlloc = &xz_alloc;
183     ISzAllocPtr alloc = &xfp->alloc.vt;
184     xfp->inBuf = (Byte *) ISzAlloc_Alloc (alloc, HCFILE_BUFFER_SIZE);
185     if (xfp->inBuf == NULL)
186     {
187       hcfree (xfp);
188       close (fp->fd);
189       return false;
190     }
191 
192     /* open the file */
193     CFileInStream *inStream = &xfp->inStream;
194     FileInStream_CreateVTable (inStream);
195     CSzFile *file = &inStream->file;
196     File_Construct (file);
197     WRes wres = InFile_Open (file, path);
198     if (wres != SZ_OK)
199     {
200       ISzAlloc_Free (alloc, xfp->inBuf);
201       hcfree (xfp);
202       close (fp->fd);
203       return false;
204     }
205 
206     /* scan the file */
207     CLookToRead2 lookStream;
208     LookToRead2_CreateVTable (&lookStream, false);
209     lookStream.buf = xfp->inBuf;
210     lookStream.bufSize = HCFILE_BUFFER_SIZE;
211     lookStream.realStream = &inStream->vt;
212     LookToRead2_Init (&lookStream);
213     Xzs_Construct (&xfp->streams);
214     Int64 offset = 0;
215     SRes res = Xzs_ReadBackward (&xfp->streams, &lookStream.vt, &offset, NULL, alloc);
216     if (res != SZ_OK || offset != 0)
217     {
218       Xzs_Free (&xfp->streams, alloc);
219       File_Close (file);
220       ISzAlloc_Free (alloc, xfp->inBuf);
221       hcfree (xfp);
222       close (fp->fd);
223       return false;
224     }
225 
226     xfp->inBlocks = Xzs_GetNumBlocks (&xfp->streams);
227     xfp->outSize = Xzs_GetUnpackSize (&xfp->streams);
228 
229     /* seek to start of the file and fill the buffer */
230     SizeT inLen = HCFILE_BUFFER_SIZE;
231     res = ISeekInStream_Seek (&inStream->vt, &offset, SZ_SEEK_SET);
232     if (res == SZ_OK)
233     {
234       res = ISeekInStream_Read (&inStream->vt, xfp->inBuf, &inLen);
235     }
236     if (res != SZ_OK || inLen == 0)
237     {
238       Xzs_Free (&xfp->streams, alloc);
239       File_Close (file);
240       ISzAlloc_Free (alloc, xfp->inBuf);
241       hcfree (xfp);
242       close (fp->fd);
243       return false;
244     }
245 
246     xfp->inLen = inLen;
247 
248     /* read headers */
249     SizeT outLen = 0;
250     ECoderStatus status;
251     CXzUnpacker *state = &xfp->state;
252     XzUnpacker_Construct (state, alloc);
253     res = XzUnpacker_Code (state, NULL, &outLen, xfp->inBuf, &inLen, false, CODER_FINISH_ANY, &status);
254     if (res != SZ_OK)
255     {
256       XzUnpacker_Free (state);
257       Xzs_Free (&xfp->streams, alloc);
258       File_Close (file);
259       ISzAlloc_Free (alloc, xfp->inBuf);
260       hcfree (xfp);
261       close (fp->fd);
262       return false;
263     }
264 
265     xfp->inPos = inLen;
266     xfp->inProcessed = inLen;
267     fp->xfp = xfp;
268   }
269   else
270   {
271     if ((fp->pfp = fdopen (fp->fd, mode)) == NULL) return false;
272 
273     if (fp->bom_size)
274     {
275       // atm just skip bom
276 
277       const int nread = fread (check, sizeof (char), fp->bom_size, fp->pfp);
278 
279       if (nread != fp->bom_size) return false;
280     }
281   }
282 
283   fp->path = path;
284   fp->mode = mode;
285 
286   return true;
287 }
288 
/**
 * Open `path` as a plain stdio stream without any compression or BOM
 * detection and attach it to `fp`.
 *
 * `mode` must start with "a", "r" or "w" (optionally followed by "b");
 * every other mode is rejected. `path` and `mode` are stored in `fp` by
 * pointer, not copied.
 *
 * Returns true on success. On failure false is returned and no descriptor
 * is left open.
 */
bool hc_fopen_raw (HCFILE *fp, const char *path, const char *mode)
{
  if (fp == NULL || path == NULL || mode == NULL) return false;

  /* cleanup */
  fp->fd       = -1;
  fp->pfp      = NULL;
  fp->gfp      = NULL;
  fp->ufp      = NULL;
  fp->xfp      = NULL;
  fp->bom_size = 0;
  fp->path     = NULL;
  fp->mode     = NULL;

  int oflag = -1;

  int fmode = S_IRUSR|S_IWUSR;

  if (strncmp (mode, "a", 1) == 0 || strncmp (mode, "ab", 2) == 0)
  {
    oflag = O_WRONLY | O_CREAT | O_APPEND;

    #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
    if (strncmp (mode, "ab", 2) == 0) oflag |= O_BINARY;
    #endif
  }
  else if (strncmp (mode, "r", 1) == 0 || strncmp (mode, "rb", 2) == 0)
  {
    oflag = O_RDONLY;
    fmode = -1;

    #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
    if (strncmp (mode, "rb", 2) == 0) oflag |= O_BINARY;
    #endif
  }
  else if (strncmp (mode, "w", 1) == 0 || strncmp (mode, "wb", 2) == 0)
  {
    oflag = O_WRONLY | O_CREAT | O_TRUNC;

    #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
    if (strncmp (mode, "wb", 2) == 0) oflag |= O_BINARY;
    #endif
  }
  else
  {
    // ADD more strncmp to handle more "mode"
    return false;
  }

  /* fmode is -1 for the read modes, where open() takes no creation mode */
  if (fmode == -1)
  {
    fp->fd = open (path, oflag);
  }
  else
  {
    fp->fd = open (path, oflag, fmode);
  }

  if (fp->fd == -1) return false;

  if ((fp->pfp = fdopen (fp->fd, mode)) == NULL)
  {
    /* fdopen() does not close the descriptor when it fails */
    close (fp->fd);

    fp->fd = -1;

    return false;
  }

  fp->path = path;
  fp->mode = mode;

  return true;
}
356 
/**
 * Read up to `nmemb` items of `size` bytes each from `fp` into `ptr`,
 * using whichever backend (stdio, gzip, zip, xz) the handle was opened with.
 *
 * Returns the number of complete items read, 0 when `size` or `nmemb` is 0,
 * and (size_t) -1 when `ptr` or `fp` is NULL, when no backend is attached,
 * when the zip or xz decoder reports an error, or when the request reaches
 * the 2 GB limit of 32 bit Windows builds.
 */
size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp)
{
  size_t n = (size_t) -1;

  if (ptr == NULL || fp == NULL) return n;

  if (size == 0 || nmemb == 0) return 0;

  if (fp->pfp)
  {
    #ifdef _WIN
    u64 len = (u64) size * nmemb;

    #ifndef _WIN64
    /* check 2 GB limit with 32 bit build */
    if (len >= INT32_MAX) return n;
    #endif

    if (len <= HCFILE_CHUNK_SIZE)
    {
      n = fread (ptr, size, nmemb, fp->pfp);
    }
    else
    {
      /* large request: read it in pieces of at most HCFILE_CHUNK_SIZE bytes */
      size_t left = (size_t) len;
      size_t pos = 0;

      /* assume success */
      n = nmemb;

      do
      {
        size_t chunk = (left > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : left;
        size_t bytes = fread ((unsigned char *) ptr + pos, 1, chunk, fp->pfp);
        pos += bytes;
        left -= bytes;
        if (chunk != bytes)
        {
          /* partial read */
          n = pos / size;
          break;
        }
      } while (left);
    }
    #else
    n = fread (ptr, size, nmemb, fp->pfp);
    #endif
  }
  else if (fp->gfp)
  {
    n = gzfread (ptr, size, nmemb, fp->gfp);
  }
  else if (fp->ufp)
  {
    u64 len = (u64) size * nmemb;
    u64 pos = 0;

    #if defined(_WIN) && !defined(_WIN64)
    /* check 2 GB limit with 32 bit build */
    if (len >= INT32_MAX) return n;
    #endif

    /* assume success */
    n = nmemb;

    do
    {
      /* the length argument is capped at INT_MAX per call, so feed larger requests in slices */
      unsigned chunk = (len > INT_MAX) ? INT_MAX : (unsigned) len;
      int result = unzReadCurrentFile (fp->ufp, (unsigned char *) ptr + pos, chunk);
      if (result < 0) return (size_t) -1;
      pos += (u64) result;
      len -= (u64) result;
      if (chunk != (unsigned) result)
      {
        /* partial read */
        n = pos / size;
        break;
      }
    } while (len);
  }
  else if (fp->xfp)
  {
    Byte *outBuf = (Byte *) ptr;
    SizeT outLen = (SizeT) size * nmemb;
    SizeT outPos = 0;
    SRes res = SZ_OK;
    xzfile_t *xfp = fp->xfp;

    #if defined(_WIN) && !defined(_WIN64)
    /* check 2 GB limit with 32 bit build */
    if (outLen >= INT32_MAX) return n;
    #endif

    /* assume success */
    n = nmemb;

    do
    {
      /* fill buffer if needed */
      if (xfp->inLen == xfp->inPos && !xfp->inEof)
      {
        xfp->inPos = 0;
        xfp->inLen = HCFILE_BUFFER_SIZE;
        res = ISeekInStream_Read (&xfp->inStream.vt, xfp->inBuf, &xfp->inLen);
        if (res != SZ_OK || xfp->inLen == 0) xfp->inEof = true;
      }

      /* decode */
      /* inLeft / outLeft go in as the available sizes and are used below as
         the number of bytes consumed / produced by this call */
      ECoderStatus status;
      SizeT inLeft  = xfp->inLen - xfp->inPos;
      SizeT outLeft = outLen - outPos;
      res = XzUnpacker_Code (&xfp->state, outBuf + outPos, &outLeft, xfp->inBuf + xfp->inPos, &inLeft, inLeft == 0, CODER_FINISH_ANY, &status);
      xfp->inPos += inLeft;
      xfp->inProcessed += inLeft;
      if (res != SZ_OK) return (size_t) -1;
      /* nothing consumed and nothing produced: no further progress is possible */
      if (inLeft == 0 && outLeft == 0)
      {
        /* partial read */
        n = (size_t) (outPos / size);
        break;
      }
      outPos += outLeft;
      xfp->outProcessed += outLeft;
    } while (outPos < outLen);
  }

  return n;
}
485 
/**
 * Write `nmemb` items of `size` bytes each from `ptr` to `fp`.
 *
 * Only the stdio and gzip backends can be written to. Returns the item
 * count reported by the backend, 0 when `size` or `nmemb` is 0, and
 * (size_t) -1 for NULL arguments, for handles without a writable backend,
 * or (Windows builds) for a short chunk write or a request at the 2 GB limit
 * of 32 bit builds.
 */
size_t hc_fwrite (const void *ptr, size_t size, size_t nmemb, HCFILE *fp)
{
  const size_t failed = (size_t) -1;

  if (ptr == NULL || fp == NULL) return failed;

  if (size == 0 || nmemb == 0) return 0;

  /* the stdio stream takes precedence; gzip is only consulted without one */
  if (fp->pfp == NULL)
  {
    if (fp->gfp == NULL) return failed;

    return gzfwrite (ptr, size, nmemb, fp->gfp);
  }

  #ifdef _WIN
  const u64 total = (u64) size * nmemb;

  #ifndef _WIN64
  /* check 2 GB limit with 32 bit build */
  if (total >= INT32_MAX) return failed;
  #endif

  if (total <= HCFILE_CHUNK_SIZE) return fwrite (ptr, size, nmemb, fp->pfp);

  /* large request: hand it to fwrite() in pieces of at most HCFILE_CHUNK_SIZE bytes */
  const unsigned char *src = (const unsigned char *) ptr;

  size_t remaining = (size_t) total;
  size_t done      = 0;

  while (remaining > 0)
  {
    const size_t want  = (remaining > HCFILE_CHUNK_SIZE) ? HCFILE_CHUNK_SIZE : remaining;
    const size_t wrote = fwrite (src + done, 1, want, fp->pfp);

    if (wrote != want) return failed;

    done      += wrote;
    remaining -= wrote;
  }

  return nmemb;
  #else
  return fwrite (ptr, size, nmemb, fp->pfp);
  #endif
}
539 
/**
 * Reposition `fp` with fseek() semantics.
 *
 * Returns 0 on success and -1 on failure. Seeking is implemented for the
 * stdio and gzip backends only; zip and xz handles always yield -1.
 */
int hc_fseek (HCFILE *fp, off_t offset, int whence)
{
  if (fp == NULL) return -1;

  if (fp->pfp)
  {
    return fseeko (fp->pfp, offset, whence);
  }

  if (fp->gfp)
  {
    /*
     * gzseek() returns the resulting offset in the uncompressed stream, or
     * -1 on error. Map that to the 0 / -1 convention of fseeko() so every
     * backend reports success the same way and no offset is squeezed into
     * an int.
     */
    return (gzseek (fp->gfp, offset, whence) == -1) ? -1 : 0;
  }

  /*
   * zip: untested and not used in the wordlist engine (candidates would be
   * ZSEEK64 on the underlying stream or unzSetOffset).
   * xz: TODO.
   */
  return -1;
}
582 
/**
 * Reposition `fp` to the start of its (uncompressed) data.
 *
 * No status is returned. If an xz rewind step fails, the function returns
 * early and leaves the buffer bookkeeping reset to zero.
 */
void hc_rewind (HCFILE *fp)
{
  if (fp == NULL) return;

  if (fp->pfp)
  {
    rewind (fp->pfp);
  }
  else if (fp->gfp)
  {
    gzrewind (fp->gfp);
  }
  else if (fp->ufp)
  {
    /*
     * unzGoToFirstFile() only moves the archive's entry cursor; the read
     * context of the entry that is currently open keeps its position.
     * Close that context and open the first entry again so the next read
     * starts at its first byte.
     */
    if (unzGoToFirstFile (fp->ufp) == UNZ_OK)
    {
      unzCloseCurrentFile (fp->ufp);

      unzOpenCurrentFile (fp->ufp);
    }
  }
  else if (fp->xfp)
  {
    xzfile_t *xfp = fp->xfp;

    /* cleanup */
    xfp->inEof = false;
    xfp->inLen = 0;
    xfp->inPos = 0;
    xfp->inProcessed  = 0;
    xfp->outProcessed = 0;

    /* reset */
    Int64 offset = 0;
    CFileInStream *inStream = &xfp->inStream;
    SRes res = ISeekInStream_Seek (&inStream->vt, &offset, SZ_SEEK_SET);
    if (res != SZ_OK) return;
    CXzUnpacker *state = &xfp->state;
    XzUnpacker_Init (state);

    /* fill the buffer */
    SizeT inLen = HCFILE_BUFFER_SIZE;
    res = ISeekInStream_Read (&inStream->vt, xfp->inBuf, &inLen);
    if (res != SZ_OK || inLen == 0) return;

    xfp->inLen = inLen;

    /* read headers; afterwards inLen holds the number of bytes consumed */
    SizeT outLen = 0;
    ECoderStatus status;
    XzUnpacker_Code (state, NULL, &outLen, xfp->inBuf, &inLen, false, CODER_FINISH_ANY, &status);
    xfp->inPos = inLen;
    xfp->inProcessed = inLen;
  }
}
633 
/**
 * fstat() the descriptor behind `fp` into `buf`.
 *
 * For xz handles st_size is replaced with the recorded uncompressed size,
 * unless that size holds the all-ones "unknown" value. gzip and zip handles
 * keep the compressed on-disk size.
 *
 * Returns the fstat() result, or -1 for NULL arguments or a closed handle.
 */
int hc_fstat (HCFILE *fp, struct stat *buf)
{
  if (fp == NULL || buf == NULL || fp->fd == -1) return -1;

  const int rc = fstat (fp->fd, buf);

  if (rc != 0) return rc;

  /* TODO: For gzip and zip files hc_ftell() reports uncompressed bytes, but hc_fstat() reports compressed bytes */
  if (fp->gfp || fp->ufp) return rc;

  if (fp->xfp)
  {
    const xzfile_t *xz = fp->xfp;

    /* check that the uncompressed size is known */
    const UInt64 unknown = (UInt64) ((Int64) -1);

    if (xz->outSize != unknown) buf->st_size = (off_t) xz->outSize;
  }

  return rc;
}
663 
/**
 * Report the current position of `fp`. For compressed handles this is the
 * position in the uncompressed data.
 *
 * Returns -1 when `fp` is NULL and 0 when no backend is attached; otherwise
 * the value reported by the backend.
 */
off_t hc_ftell (HCFILE *fp)
{
  if (fp == NULL) return -1;

  if (fp->pfp)
  {
    return ftello (fp->pfp);
  }

  if (fp->gfp)
  {
    return (off_t) gztell (fp->gfp);
  }

  if (fp->ufp)
  {
    /*
     * unztell() returns a z_off_t, which is only 32 bit wide on some
     * platforms. The archive is opened with unzOpen64(), so use the 64 bit
     * counterpart to keep positions past 2 GiB intact.
     */
    return (off_t) unztell64 (fp->ufp);
  }

  if (fp->xfp)
  {
    /* uncompressed bytes */
    const xzfile_t *xfp = fp->xfp;

    return (off_t) xfp->outProcessed;
  }

  return 0;
}
691 
/**
 * Write the single character `c` to `fp`.
 *
 * Supported for the stdio and gzip backends. Returns what fputc() / gzputc()
 * return, or -1 when `fp` is NULL or has no writable backend.
 */
int hc_fputc (int c, HCFILE *fp)
{
  if (fp == NULL) return -1;

  if (fp->pfp != NULL) return fputc (c, fp->pfp);

  if (fp->gfp != NULL) return gzputc (fp->gfp, c);

  return -1;
}
709 
/**
 * Read one byte from `fp`.
 *
 * Returns the byte as a non-negative int, or EOF at end of data, on a
 * decoder error, or when `fp` is NULL or has no backend attached.
 */
int hc_fgetc (HCFILE *fp)
{
  if (fp == NULL) return EOF;

  if (fp->pfp)
  {
    return fgetc (fp->pfp);
  }

  if (fp->gfp)
  {
    return gzgetc (fp->gfp);
  }

  if (fp->ufp)
  {
    unsigned char c = 0;

    if (unzReadCurrentFile (fp->ufp, &c, 1) == 1) return (int) c;

    return EOF;
  }

  if (fp->xfp)
  {
    xzfile_t *xfp = fp->xfp;

    Byte out = 0;

    /*
     * A single decoder call may consume input without producing the
     * requested byte (stream headers, padding, or a symbol split across a
     * buffer refill). Keep feeding it until a byte appears or no progress
     * is possible; returning after the first call would hand back a byte
     * that was never written.
     */
    for (;;)
    {
      /* fill buffer if needed */
      if (xfp->inLen == xfp->inPos && !xfp->inEof)
      {
        xfp->inPos = 0;
        xfp->inLen = HCFILE_BUFFER_SIZE;

        const SRes rd = ISeekInStream_Read (&xfp->inStream.vt, xfp->inBuf, &xfp->inLen);

        if (rd != SZ_OK || xfp->inLen == 0) xfp->inEof = true;
      }

      /* decode single byte */
      ECoderStatus status;
      SizeT inLeft  = xfp->inLen - xfp->inPos;
      SizeT outLeft = 1;

      const SRes res = XzUnpacker_Code (&xfp->state, &out, &outLeft, xfp->inBuf + xfp->inPos, &inLeft, inLeft == 0, CODER_FINISH_ANY, &status);

      /* inLeft / outLeft now hold the bytes consumed / produced */
      xfp->inPos       += inLeft;
      xfp->inProcessed += inLeft;

      if (res != SZ_OK) return EOF;

      if (outLeft == 1)
      {
        xfp->outProcessed++;

        return (int) out;
      }

      /* nothing consumed and nothing produced: end of data */
      if (inLeft == 0) return EOF;
    }
  }

  return EOF;
}
760 
hc_fgets(char * buf,int len,HCFILE * fp)761 char *hc_fgets (char *buf, int len, HCFILE *fp)
762 {
763   char *r = NULL;
764 
765   if (fp == NULL || buf == NULL || len <= 0) return r;
766 
767   if (fp->pfp)
768   {
769     r = fgets (buf, len, fp->pfp);
770   }
771   else if (fp->gfp)
772   {
773     r = gzgets (fp->gfp, buf, len);
774   }
775   else if (fp->ufp)
776   {
777     if (unzReadCurrentFile (fp->ufp, buf, len) > 0) r = buf;
778   }
779   else if (fp->xfp)
780   {
781     Byte *outBuf = (Byte *) buf;
782     SizeT outLen = (SizeT) len - 1;
783     SRes res = SZ_OK;
784     xzfile_t *xfp = fp->xfp;
785 
786     while (outLen > 0)
787     {
788       /* fill buffer if needed */
789       if (xfp->inLen == xfp->inPos && !xfp->inEof)
790       {
791         xfp->inPos = 0;
792         xfp->inLen = HCFILE_BUFFER_SIZE;
793         res = ISeekInStream_Read (&xfp->inStream.vt, xfp->inBuf, &xfp->inLen);
794         if (res != SZ_OK || xfp->inLen == 0) xfp->inEof = true;
795       }
796 
797       /* decode single byte */
798       ECoderStatus status;
799       SizeT inLeft = xfp->inLen - xfp->inPos;
800       SizeT outLeft = 1;
801       res = XzUnpacker_Code (&xfp->state, outBuf, &outLeft, xfp->inBuf + xfp->inPos, &inLeft, inLeft == 0, CODER_FINISH_ANY, &status);
802       if (inLeft == 0 && outLeft == 0) break;
803       xfp->inPos += inLeft;
804       xfp->inProcessed += inLeft;
805       if (res != SZ_OK) break;
806       xfp->outProcessed++;
807       if (*outBuf++ == '\n')
808       {
809         /* success */
810         r = buf;
811         break;
812       }
813 
814       outLen--;
815     }
816 
817     /* always NULL terminate */
818     *outBuf = 0;
819   }
820 
821   return r;
822 }
823 
/**
 * vfprintf() counterpart for HCFILE handles.
 *
 * Formats to the stdio or gzip backend and returns that backend's result.
 * Returns -1 when `fp` is NULL or has neither of the two backends attached.
 */
int hc_vfprintf (HCFILE *fp, const char *format, va_list ap)
{
  if (fp == NULL) return -1;

  if (fp->pfp != NULL) return vfprintf (fp->pfp, format, ap);

  if (fp->gfp != NULL) return gzvprintf (fp->gfp, format, ap);

  return -1;
}
841 
/**
 * fprintf() counterpart for HCFILE handles.
 *
 * Collects the variadic arguments and forwards them to hc_vfprintf(), which
 * formats to the stdio or gzip backend. Returns the backend's result, or -1
 * when `fp` is NULL or has neither of the two backends attached.
 */
int hc_fprintf (HCFILE *fp, const char *format, ...)
{
  if (fp == NULL) return -1;

  va_list args;

  va_start (args, format);

  const int written = hc_vfprintf (fp, format, args);

  va_end (args);

  return written;
}
865 
/**
 * Read one line from `fp` and parse it with `format` into `ptr`.
 *
 * `format` is expected to contain a single conversion, stored through
 * `ptr`. Only the first HCBUFSIZ_TINY - 1 bytes of the line are considered.
 *
 * Returns the sscanf() result: 1 when the value was converted, 0 when the
 * line did not match, EOF when the line was empty. Returns -1 when `fp` or
 * `format` is NULL or no line could be read. Before, 1 was returned even
 * when nothing had been converted and `*ptr` was left untouched.
 */
int hc_fscanf (HCFILE *fp, const char *format, void *ptr)
{
  if (fp == NULL || format == NULL) return -1;

  char buf[HCBUFSIZ_TINY];

  /* hc_fgets() reserves the terminator itself, so the full buffer can be offered */
  if (hc_fgets (buf, (int) sizeof (buf), fp) == NULL) return -1;

  return sscanf (buf, format, ptr);
}
883 
/**
 * Report whether `fp` has reached the end of its data.
 *
 * Returns the backend's end-of-file indicator. For xz handles that is 1 once
 * the input file is exhausted and every buffered byte has been consumed,
 * 0 otherwise. Returns -1 when `fp` is NULL or has no backend attached.
 */
int hc_feof (HCFILE *fp)
{
  if (fp == NULL) return -1;

  if (fp->pfp != NULL) return feof (fp->pfp);

  if (fp->gfp != NULL) return gzeof (fp->gfp);

  if (fp->ufp != NULL) return unzeof (fp->ufp);

  if (fp->xfp != NULL)
  {
    const xzfile_t *xz = fp->xfp;

    const bool drained = (xz->inPos == xz->inLen);

    return (xz->inEof && drained) ? 1 : 0;
  }

  return -1;
}
910 
/**
 * Flush pending output of `fp`.
 *
 * stdio handles are flushed with fflush(), gzip handles with a
 * Z_SYNC_FLUSH. Other handles and NULL are ignored.
 */
void hc_fflush (HCFILE *fp)
{
  if (fp == NULL) return;

  if (fp->pfp != NULL)
  {
    fflush (fp->pfp);

    return;
  }

  if (fp->gfp != NULL) gzflush (fp->gfp, Z_SYNC_FLUSH);
}
924 
/**
 * Ask the operating system to commit the data of `fp` to storage.
 *
 * Only acts on handles with a stdio stream attached; uses
 * FlushFileBuffers() on Windows builds and fsync() elsewhere.
 */
void hc_fsync (HCFILE *fp)
{
  if (fp == NULL || fp->pfp == NULL) return;

#if defined (_WIN)
  FlushFileBuffers ((HANDLE) _get_osfhandle (fp->fd));
#else
  fsync (fp->fd);
#endif
}
940 
hc_fclose(HCFILE * fp)941 void hc_fclose (HCFILE *fp)
942 {
943   if (fp == NULL) return;
944 
945   if (fp->pfp)
946   {
947     fclose (fp->pfp);
948   }
949   else if (fp->gfp)
950   {
951     gzclose (fp->gfp);
952   }
953   else if (fp->ufp)
954   {
955     unzCloseCurrentFile (fp->ufp);
956 
957     unzClose (fp->ufp);
958 
959     close (fp->fd);
960   }
961   else if (fp->xfp)
962   {
963     xzfile_t *xfp = fp->xfp;
964     ISzAllocPtr alloc = &xfp->alloc.vt;
965     XzUnpacker_Free (&xfp->state);
966     Xzs_Free (&xfp->streams, alloc);
967     File_Close (&xfp->inStream.file);
968     ISzAlloc_Free (alloc, xfp->inBuf);
969     hcfree (xfp);
970     close (fp->fd);
971   }
972 
973   fp->fd = -1;
974   fp->pfp = NULL;
975   fp->gfp = NULL;
976   fp->ufp = NULL;
977   fp->xfp = NULL;
978 
979   fp->path = NULL;
980   fp->mode = NULL;
981 }
982 
/**
 * Read one line from `fp` into `line_buf`, without the trailing newline.
 *
 * `line_sz` is the size of `line_buf` in bytes. At most `line_sz` - 1
 * characters are stored so that the terminating NUL always fits; the rest of
 * an oversized line is consumed, counted and reported on stderr. Trailing
 * carriage returns are stripped from lines that were not truncated.
 *
 * Before, a line of `line_sz` or more characters made the terminator land at
 * line_buf[line_sz], one byte past the stated size.
 *
 * Returns the number of characters stored (excluding the terminator).
 */
size_t fgetl (HCFILE *fp, char *line_buf, const size_t line_sz)
{
  /* keep one byte of line_buf free for the terminating NUL */
  const size_t line_max = (line_sz > 0) ? line_sz - 1 : 0;

  int c;

  size_t line_len = 0;

  size_t line_truncated = 0;

  while ((c = hc_fgetc (fp)) != EOF)
  {
    if (c == '\n') break;

    if (line_len == line_max)
    {
      line_truncated++;
    }
    else
    {
      line_buf[line_len] = (char) c;

      line_len++;
    }
  }

  if (line_truncated > 0)
  {
    fprintf (stderr, "\nOversized line detected! Truncated %" PRIu64 " bytes\n", (u64) line_truncated);
  }
  else
  {
    while (line_len > 0 && line_buf[line_len - 1] == '\r')
    {
      line_len--;
    }
  }

  /* with line_sz == 0 there is no room for even the terminator */
  if (line_sz > 0) line_buf[line_len] = 0;

  return line_len;
}
1023 
count_lines(HCFILE * fp)1024 u64 count_lines (HCFILE *fp)
1025 {
1026   u64 cnt = 0;
1027 
1028   char *buf = (char *) hcmalloc (HCBUFSIZ_LARGE + 1);
1029 
1030   char prev = '\n';
1031 
1032   while (!hc_feof (fp))
1033   {
1034     size_t nread = hc_fread (buf, sizeof (char), HCBUFSIZ_LARGE, fp);
1035 
1036     if (nread < 1) continue;
1037 
1038     for (size_t i = 0; i < nread; i++)
1039     {
1040       if (prev == '\n') cnt++;
1041 
1042       prev = buf[i];
1043     }
1044   }
1045 
1046   hcfree (buf);
1047 
1048   return cnt;
1049 }
1050 
/**
 * Strip every trailing '\n' and '\r' from the NUL terminated string `buf`
 * in place.
 *
 * Returns the length of the string after stripping.
 */
size_t in_superchop (char *buf)
{
  size_t remaining = strlen (buf);

  for (; remaining > 0; remaining--)
  {
    const char last = buf[remaining - 1];

    if (last != '\n' && last != '\r') break;

    buf[remaining - 1] = 0;
  }

  return remaining;
}
1080 
/**
 * Strip every trailing '\n' and '\r' from the first `len` bytes of `buf`
 * in place, overwriting each stripped byte with NUL.
 *
 * Returns the number of bytes left after stripping.
 */
size_t superchop_with_length (char *buf, const size_t len)
{
  size_t kept = len;

  while (kept > 0 && (buf[kept - 1] == '\n' || buf[kept - 1] == '\r'))
  {
    kept--;

    buf[kept] = 0;
  }

  return kept;
}
1110