1 #ifndef __PLINK2_DECOMPRESS_H__
2 #define __PLINK2_DECOMPRESS_H__
3 
4 // This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,
5 // Christopher Chang.
6 //
7 // This library is free software: you can redistribute it and/or modify it
8 // under the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
15 // for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with this library.  If not, see <http://www.gnu.org/licenses/>.
19 
20 
21 // This has been separated from plink2_cmdline due to the relatively
22 // heavyweight dependence on zstd.
23 #include "include/plink2_text.h"
24 #include "plink2_cmdline.h"
25 
26 #ifdef __cplusplus
27 namespace plink2 {
28 #endif
29 
30 extern const char kErrprintfDecompress[];
31 
CleanupTextFile2(const char * file_descrip,textFILE * txfp,PglErr * reterrp)32 HEADER_INLINE BoolErr CleanupTextFile2(const char* file_descrip, textFILE* txfp, PglErr* reterrp) {
33   if (unlikely(CleanupTextFile(txfp, reterrp))) {
34     logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
35     return 1;
36   }
37   return 0;
38 }
39 
40 PglErr InitTextStreamEx(const char* fname, uint32_t alloc_at_end, uint32_t enforced_max_line_blen, uint32_t max_line_blen, uint32_t decompress_thread_ct, TextStream* txsp);
41 
InitTextStream(const char * fname,uint32_t max_line_blen,uint32_t decompress_thread_ct,TextStream * txsp)42 HEADER_INLINE PglErr InitTextStream(const char* fname, uint32_t max_line_blen, uint32_t decompress_thread_ct, TextStream* txsp) {
43   return InitTextStreamEx(fname, 0, kMaxLongLine, max_line_blen, decompress_thread_ct, txsp);
44 }
45 
46 // required_byte_ct can't be greater than kMaxLongLine.
47 // Now ok for unstandardized_byte_ct to be bigstack_left(), since other
48 // allocations are made on the heap instead of the arena (to be more usable in
49 // non-plink2 software).
50 // Note that the actual buffer size is max_line_blen + kDecompressChunkSize,
51 // not max_line_blen.
StandardizeMaxLineBlenEx(uintptr_t unstandardized_byte_ct,uint32_t required_byte_ct,uint32_t * max_line_blenp)52 HEADER_INLINE BoolErr StandardizeMaxLineBlenEx(uintptr_t unstandardized_byte_ct, uint32_t required_byte_ct, uint32_t* max_line_blenp) {
53 #ifdef __LP64__
54   if (unstandardized_byte_ct >= S_CAST(uintptr_t, kMaxLongLine) + S_CAST(uintptr_t, kDecompressChunkSize)) {
55     *max_line_blenp = kMaxLongLine;
56     return 0;
57   }
58 #endif
59   if (unlikely(unstandardized_byte_ct < kDecompressChunkSize + RoundUpPow2(MAXV(kDecompressChunkSize, required_byte_ct), kCacheline))) {
60     return 1;
61   }
62   *max_line_blenp = RoundDownPow2(unstandardized_byte_ct, kCacheline) - kDecompressChunkSize;
63   return 0;
64 }
65 
StandardizeMaxLineBlen(uintptr_t unstandardized_byte_ct,uint32_t * max_line_blenp)66 HEADER_INLINE BoolErr StandardizeMaxLineBlen(uintptr_t unstandardized_byte_ct, uint32_t* max_line_blenp) {
67   return StandardizeMaxLineBlenEx(unstandardized_byte_ct, kMaxMediumLine + 1, max_line_blenp);
68 }
69 
SizeAndInitTextStream(const char * fname,uintptr_t unstandardized_byte_ct,uint32_t decompress_thread_ct,TextStream * txsp)70 HEADER_INLINE PglErr SizeAndInitTextStream(const char* fname, uintptr_t unstandardized_byte_ct, uint32_t decompress_thread_ct, TextStream* txsp) {
71   // plink 1.9 immediately failed with an out-of-memory error if a "long line"
72   // buffer would be smaller than kMaxMediumLine + 1 bytes, so may as well make
73   // that the default lower bound.  (The precise value is currently irrelevant
74   // since kTextStreamBlenLowerBound is larger and we take the maximum of the
75   // two at compile time, but it's useful to distinguish "minimum acceptable
76   // potentially-long-line buffer size" from "load/decompression block size
77   // which generally has good performance".)
78   uint32_t max_line_blen;
79   if (unlikely(StandardizeMaxLineBlen(unstandardized_byte_ct, &max_line_blen))) {
80     return kPglRetNomem;
81   }
82   return InitTextStream(fname, max_line_blen, decompress_thread_ct, txsp);
83 }
84 
TextStreamMemStart(TextStream * txs_ptr)85 HEADER_INLINE unsigned char* TextStreamMemStart(TextStream* txs_ptr) {
86   // placed here instead of plink2_text.h since it's pretty specific to arena
87   // memory-management
88   return R_CAST(unsigned char*, GET_PRIVATE(*txs_ptr, m).base.dst);
89 }
90 
91 // TODO: logputs("\n") first when necessary
92 void TextErrPrint(const char* file_descrip, const char* errmsg, PglErr reterr);
93 
TextFileErrPrint(const char * file_descrip,const textFILE * txfp)94 HEADER_INLINE void TextFileErrPrint(const char* file_descrip, const textFILE* txfp) {
95   TextErrPrint(file_descrip, TextFileError(txfp), TextFileErrcode(txfp));
96 }
97 
TextStreamErrPrint(const char * file_descrip,const TextStream * txsp)98 HEADER_INLINE void TextStreamErrPrint(const char* file_descrip, const TextStream* txsp) {
99   TextErrPrint(file_descrip, TextStreamError(txsp), TextStreamErrcode(txsp));
100 }
101 
TextStreamErrPrintRewind(const char * file_descrip,const TextStream * txsp,PglErr * reterrp)102 HEADER_INLINE void TextStreamErrPrintRewind(const char* file_descrip, const TextStream* txsp, PglErr* reterrp) {
103   if ((*reterrp == kPglRetOpenFail) || (*reterrp == kPglRetEof)) {
104     // attempting to rewind/reopen a pipe file descriptor should manifest as
105     // one of these two errors.  (todo: verify that open-fail is possible.)
106     *reterrp = kPglRetRewindFail;
107   }
108   if (*reterrp == kPglRetRewindFail) {
109     logerrprintfww(kErrprintfRewind, file_descrip);
110   } else {
111     TextStreamErrPrint(file_descrip, txsp);
112   }
113 }
114 
CleanupTextStream2(const char * file_descrip,TextStream * txsp,PglErr * reterrp)115 HEADER_INLINE BoolErr CleanupTextStream2(const char* file_descrip, TextStream* txsp, PglErr* reterrp) {
116   if (unlikely(CleanupTextStream(txsp, reterrp))) {
117     logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
118     return 1;
119   }
120   return 0;
121 }
122 
123 
InitTokenStreamEx(const char * fname,uint32_t alloc_at_end,uint32_t decompress_thread_ct,TokenStream * tksp)124 HEADER_INLINE PglErr InitTokenStreamEx(const char* fname, uint32_t alloc_at_end, uint32_t decompress_thread_ct, TokenStream* tksp) {
125   return InitTextStreamEx(fname, alloc_at_end, 0, kTokenStreamBlen - kDecompressChunkSize, decompress_thread_ct, &(tksp->txs));
126 }
127 
InitTokenStream(const char * fname,uint32_t decompress_thread_ct,TokenStream * tksp)128 HEADER_INLINE PglErr InitTokenStream(const char* fname, uint32_t decompress_thread_ct, TokenStream* tksp) {
129   return InitTextStreamEx(fname, 0, 0, kTokenStreamBlen - kDecompressChunkSize, decompress_thread_ct, &(tksp->txs));
130 }
131 
TokenStreamErrPrint(const char * file_descrip,const TokenStream * tksp)132 HEADER_INLINE void TokenStreamErrPrint(const char* file_descrip, const TokenStream* tksp) {
133   PglErr reterr = TokenStreamErrcode(tksp);
134   const char* errmsg = TokenStreamError(tksp);
135   if (reterr != kPglRetMalformedInput) {
136     TextErrPrint(file_descrip, errmsg, reterr);
137   } else {
138     logerrprintfww("Error: Pathologically long token in %s.\n", file_descrip);
139   }
140 }
141 
CleanupTokenStream2(const char * file_descrip,TokenStream * tksp,PglErr * reterrp)142 HEADER_INLINE BoolErr CleanupTokenStream2(const char* file_descrip, TokenStream* tksp, PglErr* reterrp) {
143   if (unlikely(CleanupTokenStream(tksp, reterrp))) {
144     logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
145     return 1;
146   }
147   return 0;
148 }
149 
CleanupTokenStream3(const char * file_descrip,TokenStream * tksp,PglErr * reterrp)150 HEADER_INLINE BoolErr CleanupTokenStream3(const char* file_descrip, TokenStream* tksp, PglErr* reterrp) {
151   *reterrp = kPglRetSuccess;
152   return CleanupTokenStream2(file_descrip, tksp, reterrp);
153 }
154 
155 #ifdef __cplusplus
156 }  // namespace plink2
157 #endif
158 
159 #endif  // __PLINK2_DECOMPRESS_H__
160