1 #ifndef __PLINK2_DECOMPRESS_H__
2 #define __PLINK2_DECOMPRESS_H__
3
4 // This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,
5 // Christopher Chang.
6 //
7 // This library is free software: you can redistribute it and/or modify it
8 // under the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 // for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with this library. If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This has been separated from plink2_cmdline due to the relatively
22 // heavyweight dependence on zstd.
23 #include "include/plink2_text.h"
24 #include "plink2_cmdline.h"
25
26 #ifdef __cplusplus
27 namespace plink2 {
28 #endif
29
30 extern const char kErrprintfDecompress[];
31
CleanupTextFile2(const char * file_descrip,textFILE * txfp,PglErr * reterrp)32 HEADER_INLINE BoolErr CleanupTextFile2(const char* file_descrip, textFILE* txfp, PglErr* reterrp) {
33 if (unlikely(CleanupTextFile(txfp, reterrp))) {
34 logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
35 return 1;
36 }
37 return 0;
38 }
39
40 PglErr InitTextStreamEx(const char* fname, uint32_t alloc_at_end, uint32_t enforced_max_line_blen, uint32_t max_line_blen, uint32_t decompress_thread_ct, TextStream* txsp);
41
InitTextStream(const char * fname,uint32_t max_line_blen,uint32_t decompress_thread_ct,TextStream * txsp)42 HEADER_INLINE PglErr InitTextStream(const char* fname, uint32_t max_line_blen, uint32_t decompress_thread_ct, TextStream* txsp) {
43 return InitTextStreamEx(fname, 0, kMaxLongLine, max_line_blen, decompress_thread_ct, txsp);
44 }
45
46 // required_byte_ct can't be greater than kMaxLongLine.
47 // Now ok for unstandardized_byte_ct to be bigstack_left(), since other
48 // allocations are made on the heap instead of the arena (to be more usable in
49 // non-plink2 software).
50 // Note that the actual buffer size is max_line_blen + kDecompressChunkSize,
51 // not max_line_blen.
StandardizeMaxLineBlenEx(uintptr_t unstandardized_byte_ct,uint32_t required_byte_ct,uint32_t * max_line_blenp)52 HEADER_INLINE BoolErr StandardizeMaxLineBlenEx(uintptr_t unstandardized_byte_ct, uint32_t required_byte_ct, uint32_t* max_line_blenp) {
53 #ifdef __LP64__
54 if (unstandardized_byte_ct >= S_CAST(uintptr_t, kMaxLongLine) + S_CAST(uintptr_t, kDecompressChunkSize)) {
55 *max_line_blenp = kMaxLongLine;
56 return 0;
57 }
58 #endif
59 if (unlikely(unstandardized_byte_ct < kDecompressChunkSize + RoundUpPow2(MAXV(kDecompressChunkSize, required_byte_ct), kCacheline))) {
60 return 1;
61 }
62 *max_line_blenp = RoundDownPow2(unstandardized_byte_ct, kCacheline) - kDecompressChunkSize;
63 return 0;
64 }
65
StandardizeMaxLineBlen(uintptr_t unstandardized_byte_ct,uint32_t * max_line_blenp)66 HEADER_INLINE BoolErr StandardizeMaxLineBlen(uintptr_t unstandardized_byte_ct, uint32_t* max_line_blenp) {
67 return StandardizeMaxLineBlenEx(unstandardized_byte_ct, kMaxMediumLine + 1, max_line_blenp);
68 }
69
SizeAndInitTextStream(const char * fname,uintptr_t unstandardized_byte_ct,uint32_t decompress_thread_ct,TextStream * txsp)70 HEADER_INLINE PglErr SizeAndInitTextStream(const char* fname, uintptr_t unstandardized_byte_ct, uint32_t decompress_thread_ct, TextStream* txsp) {
71 // plink 1.9 immediately failed with an out-of-memory error if a "long line"
72 // buffer would be smaller than kMaxMediumLine + 1 bytes, so may as well make
73 // that the default lower bound. (The precise value is currently irrelevant
74 // since kTextStreamBlenLowerBound is larger and we take the maximum of the
75 // two at compile time, but it's useful to distinguish "minimum acceptable
76 // potentially-long-line buffer size" from "load/decompression block size
77 // which generally has good performance".)
78 uint32_t max_line_blen;
79 if (unlikely(StandardizeMaxLineBlen(unstandardized_byte_ct, &max_line_blen))) {
80 return kPglRetNomem;
81 }
82 return InitTextStream(fname, max_line_blen, decompress_thread_ct, txsp);
83 }
84
TextStreamMemStart(TextStream * txs_ptr)85 HEADER_INLINE unsigned char* TextStreamMemStart(TextStream* txs_ptr) {
86 // placed here instead of plink2_text.h since it's pretty specific to arena
87 // memory-management
88 return R_CAST(unsigned char*, GET_PRIVATE(*txs_ptr, m).base.dst);
89 }
90
91 // TODO: logputs("\n") first when necessary
92 void TextErrPrint(const char* file_descrip, const char* errmsg, PglErr reterr);
93
TextFileErrPrint(const char * file_descrip,const textFILE * txfp)94 HEADER_INLINE void TextFileErrPrint(const char* file_descrip, const textFILE* txfp) {
95 TextErrPrint(file_descrip, TextFileError(txfp), TextFileErrcode(txfp));
96 }
97
TextStreamErrPrint(const char * file_descrip,const TextStream * txsp)98 HEADER_INLINE void TextStreamErrPrint(const char* file_descrip, const TextStream* txsp) {
99 TextErrPrint(file_descrip, TextStreamError(txsp), TextStreamErrcode(txsp));
100 }
101
TextStreamErrPrintRewind(const char * file_descrip,const TextStream * txsp,PglErr * reterrp)102 HEADER_INLINE void TextStreamErrPrintRewind(const char* file_descrip, const TextStream* txsp, PglErr* reterrp) {
103 if ((*reterrp == kPglRetOpenFail) || (*reterrp == kPglRetEof)) {
104 // attempting to rewind/reopen a pipe file descriptor should manifest as
105 // one of these two errors. (todo: verify that open-fail is possible.)
106 *reterrp = kPglRetRewindFail;
107 }
108 if (*reterrp == kPglRetRewindFail) {
109 logerrprintfww(kErrprintfRewind, file_descrip);
110 } else {
111 TextStreamErrPrint(file_descrip, txsp);
112 }
113 }
114
CleanupTextStream2(const char * file_descrip,TextStream * txsp,PglErr * reterrp)115 HEADER_INLINE BoolErr CleanupTextStream2(const char* file_descrip, TextStream* txsp, PglErr* reterrp) {
116 if (unlikely(CleanupTextStream(txsp, reterrp))) {
117 logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
118 return 1;
119 }
120 return 0;
121 }
122
123
InitTokenStreamEx(const char * fname,uint32_t alloc_at_end,uint32_t decompress_thread_ct,TokenStream * tksp)124 HEADER_INLINE PglErr InitTokenStreamEx(const char* fname, uint32_t alloc_at_end, uint32_t decompress_thread_ct, TokenStream* tksp) {
125 return InitTextStreamEx(fname, alloc_at_end, 0, kTokenStreamBlen - kDecompressChunkSize, decompress_thread_ct, &(tksp->txs));
126 }
127
InitTokenStream(const char * fname,uint32_t decompress_thread_ct,TokenStream * tksp)128 HEADER_INLINE PglErr InitTokenStream(const char* fname, uint32_t decompress_thread_ct, TokenStream* tksp) {
129 return InitTextStreamEx(fname, 0, 0, kTokenStreamBlen - kDecompressChunkSize, decompress_thread_ct, &(tksp->txs));
130 }
131
TokenStreamErrPrint(const char * file_descrip,const TokenStream * tksp)132 HEADER_INLINE void TokenStreamErrPrint(const char* file_descrip, const TokenStream* tksp) {
133 PglErr reterr = TokenStreamErrcode(tksp);
134 const char* errmsg = TokenStreamError(tksp);
135 if (reterr != kPglRetMalformedInput) {
136 TextErrPrint(file_descrip, errmsg, reterr);
137 } else {
138 logerrprintfww("Error: Pathologically long token in %s.\n", file_descrip);
139 }
140 }
141
CleanupTokenStream2(const char * file_descrip,TokenStream * tksp,PglErr * reterrp)142 HEADER_INLINE BoolErr CleanupTokenStream2(const char* file_descrip, TokenStream* tksp, PglErr* reterrp) {
143 if (unlikely(CleanupTokenStream(tksp, reterrp))) {
144 logerrprintfww(kErrprintfFread, file_descrip, strerror(errno));
145 return 1;
146 }
147 return 0;
148 }
149
CleanupTokenStream3(const char * file_descrip,TokenStream * tksp,PglErr * reterrp)150 HEADER_INLINE BoolErr CleanupTokenStream3(const char* file_descrip, TokenStream* tksp, PglErr* reterrp) {
151 *reterrp = kPglRetSuccess;
152 return CleanupTokenStream2(file_descrip, tksp, reterrp);
153 }
154
155 #ifdef __cplusplus
156 } // namespace plink2
157 #endif
158
159 #endif // __PLINK2_DECOMPRESS_H__
160