1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2017 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdio.h>
41
42 #include "hunzip.hxx"
43 #include "csutil.hxx"
44
// Not referenced by the code visible in this file.
#define CODELEN 65536
// Initial number of entries in the code table `dec`, and the step by which
// getcode() enlarges it when the table fills up.
#define BASEBITREC 5000

// Not referenced by the code visible in this file.
#define UNCOMPRESSED '\002'
// Leading bytes of a plain hzip file.
#define MAGIC "hz0"
// Leading bytes of an hzip file whose header and code table are XOR-ed with a key.
#define MAGIC_ENCRYPT "hz1"
// Length of the magic string without its terminating NUL.
#define MAGICLEN (sizeof(MAGIC) - 1)
52
fail(const char * err,const char * par)53 int Hunzip::fail(const char* err, const char* par) {
54 fprintf(stderr, err, par);
55 return -1;
56 }
57
Hunzip(const char * file,const char * key)58 Hunzip::Hunzip(const char* file, const char* key)
59 : bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0) {
60 in[0] = out[0] = line[0] = '\0';
61 filename = mystrdup(file);
62 if (getcode(key) == -1)
63 bufsiz = -1;
64 else
65 bufsiz = getbuf();
66 }
67
getcode(const char * key)68 int Hunzip::getcode(const char* key) {
69 unsigned char c[2];
70 int i, j, n;
71 int allocatedbit = BASEBITREC;
72 const char* enc = key;
73
74 if (!filename)
75 return -1;
76
77 myopen(fin, filename, std::ios_base::in | std::ios_base::binary);
78 if (!fin.is_open())
79 return -1;
80
81 // read magic number
82 if (!fin.read(in, 3) ||
83 !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
84 strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
85 return fail(MSG_FORMAT, filename);
86 }
87
88 // check encryption
89 if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
90 unsigned char cs;
91 if (!key)
92 return fail(MSG_KEY, filename);
93 if (!fin.read(reinterpret_cast<char*>(c), 1))
94 return fail(MSG_FORMAT, filename);
95 for (cs = 0; *enc; enc++)
96 cs ^= *enc;
97 if (cs != c[0])
98 return fail(MSG_KEY, filename);
99 enc = key;
100 } else
101 key = NULL;
102
103 // read record count
104 if (!fin.read(reinterpret_cast<char*>(c), 2))
105 return fail(MSG_FORMAT, filename);
106
107 if (key) {
108 c[0] ^= *enc;
109 if (*(++enc) == '\0')
110 enc = key;
111 c[1] ^= *enc;
112 }
113
114 n = ((int)c[0] << 8) + c[1];
115 dec.resize(BASEBITREC);
116 dec[0].v[0] = 0;
117 dec[0].v[1] = 0;
118
119 // read codes
120 for (i = 0; i < n; i++) {
121 unsigned char l;
122 if (!fin.read(reinterpret_cast<char*>(c), 2))
123 return fail(MSG_FORMAT, filename);
124 if (key) {
125 if (*(++enc) == '\0')
126 enc = key;
127 c[0] ^= *enc;
128 if (*(++enc) == '\0')
129 enc = key;
130 c[1] ^= *enc;
131 }
132 if (!fin.read(reinterpret_cast<char*>(&l), 1))
133 return fail(MSG_FORMAT, filename);
134 if (key) {
135 if (*(++enc) == '\0')
136 enc = key;
137 l ^= *enc;
138 }
139 if (!fin.read(in, l / 8 + 1))
140 return fail(MSG_FORMAT, filename);
141 if (key)
142 for (j = 0; j <= l / 8; j++) {
143 if (*(++enc) == '\0')
144 enc = key;
145 in[j] ^= *enc;
146 }
147 int p = 0;
148 for (j = 0; j < l; j++) {
149 int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
150 int oldp = p;
151 p = dec[p].v[b];
152 if (p == 0) {
153 lastbit++;
154 if (lastbit == allocatedbit) {
155 allocatedbit += BASEBITREC;
156 dec.resize(allocatedbit);
157 }
158 dec[lastbit].v[0] = 0;
159 dec[lastbit].v[1] = 0;
160 dec[oldp].v[b] = lastbit;
161 p = lastbit;
162 }
163 }
164 dec[p].c[0] = c[0];
165 dec[p].c[1] = c[1];
166 }
167 return 0;
168 }
169
~Hunzip()170 Hunzip::~Hunzip() {
171 if (filename)
172 free(filename);
173 }
174
getbuf()175 int Hunzip::getbuf() {
176 int p = 0;
177 int o = 0;
178 do {
179 if (inc == 0) {
180 fin.read(in, BUFSIZE);
181 inbits = fin.gcount() * 8;
182 }
183 for (; inc < inbits; inc++) {
184 int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
185 int oldp = p;
186 p = dec[p].v[b];
187 if (p == 0) {
188 if (oldp == lastbit) {
189 fin.close();
190 // add last odd byte
191 if (dec[lastbit].c[0])
192 out[o++] = dec[lastbit].c[1];
193 return o;
194 }
195 out[o++] = dec[oldp].c[0];
196 out[o++] = dec[oldp].c[1];
197 if (o == BUFSIZE)
198 return o;
199 p = dec[p].v[b];
200 }
201 }
202 inc = 0;
203 } while (inbits == BUFSIZE * 8);
204 return fail(MSG_FORMAT, filename);
205 }
206
getline(std::string & dest)207 bool Hunzip::getline(std::string& dest) {
208 char linebuf[BUFSIZE];
209 int l = 0, eol = 0, left = 0, right = 0;
210 if (bufsiz == -1)
211 return false;
212 while (l < bufsiz && !eol) {
213 linebuf[l++] = out[outc];
214 switch (out[outc]) {
215 case '\t':
216 break;
217 case 31: { // escape
218 if (++outc == bufsiz) {
219 bufsiz = getbuf();
220 outc = 0;
221 }
222 linebuf[l - 1] = out[outc];
223 break;
224 }
225 case ' ':
226 break;
227 default:
228 if (((unsigned char)out[outc]) < 47) {
229 if (out[outc] > 32) {
230 right = out[outc] - 31;
231 if (++outc == bufsiz) {
232 bufsiz = getbuf();
233 outc = 0;
234 }
235 }
236 if (out[outc] == 30)
237 left = 9;
238 else
239 left = out[outc];
240 linebuf[l - 1] = '\n';
241 eol = 1;
242 }
243 }
244 if (++outc == bufsiz) {
245 outc = 0;
246 bufsiz = fin.is_open() ? getbuf() : -1;
247 }
248 }
249 if (right)
250 strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
251 else
252 linebuf[l] = '\0';
253 strcpy(line + left, linebuf);
254 dest.assign(line);
255 return true;
256 }
257