1/* ucl_mchw.ch -- matching functions using a window
2
3   This file is part of the UCL data compression library.
4
5   Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer
6   All Rights Reserved.
7
8   The UCL library is free software; you can redistribute it and/or
9   modify it under the terms of the GNU General Public License as
10   published by the Free Software Foundation; either version 2 of
11   the License, or (at your option) any later version.
12
13   The UCL library is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with the UCL library; see the file COPYING.
20   If not, write to the Free Software Foundation, Inc.,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
23   Markus F.X.J. Oberhumer
24   <markus@oberhumer.com>
25   http://www.oberhumer.com/opensource/ucl/
26 */
27
28
29/***********************************************************************
30//
31************************************************************************/
32
33typedef struct
34{
35    int init;
36
37    ucl_uint look;          /* bytes in lookahead buffer */
38
39    ucl_uint m_len;
40    ucl_uint m_off;
41
42    ucl_uint last_m_len;
43    ucl_uint last_m_off;
44
45    const ucl_bytep bp;
46    const ucl_bytep ip;
47    const ucl_bytep in;
48    const ucl_bytep in_end;
49    ucl_bytep out;
50
51    ucl_uint32 bb_b;
52    unsigned bb_k;
53    unsigned bb_c_endian;
54    unsigned bb_c_s;
55    unsigned bb_c_s8;
56    ucl_bytep bb_p;
57    ucl_bytep bb_op;
58
59    struct ucl_compress_config_t conf;
60    ucl_uintp result;
61
62    ucl_progress_callback_p cb;
63
64    ucl_uint textsize;      /* text size counter */
65    ucl_uint codesize;      /* code size counter */
66    ucl_uint printcount;    /* counter for reporting progress every 1K bytes */
67
68    /* some stats */
69    unsigned long lit_bytes;
70    unsigned long match_bytes;
71    unsigned long rep_bytes;
72    unsigned long lazy;
73}
74UCL_COMPRESS_T;
75
76
77
78#if (ACC_OS_TOS && (ACC_CC_PUREC || ACC_CC_TURBOC))
79/* the cast is needed to work around a code generation bug */
80#define getbyte(c)  ((c).ip < (c).in_end ? (int) (unsigned) *((c).ip)++ : (-1))
81#else
82#define getbyte(c)  ((c).ip < (c).in_end ? *((c).ip)++ : (-1))
83#endif
84
85#include "ucl_swd.ch"
86
87
88
89/***********************************************************************
90//
91************************************************************************/
92
93static int
94init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s,
95             const ucl_bytep dict, ucl_uint dict_len,
96             ucl_uint32 flags )
97{
98    int r;
99
100    assert(!c->init);
101    c->init = 1;
102
103    s->c = c;
104
105    c->last_m_len = c->last_m_off = 0;
106
107    c->textsize = c->codesize = c->printcount = 0;
108    c->lit_bytes = c->match_bytes = c->rep_bytes = 0;
109    c->lazy = 0;
110
111    r = swd_init(s,dict,dict_len);
112    if (r != UCL_E_OK)
113    {
114        swd_exit(s);
115        return r;
116    }
117
118    s->use_best_off = (flags & 1) ? 1 : 0;
119    return UCL_E_OK;
120}
121
122
123/***********************************************************************
124//
125************************************************************************/
126
127static int
128find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s,
129             ucl_uint this_len, ucl_uint skip )
130{
131    assert(c->init);
132
133    if (skip > 0)
134    {
135        assert(this_len >= skip);
136        swd_accept(s, this_len - skip);
137        c->textsize += this_len - skip + 1;
138    }
139    else
140    {
141        assert(this_len <= 1);
142        c->textsize += this_len - skip;
143    }
144
145    s->m_len = SWD_THRESHOLD;
146#ifdef SWD_BEST_OFF
147    if (s->use_best_off)
148        memset(s->best_pos,0,sizeof(s->best_pos));
149#endif
150    swd_findbest(s);
151    c->m_len = s->m_len;
152#if defined(__UCL_CHECKER)
153    /* s->m_off may be uninitialized if we didn't find a match,
154     * but then its value will never be used.
155     */
156    c->m_off = (s->m_len == SWD_THRESHOLD) ? 0 : s->m_off;
157#else
158    c->m_off = s->m_off;
159#endif
160
161    swd_getbyte(s);
162
163    if (s->b_char < 0)
164    {
165        c->look = 0;
166        c->m_len = 0;
167        swd_exit(s);
168    }
169    else
170    {
171        c->look = s->look + 1;
172    }
173    c->bp = c->ip - c->look;
174
175#if 0
176    /* brute force match search */
177    if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look)
178    {
179        const ucl_bytep ip = c->bp;
180        const ucl_bytep m  = c->bp - c->m_off;
181        const ucl_bytep in = c->in;
182
183        if (ip - in > s->n)
184            in = ip - s->n;
185        for (;;)
186        {
187            while (*in != *ip)
188                in++;
189            if (in == ip)
190                break;
191            if (in != m)
192                if (memcmp(in,ip,c->m_len+1) == 0)
193                    printf("%p %p %p %5d\n",in,ip,m,c->m_len);
194            in++;
195        }
196    }
197#endif
198
199    if (c->cb && c->textsize > c->printcount)
200    {
201        (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user);
202        c->printcount += 1024;
203    }
204
205    return UCL_E_OK;
206}
207
208
209/***********************************************************************
210// bit buffer
211************************************************************************/
212
213static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize)
214{
215    if (endian != -1)
216    {
217        if (endian != 0)
218            return UCL_E_ERROR;
219        c->bb_c_endian = endian;
220    }
221    if (bitsize != -1)
222    {
223        if (bitsize != 8 && bitsize != 16 && bitsize != 32)
224            return UCL_E_ERROR;
225        c->bb_c_s = bitsize;
226        c->bb_c_s8 = bitsize / 8;
227    }
228    c->bb_b = 0; c->bb_k = 0;
229    c->bb_p = NULL;
230    c->bb_op = NULL;
231    return UCL_E_OK;
232}
233
234
235static void bbWriteBits(UCL_COMPRESS_T *c)
236{
237    ucl_bytep p = c->bb_p;
238    ucl_uint32 b = c->bb_b;
239
240    p[0] = UCL_BYTE(b >>  0);
241    if (c->bb_c_s >= 16)
242    {
243        p[1] = UCL_BYTE(b >>  8);
244        if (c->bb_c_s == 32)
245        {
246            p[2] = UCL_BYTE(b >> 16);
247            p[3] = UCL_BYTE(b >> 24);
248        }
249    }
250}
251
252
253static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit)
254{
255    assert(bit == 0 || bit == 1);
256    assert(c->bb_k <= c->bb_c_s);
257
258    if (c->bb_k < c->bb_c_s)
259    {
260        if (c->bb_k == 0)
261        {
262            assert(c->bb_p == NULL);
263            c->bb_p = c->bb_op;
264            c->bb_op += c->bb_c_s8;
265        }
266        assert(c->bb_p != NULL);
267        assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
268
269        c->bb_b = (c->bb_b << 1) + bit;
270        c->bb_k++;
271    }
272    else
273    {
274        assert(c->bb_p != NULL);
275        assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
276
277        bbWriteBits(c);
278        c->bb_p = c->bb_op;
279        c->bb_op += c->bb_c_s8;
280        c->bb_b = bit;
281        c->bb_k = 1;
282    }
283}
284
285
286static void bbPutByte(UCL_COMPRESS_T *c, unsigned b)
287{
288    /**printf("putbyte %p %p %x  (%d)\n", op, bb_p, x, bb_k);*/
289    assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op);
290    *c->bb_op++ = UCL_BYTE(b);
291}
292
293
294static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit)
295{
296    if (c->bb_k > 0)
297    {
298        assert(c->bb_k <= c->bb_c_s);
299        while (c->bb_k != c->bb_c_s)
300            bbPutBit(c, filler_bit);
301        bbWriteBits(c);
302        c->bb_k = 0;
303    }
304    c->bb_p = NULL;
305}
306
307
308
309/*
310vi:ts=4:et
311*/
312
313