1title	Lempel-Ziv Compressor
2; $Source: /usr/home/dhesi/zoo/RCS/lzc.asm,v $
3; $Id: lzc.asm,v 1.4 91/07/07 09:36:18 dhesi Exp $
4
5;Derived from Tom Pfau's public domain assembly code.
6;The contents of this file are hereby released to the public domain.
7;                                   -- Rahul Dhesi 1988/08/24
8
9UNBUF_IO	equ	1		;use unbuffered I/O
10
11public	_lzc
12
13	include asmconst.ai
14	include macros.ai
15
16check_gap	equ	4000		;Check ratio every so many bits
17scale_limit	equ	32000		;scale down if bitsout > scale_limit
18rat_thresh	equ	0ffffh		;don't reset if rat > rat_thresh
19
20;Hash table entry
21hash_rec	struc
22first	dw	?			; First entry with this value
23next	dw	?			; Next entry along chain
24char	db	?			; Suffix char
25hash_rec	ends
26
27extrn	docrc:near			;Procedure for block CRC in lzd.asm
28
29ifdef	UNBUF_IO
30extrn	_read:near
31extrn	_blockwrite:near
32else
33extrn	_zooread:near
34extrn	_zoowrite:near
35endif
36
37;Declare Segments
38_text	segment byte public 'code'
39_text	ends
40
41dgroup	group	_data
42	assume ds:dgroup,es:dgroup
43_data	segment word public 'data'
44extrn	_out_buf_adr:word		;address of C output buffer
45extrn	_in_buf_adr:word		;address of C input buffer
46
47extrn	memflag:byte			;got memory? flag
48
49save_sp		dw	?
50
51bytesin		dw	?		;count of bytes read
52bitsout		dw	?		;count of bits written
53ratio		dw	?		;recent ratio
54ratflag		db	?		;flag to remind us to check ratio
55bit_interval	dw	?		;interval at which to test ratio
56
57input_handle	dw	?
58output_handle	dw	?
59hash_seg	dw	?
60prefix_code	dw	?
61free_code	dw	?
62max_code	dw	?
63nbits		dw	?
64k		db	?
65bit_offset	dw	?
66input_offset	dw	0
67input_size	dw	0
68_data	ends
69
70memory	segment para public 'memory'
71hash	label	hash_rec
72memory	ends
73
74add_code macro
75	local	ac1,ac2,ac3
76	mov	bx,free_code		;Get code to use
77	push	ds			;point to hash table
78	mov	ds,hash_seg
79	or	di,di			;First use of this prefix?
80	jz	ac1			;zero means yes
81	mov	[si].next,bx		;point last use to new entry
82	jmp	short ac2
83ac1:	mov	[si].first,bx		;Point first use to new entry
84ac2:	cmp	bx,maxmax		;Have we reached code limit?
85	je	ac3			;equal means yes, just return
86
87	;call	index			;get address of new entry
88	call_index			;macro for speed
89
90	mov	[si].first,-1		;initialize pointers
91	mov	[si].next,-1
92	mov	[si].char,al		;save suffix char
93	inc	es:free_code		;adjust next code
94ac3:	pop	ds			;restore seg reg
95	endm
96
97read_char macro				;Macro for speed
98	local m$1,m$2,m$3,m$4
99	inc	[bytesin]		;Maintain input byte count for ratio
100	mov	di,input_offset		;Anything left in buffer?
101	cmp	di,input_size
102	jb	m$1			;less means yes
103
104	mov	cx,inbufsiz
105	call	doread			;read block
106
107	cmp	ax,-1
108	jnz	m$3
109	jmp	IO_err			;input error
110m$3:
111	mov	dx,_in_buf_adr		;for docrc
112	call	docrc
113	or	ax,ax			;Anything left?
114	jz	m$2			;zero means no, finished
115	mov	input_size,ax		;Save bytes read
116	mov	input_offset,0		;Point to beginning of buffer
117	mov	di,0
118m$1:
119	mov	si,_in_buf_adr
120	add	si,di
121	lodsb				;Read it in
122	inc	input_offset		;Adjust pointer
123	clc				;Success
124	jmp	short m$4
125m$2:	stc				;Nothing left
126m$4:
127	endm
128
129;Start writing code
130_text	segment
131	assume	cs:_text,ds:dgroup,es:dgroup,ss:nothing
132
133_lzc	proc	near
134	push	bp			;Standard C entry code
135	mov	bp,sp
136	push	di
137	push	si
138
139	push	ds			;Save ds to be sure
140	mov	bx,ds
141	mov	es,bx
142
143;Get two parameters, both integers, that are input file handle and
144;output file handle
145	mov	ax,[bp+4]
146	mov	[input_handle],ax
147	mov	ax,[bp+6]
148	mov	[output_handle],ax
149
150	call	compress		;Compress file
151					;Status received back in AX
152	pop	ds
153	pop	si			;Standard C return code
154	pop	di
155	mov	sp,bp
156	pop	bp
157	ret
158
159_lzc	endp
160
161compress	proc	near
162	mov	[save_sp],sp		;Save SP in case of error return
163
164;Initialize variables for serial re-entrancy
165	mov	[bit_offset],0
166	mov	[input_offset],0
167	mov	[input_size],0
168
169	test	memflag,0ffH		;Memory allocated?
170	jnz	gotmem			;If allocated, continue
171	malloc	<((maxmax * 5) / 16 + 20)>	;allocate it
172	jnc	here1
173	jmp	MALLOC_err1
174here1:
175	mov	hash_seg,ax		;Save segment address of mem block
176	mov	memflag,0ffh		;Set flag to remind us later
177gotmem:
178
179l1:	call	init_table		;Initialize the table and some vars
180	mov	ax,clear		;Write a clear code
181	call	write_code
182	;call	read_char		;Read first char
183	read_char			;macro for speed
184l4:
185
186;new code to check compression ratio
187	test	[ratflag],0FFH		;Time to check ratio?
188	jz	rd$1			;Skip checking if zero
189	call	check_ratio
190rd$1:
191	xor	ah,ah			;Turn char into code
192l4a:	mov	prefix_code,ax		;Set prefix code
193	;call	read_char		;Read next char
194	read_char			;macro for speed
195	jc	l17			;Carry means eof
196	mov	k,al			;Save char in k
197	mov	bx,prefix_code		;Get prefix code
198
199	call	lookup_code		;See if this pair in table
200
201	jnc	l4a			;nc means yes, new code in ax
202	;call	add_code		;Add pair to table
203	add_code			;Macro for speed
204	push	bx			;Save new code
205	mov	ax,prefix_code		;Write old prefix code
206	call	write_code
207	pop	bx
208	mov	al,k			;Get last char
209	cmp	bx,max_code		;Exceed code size?
210
211	jnb	l4$
212	jmp	l4
213l4$:
214	cmp	nbits,maxbits		;Currently less than maxbits?
215	jb	l14			;yes
216	mov	ax,clear		;Write a clear code
217	call	write_code
218	call	init_table		;Reinit table
219	mov	al,k			;get last char
220	jmp	l4			;Start over
221l14:	inc	nbits			;Increase number of bits
222	shl	max_code,1		;Double max code size
223	jmp	l4			;Get next char
224l17:	mov	ax,prefix_code		;Write last code
225	call	write_code
226	mov	ax,eof			;Write eof code
227	call	write_code
228	mov	ax,bit_offset		;Make sure buffer is flushed to file
229	cmp	ax,0
230	je	OK_ret
231	mov	dx,ax			;dx <- ax
232	shr	ax,1
233	shr	ax,1
234	shr	ax,1			;ax <- ax div 8
235	and	dx,07			;dx <- ax mod 8
236					;If extra bits, make sure they get
237	je	l17a			;written
238	inc	ax
239l17a:	call	flush
240OK_ret:
241	xor	ax,ax			;Normal return -- compressed
242	ret
243IO_err:
244	mov	ax,2			;I/O error return
245	mov	sp,[save_sp]		;Restore stack pointer
246	ret
247
248MALLOC_err1:				;hash table alloc error
249	mov	ax,1			;Malloc error return
250	mov	sp,[save_sp]		;Restore stack pointer
251	ret
252compress	endp
253
254init_table	proc	near
255	mov	[bytesin],0		;Input byte count
256	mov	[bitsout],0		;Output bit count
257	mov	[ratio],0
258	mov	[ratflag],0
259	mov	[bit_interval],check_gap
260
261	mov	nbits,9			;Set code size to 9
262	mov	max_code,512		;Set max code to 512
263	push	es			;Save seg reg
264	mov	es,hash_seg		;Address hash table
265	mov	ax,-1			;Unused flag
266	mov	cx,640			;Clear first 256 entries
267	mov	di,offset hash		;Point to first entry
268rep	stosw				;Clear it out
269	pop	es			;Restore seg reg
270	mov	free_code,first_free	;Set next code to use
271	ret				;done
272init_table	endp
273
274write_code	proc	near
275	push	ax			;Save code
276	mov	cx,nbits
277	add	[bitsout],cx		;Maintain output bit count for ratio
278	sub	[bit_interval],cx
279	jg	rd$2			;OK if not reached interval
280	mov	[ratflag],1		;else set flag -- check ratio soon
281rd$2:
282	mov	ax,bit_offset		;Get bit offset
283	mov	cx,nbits		;Adjust bit offset by code size
284	add	bit_offset,cx
285
286	mov	dx,ax			;dx <- ax
287	shr	ax,1
288	shr	ax,1
289	shr	ax,1			;ax <- ax div 8
290	and	dx,07			;dx <- ax mod 8
291
292	;Now ax contains byte offset and
293	;dx contains bit offset within that byte (I think)
294
295	cmp	ax,outbufsiz-4		;Approaching end of buffer?
296	jb	wc1			;less means no
297	call	flush			;Write the buffer
298
299	push	dx			;dx contains offset within byte
300	add	dx,nbits		;adjust by code size
301	mov	bit_offset,dx		;new bit offset
302	pop	dx			;restore dx
303
304;ax is an index into output buffer.  Next, ax <- address of buffer[ax]
305	add	ax,[_out_buf_adr]
306	mov	si,ax			;put in si
307	mov	al,byte ptr [si]	;move byte to first position
308
309;put value of al into first byte of output buffer
310	push	bx
311	mov	bx,[_out_buf_adr]
312	mov	[bx],al
313	pop	bx
314	xor	ax,ax			;Byte offset of zero
315wc1:	add	ax,[_out_buf_adr]	;Point into buffer
316	mov	di,ax			;Destination
317	pop	ax			;Restore code
318	mov	cx,dx			;offset within byte
319	xor	dx,dx			;dx will catch bits rotated out
320	jcxz	wc3			;If offset in byte is zero, skip shift
321wc2:	shl	ax,1			;Rotate code
322	rcl	dx,1
323	loop	wc2
324	or	al,byte ptr [di]	;Grab bits currently in buffer
325wc3:	stosw				;Save data
326	mov	al,dl			;Grab extra bits
327	stosb				;and save
328	ret
329write_code	endp
330
331flush	proc	near
332	push	ax			;Save all registers
333	push	bx			;AX contains number of bytes to write
334	push	cx
335	push	dx
336
337	push	si			;may not be necessary to save si & di
338	push	di
339
340	push	ax			;save byte count
341
342	push	ax			;byte count
343	push	_out_buf_adr		;buffer address
344	push	output_handle		;zoofile
345ifdef	UNBUF_IO
346	call	_blockwrite
347else
348	call	_zoowrite
349endif
350	add	sp,6
351
352	pop	cx			;recover byte count
353
354	cmp	ax,cx
355
356	jz	here2
357	jmp	IO_err		;I/O error
358
359here2:
360	pop	di
361	pop	si
362
363	pop	dx
364	pop	cx
365	pop	bx
366	pop	ax
367	ret
368flush		endp
369
370lookup_code	proc	near
371	push	ds			;Save seg reg
372	mov	ds,hash_seg		;point to hash table
373
374	;call	index			;convert code to address
375	call_index			;macro for speed
376
377	mov	di,0			;flag
378	mov	bx,[si].first
379	cmp	bx,-1			;Has this code been used?
380	je	gc4			;equal means no
381	inc	di			;set flag
382gc2:
383	;call	index			;convert code to address
384	call_index			;macro for speed
385
386	cmp	[si].char,al		;is char the same?
387	jne	gc3			;ne means no
388	clc				;success
389	mov	ax,bx			;put found code in ax
390	pop	ds			;restore seg reg
391	ret				;done
392gc3:
393	mov	bx,[si].next
394	cmp	bx,-1			;More left with this prefix?
395	je	gc4			;equal means no
396	jmp	gc2			;try again
397gc4:	stc				;not found
398	pop	ds			;restore seg reg
399	ret				;done
400lookup_code	endp
401
402comment #
403index	proc	near
404	mov	si,bx			;si = bx * 5 (5 byte hash entries)
405	shl	si,1			;si = bx * 2 * 2 + bx
406	shl	si,1
407	add	si,bx
408	ret
409index		endp
410# end comment
411
412check_ratio	proc	near
413	push	ax
414
415;	mov	dl,'*'			;'*' printed means checking ratio
416;	call	sendout
417
418	;Getting ready to check ratios.  If bitsout is over scale_limit,
419	;then we scale down both bitsout and bytesin by a factor
420	;of 4.  This will avoid overflow.
421	mov	cx,[bitsout]
422	cmp	cx,scale_limit
423	jb	scale_ok
424
425	mov	cl,2
426	shr	[bytesin],cl
427	shr	[bitsout],cl
428
429scale_ok:
430	;Note MASM bug:  "mov ah,high [bytesin]" and
431	;"mov al,low [bytesin]" don't work.
432	mov	ah,byte ptr [bytesin]
433	mov	dl,byte ptr [bytesin+1]
434
435	sub	al,al
436	sub	dh,dh			;dx:ax = 8 * bitsin = 256 * bytesin
437	mov	cx,[bitsout]		;cx <- bitsout
438	or	cx,cx			;Division by zero?
439	jnz	candivide		;No -- go ahead divide
440	mov	ax,0FFFFH		;yes -- assume max poss value
441	jmp	short divided
442candivide:
443	;Calculate cx as (bytesin * 256) / bitsout  = bitsin * 8 / bitsout
444	div	cx			;ax <- rat <- dx:ax / cx
445	shl	ax,1
446	shl	ax,1			;rat <- 4 * bytes_in / bytes_out
447divided:
448	;Enter here with ax = rat = bitsin / bitsout.
449
450;	call print_data			;print info for debugging
451
452	;If rat > rat_thresh then ratio is good;  do not reset table
453;	cmp	ax,rat_thresh
454;	ja	ratdone
455
456	;Compare rat against ratio
457	mov	bx,ax			;save rat in bx
458	cmp	ax,[ratio]		;cmp rat,ratio
459	jb	ratless			;trouble if ratio is now smaller
460	mov	ax,[ratio]
461	call	mul7			;ax <- 7 * ratio
462	add	ax,bx			;ax = 7 * ratio + rat
463	shr	ax,1
464	shr	ax,1
465	shr	ax,1			;ax = (7 * ratio + rat) / 8
466	mov	[ratio],ax		;ratio = (7 * ratio + rat) / 8
467	jmp	short ratdone
468ratless:				;ratio is going down, so...
469	mov	[bytesin],0
470	mov	[bitsout],0
471
472;	mov	dl,'#'			;'#' printed means table reset
473;	call	sendout
474
475	mov	ax,clear		;Write a clear code
476	call	write_code
477	call	init_table		;Reinit table
478ratdone:
479	mov	[ratflag],0
480	mov	[bit_interval],check_gap
481	pop	ax
482	ret
483check_ratio	endp
484
485comment #
486sendout	proc	near			;char in dl send to console
487	push	ax
488	mov	ah,02
489	int	21H
490	pop	ax
491	ret
492sendout	endp
493# end comment
494
495mul7	proc	near			;multiply ax by 7
496	push	dx
497	mov	dx,7
498	mul	dx			;dx:ax <- 7 * ax
499	pop	dx
500	ret
501mul7	endp
502
503comment #
504mul3	proc	near			;multiply ax by 3
505	push	dx
506	mov	dx,3
507	mul	dx			;dx:ax <- 3 * ax
508	pop	dx
509	ret
510mul3	endp
511# end comment
512
513comment #
514mul1_125 proc	near			;multiply ax by 1.125
515	push	bx
516	mov	bx,ax
517	shr	bx,1
518	shr	bx,1
519	shr	bx,1			;bx = n / 8
520	add	ax,bx			;ax <- n + n / 8
521	pop	bx
522	ret
523mul1_125 endp
524# end comment
525
526comment #
527print_data proc near
528	;Debugging -- print bytesin, bitsout, rat, and ratio
529	push	ax
530	push	bx
531	push	cx
532	push	dx
533
534	push	ax		;print rat
535	call	_prtint
536	add	sp,2
537
538	push	[ratio]		;print ratio
539	call	_prtint
540	add	sp,2
541
542	push	[bytesin]
543	call	_prtint
544	add	sp,2
545
546	push	[bitsout]
547	call	_prtint
548	add	sp,2
549
550	pop	dx
551	pop	cx
552	pop	bx
553	pop	ax
554	ret
555print_data endp
556# end comment
557
558;doread reads cx characters and stores them in input buffer
559;return value from zooread is returned in ax
560doread	proc	near		;reads block
561	push	bx
562	push	cx
563	push	dx
564	push	si
565	push	di
566
567	push	cx			;byte count
568	push	_in_buf_adr		;buffer address
569	push	input_handle		;zoofile
570ifdef	UNBUF_IO
571	call	_read
572else
573	call	_zooread
574endif
575	add	sp,6
576
577	pop	di
578	pop	si
579	pop	dx
580	pop	cx
581	pop	bx
582	ret
583doread	endp
584
585_text	ends
586
587end
588
589