1;---- urlencode.asm ----------------------------------------------------------;
2;
3; URL encode a sequence of octets (RFC 1738).
4;
5; The sequence comes from the command line. If no sequence is specified,
6; it is taken from stdin. The output is sent to stdout.
7;
; The characters a-zA-Z0-9 are not encoded. Additionally, any characters
; named in a -[list] exclusion option on the command line are not
; encoded.
11;
12; Everything else is encoded into %XX where XX is the hexadecimal value
13; of the octet, always two bytes long.
14;
15; Valid options are:
16;
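;	-a - encode alphanumerics (same as -dt)
;	-d - encode [0-9]
;	-e - end of options; the remaining arguments are encoded as text
;	-h - help
;	-n - do not encode new lines
;	-p - encode spaces as +
;	-l - append a new line at the end
;	-r - version info
;	-t - encode [A-Za-z]
;	-% - encode the percent sign (only needed with -x)
;	-x - exclude everything from encoding
;	-[list] - exclude the listed characters from encoding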
26;
27; This code, when assembled and linked, will work under FreeBSD, and perhaps
28; other BSD systems. It requires NASM for assembly:
29;
30;	nasm -f elf urlencode.asm
31;	ld -o urlencode urlencode.o
;	strip urlencode
33;
34; Started:	23 Oct 2000
35; Updated:	24 Oct 2000
36;
37; Version 1.0
38;
39; Copyright (c) 2000 G. Adam Stanislav
40; All rights reserved.
41;
42; Redistribution and use in source and binary forms, with or without
43; modification, are permitted provided that the following conditions
44; are met:
45; 1. Redistributions of source code must retain the above copyright
46;    notice, this list of conditions and the following disclaimer.
47; 2. Redistributions in binary form must reproduce the above copyright
48;    notice, this list of conditions and the following disclaimer in the
49;    documentation and/or other materials provided with the distribution.
50;
51; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
52; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
55; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61; SUCH DAMAGE.
62;
63;-----------------------------------------------------------------------------;
64
; Size in bytes of each of the two I/O buffers (ibuffer and obuffer).
%define	BUFSIZE	2048

; Option flags. They live in AH for the whole run, one bit per option.
; -n: do not encode new lines
%define	NFLAG	(1 << 0)
; -p: encode spaces as '+'
%define	PFLAG	(1 << 1)
; -l: append a new line at the end of the output
%define	LFLAG	(1 << 2)
; -%: encode '%' (only consulted when -x is in effect)
%define	PERC	(1 << 3)
; -t: encode A-Za-z
%define	TFLAG	(1 << 4)
; -d: encode 0-9
%define	DFLAG	(1 << 5)
; -x: exclude everything from encoding unless another option says otherwise
%define	XFLAG	(1 << 6)
; -e: the remaining arguments are text to encode
%define	EFLAG	(1 << 7)
; -a: encode alphanumerics, i.e. both -d and -t
%define	AFLAG	(TFLAG|DFLAG)
75
; Uninitialised storage. The loader zero-fills .bss, so etable starts out
; with every entry 0 ("not excluded") without costing 256 bytes of file image.
section	.bss
ibuffer	resb	BUFSIZE			; raw input read from stdin
obuffer	resb	BUFSIZE			; encoded output waiting to be written
etable	resb	256			; etable[c] != 0: octet c is on the exclusion list

; Constant data: nothing below is ever written to, so it lives in .rodata.
section	.rodata
hex	db	'0123456789ABCDEF'	; nibble -> hexadecimal digit
rerr	db	'URLENCODE: Read error.', 0Ah
rerrlen	equ	$-rerr
align 4
werr	db	'URLENCODE: Write error.', 0Ah
werrlen	equ	$-werr
align 4

; The three messages below share one contiguous run of text:
;	-r prints rmsg for rlen bytes		(version only)
;	-h prints rmsg for hlen bytes		(version, usage and the notes)
;	a bad option prints umsg for ulen bytes	(usage only)
rmsg	db	'URLENCODE version 1.0 (24 Oct 2000)', 0Ah
	db	'Copyright 2000 G. Adam Stanislav', 0Ah
	db	'All rights reserved.', 0Ah
rlen	equ	$-rmsg
	db	0Ah
umsg	db	'Usage: urlencode [options] [string ...]', 0Ah, 0Ah
	db	09h, '-% = encode % (only needed with -x)', 0Ah
	db	09h, '-a = encode alphanumeric characters (same as -dt)', 0Ah
	db	09h, '-d = encode digits (0-9)', 0Ah
	db	09h, '-e = encode rest of arguments; exit if [string ...] is missing', 0Ah
	db	09h, '-h = help', 0Ah
	db	09h, '-l = append a new line', 0Ah
	db	09h, '-n = do not encode new lines', 0Ah
	db	09h, "-p = encode spaces as `+' (ignored if excluded)", 0Ah
	db	09h, '-r = print version information', 0Ah
	db	09h, '-t = encode alphabetic characters (A-Za-z)', 0Ah
	db	09h, '-x = exclude all (no encoding)', 0Ah
	db	09h, '-[list] = exclude listed characters', 0Ah, 0Ah
ulen	equ	$-umsg
	db	'The exclusion list may be URL-encoded. It may contain ranges. Its members', 0Ah
	db	"may be escaped with `\' (e.g., `-[\-\%\]\\]' excludes `-', `%', `]', and `\'", 0Ah
	db	"from encoding, `-[%23-%26]' excludes `#', `$', `%', and `&').", 0Ah, 0Ah
	db	'If no [string ...] is specified, input comes from stdin.', 0Ah
hlen	equ	$-rmsg
113
114section	.text
115
;------------------------------------------------------------------------------
; encode - send one octet to the output, URL-encoding it when required.
;
; In:	AL  = octet to emit
;	AH  = option flags
;	EDX = upper 24 bits zero (DL is used as an index into etable and hex)
;	ECX, EDI = output buffer state, as used by putchar
; Out:	either one octet (the octet itself, or '+' for a space under -p) or
;	three octets (%XX) appended through putchar.
;	AH is preserved; AL and DL are clobbered.
;
; Execution falls through into putchar, which must directly follow.
;------------------------------------------------------------------------------
encode:
	mov	dl, al			; EDX = octet, usable as a table index

	test	ah, XFLAG
	jz	.notx

	; ---- -x given: nothing is encoded unless an option asks for it ----
	cmp	al, '%'
	jne	.xa

	test	ah, PERC
	jnz	near .go		; -x -%: '%' is encoded unconditionally

.xa:
	test	ah, AFLAG
	jz	near putchar		; neither -d nor -t: emit as is

	test	ah, DFLAG
	jz	.xt

	; Unsigned compares: AL is an octet, not a signed quantity.
	cmp	al, '0'
	jb	near putchar
	cmp	al, '9'
	jbe	.cmd			; a digit under -d

.xt:
	; Letters may only be encoded when -t was given. Without this test
	; "-xd" would encode letters as well as digits.
	test	ah, TFLAG
	jz	near putchar

	cmp	al, 'z'
	ja	near putchar

	cmp	al, 'a'
	jae	.cmd

	cmp	al, 'A'
	jb	near putchar

	cmp	al, 'Z'
	ja	near putchar
	jmp	short .cmd

	; ---- default mode: everything but [0-9A-Za-z] is encoded ----
.notx:
	cmp	al, 0Ah
	jne	.test

	test	ah, NFLAG
	jnz	putchar			; -n: new lines pass through untouched

.test:
	test	ah, DFLAG
	jnz	.alpha			; -d: digits lose their exemption

	cmp	al, '0'
	jb	.cmd
	cmp	al, '9'
	jbe	putchar

.alpha:
	test	ah, TFLAG
	jnz	.cmd			; -t: letters lose their exemption

	cmp	al, 'A'
	jb	.cmd
	cmp	al, 'Z'
	jbe	putchar

	cmp	al, 'a'
	jb	.cmd
	cmp	al, 'z'
	jbe	putchar

.cmd:
	; Octets named in a -[list] option are never encoded.
	cmp	byte [etable+edx], 0
	jne	putchar

.encode:
	cmp	dl, ' '
	jne	.go
	test	ah, PFLAG
	jz	.go

	mov	al, '+'			; -p: a space becomes '+'
	jmp	short putchar

.go:
	; Emit '%', then the high nibble, then the low nibble as hex digits.
	mov	al, '%'
	call	putchar

	push	edx			; keep the whole octet for the low nibble
	shr	dl, 4
	mov	al, [hex+edx]
	pop	edx
	call	putchar

	and	dl, 0Fh
	mov	al, [hex+edx]

	; Fall through
212
;------------------------------------------------------------------------------
; putchar - append AL to the output buffer; flush it once it is full.
;
; In:	AL = octet, EDI = write cursor, ECX = octets already buffered
; Out:	EDI/ECX advanced by one, or reset by write when the buffer filled.
;	EAX and EDX are preserved.
;------------------------------------------------------------------------------
align 4
putchar:
	stosb
	inc	ecx
	cmp	ecx, BUFSIZE
	jb	write.done

	; Fall through to write

;------------------------------------------------------------------------------
; write - send the contents of the output buffer to stdout.
;
; In:	the buffer starts at EDI - ECX and is ECX bytes long
; Out:	ECX = 0, EDI = start of the buffer; EAX and EDX are preserved.
;	On failure an error message goes to stderr and the program exits
;	with status 2.
;
; The system call is made the FreeBSD way: arguments on the stack below one
; extra dword, call number in EAX. Failure is reported through the carry
; flag with a positive errno in EAX, so testing the sign of EAX would never
; see an error. The carry flag is therefore checked right after int 80h,
; before any instruction that changes it.
;
; NOTE: a short write (fewer than ECX bytes accepted) is not retried.
;------------------------------------------------------------------------------
write:
	jecxz	.done			; Empty buffer, do nothing

	push	eax			; preserve AL and the flags in AH
	sub	edi, ecx		; Find start of buffer
	sub	eax, eax
	push	ecx			; nbytes
	inc	al			; stdout = 1
	push	edi			; buf
	push	eax			; fd
	push	edx			; extra dword; doubles as a saved EDX
	mov	al, 4			; SYS_write
	int	80h
	jc	.errexit		; carry set: the write failed
	pop	edx
	sub	ecx, ecx		; Buffer is now empty
	add	esp, byte 12
	pop	eax

.done:
	ret

align 4
.errexit:
	; The stack is left as it is; the program is about to exit.
	sub	eax, eax
	push	dword werrlen
	mov	al, 2			; stderr
	push	dword werr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	mov	al, 2			; return failure
	push	eax
	push	edx
	dec	al			; SYS_exit
	int	80h
265
;------------------------------------------------------------------------------
; getchar - fetch the next input octet, refilling the input buffer on demand.
;
; In:	EBX = octets still unread in the input buffer, ESI = read cursor
; Out:	AL = next octet; ESI advanced, EBX decremented.
;	Does not return when no input is left: read exits the program.
;------------------------------------------------------------------------------
align 4
getchar:
	test	ebx, ebx
	jnz	.fetch			; something is still buffered
	call	read			; refill; sets ESI and EBX

.fetch:
	lodsb
	dec	ebx
	ret
276
;------------------------------------------------------------------------------
; read - refill the input buffer from stdin.
;
; In:	AH = option flags (LFLAG is consulted at end of input)
; Out:	ESI = ibuffer, EBX = number of octets read (always > 0 on return);
;	EAX, ECX and EDX are preserved.
;	End of input: optionally append a new line (-l), flush the output
;	and exit with status 0 (entry point read.exit, also used by _start).
;	Failure: print a message on stderr and exit with status 1.
;
; The system call is made the FreeBSD way. Failure is reported through the
; carry flag with a positive errno in EAX, so the flag must be tested right
; after int 80h. Testing only the sign of EAX would take the errno for a
; byte count and keep encoding stale buffer contents.
;------------------------------------------------------------------------------
align 4
read:
	push	eax			; preserve AL and the flags in AH
	push	ecx			; preserve the output count
	push	dword BUFSIZE		; nbytes
	mov	esi, ibuffer
	sub	eax, eax		; stdin = 0
	push	esi			; buf
	push	eax			; fd
	push	edx			; extra dword; doubles as a saved EDX
	mov	al, 3			; SYS_read
	int	80h
	jc	.errexit		; carry set: the read failed
	pop	edx
	add	esp, byte 12
	pop	ecx
	or	eax, eax		; ZF = end of input (survives mov and pop)
	mov	ebx, eax
	pop	eax
	je	.exit
	ret

align 4
.exit:
	test	ah, LFLAG
	je	.flush

	mov	al, 0Ah			; -l: finish with a new line
	call	putchar

.flush:
	call	write			; Flush output buffer

	sub	eax, eax		; Return success
	push	eax
	push	edx
	inc	al			; SYS_exit
	int	80h

.errexit:
	; The stack is left as it is; the program is about to exit.
	sub	eax, eax
	push	dword rerrlen
	mov	al, 2			; stderr
	push	dword rerr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	inc	al			; Return failure, SYS_exit
	push	eax
	push	eax
	int	80h
331
;------------------------------------------------------------------------------
; _start - program entry: parse the options, then encode either the remaining
; command line arguments or, when there are none, standard input.
;
; Register roles for the whole program:
;	AH  = option flags		AL  = current octet
;	EBX = unread octets in ibuffer	ESI = read cursor
;	ECX = octets held in obuffer	EDI = write cursor
;	EDX = table index; its upper 24 bits are kept at zero
;	EBP = pointer to the argument slot being examined
;------------------------------------------------------------------------------
global	_start
_start:
	xor	eax, eax
	xor	ebx, ebx
	xor	ecx, ecx
	xor	edx, edx
	lea	ebp, [esp+8]		; first argument slot; assumes argc at [esp]
					; and the program name at [esp+4]
	mov	edi, obuffer
	cld				; lodsb/stosb walk upwards

	; ---- examine the next command line argument ----
.next:
	mov	esi, [ebp]

	test	esi, esi
	jz	near .preloop		; NULL pointer: no arguments left

	test	ah, EFLAG
	jnz	near .cmdinnerloop	; -e seen: this argument is text
	lodsb
	cmp	al, '-'
	jne	near .cmdfirst		; no dash: text starts here, AL = 1st octet

	cmp	byte [esi], 0
	je	near .loop		; a lone "-": encode stdin

	add	ebp, byte 4		; this argument is an option word

	; ---- walk the option letters of one argument ----
.cmd:
	lodsb
	test	al, al
	jz	.next			; end of this option word

	cmp	al, '['
	je	near .parse		; start of an exclusion list

.perc:
	cmp	al, '%'
	jne	.t

	or	ah, PERC
	jmp	short .cmd

.t:
	cmp	al, 't'
	jne	.d

	or	ah, TFLAG
	jmp	short .cmd

.d:
	cmp	al, 'd'
	jne	.e

	or	ah, DFLAG
	jmp	short .cmd

.e:
	cmp	al, 'e'
	jne	.x

	or	ah, EFLAG
	jmp	short .cmd

.x:
	cmp	al, 'x'
	jne	.a

	or	ah, XFLAG
	jmp	short .cmd

.a:
	cmp	al, 'a'
	jne	.p

	or	ah, AFLAG
	jmp	short .cmd

.p:
	cmp	al, 'p'
	jne	.n

	or	ah, PFLAG
	jmp	short .cmd

.n:
	cmp	al, 'n'
	jne	.l

	or	ah, NFLAG
	jmp	short .cmd

.l:
	cmp	al, 'l'
	jne	.r

	or	ah, LFLAG
	jmp	short .cmd

	; ---- -r: version information, then exit(0) ----
.r:
	cmp	al, 'r'
	jne	.h

	xor	ah, ah			; so that EAX holds nothing but AL below
	push	dword rlen

	; Shared tail of -r and -h: the message length is already pushed.
.msg:
	mov	al, 2			; stderr
	push	dword rmsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	xor	eax, eax		; return success
	push	eax
	push	eax
	inc	al			; SYS_exit
	int	80h

	; ---- -h: version, usage and notes, then exit(0) ----
.h:
	cmp	al, 'h'
	jne	.err

	xor	ah, ah
	push	dword hlen
	jmp	short .msg

	; ---- anything else: usage on stderr, exit(-1) ----
.err:
	xor	ah, ah
	push	dword ulen
	mov	al, 2			; stderr
	push	dword umsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	mov	eax, -1			; return failure (-1)
	push	eax
	push	edx
	neg	eax			; SYS_exit
	int	80h

	; The stdin loop below never terminates by itself. It does not
	; have to: read leaves the program once the input is exhausted.

.preloop:
	test	ah, EFLAG
	jnz	near read.exit		; -e without text: nothing to encode

.loop:
	call	getchar
	cmp	al, 0Ah
	jne	.plain

	; A new line is encoded like any other octet, then the output is
	; flushed so that it appears line by line.
.nl:
	call	encode
	call	write
	jmp	short .loop

.plain:
	call	encode
	jmp	short .loop
495
496	; Parse the exclude list.
497.parse:
498	lodsb
499
500.parsechar:
501	or	al, al
502	je	.err
503
504	cmp	al, ']'
505	je	near .cmd
506
507	call	.parseperc
508
509.ch1:
510	; We have a character to exclude. It is either just a character,
511	; or the first of a range.
512	movzx	edx, al
513	mov	byte [etable+edx], -1
514
515	lodsb
516	cmp	al, '-'
517	jne	.parsechar
518
519	lodsb
520	or	al, al
521	je	.err
522
523	cmp	al, ']'
524	je	.err
525
526	call	.parseperc
527
528	cmp	dl, al
529	je	.parse
530	ja	.down
531	movzx	ebx, al
532
533.store:
534	mov	byte [etable+ebx], -1
535	dec	bl
536	cmp	bl, dl
537	ja	.store
538
539.stored:
540	sub	bl, bl
541
542	jmp	short .parse
543
544.down:
545	movzx	ebx, al
546
547.dstore:
548	mov	byte [etable+ebx], -1
549	inc	bl
550	cmp	bl, dl
551	jb	.dstore
552	jmp	short .stored
553
;------------------------------------------------------------------------------
; .parseperc - resolve one member of the exclusion list to its octet value.
;
; In:	AL  = first character of the member, ESI = text following it
; Out:	AL  = octet value:
;		'\' c	-> c (taken literally; the end of the string is an error)
;		%XX	-> the octet with hexadecimal value XX (either case)
;		other	-> the character itself
;	ESI advanced past whatever was consumed; DL is preserved; DH = 0
;	after a %XX member.
;	Malformed input jumps to .err, which prints the usage and exits.
;
; Both hexadecimal digits go through the same helper, so upper and lower
; case are accepted in either position ("%2a" is 2Ah, not 61h).
;------------------------------------------------------------------------------
.parseperc:
	cmp	al, '\'
	jne	.tryperc

	lodsb				; escaped: take the next octet as it is
	or	al, al
	je	near .err

.gotit:
	ret

.tryperc:
	cmp	al, '%'
	jne	.gotit			; plain character

	lodsb
	call	.hexval			; high nibble
	mov	dh, al
	shl	dh, 4

	lodsb
	call	.hexval			; low nibble
	or	al, dh
	sub	dh, dh			; keep the upper bits of EDX clear
	ret

	; .hexval - convert the hexadecimal digit in AL to its value 0..15.
	; Anything else, including the end of the string, jumps to .err.
.hexval:
	cmp	al, '0'
	jb	near .err
	cmp	al, '9'
	ja	.upper

	sub	al, '0'
	ret

.upper:
	cmp	al, 'A'
	jb	near .err
	cmp	al, 'F'
	ja	.lower

	add	al, 10 - 'A'
	ret

.lower:
	cmp	al, 'a'
	jb	near .err
	cmp	al, 'f'
	ja	near .err

	add	al, 10 - 'a'
	ret
633
634.cmdbigloop:
635	add	ebp, byte 4
636	mov	esi, [ebp]
637	or	esi, esi
638	je	near read.exit
639
640	mov	al, ' '
641	call	encode
642
643.cmdinnerloop:
644	lodsb
645
646.cmdfirst:
647	or	al, al
648	je	.cmdbigloop
649	call	encode
650	jmp	short .cmdinnerloop
651
652;------ End of code -----------------------------------------------------------
653
654