1;---- urldecode.asm ----------------------------------------------------------;
2;
3; URL decode a sequence of octets (RFC 1738).
4;
5; The sequence comes from stdin or command line arguments. The output
6; is sent to stdout.
7;
8; Options:
9;
10;	-e - end of options
11;	-h - help
12;	-l - append a new line
13;	-p - decode + into space
14;	-r - version
15;
16; This code, when assembled and linked, will work under FreeBSD, and perhaps
17; other BSD systems. It requires NASM for assembly:
18;
19;	nasm -f elf urldecode.asm
20;	ld -o urldecode urldecode.o
;	strip urldecode
22;
23; Started:	23 Oct 2000
24; Updated:	25 Oct 2000
25;
26; Version 1.0
27;
28; Copyright (c) 2000 G. Adam Stanislav
29; All rights reserved.
30;
31; Redistribution and use in source and binary forms, with or without
32; modification, are permitted provided that the following conditions
33; are met:
34; 1. Redistributions of source code must retain the above copyright
35;    notice, this list of conditions and the following disclaimer.
36; 2. Redistributions in binary form must reproduce the above copyright
37;    notice, this list of conditions and the following disclaimer in the
38;    documentation and/or other materials provided with the distribution.
39;
40; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
41; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50; SUCH DAMAGE.
51;
52;-----------------------------------------------------------------------------;
53
; Size, in bytes, of each of the two I/O buffers (ibuffer and obuffer).
%define	BUFSIZE	2048

; Bit flags kept in AH for the whole run of the program.
%define	PFLAG	(1 << 0)	; -p seen: decode `+' into a space
%define	PERC	(1 << 1)	; set while decode fetches the octet right after a `%'
%define	LFLAG	(1 << 2)	; -l seen: append a new line before exiting
%define	EFLAG	(1 << 3)	; -e seen: remaining arguments are strings, not options
59
section	.bss
; Input buffer: "read" fills it from stdin and points ESI at its start.
ibuffer:	resb	BUFSIZE
; Output buffer: _start points EDI at it; putchar stores into it and
; "write" sends its contents to stdout.
obuffer:	resb	BUFSIZE
63
section	.data
; Message printed to stderr by read.errexit.
rerr	db	'URLDECODE: Read error.', 0Ah
rerrlen	equ	$-rerr
align 4
; Message printed to stderr by write.errexit.
werr	db	'URLDECODE: Write error.', 0Ah
werrlen	equ	$-werr
align 4
; The version text, the usage text and the closing paragraph are laid out
; back to back on purpose, so that three different lengths can select
; three different messages:
;
;	rmsg, rlen = version banner only			(-r)
;	umsg, ulen = usage summary only				(bad option)
;	rmsg, hlen = banner + blank line + usage + closing text	(-h)
;
; Do not insert, remove or reorder anything between rmsg and hlen without
; re-checking those three spans.
rmsg	db	'URLDECODE version 1.0 (25 Oct 2000)', 0Ah
	db	'Copyright 2000 G. Adam Stanislav', 0Ah
	db	'All rights reserved.', 0Ah
rlen	equ	$-rmsg
	; Blank line separating the banner from the usage text (part of -h only).
	db	0Ah
umsg	db	'Usage: urldecode [options] [string ...]', 0Ah, 0Ah
	db	09h, '-e = decode rest of arguments; exit if [string ...] is missing', 0Ah
	db	09h, '-h = help', 0Ah
	db	09h, '-l = append a new line', 0Ah
	db	09h, "-p = decode `+' into spaces", 0Ah
	db	09h, '-r = version', 0Ah, 0Ah
ulen	equ	$-umsg
	; Closing paragraph, shown by -h only (it lies past ulen).
	db	"If no [string ...] is specified, input comes from stdin unless the `-e' option", 0Ah
	db	'is used.', 0Ah, 0Ah
; Measured from rmsg, not umsg: -h prints everything from the banner on.
hlen	equ	$-rmsg
86
87section	.text
88
;-----------------------------------------------------------------------
; decode - URL-decode the octet in AL and hand the result to putchar.
;
; In:	AL  = input octet
;	AH  = option flags (PFLAG is consulted; PERC is set and cleared)
;	ESI/EBX = input cursor and count, as maintained by getchar
;	EDI/ECX = output cursor and count, as maintained by putchar
; Out:	one octet (or the literal text of a malformed escape) appended
;	to the output buffer
; Uses:	AL, DL, DH
;
; `+' becomes a space only when PFLAG is set. `%' followed by two hex
; digits becomes the octet they encode. A `%' not followed by two hex
; digits is emitted literally and the offending octet is "ungot"
; (INC EBX / DEC ESI) so the caller sees it again as ordinary input.
;
; NOTE(review): PERC is cleared before the second digit is fetched, so
; when stdin ends right after "%X" read.exit has no record of the
; pending "%X" and it is not printed.
;
; This procedure ends by falling through into putchar.
;-----------------------------------------------------------------------
decode:
	cmp	al, '%'
	je	.escape

	cmp	al, '+'
	jne	.emit

	test	ah, PFLAG
	jz	.emit			; -p not given: `+' stays `+'

	mov	al, ' '

.emit:
	jmp	putchar

	; .xdigit - convert the ASCII hex digit in AL to its value.
	; Returns with CF clear and AL = 0..15 on success, or with CF set
	; (AL not meaningful) when AL is not a hex digit.
.xdigit:
	cmp	al, '0'
	jb	.xbad
	cmp	al, '9'
	jbe	.xnum

	cmp	al, 'A'
	jb	.xbad
	cmp	al, 'F'
	jbe	.xupper

	cmp	al, 'a'
	jb	.xbad
	cmp	al, 'f'
	ja	.xbad

	sub	al, 'a' - 10
	clc
	ret

.xupper:
	sub	al, 'A' - 10
	clc
	ret

.xnum:
	sub	al, '0'
	clc
	ret

.xbad:
	stc
	ret

.escape:
	; PERC tells read.exit that a `%' is pending, should stdin end
	; while the next octet is being fetched.
	;
	; getchar works here even when the input is the command line
	; rather than stdin: EBX is then so large that getchar will not
	; fall back to "read" unless the command line holds billions of
	; `%'.
	or	ah, PERC
	call	getchar
	and	ah, ~PERC

	xor	dl, dl
	mov	dh, al			; keep the raw first octet for .nothex2

	call	.xdigit
	jc	.nothex1

	mov	dl, al			; DL = high nibble

	; Again, this works for both stdin and command line input.
	call	getchar
	call	.xdigit
	jnc	.combine

	; Second octet is not a hex digit: unget it, then print `%'
	; followed by the first octet exactly as it was read.
	inc	ebx
	dec	esi
	mov	al, '%'
	call	putchar
	mov	al, dh
	jmp	putchar

.nothex1:
	; First octet is not a hex digit: unget it and print a bare `%'.
	inc	ebx
	dec	esi
	mov	al, '%'
	jmp	putchar

.combine:
	shl	dl, 4
	or	al, dl			; AL = (high << 4) | low

	; Fall through
align 4
;-----------------------------------------------------------------------
; putchar - append the octet in AL to the output buffer.
;
; In:	AL = octet, EDI = next free byte of the buffer,
;	ECX = number of octets already buffered
; Out:	EDI/ECX advanced, or reset by "write" when the buffer was sent
;
; The buffer is sent to stdout when a new line (0Ah) is stored or once
; it holds BUFSIZE octets; otherwise this simply returns.
;-----------------------------------------------------------------------
putchar:
	stosb
	inc	ecx

	cmp	al, 0Ah
	je	.flush

	cmp	ecx, BUFSIZE
	jae	.flush

	ret

.flush:
	; Fall through to write
211
;-----------------------------------------------------------------------
; write - send the contents of the output buffer to stdout.
;
; In:	EDI = one past the last buffered octet, ECX = octets buffered
;	(the buffer therefore starts at EDI - ECX)
; Out:	EDI = start of the buffer, ECX = 0; EAX and EDX preserved
;
; Does nothing when the buffer is empty. On failure it prints werr to
; stderr and exits with status 2; it does not return in that case.
;
; A native FreeBSD system call reports failure by setting the carry
; flag and returning a positive errno in EAX, so the carry flag is
; tested right after the call, before any instruction that changes the
; flags. The sign test is kept as well, for environments that report
; failure as a negative result.
;-----------------------------------------------------------------------
write:
	jecxz	.done			; Empty buffer, do nothing

	push	eax			; Caller's AL/AH must survive
	sub	edi, ecx		; Find start of buffer
	sub	eax, eax
	push	ecx			; Number of octets to write
	inc	al			; stdout = 1
	push	edi			; Address of the data
	push	eax			; File descriptor
	push	edx			; Filler for the return-address slot
	mov	al, 4			; SYS_write
	int	80h
	pop	edx			; POP does not touch the flags
	jc	.errexit		; Carry set = the call failed
	add	esp, byte 12
	sub	ecx, ecx		; Buffer is now empty
	or	eax, eax
	js	.errexit		; Negative result = failure
	pop	eax

.done:
	ret

align 4
.errexit:
	; Report the failure on stderr, then exit(2). The stack is left
	; unbalanced on purpose: the process ends here.
	sub	eax, eax
	push	dword werrlen
	mov	al, 2			; stderr
	push	dword werr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	mov	al, 2			; return failure
	push	eax
	push	edx
	dec	al			; SYS_exit
	int	80h
255
align 4
;-----------------------------------------------------------------------
; getchar - fetch the next input octet.
;
; In:	ESI = next unread octet, EBX = octets left at ESI
; Out:	AL = the octet; ESI advanced, EBX decremented
;
; When EBX is zero the input buffer is refilled through "read" first.
; "read" does not return at end of input or on a read error.
;-----------------------------------------------------------------------
getchar:
	test	ebx, ebx
	jz	.refill

.take:
	lodsb
	dec	ebx
	ret

.refill:
	call	read
	jmp	short .take
266
align 4
;-----------------------------------------------------------------------
; read - refill the input buffer from stdin.
;
; In:	AH = option flags (consulted only if the input has ended)
; Out:	ESI = ibuffer, EBX = number of octets read (greater than 0)
;	EAX, ECX and EDX preserved
;
; Returns only when at least one octet was read:
;   - at end of input it continues at .exit, which prints a pending `%'
;     (PERC), appends a new line if LFLAG is set, flushes the output
;     buffer and exits with status 0;
;   - on failure it continues at .errexit, which prints rerr to stderr
;     and exits with status 1.
;
; A native FreeBSD system call reports failure by setting the carry
; flag and returning a positive errno in EAX. Without the carry test
; that errno would be taken for a byte count and stale buffer contents
; would be decoded. The sign test is kept as well, for environments
; that report failure as a negative result.
;-----------------------------------------------------------------------
read:
	push	eax
	push	ecx
	push	dword BUFSIZE		; Number of octets wanted
	mov	esi, ibuffer
	sub	eax, eax		; stdin = 0
	push	esi			; Address of the buffer
	push	eax			; File descriptor
	push	edx			; Filler for the return-address slot
	mov	al, 3			; SYS_read
	int	80h
	pop	edx			; POP does not touch the flags
	jc	.errexit		; Carry set = the call failed
	add	esp, byte 12
	pop	ecx
	or	eax, eax
	mov	ebx, eax		; MOV and POP keep the flags from OR
	pop	eax
	je	.exit			; Nothing read = end of input
	js	.errexit		; Negative result = failure
	ret

align 4
.exit:
	; End of input. Also entered directly from _start once the command
	; line has been used up.
	test	ah, PERC
	je	.l

	mov	al, '%'			; Input ended right after a `%'
	call	putchar

.l:
	test	ah, LFLAG
	je	.flush

	mov	al, 0Ah
	call	putchar

.flush:
	call	write			; Flush output buffer

	sub	eax, eax		; Return success
	push	eax
	push	edx
	inc	al			; SYS_exit
	int	80h

.errexit:
	; Report the failure on stderr, then exit(1). The stack may be
	; unbalanced here; that is harmless because the process ends.
	sub	eax, eax
	push	dword rerrlen
	mov	al, 2			; stderr
	push	dword rerr
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	sub	eax, eax
	inc	al			; Return failure, SYS_exit
	push	eax
	push	eax
	int	80h
328
;-----------------------------------------------------------------------
; _start - program entry point.
;
; Register roles for the rest of the program:
;	AL  = current octet		AH  = option flags
;	ESI = input cursor		EBX = input octets left
;	EDI = output cursor		ECX = output octets buffered
;	EDX = filler pushed into the return-address slot of system calls
;
; The arguments are popped straight off the initial stack. Options are
; parsed until the first argument that does not start with `-' (or
; until -e has been seen); everything after that is decoded as text,
; with one space between arguments. With no text arguments the input
; comes from stdin, unless -e was given.
;-----------------------------------------------------------------------
global	_start
_start:
	xor	eax, eax
	xor	ecx, ecx
	xor	edx, edx
	mov	ebx, -1			; "Unlimited" input while reading argv
	add	esp, byte 8		; Skip argc and argv[0]
	mov	edi, obuffer
	cld

.nextarg:
	pop	esi
	test	esi, esi
	jnz	.havearg

	; Out of arguments and no text seen.
	test	ah, EFLAG
	jnz	near read.exit		; -e: do not read stdin, just finish

	xor	ebx, ebx		; Empty buffer: getchar will call read
	jmp	.stdin

.havearg:
	test	ah, EFLAG
	jnz	near .argbody		; After -e everything is text

	lodsb
	cmp	al, '-'
	jne	near .argchar		; First text argument

	cmp	byte [esi], 0
	je	.usage			; A lone `-' is not an option

.option:
	lodsb
	test	al, al
	jz	.nextarg		; End of this group of option letters

	cmp	al, 'e'
	je	.opt_e
	cmp	al, 'l'
	je	.opt_l
	cmp	al, 'p'
	je	.opt_p
	cmp	al, 'r'
	je	.opt_r
	cmp	al, 'h'
	jne	.usage

	; -h: banner plus the full help text
	xor	ah, ah
	push	dword hlen
	jmp	short .banner

.opt_r:
	; -r: banner only
	xor	ah, ah
	push	dword rlen

.banner:
	; The length is already on the stack; both messages start at rmsg.
	mov	al, 2			; stderr
	push	dword rmsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	xor	eax, eax		; return success
	push	eax
	push	eax
	inc	al			; SYS_exit
	int	80h

.opt_e:
	or	ah, EFLAG
	jmp	short .option

.opt_l:
	or	ah, LFLAG
	jmp	short .option

.opt_p:
	or	ah, PFLAG
	jmp	short .option

.usage:
	; Unknown option: print the usage summary and fail.
	xor	ah, ah
	push	dword ulen
	mov	al, 2			; stderr
	push	dword umsg
	push	eax
	push	edx
	add	al, al			; SYS_write
	int	80h

	mov	eax, -1			; return failure (-1)
	push	eax
	push	edx
	neg	eax			; SYS_exit
	int	80h

	; Decode stdin. This looks like an endless loop but is not one:
	; the read procedure exits to the OS once the input is used up.

align 4
.stdin:
	call	getchar
	call	decode
	jmp	short .stdin

	; Decode the command line arguments instead of stdin.

align 4
.argloop:
	pop	esi
	test	esi, esi
	jz	near read.exit		; No more arguments: finish up

	mov	al, ' '			; One space between arguments
	call	putchar

.argbody:
	lodsb

.argchar:
	test	al, al
	jz	.argloop		; End of this argument
	call	decode
	jmp	short .argbody
460
461;------ End of code -----------------------------------------------------------
462
463