;---- urldecode.asm ----------------------------------------------------------;
;
; URL decode a sequence of octets (RFC 1738).
;
; The sequence comes from stdin or command line arguments. The output
; is sent to stdout.
;
; Options:
;
;       -e - end of options
;       -h - help
;       -l - append a new line
;       -p - decode + into space
;       -r - version
;
; This code, when assembled and linked, will work under FreeBSD, and perhaps
; other BSD systems. It requires NASM for assembly:
;
;       nasm -f elf urldecode.asm
;       ld -o urldecode urldecode.o
;       strip urldecode
;
; Started:      23 Oct 2000
; Updated:      25 Oct 2000
;
; Version 1.0
;
; Copyright (c) 2000 G. Adam Stanislav
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED.
; IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
; SUCH DAMAGE.
;
;-----------------------------------------------------------------------------;

%define BUFSIZE         2048

; Flag bits kept in AH for the whole run.
%define PFLAG           01h     ; -p seen: decode `+' into a space
%define PERC            02h     ; `%' consumed, first hex digit still pending
%define LFLAG           04h     ; -l seen: append a new line to the output
%define EFLAG           08h     ; -e seen: all remaining arguments are input
%define PERC2           10h     ; `%' and one hex digit consumed, second pending

; System call numbers and file descriptors.
%define SYS_exit        1
%define SYS_read        3
%define SYS_write       4
%define STDIN           0
%define STDOUT          1
%define STDERR          2

section .bss
ibuffer resb    BUFSIZE
obuffer resb    BUFSIZE

section .data
rerr    db      'URLDECODE: Read error.', 0Ah
rerrlen equ     $-rerr
align 4
werr    db      'URLDECODE: Write error.', 0Ah
werrlen equ     $-werr
align 4
rmsg    db      'URLDECODE version 1.0 (25 Oct 2000)', 0Ah
        db      'Copyright 2000 G. Adam Stanislav', 0Ah
        db      'All rights reserved.', 0Ah
rlen    equ     $-rmsg                  ; -r prints rmsg only
        db      0Ah
umsg    db      'Usage: urldecode [options] [string ...]', 0Ah, 0Ah
        db      09h, '-e = decode rest of arguments; exit if [string ...] is missing', 0Ah
        db      09h, '-h = help', 0Ah
        db      09h, '-l = append a new line', 0Ah
        db      09h, "-p = decode `+' into spaces", 0Ah
        db      09h, '-r = version', 0Ah, 0Ah
ulen    equ     $-umsg                  ; a bad option prints umsg only
        db      "If no [string ...] is specified, input comes from stdin unless the `-e' option", 0Ah
        db      'is used.', 0Ah, 0Ah
hlen    equ     $-rmsg                  ; -h prints everything from rmsg on

section .text

;------------------------------------------------------------------------------
; Register roles shared by every routine in this file:
;
;   AL  - octet currently being processed
;   AH  - flag bits (PFLAG, PERC, LFLAG, EFLAG, PERC2)
;   EBX - octets left in ibuffer; FFFFFFFFh while input comes from arguments,
;         so that getchar never falls back to read
;   ECX - octets waiting in obuffer
;   EDX - pushed as the filler dword in front of every int 80h;
;         DL and DH are scratch inside decode
;   ESI - address of the next input octet
;   EDI - address of the next free octet in obuffer
;
; System calls use the FreeBSD convention: arguments on the stack, one extra
; dword on top of them, the call number in EAX. Failure is reported with the
; carry flag set, so CF is tested directly after each int 80h, before any
; instruction that would overwrite it.
;------------------------------------------------------------------------------

;------------------------------------------------------------------------------
; decode - URL decode the octet in AL and send the result to putchar.
;
; In:    AL = input octet, AH = flags
; Out:   one or two octets appended to the output buffer
; Clobb: AL, DL, DH, CPU flags; may consume up to two more input octets
;
; `+' becomes a space when PFLAG is set. `%' followed by two hex digits
; becomes the octet they encode. A `%' that is not followed by two hex digits
; is passed through unchanged and the offending octet is pushed back to the
; input.
;------------------------------------------------------------------------------
decode:
        cmp     al, '+'
        jne     .perc

        test    ah, PFLAG
        je      near putchar            ; no -p: `+' is an ordinary octet

        mov     al, ' '
        jmp     putchar

.perc:
        cmp     al, '%'
        jne     near putchar            ; ordinary octet, copy it

        ; PERC tells read.exit that a lone `%' is owed to the output if the
        ; input ends right here.
        or      ah, PERC
        ; getchar works even when the input is the command line and not
        ; stdin: EBX is then so large that getchar never calls read.
        call    getchar
        and     ah, ~PERC

        ; AL should now hold a hex digit. If it does not, print `%' and
        ; push AL back (INC EBX / DEC ESI).
        sub     dl, dl
        mov     dh, al                  ; keep the raw digit for .perc2

        cmp     al, '0'
        jb      .perc1

        cmp     al, '9'
        ja      .lc1

        sub     al, '0'
        jmp     short .hex2

align 4
.lc1:
        cmp     al, 'f'
        ja      .perc1

        cmp     al, 'a'
        jb      .uc1

        add     al, 10 - 'a'
        jmp     short .hex2

.uc1:
        cmp     al, 'A'
        jb      .perc1
        cmp     al, 'F'
        jbe     .subA1

.perc1:
        ; Not a hex digit: emit the `%' and push the octet back.
        mov     al, '%'
        inc     ebx
        dec     esi
        jmp     short putchar

align 4
.subA1:
        add     al, 10 - 'A'

.hex2:
        mov     dl, al                  ; DL = value of the first digit

        ; PERC2 tells read.exit that `%' and the digit saved in DH are owed
        ; to the output if the input ends before the second digit.
        or      ah, PERC2
        call    getchar
        and     ah, ~PERC2

        cmp     al, '0'
        jb      .perc2

        cmp     al, '9'
        ja      .lc2

        sub     al, '0'
        jmp     short .decode

.lc2:
        cmp     al, 'f'
        ja      .perc2

        cmp     al, 'a'
        jb      .hc2

        add     al, 10 - 'a'
        jmp     short .decode

.perc2:
        ; Second octet is not a hex digit: push it back, then emit `%' and
        ; the first digit exactly as they were read.
        inc     ebx
        dec     esi
        mov     al, '%'
        call    putchar
        mov     al, dh
        jmp     short putchar

.hc2:
        cmp     al, 'A'
        jb      .perc2

        cmp     al, 'F'
        ja      .perc2

        add     al, 10 - 'A'

.decode:
        shl     dl, 4                   ; first digit is the high nibble
        or      al, dl

        ; Fall through

;------------------------------------------------------------------------------
; putchar - append AL to the output buffer.
;
; In:    AL = octet, EDI = next free slot, ECX = octets buffered
; Out:   EDI and ECX updated; EAX preserved
;
; The buffer is flushed when AL is a new line or when it becomes full.
;------------------------------------------------------------------------------
align 4
putchar:
        stosb
        inc     ecx

        cmp     al, 0Ah
        je      write

        cmp     ecx, BUFSIZE
        jb      write.done

        ; Fall through to write

;------------------------------------------------------------------------------
; write - send the contents of the output buffer to stdout.
;
; In:    EDI = one past the last buffered octet, ECX = octets buffered
; Out:   EDI = start of the buffer, ECX = 0; EAX preserved
;
; The buffer starts at EDI - ECX and is ECX octets long. A failed write
; prints a message on stderr and exits with status 2.
;------------------------------------------------------------------------------
write:
        jecxz   .done                   ; empty buffer, do nothing

        push    eax                     ; keep the octet (AL) and flags (AH)
        sub     edi, ecx                ; find the start of the buffer
        push    ecx
        push    edi
        push    dword STDOUT
        push    edx
        mov     eax, SYS_write
        int     80h
        jc      .errexit                ; carry set = the call failed
        pop     edx
        add     esp, byte 12
        xor     ecx, ecx                ; buffer is now empty
        test    eax, eax
        js      .errexit                ; also refuse a negative result
        pop     eax

.done:
        ret

align 4
.errexit:
        push    dword werrlen
        push    dword werr
        push    dword STDERR
        push    edx
        mov     eax, SYS_write
        int     80h

        push    dword 2                 ; exit status 2
        push    edx
        mov     eax, SYS_exit
        int     80h

;------------------------------------------------------------------------------
; getchar - fetch the next input octet.
;
; In:    ESI = next input octet, EBX = octets left in the buffer
; Out:   AL = octet; ESI advanced, EBX decremented
;
; Calls read to refill the buffer when EBX is zero. read does not return
; when the input is exhausted.
;------------------------------------------------------------------------------
align 4
getchar:
        test    ebx, ebx
        jne     .fetch
        call    read

.fetch:
        lodsb
        dec     ebx
        ret

;------------------------------------------------------------------------------
; read - refill ibuffer from stdin.
;
; Out:   ESI = ibuffer, EBX = octets read; EAX and ECX preserved
;
; Returns only when at least one octet was read. At end of input control
; goes to .exit, which finishes the output and leaves with status 0. A failed
; read prints a message on stderr and exits with status 1.
;------------------------------------------------------------------------------
align 4
read:
        push    eax                     ; keep the flags in AH
        push    ecx                     ; keep the output count
        push    dword BUFSIZE
        mov     esi, ibuffer
        push    esi
        push    dword STDIN
        push    edx
        mov     eax, SYS_read
        int     80h
        jc      .errexit                ; carry set = the call failed
        pop     edx
        add     esp, byte 12
        pop     ecx
        mov     ebx, eax
        test    eax, eax                ; POP below leaves these flags alone
        pop     eax
        je      .exit                   ; nothing read: end of input
        js      .errexit
        ret

        ; End of input. Also entered from _start when the command line
        ; arguments are used up.
align 4
.exit:
        test    ah, PERC
        je      .digit

        mov     al, '%'                 ; input ended right after a `%'
        call    putchar

.digit:
        test    ah, PERC2
        je      .l

        mov     al, '%'                 ; input ended after `%' + one digit:
        call    putchar                 ; pass both through unchanged
        mov     al, dh
        call    putchar

.l:
        test    ah, LFLAG
        je      .flush

        mov     al, 0Ah
        call    putchar

.flush:
        call    write                   ; flush the output buffer

        push    dword 0                 ; exit status 0
        push    edx
        mov     eax, SYS_exit
        int     80h

.errexit:
        push    dword rerrlen
        push    dword rerr
        push    dword STDERR
        push    edx
        mov     eax, SYS_write
        int     80h

        push    dword 1                 ; exit status 1
        push    edx
        mov     eax, SYS_exit
        int     80h

;------------------------------------------------------------------------------
; _start - program entry: parse the options, then decode either the remaining
; arguments or stdin.
;
; On entry the stack holds argc, argv[0], argv[1], ..., and a null pointer.
;------------------------------------------------------------------------------
global _start
_start:
        xor     eax, eax                ; AH = no flags yet
        xor     ebx, ebx
        xor     ecx, ecx                ; output buffer is empty
        xor     edx, edx
        not     ebx                     ; EBX = FFFFFFFFh: argument mode
        add     esp, byte 8             ; skip argc and argv[0]
        mov     edi, obuffer
        cld

.next:
        pop     esi                     ; next argument
        test    esi, esi
        jne     .dash
        test    ah, EFLAG
        jne     near read.exit          ; -e with no strings: just finish

        not     ebx                     ; EBX = 0: first getchar calls read
        jmp     .loop

.dash:
        test    ah, EFLAG
        jne     near .cmdinnerloop      ; after -e every argument is input

        lodsb
        cmp     al, '-'
        jne     near .cmdfirst          ; first string: start decoding

        cmp     byte [esi], 0
        je      .err                    ; a lone `-' is not an option

.cmd:
        lodsb
        test    al, al
        je      .next                   ; end of this option cluster

        cmp     al, 'e'
        jne     .l

        or      ah, EFLAG
        jmp     short .cmd

.l:
        cmp     al, 'l'
        jne     .p

        or      ah, LFLAG
        jmp     short .cmd

.p:
        cmp     al, 'p'
        jne     .r

        or      ah, PFLAG
        jmp     short .cmd

.r:
        cmp     al, 'r'
        jne     .h

        push    dword rlen              ; version text only

.msg:
        ; The length has been pushed by the caller; print from rmsg on
        ; stderr and exit with status 0.
        push    dword rmsg
        push    dword STDERR
        push    edx
        mov     eax, SYS_write
        int     80h

        push    dword 0                 ; exit status 0
        push    edx
        mov     eax, SYS_exit
        int     80h

.h:
        cmp     al, 'h'
        jne     .err

        push    dword hlen              ; version, usage and the closing note
        jmp     short .msg

.err:
        ; Unknown option: print the usage on stderr and fail.
        push    dword ulen
        push    dword umsg
        push    dword STDERR
        push    edx
        mov     eax, SYS_write
        int     80h

        push    dword -1                ; exit status -1
        push    edx
        mov     eax, SYS_exit
        int     80h

        ; While this may seem like an endless loop, it is not.
        ; The read procedure exits to the OS when there is no
        ; input left to read.

align 4
.loop:
        call    getchar
        call    decode
        jmp     short .loop

        ; Process input from the command line instead of stdin.
        ; Arguments are separated by one space in the output.

align 4
.cmdloop:
        pop     esi
        test    esi, esi
        je      near read.exit          ; no more arguments

        mov     al, ' '
        call    putchar

.cmdinnerloop:
        lodsb

.cmdfirst:
        test    al, al
        je      .cmdloop                ; end of this argument
        call    decode
        jmp     short .cmdinnerloop

;------ End of code -----------------------------------------------------------