;---- urlencode.asm ----------------------------------------------------------;
;
; URL encode a sequence of octets (RFC 1738).
;
; The sequence comes from the command line. If no sequence is specified,
; it is taken from stdin. The output is sent to stdout.
;
; The characters a-zA-Z0-9 are not encoded. Additionally, any characters
; named in an exclusion list (-[list]) on the command line are not encoded.
;
; Everything else is encoded into %XX where XX is the hexadecimal value
; of the octet, always two bytes long.
;
; Valid options are:
;
;	-%	- encode % (only needed with -x)
;	-a	- encode alphanumerics (same as -dt)
;	-d	- encode [0-9]
;	-e	- end of options
;	-h	- help
;	-n	- do not encode new lines
;	-p	- encode spaces as +
;	-l	- append a new line at the end
;	-r	- version info
;	-t	- encode [A-Za-z]
;	-x	- exclude all (no encoding)
;	-[list]	- exclude the listed characters from encoding
;
; This code, when assembled and linked, will work under FreeBSD, and perhaps
; other BSD systems. It requires NASM for assembly:
;
;	nasm -f elf urlencode.asm
;	ld -o urlencode urlencode.o
;	strip urlencode
;
; Started:	23 Oct 2000
; Updated:	24 Oct 2000
;
; Version 1.0
;
; Copyright (c) 2000 G. Adam Stanislav
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
; SUCH DAMAGE.
;
;-----------------------------------------------------------------------------;
;
; Syntax: NASM (Intel operand order), 32-bit x86.
; System calls: int 80h with the arguments pushed on the stack plus one
; extra dword on top (the slot a return address would occupy); the call
; number goes in EAX.  A failed call is reported through the carry flag.
;
; Register roles used throughout the program:
;
;	AL	- octet currently being processed
;	AH	- option flag bits (NFLAG ... EFLAG below)
;	EBX	- number of unread bytes left in ibuffer
;	ECX	- number of bytes currently held in obuffer
;	EDX	- scratch table index; its upper 24 bits are kept at zero
;	ESI	- input pointer (argument string or ibuffer)
;	EDI	- output pointer into obuffer
;	EBP	- pointer to the current argv[] slot
;
;-----------------------------------------------------------------------------;

%define	BUFSIZE	2048			; size of each I/O buffer in bytes

; Option flag bits kept in AH
%define	NFLAG	01h			; -n
%define	PFLAG	02h			; -p
%define	LFLAG	04h			; -l
%define	PERC	08h			; -%
%define	TFLAG	10h			; -t
%define	DFLAG	20h			; -d
%define	XFLAG	40h			; -x
%define	EFLAG	80h			; -e
%define	AFLAG	(TFLAG|DFLAG)		; -a

section	.bss
ibuffer	resb	BUFSIZE			; input buffer filled by read
obuffer	resb	BUFSIZE			; output buffer drained by write

section	.data
; One byte per octet value; non-zero means "listed in -[...], do not encode".
etable	times 256 db 0
hex	db	'0123456789ABCDEF'
rerr	db	'URLENCODE: Read error.', 0Ah
rerrlen	equ	$-rerr
align 4
werr	db	'URLENCODE: Write error.', 0Ah
werrlen	equ	$-werr
align 4
; rmsg..rlen is the version text, umsg..ulen the short usage text,
; and rmsg..hlen (version + usage + notes) the full help text.
rmsg	db	'URLENCODE version 1.0 (24 Oct 2000)', 0Ah
	db	'Copyright 2000 G. Adam Stanislav', 0Ah
	db	'All rights reserved.', 0Ah
rlen	equ	$-rmsg
	db	0Ah
umsg	db	'Usage: urlencode [options] [string ...]', 0Ah, 0Ah
	db	09h, '-% = encode % (only needed with -x)', 0Ah
	db	09h, '-a = encode alphanumeric characters (same as -dt)', 0Ah
	db	09h, '-d = encode digits (0-9)', 0Ah
	db	09h, '-e = encode rest of arguments; exit if [string ...] is missing', 0Ah
	db	09h, '-h = help', 0Ah
	db	09h, '-l = append a new line', 0Ah
	db	09h, '-n = do not encode new lines', 0Ah
	db	09h, "-p = encode spaces as `+' (ignored if excluded)", 0Ah
	db	09h, '-r = print version information', 0Ah
	db	09h, '-t = encode alphabetic characters (A-Za-z)', 0Ah
	db	09h, '-x = exclude all (no encoding)', 0Ah
	db	09h, '-[list] = exclude listed characters', 0Ah, 0Ah
ulen	equ	$-umsg
	db	'The exclusion list may be URL-encoded. It may contain ranges. Its members', 0Ah
	db	"may be escaped with `\' (e.g., `-[\-\%\]\\]' excludes `-', `%', `]', and `\'", 0Ah
	db	"from encoding, `-[%23-%26]' excludes `#', `$', `%', and `&').", 0Ah, 0Ah
	db	'If no [string ...] is specified, input comes from stdin.', 0Ah
hlen	equ	$-rmsg

section	.text

;-----------------------------------------------------------------------------
; encode - emit one octet, URL-encoded if the options require it.
;
; In:	AL  = octet to process
;	AH  = option flags
;	EDX = upper 24 bits zero
;	EDI/ECX = output buffer state (see putchar)
; Out:	the octet, '+', or '%XX' appended to the output buffer
; Clobbers: AL, DL, flags
;
; Falls through into putchar for the final byte, so putchar's RET is also
; encode's return.
;-----------------------------------------------------------------------------
encode:
	mov	dl, al			; keep the raw octet; EDX indexes etable/hex

	test	ah, XFLAG
	je	.notx

	; --- -x given: nothing is encoded unless explicitly requested ---
	cmp	al, '%'
	jne	.xa

	test	ah, PERC
	jne	near .go		; -x -%: always encode the percent sign

.xa:
	test	ah, AFLAG
	je	near putchar		; neither -d nor -t: copy verbatim

	test	ah, DFLAG
	je	.xt			; only -t: go straight to the letter test

	cmp	al, '0'
	jb	near putchar
	cmp	al, '9'
	jbe	.cmd			; a digit and -d: encode unless excluded

	; Not a digit.  Letters may only be encoded when -t was given as
	; well; with -d alone they must be copied through untouched.
	test	ah, TFLAG
	je	near putchar

.xt:
	cmp	al, 'z'
	ja	near putchar

	cmp	al, 'a'
	jae	.cmd

	cmp	al, 'A'
	jb	near putchar

	cmp	al, 'Z'
	ja	near putchar
	jmp	short .cmd

	; --- normal mode: everything but [0-9A-Za-z] is encoded ---
.notx:
	cmp	al, 0Ah
	jne	.test

	test	ah, NFLAG
	jne	putchar			; -n: new lines are copied verbatim

.test:
	test	ah, DFLAG
	jne	.alpha			; -d: digits get no free pass

	cmp	al, '0'
	jb	.cmd
	cmp	al, '9'
	jbe	putchar

.alpha:
	test	ah, TFLAG
	jne	.cmd			; -t: letters get no free pass

	cmp	al, 'A'
	jb	.cmd
	cmp	al, 'Z'
	jbe	putchar

	cmp	al, 'a'
	jb	.cmd
	cmp	al, 'z'
	jbe	putchar

.cmd:
	; Check if the char is at the command line (exclusion list)
	cmp	byte [etable+edx], 0
	jne	putchar

.encode:
	cmp	dl, ' '
	jne	.go
	test	ah, PFLAG
	je	.go

	mov	al, '+'			; -p: a space becomes a plus sign
	jmp	short putchar

.go:
	mov	al, '%'
	call	putchar

	push	edx			; DL is destroyed by the shift below
	shr	dl, 4
	mov	al, [hex+edx]		; high nibble
	pop	edx
	call	putchar

	and	dl, 0Fh
	mov	al, [hex+edx]		; low nibble

	; Fall through

;-----------------------------------------------------------------------------
; putchar - append AL to the output buffer, flushing it when full.
;
; In:	AL = byte to store, EDI = next free byte, ECX = bytes buffered
; Out:	EDI/ECX updated (reset to the buffer start / 0 after a flush)
;-----------------------------------------------------------------------------
align 4
putchar:
	stosb
	inc	ecx
	cmp	ecx, BUFSIZE
	jb	write.done

	; Fall through to write

;-----------------------------------------------------------------------------
; write - send the contents of the output buffer to stdout.
;
; The buffer starts at EDI - ECX and is ECX bytes long.
; Out:	EDI = start of the buffer, ECX = 0; EAX and EDX are preserved.
; On failure prints werr to stderr and exits with status 2.
;-----------------------------------------------------------------------------
write:
	jecxz	.done			; Empty buffer, do nothing

	push	eax
	sub	edi, ecx		; Find start of buffer
	sub	eax, eax
	push	ecx
	inc	al			; stdout = 1
	push	edi
	push	eax
	push	edx			; extra dword expected by the kernel
	mov	al, 4			; SYS_write
	int	80h
	jc	.errexit		; failure is signalled by the carry flag
	pop	edx
	sub	ecx, ecx		; Buffer is now empty
	add	esp, byte 12
	pop	eax

.done:
	ret

align 4
.errexit:
	; The stack is left as it is: the process terminates here.
	sub	eax, eax
	push	dword werrlen
	mov	al, 2			; stderr
	push	dword werr
	push	eax
	push	edx
	add	al, al			; SYS_write (2 + 2 = 4)
	int	80h

	sub	eax, eax
	mov	al, 2			; return failure
	push	eax
	push	edx
	dec	al			; SYS_exit (2 - 1 = 1)
	int	80h

;-----------------------------------------------------------------------------
; getchar - fetch the next input octet, refilling ibuffer when it is empty.
;
; In:	EBX = unread bytes in ibuffer, ESI = next unread byte
; Out:	AL = octet; EBX and ESI advanced
; Does not return at end of input (see read).
;-----------------------------------------------------------------------------
align 4
getchar:
	or	ebx, ebx
	jne	.fetch
	call	read

.fetch:
	lodsb
	dec	ebx
	ret

;-----------------------------------------------------------------------------
; read - refill ibuffer from stdin.
;
; Out:	ESI = ibuffer, EBX = number of bytes read; EAX, ECX, EDX preserved.
; At end of input (0 bytes) the program finishes through .exit; on a read
; error it prints rerr to stderr and exits with status 1.
;-----------------------------------------------------------------------------
align 4
read:
	push	eax
	push	ecx
	push	dword BUFSIZE
	mov	esi, ibuffer
	sub	eax, eax		; stdin = 0
	push	esi
	push	eax
	push	edx			; extra dword expected by the kernel
	mov	al, 3			; SYS_read
	int	80h
	jc	.errexit		; failure is signalled by the carry flag
	pop	edx
	add	esp, byte 12
	pop	ecx
	or	eax, eax		; sets ZF when nothing was read ...
	mov	ebx, eax
	pop	eax			; ... MOV and POP leave the flags alone
	je	.exit
	ret

align 4
.exit:
	; Normal end of the program; also entered from _start.
	test	ah, LFLAG
	je	.flush

	mov	al, 0Ah			; -l: append a new line
	call	putchar

.flush:
	call	write			; Flush output buffer

	sub	eax, eax		; Return success
	push	eax
	push	edx
	inc	al			; SYS_exit
	int	80h

.errexit:
	sub	eax, eax
	push	dword rerrlen
	mov	al, 2			; stderr
	push	dword rerr
	push	eax
	push	edx
	add	al, al			; SYS_write (2 + 2 = 4)
	int	80h

	sub	eax, eax
	inc	al			; Return failure, SYS_exit
	push	eax
	push	eax
	int	80h

;-----------------------------------------------------------------------------
; _start - program entry: parse the options, then encode either the
; remaining command-line arguments or stdin.
;
; On entry [esp] = argc, [esp+4] = argv[0], [esp+8] = argv[1], ...
;-----------------------------------------------------------------------------
global	_start
_start:
	sub	eax, eax		; AH = no options yet
	sub	ebx, ebx		; input buffer empty
	sub	ecx, ecx		; output buffer empty
	sub	edx, edx
	lea	ebp, [esp+8]		; EBP -> argv[1]
	mov	edi, obuffer
	cld

.next:
	mov	esi, [ebp]

	or	esi, esi
	je	near .preloop		; out of arguments

	test	ah, EFLAG
	jne	near .cmdinnerloop	; -e seen: this argument is data
	lodsb
	cmp	al, '-'
	jne	near .cmdfirst		; first non-option argument is data

	cmp	byte [esi], 0
	je	near .loop		; a lone "-": take input from stdin

	add	ebp, byte 4

	; Walk the option letters of one argument.
.cmd:
	lodsb
	or	al, al
	je	.next

	cmp	al, '['
	je	near .parse

.perc:
	cmp	al, '%'
	jne	.t

	or	ah, PERC
	jmp	short .cmd

.t:
	cmp	al, 't'
	jne	.d

	or	ah, TFLAG
	jmp	short .cmd

.d:
	cmp	al, 'd'
	jne	.e

	or	ah, DFLAG
	jmp	short .cmd

.e:
	cmp	al, 'e'
	jne	.x

	or	ah, EFLAG
	jmp	short .cmd

.x:
	cmp	al, 'x'
	jne	.a

	or	ah, XFLAG
	jmp	short .cmd

.a:
	cmp	al, 'a'
	jne	.p

	or	ah, AFLAG
	jmp	short .cmd

.p:
	cmp	al, 'p'
	jne	.n

	or	ah, PFLAG
	jmp	short .cmd

.n:
	cmp	al, 'n'
	jne	.l

	or	ah, NFLAG
	jmp	short .cmd

.l:
	cmp	al, 'l'
	jne	.r

	or	ah, LFLAG
	jmp	short .cmd

.r:
	cmp	al, 'r'
	jne	.h

	sub	ah, ah			; EAX must be a clean descriptor number
	push	dword rlen		; version text only

.msg:
	; Print rmsg (length already pushed) and exit with status 0.
	mov	al, 2			; stderr
	push	dword rmsg
	push	eax
	push	edx
	add	al, al			; SYS_write (2 + 2 = 4)
	int	80h

	sub	eax, eax		; return success
	push	eax
	push	eax
	inc	al			; SYS_exit
	int	80h

.h:
	cmp	al, 'h'
	jne	.err

	sub	ah, ah
	push	dword hlen		; version + usage + notes
	jmp	short .msg

.err:
	; Unknown option or malformed exclusion list: print the usage text
	; and exit with status -1.
	sub	ah, ah
	push	dword ulen
	mov	al, 2			; stderr
	push	dword umsg
	push	eax
	push	edx
	add	al, al			; SYS_write (2 + 2 = 4)
	int	80h

	mov	eax, -1			; return failure (-1)
	push	eax
	push	edx
	neg	eax			; SYS_exit (-(-1) = 1)
	int	80h

	; While this may seem like an endless loop, it is not.
	; The read procedure exits to the OS when there is no
	; input left to read.

.preloop:
	test	ah, EFLAG
	jne	near read.exit		; -e without strings: nothing to do

.loop:
	call	getchar
	cmp	al, 0Ah
	je	.nl

	call	encode
	jmp	short .loop

.nl:
	call	encode
	call	write			; flush the output after every input line
	jmp	short .loop

	; Parse the exclude list.
.parse:
	lodsb

.parsechar:
	or	al, al
	je	.err			; list not closed by ']'

	cmp	al, ']'
	je	near .cmd		; end of list, back to option letters

	call	.parseperc

.ch1:
	; We have a character to exclude. It is either just a character,
	; or the first of a range.
	movzx	edx, al
	mov	byte [etable+edx], -1

	lodsb
	cmp	al, '-'
	jne	.parsechar

	lodsb
	or	al, al
	je	.err

	cmp	al, ']'
	je	.err			; range without an end

	call	.parseperc

	; DL = first member (already marked), AL = last member.
	cmp	dl, al
	je	.parse
	ja	.down
	movzx	ebx, al

.store:
	; Ascending range: mark from AL down to DL + 1.
	mov	byte [etable+ebx], -1
	dec	bl
	cmp	bl, dl
	ja	.store

.stored:
	sub	bl, bl			; EBX = 0 again: input buffer is empty

	jmp	short .parse

.down:
	movzx	ebx, al

.dstore:
	; Descending range: mark from AL up to DL - 1.
	mov	byte [etable+ebx], -1
	inc	bl
	cmp	bl, dl
	jb	.dstore
	jmp	short .stored

	; Decode one list member: '\c' yields c, '%XX' yields the octet XX,
	; anything else is returned unchanged.
	; In: AL = first character, ESI = rest.  Out: AL = octet.
	; Uses DH as scratch and clears it again; DL is preserved.
.parseperc:
	cmp	al, '\'
	jne	.tryperc

	lodsb
	or	al, al
	je	near .err

.gotit:
	ret

.tryperc:
	cmp	al, '%'
	jne	.gotit

	; First hex digit
	lodsb
	cmp	al, '0'
	jb	near .err

	cmp	al, '9'
	ja	.ul1

	sub	al, '0'
	jmp	short .hex2

.ul1:
	cmp	al, 'A'
	jb	near .err

	cmp	al, 'F'
	ja	.ll1

	add	al, 10 - 'A'
	jmp	short .hex2

.ll1:
	cmp	al, 'a'
	jb	near .err

	cmp	al, 'f'
	ja	near .err

	add	al, 10 - 'a'

.hex2:
	mov	dh, al			; park the high nibble

	; Second hex digit
	lodsb
	cmp	al, '0'
	jb	near .err

	cmp	al, '9'
	ja	.ul2

	sub	al, '0'
	jmp	short .hexit

.ul2:
	cmp	al, 'A'
	jb	near .err

	cmp	al, 'F'
	ja	.ll2

	add	al, 10 - 'A'
	jmp	short .hexit

.ll2:
	cmp	al, 'a'
	jb	near .err

	cmp	al, 'f'
	ja	near .err

	add	al, 10 - 'a'		; convert 'a'..'f' to 10..15, as .ll1 does

.hexit:
	shl	dh, 4
	or	al, dh			; AL = (high << 4) | low
	sub	dh, dh			; keep the upper part of EDX clean
	ret

	; Encode the command-line strings, separated by an encoded space.
.cmdbigloop:
	add	ebp, byte 4
	mov	esi, [ebp]
	or	esi, esi
	je	near read.exit		; no more strings: finish up

	mov	al, ' '
	call	encode

.cmdinnerloop:
	lodsb

.cmdfirst:
	or	al, al
	je	.cmdbigloop
	call	encode
	jmp	short .cmdinnerloop

;------ End of code -----------------------------------------------------------