## core string functions ##

# Every concrete String subtype must supply `length` and `next`; these
# fallbacks only report which method is missing.
length(s::String) = error("you must implement length(",typeof(s),")")
next(s::String, i::Int) = error("you must implement next(",typeof(s),",Int)")
# Directly indexable strings: the character at index i is s[i] and the
# following iteration state is i+1.
next(s::DirectIndexString, i::Int) = (s[i],i+1)
# Any other integer index is converted with int() before dispatching.
next(s::String, i::Integer) = next(s,int(i))

## generic supplied functions ##

# Iteration protocol: state starts at 1 and is finished once it exceeds length(s).
start(s::String) = 1
done(s::String,i) = (i > length(s))
isempty(s::String) = done(s,start(s))
# Indexing is defined in terms of next(); non-Int indices are converted first.
ref(s::String, i::Int) = next(s,i)[1]
ref(s::String, i::Integer) = s[int(i)]
ref(s::String, x::Real) = s[iround(x)]
ref{T<:Integer}(s::String, r::Range1{T}) = s[int(first(r)):int(last(r))]

symbol(s::String) = symbol(cstring(s))
string(s::String) = s

# Printing a string prints each of its characters; printing several values
# prints them one after another.
print(s::String) = for c=s; print(c); end
print(x...) = for i=x; print(i); end
println(args...) = print(args..., '\n')

show(s::String) = print_quoted(s)

# `*` concatenates strings and `^` repeats a string.
(*)(s::String...) = strcat(s...)
(^)(s::String, r::Integer) = repeat(s,r)

# A string has a single dimension whose extent is length(s).
size(s::String) = (length(s),)
size(s::String, d::Integer) = d==1 ? length(s) :
    error("in size: dimension ",d," out of range")

# strlen: number of characters. For directly indexable strings this is
# length(s); otherwise the characters are counted by iterating.
strlen(s::DirectIndexString) = length(s)
function strlen(s::String)
    i = start(s)
    if done(s,i)
        return 0
    end
    n = 1
    while true
        c, j = next(s,i)
        if done(s,j)
            return n
        end
        n += 1
        i = j
    end
end

# isvalid: whether i is an index at which a character can be read.
# The generic method simply reports whether next(s,i) throws.
isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))
function isvalid(s::String, i::Integer)
    try
        next(s,i)
        true
    catch
        false
    end
end

prevind(s::DirectIndexString, i::Integer) = i-1
thisind(s::DirectIndexString, i::Integer) = i
nextind(s::DirectIndexString, i::Integer) = i+1

prevind(s::String, i::Integer) = thisind(s,thisind(s,i)-1)

# Largest valid index j with j <= i, found by scanning downwards; 0 if none.
function thisind(s::String, i::Integer)
    for j = i:-1:1
        if isvalid(s,j)
            return j
        end
    end
    return 0 # out of range
end

# Smallest valid index j with i < j <= length(s); length(s)+1 if none.
function nextind(s::String, i::Integer)
    for j = i+1:length(s)
        if isvalid(s,j)
            return j
        end
    end
    length(s)+1 # out of range
end

# Conversions between iteration indices and character counts. They coincide
# for directly indexable strings.
ind2chr(s::DirectIndexString, i::Integer) = i
chr2ind(s::DirectIndexString, i::Integer) = i

# Character number of the character found at index i.
function ind2chr(s::String, i::Integer)
    s[i] # throws error if invalid
    j = 1
    k = start(s)
    while true
        c, l = next(s,k)
        if i <= k
            return j
        end
        j += 1
        k = l
    end
end

# Index at which the i-th character starts. Values of i below 1 are returned
# unchanged.
# NOTE(review): there is no done() check in the loop, so an i beyond the number
# of characters keeps calling next() past the end -- presumably that raises
# from the underlying string type; confirm.
function chr2ind(s::String, i::Integer)
    if i < 1
        return i
    end
    j = 1
    k = start(s)
    while true
        c, l = next(s,k)
        if i == j
            return k
        end
        j += 1
        k = l
    end
end

# Index of the first occurrence of c found when scanning from nextind(s,i)
# onwards, or 0 when there is none.
# NOTE(review): because scanning begins at nextind(s,i), the two-argument form
# below (which passes start(s)) appears never to examine the first character,
# and `contains` inherits that. Confirm whether this is intended or whether
# concrete string types override these methods.
function strchr(s::String, c::Char, i::Integer)
    i = nextind(s,i)
    while !done(s,i)
        d, j = next(s,i)
        if c == d
            return i
        end
        i = j
    end
    return 0
end
strchr(s::String, c::Char) = strchr(s, c, start(s))
contains(s::String, c::Char) = (strchr(s,c)!=0)

# Collect the characters of s into a freshly allocated Char array.
function chars(s::String)
    cx = Array(Char,strlen(s))
    i = 0
    for c in s
        cx[i += 1] = c
    end
    return cx
end

# Three-way lexicographic comparison by character: returns -1, 0 or +1.
# When one string is a prefix of the other, the shorter one sorts first.
function cmp(a::String, b::String)
    i = start(a)
    j = start(b)
    # Each string is tested with its OWN iteration state: index spaces of
    # different string types need not line up, so b must be checked with j.
    while !done(a,i) && !done(b,j)
        c, i = next(a,i)
        d, j = next(b,j)
        if c != d
            return c < d ? -1 : +1
        end
    end
    done(a,i) && !done(b,j) ? -1 :
    !done(a,i) && done(b,j) ? +1 : 0
end

isequal(a::String, b::String) = cmp(a,b) == 0
isless(a::String, b::String) = cmp(a,b) < 0

# faster comparisons for byte strings

cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
isequal(a::ByteString, b::ByteString) = length(a)==length(b) && cmp(a,b)==0

## character column width function ##

# Result of the C `wcwidth` call, with negative results clamped to 0.
charwidth(c::Char) = max(0,int(ccall(:wcwidth, Int32, (Char,), c)))
# Width of a string: sum of its character widths (byte strings delegate to
# the C routine u8_strwidth).
strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w)
strwidth(s::ByteString) = ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
# TODO: implement and use u8_strnwidth that takes a length argument

## generic string uses only length and next ##

# Wrapper that forwards only `length` and `next` to the wrapped string, so all
# other operations go through the generic fallbacks above.
type GenericString <: String
    string::String
end

length(s::GenericString) = length(s.string)
next(s::GenericString, i::Int) = next(s.string, i)

## plain old character arrays ##

# String backed by an array of Char; constructible from an existing array or
# from individual characters.
type CharString <: String
    chars::Array{Char,1}

    CharString(a::Array{Char,1}) = new(a)
    CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
end
# Any other arguments are converted with char() first.
CharString(x...) = CharString(map(char,x)...)

next(s::CharString, i::Int) = (s.chars[i], i+1)
length(s::CharString) = length(s.chars)
strlen(s::CharString) = length(s)

string(c::Char) = CharString(c)
string(c::Char, x::Char...) = CharString(c, x...)
## substrings reference original strings ##

# A view onto part of another string: `string` is the underlying string,
# `offset` is added to substring indices to obtain indices into it, and
# `length` is the extent of the view. No character data is copied here.
type SubString <: String
    string::String
    offset::Int
    length::Int

    # View of s covering indices i through j.
    SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
    # A substring of a substring refers straight to the original string by
    # accumulating the offsets rather than nesting views.
    SubString(s::SubString, i::Int, j::Int) =
        new(s.string, i-1+s.offset, j-i+1)
end
SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))

# Bounds-check i against the view, read from the underlying string at the
# shifted index, and shift the returned next index back into view coordinates.
function next(s::SubString, i::Int)
    if i < 1 || i > s.length
        error("string index out of bounds")
    end
    c, i = next(s.string, i+s.offset)
    c, i-s.offset
end

length(s::SubString) = s.length
# TODO: strlen(s::SubString) = ??
# default implementation will work but it's slow
# can this be delegated efficiently somehow?
# that may require additional string interfaces

# Range indexing on a generic string yields a SubString after checking that
# the range lies within 1:length(s).
function ref(s::String, r::Range1{Int})
    if first(r) < 1 || length(s) < last(r)
        error("in substring slice: index out of range")
    end
    SubString(s, first(r), last(r))
end

## efficient representation of repeated strings ##

# `string` repeated `repeat` times, stored as the pair rather than expanded.
type RepString <: String
    string::String
    repeat::Integer
end

length(s::RepString) = length(s.string)*s.repeat
strlen(s::RepString) = strlen(s.string)*s.repeat

# Map i onto an index j within one copy of the base string (mod1), read there,
# then translate the returned next index back by the same displacement (i-j).
function next(s::RepString, i::Int)
    if i < 1 || i > length(s)
        error("string index out of bounds")
    end
    j = mod1(i,length(s.string))
    c, k = next(s.string, j)
    c, k-j+i
end

# Repeat s r times: negative counts are an error, 0 gives "", 1 gives s
# itself, anything larger gives a RepString.
function repeat(s::String, r::Integer)
    r < 0 ? error("can't repeat a string ",r," times") :
    r == 0 ? "" :
    r == 1 ? s :
    RepString(s,r)
end

## reversed strings without data movement ##

# Wrapper whose iteration reads the wrapped string from its end.
type RevString <: String
    string::String
end

length(s::RevString) = length(s.string)
strlen(s::RevString) = strlen(s.string)

# Indices are mirrored: reversed index i corresponds to index n-i+1 of the
# wrapped string (n = length). thisind() is used to land on a valid index of
# the wrapped string.
start(s::RevString) = (n=length(s); n-thisind(s.string,n)+1)
function next(s::RevString, i::Int)
    n = length(s); j = n-i+1
    (s.string[j], n-thisind(s.string,j-1)+1)
end

# Reversing wraps the string; reversing a RevString unwraps it again.
reverse(s::String) = RevString(s)
reverse(s::RevString) = s.string

## ropes for efficient concatenation, etc. ##

# Idea: instead of this standard binary tree structure,
# how about we keep an array of substrings, with an
# offset array. We can do binary search on the offset
# array so we get O(log(n)) indexing time still, but we
# can compute the offsets lazily and avoid all the
# futzing around while the string is being constructed.

# Binary concatenation node: `head` followed by `tail`, with the node's
# `depth` and the combined `length` stored at construction time.
type RopeString <: String
    head::String
    tail::String
    depth::Int32
    length::Int

    # Each constructor compares the depths of the operands' children and, when
    # the comparison holds, re-associates (rotates) the pieces instead of
    # nesting the operands as given -- presumably to keep the tree from
    # degenerating into a long chain.
    RopeString(h::RopeString, t::RopeString) =
        depth(h.tail) + depth(t) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))

    RopeString(h::RopeString, t::String) =
        depth(h.tail) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, h.depth+1, length(h)+length(t))

    RopeString(h::String, t::RopeString) =
        depth(t.head) < depth(t.tail) ?
            RopeString(RopeString(h, t.head), t.tail) :
            new(h, t, t.depth+1, length(h)+length(t))

    # Two non-rope strings form a node of depth 1.
    RopeString(h::String, t::String) =
        new(h, t, 1, length(h)+length(t))
end

# Non-rope strings count as depth 0.
depth(s::String) = 0
depth(s::RopeString) = s.depth

# Indices up to length(head) are served by head; larger ones are shifted down
# into tail and the returned next index is shifted back up.
function next(s::RopeString, i::Int)
    if i <= length(s.head)
        return next(s.head, i)
    else
        c, j = next(s.tail, i-length(s.head))
        return c, j+length(s.head)
    end
end

length(s::RopeString) = s.length
strlen(s::RopeString) = strlen(s.head) + strlen(s.tail)

# Concatenation: non-string arguments are converted with string(); the
# remaining arguments are concatenated recursively, empty operands are
# dropped, and two non-empty operands are joined in a RopeString.
strcat() = ""
strcat(s::String) = s
strcat(x...) = strcat(map(string,x)...)
strcat(s::String, t::String...) =
    (t = strcat(t...); isempty(s) ? t : isempty(t) ? s : RopeString(s, t))

print(s::RopeString) = print(s.head, s.tail)

## transformed strings ##

# A string whose characters are computed on access by applying
# `transform(c, i)` to each character c of `string` read at index i.
type TransformedString <: String
    transform::Function
    string::String
end

length(s::TransformedString) = length(s.string)
strlen(s::TransformedString) = strlen(s.string)

function next(s::TransformedString, i::Int)
    c, j = next(s.string,i)
    c = s.transform(c, i)
    return c, j
end

## uppercase and lowercase transformations ##

# Single characters are converted by the C functions towupper / towlower.
uppercase(c::Char) = ccall(:towupper, Char, (Char,), c)
lowercase(c::Char) = ccall(:towlower, Char, (Char,), c)

# Whole strings are wrapped in a TransformedString rather than copied.
uppercase(s::String) = TransformedString((c,i)->uppercase(c), s)
lowercase(s::String) = TransformedString((c,i)->lowercase(c), s)

# Only the character read at index 1 is changed.
ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s)

const uc = uppercase
const lc = lowercase

## string map ##

# Apply f to every character of s and build a new string from the results.
# f must return a Char (enforced by the type assertion).
function map(f::Function, s::String)
    out = memio(length(s))
    for c in s
        write(out, f(c)::Char)
    end
    takebuf_string(out)
end

## conversion of general objects to strings ##

# string(x) captures what show(x) prints; cstring(x...) captures what
# print(x...) prints.
string(x) = print_to_string(show, x)
cstring(x...) = print_to_string(print, x...)

# Build a string from a C pointer by calling jl_cstr_to_string; a NULL
# pointer is rejected with an error.
function cstring(p::Ptr{Uint8})
    p == C_NULL ? error("cannot convert NULL to string") :
    ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
end

## string promotion rules ##

promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String

## printing literal quoted string data ##

# TODO: this is really the inverse of print_unbackslashed

# Print s between double quotes, backslash-escaping only embedded double quotes.
function print_quoted_literal(s::String)
    print('"')
    for c = s; c == '"' ? print("\\\"") : print(c); end
    print('"')
end

## string escaping & unescaping ##

# Escape text for a NUL character: the long form is chosen when the character
# at index i is an octal digit -- presumably so that the digit cannot be read
# as part of the escape.
escape_nul(s::String, i::Int) =
    !done(s,i) && '0' <= next(s,i)[1] <= '7' ? L"\x00" : L"\0"

is_hex_digit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
# True when the character at index i is a hex digit; print_escaped then uses
# the wider hex field for the preceding escape.
need_full_hex(s::String, i::Int) = !done(s,i) && is_hex_digit(next(s,i)[1])

# Print s with escaping applied. Characters are classified in this order:
# NUL, escape (\e), backslash, members of `esc` (printed with a leading
# backslash), printable characters (as is), code points 7..13 (letter escapes
# a b t n v f r), then \x, \u and \U hex escapes for everything else.
function print_escaped(s::String, esc::String)
    i = start(s)
    while !done(s,i)
        c, j = next(s,i)
        c == '\0'       ? print(escape_nul(s,j)) :
        c == '\e'       ? print(L"\e") :
        c == '\\'       ? print("\\\\") :
        contains(esc,c) ? print('\\', c) :
        iswprint(c)     ? print(c) :
        7 <= c <= 13    ? print('\\', "abtnvfr"[c-6]) :
        c <= '\x7f'     ? print(L"\x", hex(c, 2)) :
        c <= '\uffff'   ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
                          print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4))
        i = j
    end
end

escape_string(s::String) = print_to_string(length(s), print_escaped, s, "\"")
print_quoted(s::String) = (print('"'); print_escaped(s, "\"\$"); print('"'))
#" # work around syntax highlighting problem
quote_string(s::String) = print_to_string(length(s)+2, print_quoted, s)

# bare minimum unescaping function unescapes only given characters

# Print s, dropping a backslash when it is followed by a character in `esc`.
# The backslash itself is always treated as a member of `esc`.
function print_unescaped_chars(s::String, esc::String)
    if !contains(esc,'\\')
        esc = strcat("\\", esc)
    end
    i = start(s)
    while !done(s,i)
        c, i = next(s,i)
        if c == '\\' && !done(s,i) && contains(esc,s[i])
            c, i = next(s,i)
        end
        print(c)
    end
end

unescape_chars(s::String, esc::String) =
    print_to_string(length(s), print_unescaped_chars, s, esc)

# general unescaping of traditional C and Unicode escape sequences

# Print s with escape sequences decoded:
#   \x (up to 2 hex digits)  -> written as a single byte
#   \u (up to 4), \U (up to 8 hex digits) -> printed as a character
#   \ooo (up to 3 octal digits, value <= 255) -> written as a single byte
#   \a \b \t \n \v \f \r \e  -> the corresponding control character
#   any other escaped character -> that character itself
# A backslash that is the last character of s is printed unchanged.
function print_unescaped(s::String)
    i = start(s)
    while !done(s,i)
        c, i = next(s,i)
        if !done(s,i) && c == '\\'
            c, i = next(s,i)
            if c == 'x' || c == 'u' || c == 'U'
                n = k = 0
                # m: maximum number of hex digits for this kind of escape
                m = c == 'x' ? 2 :
                    c == 'u' ? 4 : 8
                while (k+=1) <= m && !done(s,i)
                    c, j = next(s,i)
                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
                    i = j
                end
                # k is still 1 when no hex digit was consumed
                if k == 1
                    error("\\x used with no following hex digits")
                end
                if m == 2 # \x escape sequence
                    write(uint8(n))
                else
                    print(char(n))
                end
            elseif '0' <= c <= '7'
                k = 1
                n = c-'0'
                while (k+=1) <= 3 && !done(s,i)
                    c, j = next(s,i)
                    n = '0' <= c <= '7' ? n<<3 + c-'0' : break
                    i = j
                end
                if n > 255
                    error("octal escape sequence out of range")
                end
                write(uint8(n))
            else
                print(c == 'a' ? '\a' :
                      c == 'b' ? '\b' :
                      c == 't' ? '\t' :
                      c == 'n' ? '\n' :
                      c == 'v' ? '\v' :
                      c == 'f' ? '\f' :
                      c == 'r' ? '\r' :
                      c == 'e' ? '\e' : c)
            end
        else
            print(c)
        end
    end
end

unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)

## checking UTF-8 & ASCII validity ##

byte_string_classify(s::ByteString) =
    ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
    # 0: neither valid ASCII nor UTF-8
    # 1: valid ASCII
    # 2: valid UTF-8

is_valid_ascii(s::ByteString) = byte_string_classify(s) == 1
is_valid_utf8 (s::ByteString) = byte_string_classify(s) != 0

# Return s unchanged when it passes the check, otherwise raise an error.
check_ascii(s::ByteString) = is_valid_ascii(s) ? s : error("invalid ASCII sequence")
check_utf8 (s::ByteString) = is_valid_utf8(s) ? s : error("invalid UTF-8 sequence")

## string interpolation parsing ##

# Split s into literal segments and `$`-interpolated expressions.
#   unescape -- applied to every literal segment before it is stored
#   printer  -- function placed in the generated print_to_string call
# i marks the start of the pending literal segment, j the scan position.
# Returns the single piece itself when there is exactly one and it is a
# ByteString; otherwise a call expression print_to_string(printer, pieces...).
function _jl_interp_parse(s::String, unescape::Function, printer::Function)
    sx = {}
    i = j = start(s)
    while !done(s,j)
        c, k = next(s,j)
        if c == '$'
            # flush the literal text before the `$`, then parse one atom
            if !isempty(s[i:j-1])
                push(sx, unescape(s[i:j-1]))
            end
            ex, j = parseatom(s,k)
            push(sx, ex)
            i = j
        elseif c == '\\' && !done(s,k)
            if s[k] == '$'
                # escaped `$`: flush what precedes the backslash and restart
                # the literal segment at the `$` so the backslash is dropped
                if !isempty(s[i:j-1])
                    push(sx, unescape(s[i:j-1]))
                end
                i = k
            end
            # skip the escaped character so it is not examined again
            c, j = next(s,k)
        else
            j = k
        end
    end
    # trailing literal segment (the loop has ended, so j is past the end)
    if !isempty(s[i:])
        push(sx, unescape(s[i:j-1]))
    end
    length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
        expr(:call, :print_to_string, printer, sx...)
end

_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))

# Variant whose generated code emits every piece with write() instead of
# print(), and whose literal segments are not checked for UTF-8 validity.
function _jl_interp_parse_bytes(s::String)
    writer(x...) = for w=x; write(w); end
    _jl_interp_parse(s, unescape_string, writer)
end

## core string macros ##

macro str(s); _jl_interp_parse(s); end
macro S_str(s); _jl_interp_parse(s); end
macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
macro E_str(s); check_utf8(unescape_string(s)); end
macro B_str(s); _jl_interp_parse_bytes(s); end
macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end

## shell-like command parsing ##

# Parse a shell-like command line into words.
#   interp -- when true, `$` outside single quotes introduces an expression
#             parsed with parseatom, and the result is a tuple expression;
#             when false, the raw list of words is returned.
# Each word is a list of parts (literal strings and, with interp, parsed
# expressions). Single quotes, double quotes and backslash escapes are
# recognised; unterminated quotes and dangling backslashes raise errors.
function _jl_shell_parse(s::String, interp::Bool)

    in_single_quotes = false
    in_double_quotes = false

    args = {}
    arg = {}
    i = start(s)
    j = i

    # add a part to the current word, ignoring empty strings
    function update_arg(x)
        if !isa(x,String) || !isempty(x)
            push(arg, x)
        end
    end
    # finish the current word (an empty word becomes {""}) and start a new one
    function append_arg()
        if isempty(arg); arg = {"",}; end
        push(args, arg)
        arg = {}
    end

    while !done(s,j)
        c, k = next(s,j)
        if !in_single_quotes && !in_double_quotes && iswspace(c)
            # unquoted whitespace ends the word; skip the rest of the run
            update_arg(s[i:j-1])
            append_arg()
            j = k
            while !done(s,j)
                c, k = next(s,j)
                if !iswspace(c)
                    i = j
                    break
                end
                j = k
            end
        elseif interp && !in_single_quotes && c == '$'
            update_arg(s[i:j-1]); i = k; j = k
            if done(s,k)
                error("\$ right before end of command")
            end
            if iswspace(s[k])
                error("space not allowed right after \$")
            end
            ex, j = parseatom(s,j)
            update_arg(ex); i = j
        else
            if !in_double_quotes && c == '\''
                in_single_quotes = !in_single_quotes
                update_arg(s[i:j-1]); i = k
            elseif !in_single_quotes && c == '"'
                in_double_quotes = !in_double_quotes
                update_arg(s[i:j-1]); i = k
            elseif c == '\\'
                if in_double_quotes
                    if done(s,k)
                        error("unterminated double quote")
                    end
                    # inside double quotes only \" and \$ lose their backslash
                    if s[k] == '"' || s[k] == '$'
                        update_arg(s[i:j-1]); i = k
                        c, k = next(s,k)
                    end
                elseif !in_single_quotes
                    if done(s,k)
                        error("dangling backslash")
                    end
                    # unquoted: the backslash is dropped, next char kept as is
                    update_arg(s[i:j-1]); i = k
                    c, k = next(s,k)
                end
            end
            j = k
        end
    end

    if in_single_quotes; error("unterminated single quote"); end
    if in_double_quotes; error("unterminated double quote"); end

    update_arg(s[i:])
    append_arg()

    if !interp
        return args
    end

    # construct an expression
    exprs = {}
    for arg in args
        push(exprs, expr(:tuple, arg))
    end
    expr(:tuple,exprs)
end
_jl_shell_parse(s::String) = _jl_shell_parse(s,true)

# Split a command line into words without interpolation; the parts of each
# word are concatenated into a single string.
function shell_split(s::String)
    parsed = _jl_shell_parse(s,false)
    args = String[]
    for arg in parsed
        push(args, strcat(arg...))
    end
    args
end

# Print one word quoted for the shell: '' for the empty word, the bare word
# when it has no whitespace, backslash, quote or `$`, single-quoted when it
# contains no single quote, otherwise double-quoted with `"` and `$`
# backslash-escaped.
function print_shell_word(word::String)
    if isempty(word)
        print("''")
    end
    has_single = false
    has_special = false
    for c in word
        if iswspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
            has_special = true
            if c == '\''
                has_single = true
            end
        end
    end
    if !has_special
        print(word)
    elseif !has_single
        print('\'', word, '\'')
    else
        print('"')
        for c in word
            if c == '"' || c == '$'
                print('\\')
            end
            print(c)
        end
        print('"')
    end
end

# Print a command and its arguments as shell words separated by spaces.
function print_shell_escaped(cmd::String, args::String...)
    print_shell_word(cmd)
    for arg in args
        print(' ')
        print_shell_word(arg)
    end
end

shell_escape(cmd::String, args::String...) =
    print_to_string(print_shell_escaped, cmd, args...)

## interface to parser ##

# Parse one expression from s starting at 1-based position pos by calling
# jl_parse_string; `greedy` is passed through as 1 or 0. Returns
# (expr, next_pos). Throws ParseError when the parser returns an :error
# expression or () (end of input).
function parse(s::String, pos, greedy)
    # returns (expr, end_pos). expr is () in case of parse error.
    ex, pos = ccall(:jl_parse_string, Any,
                    (Ptr{Uint8}, Int32, Int32),
                    cstring(s), pos-1, greedy ? 1:0)
    if isa(ex,Expr) && is(ex.head,:error)
        throw(ParseError(ex.args[1]))
    end
    if ex == (); throw(ParseError("end of input")); end
    ex, pos+1 # C is zero-based, Julia is 1-based
end

parse(s::String) = parse(s, 1, true)
parse(s::String, pos) = parse(s, pos, true)
parseatom(s::String) = parse(s, 1, false)
parseatom(s::String, pos) = parse(s, pos, false)

## miscellaneous string functions ##

# Pad s on the left with copies of p until it is n characters long; s is
# returned unchanged when it is already that long. When p has several
# characters, q whole copies plus the first r characters of p are used.
function lpad(s::String, n::Integer, p::String)
    m = n - strlen(s)
    if m <= 0; return s; end
    l = strlen(p)
    if l==1
        return p^m * s
    end
    q = div(m,l)
    r = m - q*l
    cstring(p^q*p[1:chr2ind(p,r)]*s)
end

# Same as lpad, but the padding goes on the right.
function rpad(s::String, n::Integer, p::String)
    m = n - strlen(s)
    if m <= 0; return s; end
    l = strlen(p)
    if l==1
        return s * p^m
    end
    q = div(m,l)
    r = m - q*l
    cstring(s*p^q*p[1:chr2ind(p,r)])
end

# Non-string arguments are converted with string(); the default pad is a space.
lpad(s, n::Integer, p) = lpad(string(s), n, string(p))
rpad(s, n::Integer, p) = rpad(string(s), n, string(p))

lpad(s, n::Integer) = lpad(string(s), n, " ")
rpad(s, n::Integer) = rpad(string(s), n, " ")

# Split s at every character contained in `delims`. Empty tokens are kept
# only when include_empty is true.
function split(s::String, delims, include_empty::Bool)
    i = 1
    strs = String[]
    len = length(s)
    while true
        tokstart = tokend = i
        while !done(s,i)
            (c,i) = next(s,i)
            if contains(delims, c)
                break
            end
            tokend = i
        end
        tok = s[tokstart:(tokend-1)]
        if include_empty || !isempty(tok)
            push(strs, tok)
        end
        # continue while input remains, or once more when the string ended
        # on a delimiter (so the trailing empty token is produced)
        if !((i <= len) || (i==len+1 && tokend!=i))
            break
        end
    end
    strs
end

# Defaults: split on whitespace dropping empty tokens; with explicit
# delimiters empty tokens are kept.
split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
split(s::String, x) = split(s, x, true)
split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)

# Print the items of `strings` separated by `delim`, using `last` instead of
# `delim` before the final item.
function print_joined(strings, delim, last)
    i = start(strings)
    if done(strings,i)
        return
    end
    str, i = next(strings,i)
    print(str)
    while !done(strings,i)
        str, i = next(strings,i)
        print(done(strings,i) ? last : delim)
        print(str)
    end
end

# Print the items of `strings` separated by `delim`.
function print_joined(strings, delim)
    i = start(strings)
    while !done(strings,i)
        str, i = next(strings,i)
        print(str)
        if !done(strings,i)
            print(delim)
        end
    end
end
print_joined(strings) = print_joined(strings, "")

join(args...) = print_to_string(print_joined, args...)

# chop: everything before the last character.
chop(s::String) = s[1:thisind(s,length(s))-1]

# chomp: remove one trailing newline, if present. An empty string has no last
# character to inspect, so it is returned unchanged instead of indexing out
# of bounds.
function chomp(s::String)
    if isempty(s)
        return s
    end
    i = thisind(s,length(s))
    s[i]=='\n' ? s[1:i-1] : s
end
chomp(s::ByteString) =
    (length(s.data) == 0 || s.data[end] != 0x0a) ? s : s[1:end-1]

# Remove leading whitespace; "" when s is empty or all whitespace.
function lstrip(s::String)
    i = start(s)
    while !done(s,i)
        c, j = next(s,i)
        if !iswspace(c)
            return s[i:end]
        end
        i = j
    end
    ""
end

# Remove trailing whitespace by scanning the reversed string; "" when s is
# empty or all whitespace.
function rstrip(s::String)
    r = reverse(s)
    i = start(r)
    while !done(r,i)
        c, j = next(r,i)
        if !iswspace(c)
            return s[1:end-i+1]
        end
        i = j
    end
    ""
end

strip(s::String) = lstrip(rstrip(s))

## string to integer functions ##

# Parse s as an integer of type T in the given base (2..36). A leading '-' is
# accepted only for signed T. Digits are 0-9 then letters (either case).
# Errors on an invalid base, an empty string, a lone '-', or a character that
# is not a digit in this base.
function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)
    if !(2 <= base <= 36); error("invalid base: ",base); end
    i = start(s)
    if done(s,i)
        error("premature end of integer (in ",show_to_string(s),")")
    end
    c,i = next(s,i)
    sgn = one(T)
    if T <: Signed && c == '-'
        sgn = -sgn
        if done(s,i)
            error("premature end of integer (in ",show_to_string(s),")")
        end
        c,i = next(s,i)
    end
    base = convert(T,base)
    n::T = 0
    while true
        # non-digit characters map to typemax(Int) so the check below rejects them
        d = '0' <= c <= '9' ? c-'0' :
            'A' <= c <= 'Z' ? c-'A'+10 :
            'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)
        if d >= base
            error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
        end
        # TODO: overflow detection?
        n = n*base + d
        if done(s,i)
            break
        end
        c,i = next(s,i)
    end
    return flipsign(n,sgn)
end

parse_int(s::String, base::Integer) = parse_int(Int,s,base)
parse_int(T::Type, s::String) = parse_int(T,s,10)
parse_int(s::String) = parse_int(Int,s,10)

parse_bin(T::Type, s::String) = parse_int(T,s,2)
parse_oct(T::Type, s::String) = parse_int(T,s,8)
parse_hex(T::Type, s::String) = parse_int(T,s,16)

parse_bin(s::String) = parse_int(Int,s,2)
parse_oct(s::String) = parse_int(Int,s,8)
parse_hex(s::String) = parse_int(Int,s,16)

# Integer conversion functions applied to strings parse them in base 10.
integer (s::String) = int(s)
unsigned(s::String) = uint(s)
int (s::String) = parse_int(Int,s)
uint (s::String) = parse_int(Uint,s)
int8 (s::String) = parse_int(Int8,s)
uint8 (s::String) = parse_int(Uint8,s)
int16 (s::String) = parse_int(Int16,s)
uint16 (s::String) = parse_int(Uint16,s)
int32 (s::String) = parse_int(Int32,s)
uint32 (s::String) = parse_int(Uint32,s)
int64 (s::String) = parse_int(Int64,s)
uint64 (s::String) = parse_int(Uint64,s)

## integer to string functions ##

# Digit characters for bases up to 36, as bytes.
const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data

# Format n in base b (2..36) using at least l digits; shorter numbers are
# left-filled with '0' because the digit loops keep running after n reaches
# zero. Negative numbers get a leading '-'.
function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)
    if b < 2 || b > 36; error("int2str: invalid base ", b); end
    neg = n < 0
    n = unsigned(abs(n))
    b = convert(typeof(n), b)
    ndig = ndigits(n, b)
    sz = max(convert(Int, ndig), l) + neg
    data = Array(Uint8, sz)
    i = sz
    # digits are produced least-significant first and written from the end
    # of the buffer backwards
    if ispow2(b)
        # power-of-two base: extract digits with mask and shift
        digmask = b-1
        shift = trailing_zeros(b)
        while i > neg
            ch = n & digmask
            data[i] = _jl_dig_syms[int(ch)+1]
            n >>= shift
            i -= 1
        end
    else
        while i > neg
            ch = n % b
            data[i] = _jl_dig_syms[int(ch)+1]
            n = div(n,b)
            i -= 1
        end
    end
    if neg
        data[1] = '-'
    end
    ASCIIString(data)
end
int2str(n::Integer, b::Integer) = int2str(n, b, 0)
# Other integer types are converted with int64() first.
int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l)

# Signed integers are rendered in decimal after conversion with int64().
string(x::Signed) = dec(int64(x))
cstring(x::Signed) = dec(int64(x))

## string to float functions ##

# Run jl_strtod on s, storing the parsed value through `out`.
# Returns true when the C routine reports status 0.
function float64_isvalid(s::String, out::Array{Float64,1})
    s = cstring(s)
    status = ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), s, out)
    return (status==0)
end

# Single-precision counterpart using jl_strtof.
function float32_isvalid(s::String, out::Array{Float32,1})
    s = cstring(s)
    status = ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), s, out)
    return (status==0)
end

begin
    # One-element result buffers local to this block; every call of the
    # functions below reuses them.
    local tmp::Array{Float64,1} = Array(Float64,1)
    local tmpf::Array{Float32,1} = Array(Float32,1)
    global float64, float32

    # Parse s as a Float64, throwing ArgumentError when it is not valid.
    function float64(s::String)
        if float64_isvalid(s, tmp)
            return tmp[1]
        end
        throw(ArgumentError("float64(String): invalid number format"))
    end

    # Parse s as a Float32, throwing ArgumentError when it is not valid.
    function float32(s::String)
        if float32_isvalid(s, tmpf)
            return tmpf[1]
        end
        throw(ArgumentError("float32(String): invalid number format"))
    end
end

# Float64 is the default target type for string-to-float conversion.
float(x::String) = float64(x)
parse_float(x::String) = float64(x)
parse_float(::Type{Float64}, x::String) = float64(x)
parse_float(::Type{Float32}, x::String) = float32(x)

# copying a byte string (generally not needed due to "immutability")

strcpy{T<:ByteString}(s::T) = T(copy(s.data))

# lexicographically compare byte arrays (used by Latin-1 and UTF-8)

# memcmp decides over the common prefix; on a tie the lengths are compared.
function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})
    c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint),
              a, b, min(length(a),length(b)))
    if c < 0
        return -1
    elseif c > 0
        return +1
    end
    return cmp(length(a),length(b))
end

# find the index of the first occurrence of a byte value in a byte array

# Returns the 1-based index of the match, or 0 when the C memchr call
# returns NULL.
function memchr(a::Array{Uint8,1}, b::Integer)
    base = pointer(a)
    hit = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), base, b, length(a))
    if hit == C_NULL
        return 0
    end
    return hit - base + 1
end

# concatenate byte arrays into a single array

memcat() = Array(Uint8,0)
memcat(a::Array{Uint8,1}) = copy(a)

# First pass sizes the result, second pass copies each input into place
# with memcpy.
function memcat(arrays::Array{Uint8,1}...)
    total = 0
    for piece in arrays
        total += length(piece)
    end
    result = Array(Uint8, total)
    dest = pointer(result)
    filled = 0
    for piece in arrays
        ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
              dest+filled, piece, length(piece))
        filled += length(piece)
    end
    return result
end

# concatenate the data fields of byte strings

memcat(s::ByteString) = memcat(s.data)
memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...)