1## core string functions ##
2
# Every concrete string type must supply `length` and `next`; these
# fallbacks only report which method is missing for the given type.
function length(s::String)
    error("you must implement length(",typeof(s),")")
end
function next(s::String, i::Int)
    error("you must implement next(",typeof(s),",Int)")
end

# Directly indexable strings: yield s[i] and step the index by one.
function next(s::DirectIndexString, i::Int)
    return (s[i], i+1)
end

# Any other integer index is converted to Int before dispatching again.
function next(s::String, i::Integer)
    return next(s, int(i))
end
7
8## generic supplied functions ##
9
# Iteration protocol: begin at index 1 and stop once the index passes length(s).
function start(s::String)
    return 1
end
function done(s::String, i)
    return i > length(s)
end

# A string is empty when iteration is finished before it begins.
function isempty(s::String)
    return done(s, start(s))
end

# Indexing with an Int returns the character produced by `next` at that index.
function ref(s::String, i::Int)
    c, j = next(s,i)
    return c
end

# Other index kinds are normalized and indexed again.
function ref(s::String, i::Integer)
    return s[int(i)]
end
function ref(s::String, x::Real)
    return s[iround(x)]
end
function ref{T<:Integer}(s::String, r::Range1{T})
    lo = int(first(r))
    hi = int(last(r))
    return s[lo:hi]
end
17
# Build a symbol from the cstring form of s.
function symbol(s::String)
    return symbol(cstring(s))
end

# A string is already its own string representation.
function string(s::String)
    return s
end
20
# Print a string one character at a time.
function print(s::String)
    for c in s
        print(c)
    end
end

# Print each argument in order.
function print(x...)
    for item in x
        print(item)
    end
end

# Print the arguments followed by a newline character.
function println(args...)
    print(args..., '\n')
end
24
# Showing a string prints its quoted, escaped form.
function show(s::String)
    print_quoted(s)
end

# `*` concatenates strings via strcat; `^` repeats a string r times.
(*)(s::String...) = strcat(s...)
(^)(s::String, r::Integer) = repeat(s,r)
29
# A string has a single dimension whose extent is length(s).
function size(s::String)
    return (length(s),)
end

# Only dimension 1 exists; any other dimension is an error.
function size(s::String, d::Integer)
    if d == 1
        return length(s)
    end
    error("in size: dimension ",d," out of range")
end
33
# Number of characters in a string (as opposed to length(s), the index extent).

# For directly indexable strings the two coincide.
function strlen(s::DirectIndexString)
    return length(s)
end

# Generic fallback: walk the string with `next` and count the steps.
function strlen(s::String)
    count = 0
    pos = start(s)
    while !done(s,pos)
        ch, pos = next(s,pos)
        count += 1
    end
    return count
end
50
# isvalid(s, i): is i an index at which a character of s can be read?

# Directly indexable strings: every index from start(s) through length(s) is valid.
isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))

# Generic fallback: an index counts as valid exactly when next(s,i) does not
# throw. Any error raised by next is caught and reported as `false`.
function isvalid(s::String, i::Integer)
    try
        next(s,i)
        true
    catch
        false
    end
end
60
# Index navigation for directly indexable strings is plain integer arithmetic.
function prevind(s::DirectIndexString, i::Integer)
    return i-1
end
function thisind(s::DirectIndexString, i::Integer)
    return i
end
function nextind(s::DirectIndexString, i::Integer)
    return i+1
end

# Generic previous index: snap i to a valid index, step back one position,
# then snap again.
function prevind(s::String, i::Integer)
    cur = thisind(s,i)
    return thisind(s,cur-1)
end
66
# Largest valid index that is <= i, found by scanning downward from i.
# Returns 0 when no valid index exists at or below i.
function thisind(s::String, i::Integer)
    found = 0 # out of range
    for j = i:-1:1
        if isvalid(s,j)
            found = j
            break
        end
    end
    return found
end
75
# Smallest valid index strictly greater than i, found by scanning upward.
# Returns length(s)+1 when there is none.
function nextind(s::String, i::Integer)
    n = length(s)
    for j = i+1:n
        if isvalid(s,j)
            return j
        end
    end
    return n+1 # out of range
end
84
# For directly indexable strings, index and character position are the same number.
function ind2chr(s::DirectIndexString, i::Integer)
    return i
end
function chr2ind(s::DirectIndexString, i::Integer)
    return i
end
87
# Convert an index into a 1-based character position by walking the string
# from its start until the walk reaches (or passes) index i.
function ind2chr(s::String, i::Integer)
    s[i] # throws error if invalid
    nchars = 1
    pos = start(s)
    while true
        ch, after = next(s,pos)
        if pos >= i
            return nchars
        end
        nchars += 1
        pos = after
    end
end
101
# Convert a 1-based character position into the index where that character
# starts. Positions below 1 are returned unchanged; otherwise the string is
# walked with `next` until the i-th character is reached.
function chr2ind(s::String, i::Integer)
    if i < 1
        return i
    end
    nchars = 1
    pos = start(s)
    while true
        ch, after = next(s,pos)
        if nchars == i
            return pos
        end
        nchars += 1
        pos = after
    end
end
117
# Find the first occurrence of character c located strictly after index i.
# The scan begins at nextind(s,i). Returns the index of the match, or 0 when
# c does not occur in the remainder of the string.
function strchr(s::String, c::Char, i::Integer)
    pos = nextind(s,i)
    while !done(s,pos)
        ch, after = next(s,pos)
        if ch == c
            return pos
        end
        pos = after
    end
    return 0
end
# Find the first occurrence of c anywhere in s (0 if absent).
# The three-argument strchr searches strictly *after* the index it is given
# (it starts at nextind(s,i)), so the whole-string search is seeded with 0.
# Seeding it with start(s) would skip the first character.
strchr(s::String, c::Char) = strchr(s, c, 0)

# True when c occurs somewhere in s.
contains(s::String, c::Char) = (strchr(s,c)!=0)
131
# Collect the characters of s into a freshly allocated Char array.
function chars(s::String)
    out = Array(Char,strlen(s))
    k = 1
    for ch in s
        out[k] = ch
        k += 1
    end
    return out
end
140
# Lexicographic comparison of two strings, character by character.
# Returns -1 if a sorts before b, +1 if a sorts after b, and 0 if equal.
# A string that is a proper prefix of the other sorts first.
function cmp(a::String, b::String)
    i = start(a)
    j = start(b)
    # Each string is tracked with its own index: the two may use different
    # index spacings, so b's end must be tested with j, not i.
    while !done(a,i) && !done(b,j)
        c, i = next(a,i)
        d, j = next(b,j)
        if c != d
            return c < d ? -1 : +1
        end
    end
    # Common prefix exhausted: whichever string still has characters is larger.
    done(a,i) && !done(b,j) ? -1 :
    !done(a,i) && done(b,j) ? +1 : 0
end
154
# Equality and ordering of strings are both derived from cmp.
function isequal(a::String, b::String)
    return cmp(a,b) == 0
end
function isless(a::String, b::String)
    return cmp(a,b) < 0
end
157
158# faster comparisons for byte strings
159
# Byte strings compare their underlying byte arrays directly.
function cmp(a::ByteString, b::ByteString)
    return lexcmp(a.data, b.data)
end

# Strings of different length can never be equal, so check that first.
function isequal(a::ByteString, b::ByteString)
    if length(a) != length(b)
        return false
    end
    return cmp(a,b) == 0
end
162
163## character column width function ##
164
# Column width of one character as reported by the C wcwidth function,
# with negative results clamped to 0.
function charwidth(c::Char)
    w = int(ccall(:wcwidth, Int32, (Char,), c))
    return max(0,w)
end

# Column width of a string: the sum of the widths of its characters.
function strwidth(s::String)
    total = 0
    for c in s
        total += charwidth(c)
    end
    return total
end

# Byte strings hand their data to the u8_strwidth C routine.
function strwidth(s::ByteString)
    return ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
end
168# TODO: implement and use u8_strnwidth that takes a length argument
169
170## generic string uses only length and next ##
171
# Wrapper that exposes only `length` and `next` of the wrapped string, so all
# other operations go through the generic String fallbacks.
type GenericString <: String
    string::String
end

function length(s::GenericString)
    return length(s.string)
end
function next(s::GenericString, i::Int)
    return next(s.string, i)
end
178
179## plain old character arrays ##
180
# A string stored as a one-dimensional array of Char.
type CharString <: String
    chars::Array{Char,1}

    # Wrap an existing Char array (the array is stored as given, not copied here).
    CharString(a::Array{Char,1}) = new(a)
    # Build from individual characters by gathering them into a new array.
    CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
end
# Any other arguments are converted with `char` first.
CharString(x...) = CharString(map(char,x)...)
188
# CharString iteration: one array slot per character.
function next(s::CharString, i::Int)
    return (s.chars[i], i+1)
end
function length(s::CharString)
    return length(s.chars)
end
function strlen(s::CharString)
    return length(s)
end

# Converting characters to a string produces a CharString.
function string(c::Char)
    return CharString(c)
end
function string(c::Char, x::Char...)
    return CharString(c, x...)
end
195
196## substrings reference original strings ##
197
# A view onto the index range i..j of another string; no character data is copied.
type SubString <: String
    string::String   # the string being referenced
    offset::Int      # i-1: added to a substring index to get the parent index
    length::Int      # j-i+1: index extent of the view

    SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
    # A substring of a substring refers to the innermost parent string
    # directly, with the two offsets added together.
    SubString(s::SubString, i::Int, j::Int) =
        new(s.string, i-1+s.offset, j-i+1)
end
# Other integer index types are converted to Int.
SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))
208
# Iterate a SubString: bounds-check the local index, read from the parent at
# the shifted position, and shift the returned next index back.
function next(s::SubString, i::Int)
    if !(1 <= i <= s.length)
        error("string index out of bounds")
    end
    ch, after = next(s.string, s.offset+i)
    return (ch, after-s.offset)
end

function length(s::SubString)
    return s.length
end
218# TODO: strlen(s::SubString) = ??
219# default implementation will work but it's slow
220# can this be delegated efficiently somehow?
221# that may require additional string interfaces
222
# Slice a string with an Int range: returns a SubString referencing s.
# The range must lie within 1..length(s).
function ref(s::String, r::Range1{Int})
    lo = first(r)
    hi = last(r)
    if lo < 1 || hi > length(s)
        error("in substring slice: index out of range")
    end
    return SubString(s, lo, hi)
end
229
230## efficient representation of repeated strings ##
231
# A string repeated a number of times, stored as the base string plus a count.
type RepString <: String
    string::String
    repeat::Integer
end

# Both the index extent and the character count scale with the repeat count.
function length(s::RepString)
    return length(s.string)*s.repeat
end
function strlen(s::RepString)
    return strlen(s.string)*s.repeat
end
239
# Iterate a RepString: map the index into the base string with mod1, read
# there, and translate the base string's next index back into the repeated
# index space.
function next(s::RepString, i::Int)
    if !(1 <= i <= length(s))
        error("string index out of bounds")
    end
    inner = mod1(i,length(s.string))
    ch, after = next(s.string, inner)
    return (ch, after-inner+i)
end
248
# Repeat s r times. Negative counts are an error; 0 gives the empty string,
# 1 gives s itself, and anything larger gives a RepString.
function repeat(s::String, r::Integer)
    if r < 0
        error("can't repeat a string ",r," times")
    elseif r == 0
        return ""
    elseif r == 1
        return s
    end
    return RepString(s,r)
end
255
256## reversed strings without data movement ##
257
# A reversed view of another string; the wrapped data is not moved.
type RevString <: String
    string::String
end

function length(s::RevString)
    return length(s.string)
end
function strlen(s::RevString)
    return strlen(s.string)
end

# Indices are mirrored: reversed index i corresponds to index n-i+1 of the
# wrapped string, with n = length(s). Iteration starts at the mirror of the
# wrapped string's last valid index.
function start(s::RevString)
    n = length(s)
    return n-thisind(s.string,n)+1
end
function next(s::RevString, i::Int)
    n = length(s)
    j = n-i+1
    ch = s.string[j]
    after = n-thisind(s.string,j-1)+1
    return (ch, after)
end

# Reversing wraps the string; reversing a RevString unwraps it again.
function reverse(s::String)
    return RevString(s)
end
function reverse(s::RevString)
    return s.string
end
273
274## ropes for efficient concatenation, etc. ##
275
276# Idea: instead of this standard binary tree structure,
277# how about we keep an array of substrings, with an
278# offset array. We can do binary search on the offset
279# array so we get O(log(n)) indexing time still, but we
280# can compute the offsets lazily and avoid all the
281# futzing around while the string is being constructed.
282
# Binary-tree concatenation of two strings. `depth` is the height of the rope
# (non-rope strings count as depth 0, see `depth`), and `length` caches
# length(head)+length(tail).
type RopeString <: String
    head::String
    tail::String
    depth::Int32
    length::Int

    # The constructors compare subtree depths and, when one side is deeper,
    # re-associate the concatenation (rotating a subtree to the other side)
    # before building the node; otherwise they build the node directly.

    # rope * rope
    RopeString(h::RopeString, t::RopeString) =
        depth(h.tail) + depth(t) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))

    # rope * plain string
    RopeString(h::RopeString, t::String) =
        depth(h.tail) < depth(h.head) ?
            RopeString(h.head, RopeString(h.tail, t)) :
            new(h, t, h.depth+1, length(h)+length(t))

    # plain string * rope
    RopeString(h::String, t::RopeString) =
        depth(t.head) < depth(t.tail) ?
            RopeString(RopeString(h, t.head), t.tail) :
            new(h, t, t.depth+1, length(h)+length(t))

    # plain string * plain string: a leaf pair of depth 1
    RopeString(h::String, t::String) =
        new(h, t, 1, length(h)+length(t))
end
307
# Tree depth used for rope balancing: ordinary strings count as 0, ropes
# report their stored depth.
function depth(s::String)
    return 0
end
function depth(s::RopeString)
    return s.depth
end
310
# Iterate a rope: indices up to length(head) are served by the head; larger
# indices are shifted down into the tail and the tail's next index is shifted
# back up.
function next(s::RopeString, i::Int)
    hlen = length(s.head)
    if i > hlen
        ch, after = next(s.tail, i-hlen)
        return ch, after+hlen
    end
    return next(s.head, i)
end

# The index extent is cached in the node; the character count is the sum of
# both halves.
function length(s::RopeString)
    return s.length
end
function strlen(s::RopeString)
    return strlen(s.head) + strlen(s.tail)
end
322
# String concatenation. No arguments gives "", a single string is returned
# as-is, and non-string arguments are converted with `string` first.
strcat() = ""
strcat(s::String) = s
strcat(x...) = strcat(map(string,x)...)

# Concatenate the remaining strings first, then join with s. Empty operands
# are dropped so that no rope node is built for them.
function strcat(s::String, t::String...)
    rest = strcat(t...)
    if isempty(s)
        return rest
    elseif isempty(rest)
        return s
    end
    return RopeString(s, rest)
end

# Printing a rope prints its head followed by its tail.
function print(s::RopeString)
    print(s.head, s.tail)
end
330
331## transformed strings ##
332
# A string whose characters are passed through `transform(char, index)` as
# they are read; the wrapped string itself is left untouched.
type TransformedString <: String
    transform::Function
    string::String
end

function length(s::TransformedString)
    return length(s.string)
end
function strlen(s::TransformedString)
    return strlen(s.string)
end

# Read from the wrapped string, then apply the transform to the character
# together with the index it was read from.
function next(s::TransformedString, i::Int)
    raw, after = next(s.string,i)
    return s.transform(raw, i), after
end
346
347## uppercase and lowercase transformations ##
348
# Single-character case conversion delegates to the C towupper/towlower functions.
function uppercase(c::Char)
    return ccall(:towupper, Char, (Char,), c)
end
function lowercase(c::Char)
    return ccall(:towlower, Char, (Char,), c)
end

# String case conversion wraps the string in a TransformedString, so the
# conversion happens as characters are read.
function uppercase(s::String)
    return TransformedString((c,i)->uppercase(c), s)
end
function lowercase(s::String)
    return TransformedString((c,i)->lowercase(c), s)
end

# Convert only the character read at index 1.
function ucfirst(s::String)
    return TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
end
function lcfirst(s::String)
    return TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
end

# Short aliases.
const uc = uppercase
const lc = lowercase
360
361## string map ##
362
# Apply f to every character of s and collect the results into a new string.
# f must return a Char for each input character (enforced by a type assertion).
function map(f::Function, s::String)
    buf = memio(length(s))
    for ch in s
        write(buf, f(ch)::Char)
    end
    return takebuf_string(buf)
end
370
371## conversion of general objects to strings ##
372
# Generic conversions: `string` captures what `show` prints for x, while
# `cstring` captures what `print` prints for its arguments.
function string(x)
    return print_to_string(show, x)
end
function cstring(x...)
    return print_to_string(print, x...)
end

# Convert a C string pointer to a ByteString; a NULL pointer is rejected.
function cstring(p::Ptr{Uint8})
    if p == C_NULL
        error("cannot convert NULL to string")
    end
    return ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
end
380
381## string promotion rules ##
382
# Promotion among the concrete string types: every pairing listed here
# promotes to UTF8String (including ASCIIString with CharString).
promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String
386
387## printing literal quoted string data ##
388
389# TODO: this is really the inverse of print_unbackslashed
390
# Print s between double quotes, backslash-escaping only embedded double quotes.
function print_quoted_literal(s::String)
    print('"')
    for ch in s
        if ch == '"'
            print("\\\"")
        else
            print(ch)
        end
    end
    print('"')
end
396
397## string escaping & unescaping ##
398
# Escape text for a NUL character, given the index i of whatever follows it.
# When the following character is an octal digit the long form \x00 is used,
# so that the digit cannot be read as part of the escape; otherwise \0.
function escape_nul(s::String, i::Int)
    if !done(s,i)
        c, j = next(s,i)
        if '0' <= c <= '7'
            return L"\x00"
        end
    end
    return L"\0"
end
401
# True for the characters 0-9, a-f and A-F.
function is_hex_digit(c::Char)
    if '0' <= c <= '9'
        return true
    elseif 'a' <= c <= 'f'
        return true
    end
    return 'A' <= c <= 'F'
end
# True when a character exists at index i and it is a hex digit. In that case
# a preceding hex escape must be written at full width, or the digit would be
# read as part of it.
function need_full_hex(s::String, i::Int)
    if done(s,i)
        return false
    end
    c, j = next(s,i)
    return is_hex_digit(c)
end
404
# Print s with special characters replaced by backslash escapes. Characters
# listed in `esc` are additionally escaped with a single backslash. The tests
# below run in order, and the first one that matches decides the output.
function print_escaped(s::String, esc::String)
    pos = start(s)
    while !done(s,pos)
        c, after = next(s,pos)
        if c == '\0'
            print(escape_nul(s,after))
        elseif c == '\e'
            print(L"\e")
        elseif c == '\\'
            print("\\\\")
        elseif contains(esc,c)
            print('\\', c)
        elseif iswprint(c)
            print(c)
        elseif 7 <= c <= 13
            # codes 7..13 map onto the letters of \a \b \t \n \v \f \r
            print('\\', "abtnvfr"[c-6])
        elseif c <= '\x7f'
            print(L"\x", hex(c, 2))
        elseif c <= '\uffff'
            # pad to full width only when a hex digit follows
            print(L"\u", hex(c, need_full_hex(s,after) ? 4 : 2))
        else
            print(L"\U", hex(c, need_full_hex(s,after) ? 8 : 4))
        end
        pos = after
    end
end
421
# Escaped form of s as a string; double quotes are escaped as well.
function escape_string(s::String)
    return print_to_string(length(s), print_escaped, s, "\"")
end

# Print s surrounded by double quotes, escaping both double quotes and
# dollar signs inside it.
function print_quoted(s::String)
    print('"')
    print_escaped(s, "\"\$")
    print('"')
end
#"  # work around syntax highlighting problem

# Quoted form of s as a string (the size hint allows for the two quote characters).
function quote_string(s::String)
    return print_to_string(length(s)+2, print_quoted, s)
end
426
427# bare minimum unescaping function unescapes only given characters
428
# Print s, dropping the backslash in front of any character listed in `esc`.
# The backslash itself is always treated as escapable. A backslash followed
# by any other character is printed unchanged.
function print_unescaped_chars(s::String, esc::String)
    if !contains(esc,'\\')
        esc = strcat("\\", esc)
    end
    pos = start(s)
    while !done(s,pos)
        ch, pos = next(s,pos)
        if ch == '\\' && !done(s,pos) && contains(esc,s[pos])
            # skip the backslash and emit the escaped character itself
            ch, pos = next(s,pos)
        end
        print(ch)
    end
end

# String-returning form of print_unescaped_chars.
function unescape_chars(s::String, esc::String)
    return print_to_string(length(s), print_unescaped_chars, s, esc)
end
445
446# general unescaping of traditional C and Unicode escape sequences
447
# Print s with C-style and Unicode escape sequences decoded:
#   \xH..  (up to 2 hex digits)   written as a raw byte
#   \uH..  (up to 4 hex digits)   printed as a character
#   \UH..  (up to 8 hex digits)   printed as a character
#   \O..   (up to 3 octal digits) written as a raw byte; values > 255 are an error
#   \a \b \t \n \v \f \r \e       the usual control characters
# Any other escaped character is printed as itself. A backslash that is the
# last character of s is printed unchanged.
function print_unescaped(s::String)
    i = start(s)
    while !done(s,i)
        c, i = next(s,i)
        if !done(s,i) && c == '\\'
            c, i = next(s,i)
            if c == 'x' || c == 'u' || c == 'U'
                # remember which escape this is: c is reused for the digits
                # below, and the error message must name the right escape
                kind = c
                n = k = 0
                m = c == 'x' ? 2 :
                    c == 'u' ? 4 : 8
                # consume at most m hex digits, stopping at the first non-digit
                while (k+=1) <= m && !done(s,i)
                    c, j = next(s,i)
                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
                    i = j
                end
                # k is still 1 when not a single digit was consumed
                if k == 1
                    error("\\", kind, " used with no following hex digits")
                end
                if m == 2 # \x escape sequence
                    write(uint8(n))
                else
                    print(char(n))
                end
            elseif '0' <= c <= '7'
                # octal escape: the first digit is already in c
                k = 1
                n = c-'0'
                while (k+=1) <= 3 && !done(s,i)
                    c, j = next(s,i)
                    n = '0' <= c <= '7' ? n<<3 + c-'0' : break
                    i = j
                end
                if n > 255
                    error("octal escape sequence out of range")
                end
                write(uint8(n))
            else
                print(c == 'a' ? '\a' :
                      c == 'b' ? '\b' :
                      c == 't' ? '\t' :
                      c == 'n' ? '\n' :
                      c == 'v' ? '\v' :
                      c == 'f' ? '\f' :
                      c == 'r' ? '\r' :
                      c == 'e' ? '\e' : c)
            end
        else
            print(c)
        end
    end
end

# String-returning form of print_unescaped.
unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)
502
## checking UTF-8 & ASCII validity ##
504
# Classify the bytes of s using the u8_isvalid C routine. Result codes:
#   0: neither valid ASCII nor UTF-8
#   1: valid ASCII
#   2: valid UTF-8
function byte_string_classify(s::ByteString)
    return ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
end

# Valid ASCII is also accepted as valid UTF-8 (any nonzero classification).
function is_valid_ascii(s::ByteString)
    return byte_string_classify(s) == 1
end
function is_valid_utf8(s::ByteString)
    return byte_string_classify(s) != 0
end

# Return s unchanged when it passes validation, otherwise raise an error.
function check_ascii(s::ByteString)
    if is_valid_ascii(s)
        return s
    end
    error("invalid ASCII sequence")
end
function check_utf8(s::ByteString)
    if is_valid_utf8(s)
        return s
    end
    error("invalid UTF-8 sequence")
end
516
517## string interpolation parsing ##
518
# Parse the body of a string literal that may contain `$` interpolations.
#   s        - raw literal text
#   unescape - applied to every literal (non-interpolated) chunk
#   printer  - function placed into the generated print_to_string call
# Returns the single literal chunk itself when the whole string is one
# ByteString chunk; otherwise returns a call expression
# print_to_string(printer, pieces...) mixing literal chunks and parsed
# expressions.
function _jl_interp_parse(s::String, unescape::Function, printer::Function)
    sx = {}
    # i marks the start of the pending literal chunk, j is the scan position
    i = j = start(s)
    while !done(s,j)
        c, k = next(s,j)
        if c == '$'
            # flush the literal text before the `$`, then let the parser
            # consume one atom starting right after it
            if !isempty(s[i:j-1])
                push(sx, unescape(s[i:j-1]))
            end
            ex, j = parseatom(s,k)
            push(sx, ex)
            i = j
        elseif c == '\\' && !done(s,k)
            if s[k] == '$'
                # escaped dollar: flush what precedes the backslash and start
                # the next chunk at the `$`, which drops the backslash
                if !isempty(s[i:j-1])
                    push(sx, unescape(s[i:j-1]))
                end
                i = k
            end
            # skip over the escaped character so it is not examined again
            c, j = next(s,k)
        else
            j = k
        end
    end
    # flush the trailing literal chunk, if any
    if !isempty(s[i:])
        push(sx, unescape(s[i:j-1]))
    end
    length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
        expr(:call, :print_to_string, printer, sx...)
end

# Defaults: pieces are emitted with `print`, and literal chunks are unescaped
# and then checked to be valid UTF-8.
_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))
552
# Interpolation parsing for byte-string literals: chunks are unescaped
# without a UTF-8 validity check, and pieces are emitted with `write`
# instead of `print`.
function _jl_interp_parse_bytes(s::String)
    function writer(x...)
        for w in x
            write(w)
        end
    end
    return _jl_interp_parse(s, unescape_string, writer)
end
557
558## core string macros ##
559
# String literal macros. Each receives the raw literal text s.
#   str, S_str : interpolation, with escapes decoded and UTF-8 checked
#   I_str      : interpolation, unescaping only \" (and the backslash itself)
#   E_str      : escapes decoded and UTF-8 checked, no interpolation
#   B_str      : interpolation with byte-oriented output (no UTF-8 check)
#   b_str      : like B_str, but evaluates to the result's `.data` field
macro   str(s); _jl_interp_parse(s); end
macro S_str(s); _jl_interp_parse(s); end
macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
macro E_str(s); check_utf8(unescape_string(s)); end
macro B_str(s); _jl_interp_parse_bytes(s); end
macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end
566
567## shell-like command parsing ##
568
# Split a shell-like command string into words.
#   s      - the command text
#   interp - when true, `$` introduces an interpolated expression
# Each word is collected as a list of pieces (literal substrings and, when
# interpolating, parsed expressions). With interp == false the list of piece
# lists is returned directly; with interp == true a tuple expression holding
# one tuple expression per word is returned.
# Errors are raised for unterminated quotes, a dangling backslash, a `$` at
# the very end of the command, and whitespace directly after a `$`.
function _jl_shell_parse(s::String, interp::Bool)

    in_single_quotes = false
    in_double_quotes = false

    args = {}   # completed words
    arg = {}    # pieces of the word currently being assembled
    i = start(s) # start of the pending literal piece
    j = i        # scan position

    # add a piece to the current word; empty strings are dropped
    function update_arg(x)
        if !isa(x,String) || !isempty(x)
            push(arg, x)
        end
    end
    # finish the current word; a word with no pieces becomes the empty string
    function append_arg()
        if isempty(arg); arg = {"",}; end
        push(args, arg)
        arg = {}
    end

    while !done(s,j)
        c, k = next(s,j)
        if !in_single_quotes && !in_double_quotes && iswspace(c)
            # unquoted whitespace ends the word; skip the rest of the run
            update_arg(s[i:j-1])
            append_arg()
            j = k
            while !done(s,j)
                c, k = next(s,j)
                if !iswspace(c)
                    i = j
                    break
                end
                j = k
            end
        elseif interp && !in_single_quotes && c == '$'
            # interpolation: flush pending text, then parse one atom after `$`
            update_arg(s[i:j-1]); i = k; j = k
            if done(s,k)
                error("\$ right before end of command")
            end
            if iswspace(s[k])
                error("space not allowed right after \$")
            end
            ex, j = parseatom(s,j)
            update_arg(ex); i = j
        else
            if !in_double_quotes && c == '\''
                # toggle single quoting; the quote character itself is dropped
                in_single_quotes = !in_single_quotes
                update_arg(s[i:j-1]); i = k
            elseif !in_single_quotes && c == '"'
                # toggle double quoting; the quote character itself is dropped
                in_double_quotes = !in_double_quotes
                update_arg(s[i:j-1]); i = k
            elseif c == '\\'
                if in_double_quotes
                    if done(s,k)
                        error("unterminated double quote")
                    end
                    # inside double quotes only \" and \$ are escapes; any
                    # other backslash is kept literally
                    if s[k] == '"' || s[k] == '$'
                        update_arg(s[i:j-1]); i = k
                        c, k = next(s,k)
                    end
                elseif !in_single_quotes
                    # unquoted: the backslash escapes whatever follows it
                    if done(s,k)
                        error("dangling backslash")
                    end
                    update_arg(s[i:j-1]); i = k
                    c, k = next(s,k)
                end
            end
            j = k
        end
    end

    if in_single_quotes; error("unterminated single quote"); end
    if in_double_quotes; error("unterminated double quote"); end

    # flush the final word
    update_arg(s[i:])
    append_arg()

    if !interp
        return args
    end

    # construct an expression
    exprs = {}
    for arg in args
        push(exprs, expr(:tuple, arg))
    end
    expr(:tuple,exprs)
end
# Interpolation is enabled by default.
_jl_shell_parse(s::String) = _jl_shell_parse(s,true)
660
# Split a command line into words using shell quoting rules, without
# interpolation. The pieces of each word are concatenated into one string.
function shell_split(s::String)
    words = String[]
    for pieces in _jl_shell_parse(s,false)
        push(words, strcat(pieces...))
    end
    return words
end
669
# Print one word quoted as needed for the shell-like syntax:
#   - an empty word is printed as ''
#   - a word without special characters is printed as-is
#   - a word with special characters but no single quote is single-quoted
#   - a word containing a single quote is double-quoted, with `"` and `$`
#     backslash-escaped
function print_shell_word(word::String)
    if isempty(word)
        print("''")
    end
    has_single = false
    has_special = false
    for c in word
        if c == '\''
            has_single = true
            has_special = true
        elseif iswspace(c) || c=='\\' || c=='"' || c=='$'
            has_special = true
        end
    end
    if has_single
        print('"')
        for c in word
            if c == '"' || c == '$'
                print('\\')
            end
            print(c)
        end
        print('"')
    elseif has_special
        print('\'', word, '\'')
    else
        print(word)
    end
end
699
# Print a command and its arguments as shell words separated by single spaces.
function print_shell_escaped(cmd::String, args::String...)
    print_shell_word(cmd)
    for word in args
        print(' ')
        print_shell_word(word)
    end
end

# String-returning form of print_shell_escaped.
function shell_escape(cmd::String, args::String...)
    return print_to_string(print_shell_escaped, cmd, args...)
end
710
711## interface to parser ##
712
# Parse one expression from s starting at 1-based position pos, using the
# jl_parse_string runtime routine. `greedy` is passed on to that routine as
# 1 or 0. Returns (expr, next_pos) with next_pos 1-based.
# Throws ParseError when the parser yields an :error expression or reaches
# the end of the input.
function parse(s::String, pos, greedy)
    flag = greedy ? 1 : 0
    ex, endpos = ccall(:jl_parse_string, Any,
                       (Ptr{Uint8}, Int32, Int32),
                       cstring(s), pos-1, flag)
    if isa(ex,Expr)
        if is(ex.head,:error)
            throw(ParseError(ex.args[1]))
        end
    end
    if ex == ()
        throw(ParseError("end of input"))
    end
    return (ex, endpos+1) # C is zero-based, Julia is 1-based
end

# Convenience forms: `parse` is greedy, `parseatom` is not; both start at
# position 1 unless a position is given.
parse(s::String)          = parse(s, 1, true)
parse(s::String, pos)     = parse(s, pos, true)
parseatom(s::String)      = parse(s, 1, false)
parseatom(s::String, pos) = parse(s, pos, false)
729
730## miscellaneous string functions ##
731
# Pad s on the left with copies of p until it is n characters long. If s is
# already that long it is returned unchanged. When the padding length is not
# a multiple of strlen(p), a partial copy of p completes it.
function lpad(s::String, n::Integer, p::String)
    need = n - strlen(s)
    if need <= 0
        return s
    end
    plen = strlen(p)
    if plen == 1
        return p^need * s
    end
    reps = div(need,plen)
    extra = need - reps*plen
    return cstring(p^reps*p[1:chr2ind(p,extra)]*s)
end
743
# Pad s on the right with copies of p until it is n characters long. If s is
# already that long it is returned unchanged. When the padding length is not
# a multiple of strlen(p), a partial copy of p completes it.
function rpad(s::String, n::Integer, p::String)
    need = n - strlen(s)
    if need <= 0
        return s
    end
    plen = strlen(p)
    if plen == 1
        return s * p^need
    end
    reps = div(need,plen)
    extra = need - reps*plen
    return cstring(s*p^reps*p[1:chr2ind(p,extra)])
end
755
# Non-string values and pads are converted with `string` first.
function lpad(s, n::Integer, p)
    return lpad(string(s), n, string(p))
end
function rpad(s, n::Integer, p)
    return rpad(string(s), n, string(p))
end

# The default pad is a single space.
function lpad(s, n::Integer)
    return lpad(string(s), n, " ")
end
function rpad(s, n::Integer)
    return rpad(string(s), n, " ")
end
761
# Split s into tokens at every character c for which contains(delims, c) is
# true. With include_empty == false, empty tokens are dropped.
function split(s::String, delims, include_empty::Bool)
    i = 1
    strs = String[]
    len = length(s)
    while true
        # scan one token: tokend trails i and stops just after the last
        # non-delimiter character, while i moves past the delimiter itself
        tokstart = tokend = i
        while !done(s,i)
            (c,i) = next(s,i)
            if contains(delims, c)
                break
            end
            tokend = i
        end
        tok = s[tokstart:(tokend-1)]
        if include_empty || !isempty(tok)
            push(strs, tok)
        end
        # continue while input remains, or when the input ended right after a
        # delimiter (i == len+1 but tokend != i), so that the trailing empty
        # token gets one more pass
        if !((i <= len) || (i==len+1 && tokend!=i))
            break
        end
    end
    strs
end

# Defaults: split on whitespace characters and drop empty tokens; with
# explicit delimiters empty tokens are kept; a single Char delimiter is
# wrapped in a tuple.
split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
split(s::String, x) = split(s, x, true)
split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)
789
# Print the items of `strings` separated by `delim`, using `last` instead of
# `delim` before the final item. Prints nothing for an empty collection.
function print_joined(strings, delim, last)
    st = start(strings)
    if done(strings,st)
        return
    end
    item, st = next(strings,st)
    print(item)
    while !done(strings,st)
        item, st = next(strings,st)
        if done(strings,st)
            print(last)
        else
            print(delim)
        end
        print(item)
    end
end

# Print the items of `strings` with `delim` between consecutive items.
function print_joined(strings, delim)
    st = start(strings)
    while !done(strings,st)
        item, st = next(strings,st)
        print(item)
        if !done(strings,st)
            print(delim)
        end
    end
end

# With no delimiter the items are printed back to back.
function print_joined(strings)
    print_joined(strings, "")
end

# String-returning form of print_joined.
function join(args...)
    return print_to_string(print_joined, args...)
end
817
# Remove the last character of s.
chop(s::String) = s[1:thisind(s,length(s))-1]

# Remove one trailing newline from s, if present. An empty string has no
# last character to inspect, so it is returned unchanged rather than being
# indexed at position 0.
function chomp(s::String)
    if isempty(s)
        return s
    end
    i = thisind(s,length(s))
    s[i]=='\n' ? s[1:i-1] : s
end

# Byte strings test the final byte directly (0x0a is '\n'); an empty byte
# array is guarded for the same reason.
function chomp(s::ByteString)
    if length(s.data) == 0
        return s
    end
    s.data[end]==0x0a ? s[1:end-1] : s
end
821
# Remove leading whitespace. Returns "" when s is empty or all whitespace.
function lstrip(s::String)
    pos = start(s)
    while !done(s,pos)
        ch, after = next(s,pos)
        if !iswspace(ch)
            return s[pos:end]
        end
        pos = after
    end
    return ""
end

# Remove trailing whitespace by scanning the reversed string; the reversed
# index of the first non-whitespace character is mapped back onto s.
# Returns "" when s is empty or all whitespace.
function rstrip(s::String)
    rev = reverse(s)
    pos = start(rev)
    while !done(rev,pos)
        ch, after = next(rev,pos)
        if !iswspace(ch)
            return s[1:end-pos+1]
        end
        pos = after
    end
    return ""
end

# Remove whitespace from both ends.
function strip(s::String)
    return lstrip(rstrip(s))
end
848
849## string to integer functions ##
850
# Parse s as an integer of type T written in the given base (2 through 36).
# A leading '-' is accepted only when T is a Signed type. Digits beyond 9 are
# letters, in either case. Raises an error for an invalid base, an empty
# string, a lone '-', or any character that is not a valid digit in the base.
# Overflow is not detected (see the TODO below).
function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)
    if !(2 <= base <= 36); error("invalid base: ",base); end
    i = start(s)
    if done(s,i)
        error("premature end of integer (in ",show_to_string(s),")")
    end
    c,i = next(s,i)
    sgn = one(T)
    if T <: Signed && c == '-'
        sgn = -sgn
        if done(s,i)
            error("premature end of integer (in ",show_to_string(s),")")
        end
        c,i = next(s,i)
    end
    base = convert(T,base)
    n::T = 0
    while true
        # digit value of c; a non-alphanumeric character maps to typemax(Int),
        # which is rejected by the range check below for every base
        d = '0' <= c <= '9' ? c-'0' :
            'A' <= c <= 'Z' ? c-'A'+10 :
            'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)
        if d >= base
            error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
        end
        # TODO: overflow detection?
        n = n*base + d
        if done(s,i)
            break
        end
        c,i = next(s,i)
    end
    # apply the sign recorded above
    return flipsign(n,sgn)
end
884
# Defaults for parse_int: the result type defaults to Int and the base to 10.
parse_int(s::String, base::Integer) = parse_int(Int,s,base)
parse_int(T::Type, s::String) = parse_int(T,s,10)
parse_int(s::String) = parse_int(Int,s,10)

# Fixed-base parsers with an explicit result type.
parse_bin(T::Type, s::String) = parse_int(T,s,2)
parse_oct(T::Type, s::String) = parse_int(T,s,8)
parse_hex(T::Type, s::String) = parse_int(T,s,16)

# Fixed-base parsers returning Int.
parse_bin(s::String) = parse_int(Int,s,2)
parse_oct(s::String) = parse_int(Int,s,8)
parse_hex(s::String) = parse_int(Int,s,16)

# Integer conversion functions applied to strings parse them in base 10.
integer(s::String) = int(s)
unsigned(s::String) = uint(s)
int(s::String) = parse_int(Int,s)
uint(s::String) = parse_int(Uint,s)
int8(s::String) = parse_int(Int8,s)
uint8(s::String) = parse_int(Uint8,s)
int16(s::String) = parse_int(Int16,s)
uint16(s::String) = parse_int(Uint16,s)
int32(s::String) = parse_int(Int32,s)
uint32(s::String) = parse_int(Uint32,s)
int64(s::String) = parse_int(Int64,s)
uint64(s::String) = parse_int(Uint64,s)
909
910## integer to string functions ##
911
# Digit symbols for bases up to 36, as bytes; the digit value d maps to
# _jl_dig_syms[d+1].
const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data

# Convert n to its representation in base b (2 through 36), zero-padded on
# the left to at least l digits. A leading '-' is added for negative n and
# is not counted in l. Returns an ASCIIString.
function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)
    if b < 2 || b > 36; error("int2str: invalid base ", b); end
    neg = n < 0
    n = unsigned(abs(n))
    b = convert(typeof(n), b)
    ndig = ndigits(n, b)
    # total size: digits (or the requested width), plus one slot for the sign
    sz = max(convert(Int, ndig), l) + neg
    data = Array(Uint8, sz)
    # fill from the least significant digit backwards; slot 1 is left for the
    # sign when neg is true
    i = sz
    if ispow2(b)
        # power-of-two base: extract digits with a mask and a shift
        digmask = b-1
        shift = trailing_zeros(b)
        while i > neg
            ch = n & digmask
            data[i] = _jl_dig_syms[int(ch)+1]
            n >>= shift
            i -= 1
        end
    else
        # general base: extract digits with remainder and division
        while i > neg
            ch = n % b
            data[i] = _jl_dig_syms[int(ch)+1]
            n = div(n,b)
            i -= 1
        end
    end
    if neg
        data[1] = '-'
    end
    ASCIIString(data)
end
# Without a width, no zero padding is requested.
function int2str(n::Integer, b::Integer)
    return int2str(n, b, 0)
end

# Other integer types are converted to Int64 first.
function int2str(n::Integer, b::Integer, l::Int)
    return int2str(int64(n), b, l)
end

# Signed integers are rendered in decimal by both `string` and `cstring`.
function string(x::Signed)
    return dec(int64(x))
end
function cstring(x::Signed)
    return dec(int64(x))
end
950
951## string to float functions ##
952
# Try to parse s as a Float64 with the jl_strtod runtime routine, which is
# handed `out` as its result buffer. Returns true when the routine reports
# status 0.
function float64_isvalid(s::String, out::Array{Float64,1})
    cs = cstring(s)
    status = ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), cs, out)
    return (status == 0)
end

# Float32 counterpart, using jl_strtof.
function float32_isvalid(s::String, out::Array{Float32,1})
    cs = cstring(s)
    status = ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), cs, out)
    return (status == 0)
end
962
# String-to-float conversion. The one-element result buffers are allocated
# once and reused by every call; they are local to this block, while the
# two functions are declared global.
begin
    local tmp::Array{Float64,1} = Array(Float64,1)
    local tmpf::Array{Float32,1} = Array(Float32,1)
    global float64, float32
    # Parse s as a Float64; throws ArgumentError when validation fails.
    function float64(s::String)
        if !float64_isvalid(s, tmp)
            throw(ArgumentError("float64(String): invalid number format"))
        end
        return tmp[1]
    end

    # Parse s as a Float32; throws ArgumentError when validation fails.
    function float32(s::String)
        if !float32_isvalid(s, tmpf)
            throw(ArgumentError("float32(String): invalid number format"))
        end
        return tmpf[1]
    end
end
981
# Float parsing defaults to Float64.
function float(x::String)
    return float64(x)
end
function parse_float(x::String)
    return float64(x)
end

# An explicit result type selects the matching parser.
function parse_float(::Type{Float64}, x::String)
    return float64(x)
end
function parse_float(::Type{Float32}, x::String)
    return float32(x)
end
986
987# copying a byte string (generally not needed due to "immutability")
988
# Build a new string of the same concrete type from a copy of s's bytes.
function strcpy{T<:ByteString}(s::T)
    return T(copy(s.data))
end
990
991# lexicographically compare byte arrays (used by Latin-1 and UTF-8)
992
# Compare the common prefix of a and b with memcmp; when the prefixes are
# equal, the shorter array sorts first. Returns -1, 0 or +1.
function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})
    n = min(length(a),length(b))
    c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint), a, b, n)
    if c < 0
        return -1
    elseif c > 0
        return +1
    end
    return cmp(length(a),length(b))
end
998
999# find the index of the first occurrence of a byte value in a byte array
1000
# Return the 1-based index of the first byte in a equal to b, or 0 when the
# C memchr function finds none.
function memchr(a::Array{Uint8,1}, b::Integer)
    origin = pointer(a)
    hit = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), origin, b, length(a))
    if hit == C_NULL
        return 0
    end
    return hit - origin + 1
end
1006
1007# concatenate byte arrays into a single array
1008
# No arguments gives an empty byte array; a single array is copied.
memcat() = Array(Uint8,0)
memcat(a::Array{Uint8,1}) = copy(a)

# Several arrays: allocate the result once at its final size, then memcpy
# each input to its place in order.
function memcat(arrays::Array{Uint8,1}...)
    total = 0
    for a in arrays
        total += length(a)
    end
    out = Array(Uint8, total)
    dst = pointer(out)
    written = 0
    for a in arrays
        len = length(a)
        ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
              dst+written, a, len)
        written += len
    end
    return out
end

# concatenate the data fields of byte strings

memcat(s::ByteString) = memcat(s.data)
memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...)
1032