1# split.tcl --
2#
3#	Various ways of splitting a string.
4#
5# Copyright (c) 2000      by Ajuba Solutions.
6# Copyright (c) 2000      by Eric Melski <ericm@ajubasolutions.com>
7# Copyright (c) 2001      by Reinhard Max <max@suse.de>
8# Copyright (c) 2003      by Pat Thoyts <patthoyts@users.sourceforge.net>
9# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
10#
11# See the file "license.terms" for information on usage and redistribution
12# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13#
14# RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $
15
16# ### ### ### ######### ######### #########
17## Requirements
18
19package require Tcl 8.2
20
21namespace eval ::textutil::split {}
22
23########################################################################
24# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
25# http://www.techentin.net
26# mailto:techentin.robert@mayo.edu
27#
28# Later, he send me an email stated that I can use it anywhere, because
29# no copyright was added, so the code is defacto in the public domain.
30#
31# You can found it in the Tcl'ers Wiki here:
32# http://mini.net/cgi-bin/wikit/460.html
33#
34# Bob wrote:
35# If you need to split string into list using some more complicated rule
36# than builtin split command allows, use following function. It mimics
37# Perl split operator which allows regexp as element separator, but,
38# like builtin split, it expects string to split as first arg and regexp
39# as second (optional) By default, it splits by any amount of whitespace.
40# Note that if you add parenthesis into regexp, parenthesed part of separator
41# would be added into list as additional element. Just like in Perl. -- cary
42#
43# Speed improvement by Reinhard Max:
44# Instead of repeatedly copying around the not yet matched part of the
45# string, I use [regexp]'s -start option to restrict the match to that
46# part. This reduces the complexity from something like O(n^1.5) to
47# O(n). My test case for that was:
48#
49# foreach i {1 10 100 1000 10000} {
50#     set s [string repeat x $i]
51#     puts [time {splitx $s .}]
52# }
53#
54
55if {[package vsatisfies [package provide Tcl] 8.3]} {
56
57    proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
58        # Bugfix 476988
59        if {[string length $str] == 0} {
60            return {}
61        }
62        if {[string length $regexp] == 0} {
63            return [::split $str ""]
64        }
65	if {[regexp $regexp {}]} {
66	    return -code error \
67		"splitting on regexp \"$regexp\" would cause infinite loop"
68	}
69
70        set list  {}
71        set start 0
72        while {[regexp -start $start -indices -- $regexp $str match submatch]} {
73            foreach {subStart subEnd} $submatch break
74            foreach {matchStart matchEnd} $match break
75            incr matchStart -1
76            incr matchEnd
77            lappend list [string range $str $start $matchStart]
78            if {$subStart >= $start} {
79                lappend list [string range $str $subStart $subEnd]
80            }
81            set start $matchEnd
82        }
83        lappend list [string range $str $start end]
84        return $list
85    }
86
87} else {
88    # For tcl <= 8.2 we do not have regexp -start...
89    proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {
90
91        if {[string length $str] == 0} {
92            return {}
93        }
94        if {[string length $regexp] == 0} {
95            return [::split $str {}]
96        }
97	if {[regexp $regexp {}]} {
98	    return -code error \
99		"splitting on regexp \"$regexp\" would cause infinite loop"
100	}
101
102        set list  {}
103        while {[regexp -indices -- $regexp $str match submatch]} {
104            lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
105            if {[lindex $submatch 0] >= 0} {
106                lappend list [string range $str [lindex $submatch 0] \
107                                  [lindex $submatch 1]]
108            }
109            set str [string range $str [expr {[lindex $match 1]+1}] end]
110        }
111        lappend list $str
112        return $list
113    }
114
115}
116
117#
118# splitn --
119#
120# splitn splits the string $str into chunks of length $len.  These
121# chunks are returned as a list.
122#
123# If $str really contains a ByteArray object (as retrieved from binary
124# encoded channels) splitn must honor this by splitting the string
125# into chunks of $len bytes.
126#
127# It is an error to call splitn with a nonpositive $len.
128#
129# If splitn is called with an empty string, it returns the empty list.
130#
131# If the length of $str is not an entire multiple of the chunk length,
132# the last chunk in the generated list will be shorter than $len.
133#
134# The implementation presented here was given by Bryan Oakley, as
135# part of a ``contest'' I staged on c.l.t in July 2004.  I selected
136# this version, as it does not rely on runtime generated code, is
137# very fast for chunk size one, not too bad in all the other cases,
138# and uses [split] or [string range] which have been around for quite
139# some time.
140#
141# -- Robert Suetterlin (robert@mpe.mpg.de)
142#
143proc ::textutil::split::splitn {str {len 1}} {
144
145    if {$len <= 0} {
146        return -code error "len must be > 0"
147    }
148
149    if {$len == 1} {
150        return [split $str {}]
151    }
152
153    set result [list]
154    set max [string length $str]
155    set i 0
156    set j [expr {$len -1}]
157    while {$i < $max} {
158        lappend result [string range $str $i $j]
159        incr i $len
160        incr j $len
161    }
162
163    return $result
164}
165
166# ### ### ### ######### ######### #########
167## Data structures
168
169namespace eval ::textutil::split {
170    namespace export splitx splitn
171}
172
173# ### ### ### ######### ######### #########
174## Ready
175
176package provide textutil::split 0.8
177