1# split.tcl -- 2# 3# Various ways of splitting a string. 4# 5# Copyright (c) 2000 by Ajuba Solutions. 6# Copyright (c) 2000 by Eric Melski <ericm@ajubasolutions.com> 7# Copyright (c) 2001 by Reinhard Max <max@suse.de> 8# Copyright (c) 2003 by Pat Thoyts <patthoyts@users.sourceforge.net> 9# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 10# 11# See the file "license.terms" for information on usage and redistribution 12# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 13# 14# RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $ 15 16# ### ### ### ######### ######### ######### 17## Requirements 18 19package require Tcl 8.2 20 21namespace eval ::textutil::split {} 22 23######################################################################## 24# This one was written by Bob Techentin (RWT in Tcl'ers Wiki): 25# http://www.techentin.net 26# mailto:techentin.robert@mayo.edu 27# 28# Later, he send me an email stated that I can use it anywhere, because 29# no copyright was added, so the code is defacto in the public domain. 30# 31# You can found it in the Tcl'ers Wiki here: 32# http://mini.net/cgi-bin/wikit/460.html 33# 34# Bob wrote: 35# If you need to split string into list using some more complicated rule 36# than builtin split command allows, use following function. It mimics 37# Perl split operator which allows regexp as element separator, but, 38# like builtin split, it expects string to split as first arg and regexp 39# as second (optional) By default, it splits by any amount of whitespace. 40# Note that if you add parenthesis into regexp, parenthesed part of separator 41# would be added into list as additional element. Just like in Perl. -- cary 42# 43# Speed improvement by Reinhard Max: 44# Instead of repeatedly copying around the not yet matched part of the 45# string, I use [regexp]'s -start option to restrict the match to that 46# part. This reduces the complexity from something like O(n^1.5) to 47# O(n). My test case for that was: 48# 49# foreach i {1 10 100 1000 10000} { 50# set s [string repeat x $i] 51# puts [time {splitx $s .}] 52# } 53# 54 55if {[package vsatisfies [package provide Tcl] 8.3]} { 56 57 proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} { 58 # Bugfix 476988 59 if {[string length $str] == 0} { 60 return {} 61 } 62 if {[string length $regexp] == 0} { 63 return [::split $str ""] 64 } 65 if {[regexp $regexp {}]} { 66 return -code error \ 67 "splitting on regexp \"$regexp\" would cause infinite loop" 68 } 69 70 set list {} 71 set start 0 72 while {[regexp -start $start -indices -- $regexp $str match submatch]} { 73 foreach {subStart subEnd} $submatch break 74 foreach {matchStart matchEnd} $match break 75 incr matchStart -1 76 incr matchEnd 77 lappend list [string range $str $start $matchStart] 78 if {$subStart >= $start} { 79 lappend list [string range $str $subStart $subEnd] 80 } 81 set start $matchEnd 82 } 83 lappend list [string range $str $start end] 84 return $list 85 } 86 87} else { 88 # For tcl <= 8.2 we do not have regexp -start... 89 proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] { 90 91 if {[string length $str] == 0} { 92 return {} 93 } 94 if {[string length $regexp] == 0} { 95 return [::split $str {}] 96 } 97 if {[regexp $regexp {}]} { 98 return -code error \ 99 "splitting on regexp \"$regexp\" would cause infinite loop" 100 } 101 102 set list {} 103 while {[regexp -indices -- $regexp $str match submatch]} { 104 lappend list [string range $str 0 [expr {[lindex $match 0] -1}]] 105 if {[lindex $submatch 0] >= 0} { 106 lappend list [string range $str [lindex $submatch 0] \ 107 [lindex $submatch 1]] 108 } 109 set str [string range $str [expr {[lindex $match 1]+1}] end] 110 } 111 lappend list $str 112 return $list 113 } 114 115} 116 117# 118# splitn -- 119# 120# splitn splits the string $str into chunks of length $len. These 121# chunks are returned as a list. 122# 123# If $str really contains a ByteArray object (as retrieved from binary 124# encoded channels) splitn must honor this by splitting the string 125# into chunks of $len bytes. 126# 127# It is an error to call splitn with a nonpositive $len. 128# 129# If splitn is called with an empty string, it returns the empty list. 130# 131# If the length of $str is not an entire multiple of the chunk length, 132# the last chunk in the generated list will be shorter than $len. 133# 134# The implementation presented here was given by Bryan Oakley, as 135# part of a ``contest'' I staged on c.l.t in July 2004. I selected 136# this version, as it does not rely on runtime generated code, is 137# very fast for chunk size one, not too bad in all the other cases, 138# and uses [split] or [string range] which have been around for quite 139# some time. 140# 141# -- Robert Suetterlin (robert@mpe.mpg.de) 142# 143proc ::textutil::split::splitn {str {len 1}} { 144 145 if {$len <= 0} { 146 return -code error "len must be > 0" 147 } 148 149 if {$len == 1} { 150 return [split $str {}] 151 } 152 153 set result [list] 154 set max [string length $str] 155 set i 0 156 set j [expr {$len -1}] 157 while {$i < $max} { 158 lappend result [string range $str $i $j] 159 incr i $len 160 incr j $len 161 } 162 163 return $result 164} 165 166# ### ### ### ######### ######### ######### 167## Data structures 168 169namespace eval ::textutil::split { 170 namespace export splitx splitn 171} 172 173# ### ### ### ######### ######### ######### 174## Ready 175 176package provide textutil::split 0.8 177