1package regexp2 2 3import ( 4 "bytes" 5 "fmt" 6) 7 8// Match is a single regex result match that contains groups and repeated captures 9// -Groups 10// -Capture 11type Match struct { 12 Group //embeded group 0 13 14 regex *Regexp 15 otherGroups []Group 16 17 // input to the match 18 textpos int 19 textstart int 20 21 capcount int 22 caps []int 23 sparseCaps map[int]int 24 25 // output from the match 26 matches [][]int 27 matchcount []int 28 29 // whether we've done any balancing with this match. If we 30 // have done balancing, we'll need to do extra work in Tidy(). 31 balancing bool 32} 33 34// Group is an explicit or implit (group 0) matched group within the pattern 35type Group struct { 36 Capture // the last capture of this group is embeded for ease of use 37 38 Name string // group name 39 Captures []Capture // captures of this group 40} 41 42// Capture is a single capture of text within the larger original string 43type Capture struct { 44 // the original string 45 text []rune 46 // the position in the original string where the first character of 47 // captured substring was found. 48 Index int 49 // the length of the captured substring. 50 Length int 51} 52 53// String returns the captured text as a String 54func (c *Capture) String() string { 55 return string(c.text[c.Index : c.Index+c.Length]) 56} 57 58// Runes returns the captured text as a rune slice 59func (c *Capture) Runes() []rune { 60 return c.text[c.Index : c.Index+c.Length] 61} 62 63func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match { 64 m := Match{ 65 regex: regex, 66 matchcount: make([]int, capcount), 67 matches: make([][]int, capcount), 68 textstart: startpos, 69 balancing: false, 70 } 71 m.Name = "0" 72 m.text = text 73 m.matches[0] = make([]int, 2) 74 return &m 75} 76 77func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match { 78 m := newMatch(regex, capcount, text, startpos) 79 m.sparseCaps = caps 80 return m 81} 82 83func (m *Match) reset(text []rune, textstart int) { 84 m.text = text 85 m.textstart = textstart 86 for i := 0; i < len(m.matchcount); i++ { 87 m.matchcount[i] = 0 88 } 89 m.balancing = false 90} 91 92func (m *Match) tidy(textpos int) { 93 94 interval := m.matches[0] 95 m.Index = interval[0] 96 m.Length = interval[1] 97 m.textpos = textpos 98 m.capcount = m.matchcount[0] 99 //copy our root capture to the list 100 m.Group.Captures = []Capture{m.Group.Capture} 101 102 if m.balancing { 103 // The idea here is that we want to compact all of our unbalanced captures. To do that we 104 // use j basically as a count of how many unbalanced captures we have at any given time 105 // (really j is an index, but j/2 is the count). First we skip past all of the real captures 106 // until we find a balance captures. Then we check each subsequent entry. If it's a balance 107 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy 108 // it down to the last free position. 109 for cap := 0; cap < len(m.matchcount); cap++ { 110 limit := m.matchcount[cap] * 2 111 matcharray := m.matches[cap] 112 113 var i, j int 114 115 for i = 0; i < limit; i++ { 116 if matcharray[i] < 0 { 117 break 118 } 119 } 120 121 for j = i; i < limit; i++ { 122 if matcharray[i] < 0 { 123 // skip negative values 124 j-- 125 } else { 126 // but if we find something positive (an actual capture), copy it back to the last 127 // unbalanced position. 128 if i != j { 129 matcharray[j] = matcharray[i] 130 } 131 j++ 132 } 133 } 134 135 m.matchcount[cap] = j / 2 136 } 137 138 m.balancing = false 139 } 140} 141 142// isMatched tells if a group was matched by capnum 143func (m *Match) isMatched(cap int) bool { 144 return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1) 145} 146 147// matchIndex returns the index of the last specified matched group by capnum 148func (m *Match) matchIndex(cap int) int { 149 i := m.matches[cap][m.matchcount[cap]*2-2] 150 if i >= 0 { 151 return i 152 } 153 154 return m.matches[cap][-3-i] 155} 156 157// matchLength returns the length of the last specified matched group by capnum 158func (m *Match) matchLength(cap int) int { 159 i := m.matches[cap][m.matchcount[cap]*2-1] 160 if i >= 0 { 161 return i 162 } 163 164 return m.matches[cap][-3-i] 165} 166 167// Nonpublic builder: add a capture to the group specified by "c" 168func (m *Match) addMatch(c, start, l int) { 169 170 if m.matches[c] == nil { 171 m.matches[c] = make([]int, 2) 172 } 173 174 capcount := m.matchcount[c] 175 176 if capcount*2+2 > len(m.matches[c]) { 177 oldmatches := m.matches[c] 178 newmatches := make([]int, capcount*8) 179 copy(newmatches, oldmatches[:capcount*2]) 180 m.matches[c] = newmatches 181 } 182 183 m.matches[c][capcount*2] = start 184 m.matches[c][capcount*2+1] = l 185 m.matchcount[c] = capcount + 1 186 //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches) 187} 188 189// Nonpublic builder: Add a capture to balance the specified group. This is used by the 190// balanced match construct. (?<foo-foo2>...) 191// 192// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c). 193// However, since we have backtracking, we need to keep track of everything. 194func (m *Match) balanceMatch(c int) { 195 m.balancing = true 196 197 // we'll look at the last capture first 198 capcount := m.matchcount[c] 199 target := capcount*2 - 2 200 201 // first see if it is negative, and therefore is a reference to the next available 202 // capture group for balancing. If it is, we'll reset target to point to that capture. 203 if m.matches[c][target] < 0 { 204 target = -3 - m.matches[c][target] 205 } 206 207 // move back to the previous capture 208 target -= 2 209 210 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. 211 if target >= 0 && m.matches[c][target] < 0 { 212 m.addMatch(c, m.matches[c][target], m.matches[c][target+1]) 213 } else { 214 m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */) 215 } 216} 217 218// Nonpublic builder: removes a group match by capnum 219func (m *Match) removeMatch(c int) { 220 m.matchcount[c]-- 221} 222 223// GroupCount returns the number of groups this match has matched 224func (m *Match) GroupCount() int { 225 return len(m.matchcount) 226} 227 228// GroupByName returns a group based on the name of the group, or nil if the group name does not exist 229func (m *Match) GroupByName(name string) *Group { 230 num := m.regex.GroupNumberFromName(name) 231 if num < 0 { 232 return nil 233 } 234 return m.GroupByNumber(num) 235} 236 237// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist 238func (m *Match) GroupByNumber(num int) *Group { 239 // check our sparse map 240 if m.sparseCaps != nil { 241 if newNum, ok := m.sparseCaps[num]; ok { 242 num = newNum 243 } 244 } 245 if num >= len(m.matchcount) || num < 0 { 246 return nil 247 } 248 249 if num == 0 { 250 return &m.Group 251 } 252 253 m.populateOtherGroups() 254 255 return &m.otherGroups[num-1] 256} 257 258// Groups returns all the capture groups, starting with group 0 (the full match) 259func (m *Match) Groups() []Group { 260 m.populateOtherGroups() 261 g := make([]Group, len(m.otherGroups)+1) 262 g[0] = m.Group 263 copy(g[1:], m.otherGroups) 264 return g 265} 266 267func (m *Match) populateOtherGroups() { 268 // Construct all the Group objects first time called 269 if m.otherGroups == nil { 270 m.otherGroups = make([]Group, len(m.matchcount)-1) 271 for i := 0; i < len(m.otherGroups); i++ { 272 m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1]) 273 } 274 } 275} 276 277func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) { 278 c := m.matchcount[groupnum] 279 if c == 0 { 280 return 281 } 282 283 matches := m.matches[groupnum] 284 285 index := matches[(c-1)*2] 286 last := index + matches[(c*2)-1] 287 288 for ; index < last; index++ { 289 buf.WriteRune(m.text[index]) 290 } 291} 292 293func newGroup(name string, text []rune, caps []int, capcount int) Group { 294 g := Group{} 295 g.text = text 296 if capcount > 0 { 297 g.Index = caps[(capcount-1)*2] 298 g.Length = caps[(capcount*2)-1] 299 } 300 g.Name = name 301 g.Captures = make([]Capture, capcount) 302 for i := 0; i < capcount; i++ { 303 g.Captures[i] = Capture{ 304 text: text, 305 Index: caps[i*2], 306 Length: caps[i*2+1], 307 } 308 } 309 //log.Printf("newGroup! capcount %v, %+v", capcount, g) 310 311 return g 312} 313 314func (m *Match) dump() string { 315 buf := &bytes.Buffer{} 316 buf.WriteRune('\n') 317 if len(m.sparseCaps) > 0 { 318 for k, v := range m.sparseCaps { 319 fmt.Fprintf(buf, "Slot %v -> %v\n", k, v) 320 } 321 } 322 323 for i, g := range m.Groups() { 324 fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures)) 325 326 for _, c := range g.Captures { 327 fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String()) 328 } 329 } 330 /* 331 for i := 0; i < len(m.matchcount); i++ { 332 fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i)) 333 334 for j := 0; j < m.matchcount[i]; j++ { 335 text := "" 336 337 if m.matches[i][j*2] >= 0 { 338 start := m.matches[i][j*2] 339 text = m.text[start : start+m.matches[i][j*2+1]] 340 } 341 342 fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text) 343 } 344 } 345 */ 346 return buf.String() 347} 348