1 //
2 // HtRegexReplace.cc
3 //
4 // HtRegexReplace: A subclass of HtRegex that can perform replacements
5 //
6 // Part of the ht://Dig package <http://www.htdig.org/>
7 // Copyright (c) 2000-2004 The ht://Dig Group
8 // For copyright details, see the file COPYING in your distribution
9 // or the GNU Library General Public License (LGPL) version 2 or later
10 // <http://www.gnu.org/copyleft/lgpl.html>
11 //
12 // $Id: HtRegexReplace.cc,v 1.4 2004/05/28 13:15:21 lha Exp $
13 //
14
15 #include "HtRegexReplace.h"
16 #include <locale.h>
17
18
HtRegexReplace()19 HtRegexReplace::HtRegexReplace()
20 {
21 }
22
HtRegexReplace(const char * from,const char * to,int case_sensitive)23 HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive)
24 : HtRegex(from, case_sensitive)
25 {
26 memset(®s, 0, sizeof(regs));
27 repBuf = 0;
28 segSize =
29 segUsed = 0;
30 segMark = 0;
31 repLen = 0;
32
33 setReplace(to);
34 }
35
~HtRegexReplace()36 HtRegexReplace::~HtRegexReplace()
37 {
38 empty();
39 }
40
replace(String & str,int nullpattern,int nullstr)41 int HtRegexReplace::replace(String &str, int nullpattern, int nullstr)
42 {
43 const int regCount = sizeof(regs) / sizeof(regs[0]);
44 if (compiled == 0 || repBuf == 0) return nullpattern;
45 if (str.length() == 0) return nullstr;
46
47 if (regexec(&re, str.get(), regCount, regs, 0) == 0)
48 {
49 // Firstly work out how long the result string will be. We think this will be more effecient
50 // than letting the buffer grow in stages as we build the result, but who knows?
51 //cout << "!!! Match !!!" << endl;
52 size_t resLen = repLen;
53 int i, reg, repPos;
54 const char *src = str.get();
55
56 for (i = 1; i < (int) segUsed; i += 2)
57 {
58 reg = segMark[i];
59 if (reg < regCount && regs[reg].rm_so != -1)
60 resLen += regs[reg].rm_eo - regs[reg].rm_so;
61 }
62 //cout << "result will be " << resLen << " chars long" << endl;
63 String result(resLen); // Make the result string preallocating the buffer size
64 for (i = 0, repPos = 0;; )
65 {
66 //cout << "appending segment " << i << endl;
67 result.append(repBuf + repPos, segMark[i] - repPos); // part of the replace string
68 repPos = segMark[i]; // move forward
69 if (++i == (int) segUsed) break; // was that the last segment?
70 reg = segMark[i++]; // get the register number
71 if (reg < regCount && regs[reg].rm_so != -1)
72 result.append((char *) src + regs[reg].rm_so, regs[reg].rm_eo - regs[reg].rm_so);
73 }
74 str = result;
75 //cout << "return " << result.get() << endl;
76
77 return 1;
78 }
79
80 return 0;
81 }
82
83 // Private: place a mark in the mark buffer growing it if necessary.
putMark(int n)84 void HtRegexReplace::putMark(int n)
85 {
86 // assert(segUsed <= segSize);
87 if (segUsed == segSize)
88 {
89 size_t newSize = segSize * 2 + 5; // grow in chunks
90 int *newMark = new int[newSize]; // do we assume that new can't fail?
91 memcpy(newMark, segMark, segSize * sizeof(int));
92 delete segMark;
93 segMark = newMark;
94 segSize = newSize;
95 }
96 segMark[segUsed++] = n;
97 }
98
empty()99 void HtRegexReplace::empty()
100 {
101 // Destroy any existing replace pattern
102 delete repBuf; repBuf = 0;
103 segSize = segUsed = 0;
104 delete segMark; segMark = 0;
105 repLen = 0;
106 }
107
setReplace(const char * to)108 void HtRegexReplace::setReplace(const char *to)
109 {
110 empty();
111
112 repBuf = new char[strlen(to)]; // replace buffer can never contain more text than to string
113 int bufPos = 0; // our position within the output buffer
114
115 while (*to)
116 {
117 if (*to == '\\')
118 {
119 if (*++to == '\0') break;
120 if (*to >= '0' && *to <= '9')
121 {
122 putMark(bufPos);
123 putMark(*to - '0');
124 }
125 else
126 {
127 // We could handle some C style escapes here, but instead we just pass the character
128 // after the backslash through. This means that \\, \" and \' will do the right thing.
129 // It's unlikely that anyone will need any C style escapes in ht://Dig anyway.
130 repBuf[bufPos++] = *to;
131 }
132 to++;
133 }
134 else
135 {
136 repBuf[bufPos++] = *to++;
137 }
138 }
139 putMark(bufPos);
140 repLen = (size_t) bufPos;
141 }
142