1 //
2 // HtRegexReplace.cc
3 //
4 // HtRegexReplace: A subclass of HtRegex that can perform replacements
5 //
6 // Part of the ht://Dig package   <http://www.htdig.org/>
7 // Copyright (c) 2000-2004 The ht://Dig Group
8 // For copyright details, see the file COPYING in your distribution
9 // or the GNU Library General Public License (LGPL) version 2 or later
10 // <http://www.gnu.org/copyleft/lgpl.html>
11 //
12 // $Id: HtRegexReplace.cc,v 1.4 2004/05/28 13:15:21 lha Exp $
13 //
14 
15 #include "HtRegexReplace.h"
16 #include <locale.h>
17 
18 
HtRegexReplace()19 HtRegexReplace::HtRegexReplace()
20 {
21 }
22 
HtRegexReplace(const char * from,const char * to,int case_sensitive)23 HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive)
24 	: HtRegex(from, case_sensitive)
25 {
26 	memset(&regs, 0, sizeof(regs));
27 	repBuf		= 0;
28 	segSize		=
29 	segUsed		= 0;
30 	segMark		= 0;
31 	repLen		= 0;
32 
33 	setReplace(to);
34 }
35 
~HtRegexReplace()36 HtRegexReplace::~HtRegexReplace()
37 {
38 	empty();
39 }
40 
replace(String & str,int nullpattern,int nullstr)41 int HtRegexReplace::replace(String &str, int nullpattern, int nullstr)
42 {
43 	const int regCount = sizeof(regs) / sizeof(regs[0]);
44 	if (compiled == 0 || repBuf == 0) return nullpattern;
45 	if (str.length() == 0) return nullstr;
46 
47 	if (regexec(&re, str.get(), regCount, regs, 0) == 0)
48 	{
49 		// Firstly work out how long the result string will be. We think this will be more effecient
50 		// than letting the buffer grow in stages as we build the result, but who knows?
51 		//cout << "!!! Match !!!" << endl;
52 		size_t resLen = repLen;
53 		int i, reg, repPos;
54 		const char *src = str.get();
55 
56 		for (i = 1; i < (int) segUsed; i += 2)
57 		{
58 			reg = segMark[i];
59 			if (reg < regCount && regs[reg].rm_so != -1)
60 				resLen += regs[reg].rm_eo - regs[reg].rm_so;
61 		}
62 		//cout << "result will be " << resLen << " chars long" << endl;
63 		String result(resLen);	// Make the result string preallocating the buffer size
64 		for (i = 0, repPos = 0;; )
65 		{
66 			//cout << "appending segment " << i << endl;
67 			result.append(repBuf + repPos, segMark[i] - repPos);		// part of the replace string
68 			repPos = segMark[i];		// move forward
69 			if (++i == (int) segUsed) break;	// was that the last segment?
70 			reg = segMark[i++];			// get the register number
71 			if (reg < regCount && regs[reg].rm_so != -1)
72 				result.append((char *) src + regs[reg].rm_so, regs[reg].rm_eo - regs[reg].rm_so);
73 		}
74 		str = result;
75 		//cout << "return " << result.get() << endl;
76 
77 		return 1;
78 	}
79 
80 	return 0;
81 }
82 
83 // Private: place a mark in the mark buffer growing it if necessary.
putMark(int n)84 void HtRegexReplace::putMark(int n)
85 {
86 	// assert(segUsed <= segSize);
87 	if (segUsed == segSize)
88 	{
89 		size_t newSize = segSize * 2 + 5;		// grow in chunks
90 		int *newMark = new int[newSize];		// do we assume that new can't fail?
91 		memcpy(newMark, segMark, segSize * sizeof(int));
92 		delete segMark;
93 		segMark = newMark;
94 		segSize = newSize;
95 	}
96 	segMark[segUsed++] = n;
97 }
98 
empty()99 void HtRegexReplace::empty()
100 {
101 	// Destroy any existing replace pattern
102     delete repBuf; repBuf = 0;
103     segSize = segUsed = 0;
104     delete segMark; segMark = 0;
105     repLen = 0;
106 }
107 
setReplace(const char * to)108 void HtRegexReplace::setReplace(const char *to)
109 {
110 	empty();
111 
112 	repBuf = new char[strlen(to)];		// replace buffer can never contain more text than to string
113 	int bufPos = 0;			// our position within the output buffer
114 
115 	while (*to)
116 	{
117 		if (*to == '\\')
118 		{
119 			if (*++to == '\0') break;
120 			if (*to >= '0' && *to <= '9')
121 			{
122 				putMark(bufPos);
123 				putMark(*to - '0');
124 			}
125 			else
126 			{
127 				// We could handle some C style escapes here, but instead we just pass the character
128 				// after the backslash through. This means that \\, \" and \' will do the right thing.
129 				// It's unlikely that anyone will need any C style escapes in ht://Dig anyway.
130 				repBuf[bufPos++] = *to;
131 			}
132 			to++;
133 		}
134 		else
135 		{
136 			repBuf[bufPos++] = *to++;
137 		}
138 	}
139 	putMark(bufPos);
140 	repLen = (size_t) bufPos;
141 }
142