1 /*************************************************
2 *      PCRE string replacement                   *
3 *************************************************/
4 
5 /*
6 PCRE is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language.
8 pcre_subst is a wrapper around pcre_exec designed to make it easier to
9 perform PERL style replacements with PCRE.
10 
11 Written by: Bert Driehuis <driehuis@playbeing.org>
12 
13            Copyright (c) 2000 Bert Driehuis
14 
15 -----------------------------------------------------------------------------
16 Permission is granted to anyone to use this software for any purpose on any
17 computer system, and to redistribute it freely, subject to the following
18 restrictions:
19 
20 1. This software is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23 
24 2. The origin of this software must not be misrepresented, either by
25    explicit claim or by omission.
26 
27 3. Altered versions must be plainly marked as such, and must not be
28    misrepresented as being the original software.
29 
30 4. If PCRE is embedded in any software that is released under the GNU
31    General Purpose Licence (GPL), then the terms of that licence shall
32    supersede any condition above with which it is incompatible.
33 */
34 
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include <pcre.h>
39 #include "pcre_subst.h"
40 
41 #ifdef DEBUG_PCRE_SUBST
/*
 * Debug helper: print one line that visualises a span of the subject.
 *
 * Bytes at offsets [start, end) are echoed unchanged; every other byte of
 * the len-byte subject is printed as '-'.  A newline terminates the line.
 *
 * str    subject buffer; only the first len bytes are examined
 * len    number of bytes in str
 * start  offset of the first byte of the span
 * end    offset one past the last byte of the span
 */
static void
dumpstr(const char *str, int len, int start, int end)
{
	int i;

	/*
	 * Bound the walk by the caller-supplied length rather than strlen():
	 * the subject handed to pcre_exec() is length-counted, and calling
	 * strlen() in the loop condition rescanned the string on every pass.
	 */
	for (i = 0; i < len; i++)
		putchar((i >= start && i < end) ? str[i] : '-');
	putchar('\n');
}
54 
/*
 * Debug helper: dump the outcome of a match to stdout.
 *
 * Prints the subject (via %s, so str must be NUL-terminated here), the
 * match count followed by the first nmat offset pairs of ovec, and then one
 * dumpstr() line per pair.  A non-positive nmat prints no offsets and no
 * span lines.
 *
 * str   subject string
 * len   subject length, forwarded to dumpstr()
 * rep   replacement template; accepted but not used
 * nmat  number of offset pairs to show
 * ovec  offset vector holding at least nmat [start, end) pairs
 */
static void
dumpmatch(const char *str, int len, const char *rep, int nmat, const int *ovec)
{
	int k;
	const int nslots = nmat * 2;

	printf("%s	Input\n", str);

	/* Raw offsets, all on one line. */
	printf("nmat=%d", nmat);
	for (k = 0; k < nslots; k++)
		printf(" %d", ovec[k]);
	printf("\n");

	/* One picture per pair: pair 0 is the first line, groups follow. */
	for (k = 0; k < nmat; k++)
		dumpstr(str, len, ovec[2 * k], ovec[2 * k + 1]);
	printf("\n");
}
68 #endif
69 
/*
 * Parse the group number of a "$N" reference in a replacement template.
 *
 * *cpp must point at a '$' that is immediately followed by a decimal digit.
 * On return *cpp points just past the last digit and *group holds the
 * parsed number.
 *
 * Returns nonzero when the number names one of the nmat captured groups
 * (1..nmat), zero otherwise.
 */
static int
parse_group(const char **cpp, int nmat, unsigned long *group)
{
	char *end;

	/*
	 * Keep the value unsigned and full width: narrowing it to int could
	 * turn a huge number into a negative one that slips past the range
	 * test.  On overflow strtoul() yields ULONG_MAX, which is rejected.
	 */
	*group = strtoul(*cpp + 1, &end, 10);
	*cpp = end;
	return nmat > 0 && *group >= 1 && *group <= (unsigned long)nmat;
}

/*
 * Compute how many bytes doreplace() will emit for a replacement template.
 *
 * rep     NUL-terminated template; "$N" stands for captured group N
 * nmat    number of captured groups described by replen
 * replen  replen[i] is the byte length of group i + 1
 *
 * Every character outside a "$N" reference counts as one byte.  A reference
 * to group 0 or to a group above nmat contributes nothing and is reported
 * on stderr.  Non-positive lengths are counted as zero.
 *
 * Returns the expanded length, excluding any terminating NUL.
 */
static int
findreplen(const char *rep, int nmat, const int *replen)
{
	int len = 0;
	unsigned long group;
	const char *cp = rep;

	while (*cp) {
		/* Cast first: isdigit() is undefined for negative char values. */
		if (*cp == '$' && isdigit((unsigned char)cp[1])) {
			if (parse_group(&cp, nmat, &group)) {
				if (replen[group - 1] > 0)
					len += replen[group - 1];
			} else {
				fprintf(stderr, "repl %lu out of range\n", group);
			}
		} else {
			cp++;
			len++;
		}
	}
	return len;
}

/*
 * Expand a replacement template into out.
 *
 * out     destination; must have room for findreplen(rep, nmat, replen)
 *         bytes.  No terminating NUL is written.
 * rep     NUL-terminated template; "$N" stands for captured group N
 * nmat    number of captured groups described by replen / repstr
 * replen  replen[i] is the byte length of group i + 1
 * repstr  repstr[i] points at the first byte of group i + 1
 *
 * References to group 0 or to a group above nmat expand to nothing, which
 * keeps the output length in step with findreplen().
 */
static void
doreplace(char *out, const char *rep, int nmat, int *replen, const char **repstr)
{
	unsigned long group;
	const char *cp = rep;

	while (*cp) {
		if (*cp == '$' && isdigit((unsigned char)cp[1])) {
			if (parse_group(&cp, nmat, &group)) {
				int n = replen[group - 1];

				/*
				 * memcpy, not strncpy: the group is a counted
				 * byte range, not a NUL-terminated string.
				 */
				if (n > 0) {
					memcpy(out, repstr[group - 1], (size_t)n);
					out += n;
				}
			}
		} else {
			*out++ = *cp++;
		}
	}
}
108 
109 static char *
edit(const char * str,int len,const char * rep,int nmat,const int * ovec)110 edit(const char *str, int len, const char *rep, int nmat, const int *ovec)
111 {
112 	int i, slen, rlen;
113 	const int *mvec = ovec;
114 	char *res, *cp;
115 	int replen[OIDC_PCRE_MAXCAPTURE];
116 	const char *repstr[OIDC_PCRE_MAXCAPTURE];
117 	nmat--;
118 	ovec += 2;
119 	for (i = 0; i < nmat; i++) {
120 		replen[i] = ovec[i * 2 + 1] - ovec[i * 2];
121 		repstr[i] = &str[ovec[i * 2]];
122 #ifdef DEBUG_PCRE_SUBST
123 		printf(">>>%d %d %.*s\n", i, replen[i], replen[i], repstr[i]);
124 #endif
125 	}
126 	slen = len;
127 	len -= mvec[1] - mvec[0];
128 	len += rlen = findreplen(rep, nmat, replen);
129 #ifdef DEBUG_PCRE_SUBST
130 	printf("resulting length %d (srclen=%d)\n", len, slen);
131 #endif
132 	cp = res = pcre_malloc(len + 1);
133 	if (mvec[0] > 0) {
134 		strncpy(cp, str, mvec[0]);
135 		cp += mvec[0];
136 	}
137 	doreplace(cp, rep, nmat, replen, repstr);
138 	cp += rlen;
139 	if (mvec[1] < slen)
140 		strcpy(cp, &str[mvec[1]]);
141 	res[len] = 0;
142 	return res;
143 }
144 
145 char *
pcre_subst(const pcre * ppat,const pcre_extra * extra,const char * str,int len,int offset,int options,const char * rep)146 pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str, int len,
147 			int offset, int options, const char *rep)
148 {
149 	int nmat;
150 	int ovec[OIDC_PCRE_MAXCAPTURE * 3];
151 	nmat = pcre_exec(ppat, extra, str, len, offset, options,
152 		ovec, sizeof(ovec));
153 #ifdef DEBUG_PCRE_SUBST
154 	dumpmatch(str, len, rep, nmat, ovec);
155 #endif
156 	if (nmat <= 0)
157 		return NULL;
158 	return(edit(str, len, rep, nmat, ovec));
159 }
160 
161 #ifdef DEBUG_BUILD
162 int
main()163 main()
164 {
165 	char *pat = "quick\\s(\\w+)\\s(fox)";
166 	char *rep = "$1ish $2";
167 	char *str = "The quick brown foxy";
168 	char *newstr;
169 	const char *err;
170 	int erroffset;
171 	pcre_extra *extra;
172 	pcre *ppat = pcre_compile(pat, 0, &err, &erroffset, NULL);
173 	if (ppat == NULL) {
174 		fprintf(stderr, "%s at %d\n", err, erroffset);
175 		exit(1);
176 	}
177 	extra = pcre_study(ppat, 0, &err);
178 	if (err != NULL)
179 		fprintf(stderr, "Study %s failed: %s\n", pat, err);
180 	newstr = pcre_subst(ppat, extra, str, strlen(str), 0, 0, rep);
181 	if (newstr) {
182 		printf("Newstr\t%s\n", newstr);
183 		pcre_free(newstr);
184 	} else {
185 		printf("No match\n");
186 	}
187 	return 0;
188 }
189 #endif
190