1 /*
2 * variables.c
3 *
4 * Created on: Dec 26, 2014
5 * Author: James Cassell
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include "variables.h"
13 #include "bytecode.h"
14 #include "xmalloc.h"
15
16 #include <string.h>
17 #include <ctype.h>
18
variables_modify_string(const char * string,int modifiers)19 EXPORTED char *variables_modify_string (const char *string, int modifiers) {
20 int len, i;
21 char *result;
22 char *working_buffer;
23 len = strlen(string);
24 if (!len) {
25 return (BFV_LENGTH & modifiers) ? xstrdup("0") : xstrdup("");
26 }
27 /* Consider the string '\\\'
28 * length will be doubled with :quotewildcard
29 * length will then be tripled with :encodeurl
30 * so we allocate a buffer to encode the worst
31 * case final string of 2 * 3 = 6 times the length
32 * of the original string
33 */
34 result = xstrdup(string);
35 working_buffer = xstrdup(string);
36 result = xrealloc(result, 2 * 3 * len + 1);
37 working_buffer = xrealloc(working_buffer, 2 * 3 * len + 1);
38
39 /*
40 * +--------------------------------+
41 * | Precedence Modifier |
42 * +--------------------------------+
43 * | 40 :lower |
44 * | :upper |
45 * +--------------------------------+
46 * | 30 :lowerfirst |
47 * | :upperfirst |
48 * +--------------------------------+
49 * | 20 :quotewildcard |
50 * +--------------------------------+
51 * | 15 :encodeurl |
52 * +--------------------------------+
53 * | 10 :length |
54 * +--------------------------------+
55 */
56 /* Precedence 40 */
57 switch ((BFV_LOWER | BFV_UPPER) & modifiers) {
58 case BFV_LOWER:
59 for (i = 0; i < len; i++) {
60 result[i] = tolower(result[i]);
61 }
62 break;
63 case BFV_UPPER:
64 for (i = 0; i < len; i++) {
65 result[i] = tolower(result[i]);
66 }
67 break;
68 }
69 /* Precedence 30 */
70 switch ((BFV_LOWERFIRST | BFV_UPPERFIRST) & modifiers) {
71 case BFV_LOWERFIRST:
72 result[0] = tolower(result[0]);
73 break;
74 case BFV_UPPERFIRST:
75 result[0] = toupper(result[0]);
76 break;
77 }
78 /*
79 * 4.1.2. Modifier ":quotewildcard"
80
81 This modifier adds the necessary quoting to ensure that the expanded
82 text will only match a literal occurrence if used as a parameter to
83 :matches. Every character with special meaning ("*", "?", and "\")
84 is prefixed with "\" in the expansion.
85 *
86 */
87 /* Precedence 20 */
88 if (BFV_QUOTEWILDCARD & modifiers) {
89 char *original, *quoted;
90 original = result;
91 quoted = working_buffer;
92 while (*original) {
93 switch (*original) {
94 case '*':
95 case '?':
96 case '\\':
97 *quoted = '\\';
98 quoted++;
99 break;
100 }
101 *quoted = *original;
102 quoted++;
103 original++;
104 }
105 *quoted = '\0';
106 {
107 char *temp;
108 temp = result;
109 result = working_buffer;
110 working_buffer = temp;
111 }
112 }
113 /*
114 *
115 * 2.1. Percent-Encoding
116
117 A percent-encoding mechanism is used to represent a data octet in a
118 component when that octet's corresponding character is outside the
119 allowed set or is being used as a delimiter of, or within, the
120 component. A percent-encoded octet is encoded as a character
121 triplet, consisting of the percent character "%" followed by the two
122 hexadecimal digits representing that octet's numeric value. For
123 example, "%20" is the percent-encoding for the binary octet
124 "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
125 character (SP). Section 2.4 describes when percent-encoding and
126 decoding is applied.
127
128 pct-encoded = "%" HEXDIG HEXDIG
129
130 The uppercase hexadecimal digits 'A' through 'F' are equivalent to
131 the lowercase digits 'a' through 'f', respectively. If two URIs
132 differ only in the case of hexadecimal digits used in percent-encoded
133 octets, they are equivalent. For consistency, URI producers and
134 normalizers should use uppercase hexadecimal digits for all percent-
135 encodings.
136 *
137 * 2.3. Unreserved Characters
138
139 Characters that are allowed in a URI but do not have a reserved
140 purpose are called unreserved. These include uppercase and lowercase
141 letters, decimal digits, hyphen, period, underscore, and tilde.
142
143 unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
144
145 URIs that differ in the replacement of an unreserved character with
146 its corresponding percent-encoded US-ASCII octet are equivalent: they
147 identify the same resource. However, URI comparison implementations
148 do not always perform normalization prior to comparison (see Section
149 6). For consistency, percent-encoded octets in the ranges of ALPHA
150 (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
151 underscore (%5F), or tilde (%7E) should not be created by URI
152 producers and, when found in a URI, should be decoded to their
153 corresponding unreserved characters by URI normalizers.
154
155 */
156 /* Precedence 15 */
157 if (BFV_ENCODEURL & modifiers) {
158 char *original, *quoted;
159 original = result;
160 quoted = working_buffer;
161
162 while (*original) {
163 switch (*original) {
164 case 'a' ... 'z':
165 case 'A' ... 'Z':
166 case '0' ... '9':
167 *quoted = *original;
168 quoted ++;
169 break;
170 default:
171 snprintf(quoted, 4, "%%%02X", *original);
172 quoted += 3;
173 break;
174 }
175 original++;
176 }
177 *quoted = '\0';
178 {
179 char *temp;
180 temp = result;
181 result = working_buffer;
182 working_buffer = temp;
183 }
184 }
185 /* Precedence 10 */
186 if (BFV_LENGTH & modifiers) {
187 snprintf(working_buffer, strlen(result), "%zu", strlen(result));
188 {
189 char *temp;
190 temp = result;
191 result = working_buffer;
192 working_buffer = temp;
193 }
194 }
195 free(working_buffer);
196 return result;
197 }
198