1 /*
2 * ModSecurity, http://www.modsecurity.org/
3 * Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
4 *
5 * You may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * If any of the files related to licensing are missing or if you have any
11 * other questions related to licensing please contact Trustwave Holdings, Inc.
12 * directly using the email address security@modsecurity.org.
13 *
14 */
15
16 #include "src/actions/transformations/html_entity_decode.h"
17
18 #include <string.h>
19
20 #include <iostream>
21 #include <string>
22 #include <algorithm>
23 #include <functional>
24 #include <cctype>
25 #include <locale>
26
27 #include "modsecurity/transaction.h"
28 #include "src/actions/transformations/transformation.h"
29
30
31 namespace modsecurity {
32 namespace actions {
33 namespace transformations {
34
35
evaluate(const std::string & value,Transaction * transaction)36 std::string HtmlEntityDecode::evaluate(const std::string &value,
37 Transaction *transaction) {
38 std::string ret;
39 unsigned char *input;
40
41 input = reinterpret_cast<unsigned char *>
42 (malloc(sizeof(char) * value.length()+1));
43
44 if (input == NULL) {
45 return "";
46 }
47
48 memcpy(input, value.c_str(), value.length()+1);
49
50 size_t i = inplace(input, value.length());
51
52 ret.assign(reinterpret_cast<char *>(input), i);
53 free(input);
54
55 return ret;
56 }
57
58
inplace(unsigned char * input,uint64_t input_len)59 int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
60 unsigned char *d = input;
61 int i, count;
62
63 if ((input == NULL) || (input_len == 0)) {
64 return 0;
65 }
66
67 i = count = 0;
68 while ((i < input_len) && (count < input_len)) {
69 int z, copy = 1;
70
71 /* Require an ampersand and at least one character to
72 * start looking into the entity.
73 */
74 if ((input[i] == '&') && (i + 1 < input_len)) {
75 int k, j = i + 1;
76
77 if (input[j] == '#') {
78 /* Numerical entity. */
79 copy++;
80
81 if (!(j + 1 < input_len)) {
82 goto HTML_ENT_OUT; /* Not enough bytes. */
83 }
84 j++;
85
86 if ((input[j] == 'x') || (input[j] == 'X')) {
87 /* Hexadecimal entity. */
88 copy++;
89
90 if (!(j + 1 < input_len)) {
91 goto HTML_ENT_OUT; /* Not enough bytes. */
92 }
93 j++; /* j is the position of the first digit now. */
94
95 k = j;
96 while ((j < input_len) && (isxdigit(input[j]))) {
97 j++;
98 }
99 if (j > k) { /* Do we have at least one digit? */
100 /* Decode the entity. */
101 char *x;
102 x = reinterpret_cast<char *>(calloc(sizeof(char),
103 ((j - k) + 1)));
104 memcpy(x, (const char *)&input[k], j - k);
105 *d++ = (unsigned char)strtol(x, NULL, 16);
106 free(x);
107 count++;
108
109 /* Skip over the semicolon if it's there. */
110 if ((j < input_len) && (input[j] == ';')) {
111 i = j + 1;
112 } else {
113 i = j;
114 }
115 continue;
116 } else {
117 goto HTML_ENT_OUT;
118 }
119 } else {
120 /* Decimal entity. */
121 k = j;
122 while ((j < input_len) && (isdigit(input[j]))) {
123 j++;
124 }
125 if (j > k) { /* Do we have at least one digit? */
126 /* Decode the entity. */
127 char *x;
128 x = reinterpret_cast<char *>(calloc(sizeof(char),
129 ((j - k) + 1)));
130 memcpy(x, (const char *)&input[k], j - k);
131 *d++ = (unsigned char)strtol(x, NULL, 10);
132 free(x);
133 count++;
134
135 /* Skip over the semicolon if it's there. */
136 if ((j < input_len) && (input[j] == ';')) {
137 i = j + 1;
138 } else {
139 i = j;
140 }
141 continue;
142 } else {
143 goto HTML_ENT_OUT;
144 }
145 }
146 } else {
147 /* Text entity. */
148 k = j;
149 while ((j < input_len) && (isalnum(input[j]))) {
150 j++;
151 }
152 if (j > k) { /* Do we have at least one digit? */
153 char *x;
154 x = reinterpret_cast<char *>(calloc(sizeof(char),
155 ((j - k) + 1)));
156 memcpy(x, (const char *)&input[k], j - k);
157
158 /* Decode the entity. */
159 /* ENH What about others? */
160 if (strcasecmp(x, "quot") == 0) {
161 *d++ = '"';
162 } else if (strcasecmp(x, "amp") == 0) {
163 *d++ = '&';
164 } else if (strcasecmp(x, "lt") == 0) {
165 *d++ = '<';
166 } else if (strcasecmp(x, "gt") == 0) {
167 *d++ = '>';
168 } else if (strcasecmp(x, "nbsp") == 0) {
169 *d++ = NBSP;
170 } else {
171 /* We do no want to convert this entity,
172 * copy the raw data over. */
173 copy = j - k + 1;
174 free(x);
175 goto HTML_ENT_OUT;
176 }
177 free(x);
178
179 count++;
180
181 /* Skip over the semicolon if it's there. */
182 if ((j < input_len) && (input[j] == ';')) {
183 i = j + 1;
184 } else {
185 i = j;
186 }
187
188 continue;
189 }
190 }
191 }
192
193 HTML_ENT_OUT:
194
195 for (z = 0; ((z < copy) && (count < input_len)); z++) {
196 *d++ = input[i++];
197 count++;
198 }
199 }
200
201 *d = '\0';
202
203 return count;
204 }
205
206 } // namespace transformations
207 } // namespace actions
208 } // namespace modsecurity
209