1 /***************************************************************************
2 *
3 * thmlhtmlhref.cpp - ThML to HTML filter with hrefs
4 *
5 * $Id: thmlhtmlhref.cpp 3548 2017-12-10 05:11:38Z scribe $
6 *
7 * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
8 * CrossWire Bible Society
9 * P. O. Box 2528
10 * Tempe, AZ 85280-2528
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation version 2.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 */
22
23 #include <stdlib.h>
24 #include <thmlhtmlhref.h>
25 #include <swmodule.h>
26 #include <utilxml.h>
27 #include <utilstr.h>
28 #include <versekey.h>
29 #include <url.h>
30
31
32 SWORD_NAMESPACE_START
33
34
MyUserData(const SWModule * module,const SWKey * key)35 ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
36 isBiblicalText = false;
37 inSecHead = false;
38 if (module) {
39 version = module->getName();
40 isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
41 }
42 }
43
44
ThMLHTMLHREF()45 ThMLHTMLHREF::ThMLHTMLHREF() {
46 setTokenStart("<");
47 setTokenEnd(">");
48
49 setEscapeStart("&");
50 setEscapeEnd(";");
51
52 setEscapeStringCaseSensitive(true);
53 setPassThruNumericEscapeString(true);
54
55 addAllowedEscapeString("quot");
56 addAllowedEscapeString("amp");
57 addAllowedEscapeString("lt");
58 addAllowedEscapeString("gt");
59
60 addAllowedEscapeString("nbsp");
61 addAllowedEscapeString("brvbar"); // "Š"
62 addAllowedEscapeString("sect"); // "§"
63 addAllowedEscapeString("copy"); // "©"
64 addAllowedEscapeString("laquo"); // "«"
65 addAllowedEscapeString("reg"); // "®"
66 addAllowedEscapeString("acute"); // "Ž"
67 addAllowedEscapeString("para"); // "¶"
68 addAllowedEscapeString("raquo"); // "»"
69
70 addAllowedEscapeString("Aacute"); // "Á"
71 addAllowedEscapeString("Agrave"); // "À"
72 addAllowedEscapeString("Acirc"); // "Â"
73 addAllowedEscapeString("Auml"); // "Ä"
74 addAllowedEscapeString("Atilde"); // "Ã"
75 addAllowedEscapeString("Aring"); // "Å"
76 addAllowedEscapeString("aacute"); // "á"
77 addAllowedEscapeString("agrave"); // "à"
78 addAllowedEscapeString("acirc"); // "â"
79 addAllowedEscapeString("auml"); // "ä"
80 addAllowedEscapeString("atilde"); // "ã"
81 addAllowedEscapeString("aring"); // "å"
82 addAllowedEscapeString("Eacute"); // "É"
83 addAllowedEscapeString("Egrave"); // "È"
84 addAllowedEscapeString("Ecirc"); // "Ê"
85 addAllowedEscapeString("Euml"); // "Ë"
86 addAllowedEscapeString("eacute"); // "é"
87 addAllowedEscapeString("egrave"); // "è"
88 addAllowedEscapeString("ecirc"); // "ê"
89 addAllowedEscapeString("euml"); // "ë"
90 addAllowedEscapeString("Iacute"); // "Í"
91 addAllowedEscapeString("Igrave"); // "Ì"
92 addAllowedEscapeString("Icirc"); // "Î"
93 addAllowedEscapeString("Iuml"); // "Ï"
94 addAllowedEscapeString("iacute"); // "í"
95 addAllowedEscapeString("igrave"); // "ì"
96 addAllowedEscapeString("icirc"); // "î"
97 addAllowedEscapeString("iuml"); // "ï"
98 addAllowedEscapeString("Oacute"); // "Ó"
99 addAllowedEscapeString("Ograve"); // "Ò"
100 addAllowedEscapeString("Ocirc"); // "Ô"
101 addAllowedEscapeString("Ouml"); // "Ö"
102 addAllowedEscapeString("Otilde"); // "Õ"
103 addAllowedEscapeString("oacute"); // "ó"
104 addAllowedEscapeString("ograve"); // "ò"
105 addAllowedEscapeString("ocirc"); // "ô"
106 addAllowedEscapeString("ouml"); // "ö"
107 addAllowedEscapeString("otilde"); // "õ"
108 addAllowedEscapeString("Uacute"); // "Ú"
109 addAllowedEscapeString("Ugrave"); // "Ù"
110 addAllowedEscapeString("Ucirc"); // "Û"
111 addAllowedEscapeString("Uuml"); // "Ü"
112 addAllowedEscapeString("uacute"); // "ú"
113 addAllowedEscapeString("ugrave"); // "ù"
114 addAllowedEscapeString("ucirc"); // "û"
115 addAllowedEscapeString("uuml"); // "ü"
116 addAllowedEscapeString("Yacute"); // "Ý"
117 addAllowedEscapeString("yacute"); // "ý"
118 addAllowedEscapeString("yuml"); // "ÿ"
119
120 addAllowedEscapeString("deg"); // "°"
121 addAllowedEscapeString("plusmn"); // "±"
122 addAllowedEscapeString("sup2"); // "²"
123 addAllowedEscapeString("sup3"); // "³"
124 addAllowedEscapeString("sup1"); // "¹"
125 addAllowedEscapeString("nbsp"); // "º"
126 addAllowedEscapeString("pound"); // "£"
127 addAllowedEscapeString("cent"); // "¢"
128 addAllowedEscapeString("frac14"); // "Œ"
129 addAllowedEscapeString("frac12"); // "œ"
130 addAllowedEscapeString("frac34"); // "Ÿ"
131 addAllowedEscapeString("iquest"); // "¿"
132 addAllowedEscapeString("iexcl"); // "¡"
133 addAllowedEscapeString("ETH"); // "Ð"
134 addAllowedEscapeString("eth"); // "ð"
135 addAllowedEscapeString("THORN"); // "Þ"
136 addAllowedEscapeString("thorn"); // "þ"
137 addAllowedEscapeString("AElig"); // "Æ"
138 addAllowedEscapeString("aelig"); // "æ"
139 addAllowedEscapeString("Oslash"); // "Ø"
140 addAllowedEscapeString("curren"); // "€"
141 addAllowedEscapeString("Ccedil"); // "Ç"
142 addAllowedEscapeString("ccedil"); // "ç"
143 addAllowedEscapeString("szlig"); // "ß"
144 addAllowedEscapeString("Ntilde"); // "Ñ"
145 addAllowedEscapeString("ntilde"); // "ñ"
146 addAllowedEscapeString("yen"); // "¥"
147 addAllowedEscapeString("not"); // "¬"
148 addAllowedEscapeString("ordf"); // "ª"
149 addAllowedEscapeString("uml"); // "š"
150 addAllowedEscapeString("shy"); // ""
151 addAllowedEscapeString("macr"); // "¯"
152
153 addAllowedEscapeString("micro"); // "µ"
154 addAllowedEscapeString("middot"); // "·"
155 addAllowedEscapeString("cedil"); // "ž"
156 addAllowedEscapeString("ordm"); // "º"
157 addAllowedEscapeString("times"); // "×"
158 addAllowedEscapeString("divide"); // "÷"
159 addAllowedEscapeString("oslash"); // "ø"
160
161 setTokenCaseSensitive(true);
162 // addTokenSubstitute("scripture", "<i> ");
163 addTokenSubstitute("/scripture", "</i> ");
164
165 renderNoteNumbers = false;
166 }
167
168
handleToken(SWBuf & buf,const char * token,BasicFilterUserData * userData)169 bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
170 if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
171 MyUserData *u = (MyUserData *)userData;
172
173 XMLTag tag(token);
174 if ((!tag.isEndTag()) && (!tag.isEmpty()))
175 u->startTag = tag;
176
177 if (tag.getName() && !strcmp(tag.getName(), "sync")) {
178 SWBuf value = tag.getAttribute("value");
179 if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //>
180 if(value.length())
181 buf.appendFormatted("<small><em class=\"morph\">(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\" class=\"morph\">%s</a>)</em></small>",
182 URL::encode(value.c_str()).c_str(),
183 value.c_str());
184 }
185 else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "lemma")) { //>
186 if(value.length())
187 // empty "type=" is deliberate.
188 buf.appendFormatted("<small><em class=\"strongs\"><<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\" class=\"strongs\">%s</a>></em></small>",
189 URL::encode(value.c_str()).c_str(),
190 value.c_str());
191 }
192 else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
193 char ch = *value;
194 value<<1;
195 buf.appendFormatted("<small><em class=\"strongs\"><<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\" class=\"strongs\">",
196 ((ch == 'H') ? "Hebrew" : "Greek"),
197 URL::encode(value.c_str()).c_str());
198 buf += (value.length()) ? value.c_str() : "";
199 buf += "</a>></em></small>";
200 }
201 else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
202 buf += (tag.isEndTag() ? "</b>" : "<b>");
203 }
204
205 }
206 // <note> tag
207 else if (!strcmp(tag.getName(), "note")) {
208 if (!tag.isEndTag()) {
209 if (!tag.isEmpty()) {
210 SWBuf type = tag.getAttribute("type");
211 SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
212 SWBuf noteName = tag.getAttribute("n");
213 if (u->vkey) {
214 // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
215 char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
216 buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
217 ch,
218 URL::encode(footnoteNumber.c_str()).c_str(),
219 URL::encode(u->version.c_str()).c_str(),
220 URL::encode(u->vkey->getText()).c_str(),
221 ch,
222 ch,
223 (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
224 }
225 else {
226 char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
227 buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
228 ch,
229 URL::encode(footnoteNumber.c_str()).c_str(),
230 URL::encode(u->version.c_str()).c_str(),
231 URL::encode(u->key->getText()).c_str(),
232 ch,
233 ch,
234 (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
235 }
236 u->suspendTextPassThru = true;
237 }
238 }
239 if (tag.isEndTag()) {
240 u->suspendTextPassThru = false;
241 }
242 }
243 else if (!strcmp(tag.getName(), "scripture")) {
244 buf += (tag.isEndTag() ? "</i>" : "<i>");
245 }
246 // <scripRef> tag
247 else if (!strcmp(tag.getName(), "scripRef")) {
248 if (!tag.isEndTag()) {
249 if (!tag.isEmpty()) {
250 u->suspendTextPassThru = true;
251 }
252 }
253 if (tag.isEndTag()) { // </scripRef>
254 if (!u->isBiblicalText) {
255 SWBuf refList = u->startTag.getAttribute("passage");
256 if (!refList.length())
257 refList = u->lastTextNode;
258 SWBuf version = tag.getAttribute("version");
259
260 buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
261 (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
262 (version.length()) ? URL::encode(version.c_str()).c_str() : "");
263 buf += u->lastTextNode.c_str();
264 buf += "</a>";
265 }
266 else {
267 SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
268 SWBuf noteName = tag.getAttribute("n");
269 if (u->vkey) {
270 // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
271 //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str());
272 buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup class=\"x\">*x%s</sup></small></a>",
273 URL::encode(footnoteNumber.c_str()).c_str(),
274 URL::encode(u->version.c_str()).c_str(),
275 URL::encode(u->vkey->getText()).c_str(),
276 (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
277 }
278 }
279
280 // let's let text resume to output again
281 u->suspendTextPassThru = false;
282 }
283 }
284 else if (tag.getName() && !strcmp(tag.getName(), "div")) {
285 if (tag.isEndTag() && u->inSecHead) {
286 buf += "</i></b><br />";
287 u->inSecHead = false;
288 }
289 else if (tag.getAttribute("class")) {
290 if (!stricmp(tag.getAttribute("class"), "sechead")) {
291 u->inSecHead = true;
292 buf += "<br /><b><i>";
293 }
294 else if (!stricmp(tag.getAttribute("class"), "title")) {
295 u->inSecHead = true;
296 buf += "<br /><b><i>";
297 }
298 else {
299 buf += tag;
300 }
301 }
302 else {
303 buf += tag;
304 }
305 }
306 else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
307 const char *src = strstr(token, "src");
308 if (!src) // assert we have a src attribute
309 return false;
310
311 const char *c, *d;
312 if (((c = strchr(src+3, '"')) == NULL) ||
313 ((d = strchr( ++c , '"')) == NULL)) // identify endpoints.
314 return false; // abandon hope.
315
316 SWBuf imagename = "file:";
317 if (*c == '/') // as below, inside for loop.
318 imagename += userData->module->getConfigEntry("AbsoluteDataPath");
319 while (c != d) // move bits into the name.
320 imagename += *(c++);
321
322 // images become clickable, if the UI supports showImage.
323 buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
324 URL::encode(imagename.c_str()).c_str(),
325 URL::encode(u->version.c_str()).c_str());
326
327 for (c = token; *c; c++) {
328 if ((*c == '/') && (*(c+1) == '\0'))
329 continue;
330 if (c == src) {
331 for (;((*c) && (*c != '"')); c++)
332 buf += *c;
333
334 if (!*c) { c--; continue; }
335
336 buf += '"';
337 if (*(c+1) == '/') {
338 buf += "file:";
339 buf += userData->module->getConfigEntry("AbsoluteDataPath");
340 if (buf[buf.length()-2] == '/')
341 c++; // skip '/'
342 }
343 continue;
344 }
345 buf += *c;
346 }
347 buf += " border=0 /></a>";
348 }
349 else {
350 buf += '<';
351 /*for (const char *tok = token; *tok; tok++)
352 buf += *tok;*/
353 buf += token;
354 buf += '>';
355 //return false; // we still didn't handle token
356 }
357 }
358 return true;
359 }
360
361
362 SWORD_NAMESPACE_END
363