1 /***************************************************************************
2 *
3 * thmlhtml.cpp - ThML to HTML filter
4 *
5 * $Id: thmlhtml.cpp 3548 2017-12-10 05:11:38Z scribe $
6 *
7 * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
8 * CrossWire Bible Society
9 * P. O. Box 2528
10 * Tempe, AZ 85280-2528
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation version 2.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 */
22
23 #include <stdlib.h>
24 #include <thmlhtml.h>
25 #include <swmodule.h>
26 #include <utilxml.h>
27
28
29 SWORD_NAMESPACE_START
30
31
ThMLHTML()32 ThMLHTML::ThMLHTML() {
33 setTokenStart("<");
34 setTokenEnd(">");
35
36 setEscapeStart("&");
37 setEscapeEnd(";");
38
39 setEscapeStringCaseSensitive(true);
40 setPassThruNumericEscapeString(true);
41
42 addAllowedEscapeString("quot");
43 addAllowedEscapeString("amp");
44 addAllowedEscapeString("lt");
45 addAllowedEscapeString("gt");
46
47 addAllowedEscapeString("nbsp");
48 addAllowedEscapeString("brvbar"); // "�"
49 addAllowedEscapeString("sect"); // "�"
50 addAllowedEscapeString("copy"); // "�"
51 addAllowedEscapeString("laquo"); // "�"
52 addAllowedEscapeString("reg"); // "�"
53 addAllowedEscapeString("acute"); // "�"
54 addAllowedEscapeString("para"); // "�"
55 addAllowedEscapeString("raquo"); // "�"
56
57 addAllowedEscapeString("Aacute"); // "�"
58 addAllowedEscapeString("Agrave"); // "�"
59 addAllowedEscapeString("Acirc"); // "�"
60 addAllowedEscapeString("Auml"); // "�"
61 addAllowedEscapeString("Atilde"); // "�"
62 addAllowedEscapeString("Aring"); // "�"
63 addAllowedEscapeString("aacute"); // "�"
64 addAllowedEscapeString("agrave"); // "�"
65 addAllowedEscapeString("acirc"); // "�"
66 addAllowedEscapeString("auml"); // "�"
67 addAllowedEscapeString("atilde"); // "�"
68 addAllowedEscapeString("aring"); // "�"
69 addAllowedEscapeString("Eacute"); // "�"
70 addAllowedEscapeString("Egrave"); // "�"
71 addAllowedEscapeString("Ecirc"); // "�"
72 addAllowedEscapeString("Euml"); // "�"
73 addAllowedEscapeString("eacute"); // "�"
74 addAllowedEscapeString("egrave"); // "�"
75 addAllowedEscapeString("ecirc"); // "�"
76 addAllowedEscapeString("euml"); // "�"
77 addAllowedEscapeString("Iacute"); // "�"
78 addAllowedEscapeString("Igrave"); // "�"
79 addAllowedEscapeString("Icirc"); // "�"
80 addAllowedEscapeString("Iuml"); // "�"
81 addAllowedEscapeString("iacute"); // "�"
82 addAllowedEscapeString("igrave"); // "�"
83 addAllowedEscapeString("icirc"); // "�"
84 addAllowedEscapeString("iuml"); // "�"
85 addAllowedEscapeString("Oacute"); // "�"
86 addAllowedEscapeString("Ograve"); // "�"
87 addAllowedEscapeString("Ocirc"); // "�"
88 addAllowedEscapeString("Ouml"); // "�"
89 addAllowedEscapeString("Otilde"); // "�"
90 addAllowedEscapeString("oacute"); // "�"
91 addAllowedEscapeString("ograve"); // "�"
92 addAllowedEscapeString("ocirc"); // "�"
93 addAllowedEscapeString("ouml"); // "�"
94 addAllowedEscapeString("otilde"); // "�"
95 addAllowedEscapeString("Uacute"); // "�"
96 addAllowedEscapeString("Ugrave"); // "�"
97 addAllowedEscapeString("Ucirc"); // "�"
98 addAllowedEscapeString("Uuml"); // "�"
99 addAllowedEscapeString("uacute"); // "�"
100 addAllowedEscapeString("ugrave"); // "�"
101 addAllowedEscapeString("ucirc"); // "�"
102 addAllowedEscapeString("uuml"); // "�"
103 addAllowedEscapeString("Yacute"); // "�"
104 addAllowedEscapeString("yacute"); // "�"
105 addAllowedEscapeString("yuml"); // "�"
106
107 addAllowedEscapeString("deg"); // "�"
108 addAllowedEscapeString("plusmn"); // "�"
109 addAllowedEscapeString("sup2"); // "�"
110 addAllowedEscapeString("sup3"); // "�"
111 addAllowedEscapeString("sup1"); // "�"
112 addAllowedEscapeString("nbsp"); // "�"
113 addAllowedEscapeString("pound"); // "�"
114 addAllowedEscapeString("cent"); // "�"
115 addAllowedEscapeString("frac14"); // "�"
116 addAllowedEscapeString("frac12"); // "�"
117 addAllowedEscapeString("frac34"); // "�"
118 addAllowedEscapeString("iquest"); // "�"
119 addAllowedEscapeString("iexcl"); // "�"
120 addAllowedEscapeString("ETH"); // "�"
121 addAllowedEscapeString("eth"); // "�"
122 addAllowedEscapeString("THORN"); // "�"
123 addAllowedEscapeString("thorn"); // "�"
124 addAllowedEscapeString("AElig"); // "�"
125 addAllowedEscapeString("aelig"); // "�"
126 addAllowedEscapeString("Oslash"); // "�"
127 addAllowedEscapeString("curren"); // "�"
128 addAllowedEscapeString("Ccedil"); // "�"
129 addAllowedEscapeString("ccedil"); // "�"
130 addAllowedEscapeString("szlig"); // "�"
131 addAllowedEscapeString("Ntilde"); // "�"
132 addAllowedEscapeString("ntilde"); // "�"
133 addAllowedEscapeString("yen"); // "�"
134 addAllowedEscapeString("not"); // "�"
135 addAllowedEscapeString("ordf"); // "�"
136 addAllowedEscapeString("uml"); // "�"
137 addAllowedEscapeString("shy"); // "�"
138 addAllowedEscapeString("macr"); // "�"
139
140 addAllowedEscapeString("micro"); // "�"
141 addAllowedEscapeString("middot"); // "�"
142 addAllowedEscapeString("cedil"); // "�"
143 addAllowedEscapeString("ordm"); // "�"
144 addAllowedEscapeString("times"); // "�"
145 addAllowedEscapeString("divide"); // "�"
146 addAllowedEscapeString("oslash"); // "�"
147
148 setTokenCaseSensitive(true);
149
150 addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
151 addTokenSubstitute("/note", ")</small></font> ");
152 }
153
154
handleToken(SWBuf & buf,const char * token,BasicFilterUserData * userData)155 bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
156 if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
157 MyUserData *u = (MyUserData *)userData;
158 XMLTag tag(token);
159 if (!strcmp(tag.getName(), "sync")) {
160 if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "Strongs")) {
161 const char* value = tag.getAttribute("value");
162 if (*value == 'H' || *value == 'G' || *value == 'A') {
163 value++;
164 buf += "<small><em>";
165 buf += value;
166 buf += "</em></small>";
167 }
168 else if (*value == 'T') {
169 value += 2;
170
171 buf += "<small><i>";
172 buf += value;
173 buf += "</i></small>";
174 }
175 }
176 else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "morph")) {
177 buf += "<small><em>";
178 buf += tag.getAttribute("value");
179 buf += "</em></small>";
180 }
181 else if (tag.getAttribute("type") && tag.getAttribute("value") && !strcmp(tag.getAttribute("type"), "lemma")) {
182 buf += "<small><em>(";
183 buf += tag.getAttribute("value");
184 buf += ")</em></small>";
185 }
186 }
187 else if (!strcmp(tag.getName(), "div")) {
188 if (tag.isEndTag() && (u->inSecHead)) {
189 buf += "</i></b><br />";
190 u->inSecHead = false;
191 }
192 else if (tag.getAttribute("class")) {
193 if (!strcmp(tag.getAttribute("class"), "sechead")) {
194 u->inSecHead = true;
195 buf += "<br /><b><i>";
196 }
197 else if (!strcmp(tag.getAttribute("class"), "title")) {
198 u->inSecHead = true;
199 buf += "<br /><b><i>";
200 }
201 }
202 }
203 else if (!strcmp(tag.getName(), "img")) {
204 const char *src = strstr(token, "src");
205 if (!src) // assert we have a src attribute
206 return false;
207
208 buf += '<';
209 for (const char *c = token; *c; c++) {
210 if (c == src) {
211 for (;((*c) && (*c != '"')); c++)
212 buf += *c;
213
214 if (!*c) { c--; continue; }
215
216 buf += '"';
217 if (*(c+1) == '/') {
218 buf += "file:";
219 buf += userData->module->getConfigEntry("AbsoluteDataPath");
220 if (buf[buf.length()-2] == '/')
221 c++; // skip '/'
222 }
223 continue;
224 }
225 buf += *c;
226 }
227 buf += '>';
228 }
229 else if (!strcmp(tag.getName(), "scripRef")) { //do nothing with scrip refs, we leave them out
230
231 }
232 else {
233 buf += '<';
234 buf += token;
235 buf += '>';
236
237 // return false; // we still didn't handle token
238 }
239 }
240 return true;
241 }
242
243
244 SWORD_NAMESPACE_END
245