1 /*
2     gen_html.c - Part of psiconv, a PSION 5 file formats converter
3     Copyright (c) 1999-2014  Frodo Looijaard <frodo@frodo.looijaard.name>
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 
20 #include "config.h"
21 
22 #include <psiconv/configuration.h>
23 #include <psiconv/data.h>
24 #include "general.h"
25 
26 #include <string.h>
27 #include <stdlib.h>
28 
29 #ifdef DMALLOC
30 #include "dmalloc.h"
31 #endif
32 
33 #define TEMPSTR_LEN 100
34 
35 static void text(const psiconv_config config,psiconv_list list,
36                  psiconv_string_t data,const encoding enc);
37 static void header(const psiconv_config config, psiconv_list list,
38                    const encoding enc);
39 static void footer(const psiconv_config config, psiconv_list list,
40                    const encoding enc);
41 static void characters(const psiconv_config config, psiconv_list list,
42                 const psiconv_string_t textstr,
43 		const psiconv_character_layout layout,const encoding enc);
44 static void paragraph(const psiconv_config config, psiconv_list list,
45                psiconv_paragraph para, const encoding enc);
46 static void paragraphs(const psiconv_config config, psiconv_list list,
47                 psiconv_text_and_layout paragraphs, const encoding enc);
48 static void gen_word(const psiconv_config config, psiconv_list list,
49                            const psiconv_word_f file, const encoding enc);
50 static void gen_texted(const psiconv_config config, psiconv_list list,
51                     const psiconv_texted_f file, const encoding enc);
52 static int gen_html4(const psiconv_config config, psiconv_list list,
53               const psiconv_file file, const char *dest,
54 	      const encoding enc);
55 
56 
/*
 * Emit the string `data' to `list', translating special character codes:
 *
 *   0x06, 0x07, 0x08  ->  "<BR>"
 *   0x0b, 0x0c        ->  "-"
 *   0x09, 0x0a, 0x0f  ->  " "
 *
 * Every other code below 0x20 is dropped; codes from 0x20 upwards are
 * handed unchanged to output_char().
 */
void text(const psiconv_config config,psiconv_list list,
          psiconv_string_t data,const encoding enc)
{
  int i;
  /* The string is not modified in the loop, so its length is measured
     once instead of on every iteration. */
  const int len = psiconv_unicode_strlen(data);

  for (i = 0; i < len; i++) {
    switch (data[i]) {
    case 0x06:
    case 0x07:
    case 0x08:
      output_simple_chars(config,list,"<BR>",enc);
      break;
    case 0x0b:
    case 0x0c:
      output_simple_chars(config,list,"-",enc);
      break;
    case 0x09:
    case 0x0a:
    case 0x0f:
      output_simple_chars(config,list," ",enc);
      break;
    default:
      if (data[i] >= 0x20)
        output_char(config,list,data[i],enc);
      break;
    }
  }
}
72 
/*
 * Emit the document prologue: the HTML 4.01 Transitional DOCTYPE, the HEAD
 * section (Content-Type META tag and a fixed TITLE) and the opening BODY tag.
 *
 * The charset announced in the META tag is derived from `enc'.
 * gen_html4() replaces ENCODING_ASCII by ENCODING_ASCII_HTML before the
 * generators run, so both values are announced as US-ASCII; testing for
 * ENCODING_ASCII alone left the charset empty for ASCII output.
 * Any other encoding yields an empty charset value.
 */
void header(const psiconv_config config, psiconv_list list, const encoding enc)
{
  const char *charset;

  if (enc == ENCODING_UTF8)
    charset = "UTF-8";
  else if (enc == ENCODING_UCS2)
    charset = "UTF-16BE";
  else if ((enc == ENCODING_ASCII) || (enc == ENCODING_ASCII_HTML))
    charset = "US-ASCII";
  else
    charset = "";

  output_simple_chars(config,list,"<!DOCTYPE html PUBLIC "
                                  "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
                                  "\"http://www.w3.org/TR/html4/loose.dtd\">\n",
                      enc);
  output_simple_chars(config,list,"<HTML>\n",enc);
  output_simple_chars(config,list,"<HEAD>\n",enc);
  output_simple_chars(config,list,"<META HTTP-EQUIV=\"Content-Type\" "
                                  "CONTENT=\"text/html; charset=",enc);
  output_simple_chars(config,list,charset,enc);
  output_simple_chars(config,list,"\">\n",enc);
  output_simple_chars(config,list,"<TITLE>EPOC32 file "
                                  "converted by psiconv</TITLE>\n",enc);
  output_simple_chars(config,list,"</HEAD>\n",enc);
  output_simple_chars(config,list,"<BODY>\n",enc);
}
93 
/*
 * Emit the document epilogue: the closing BODY and HTML tags, each followed
 * by a newline.
 */
void footer(const psiconv_config config, psiconv_list list, const encoding enc)
{
  static const char *const closing_tags[] = { "</BODY>\n", "</HTML>\n" };
  unsigned int k;

  for (k = 0; k < sizeof(closing_tags) / sizeof(closing_tags[0]); k++)
    output_simple_chars(config,list,closing_tags[k],enc);
}
99 
character_layout_equal(const psiconv_character_layout l1,const psiconv_character_layout l2)100 int character_layout_equal(const psiconv_character_layout l1,
101                            const psiconv_character_layout l2)
102 {
103   int font_size1,font_size2;
104 
105   font_size1 = l1->font_size < 8  ?1:
106                l1->font_size < 10 ?2:
107                l1->font_size < 13 ?3:
108                l1->font_size < 17 ?4:
109                l1->font_size < 24 ?5:
110                l1->font_size < 36 ?6:7;
111   font_size2 = l2->font_size < 8  ?1:
112                l2->font_size < 10 ?2:
113                l2->font_size < 13 ?3:
114                l2->font_size < 17 ?4:
115                l2->font_size < 24 ?5:
116                l2->font_size < 36 ?6:7;
117 
118   return (l1 && l2 &&
119           (l1->color->red == l2->color->red) &&
120           (l1->color->green == l2->color->green) &&
121           (l1->color->blue == l2->color->blue) &&
122           (font_size1 == font_size2) &&
123           (l1->italic == l2->italic) &&
124           (l1->bold == l2->bold) &&
125           (l1->super_sub == l2->super_sub) &&
126           (l1->underline == l2->underline) &&
127           (l1->strikethrough == l2->strikethrough) &&
128           (l1->font->screenfont == l2->font->screenfont));
129 }
130 
/*
 * Emit the string `textstr' wrapped in the HTML markup for `layout'.
 *
 * The output consists of a FONT element carrying
 *   - face:  serif, sans-serif, monospace or fantasy, chosen from the screen
 *            font (empty for any other screen font value),
 *   - size:  1..7, written only when the font size is below 10 or at
 *            least 13,
 *   - color: "#rrggbb", written only when the colour is not black,
 * with I, B, SUP/SUB, S and U elements nested inside it (in that order) as
 * requested by the layout.  The text itself is written by text().
 */
void characters(const psiconv_config config, psiconv_list list,
                const psiconv_string_t textstr,
		const psiconv_character_layout layout,const encoding enc)
{
  char tempstr[TEMPSTR_LEN];
  const char *face;

  if (layout->font->screenfont == psiconv_font_serif)
    face = "serif";
  else if (layout->font->screenfont == psiconv_font_sansserif)
    face = "sans-serif";
  else if (layout->font->screenfont == psiconv_font_nonprop)
    face = "monospace";
  else if (layout->font->screenfont == psiconv_font_misc)
    face = "fantasy";
  else
    face = "";

  output_simple_chars(config,list,"<FONT face=\"",enc);
  output_simple_chars(config,list,face,enc);
  output_simple_chars(config,list,"\"",enc);

  if ((layout->font_size < 10)  || (layout->font_size >= 13)) {
    output_simple_chars(config,list," size=",enc);
    output_simple_chars(config,list,
                        layout->font_size < 8  ?"1":
                        layout->font_size < 10 ?"2":
                        layout->font_size < 13 ?"3":
                        layout->font_size < 17 ?"4":
                        layout->font_size < 24 ?"5":
                        layout->font_size < 36 ?"6":"7",enc);
  }
  if ((layout->color->red != 0) || (layout->color->green != 0) ||
      (layout->color->blue != 0)) {
    /* The value must be quoted: HTML 4 does not allow '#' in an unquoted
       attribute value. */
    snprintf(tempstr,sizeof(tempstr)," color=\"#%02x%02x%02x\"",
	     layout->color->red,layout->color->green,layout->color->blue);
    output_simple_chars(config,list,tempstr,enc);
  }
  output_simple_chars(config,list,">",enc);

  /* Opening tags; the closing tags below mirror this order. */
  if (layout->italic)
    output_simple_chars(config,list,"<I>",enc);
  if (layout->bold)
    output_simple_chars(config,list,"<B>",enc);
  if (layout->super_sub != psiconv_normalscript)
    output_simple_chars(config,list,
	                layout->super_sub == psiconv_superscript?"<SUP>":
	                layout->super_sub == psiconv_subscript?"<SUB>":
			"",enc);
  if (layout->strikethrough)
    output_simple_chars(config,list,"<S>",enc);
  if (layout->underline)
    output_simple_chars(config,list,"<U>",enc);

  text(config,list,textstr,enc);

  if (layout->underline)
    output_simple_chars(config,list,"</U>",enc);
  if (layout->strikethrough)
    output_simple_chars(config,list,"</S>",enc);
  if (layout->super_sub != psiconv_normalscript)
    output_simple_chars(config,list,
	                layout->super_sub == psiconv_superscript?"</SUP>":
	                layout->super_sub == psiconv_subscript?"</SUB>":
			"",enc);
  if (layout->bold)
    output_simple_chars(config,list,"</B>",enc);
  if (layout->italic)
    output_simple_chars(config,list,"</I>",enc);
  output_simple_chars(config,list,"</FONT>",enc);
}
197 
/*
 * Emit one paragraph.
 *
 * A bulleted paragraph is written as "<UL><LI ...>" and terminated with
 * "</UL>"; any other paragraph is written as "<P ...>" and terminated with
 * a newline only.  Centre, right and full justification are translated to
 * an align attribute.
 *
 * Without in-line layouts the whole text is written with the base character
 * layout.  Otherwise consecutive in-line layouts that compare equal under
 * character_layout_equal() are merged into one run, and every run is
 * written through characters() with the layout of its last element.
 *
 * The program is terminated with exit(1) if a list element cannot be
 * fetched or if memory runs out.
 */
void paragraph(const psiconv_config config, psiconv_list list,
               psiconv_paragraph para, const encoding enc)
{
  int idx,pos,run_start,run_len,merge;
  psiconv_string_t run_text;
  psiconv_in_line_layout cur,following;
  const char *align = NULL;

  if (para->base_paragraph->bullet->on)
    output_simple_chars(config,list,"<UL><LI",enc);
  else
    output_simple_chars(config,list,"<P",enc);

  if (para->base_paragraph->justify_hor == psiconv_justify_centre)
    align = " align=center";
  else if (para->base_paragraph->justify_hor == psiconv_justify_right)
    align = " align=right";
  else if (para->base_paragraph->justify_hor == psiconv_justify_full)
    align = " align=justify";
  if (align)
    output_simple_chars(config,list,align,enc);

  output_simple_chars(config,list,">",enc);

  if (psiconv_list_length(para->in_lines) == 0) {
    if (psiconv_unicode_strlen(para->text))
      characters(config,list,para->text,para->base_character,enc);
  } else {
    pos = 0;        /* offset of the current in-line layout in para->text */
    run_start = 0;  /* offset at which the pending run begins */
    for (idx = 0; idx < psiconv_list_length(para->in_lines); idx++) {
      cur = psiconv_list_get(para->in_lines,idx);
      if (!cur) {
        fputs("Internal data structures corruption\n",stderr);
        exit(1);
      }

      following = NULL;
      if (idx+1 < psiconv_list_length(para->in_lines)) {
        following = psiconv_list_get(para->in_lines,idx+1);
        if (!following) {
          fputs("Internal data structures corruption\n",stderr);
          exit(1);
        }
      }

      /* Keep accumulating while the next element uses an equal layout. */
      merge = following &&
              character_layout_equal(cur->layout,following->layout);

      if (!merge) {
        run_len = pos - run_start + cur->length;
        if (run_len) {
          /* characters() needs a terminated string, so copy the run. */
          run_text = malloc(sizeof (*run_text) * (run_len + 1));
          if (!run_text) {
            fputs("Out of memory error\n",stderr);
            exit(1);
          }
          memcpy(run_text,para->text+run_start,run_len * sizeof(*run_text));
          run_text[run_len] = 0;
          characters(config,list,run_text,cur->layout,enc);
          free(run_text);
        }
      }

      pos += cur->length;
      if (!merge)
        run_start = pos;
    }
  }

  if (para->base_paragraph->bullet->on)
    output_simple_chars(config,list,"</UL>\n",enc);
  else
    output_simple_chars(config,list,"\n",enc);
}
262 
/*
 * Emit every paragraph of the list `paragraphs', in list order, through
 * paragraph().  The program is terminated with exit(1) if an element cannot
 * be fetched from the list.
 */
void paragraphs(const psiconv_config config, psiconv_list list,
                psiconv_text_and_layout paragraphs, const encoding enc)
{
  int idx = 0;
  psiconv_paragraph cur;

  while (idx < psiconv_list_length(paragraphs)) {
    cur = psiconv_list_get(paragraphs,idx);
    if (!cur) {
      fputs("Internal datastructure corruption\n",stderr);
      exit(1);
    }
    paragraph(config,list,cur,enc);
    idx++;
  }
}
276 
/*
 * Emit a complete HTML document for a Word file: prologue, all paragraphs
 * of the file, epilogue.  Nothing at all is written when `file' is NULL.
 */
void gen_word(const psiconv_config config, psiconv_list list,
                    const psiconv_word_f file, const encoding enc)
{
  if (file) {
    header(config,list,enc);
    paragraphs(config,list,file->paragraphs,enc);
    footer(config,list,enc);
  }
}
287 
288 
/*
 * Emit a complete HTML document for a TextEd file: prologue, all paragraphs
 * of the file's TextEd section, epilogue.
 *
 * Nothing at all is written when `file' is NULL; this matches gen_word()
 * and avoids dereferencing a NULL file.
 */
void gen_texted(const psiconv_config config, psiconv_list list,
                    const psiconv_texted_f file, const encoding enc)
{
  if (!file)
    return;

  header(config,list,enc);
  paragraphs(config,list,file->texted_sec->paragraphs,enc);
  footer(config,list,enc);
}
296 
/*
 * Generator entry point for the HTML4 output format.
 *
 * ENCODING_PSION is rejected with a message on stderr.  ENCODING_ASCII is
 * replaced by ENCODING_ASCII_HTML before the document is generated; every
 * other encoding is passed on unchanged.  Only Word and TextEd files are
 * handled.  The `dest' argument is not used by this generator.
 *
 * Returns 0 when a document was generated, -1 for an unsupported encoding
 * or file type.
 */
int gen_html4(const psiconv_config config, psiconv_list list,
              const psiconv_file file, const char *dest,
	      const encoding enc)
{
  encoding effective_enc;

  if (enc == ENCODING_PSION) {
    fputs("Unsupported encoding\n",stderr);
    return -1;
  }
  effective_enc = (enc == ENCODING_ASCII) ? ENCODING_ASCII_HTML : enc;

  if (file->type == psiconv_word_file)
    gen_word(config,list,(psiconv_word_f) file->file,effective_enc);
  else if (file->type == psiconv_texted_file)
    gen_texted(config,list,(psiconv_texted_f) file->file,effective_enc);
  else
    return -1;

  return 0;
}
318 
319 
320 static struct fileformat_s fileformats[] =
321   {
322     {
323       "HTML4",
324       "HTML 4.01 Transitional, without CSS",
325       FORMAT_WORD | FORMAT_TEXTED,
326       gen_html4
327     },
328     {
329       NULL,
330       NULL,
331       0,
332       NULL
333     }
334   };
335 
336 
init_html4(void)337 void init_html4(void)
338 {
339   int i;
340   for (i = 0; fileformats[i].name; i++)
341     psiconv_list_add(fileformat_list,fileformats+i);
342 }
343