1 /*
2 **  mod_html2hdml.c -- Apache html2hdml module
3 */
4 #include "httpd.h"
5 #include "http_config.h"
6 #include "http_protocol.h"
7 #include "ap_config.h"
8 #include "http_log.h"
9 //#include "alloc.h"
10 #include "fnmatch.h"
11 #include <sys/stat.h>
12 #include <unistd.h>
13 
14 #include <stdio.h>
15 #include <string.h>
16 
17 #include "strinput.h"
18 #include "html2hdml.h"
19 
20 #include "mod_html2hdml.h"
21 
22 #ifdef ZODIAX_WITH_HTML2HDML
23 #include "module.h"
24 #endif
25 
26 #include "config.h"
27 
/* Values used for the state/header/post fields of html2hdml_conf.
   UNSET marks a field that still holds its "inherit" placeholder. */
#define UNSET (-1)
#define OFF (0)
#define ON (1)

/* Debug aid: prints the file and line where it is expanded.  The expansion
   already ends with ';', so it forms a complete statement by itself. */
#define WATCHPOINT printf("WATCHPOINT %s %d\n", __FILE__, __LINE__);
32 
33 module MODULE_VAR_EXPORT html2hdml_module;
34 
35 struct pool *gl_pool;
36 
set_opt_for_apmodule(void)37 int set_opt_for_apmodule(void)
38 {
39   gl_clientinfo.row    = 10;
40   gl_clientinfo.column = 10;
41 
42   gl_convertopt.a_href_html2hdml = 0;
43   gl_convertopt.img              = 1;
44   gl_convertopt.img_src_gif2bmp  = 1;
45   gl_convertopt.img_alt          = 0;
46 
47 
48   return 0;
49 }
50 #define MOD_HTML2HDML_DEBUG
/*
 * Read everything from fp and feed it to the HTML parser
 * (set_inputstr() followed by parse_html()).
 *
 * fp : input stream.  NULL is accepted and treated as "nothing to do".
 *      The stream is NOT closed here; it stays owned by the caller.
 * r  : current request.  Only used when MOD_HTML2HDML is defined, in which
 *      case its pool and the request itself are published through gl_pool
 *      and gl_convertopt.apache_r.
 *
 * Returns 0 on success (and for fp == NULL), -1 when the input could not be
 * read or a buffer could not be allocated.
 */
int html2hdml_convert(FILE *fp, request_rec *r)
{
  char buf_stack[BUFFER_SIZE];
  char *buf = buf_stack;   /* text handed to the parser */
  char *heap = NULL;       /* non-NULL once the input outgrew buf_stack */
  size_t n;
  int bufsize;             /* bytes held in buf, including the trailing NUL */
  int header_size = 0;     /* bytes of HTTP response header to skip */

  set_opt_for_apmodule();
#ifdef MOD_HTML2HDML
  gl_pool = r->pool;

  gl_convertopt.apache_r         = r;
#endif

  if (fp == NULL) return 0;

  /* Fast path: small inputs fit entirely in the stack buffer. */
  n = fread(buf_stack, sizeof(char), BUFFER_SIZE-1, fp);
  buf_stack[n] = '\0';
  bufsize = (int) n + 1;

  if (!feof(fp)) {
    /* A short read without EOF means the stream failed; looping on feof()
       alone would never terminate in that case. */
    if (ferror(fp)) {
      fprintf(stderr, "error: failed to read input HTML.\n");
      return -1;
    }

    heap = malloc((size_t) bufsize);
    if (heap == NULL) {
      fprintf(stderr, "error: out of memory while reading input HTML.\n");
      return -1;
    }
    memcpy(heap, buf_stack, n);

    while (!feof(fp)) {
      n = fread(buf_stack, sizeof(char), BUFFER_SIZE-1, fp);
      if (n > 0) {
        /* Grow through a temporary so the old block is not lost on failure. */
        char *grown = realloc(heap, (size_t) bufsize + n);
        if (grown == NULL) {
          fprintf(stderr, "error: out of memory while reading input HTML.\n");
          free(heap);
          return -1;
        }
        heap = grown;
        /* bufsize counts the NUL slot, so new data starts at bufsize-1. */
        memcpy(heap + bufsize - 1, buf_stack, n);
        bufsize += (int) n;
      }
      if (ferror(fp)) {
        fprintf(stderr, "error: failed to read input HTML.\n");
        free(heap);
        return -1;
      }
    }
    heap[bufsize-1] = '\0';
    buf = heap;
  }

#ifdef MOD_HTML2HDML_DEBUG /* must refine */
  {
    /* If the captured text starts with an HTTP status line, skip everything
       up to and including the blank line that ends the header block. */
    char *header_end = strstr(buf, "\r\n\r\n");
    if ((strncmp(buf, "HTTP/", 5) == 0) && header_end) {
      header_size = (int) (header_end - buf) + 4;
    }
  }
#endif

  if (bufsize > 1500) {
    fprintf(stderr, "warning: input HTML is over 1500bytes.\n");
  }

  set_inputstr(buf+header_size, bufsize-1-header_size);

  parse_html();

  free(heap);
  return 0;
}
127 
/*
 * Per-directory config constructor.
 *
 * Allocates a zeroed html2hdml_conf from pool p and fills in the defaults:
 * state and header UNSET, post ON, directory "/tmp", and two empty tables
 * (types, uris_ignore).  The dir argument is not used.
 *
 * Returns the new configuration as void *.
 */
void *html2hdml_create_dir_mconfig(pool *p, char *dir) {
  html2hdml_conf *conf = ap_pcalloc(p, sizeof(*conf));

  conf->state       = UNSET;
  conf->header      = UNSET;  /* never set ON. harmful. */
  conf->post        = ON;
  conf->directory   = ap_pstrdup(p, "/tmp");
  conf->types       = ap_make_table(p, 8);
  conf->uris_ignore = ap_make_table(p, 8);

  return conf;
}
141 
/*
 * Per-directory config merger.
 *
 * origin : the outer (parent) html2hdml_conf.
 * new    : the inner (overriding) html2hdml_conf.
 *
 * Returns a fresh html2hdml_conf allocated from p:
 *   - state, header : taken from `new` unless it is UNSET, then from `origin`.
 *   - post          : always taken from `new`.
 *   - directory     : `new`'s value unless it is the "/tmp" default, in
 *                     which case `origin`'s value is used.
 *   - types, uris_ignore : overlay of `new` over `origin`.
 */
static void *html2hdml_merge_dir_mconfig(pool *p, void *origin, void *new) {
  html2hdml_conf *parent = (html2hdml_conf *) origin;
  html2hdml_conf *child  = (html2hdml_conf *) new;
  html2hdml_conf *cfg    = ap_pcalloc(p, sizeof(*cfg));
  const char *dir;

  cfg->state  = (child->state  == UNSET) ? parent->state  : child->state;
  cfg->header = (child->header == UNSET) ? parent->header : child->header;

  /* NOTE(review): post is never UNSET (the constructor defaults it to ON),
     so a value set only in the parent is not inherited here. */
  cfg->post = child->post;

  /* "/tmp" doubles as the "not configured" marker for the directory.  When
     the child still has it, the parent's value is used; that is "/tmp"
     again if neither level changed it. */
  dir = strcmp(child->directory, "/tmp") ? child->directory : parent->directory;
  cfg->directory = ap_pstrdup(p, dir);

  /* The overlays create new tables, so nothing is pre-allocated for them. */
  cfg->types = ap_overlay_tables(p, child->types, parent->types);
  cfg->uris_ignore = ap_overlay_tables(p, child->uris_ignore, parent->uris_ignore);

  return cfg;
}
169 
/*
 * Tell whether a table value is "switched on".
 *
 * Returns 1 when a is non-NULL and its first character is '1', 0 otherwise.
 */
int check_table(const char *a) {
  return (a != NULL && a[0] == '1') ? 1 : 0;
}
178 
table_find(const table * t,const char * key)179 int table_find(const table * t, const char *key) {
180   array_header *hdrs_arr = ap_table_elts(t);
181   table_entry *elts = (table_entry *) hdrs_arr->elts;
182   int i;
183 
184   if (key == NULL)
185     return 0;
186 
187   for (i = 0; i < hdrs_arr->nelts; ++i) {
188     if (!ap_fnmatch(elts[i].key, key, FNM_PATHNAME | FNM_CASE_BLIND))
189       if(check_table(elts[i].val))
190 	return 1;
191   }
192 
193   return 0;
194 }
195 
196 
197 
/*
 * Run the original request as a sub-request and import its response headers.
 *
 * r            : the main request; the sub-request uses r->method and
 *                r->unparsed_uri, and receives a copy of r->args.
 * assbackwards : value stored in the sub-request's assbackwards field.
 *
 * After the sub-request ran, the client buffer is flushed and every output
 * header of the sub-request except Content-Length is copied into
 * r->headers_out.
 *
 * Returns the sub-request's status field when that is 302, otherwise the
 * value returned by ap_run_sub_req().
 */
static int call_main(request_rec *r, int assbackwards) {
  int status;
  int sub_status;
  int i;
  request_rec *subr;
  array_header *out_hdrs;
  table_entry *elts;

  subr = (request_rec *) ap_sub_req_method_uri((char *) r->method, r->unparsed_uri, r);

  subr->args = ap_pstrdup(subr->pool, r->args);
  subr->assbackwards = assbackwards;
  status = ap_run_sub_req(subr); /* status != subr->status */
  ap_bflush(subr->connection->client);

  /* Everything that reads from subr must happen before it is destroyed:
     the sub-request record lives in the pool ap_destroy_sub_req() frees.
     ap_table_set() copies key and value, so the imported headers do not
     keep pointing into that pool. */
  out_hdrs = ap_table_elts(subr->headers_out);
  elts = (table_entry *) out_hdrs->elts;
  for (i = 0; i < out_hdrs->nelts; i++) {
    const char *key = elts[i].key;
    if (key && (strcmp(key, "Content-Length") != 0)) {
      ap_table_set(r->headers_out, key, elts[i].val);
    }
  }
  sub_status = subr->status;

  ap_destroy_sub_req(subr);

  if (sub_status == 302) { /* moved temporarily */
    return sub_status;
  }
  return status;
}
237 
/*
 * Run `uri` as a POST sub-request of r, describing the cached output to it
 * through environment variables.
 *
 * uri            : target of the sub-request.
 * html2hdmlcache : value exported as HTML2HDML_CACHE.
 * content_length : value placed in the sub-request's Content-Length header.
 *
 * Also exported: HTML2HDML_SCRIPT_NAME (r->uri) and, when present,
 * HTML2HDML_INFO (r->path_info) and HTML2HDML_QUERY_STRING (r->args).
 *
 * Returns the result of ap_run_sub_req().
 */
static int call_container(request_rec *r, const char *uri, const char *html2hdmlcache, const char *content_length) {
  request_rec *sub = (request_rec *) ap_sub_req_method_uri("POST", uri, r);
  int rc;

  sub->assbackwards = 0;

  /* Original author's note: Content-Length is spoofed for the benefit of
     CGIs.  When it is set, mod_cgi tries to read a request body; the real
     body has already been consumed, so Apache would deadlock reading from
     an empty stream.  The caller therefore supplies the value to present. */
  ap_table_set(sub->headers_in, "Content-Length", content_length);

  ap_table_set(sub->subprocess_env, "HTML2HDML_SCRIPT_NAME", r->uri);
  if (r->path_info != NULL) {
    ap_table_set(sub->subprocess_env, "HTML2HDML_INFO", r->path_info);
  }
  if (r->args != NULL) {
    ap_table_set(sub->subprocess_env, "HTML2HDML_QUERY_STRING", r->args);
  }
  ap_table_set(sub->subprocess_env, "HTML2HDML_CACHE", html2hdmlcache);

  rc = ap_run_sub_req(sub);
  ap_destroy_sub_req(sub);

  return rc;
}
265 
/*
 * Run the cached file through the "server-parsed" handler as a GET
 * sub-request of r.
 *
 * html2hdmlcache : used both as the sub-request URI and as its filename.
 *
 * Returns the result of ap_run_sub_req().
 */
static int call_ssi(request_rec *r, const char *html2hdmlcache) {
  request_rec *sub = (request_rec *) ap_sub_req_method_uri("GET", html2hdmlcache, r);
  int rc;

  sub->assbackwards = 0;

  /* Original author's note: Content-Length is spoofed so that a handler
     which reads a request body when the header is set does not try to read
     from a stream that has already been consumed. */
  ap_table_set(sub->headers_in, "Content-Length", "0");

  sub->filename = ap_pstrdup(sub->pool, html2hdmlcache);
  sub->handler = ap_pstrdup(sub->pool, "server-parsed");

  /* This really should only be needed if SSI is the last of the called
     handlers. */
  sub->content_type = "text/html";

  /* Faked: a non-zero st_mode is stored without stat()ing the file. */
  sub->finfo.st_mode = 1;

  rc = ap_run_sub_req(sub);
  ap_destroy_sub_req(sub);

  return rc;
}
293 
/*
 * Fixup hook: route the request to this module's content handler when the
 * client accepts HDML.
 *
 * Nothing is changed when the module is not switched on for the directory
 * (state < ON), for sub-requests, or for header-only requests.  Otherwise
 * every "Accept" request header is split on ',' and, if one item equals
 * "text/x-hdml;version=2.0" (case-insensitively, leading blanks skipped),
 * r->handler is set to HTML2HDML_MOD_NAME.
 *
 * With ZODIAX_WITH_HTML2HDML the handler name is first reset from
 * HTML2HDML_MOD_NAME to "zodiac-handler", and is only switched back when it
 * currently is "zodiac-handler".
 *
 * A disabled (#if 0) filter used to sit between the early exits and the
 * Accept scan: it looked the handler / content type up in cfg->types and
 * matched r->uri against cfg->uris_ignore.  It was not compiled in, so
 * those two tables are not consulted by this hook.
 *
 * Always returns DECLINED so that other fixup hooks still run.
 */
int html2hdml_fixup(request_rec *r) {
  html2hdml_conf *cfg;
  array_header *in_hdrs;
  table_entry *elts;
  int i;

#ifdef ZODIAX_WITH_HTML2HDML
  cfg =
    ((zodiac_dir_config *)
     ap_get_module_config(r->per_dir_config, &zodiac_module))->html2hdml_cfg;
#else
  cfg = (html2hdml_conf *)
    ap_get_module_config(r->per_dir_config, &html2hdml_module);
#endif

#ifdef ZODIAX_WITH_HTML2HDML
  if (r->handler && (strcmp(r->handler, HTML2HDML_MOD_NAME) == 0))
    r->handler = "zodiac-handler";
#endif

  if (cfg->state < ON) {
    return DECLINED;
  }
  if (r->main) {
    return DECLINED;
  }
  /* If this is a HEAD only, we really don't need to involve ourselves. */
  if (r->header_only) {
    return DECLINED;
  }

  in_hdrs = ap_table_elts(r->headers_in);
  elts = (table_entry *) in_hdrs->elts;
  for (i = 0; i < in_hdrs->nelts; ++i) {
    char *p;

    if (elts[i].key == NULL || strcasecmp(elts[i].key, "accept") != 0) {
      continue;
    }

    p = elts[i].val;
    while (*p != '\0') {
      /* ap_getword_nc() returns the next ','-separated item and advances p. */
      char *word = ap_getword_nc(r->pool, &p, ',');
      if (word == NULL) {
        break;
      }
      while (*word == ' ') word++;
      if (strcasecmp(word, "text/x-hdml;version=2.0") == 0) {
#ifdef ZODIAX_WITH_HTML2HDML
        if (r->handler && (strcmp(r->handler, "zodiac-handler") == 0))
#endif
          r->handler = HTML2HDML_MOD_NAME;
        return DECLINED;
      }
    }
  }

  return DECLINED;
}
390 
/*
 * Content handler: produce the normal response for the request, capture it
 * in a temporary file, and send its HDML conversion to the client.
 *
 * Steps:
 *   1. Decide whether the request is ours at all (main request, GET,
 *      module enabled, and - without zodiax - an existing file).
 *   2. Point the client buffer's output descriptor at a private temp file
 *      and run the request again as a sub-request via call_main().
 *   3. Restore the descriptor, send our own header with the HDML content
 *      type, and run html2hdml_convert() over the captured output.
 *   4. Close and remove the temp file on every path past its creation.
 *
 * Returns OK on success, DECLINED when the request is not handled here,
 * NOT_FOUND / FORBIDDEN / HTTP_INTERNAL_SERVER_ERROR on local failures, or
 * the non-OK status reported by call_main().
 */
int html2hdml_handler(request_rec *r) {
  int status;
  int temp_fd;
  int fd_out;
  int assbackwards;
  char *filename;
  html2hdml_conf *cfg;
  FILE *fp;

  if (r->main) {
    return DECLINED;
  }

  /* All applicability checks come before any side effect: declining after
     the sub-request has already run would execute the request twice. */
  if (r->method_number != M_GET) {
    /* r->allowed is a bit mask of (1 << M_xxx) values. */
    r->allowed |= (1 << M_GET);
    return DECLINED;
  }

#ifdef ZODIAX_WITH_HTML2HDML
  cfg =
    ((zodiac_dir_config *)
     ap_get_module_config(r->per_dir_config, &zodiac_module))->html2hdml_cfg;
#else
  cfg = (html2hdml_conf *)
    ap_get_module_config(r->per_dir_config, &html2hdml_module);
#endif

  if (cfg == NULL)
    return DECLINED;

  if (cfg->state < ON)
    return DECLINED;

#ifndef ZODIAX_WITH_HTML2HDML
  /* Outside a zodiax build the request must map to an existing file. */
  if (r->finfo.st_mode == 0)
    return NOT_FOUND;
#endif

  /* Logic is reversed for assbackwards: the sub-request emits its own
     header block only when Html2hdmlHeader is On. */
  assbackwards = (cfg->header == ON) ? 0 : 1;

  filename = ap_psprintf(r->pool, "%s/.mod_html2hdml.%d",
                         cfg->directory, (int) getpid());

  ap_rflush(r);

  /* The name is predictable and the directory may be shared, so drop any
     stale entry and insist on creating the file ourselves: O_EXCL refuses
     pre-existing files and symlinks planted under that name. */
  unlink(filename);
  if ((temp_fd = open(filename, O_RDWR|O_CREAT|O_EXCL, S_IRWXU)) < 0) {
    ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
                  "Bad mojo, mod_html2hdml couldn't create a file : %s",
                  filename);

    return HTTP_INTERNAL_SERVER_ERROR;
  }

  /* Capture the sub-request's output: swap the client's output descriptor
     for the temp file while call_main() runs, then put it back. */
  fd_out = r->connection->client->fd;
  r->connection->client->fd = temp_fd;
  status = call_main(r, assbackwards);
  r->connection->client->fd = fd_out;

  if (status != OK) {
    goto cleanup;
  }

  /* The former Html2hdmlPost branch redirected the client's *input*
     descriptor (fd_in) to the temp file and never restored it, leaving the
     connection with a closed descriptor once this handler returned.
     Nothing below reads request input, so no redirection is done. */

  fp = ap_pfopen (r->pool, filename, "rb");
  if (fp == NULL) {
    ap_log_reason ("request file permissions deny", filename, r);
    status = FORBIDDEN;
    goto cleanup;
  }

  r->content_type = "text/x-hdml;charset=Shift_JIS";

  ap_soft_timeout ("send", r);
  ap_send_http_header (r);

  /* The header is already on the wire, so a conversion failure can only
     be logged. */
  if (html2hdml_convert(fp, r) != 0) {
    ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
                  "mod_html2hdml: conversion failed for %s", filename);
  }

  ap_rputs("", r);

  ap_kill_timeout (r);
  ap_pfclose (r->pool, fp);
  status = OK;

cleanup:
  close(temp_fd);
  unlink(filename);

  return status;
}
567 
/*
 * Handler for the Html2hdmlType directive: records mime_type in the
 * directory config's `types` table with the value "1".
 *
 * Returns NULL (no error message).
 */
static const char *add_html2hdml(cmd_parms * cmd, void *mconfig, char *mime_type) {
  html2hdml_conf *conf = mconfig;

  ap_table_set(conf->types, mime_type, "1");
  return NULL;
}
575 
/*
 * Handler for the Html2hdmlIgnore directive: records uri in the directory
 * config's `uris_ignore` table with the value "1".
 *
 * Returns NULL (no error message).
 */
static const char *ignore_uri(cmd_parms * cmd, void *mconfig, char *uri) {
  html2hdml_conf *conf = mconfig;

  ap_table_set(conf->uris_ignore, uri, "1");
  return NULL;
}
583 
584 /* Dispatch list of content handlers */
585 static const handler_rec html2hdml_handlers[] = {
586   { HTML2HDML_MOD_NAME, html2hdml_handler },
587   { NULL, NULL }
588 };
589 
590 static const command_rec html2hdml_cmds[] = {
591   {"Html2hdmlType", add_html2hdml, NULL, OR_ALL, TAKE1, "Takes two parameters, the mime type/handler and the uri to call on it."},
592   {"Html2hdml", ap_set_flag_slot, (void *) XtOffsetOf(html2hdml_conf, state), OR_ALL, FLAG, "This can either be On or Off (default it Off)."},
593   {"Html2hdmlHeader", ap_set_flag_slot, (void *) XtOffsetOf(html2hdml_conf, header), OR_ALL, FLAG, "This can either be On or Off (default it Off)."},
594   {"Html2hdmlPost", ap_set_flag_slot, (void *) XtOffsetOf(html2hdml_conf, post), OR_ALL, FLAG, "This can either be On or Off (default it On)."},
595   {"Html2hdmlCache", ap_set_string_slot, (void *) XtOffsetOf(html2hdml_conf, directory), OR_ALL, TAKE1, "Change the default directory from /tmp."},
596   {"Html2hdmlIgnore", ignore_uri, NULL, OR_ALL, TAKE1, "Change the default directory from /tmp."},
597   {NULL},
598 };
599 
/*
 * Module initializer: adds "html2hdml/<VERSION>" to the server version
 * string.
 *
 * The component is formatted into pool p, so its length is not limited by
 * a fixed-size buffer.  The server_rec argument is not used.
 */
static void html2hdml_init(server_rec * s, pool * p) {
  /* Tell apache we're here */
  const char ver[] = VERSION;

  ap_add_version_component(ap_psprintf(p, "html2hdml/%s", ver));
}
607 
608 /* Dispatch list for API hooks */
609 module MODULE_VAR_EXPORT html2hdml_module = {
610   STANDARD_MODULE_STUFF,
611   html2hdml_init,        /* module initializer                  */
612   html2hdml_create_dir_mconfig,    /* create per-dir    config structures */
613   html2hdml_merge_dir_mconfig,     /* merge  per-dir    config structures */
614   NULL,                  /* create per-server config structures */
615   NULL,                  /* merge  per-server config structures */
616   html2hdml_cmds,        /* table of config file commands       */
617   html2hdml_handlers,    /* [#8] MIME-typed-dispatched handlers */
618   NULL,                  /* [#1] URI to filename translation    */
619   NULL,                  /* [#4] validate user id from request  */
620   NULL,                  /* [#5] check if the user is ok _here_ */
621   NULL,                  /* [#3] check access by host address   */
622   NULL,                  /* [#6] determine MIME type            */
623   html2hdml_fixup,       /* [#7] pre-run fixups                 */
624   NULL,                  /* [#9] log a transaction              */
625   NULL,                  /* [#2] header parser                  */
626   NULL,                  /* child_init                          */
627   NULL,                  /* child_exit                          */
628   NULL                   /* [#0] post read-request              */
629 #ifdef EAPI
630   ,NULL,                  /* EAPI: add_module                    */
631   NULL,                  /* EAPI: remove_module                 */
632   NULL,                  /* EAPI: rewrite_command               */
633   NULL                   /* EAPI: new_connection                */
634 #endif
635 };
636