1 /*
2 * xml_getencoding.c
3 *
4 * Copyright (c) Chris Putnam 2007-2020
5 *
6 * Source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "charsets.h"
13 #include "str.h"
14 #include "str_conv.h"
15 #include "xml.h"
16 #include "xml_encoding.h"
17
18 static int
xml_getencodingr(xml * node)19 xml_getencodingr( xml *node )
20 {
21 int n = CHARSET_UNKNOWN, m;
22 str *s;
23 char *t;
24
25 if ( xml_tag_matches( node, "xml" ) ) {
26 s = xml_attribute( node, "encoding" );
27 if ( str_has_value( s ) ) {
28 t = str_cstr( s );
29 if ( !strcasecmp( t, "UTF-8" ) )
30 n = CHARSET_UNICODE;
31 else if ( !strcasecmp( t, "UTF8" ) )
32 n = CHARSET_UNICODE;
33 else if ( !strcasecmp( t, "GB18030" ) )
34 n = CHARSET_GB18030;
35 else n = charset_find( t );
36 if ( n==CHARSET_UNKNOWN ) {
37 // Patch: Disable output logging
38 }
39 }
40 }
41 if ( node->down ) {
42 m = xml_getencodingr( node->down );
43 if ( m!=CHARSET_UNKNOWN ) n = m;
44 }
45 if ( node->next ) {
46 m = xml_getencodingr( node->next );
47 if ( m!=CHARSET_UNKNOWN ) n = m;
48 }
49
50 return n;
51 }
52
53 int
xml_getencoding(str * s)54 xml_getencoding( str *s )
55 {
56 int file_charset = CHARSET_UNKNOWN;
57 str descriptor;
58 xml descriptxml;
59 char *p, *q;
60
61 p = strstr( str_cstr( s ), "<?xml" );
62 if ( !p ) p = strstr( str_cstr( s ), "<?XML" );
63 if ( p ) {
64 q = strstr( p, "?>" );
65 if ( q ) {
66 str_init( &descriptor );
67 str_segcpy( &descriptor, p, q+2 );
68 xml_init( &descriptxml );
69 xml_parse( str_cstr( &descriptor ), &descriptxml );
70 file_charset = xml_getencodingr( &descriptxml );
71 xml_free( &descriptxml );
72 str_free( &descriptor );
73 str_segdel( s, p, q+2 );
74 }
75 }
76 return file_charset;
77 }
78