1 // -*- C++ -*-
2 /* Copyright (c) 2001 Fumitoshi UKAI <ukai@debian.or.jp>
3 
4 This file is part of groff.
5 
6 groff is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10 
11 groff is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
19 
20 #ifndef ENCODING_H
21 #define ENCODING_H
22 
23 #include <config.h>
24 
/* Character type used throughout the encoding layer.
   Without ENABLE_MULTIBYTE a wchar is just a plain char.  With it, a
   wchar is an int; negative values are used for charcode & index. */
#ifndef ENABLE_MULTIBYTE
typedef char wchar;
#else
typedef int wchar;
#endif
30 
31 #include <stdio.h>
32 
33 #ifdef __cplusplus
// Abstract source of bytes.  Only the interface lives here; the
// concrete readers in this header (string-backed and FILE-backed)
// provide the three primitives.
class encoding_istream {
public:
  encoding_istream() {}
  // Virtual so that a reader can be destroyed through a base pointer.
  virtual ~encoding_istream() {}

  // Fetch the next byte and advance past it.
  virtual int getbyte() = 0;

  // Look at the next byte without advancing.
  virtual int peekbyte() = 0;

  // Step back by one byte; `ch' is the byte being returned to the source.
  virtual void ungetbyte(int ch) = 0;
};
42 
43 class encoding_istream_str: public encoding_istream {
44 private:
45   const unsigned char *s;
46   int *i;
encoding_istream_str()47   encoding_istream_str() {};
48 public:
encoding_istream_str(const unsigned char * s0,int * i0)49   encoding_istream_str(const unsigned char *s0, int *i0) : s(s0), i(i0) {};
~encoding_istream_str()50   ~encoding_istream_str() {};
getbyte()51   inline int getbyte() { return s[(*i)++]; };
peekbyte()52   inline int peekbyte() { return s[(*i)]; };
ungetbyte(int ch)53   inline void ungetbyte(int ch) { --(*i); };
54 };
55 
56 class encoding_istream_fp: public encoding_istream {
57 private:
58   FILE *fp;
59 public:
encoding_istream_fp(FILE * fp0)60   encoding_istream_fp(FILE *fp0) : fp(fp0) {};
~encoding_istream_fp()61   ~encoding_istream_fp() {};
getbyte()62   inline int getbyte() { return fgetc(fp); };
peekbyte()63   inline int peekbyte() { int ch = fgetc(fp); ungetc(ch, fp); return ch; };
ungetbyte(int ch)64   inline void ungetbyte(int ch) { ungetc(ch, fp); };
65 };
66 
// Abstract sink for bytes.  The concrete writers in this header
// (buffer-backed and FILE-backed) implement putbyte().
class encoding_ostream {
public:
  encoding_ostream() {}
  // Virtual so that a writer can be destroyed through a base pointer.
  virtual ~encoding_ostream() {}

  // Emit the single byte `ch'.
  virtual void putbyte(unsigned char ch) = 0;
};
73 
74 class encoding_ostream_str: public encoding_ostream {
75 private:
76   unsigned char *s;
77   int *i;
78   int len;
encoding_ostream_str()79   encoding_ostream_str() {};
80 public:
encoding_ostream_str(unsigned char * s0,int * i0,int max)81   encoding_ostream_str(unsigned char *s0, int *i0, int max) : s(s0), i(i0), len(max) {};
~encoding_ostream_str()82   ~encoding_ostream_str() {};
putbyte(unsigned char ch)83   inline void putbyte(unsigned char ch) {
84     if (*i < len)
85       s[(*i)++] = ch;
86   }
87 };
88 
89 class encoding_ostream_fp: public encoding_ostream {
90 private:
91   FILE *fp;
92   const char *format;
93 public:
fp(ofp)94   encoding_ostream_fp(FILE *ofp, const char *fmt = "%c") : fp(ofp), format(fmt) {};
~encoding_ostream_fp()95   ~encoding_ostream_fp() {};
putbyte(unsigned char ch)96   inline void putbyte(unsigned char ch) {
97     fprintf(fp, format, ch);
98   }
99 };
100 
101 class encoding_handler {
102 public:
encoding_handler()103   encoding_handler() {};
~encoding_handler()104   virtual ~encoding_handler() {};
105 
106   // name of this encoding_handler
name()107   virtual const char *name() { return ""; };
108 
109   // check if this byte is byte in multibyte character in this encoding?
is_wchar_byte(unsigned char c)110   virtual int is_wchar_byte(unsigned char c) { return 0; };
111 
112   // make new wchar from c0 (beginning of multibytes) and rest from `in'
make_wchar(unsigned char c0,encoding_istream & in)113   virtual wchar make_wchar(unsigned char c0, encoding_istream& in) {
114     return wchar(c0);
115   }
116   // make new wchar from c0 (beginning of multibytes) and rest from `fp'
make_wchar(unsigned char c0,FILE * fp)117   virtual wchar make_wchar(unsigned char c0, FILE *fp) {
118     encoding_istream_fp in(fp);
119     return make_wchar(c0, in);
120   }
121   // make new wchar from c0 (beginning of multibtyes) and rest from
122   // s[*i], *i will be changed to point the byte of next character.
make_wchar(unsigned char c0,const unsigned char * s,int * i)123   virtual wchar make_wchar(unsigned char c0, const unsigned char *s, int *i) {
124     encoding_istream_str in(s, i);
125     return make_wchar(c0, in);
126   }
127 
128   // put wchar to outputstream
129   // returns number of bytes written
put_wchar(wchar wc,encoding_ostream & eos)130   virtual int put_wchar(wchar wc, encoding_ostream& eos) {
131     eos.putbyte((unsigned char)wc);
132     return 1;
133   }
134   // put wchar to `fp' using `fmt'
135   // returns number of bytes written
136   virtual int put_wchar(wchar wc, FILE *fp, const char *fmt = "%c") {
137     encoding_ostream_fp out(fp, fmt);
138     return put_wchar(wc, out);
139   }
140   // put wchar to s[*i] (until maxlen)
141   // *i will be changed to point the byte of next character.
put_wchar(wchar wc,unsigned char * s,int * i,int maxlen)142   virtual int put_wchar(wchar wc, unsigned char *s, int *i, int maxlen) {
143     encoding_ostream_str out(s, i, maxlen);
144     return put_wchar(wc, out);
145   }
146 
147   // maximum number of bytes of multibyte character in this encoding
max_wchar_len()148   virtual int max_wchar_len() { return 1; };
149 
150 };
151 
152 encoding_handler* select_input_encoding_handler(const char* encoding_name);
153 encoding_handler* select_output_encoding_handler(const char* encoding_name);
154 extern encoding_handler* input_encoding;
155 extern encoding_handler* output_encoding;
156 void init_encoding_handler();
157 
158 // check if wc is wchar?
159 int is_wchar_code(wchar wc);
160 
161 // check if wc is wchar & can be represented in single byte?
162 int is_wchar_singlebyte(wchar wc);
163 
164 // get singlebyte representation of wchar (if is_wchar_singlebyte(wc))
165 unsigned char wchar_singlebyte(wchar wc);
166 
167 // get actual wide character code
168 int wchar_code(wchar wc);
169 
170 // make wchar from wide character code
171 int make_wchar(int w);
172 
173 #endif
174 
175 #endif /* ENCODING_H */
176