1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        wx/encconv.h
3 // Purpose:     wxEncodingConverter class for converting between different
4 //              font encodings
5 // Author:      Vaclav Slavik
6 // Copyright:   (c) 1999 Vaclav Slavik
7 // Licence:     wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9 
10 #ifndef _WX_ENCCONV_H_
11 #define _WX_ENCCONV_H_
12 
13 #include "wx/defs.h"
14 
15 #include "wx/object.h"
16 #include "wx/fontenc.h"
17 #include "wx/dynarray.h"
18 
19 // ----------------------------------------------------------------------------
20 // constants
21 // ----------------------------------------------------------------------------
22 
// Conversion methods accepted by wxEncodingConverter::Init(); they select
// what happens to characters that do not exist in the output encoding.
enum
{
    // Copy unconvertible characters to the output unchanged.
    wxCONVERT_STRICT,

    // Try (lossy) substitutions for unconvertible characters.
    wxCONVERT_SUBSTITUTE
};
28 
29 
// Platform selectors accepted by wxEncodingConverter::GetPlatformEquivalents().
enum
{
    // The platform this binary was compiled for.
    wxPLATFORM_CURRENT = -1,

    // Explicit platforms; the numbering starts at 0 and is consecutive.
    wxPLATFORM_UNIX = 0,
    wxPLATFORM_WINDOWS,
    wxPLATFORM_OS2,
    wxPLATFORM_MAC
};
39 
40 // ----------------------------------------------------------------------------
41 // types
42 // ----------------------------------------------------------------------------
43 
44 WX_DEFINE_ARRAY_INT(wxFontEncoding, wxFontEncodingArray);
45 
46 //--------------------------------------------------------------------------------
47 // wxEncodingConverter
48 //                  This class is capable of converting strings between any two
49 //                  8bit encodings/charsets. It can also convert from/to Unicode
50 //--------------------------------------------------------------------------------
51 
52 class WXDLLIMPEXP_BASE wxEncodingConverter : public wxObject
53 {
54     public:
55 
56             wxEncodingConverter();
~wxEncodingConverter()57             virtual ~wxEncodingConverter() { if (m_Table) delete[] m_Table; }
58 
59             // Initialize conversion. Both output or input encoding may
60             // be wxFONTENCODING_UNICODE, but only if wxUSE_WCHAR_T is set to 1.
61             //
62             // All subsequent calls to Convert() will interpret it's argument
63             // as a string in input_enc encoding and will output string in
64             // output_enc encoding.
65             //
66             // You must call this method before calling Convert. You may call
67             // it more than once in order to switch to another conversion
68             //
69             // Method affects behaviour of Convert() in case input character
70             // cannot be converted because it does not exist in output encoding:
71             //     wxCONVERT_STRICT --
72             //              follow behaviour of GNU Recode - just copy unconvertable
73             //              characters to output and don't change them (it's integer
74             //              value will stay the same)
75             //     wxCONVERT_SUBSTITUTE --
76             //              try some (lossy) substitutions - e.g. replace
77             //              unconvertable latin capitals with acute by ordinary
78             //              capitals, replace en-dash or em-dash by '-' etc.
79             //     both modes gurantee that output string will have same length
80             //     as input string
81             //
82             // Returns false if given conversion is impossible, true otherwise
83             // (conversion may be impossible either if you try to convert
84             // to Unicode with non-Unicode build of wxWidgets or if input
85             // or output encoding is not supported.)
86             bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method = wxCONVERT_STRICT);
87 
88             // Convert input string according to settings passed to Init.
89             // Note that you must call Init before using Convert!
90             bool Convert(const char* input, char* output) const;
Convert(char * str)91             bool Convert(char* str) const { return Convert(str, str); }
92             wxString Convert(const wxString& input) const;
93 
94 #if wxUSE_WCHAR_T
95             bool Convert(const char* input, wchar_t* output) const;
96             bool Convert(const wchar_t* input, char* output) const;
97             bool Convert(const wchar_t* input, wchar_t* output) const;
Convert(wchar_t * str)98             bool Convert(wchar_t* str) const { return Convert(str, str); }
99 #endif
100             // Return equivalent(s) for given font that are used
101             // under given platform. wxPLATFORM_CURRENT means the plaform
102             // this binary was compiled for
103             //
104             // Examples:
105             //     current platform          enc    returned value
106             // -----------------------------------------------------
107             //     unix                   CP1250         {ISO8859_2}
108             //     unix                ISO8859_2                  {}
109             //     windows             ISO8859_2            {CP1250}
110             //
111             // Equivalence is defined in terms of convertibility:
112             // 2 encodings are equivalent if you can convert text between
113             // then without loosing information (it may - and will - happen
114             // that you loose special chars like quotation marks or em-dashes
115             // but you shouldn't loose any diacritics and language-specific
116             // characters when converting between equivalent encodings).
117             //
118             // Convert() method is not limited to converting between
119             // equivalent encodings, it can convert between arbitrary
120             // two encodings!
121             //
122             // Remember that this function does _NOT_ check for presence of
123             // fonts in system. It only tells you what are most suitable
124             // encodings. (It usually returns only one encoding)
125             //
126             // Note that argument enc itself may be present in returned array!
127             // (so that you can -- as a side effect -- detect whether the
128             // encoding is native for this platform or not)
129             static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, int platform = wxPLATFORM_CURRENT);
130 
131             // Similar to GetPlatformEquivalent, but this one will return ALL
132             // equivalent encodings, regardless the platform, including itself.
133             static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
134 
135             // Return true if [any text in] one multibyte encoding can be
136             // converted to another one losslessly.
137             //
138             // Do not call this with wxFONTENCODING_UNICODE, it doesn't make
139             // sense (always works in one sense and always depends on the text
140             // to convert in the other)
CanConvert(wxFontEncoding encIn,wxFontEncoding encOut)141             static bool CanConvert(wxFontEncoding encIn, wxFontEncoding encOut)
142             {
143                 return GetAllEquivalents(encIn).Index(encOut) != wxNOT_FOUND;
144             }
145 
146     private:
147 
148 #if wxUSE_WCHAR_T
149             wchar_t *m_Table;
150 #else
151             char *m_Table;
152 #endif
153             bool m_UnicodeInput, m_UnicodeOutput;
154             bool m_JustCopy;
155 
156     DECLARE_NO_COPY_CLASS(wxEncodingConverter)
157 };
158 
159 #endif  // _WX_ENCCONV_H_
160