1######################## BEGIN LICENSE BLOCK ########################
2# The Original Code is mozilla.org code.
3#
4# The Initial Developer of the Original Code is
5# Netscape Communications Corporation.
6# Portions created by the Initial Developer are Copyright (C) 1998
7# the Initial Developer. All Rights Reserved.
8#
9# Contributor(s):
10#   Mark Pilgrim - port to Python
11#
12# This library is free software; you can redistribute it and/or
13# modify it under the terms of the GNU Lesser General Public
14# License as published by the Free Software Foundation; either
15# version 2.1 of the License, or (at your option) any later version.
16#
17# This library is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20# Lesser General Public License for more details.
21#
22# You should have received a copy of the GNU Lesser General Public
23# License along with this library; if not, write to the Free Software
24# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25# 02110-1301  USA
26######################### END LICENSE BLOCK #########################
27
28from .enums import MachineState
29
30# BIG5
31
# Byte-class table for Big5: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
BIG5_CLS = (
    (1,) * 14      # 00 - 0d  (0x00 is allowed as a legal value)
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 36    # 1c - 3f
    + (2,) * 63    # 40 - 7e
    + (1,)         # 7f
    + (4,) * 33    # 80 - a0
    + (3,) * 94    # a1 - fe
    + (0,)         # ff
)
66
# Big5 state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes BIG5_SM_MODEL.
BIG5_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,     3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
)
72
# Character length associated with each Big5 byte class (index = class).
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)

# Everything describing the Big5 state machine, gathered in one mapping.
BIG5_SM_MODEL = {
    'class_table': BIG5_CLS,
    'class_factor': 5,
    'state_table': BIG5_ST,
    'char_len_table': BIG5_CHAR_LEN_TABLE,
    'name': 'Big5',
}
80
81# CP949
82
# Byte-class table for CP949: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
CP949_CLS = (
    (1,) * 14      # 00 - 0d
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 37    # 1c - 40
    + (4,) * 17    # 41 - 51
    + (5,) * 9     # 52 - 5a
    + (1,) * 6     # 5b - 60
    + (5,) * 26    # 61 - 7a
    + (1,) * 5     # 7b - 7f
    + (0,)         # 80
    + (6,) * 32    # 81 - a0
    + (7,) * 12    # a1 - ac
    + (8,) * 3     # ad - af
    + (7,) * 22    # b0 - c5
    + (9,)         # c6
    + (2,) * 2     # c7 - c8
    + (3,)         # c9
    + (2,) * 53    # ca - fe
    + (0,)         # ff
)
101
# CP949 state-transition table, stored as one flat tuple.  As the annotations
# inside the table show, each source line is the row for one previous state
# (named in the trailing comment) and its ten columns are byte classes 0-9.
# Entries are the resulting state: a MachineState member or a bare integer
# naming one of the numbered rows (3-6).
CP949_ST = (
#cls=    0      1      2      3      4      5      6      7      8      9  # previous state =
    MachineState.ERROR,MachineState.START,     3,MachineState.ERROR,MachineState.START,MachineState.START,     4,     5,MachineState.ERROR,     6, # MachineState.START
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
)
112
# Character length associated with each CP949 byte class (index = class).
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

# Everything describing the CP949 state machine, gathered in one mapping.
CP949_SM_MODEL = {
    'class_table': CP949_CLS,
    'class_factor': 10,
    'state_table': CP949_ST,
    'char_len_table': CP949_CHAR_LEN_TABLE,
    'name': 'CP949',
}
120
121# EUC-JP
122
# Byte-class table for EUC-JP: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
EUCJP_CLS = (
    (4,) * 14      # 00 - 0d
    + (5,) * 2     # 0e - 0f
    + (4,) * 11    # 10 - 1a
    + (5,)         # 1b
    + (4,) * 100   # 1c - 7f
    + (5,) * 14    # 80 - 8d
    + (1,)         # 8e
    + (3,)         # 8f
    + (5,) * 17    # 90 - a0
    + (2,) * 63    # a1 - df
    + (0,) * 31    # e0 - fe
    + (5,)         # ff
)
157
# EUC-JP state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes EUCJP_SM_MODEL.
EUCJP_ST = (
          3,     4,     3,     5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
     MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
     MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
     MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     3,MachineState.ERROR,#18-1f
          3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)
165
# Character length associated with each EUC-JP byte class (index = class).
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)

# Everything describing the EUC-JP state machine, gathered in one mapping.
EUCJP_SM_MODEL = {
    'class_table': EUCJP_CLS,
    'class_factor': 6,
    'state_table': EUCJP_ST,
    'char_len_table': EUCJP_CHAR_LEN_TABLE,
    'name': 'EUC-JP',
}
173
174# EUC-KR
175
# Byte-class table for EUC-KR: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
EUCKR_CLS = (
    (1,) * 14      # 00 - 0d
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 100   # 1c - 7f
    + (0,) * 33    # 80 - a0
    + (2,) * 12    # a1 - ac
    + (3,) * 3     # ad - af
    + (2,) * 25    # b0 - c8
    + (3,)         # c9
    + (2,) * 53    # ca - fe
    + (0,)         # ff
)
210
# EUC-KR state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integer looks like an intermediate state number, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes EUCKR_SM_MODEL.
EUCKR_ST = (
    MachineState.ERROR,MachineState.START,     3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
)
215
# Character length associated with each EUC-KR byte class (index = class).
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)

# Everything describing the EUC-KR state machine, gathered in one mapping.
EUCKR_SM_MODEL = {
    'class_table': EUCKR_CLS,
    'class_factor': 4,
    'state_table': EUCKR_ST,
    'char_len_table': EUCKR_CHAR_LEN_TABLE,
    'name': 'EUC-KR',
}
223
224# EUC-TW
225
# Byte-class table for EUC-TW: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
EUCTW_CLS = (
    (2,) * 14      # 00 - 0d
    + (0,) * 2     # 0e - 0f
    + (2,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (2,) * 100   # 1c - 7f
    + (0,) * 14    # 80 - 8d
    + (6,)         # 8e
    + (0,) * 18    # 8f - a0
    + (3,)         # a1
    + (4,) * 6     # a2 - a7
    + (5,) * 2     # a8 - a9
    + (1,) * 24    # aa - c1
    + (3,)         # c2
    + (1,)         # c3
    + (3,) * 59    # c4 - fe
    + (0,)         # ff
)
260
# EUC-TW state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes EUCTW_SM_MODEL.
EUCTW_ST = (
    MachineState.ERROR,MachineState.ERROR,MachineState.START,     3,     3,     3,     4,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
    MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
         5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
    MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
269
# Character length associated with each EUC-TW byte class (index = class).
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)

# Everything describing the EUC-TW state machine, gathered in one mapping.
EUCTW_SM_MODEL = {
    'class_table': EUCTW_CLS,
    'class_factor': 7,
    'state_table': EUCTW_ST,
    'char_len_table': EUCTW_CHAR_LEN_TABLE,
    'name': 'x-euc-tw',
}
277
278# GB2312
279
# Byte-class table for GB2312: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
GB2312_CLS = (
    (1,) * 14      # 00 - 0d
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 20    # 1c - 2f
    + (3,) * 10    # 30 - 39
    + (1,) * 6     # 3a - 3f
    + (2,) * 63    # 40 - 7e
    + (4,)         # 7f
    + (5,)         # 80
    + (6,) * 126   # 81 - fe
    + (0,)         # ff
)
314
# GB2312 state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes GB2312_SM_MODEL.
GB2312_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,     3,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
         4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ERROR,MachineState.ERROR,     5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
323
# Character length associated with each GB2312 byte class (index = class).
# Strictly speaking, a class-6 character can be either 2 or 4 bytes long.
# The two cases need not be told apart here: the length is only used for
# frequency analysis, which validates each code range itself, so recording
# 2 is safe.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

# Everything describing the GB2312 state machine, gathered in one mapping.
GB2312_SM_MODEL = {
    'class_table': GB2312_CLS,
    'class_factor': 7,
    'state_table': GB2312_ST,
    'char_len_table': GB2312_CHAR_LEN_TABLE,
    'name': 'GB2312',
}
336
337# Shift_JIS
338
# Byte-class table for Shift_JIS: entry i is the class assigned to byte
# value i.  Expressed as consecutive runs of equal classes; every run is
# annotated with the (hex) byte range it covers.  The runs add up to 256
# entries.
SJIS_CLS = (
    (1,) * 14      # 00 - 0d
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 36    # 1c - 3f
    + (2,) * 63    # 40 - 7e
    + (1,)         # 7f
    + (3,) * 5     # 80 - 84
    + (2,) * 2     # 85 - 86
    + (3,) * 25    # 87 - 9f
    # 0xa0 is illegal in the SJIS encoding, but some pages do contain that
    # byte, so it is tolerated here to be more forgiving of errors.
    + (2,) * 64    # a0 - df
    + (3,) * 13    # e0 - ec
    + (4,) * 3     # ed - ef
    + (3,) * 13    # f0 - fc
    + (0,) * 3     # fd - ff
)
374
375
# Shift_JIS state-transition table, stored as one flat tuple.  Each source
# line holds eight entries and its trailing "#xx-yy" comment gives the
# hexadecimal index range of those entries.  Entries are MachineState members
# or bare integers.
# NOTE(review): the bare integer looks like an intermediate state number, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes SJIS_SM_MODEL.
SJIS_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,     3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)
381
# Character length associated with each Shift_JIS byte class (index = class).
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)

# Everything describing the Shift_JIS state machine, gathered in one mapping.
SJIS_SM_MODEL = {
    'class_table': SJIS_CLS,
    'class_factor': 6,
    'state_table': SJIS_ST,
    'char_len_table': SJIS_CHAR_LEN_TABLE,
    'name': 'Shift_JIS',
}
389
390# UCS2-BE
391
# Byte-class table for UCS2-BE: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
UCS2BE_CLS = (
    (0,) * 10      # 00 - 09
    + (1,)         # 0a
    + (0,) * 2     # 0b - 0c
    + (2,)         # 0d
    + (0,) * 13    # 0e - 1a
    + (3,)         # 1b
    + (0,) * 13    # 1c - 28
    + (3,) * 5     # 29 - 2d
    + (0,) * 208   # 2e - fd
    + (4,)         # fe
    + (5,)         # ff
)
426
# UCS2-BE state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes UCS2BE_SM_MODEL.
UCS2BE_ST  = (
          5,     7,     7,MachineState.ERROR,     4,     3,MachineState.ERROR,MachineState.ERROR,#00-07
     MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
     MachineState.ITS_ME,MachineState.ITS_ME,     6,     6,     6,     6,MachineState.ERROR,MachineState.ERROR,#10-17
          6,     6,     6,     6,     6,MachineState.ITS_ME,     6,     6,#18-1f
          6,     6,     6,     6,     5,     7,     7,MachineState.ERROR,#20-27
          5,     8,     6,     6,MachineState.ERROR,     6,     6,     6,#28-2f
          6,     6,     6,     6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
436
# Character length associated with each UCS2-BE byte class (index = class).
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)

# Everything describing the UCS2-BE state machine, gathered in one mapping.
UCS2BE_SM_MODEL = {
    'class_table': UCS2BE_CLS,
    'class_factor': 6,
    'state_table': UCS2BE_ST,
    'char_len_table': UCS2BE_CHAR_LEN_TABLE,
    'name': 'UTF-16BE',
}
444
445# UCS2-LE
446
# Byte-class table for UCS2-LE: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
UCS2LE_CLS = (
    (0,) * 10      # 00 - 09
    + (1,)         # 0a
    + (0,) * 2     # 0b - 0c
    + (2,)         # 0d
    + (0,) * 13    # 0e - 1a
    + (3,)         # 1b
    + (0,) * 13    # 1c - 28
    + (3,) * 5     # 29 - 2d
    + (0,) * 208   # 2e - fd
    + (4,)         # fe
    + (5,)         # ff
)
481
# UCS2-LE state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes UCS2LE_SM_MODEL.
UCS2LE_ST = (
          6,     6,     7,     6,     4,     3,MachineState.ERROR,MachineState.ERROR,#00-07
     MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
     MachineState.ITS_ME,MachineState.ITS_ME,     5,     5,     5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
          5,     5,     5,MachineState.ERROR,     5,MachineState.ERROR,     6,     6,#18-1f
          7,     6,     8,     8,     5,     5,     5,MachineState.ERROR,#20-27
          5,     5,     5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     5,     5,#28-2f
          5,     5,     5,MachineState.ERROR,     5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
491
# Character length associated with each UCS2-LE byte class (index = class).
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)

# Everything describing the UCS2-LE state machine, gathered in one mapping.
UCS2LE_SM_MODEL = {
    'class_table': UCS2LE_CLS,
    'class_factor': 6,
    'state_table': UCS2LE_ST,
    'char_len_table': UCS2LE_CHAR_LEN_TABLE,
    'name': 'UTF-16LE',
}
499
500# UTF-8
501
# Byte-class table for UTF-8: entry i is the class assigned to byte value i.
# Expressed as consecutive runs of equal classes; every run is annotated with
# the (hex) byte range it covers.  The runs add up to 256 entries.
UTF8_CLS = (
    (1,) * 14      # 00 - 0d  (0x00 is allowed as a legal value)
    + (0,) * 2     # 0e - 0f
    + (1,) * 11    # 10 - 1a
    + (0,)         # 1b
    + (1,) * 100   # 1c - 7f
    + (2,) * 4     # 80 - 83
    + (3,) * 4     # 84 - 87
    + (4,) * 24    # 88 - 9f
    + (5,) * 32    # a0 - bf
    + (0,) * 2     # c0 - c1
    + (6,) * 30    # c2 - df
    + (7,)         # e0
    + (8,) * 12    # e1 - ec
    + (9,)         # ed
    + (8,) * 2     # ee - ef
    + (10,)        # f0
    + (11,) * 7    # f1 - f7
    + (12,)        # f8
    + (13,) * 3    # f9 - fb
    + (14,)        # fc
    + (15,)        # fd
    + (0,) * 2     # fe - ff
)
536
# UTF-8 state-transition table, stored as one flat tuple.  Each source line
# holds eight entries and its trailing "#xx-yy" comment gives the hexadecimal
# index range of those entries.  Entries are MachineState members or bare
# integers.
# NOTE(review): the bare integers look like intermediate state numbers, and
# the table is presumably indexed as state * class_factor + byte class --
# confirm against the code that consumes UTF8_SM_MODEL.
UTF8_ST = (
    MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     12,   10,#00-07
         9,     11,     8,     7,     6,     5,     4,    3,#08-0f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
    MachineState.ERROR,MachineState.ERROR,     5,     5,     5,     5,MachineState.ERROR,MachineState.ERROR,#30-37
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     5,     5,     5,MachineState.ERROR,MachineState.ERROR,#40-47
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
    MachineState.ERROR,MachineState.ERROR,     7,     7,     7,     7,MachineState.ERROR,MachineState.ERROR,#50-57
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     7,     7,MachineState.ERROR,MachineState.ERROR,#60-67
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
    MachineState.ERROR,MachineState.ERROR,     9,     9,     9,     9,MachineState.ERROR,MachineState.ERROR,#70-77
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,     9,MachineState.ERROR,MachineState.ERROR,#80-87
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
    MachineState.ERROR,MachineState.ERROR,    12,    12,    12,    12,MachineState.ERROR,MachineState.ERROR,#90-97
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,    12,MachineState.ERROR,MachineState.ERROR,#a0-a7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
    MachineState.ERROR,MachineState.ERROR,    12,    12,    12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)
565
# Character length associated with each UTF-8 byte class (index = class).
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Everything describing the UTF-8 state machine, gathered in one mapping.
UTF8_SM_MODEL = {
    'class_table': UTF8_CLS,
    'class_factor': 16,
    'state_table': UTF8_ST,
    'char_len_table': UTF8_CHAR_LEN_TABLE,
    'name': 'UTF-8',
}
573