1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* -*- C++ -*-
3 * Copyright (C) 1998 <developer@mozilla.org>
4 *
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
// For Japanese encodings, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often exist in groups
// 3. certain combinations of kana are never used in the Japanese language
30
31
32
33 #include "nsEUCJPProber.h"
34
35 namespace qencodingprober {
Reset(void)36 void nsEUCJPProber::Reset(void)
37 {
38 mCodingSM->Reset();
39 mState = eDetecting;
40 mContextAnalyser.Reset();
41 mDistributionAnalyser.Reset();
42 }
43
HandleData(const char * aBuf,unsigned int aLen)44 nsProbingState nsEUCJPProber::HandleData(const char* aBuf, unsigned int aLen)
45 {
46 nsSMState codingState;
47
48 for (unsigned int i = 0; i < aLen; i++)
49 {
50 codingState = mCodingSM->NextState(aBuf[i]);
51 if (codingState == eError)
52 {
53 mState = eNotMe;
54 break;
55 }
56 if (codingState == eItsMe)
57 {
58 mState = eFoundIt;
59 break;
60 }
61 if (codingState == eStart)
62 {
63 unsigned int charLen = mCodingSM->GetCurrentCharLen();
64
65 if (i == 0)
66 {
67 mLastChar[1] = aBuf[0];
68 mContextAnalyser.HandleOneChar(mLastChar, charLen);
69 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
70 }
71 else
72 {
73 mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
74 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
75 }
76 }
77 }
78
79 mLastChar[0] = aBuf[aLen-1];
80
81 if (mState == eDetecting)
82 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
83 mState = eFoundIt;
84
85 return mState;
86 }
87
GetConfidence(void)88 float nsEUCJPProber::GetConfidence(void)
89 {
90 float contxtCf = mContextAnalyser.GetConfidence();
91 float distribCf = mDistributionAnalyser.GetConfidence();
92
93 return (contxtCf > distribCf ? contxtCf : distribCf);
94 }
95 }
96
97
98