1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*  -*- C++ -*-
3 *  Copyright (C) 1998 <developer@mozilla.org>
4 *
5 *
6 *  Permission is hereby granted, free of charge, to any person obtaining
7 *  a copy of this software and associated documentation files (the
8 *  "Software"), to deal in the Software without restriction, including
9 *  without limitation the rights to use, copy, modify, merge, publish,
10 *  distribute, sublicense, and/or sell copies of the Software, and to
11 *  permit persons to whom the Software is furnished to do so, subject to
12 *  the following conditions:
13 *
14 *  The above copyright notice and this permission notice shall be included
15 *  in all copies or substantial portions of the Software.
16 *
17 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
// For Japanese encodings, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language
30 
31 
32 
33 #include "nsEUCJPProber.h"
34 
35 namespace qencodingprober {
Reset(void)36 void  nsEUCJPProber::Reset(void)
37 {
38   mCodingSM->Reset();
39   mState = eDetecting;
40   mContextAnalyser.Reset();
41   mDistributionAnalyser.Reset();
42 }
43 
HandleData(const char * aBuf,unsigned int aLen)44 nsProbingState nsEUCJPProber::HandleData(const char* aBuf, unsigned int aLen)
45 {
46   nsSMState codingState;
47 
48   for (unsigned int i = 0; i < aLen; i++)
49   {
50     codingState = mCodingSM->NextState(aBuf[i]);
51     if (codingState == eError)
52     {
53       mState = eNotMe;
54       break;
55     }
56     if (codingState == eItsMe)
57     {
58       mState = eFoundIt;
59       break;
60     }
61     if (codingState == eStart)
62     {
63       unsigned int charLen = mCodingSM->GetCurrentCharLen();
64 
65       if (i == 0)
66       {
67         mLastChar[1] = aBuf[0];
68         mContextAnalyser.HandleOneChar(mLastChar, charLen);
69         mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
70       }
71       else
72       {
73         mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
74         mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
75       }
76     }
77   }
78 
79   mLastChar[0] = aBuf[aLen-1];
80 
81   if (mState == eDetecting)
82     if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
83       mState = eFoundIt;
84 
85   return mState;
86 }
87 
GetConfidence(void)88 float nsEUCJPProber::GetConfidence(void)
89 {
90   float contxtCf = mContextAnalyser.GetConfidence();
91   float distribCf = mDistributionAnalyser.GetConfidence();
92 
93   return (contxtCf > distribCf ? contxtCf : distribCf);
94 }
95 }
96 
97 
98