1 /*  -*- C++ -*-
2     SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3 
4     SPDX-License-Identifier: MIT
5 */
6 
// Japanese encodings show the following characteristics:
// 1. Kana characters (or hankaku?) often appear with high frequency.
// 2. Kana characters often occur in groups.
// 3. Certain combinations of kana are never used in the Japanese language.
11 
12 #include "nsEUCJPProber.h"
13 
14 namespace kencodingprober
15 {
Reset(void)16 void nsEUCJPProber::Reset(void)
17 {
18     mCodingSM->Reset();
19     mState = eDetecting;
20     mContextAnalyser.Reset();
21     mDistributionAnalyser.Reset();
22 }
23 
HandleData(const char * aBuf,unsigned int aLen)24 nsProbingState nsEUCJPProber::HandleData(const char *aBuf, unsigned int aLen)
25 {
26     if (aLen == 0) {
27         return mState;
28     }
29 
30     nsSMState codingState;
31 
32     for (unsigned int i = 0; i < aLen; i++) {
33         codingState = mCodingSM->NextState(aBuf[i]);
34         if (codingState == eError) {
35             mState = eNotMe;
36             break;
37         }
38         if (codingState == eItsMe) {
39             mState = eFoundIt;
40             break;
41         }
42         if (codingState == eStart) {
43             unsigned int charLen = mCodingSM->GetCurrentCharLen();
44 
45             if (i == 0) {
46                 mLastChar[1] = aBuf[0];
47                 mContextAnalyser.HandleOneChar(mLastChar, charLen);
48                 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
49             } else {
50                 mContextAnalyser.HandleOneChar(aBuf + i - 1, charLen);
51                 mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
52             }
53         }
54     }
55 
56     mLastChar[0] = aBuf[aLen - 1];
57 
58     if (mState == eDetecting) {
59         if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
60             mState = eFoundIt;
61         }
62     }
63 
64     return mState;
65 }
66 
GetConfidence(void)67 float nsEUCJPProber::GetConfidence(void)
68 {
69     float contxtCf = mContextAnalyser.GetConfidence();
70     float distribCf = mDistributionAnalyser.GetConfidence();
71 
72     return (contxtCf > distribCf ? contxtCf : distribCf);
73 }
74 }
75