1######################## BEGIN LICENSE BLOCK ########################
2# The Original Code is Mozilla Communicator client code.
3#
4# The Initial Developer of the Original Code is
5# Netscape Communications Corporation.
6# Portions created by the Initial Developer are Copyright (C) 1998
7# the Initial Developer. All Rights Reserved.
8#
9# Contributor(s):
10#   Mark Pilgrim - port to Python
11#
12# This library is free software; you can redistribute it and/or
13# modify it under the terms of the GNU Lesser General Public
14# License as published by the Free Software Foundation; either
15# version 2.1 of the License, or (at your option) any later version.
16#
17# This library is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20# Lesser General Public License for more details.
21#
22# You should have received a copy of the GNU Lesser General Public
23# License along with this library; if not, write to the Free Software
24# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25# 02110-1301  USA
26######################### END LICENSE BLOCK #########################
27
28from .enums import ProbingState
29from .charsetprober import CharSetProber
30
31
class CharSetGroupProber(CharSetProber):
    """Composite prober that delegates to the probers in ``self.probers``.

    Input is forwarded to every active member prober.  The group reports
    the charset and language of whichever member is the current best
    guess: the first member to report ``FOUND_IT`` or, failing that, the
    active member with the highest confidence.

    ``self.probers`` starts out empty; presumably subclasses fill it in
    (not shown in this file).  Falsy entries in the list are skipped
    everywhere.
    """

    def __init__(self, lang_filter=None):
        """Set up an empty group; ``lang_filter`` is passed to the base class."""
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        self._best_guess_prober = None
        self.probers = []
        self._active_num = 0

    def reset(self):
        """Reset the group and every member, re-activating all members."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for member in self.probers:
            if not member:
                continue
            member.reset()
            member.active = True
            self._active_num += 1
        # Any previous winner is stale once the members have been reset.
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess member, or ``None`` if there is none."""
        if self._best_guess_prober:
            return self._best_guess_prober.charset_name
        # No winner recorded yet: get_confidence() may select one as a
        # side effect.
        self.get_confidence()
        winner = self._best_guess_prober
        if winner:
            return winner.charset_name
        return None

    @property
    def language(self):
        """Language of the best-guess member, or ``None`` if there is none."""
        if self._best_guess_prober:
            return self._best_guess_prober.language
        # No winner recorded yet: get_confidence() may select one as a
        # side effect.
        self.get_confidence()
        winner = self._best_guess_prober
        if winner:
            return winner.language
        return None

    def feed(self, byte_str):
        """Pass ``byte_str`` to each active member and return the group state.

        Feeding stops as soon as one member reports ``FOUND_IT`` (that
        member becomes the best guess) or once no active members remain
        (the group becomes ``NOT_ME``).
        """
        for member in self.probers:
            if not (member and member.active):
                continue
            verdict = member.feed(byte_str)
            if not verdict:
                # Falsy verdict: nothing decisive from this member.
                continue
            if verdict == ProbingState.FOUND_IT:
                self._best_guess_prober = member
                self._state = ProbingState.FOUND_IT
                break
            if verdict == ProbingState.NOT_ME:
                member.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    break
        return self.state

    def get_confidence(self):
        """Return the group's confidence and refresh the best-guess member.

        Returns 0.99 when the group state is ``FOUND_IT`` and 0.01 when it
        is ``NOT_ME``.  Otherwise the active members are polled, the one
        with the highest confidence is recorded as the best guess, and its
        confidence is returned (0.0 when no member qualifies).
        """
        current = self.state
        if current == ProbingState.FOUND_IT:
            return 0.99
        if current == ProbingState.NOT_ME:
            return 0.01

        top_score = 0.0
        self._best_guess_prober = None
        for member in self.probers:
            if not member:
                continue
            if not member.active:
                self.logger.debug('%s not active', member.charset_name)
                continue
            score = member.get_confidence()
            self.logger.debug('%s %s confidence = %s',
                              member.charset_name, member.language, score)
            # Strict comparison: on ties the earlier member is kept, and a
            # score of 0.0 never selects a member.
            if top_score < score:
                top_score = score
                self._best_guess_prober = member
        return top_score if self._best_guess_prober else 0.0