1 ///////////////////////////////////////////////////////////////////////
2 // File:        dawg_cache.cpp
3 // Description: A class that knows about loading and caching dawgs.
4 // Author:      David Eger
5 //
6 // (C) Copyright 2012, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #include "dawg_cache.h"
20 
21 #include "dawg.h"
22 #include "object_cache.h"
23 #include "tessdatamanager.h"
24 
25 namespace tesseract {
26 
27 struct DawgLoader {
DawgLoadertesseract::DawgLoader28   DawgLoader(const std::string &lang, TessdataType tessdata_dawg_type, int dawg_debug_level,
29              TessdataManager *data_file)
30       : lang_(lang)
31       , data_file_(data_file)
32       , tessdata_dawg_type_(tessdata_dawg_type)
33       , dawg_debug_level_(dawg_debug_level) {}
34 
35   Dawg *Load();
36 
37   std::string lang_;
38   TessdataManager *data_file_;
39   TessdataType tessdata_dawg_type_;
40   int dawg_debug_level_;
41 };
42 
GetSquishedDawg(const std::string & lang,TessdataType tessdata_dawg_type,int debug_level,TessdataManager * data_file)43 Dawg *DawgCache::GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type,
44                                  int debug_level, TessdataManager *data_file) {
45   std::string data_id = data_file->GetDataFileName();
46   data_id += kTessdataFileSuffixes[tessdata_dawg_type];
47   DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
48   return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
49 }
50 
Load()51 Dawg *DawgLoader::Load() {
52   TFile fp;
53   if (!data_file_->GetComponent(tessdata_dawg_type_, &fp)) {
54     return nullptr;
55   }
56   DawgType dawg_type;
57   PermuterType perm_type;
58   switch (tessdata_dawg_type_) {
59     case TESSDATA_PUNC_DAWG:
60     case TESSDATA_LSTM_PUNC_DAWG:
61       dawg_type = DAWG_TYPE_PUNCTUATION;
62       perm_type = PUNC_PERM;
63       break;
64     case TESSDATA_SYSTEM_DAWG:
65     case TESSDATA_LSTM_SYSTEM_DAWG:
66       dawg_type = DAWG_TYPE_WORD;
67       perm_type = SYSTEM_DAWG_PERM;
68       break;
69     case TESSDATA_NUMBER_DAWG:
70     case TESSDATA_LSTM_NUMBER_DAWG:
71       dawg_type = DAWG_TYPE_NUMBER;
72       perm_type = NUMBER_PERM;
73       break;
74     case TESSDATA_BIGRAM_DAWG:
75       dawg_type = DAWG_TYPE_WORD; // doesn't actually matter
76       perm_type = COMPOUND_PERM;  // doesn't actually matter
77       break;
78     case TESSDATA_UNAMBIG_DAWG:
79       dawg_type = DAWG_TYPE_WORD;
80       perm_type = SYSTEM_DAWG_PERM;
81       break;
82     case TESSDATA_FREQ_DAWG:
83       dawg_type = DAWG_TYPE_WORD;
84       perm_type = FREQ_DAWG_PERM;
85       break;
86     default:
87       return nullptr;
88   }
89   auto *retval = new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_);
90   if (retval->Load(&fp)) {
91     return retval;
92   }
93   delete retval;
94   return nullptr;
95 }
96 
97 } // namespace tesseract
98