1 /**********************************************************************
2  * File:        pango_font_info.cpp
3  * Description: Font-related objects and helper functions
4  * Author:      Ranjith Unnikrishnan
5  *
6  * (C) Copyright 2013, Google Inc.
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  * http://www.apache.org/licenses/LICENSE-2.0
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #  include "config_auto.h"
22 #endif
23 
24 #if (defined __MINGW32__) || (defined __CYGWIN__)
25 // workaround for stdlib.h and putenv
26 #  undef __STRICT_ANSI__
27 #endif
28 
29 #include "commandlineflags.h"
30 #include "fileio.h"
31 #include "normstrngs.h"
32 #include "pango_font_info.h"
33 #include "tlog.h"
34 
35 #include <tesseract/unichar.h>
36 
37 #include "pango/pango.h"
38 #include "pango/pangocairo.h"
39 #include "pango/pangofc-font.h"
40 
41 #include <algorithm>
42 #include <cstdio>
43 #include <cstdlib>
44 #include <cstring>
45 
46 #ifndef _MSC_VER
47 #  include <sys/param.h>
48 #endif
49 
50 #define DISABLE_HEAP_LEAK_CHECK
51 
52 using namespace tesseract;
53 
54 namespace tesseract {
55 
// Default assumed output resolution. Required only for providing font metrics
// in pixels. Used as the initial value of PangoFontInfo::resolution_.
const int kDefaultResolution = 300;

// Directory of fonts and directory of the FontConfig cache/config currently in
// effect. Both are set by HardInitFontConfig(); an empty fonts_dir_ is what
// SoftInitFontConfig() treats as "not yet initialized".
std::string PangoFontInfo::fonts_dir_;
std::string PangoFontInfo::cache_dir_;
62 
get_glyph(PangoFont * font,gunichar wc)63 static PangoGlyph get_glyph(PangoFont *font, gunichar wc) {
64 #if PANGO_VERSION_CHECK(1, 44, 0)
65   // pango_font_get_hb_font requires Pango 1.44 or newer.
66   hb_font_t *hb_font = pango_font_get_hb_font(font);
67   hb_codepoint_t glyph;
68   hb_font_get_nominal_glyph(hb_font, wc, &glyph);
69 #else
70   // Use deprecated pango_fc_font_get_glyph for older Pango versions.
71   PangoGlyph glyph = pango_fc_font_get_glyph(PANGO_FC_FONT(font), wc);
72 #endif
73   return glyph;
74 }
75 
PangoFontInfo()76 PangoFontInfo::PangoFontInfo() : desc_(nullptr), resolution_(kDefaultResolution) {
77   Clear();
78 }
79 
PangoFontInfo(const std::string & desc)80 PangoFontInfo::PangoFontInfo(const std::string &desc)
81     : desc_(nullptr), resolution_(kDefaultResolution) {
82   if (!ParseFontDescriptionName(desc)) {
83     tprintf("ERROR: Could not parse %s\n", desc.c_str());
84     Clear();
85   }
86 }
87 
Clear()88 void PangoFontInfo::Clear() {
89   font_size_ = 0;
90   family_name_.clear();
91   font_type_ = UNKNOWN;
92   if (desc_) {
93     pango_font_description_free(desc_);
94     desc_ = nullptr;
95   }
96 }
97 
~PangoFontInfo()98 PangoFontInfo::~PangoFontInfo() {
99   pango_font_description_free(desc_);
100 }
101 
DescriptionName() const102 std::string PangoFontInfo::DescriptionName() const {
103   if (!desc_) {
104     return "";
105   }
106   char *desc_str = pango_font_description_to_string(desc_);
107   std::string desc_name(desc_str);
108   g_free(desc_str);
109   return desc_name;
110 }
111 
112 // If not already initialized, initializes FontConfig by setting its
113 // environment variable and creating a fonts.conf file that points to the
114 // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
115 /* static */
SoftInitFontConfig()116 void PangoFontInfo::SoftInitFontConfig() {
117   if (fonts_dir_.empty()) {
118     HardInitFontConfig(FLAGS_fonts_dir.c_str(), FLAGS_fontconfig_tmpdir.c_str());
119   }
120 }
121 
122 // Re-initializes font config, whether or not already initialized.
123 // If already initialized, any existing cache is deleted, just to be sure.
124 /* static */
HardInitFontConfig(const char * fonts_dir,const char * cache_dir)125 void PangoFontInfo::HardInitFontConfig(const char *fonts_dir, const char *cache_dir) {
126   if (!cache_dir_.empty()) {
127     File::DeleteMatchingFiles(File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
128   }
129   const int MAX_FONTCONF_FILESIZE = 1024;
130   char fonts_conf_template[MAX_FONTCONF_FILESIZE];
131   cache_dir_ = cache_dir;
132   fonts_dir_ = fonts_dir;
133   snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
134            "<?xml version=\"1.0\"?>\n"
135            "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
136            "<fontconfig>\n"
137            "<dir>%s</dir>\n"
138            "<cachedir>%s</cachedir>\n"
139            "<config></config>\n"
140            "</fontconfig>\n",
141            fonts_dir, cache_dir);
142   std::string fonts_conf_file = File::JoinPath(cache_dir, "fonts.conf");
143   File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
144 #ifdef _WIN32
145   std::string env("FONTCONFIG_PATH=");
146   env.append(cache_dir);
147   _putenv(env.c_str());
148   _putenv("LANG=en_US.utf8");
149 #else
150   setenv("FONTCONFIG_PATH", cache_dir, true);
151   // Fix the locale so that the reported font names are consistent.
152   setenv("LANG", "en_US.utf8", true);
153 #endif // _WIN32
154 
155   if (FcInitReinitialize() != FcTrue) {
156     tprintf("FcInitiReinitialize failed!!\n");
157   }
158   FontUtils::ReInit();
159   // Clear Pango's font cache too.
160   pango_cairo_font_map_set_default(nullptr);
161 }
162 
ListFontFamilies(PangoFontFamily *** families,int * n_families)163 static void ListFontFamilies(PangoFontFamily ***families, int *n_families) {
164   PangoFontInfo::SoftInitFontConfig();
165   PangoFontMap *font_map = pango_cairo_font_map_get_default();
166   DISABLE_HEAP_LEAK_CHECK;
167   pango_font_map_list_families(font_map, families, n_families);
168 }
169 
ParseFontDescription(const PangoFontDescription * desc)170 bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
171   Clear();
172   const char *family = pango_font_description_get_family(desc);
173   if (!family) {
174     char *desc_str = pango_font_description_to_string(desc);
175     tprintf("WARNING: Could not parse family name from description: '%s'\n", desc_str);
176     g_free(desc_str);
177     return false;
178   }
179   family_name_ = std::string(family);
180   desc_ = pango_font_description_copy(desc);
181 
182   // Set font size in points
183   font_size_ = pango_font_description_get_size(desc);
184   if (!pango_font_description_get_size_is_absolute(desc)) {
185     font_size_ /= PANGO_SCALE;
186   }
187 
188   return true;
189 }
190 
ParseFontDescriptionName(const std::string & name)191 bool PangoFontInfo::ParseFontDescriptionName(const std::string &name) {
192   PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
193   bool success = ParseFontDescription(desc);
194   pango_font_description_free(desc);
195   return success;
196 }
197 
198 // Returns the PangoFont structure corresponding to the closest available font
199 // in the font map. Note that if the font is wholly missing, this could
200 // correspond to a completely different font family and face.
ToPangoFont() const201 PangoFont *PangoFontInfo::ToPangoFont() const {
202   SoftInitFontConfig();
203   PangoFontMap *font_map = pango_cairo_font_map_get_default();
204   PangoContext *context = pango_context_new();
205   pango_cairo_context_set_resolution(context, resolution_);
206   pango_context_set_font_map(context, font_map);
207   PangoFont *font = nullptr;
208   {
209     DISABLE_HEAP_LEAK_CHECK;
210     font = pango_font_map_load_font(font_map, context, desc_);
211   }
212   g_object_unref(context);
213   return font;
214 }
215 
CoversUTF8Text(const char * utf8_text,int byte_length) const216 bool PangoFontInfo::CoversUTF8Text(const char *utf8_text, int byte_length) const {
217   PangoFont *font = ToPangoFont();
218   if (font == nullptr) {
219     // Font not found.
220     return false;
221   }
222   PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
223   for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
224        it != UNICHAR::end(utf8_text, byte_length); ++it) {
225     if (IsWhitespace(*it) || pango_is_zero_width(*it)) {
226       continue;
227     }
228     if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
229       char tmp[5];
230       int len = it.get_utf8(tmp);
231       tmp[len] = '\0';
232       tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
233       pango_coverage_unref(coverage);
234       g_object_unref(font);
235       return false;
236     }
237   }
238   pango_coverage_unref(coverage);
239   g_object_unref(font);
240   return true;
241 }
242 
// This variant of strncpy permits src and dest to overlap, provided dest does
// not start after src: bytes are copied front to back, one at a time.
//
// Copies bytes from src to dest until n bytes have been written or a nul byte
// is reached in src; any remaining part of the n bytes is filled with nul
// bytes. With n == 0 nothing is read or written. Returns dest.
static char *my_strnmove(char *dest, const char *src, size_t n) {
  char *ret = dest;

  // Copy characters until n reaches zero or the src byte is a nul. Testing
  // before each copy keeps n == 0 from wrapping around and never looks past
  // the terminating nul of src.
  while (n && *src) {
    *dest = *src;
    --n;
    ++dest;
    ++src;
  }

  // If we reached a nul byte and there are more 'n' left, zero them out.
  while (n) {
    *dest = '\0';
    --n;
    ++dest;
  }
  return ret;
}
264 
DropUncoveredChars(std::string * utf8_text) const265 int PangoFontInfo::DropUncoveredChars(std::string *utf8_text) const {
266   int num_dropped_chars = 0;
267   PangoFont *font = ToPangoFont();
268   if (font == nullptr) {
269     // Font not found, drop all characters.
270     num_dropped_chars = utf8_text->length();
271     utf8_text->clear();
272     return num_dropped_chars;
273   }
274   PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
275   // Maintain two iterators that point into the string. For space efficiency, we
276   // will repeatedly copy one covered UTF8 character from one to the other, and
277   // at the end resize the string to the right length.
278   char *out = const_cast<char *>(utf8_text->c_str());
279   const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
280   const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text->c_str(), utf8_text->length());
281   for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
282     // Skip bad utf-8.
283     if (!it.is_legal()) {
284       ++it; // One suitable error message will still be issued.
285       continue;
286     }
287     int unicode = *it;
288     int utf8_len = it.utf8_len();
289     const char *utf8_char = it.utf8_data();
290     // Move it forward before the data gets modified.
291     ++it;
292     if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
293         pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
294       if (TLOG_IS_ON(2)) {
295         UNICHAR unichar(unicode);
296         char *str = unichar.utf8_str();
297         tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
298         delete[] str;
299       }
300       ++num_dropped_chars;
301       continue;
302     }
303     my_strnmove(out, utf8_char, utf8_len);
304     out += utf8_len;
305   }
306   pango_coverage_unref(coverage);
307   g_object_unref(font);
308   utf8_text->resize(out - utf8_text->c_str());
309   return num_dropped_chars;
310 }
311 
GetSpacingProperties(const std::string & utf8_char,int * x_bearing,int * x_advance) const312 bool PangoFontInfo::GetSpacingProperties(const std::string &utf8_char, int *x_bearing,
313                                          int *x_advance) const {
314   // Convert to equivalent PangoFont structure
315   PangoFont *font = ToPangoFont();
316   if (!font) {
317     return false;
318   }
319   // Find the glyph index in the font for the supplied utf8 character.
320   int total_advance = 0;
321   int min_bearing = 0;
322   // Handle multi-unicode strings by reporting the left-most position of the
323   // x-bearing, and right-most position of the x-advance if the string were to
324   // be rendered.
325   const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
326   const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), utf8_char.length());
327   for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
328     PangoGlyph glyph_index = get_glyph(font, *it);
329     if (!glyph_index) {
330       // Glyph for given unicode character doesn't exist in font.
331       g_object_unref(font);
332       return false;
333     }
334     // Find the ink glyph extents for the glyph
335     PangoRectangle ink_rect, logical_rect;
336     pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
337     pango_extents_to_pixels(&ink_rect, nullptr);
338     pango_extents_to_pixels(&logical_rect, nullptr);
339 
340     int bearing = total_advance + PANGO_LBEARING(ink_rect);
341     if (it == it_begin || bearing < min_bearing) {
342       min_bearing = bearing;
343     }
344     total_advance += PANGO_RBEARING(logical_rect);
345   }
346   *x_bearing = min_bearing;
347   *x_advance = total_advance;
348   g_object_unref(font);
349   return true;
350 }
351 
CanRenderString(const char * utf8_word,int len) const352 bool PangoFontInfo::CanRenderString(const char *utf8_word, int len) const {
353   std::vector<std::string> graphemes;
354   return CanRenderString(utf8_word, len, &graphemes);
355 }
356 
// Returns true if the first `len` bytes of `utf8_word` can be rendered with
// this font without producing a "bad" glyph, i.e. a glyph carrying
// PANGO_GLYPH_UNKNOWN_FLAG or the font's dotted-circle glyph.
//
// If `graphemes` is non-null it is cleared on entry, receives the text of each
// cluster Pango reports while the word is scanned, and is cleared again if a
// bad glyph is found. Returns false without doing any layout when the font
// does not cover the text (see CoversUTF8Text).
bool PangoFontInfo::CanRenderString(const char *utf8_word, int len,
                                    std::vector<std::string> *graphemes) const {
  if (graphemes) {
    graphemes->clear();
  }
  // We check for font coverage of the text first, as otherwise Pango could
  // (undesirably) fall back to another font that does have the required
  // coverage.
  if (!CoversUTF8Text(utf8_word, len)) {
    return false;
  }
  // U+25CC dotted circle character that often (but not always) gets rendered
  // when there is an illegal grapheme sequence.
  const char32 kDottedCircleGlyph = 9676;
  bool bad_glyph = false;
  // Lay the word out with Pango using this font's description.
  PangoFontMap *font_map = pango_cairo_font_map_get_default();
  PangoContext *context = pango_context_new();
  pango_context_set_font_map(context, font_map);
  PangoLayout *layout;
  {
    // Pango is not releasing the cached layout.
    DISABLE_HEAP_LEAK_CHECK;
    layout = pango_layout_new(context);
  }
  if (desc_) {
    pango_layout_set_font_description(layout, desc_);
  } else {
    // No stored description: DescriptionName() yields an empty string here, so
    // the layout gets whatever Pango parses from "".
    PangoFontDescription *desc = pango_font_description_from_string(DescriptionName().c_str());
    pango_layout_set_font_description(layout, desc);
    pango_font_description_free(desc);
  }
  pango_layout_set_text(layout, utf8_word, len);
  PangoLayoutIter *run_iter = nullptr;
  { // Fontconfig caches some information here that is not freed before exit.
    DISABLE_HEAP_LEAK_CHECK;
    run_iter = pango_layout_get_iter(layout);
  }
  // Walk every run of the layout; stop early once a bad glyph has been seen.
  do {
    PangoLayoutRun *run = pango_layout_iter_get_run_readonly(run_iter);
    if (!run) {
      tlog(2, "Found end of line nullptr run marker\n");
      // `continue` in a do/while jumps to the loop condition, which advances
      // the iterator to the next run.
      continue;
    }
    PangoGlyph dotted_circle_glyph;
    PangoFont *font = run->item->analysis.font;

    // Glyph index of the dotted circle in the font actually used by this run.
    dotted_circle_glyph = get_glyph(font, kDottedCircleGlyph);

    if (TLOG_IS_ON(2)) {
      PangoFontDescription *desc = pango_font_describe(font);
      char *desc_str = pango_font_description_to_string(desc);
      tlog(2, "Desc of font in run: %s\n", desc_str);
      g_free(desc_str);
      pango_font_description_free(desc);
    }

    // Visit the clusters of this run; each cluster's bytes become one grapheme.
    PangoGlyphItemIter cluster_iter;
    gboolean have_cluster;
    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, run, utf8_word);
         have_cluster && !bad_glyph;
         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
      const int start_byte_index = cluster_iter.start_index;
      const int end_byte_index = cluster_iter.end_index;
      int start_glyph_index = cluster_iter.start_glyph;
      int end_glyph_index = cluster_iter.end_glyph;
      std::string cluster_text =
          std::string(utf8_word + start_byte_index, end_byte_index - start_byte_index);
      if (graphemes) {
        graphemes->push_back(cluster_text);
      }
      if (IsUTF8Whitespace(cluster_text.c_str())) {
        tlog(2, "Skipping whitespace\n");
        continue;
      }
      if (TLOG_IS_ON(2)) {
        printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", start_byte_index,
               end_byte_index, start_glyph_index, end_glyph_index);
      }
      // The glyph range may run in either direction, so step towards
      // end_glyph_index by +1 or -1.
      for (int i = start_glyph_index, step = (end_glyph_index > start_glyph_index) ? 1 : -1;
           !bad_glyph && i != end_glyph_index; i += step) {
        const bool unknown_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & PANGO_GLYPH_UNKNOWN_FLAG);
        const bool illegal_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph == dotted_circle_glyph);
        bad_glyph = unknown_glyph || illegal_glyph;
        if (TLOG_IS_ON(2)) {
          printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph, bad_glyph ? 1 : 0);
        }
      }
      if (TLOG_IS_ON(2)) {
        printf("  '%s'\n", cluster_text.c_str());
      }
      if (bad_glyph)
        tlog(1, "Found illegal glyph!\n");
    }
  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));

  pango_layout_iter_free(run_iter);
  g_object_unref(context);
  g_object_unref(layout);
  // A failed word must not leave a partial grapheme list behind.
  if (bad_glyph && graphemes) {
    graphemes->clear();
  }
  return !bad_glyph;
}
462 
// ------------------------ FontUtils ------------------------------------
// Cached, sorted result of ListAvailableFonts(); emptied by ReInit().
std::vector<std::string> FontUtils::available_fonts_; // cache list
465 
466 // Returns whether the specified font description is available in the fonts
467 // directory.
468 //
469 // The generated list of font families and faces includes "synthesized" font
470 // faces that are not truly loadable. Pango versions >=1.18 have a
471 // pango_font_face_is_synthesized method that can be used to prune the list.
472 // Until then, we are restricted to using a hack where we try to load the font
473 // from the font_map, and then check what we loaded to see if it has the
474 // description we expected. If it is not, then the font is deemed unavailable.
475 //
476 // TODO: This function reports also some not synthesized fonts as not available
477 // e.g. 'Bitstream Charter Medium Italic', 'LMRoman17', so we need this hack
478 // until  other solution is found.
479 /* static */
IsAvailableFont(const char * input_query_desc,std::string * best_match)480 bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_match) {
481   std::string query_desc(input_query_desc);
482   PangoFontDescription *desc = pango_font_description_from_string(query_desc.c_str());
483   PangoFont *selected_font = nullptr;
484   {
485     PangoFontInfo::SoftInitFontConfig();
486     PangoFontMap *font_map = pango_cairo_font_map_get_default();
487     PangoContext *context = pango_context_new();
488     pango_context_set_font_map(context, font_map);
489     {
490       DISABLE_HEAP_LEAK_CHECK;
491       selected_font = pango_font_map_load_font(font_map, context, desc);
492     }
493     g_object_unref(context);
494   }
495   if (selected_font == nullptr) {
496     pango_font_description_free(desc);
497     tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc);
498     return false;
499   }
500   PangoFontDescription *selected_desc = pango_font_describe(selected_font);
501 
502   bool equal = pango_font_description_equal(desc, selected_desc);
503   tlog(3, "query weight = %d \t selected weight =%d\n", pango_font_description_get_weight(desc),
504        pango_font_description_get_weight(selected_desc));
505 
506   char *selected_desc_str = pango_font_description_to_string(selected_desc);
507   tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(), selected_desc_str);
508   if (!equal && best_match != nullptr) {
509     *best_match = selected_desc_str;
510     // Clip the ending ' 0' if there is one. It seems that, if there is no
511     // point size on the end of the fontname, then Pango always appends ' 0'.
512     int len = best_match->size();
513     if (len > 2 && best_match->at(len - 1) == '0' && best_match->at(len - 2) == ' ') {
514       *best_match = best_match->substr(0, len - 2);
515     }
516   }
517   g_free(selected_desc_str);
518   pango_font_description_free(selected_desc);
519   g_object_unref(selected_font);
520   pango_font_description_free(desc);
521   if (!equal)
522     tlog(4, "** Font '%s' failed pango_font_description_equal!\n", input_query_desc);
523   return equal;
524 }
525 
// Returns true when `query` is exactly (case-sensitively) one of the family
// names "Sans", "Serif" or "Monospace", which the font listing skips.
static bool ShouldIgnoreFontFamilyName(const char *query) {
  static const char *const kIgnoredNames[] = {"Sans", "Serif", "Monospace"};
  for (const char *ignored : kIgnoredNames) {
    if (strcmp(ignored, query) == 0) {
      return true;
    }
  }
  return false;
}
536 
537 // Outputs description names of available fonts.
538 /* static */
ListAvailableFonts()539 const std::vector<std::string> &FontUtils::ListAvailableFonts() {
540   if (!available_fonts_.empty()) {
541     return available_fonts_;
542   }
543 
544   PangoFontFamily **families = nullptr;
545   int n_families = 0;
546   ListFontFamilies(&families, &n_families);
547   for (int i = 0; i < n_families; ++i) {
548     const char *family_name = pango_font_family_get_name(families[i]);
549     tlog(2, "Listing family %s\n", family_name);
550     if (ShouldIgnoreFontFamilyName(family_name)) {
551       continue;
552     }
553 
554     int n_faces;
555     PangoFontFace **faces = nullptr;
556     pango_font_family_list_faces(families[i], &faces, &n_faces);
557     for (int j = 0; j < n_faces; ++j) {
558       PangoFontDescription *desc = pango_font_face_describe(faces[j]);
559       char *desc_str = pango_font_description_to_string(desc);
560       // "synthesized" font faces that are not truly loadable, so we skip it
561       if (!pango_font_face_is_synthesized(faces[j]) && IsAvailableFont(desc_str)) {
562         available_fonts_.emplace_back(desc_str);
563       }
564       pango_font_description_free(desc);
565       g_free(desc_str);
566     }
567     g_free(faces);
568   }
569   g_free(families);
570   std::sort(available_fonts_.begin(), available_fonts_.end());
571   return available_fonts_;
572 }
573 
574 // Utilities written to be backward compatible with StringRender
575 
576 /* static */
FontScore(const std::unordered_map<char32,int64_t> & ch_map,const std::string & fontname,int * raw_score,std::vector<bool> * ch_flags)577 int FontUtils::FontScore(const std::unordered_map<char32, int64_t> &ch_map,
578                          const std::string &fontname, int *raw_score, std::vector<bool> *ch_flags) {
579   PangoFontInfo font_info;
580   if (!font_info.ParseFontDescriptionName(fontname)) {
581     tprintf("ERROR: Could not parse %s\n", fontname.c_str());
582   }
583   PangoFont *font = font_info.ToPangoFont();
584   PangoCoverage *coverage = nullptr;
585   if (font != nullptr) {
586     coverage = pango_font_get_coverage(font, nullptr);
587   }
588   if (ch_flags) {
589     ch_flags->clear();
590     ch_flags->reserve(ch_map.size());
591   }
592   *raw_score = 0;
593   int ok_chars = 0;
594   for (auto it : ch_map) {
595     bool covered =
596         (coverage != nullptr) && (IsWhitespace(it.first) ||
597                                   (pango_coverage_get(coverage, it.first) == PANGO_COVERAGE_EXACT));
598     if (covered) {
599       ++(*raw_score);
600       ok_chars += it.second;
601     }
602     if (ch_flags) {
603       ch_flags->push_back(covered);
604     }
605   }
606   pango_coverage_unref(coverage);
607   g_object_unref(font);
608   return ok_chars;
609 }
610 
611 /* static */
BestFonts(const std::unordered_map<char32,int64_t> & ch_map,std::vector<std::pair<const char *,std::vector<bool>>> * fonts)612 std::string FontUtils::BestFonts(const std::unordered_map<char32, int64_t> &ch_map,
613                                  std::vector<std::pair<const char *, std::vector<bool>>> *fonts) {
614   const double kMinOKFraction = 0.99;
615   // Weighted fraction of characters that must be renderable in a font to make
616   // it OK even if the raw count is not good.
617   const double kMinWeightedFraction = 0.99995;
618 
619   fonts->clear();
620   std::vector<std::vector<bool>> font_flags;
621   std::vector<int> font_scores;
622   std::vector<int> raw_scores;
623   int most_ok_chars = 0;
624   int best_raw_score = 0;
625   const std::vector<std::string> &font_names = FontUtils::ListAvailableFonts();
626   for (const auto &font_name : font_names) {
627     std::vector<bool> ch_flags;
628     int raw_score = 0;
629     int ok_chars = FontScore(ch_map, font_name, &raw_score, &ch_flags);
630     most_ok_chars = std::max(ok_chars, most_ok_chars);
631     best_raw_score = std::max(raw_score, best_raw_score);
632 
633     font_flags.push_back(ch_flags);
634     font_scores.push_back(ok_chars);
635     raw_scores.push_back(raw_score);
636   }
637 
638   // Now select the fonts with a score above a threshold fraction
639   // of both the raw and weighted best scores. To prevent bogus fonts being
640   // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
641   // BOTH weighted and raw scores.
642   // In low character-count scripts, the issue is more getting enough fonts,
643   // when only 1 or 2 might have all those rare dingbats etc in them, so we
644   // allow a font with a very high weighted (coverage) score
645   // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
646   int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
647   int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
648   int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
649 
650   std::string font_list;
651   for (unsigned i = 0; i < font_names.size(); ++i) {
652     int score = font_scores[i];
653     int raw_score = raw_scores[i];
654     if ((score >= least_good_enough && raw_score >= least_raw_enough) || score >= override_enough) {
655       fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
656       tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
657            100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
658       font_list += font_names[i];
659       font_list += "\n";
660     } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
661       tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n", font_names[i].c_str(),
662            100.0 * score / most_ok_chars, raw_score, 100.0 * raw_score / best_raw_score);
663     }
664   }
665   return font_list;
666 }
667 
668 /* static */
SelectFont(const char * utf8_word,const int utf8_len,std::string * font_name,std::vector<std::string> * graphemes)669 bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name,
670                            std::vector<std::string> *graphemes) {
671   return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name, graphemes);
672 }
673 
674 /* static */
SelectFont(const char * utf8_word,const int utf8_len,const std::vector<std::string> & all_fonts,std::string * font_name,std::vector<std::string> * graphemes)675 bool FontUtils::SelectFont(const char *utf8_word, const int utf8_len,
676                            const std::vector<std::string> &all_fonts, std::string *font_name,
677                            std::vector<std::string> *graphemes) {
678   if (font_name) {
679     font_name->clear();
680   }
681   if (graphemes) {
682     graphemes->clear();
683   }
684   for (const auto &all_font : all_fonts) {
685     PangoFontInfo font;
686     std::vector<std::string> found_graphemes;
687     ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_font), "Could not parse font desc name %s\n",
688                     all_font.c_str());
689     if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
690       if (graphemes) {
691         graphemes->swap(found_graphemes);
692       }
693       if (font_name) {
694         *font_name = all_font;
695       }
696       return true;
697     }
698   }
699   return false;
700 }
701 
702 // PangoFontInfo is reinitialized, so clear the static list of fonts.
703 /* static */
ReInit()704 void FontUtils::ReInit() {
705   available_fonts_.clear();
706 }
707 
708 // Print info about used font backend
709 /* static */
PangoFontTypeInfo()710 void FontUtils::PangoFontTypeInfo() {
711   PangoFontMap *font_map = pango_cairo_font_map_get_default();
712   if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
713       CAIRO_FONT_TYPE_TOY) {
714     printf("Using CAIRO_FONT_TYPE_TOY.\n");
715   } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
716              CAIRO_FONT_TYPE_FT) {
717     printf("Using CAIRO_FONT_TYPE_FT.\n");
718   } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
719              CAIRO_FONT_TYPE_WIN32) {
720     printf("Using CAIRO_FONT_TYPE_WIN32.\n");
721   } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
722              CAIRO_FONT_TYPE_QUARTZ) {
723     printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
724   } else if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap *>(font_map)) ==
725              CAIRO_FONT_TYPE_USER) {
726     printf("Using CAIRO_FONT_TYPE_USER.\n");
727   } else if (!font_map) {
728     printf("Can not create pango cairo font map!\n");
729   }
730 }
731 
732 } // namespace tesseract
733