1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "public/compact_lang_det.h" 6 7 #define MAX_RESULTS 3 8 9 class Language { 10 public: Language(CLD2::Language lang)11 Language(CLD2::Language lang) : mLang(lang) {} 12 getLanguageCode() const13 const char* getLanguageCode() const 14 { 15 return CLD2::LanguageCode(mLang); 16 } 17 18 private: 19 const CLD2::Language mLang; 20 }; 21 22 class LanguageGuess : public Language { 23 public: LanguageGuess(CLD2::Language lang,char percent)24 LanguageGuess(CLD2::Language lang, char percent) : 25 Language(lang), mPercent(percent) {} 26 getPercent() const27 char getPercent() const 28 { 29 return mPercent; 30 } 31 32 private: 33 const char mPercent; 34 }; 35 36 37 class LanguageInfo : public Language { 38 public: detectLanguage(const char * buffer,bool isPlainText)39 static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText) 40 { 41 CLD2::Language languages[MAX_RESULTS] = {}; 42 int percentages[MAX_RESULTS] = {}; 43 bool isReliable = false; 44 45 // This is ignored. 46 int textBytes; 47 48 CLD2::Language bestGuess = DetectLanguageSummary( 49 buffer, strlen(buffer), isPlainText, 50 languages, percentages, &textBytes, 51 &isReliable); 52 53 return new LanguageInfo(isReliable, bestGuess, languages, percentages); 54 } 55 detectLanguage(const char * buffer,bool isPlainText,const char * tldHint,int encodingHint,const char * languageHint)56 static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText, 57 const char* tldHint, int encodingHint, 58 const char* languageHint) 59 { 60 CLD2::CLDHints hints = {languageHint, tldHint, encodingHint, CLD2::UNKNOWN_LANGUAGE}; 61 62 CLD2::Language languages[MAX_RESULTS] = {}; 63 int percentages[MAX_RESULTS] = {}; 64 bool isReliable = false; 65 66 // These are ignored. 67 double scores[MAX_RESULTS]; 68 int textBytes; 69 70 CLD2::Language bestGuess = ExtDetectLanguageSummary( 71 buffer, strlen(buffer), isPlainText, 72 &hints, 0, 73 languages, percentages, scores, 74 nullptr, &textBytes, &isReliable); 75 76 return new LanguageInfo(isReliable, bestGuess, languages, percentages); 77 } 78 ~LanguageInfo()79 ~LanguageInfo() 80 { 81 for (int i = 0; i < MAX_RESULTS; i++) { 82 delete languages[i]; 83 } 84 } 85 getIsReliable() const86 bool getIsReliable() const 87 { 88 return mIsReliable; 89 } 90 91 const LanguageGuess* languages[MAX_RESULTS]; 92 93 private: LanguageInfo(bool isReliable,CLD2::Language bestGuess,CLD2::Language languageIDs[MAX_RESULTS],int percentages[MAX_RESULTS])94 LanguageInfo(bool isReliable, CLD2::Language bestGuess, 95 CLD2::Language languageIDs[MAX_RESULTS], 96 int percentages[MAX_RESULTS]) : 97 Language(bestGuess), mIsReliable(isReliable) 98 { 99 for (int i = 0; i < MAX_RESULTS; i++) { 100 languages[i] = new LanguageGuess(languageIDs[i], percentages[i]); 101 } 102 } 103 104 const bool mIsReliable; 105 }; 106 107 #include "cld.cpp" 108