#! python3

"""
Grammar checker tests for French language
"""

import unittest
import os
import re
import time
from contextlib import contextmanager

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_engine as gce
from . import conj
from . import phonet
from . import mfsp


@contextmanager
def timeblock (label, hDst):
    """Time the enclosed block (context manager).

    The elapsed time is printed as "<label> : <seconds>" and, when <hDst> is
    truthy, also written to it as a left-aligned 12-character column.
    The measurement is reported even if the enclosed block raises.
    """
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    """Run the performance tests.

    Every non-empty, non-comment line of "perf.txt" (located next to this
    module) is parsed by the grammar checker and timed with timeblock().

    sVersion: version label written at the start of the result row.
    hDst:     optional writable text handle receiving one row of timings.
    """
    print("\nPerformance tests")
    gce.load()
    # warm-up call, so that rule compilation is not included in the timings
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            # The label is the text before the first "."; partition() keeps the
            # whole line when there is no ".", whereas slicing with find()
            # (which returns -1 in that case) would drop the last character.
            sLabel = sText.partition(".")[0]
            with timeblock(sLabel, hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def _fuckBackslashUTF8 (s):
    """Return <s> with the typographic apostrophe (U+2019) replaced by an ASCII one.

    Used on text printed in failure reports. The two dash replacements appear
    to map each dash to the very same character, i.e. they change nothing.
    """
    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):
    "Spellchecker tests"

    @classmethod
    def setUpClass (cls):
        cls.oDic = IBDAWG("fr-allvars.bdic")

    def test_lookup (self):
        # words expected to be found as written
        for sWord in ["branche", "Émilie"]:
            self.assertTrue(self.oDic.lookup(sWord), sWord)

    def test_lookup_failed (self):
        # case variants that lookup() is expected to reject
        for sWord in ["Branche", "BRANCHE", "BranchE", "BRanche", "BRAnCHE", "émilie"]:
            self.assertFalse(self.oDic.lookup(sWord), sWord)

    def test_isvalid (self):
        # isValid() is expected to accept capitalized/uppercase forms and both apostrophes
        for sWord in ["Branche", "branche", "BRANCHE", "Émilie", "ÉMILIE", "aujourd'hui", "aujourd’hui", "Aujourd'hui", "Aujourd’hui"]:
            self.assertTrue(self.oDic.isValid(sWord), sWord)

    def test_isvalid_failed (self):
        # irregular casing and lowercased proper nouns are expected to be rejected
        for sWord in ["BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie"]:
            self.assertFalse(self.oDic.isValid(sWord), sWord)


class TestConjugation (unittest.TestCase):
    "Conjugation tests"

    @classmethod
    def setUpClass (cls):
        pass

    def test_isverb (self):
        for sVerb in ["avoir", "être", "aller", "manger", "courir", "venir", "faire", "finir"]:
            self.assertTrue(conj.isVerb(sVerb), sVerb)
        for sVerb in ["berk", "a", "va", "contre", "super", "", "à"]:
            self.assertFalse(conj.isVerb(sVerb), sVerb)

    def test_hasconj (self):
        # (infinitive, tense tag, person tag)
        for sVerb, sTense, sWho in [("aller", ":E", ":2s"), ("avoir", ":Is", ":1s"), ("être", ":Ip", ":2p"),
                                    ("manger", ":Sp", ":3s"), ("finir", ":K", ":3p"), ("prendre", ":If", ":1p")]:
            self.assertTrue(conj.hasConj(sVerb, sTense, sWho), sVerb)

    def test_getconj (self):
        # (infinitive, tense tag, person tag, expected conjugated form)
        for sVerb, sTense, sWho, sConj in [("aller", ":E", ":2s", "va"), ("avoir", ":Iq", ":1s", "avais"), ("être", ":Ip", ":2p", "êtes"),
                                           ("manger", ":Sp", ":3s", "mange"), ("finir", ":K", ":3p", "finiraient"), ("prendre", ":If", ":1p", "prendrons")]:
            self.assertEqual(conj.getConj(sVerb, sTense, sWho), sConj, sVerb)


class TestPhonet (unittest.TestCase):
    "Phonetic equivalence tests"

    @classmethod
    def setUpClass (cls):
        # each inner list is one set of words expected to be phonetically equivalent
        cls.lSet = [
            ["ce", "se"],
            ["ces", "saie", "saies", "ses", "sais", "sait"],
            ["cet", "cette", "sept", "set", "sets"],
            ["dé", "dés", "dès", "dais", "des"],
            ["don", "dons", "dont"],
            ["été", "étaie", "étaies", "étais", "était", "étai", "étés", "étaient"],
            ["faire", "fer", "fers", "ferre", "ferres", "ferrent"],
            ["fois", "foi", "foie", "foies"],
            ["la", "là", "las"],
            ["mes", "mets", "met", "mai", "mais"],
            ["mon", "mont", "monts"],
            ["mot", "mots", "maux"],
            ["moi", "mois"],
            ["notre", "nôtre", "nôtres"],
            ["or", "ors", "hors"],
            ["hou", "houe", "houes", "ou", "où", "houx"],
            ["peu", "peux", "peut"],
            ["son", "sons", "sont"],
            ["tes", "tais", "tait", "taie", "taies", "thé", "thés"],
            ["toi", "toit", "toits"],
            ["ton", "tons", "thon", "thons", "tond", "tonds"],
            ["voir", "voire"]
        ]

    def test_getsimil (self):
        # getSimil() must return the whole set, sorted, for each member of the set
        for aSet in self.lSet:
            for sWord in aSet:
                self.assertListEqual(phonet.getSimil(sWord), sorted(aSet))


class TestMasFemSingPlur (unittest.TestCase):
    "Masculine, feminine, singular and plural tests"

    @classmethod
    def setUpClass (cls):
        # (singular, expected list of plurals)
        cls.lPlural = [
            ("travail", ["travaux"]),
            ("vœu", ["vœux"]),
            ("gentleman", ["gentlemans", "gentlemen"])
        ]

    def test_getplural (self):
        for sSing, lPlur in self.lPlural:
            self.assertListEqual(mfsp.getMiscPlural(sSing), lPlur)


class TestGrammarChecking (unittest.TestCase):
    "Grammar checker tests"

    @classmethod
    def setUpClass (cls):
        gce.load()
        # an expected error is written {{like this}} in the test file
        cls._zError = re.compile(r"\{\{.*?\}\}")
        # rule ids (trailing digits stripped) that raised at least one error
        cls._aTestedRules = set()

    def test_parse (self):
        # Each useful line of "gc_test.txt" is read as: a 10-character line
        # number field, an optional "__option__ " prefix, the text with expected
        # errors marked {{...}}, and optionally "->>" followed by the expected
        # suggestions.
        # NOTE(review): mismatches are only printed and counted; this test never
        # fails because of them -- confirm that this is intended.
        zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
        zIgnoredRule = re.compile("^[0-9]+[sp]$|^[pd]_")
        spHere, _ = os.path.split(__file__)
        with open(os.path.join(spHere, "gc_test.txt"), "r", encoding="utf-8") as hSrc:
            nError = 0
            for sLine in ( s for s in hSrc if not s.startswith("#") and s.strip() ):
                sLineNum = sLine[:10].strip()
                sLine = sLine[10:].strip()
                sOption = None
                m = zOption.search(sLine)
                if m:
                    sLine = sLine[m.end():]
                    sOption = m.group(1)
                if "->>" in sLine:
                    sErrorText, sExpectedSuggs = self._splitTestLine(sLine)
                    if sExpectedSuggs.startswith('"') and sExpectedSuggs.endswith('"'):
                        sExpectedSuggs = sExpectedSuggs[1:-1]
                else:
                    sErrorText = sLine.strip()
                    sExpectedSuggs = ""
                sExpectedErrors = self._getExpectedErrors(sErrorText)
                sTextToCheck = sErrorText.replace("}}", "").replace("{{", "")
                sFoundErrors, sListErr, sFoundSuggs = self._getFoundErrors(sTextToCheck, sOption)
                # tests: suggestions are compared only when the error positions match
                if sExpectedErrors != sFoundErrors:
                    self._reportMismatch(sLineNum, sTextToCheck, sExpectedErrors, sFoundErrors, sListErr)
                    nError += 1
                elif sExpectedSuggs and sExpectedSuggs != sFoundSuggs:
                    self._reportMismatch(sLineNum, sTextToCheck, sExpectedSuggs, sFoundSuggs, sListErr)
                    nError += 1
            if nError:
                print("Unexpected errors:", nError)
        # untested rules
        nUntested = 0
        for _, sOpt, sLineId, sRuleId in gce.listRules():
            if sOpt != "@@@@" and sRuleId.rstrip("0123456789") not in self._aTestedRules and not zIgnoredRule.search(sRuleId):
                echo(f"# untested rule: {sLineId}/{sRuleId}")
                nUntested += 1
        if nUntested:
            echo(" [{} untested rules]".format(nUntested))

    def _reportMismatch (self, sLineNum, sTextToCheck, sExpected, sFound, sListErr):
        "print a report for a test line whose expected and found results differ"
        print("\n# Line num: " + sLineNum + \
              "\n> to check: " + _fuckBackslashUTF8(sTextToCheck) + \
              "\n expected: " + sExpected + \
              "\n found: " + sFound + \
              "\n errors: \n" + sListErr)

    def _splitTestLine (self, sLine):
        """Split <sLine> on "->>" and return the stripped (text, suggestions) pair.

        Raises ValueError if "->>" does not occur exactly once in <sLine>.
        """
        sText, sSugg = sLine.split("->>")
        return (sText.strip(), sSugg.strip())

    def _getFoundErrors (self, sLine, sOption):
        """Parse <sLine> with the grammar checker and summarize the errors found.

        sOption: name of an option switched on for this parse only, or a falsy
                 value to parse with the current options.

        Returns a tuple of 3 strings:
        - a string as long as <sLine>, with "~" over each error span and
          spaces elsewhere;
        - one line of text per error (line id, rule id, position);
        - the suggestions: joined with "|" within an error, and with "|||"
          between errors.

        Side effects: records each rule id (trailing digits stripped) in
        _aTestedRules, and prints the errors whose message still contains
        "<start>" or "<end>".
        """
        if sOption:
            gce.setOption(sOption, True)
            try:
                aErrs = gce.parse(sLine)
            finally:
                # switch the option off even if parse() raises, so that it
                # cannot leak into the following test lines
                gce.setOption(sOption, False)
        else:
            aErrs = gce.parse(sLine)
        sRes = " " * len(sLine)
        lListErr = []
        lAllSugg = []
        for dErr in aErrs:
            sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
            lListErr.append(" * {sLineId} / {sRuleId} at {nStart}:{nEnd}\n".format(**dErr))
            lAllSugg.append("|".join(dErr["aSuggestions"]))
            self._aTestedRules.add(dErr["sRuleId"].rstrip("0123456789"))
            # test messages
            if "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
                print("\n# Line num : " + dErr["sLineId"] + \
                      "\n rule name: " + dErr["sRuleId"] + \
                      "\n message : " + dErr["sMessage"])
        return sRes, "".join(lListErr), "|||".join(lAllSugg)

    def _getExpectedErrors (self, sLine):
        """Return the expected error mask for a test line marked with {{...}}.

        The result has "~" over each marked span and spaces elsewhere, with
        positions expressed as if the "{{" and "}}" markers had been removed.
        """
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            # each earlier match carried 4 marker characters ("{{" and "}}"),
            # and the current match ends with 2 more than it starts with
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))
            # the [:-4] shortens the mask by the 4 marker characters of this match
            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes


def main():
    "start function"
    unittest.main()


if __name__ == '__main__':
    main()