1package getlang 2 3import ( 4 "github.com/stretchr/testify/assert" 5 "strings" 6 "testing" 7) 8 9func TestEmptyStringFromReader(t *testing.T) { 10 info, _ := FromReader(strings.NewReader("")) 11 assert.Equal(t, "und", info.LanguageCode()) 12} 13 14func TestEnglishPhraseFromBigReader(t *testing.T) { 15 largeText := "" 16 for i := 0; i < 800; i++ { 17 largeText += "this is more language as you can see " 18 } 19 info, _ := FromReader(strings.NewReader(largeText)) 20 assert.Equal(t, "en", info.LanguageCode()) 21 assert.Equal(t, true, info.Confidence() > 0.999) 22} 23 24func TestEnglishPhraseFromReader(t *testing.T) { 25 info, _ := FromReader(strings.NewReader("this is the language")) 26 assert.Equal(t, "en", info.LanguageCode()) 27 assert.Equal(t, true, info.Confidence() > 0.75) 28} 29 30func TestEnglishPhraseTag(t *testing.T) { 31 info, _ := FromReader(strings.NewReader("this is the language")) 32 tag := info.Tag() 33 34 assert.Equal(t, "en", tag.String()) 35 assert.Equal(t, false, tag.IsRoot()) 36 assert.Equal(t, true, tag.Parent().IsRoot()) 37} 38 39func TestEnglishPhraseUSDI(t *testing.T) { 40 text := "We hold these truths to be self-evident, that all men are created equal" 41 ensureClassifiedWithConfidence( 42 t, 43 text, 44 "en", 45 0.95) 46 47 ensureClassifiedTextNamed( 48 t, 49 text, 50 "English", 51 "English") 52} 53 54func TestGermanPhraseUSDI(t *testing.T) { 55 text := "Wir halten diese Wahrheiten für ausgemacht, daß alle Menschen gleich erschaffen worden" 56 ensureClassifiedWithConfidence( 57 t, 58 text, 59 "de", 60 0.95) 61 62 ensureClassifiedTextNamed( 63 t, 64 text, 65 "German", 66 "Deutsch") 67} 68 69func TestEnglishMixedGerman(t *testing.T) { 70 ensureClassifiedWithConfidence( 71 t, 72 "If you wanted to greet someone in this language, you'd say 'wie geht es'", 73 "en", 74 0.35) 75} 76 77func TestEnglishMixedUkrainian(t *testing.T) { 78 ensureClassifiedWithConfidence( 79 t, 80 "the best thing to say is своїй гідності in my opinon of this.", 81 "en", 82 0.55) 83} 84 85func TestSpanishPhraseUSDI(t *testing.T) { 86 ensureClassifiedWithConfidence( 87 t, 88 "Sostenemos como evidentes estas verdades: que los hombres son creados iguales", 89 "es", 90 0.75) 91} 92 93func TestPortuguesePhraseUSDI(t *testing.T) { 94 ensureClassifiedWithConfidence( 95 t, 96 "Consideramos estas verdades como autoevidentes, que todos os homens são criados iguais", 97 "pt", 98 0.95) 99} 100 101func TestPolishPhraseUDHR(t *testing.T) { 102 ensureClassifiedWithConfidence( 103 t, 104 "Wszyscy ludzie rodzą się wolni i równi w swojej godności i prawach", 105 "pl", 106 0.95) 107} 108 109func TestPunjabiPhrase(t *testing.T) { 110 text := "ਮੇਰਾ ਨਾਮ ਭਰਤ ਹੈ." 111 lang := "ਪੰਜਾਬੀ" 112 113 ensureClassifiedWithConfidence( 114 t, 115 text, 116 "pa", 117 0.95) 118 119 ensureClassifiedTextNamed( 120 t, 121 text, 122 "Punjabi", 123 lang) 124} 125 126func TestHungarianPhraseUDHR(t *testing.T) { 127 ensureClassifiedWithConfidence( 128 t, 129 "Minden emberi lény szabadon születik és egyenlő méltósága és joga van", 130 "hu", 131 0.95) 132} 133 134func TestItalianPhraseUDHR(t *testing.T) { 135 ensureClassifiedWithConfidence( 136 t, 137 "Tutti gli esseri umani nascono liberi ed eguali in dignità e diritti", 138 "it", 139 0.95) 140} 141 142func TestRussianPhraseUDHR(t *testing.T) { 143 ensureClassifiedWithConfidence( 144 t, 145 "Все люди рождаются свободными и равными в своем достоинстве и правах", 146 "ru", 147 0.55) 148} 149 150func TestUkrainianPhraseUDHR(t *testing.T) { 151 ensureClassifiedWithConfidence( 152 t, 153 "Всі люди народжуються вільними і рівними у своїй гідності та правах", 154 "uk", 155 0.80) 156} 157 158func TestFrenchPhraseUDHR(t *testing.T) { 159 ensureClassifiedWithConfidence( 160 t, 161 "Tous les êtres humains naissent libres et égaux", 162 "fr", 163 0.95) 164} 165 166func TestKoreanPhrase(t *testing.T) { 167 ensureClassifiedWithConfidence( 168 t, 169 "원래 AB형 사람이 똑똑해", 170 "ko", 171 0.95) 172} 173 174func TestJapanesePhrase(t *testing.T) { 175 text := "何を食べますか" 176 ensureClassifiedWithConfidence( 177 t, 178 text, 179 "ja", 180 0.90) 181 182 ensureClassifiedTextNamed( 183 t, 184 text, 185 "Japanese", 186 "日本語") 187} 188 189func TestChinesePhrase(t *testing.T) { 190 text := "球的采编网络,记者遍布" 191 ensureClassifiedWithConfidence( 192 t, 193 text, 194 "zh", 195 0.95) 196 197 ensureClassifiedTextNamed( 198 t, 199 text, 200 "Chinese", 201 "中文") 202} 203 204func TestArabicPhrase(t *testing.T) { 205 text := "اهتمامًا بذلك المشروع. المجموعة الوحيدة التي " 206 lang := "العربية" 207 ensureClassifiedWithConfidence( 208 t, 209 text, 210 "ar", 211 0.55) 212 213 ensureClassifiedTextNamed( 214 t, 215 text, 216 "Arabic", 217 lang) 218} 219 220func TestBanglaPhrase(t *testing.T) { 221 text := "এই গবেষণায় রত, তাঁদেরকে বলা হয় ভাষাবিজ্ঞানী।ভাষাবিজ্ঞানীরা নৈর্ব্যক্তিক" 222 lang := "বাংলা" 223 224 ensureClassifiedWithConfidence( 225 t, 226 text, 227 "bn", 228 0.85) 229 230 ensureClassifiedTextNamed( 231 t, 232 text, 233 "Bangla", 234 lang) 235} 236 237func TestHindiPhrase(t *testing.T) { 238 text := "ब तक लगातार चल रहा है। इसका प्रसारण प्रत्येक शनिवार और रविवार को रात 10 बजे होता है। इसका पुनः प्रसारण सोनी पल चैनल पर रात 9 बजे होता" 239 lang := "हिन्दी" 240 241 ensureClassifiedWithConfidence( 242 t, 243 text, 244 "hi", 245 0.75) 246 247 ensureClassifiedTextNamed( 248 t, 249 text, 250 "Hindi", 251 lang) 252} 253 254func TestGreekPhrase(t *testing.T) { 255 text := "Ολοι οι άνθρωποι γεννιούνται ελεύθεροι και ίσοι στην αξιοπρέπεια και στα δικαιώματα" 256 257 ensureClassifiedWithConfidence( 258 t, 259 text, 260 "el", 261 0.95) 262 263 ensureClassifiedTextNamed( 264 t, 265 text, 266 "Greek", 267 "Ελληνικά") 268} 269 270func TestHebrewPhrase(t *testing.T) { 271 text := "כראוי. בִּדקו את כותרת הדף" 272 lang := "עברית" 273 274 ensureClassifiedWithConfidence( 275 t, 276 text, 277 "he", 278 0.95) 279 280 ensureClassifiedTextNamed( 281 t, 282 text, 283 "Hebrew", 284 lang) 285} 286 287func TestGujaratiPhrase(t *testing.T) { 288 text := "ગુજરાતી" 289 lang := "ગુજરાતી" 290 291 ensureClassifiedWithConfidence( 292 t, 293 text, 294 "gu", 295 0.95) 296 297 ensureClassifiedTextNamed( 298 t, 299 text, 300 "Gujarati", 301 lang) 302} 303 304func TestThaiPhrase(t *testing.T) { 305 text := "ไทย ไทยไทย" 306 lang := "ไทย" 307 308 ensureClassifiedWithConfidence( 309 t, 310 text, 311 "th", 312 0.95) 313 314 ensureClassifiedTextNamed( 315 t, 316 text, 317 "Thai", 318 lang) 319} 320 321func TestArmenianPhrase(t *testing.T) { 322 text := "ըստ Գրիգորյան օրացույցի" 323 lang := "հայերեն" 324 325 ensureClassifiedWithConfidence( 326 t, 327 text, 328 "hy", 329 0.95) 330 331 ensureClassifiedTextNamed( 332 t, 333 text, 334 "Armenian", 335 lang) 336} 337 338func TestSerbianLatinPhrase(t *testing.T) { 339 text := "ljudi ne znaju jer me uglavnom vide" 340 lang := "srpskohrvatski" 341 342 ensureClassifiedWithConfidence( 343 t, 344 text, 345 "sr", 346 0.85) 347 348 ensureClassifiedTextNamed( 349 t, 350 text, 351 "Serbo-Croatian", 352 lang) 353} 354 355func TestSerbianCyrillicPhrase(t *testing.T) { 356 text := "Код животиња су ове реакције посебно важне при зарастању рана" 357 lang := "српски" 358 359 ensureClassifiedWithConfidence( 360 t, 361 text, 362 "sr", 363 0.95) 364 365 ensureClassifiedTextNamed( 366 t, 367 text, 368 "Serbian (Cyrillic)", 369 lang) 370} 371 372func TestVietnamesePhrase(t *testing.T) { 373 text := "Truyền thông Việt Nam vào dịp này đăng bài ký tên ông" 374 lang := "Tiếng Việt" 375 376 ensureClassifiedWithConfidence( 377 t, 378 text, 379 "vi", 380 0.95) 381 382 ensureClassifiedTextNamed( 383 t, 384 text, 385 "Vietnamese", 386 lang) 387} 388 389func TestTeluguPhrase(t *testing.T) { 390 text := "భారతదేశంలోని దక్షిణ" 391 lang := "తెలుగు" 392 393 ensureClassifiedWithConfidence( 394 t, 395 text, 396 "te", 397 0.95) 398 399 ensureClassifiedTextNamed( 400 t, 401 text, 402 "Telugu", 403 lang) 404} 405 406func TestTamilPhrase(t *testing.T) { 407 text := " நீளமான, கிளைக்காத" 408 lang := "தமிழ்" 409 410 ensureClassifiedWithConfidence( 411 t, 412 text, 413 "ta", 414 0.95) 415 416 ensureClassifiedTextNamed( 417 t, 418 text, 419 "Tamil", 420 lang) 421} 422 423func TestTagalogPhrase(t *testing.T) { 424 text := "ano ang nangyayari sa iyo at ang mah-ina mo ay hindi mo" 425 lang := "Filipino" 426 427 ensureClassifiedWithConfidence( 428 t, 429 text, 430 "tl", 431 0.95) 432 433 ensureClassifiedTextNamed( 434 t, 435 text, 436 "Filipino", 437 lang) 438} 439 440func TestDutchPhrase(t *testing.T) { 441 text := "Een ieder heeft, waar hij zich ook bevindt, het recht als persoon erkend te worden voor de wet" 442 lang := "Nederlands" 443 444 ensureClassifiedWithConfidence( 445 t, 446 text, 447 "nl", 448 0.95) 449 450 ensureClassifiedTextNamed( 451 t, 452 text, 453 "Dutch", 454 lang) 455} 456 457func TestKannadaPhrase(t *testing.T) { 458 text := "ನನ್ನ ಹೆಸರು ಭಾರತ್." 459 lang := "ಕನ್ನಡ" 460 461 ensureClassifiedWithConfidence( 462 t, 463 text, 464 "kn", 465 0.95) 466 467 ensureClassifiedTextNamed( 468 t, 469 text, 470 "Kannada", 471 lang) 472} 473 474func TestNonsense(t *testing.T) { 475 text := "wep lvna eeii vl jkk azc nmn iuah ppl zccl c%l aa1z" 476 ensureClassifiedWithConfidence( 477 t, 478 text, 479 "und", 480 0.75) 481 482 ensureClassifiedTextNamed( 483 t, 484 text, 485 "Unknown language", 486 "") 487} 488 489func ensureClassifiedWithConfidence(t *testing.T, text string, expectedLang string, minConfidence float64) { 490 info := FromString(text) 491 492 assert.Equal(t, expectedLang, info.LanguageCode(), "Misclassified text: "+text) 493 assert.Equal(t, true, info.Confidence() > minConfidence) 494} 495 496func ensureClassifiedTextNamed(t *testing.T, text string, expectedEnglishName string, expectedSelfName string) { 497 info := FromString(text) 498 499 assert.Equal(t, expectedEnglishName, info.LanguageName(), "Wrong language name: "+text) 500 assert.Equal(t, expectedSelfName, info.SelfName(), "Wrong self lang name: "+text) 501} 502