1package getlang
2
3import (
4	"github.com/stretchr/testify/assert"
5	"strings"
6	"testing"
7)
8
9func TestEmptyStringFromReader(t *testing.T) {
10	info, _ := FromReader(strings.NewReader(""))
11	assert.Equal(t, "und", info.LanguageCode())
12}
13
14func TestEnglishPhraseFromBigReader(t *testing.T) {
15	largeText := ""
16	for i := 0; i < 800; i++ {
17		largeText += "this is more language as you can see "
18	}
19	info, _ := FromReader(strings.NewReader(largeText))
20	assert.Equal(t, "en", info.LanguageCode())
21	assert.Equal(t, true, info.Confidence() > 0.999)
22}
23
24func TestEnglishPhraseFromReader(t *testing.T) {
25	info, _ := FromReader(strings.NewReader("this is the language"))
26	assert.Equal(t, "en", info.LanguageCode())
27	assert.Equal(t, true, info.Confidence() > 0.75)
28}
29
30func TestEnglishPhraseTag(t *testing.T) {
31	info, _ := FromReader(strings.NewReader("this is the language"))
32	tag := info.Tag()
33
34	assert.Equal(t, "en", tag.String())
35	assert.Equal(t, false, tag.IsRoot())
36	assert.Equal(t, true, tag.Parent().IsRoot())
37}
38
39func TestEnglishPhraseUSDI(t *testing.T) {
40	text := "We hold these truths to be self-evident, that all men are created equal"
41	ensureClassifiedWithConfidence(
42		t,
43		text,
44		"en",
45		0.95)
46
47	ensureClassifiedTextNamed(
48		t,
49		text,
50		"English",
51		"English")
52}
53
54func TestGermanPhraseUSDI(t *testing.T) {
55	text := "Wir halten diese Wahrheiten für ausgemacht, daß alle Menschen gleich erschaffen worden"
56	ensureClassifiedWithConfidence(
57		t,
58		text,
59		"de",
60		0.95)
61
62	ensureClassifiedTextNamed(
63		t,
64		text,
65		"German",
66		"Deutsch")
67}
68
69func TestEnglishMixedGerman(t *testing.T) {
70	ensureClassifiedWithConfidence(
71		t,
72		"If you wanted to greet someone in this language, you'd say 'wie geht es'",
73		"en",
74		0.35)
75}
76
77func TestEnglishMixedUkrainian(t *testing.T) {
78	ensureClassifiedWithConfidence(
79		t,
80		"the best thing to say is своїй гідності in my opinon of this.",
81		"en",
82		0.55)
83}
84
85func TestSpanishPhraseUSDI(t *testing.T) {
86	ensureClassifiedWithConfidence(
87		t,
88		"Sostenemos como evidentes estas verdades: que los hombres son creados iguales",
89		"es",
90		0.75)
91}
92
93func TestPortuguesePhraseUSDI(t *testing.T) {
94	ensureClassifiedWithConfidence(
95		t,
96		"Consideramos estas verdades como autoevidentes, que todos os homens são criados iguais",
97		"pt",
98		0.95)
99}
100
101func TestPolishPhraseUDHR(t *testing.T) {
102	ensureClassifiedWithConfidence(
103		t,
104		"Wszyscy ludzie rodzą się wolni i równi w swojej godności i prawach",
105		"pl",
106		0.95)
107}
108
109func TestPunjabiPhrase(t *testing.T) {
110	text := "ਮੇਰਾ ਨਾਮ ਭਰਤ ਹੈ."
111	lang := "ਪੰਜਾਬੀ"
112
113	ensureClassifiedWithConfidence(
114		t,
115		text,
116		"pa",
117		0.95)
118
119	ensureClassifiedTextNamed(
120		t,
121		text,
122		"Punjabi",
123		lang)
124}
125
126func TestHungarianPhraseUDHR(t *testing.T) {
127	ensureClassifiedWithConfidence(
128		t,
129		"Minden emberi lény szabadon születik és egyenlő méltósága és joga van",
130		"hu",
131		0.95)
132}
133
134func TestItalianPhraseUDHR(t *testing.T) {
135	ensureClassifiedWithConfidence(
136		t,
137		"Tutti gli esseri umani nascono liberi ed eguali in dignità e diritti",
138		"it",
139		0.95)
140}
141
142func TestRussianPhraseUDHR(t *testing.T) {
143	ensureClassifiedWithConfidence(
144		t,
145		"Все люди рождаются свободными и равными в своем достоинстве и правах",
146		"ru",
147		0.55)
148}
149
150func TestUkrainianPhraseUDHR(t *testing.T) {
151	ensureClassifiedWithConfidence(
152		t,
153		"Всі люди народжуються вільними і рівними у своїй гідності та правах",
154		"uk",
155		0.80)
156}
157
158func TestFrenchPhraseUDHR(t *testing.T) {
159	ensureClassifiedWithConfidence(
160		t,
161		"Tous les êtres humains naissent libres et égaux",
162		"fr",
163		0.95)
164}
165
166func TestKoreanPhrase(t *testing.T) {
167	ensureClassifiedWithConfidence(
168		t,
169		"원래 AB형 사람이 똑똑해",
170		"ko",
171		0.95)
172}
173
174func TestJapanesePhrase(t *testing.T) {
175	text := "何を食べますか"
176	ensureClassifiedWithConfidence(
177		t,
178		text,
179		"ja",
180		0.90)
181
182	ensureClassifiedTextNamed(
183		t,
184		text,
185		"Japanese",
186		"日本語")
187}
188
189func TestChinesePhrase(t *testing.T) {
190	text := "球的采编网络,记者遍布"
191	ensureClassifiedWithConfidence(
192		t,
193		text,
194		"zh",
195		0.95)
196
197	ensureClassifiedTextNamed(
198		t,
199		text,
200		"Chinese",
201		"中文")
202}
203
204func TestArabicPhrase(t *testing.T) {
205	text := "اهتمامًا بذلك المشروع. المجموعة الوحيدة التي "
206	lang := "العربية"
207	ensureClassifiedWithConfidence(
208		t,
209		text,
210		"ar",
211		0.55)
212
213	ensureClassifiedTextNamed(
214		t,
215		text,
216		"Arabic",
217		lang)
218}
219
220func TestBanglaPhrase(t *testing.T) {
221	text := "এই গবেষণায় রত, তাঁদেরকে বলা হয় ভাষাবিজ্ঞানী।ভাষাবিজ্ঞানীরা নৈর্ব্যক্তিক"
222	lang := "বাংলা"
223
224	ensureClassifiedWithConfidence(
225		t,
226		text,
227		"bn",
228		0.85)
229
230	ensureClassifiedTextNamed(
231		t,
232		text,
233		"Bangla",
234		lang)
235}
236
237func TestHindiPhrase(t *testing.T) {
238	text := "ब तक लगातार चल रहा है। इसका प्रसारण प्रत्येक शनिवार और रविवार को रात 10 बजे होता है। इसका पुनः प्रसारण सोनी पल चैनल पर रात 9 बजे होता"
239	lang := "हिन्दी"
240
241	ensureClassifiedWithConfidence(
242		t,
243		text,
244		"hi",
245		0.75)
246
247	ensureClassifiedTextNamed(
248		t,
249		text,
250		"Hindi",
251		lang)
252}
253
254func TestGreekPhrase(t *testing.T) {
255	text := "Ολοι οι άνθρωποι γεννιούνται ελεύθεροι και ίσοι στην αξιοπρέπεια και στα δικαιώματα"
256
257	ensureClassifiedWithConfidence(
258		t,
259		text,
260		"el",
261		0.95)
262
263	ensureClassifiedTextNamed(
264		t,
265		text,
266		"Greek",
267		"Ελληνικά")
268}
269
270func TestHebrewPhrase(t *testing.T) {
271	text := "כראוי. בִּדקו את כותרת הדף"
272	lang := "עברית"
273
274	ensureClassifiedWithConfidence(
275		t,
276		text,
277		"he",
278		0.95)
279
280	ensureClassifiedTextNamed(
281		t,
282		text,
283		"Hebrew",
284		lang)
285}
286
287func TestGujaratiPhrase(t *testing.T) {
288	text := "ગુજરાતી"
289	lang := "ગુજરાતી"
290
291	ensureClassifiedWithConfidence(
292		t,
293		text,
294		"gu",
295		0.95)
296
297	ensureClassifiedTextNamed(
298		t,
299		text,
300		"Gujarati",
301		lang)
302}
303
304func TestThaiPhrase(t *testing.T) {
305	text := "ไทย ไทยไทย"
306	lang := "ไทย"
307
308	ensureClassifiedWithConfidence(
309		t,
310		text,
311		"th",
312		0.95)
313
314	ensureClassifiedTextNamed(
315		t,
316		text,
317		"Thai",
318		lang)
319}
320
321func TestArmenianPhrase(t *testing.T) {
322	text := "ըստ Գրիգորյան օրացույցի"
323	lang := "հայերեն"
324
325	ensureClassifiedWithConfidence(
326		t,
327		text,
328		"hy",
329		0.95)
330
331	ensureClassifiedTextNamed(
332		t,
333		text,
334		"Armenian",
335		lang)
336}
337
338func TestSerbianLatinPhrase(t *testing.T) {
339	text := "ljudi ne znaju jer me uglavnom vide"
340	lang := "srpskohrvatski"
341
342	ensureClassifiedWithConfidence(
343		t,
344		text,
345		"sr",
346		0.85)
347
348	ensureClassifiedTextNamed(
349		t,
350		text,
351		"Serbo-Croatian",
352		lang)
353}
354
355func TestSerbianCyrillicPhrase(t *testing.T) {
356	text := "Код животиња су ове реакције посебно важне при зарастању рана"
357	lang := "српски"
358
359	ensureClassifiedWithConfidence(
360		t,
361		text,
362		"sr",
363		0.95)
364
365	ensureClassifiedTextNamed(
366		t,
367		text,
368		"Serbian (Cyrillic)",
369		lang)
370}
371
372func TestVietnamesePhrase(t *testing.T) {
373	text := "Truyền thông Việt Nam vào dịp này đăng bài ký tên ông"
374	lang := "Tiếng Việt"
375
376	ensureClassifiedWithConfidence(
377		t,
378		text,
379		"vi",
380		0.95)
381
382	ensureClassifiedTextNamed(
383		t,
384		text,
385		"Vietnamese",
386		lang)
387}
388
389func TestTeluguPhrase(t *testing.T) {
390	text := "భారతదేశంలోని దక్షిణ"
391	lang := "తెలుగు"
392
393	ensureClassifiedWithConfidence(
394		t,
395		text,
396		"te",
397		0.95)
398
399	ensureClassifiedTextNamed(
400		t,
401		text,
402		"Telugu",
403		lang)
404}
405
406func TestTamilPhrase(t *testing.T) {
407	text := " நீளமான, கிளைக்காத"
408	lang := "தமிழ்"
409
410	ensureClassifiedWithConfidence(
411		t,
412		text,
413		"ta",
414		0.95)
415
416	ensureClassifiedTextNamed(
417		t,
418		text,
419		"Tamil",
420		lang)
421}
422
423func TestTagalogPhrase(t *testing.T) {
424	text := "ano ang nangyayari sa iyo at ang mah-ina mo ay hindi mo"
425	lang := "Filipino"
426
427	ensureClassifiedWithConfidence(
428		t,
429		text,
430		"tl",
431		0.95)
432
433	ensureClassifiedTextNamed(
434		t,
435		text,
436		"Filipino",
437		lang)
438}
439
440func TestDutchPhrase(t *testing.T) {
441	text := "Een ieder heeft, waar hij zich ook bevindt, het recht als persoon erkend te worden voor de wet"
442	lang := "Nederlands"
443
444	ensureClassifiedWithConfidence(
445		t,
446		text,
447		"nl",
448		0.95)
449
450	ensureClassifiedTextNamed(
451		t,
452		text,
453		"Dutch",
454		lang)
455}
456
457func TestKannadaPhrase(t *testing.T) {
458	text := "ನನ್ನ ಹೆಸರು ಭಾರತ್."
459	lang := "ಕನ್ನಡ"
460
461	ensureClassifiedWithConfidence(
462		t,
463		text,
464		"kn",
465		0.95)
466
467	ensureClassifiedTextNamed(
468		t,
469		text,
470		"Kannada",
471		lang)
472}
473
474func TestNonsense(t *testing.T) {
475	text := "wep lvna eeii vl jkk azc nmn iuah ppl zccl c%l aa1z"
476	ensureClassifiedWithConfidence(
477		t,
478		text,
479		"und",
480		0.75)
481
482	ensureClassifiedTextNamed(
483		t,
484		text,
485		"Unknown language",
486		"")
487}
488
489func ensureClassifiedWithConfidence(t *testing.T, text string, expectedLang string, minConfidence float64) {
490	info := FromString(text)
491
492	assert.Equal(t, expectedLang, info.LanguageCode(), "Misclassified text: "+text)
493	assert.Equal(t, true, info.Confidence() > minConfidence)
494}
495
496func ensureClassifiedTextNamed(t *testing.T, text string, expectedEnglishName string, expectedSelfName string) {
497	info := FromString(text)
498
499	assert.Equal(t, expectedEnglishName, info.LanguageName(), "Wrong language name: "+text)
500	assert.Equal(t, expectedSelfName, info.SelfName(), "Wrong self lang name: "+text)
501}
502