//  Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ar

import (
	"fmt"
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
)
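// TestArabicNormalizeFilter checks the Arabic normalization token filter case
// by case: the Madda and Hamza forms of Alif fold to bare Alif, Alif Maksura
// becomes Yeh, Teh Marbuta becomes Heh, and tatweel along with the harakat
// (Fatha, Kasra, Damma, their tanwin forms, Sukun, and Shaddah) are stripped.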
func TestArabicNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// AlifMadda
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("آجن"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("اجن"),
				},
			},
		},
		// AlifHamzaAbove
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("أحمد"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("احمد"),
				},
			},
		},
		// AlifHamzaBelow
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("إعاذ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("اعاذ"),
				},
			},
		},
		// AlifMaksura
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بنى"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بني"),
				},
			},
		},
		// TehMarbuta
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("فاطمة"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("فاطمه"),
				},
			},
		},
		// Tatweel
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("روبرـــــت"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("روبرت"),
				},
			},
		},
		// Fatha
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("مَبنا"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("مبنا"),
				},
			},
		},
		// Kasra
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("علِي"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("علي"),
				},
			},
		},
		// Damma
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بُوات"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("بوات"),
				},
			},
		},
		// Fathatan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولداً"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدا"),
				},
			},
		},
		// Kasratan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدٍ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولد"),
				},
			},
		},
		// Dammatan
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولدٌ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ولد"),
				},
			},
		},
		// Sukun
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("نلْسون"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("نلسون"),
				},
			},
		},
		// Shaddah
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("هتميّ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("هتمي"),
				},
			},
		},
		// empty
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	arabicNormalizeFilter := NewArabicNormalizeFilter()
	for _, test := range tests {
		actual := arabicNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			// Guard the per-term hex diff so a mismatched stream length
			// reports a failure instead of panicking with an index error.
			if len(actual) > 0 && len(test.output) > 0 {
				t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
			}
		}
	}
}
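
// ExampleNewArabicNormalizeFilter is a minimal usage sketch, not part of the
// original suite. It reuses the AlifHamzaAbove case from the table above and
// assumes only the NewArabicNormalizeFilter constructor and Filter method
// already exercised by the test.
func ExampleNewArabicNormalizeFilter() {
	filter := NewArabicNormalizeFilter()
	stream := analysis.TokenStream{
		&analysis.Token{Term: []byte("أحمد")}, // Alif with Hamza above
	}
	for _, token := range filter.Filter(stream) {
		fmt.Println(string(token.Term)) // Hamza form folded to bare Alif
	}
	// Output:
	// احمد
}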