1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zap
16
import (
	"os"
	"path/filepath"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment/mem"
)
26
27func TestBuild(t *testing.T) {
28	_ = os.RemoveAll("/tmp/scorch.zap")
29
30	memSegment := buildMemSegment()
31	err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
32	if err != nil {
33		t.Fatal(err)
34	}
35}
36
37func buildMemSegment() *mem.Segment {
38	doc := &document.Document{
39		ID: "a",
40		Fields: []document.Field{
41			document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
42			document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
43			document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
44			document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
45			document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
46		},
47		CompositeFields: []*document.CompositeField{
48			document.NewCompositeField("_all", true, nil, []string{"_id"}),
49		},
50	}
51
52	// forge analyzed docs
53	results := []*index.AnalysisResult{
54		&index.AnalysisResult{
55			Document: doc,
56			Analyzed: []analysis.TokenFrequencies{
57				analysis.TokenFrequency(analysis.TokenStream{
58					&analysis.Token{
59						Start:    0,
60						End:      1,
61						Position: 1,
62						Term:     []byte("a"),
63					},
64				}, nil, false),
65				analysis.TokenFrequency(analysis.TokenStream{
66					&analysis.Token{
67						Start:    0,
68						End:      3,
69						Position: 1,
70						Term:     []byte("wow"),
71					},
72				}, nil, true),
73				analysis.TokenFrequency(analysis.TokenStream{
74					&analysis.Token{
75						Start:    0,
76						End:      4,
77						Position: 1,
78						Term:     []byte("some"),
79					},
80					&analysis.Token{
81						Start:    5,
82						End:      10,
83						Position: 2,
84						Term:     []byte("thing"),
85					},
86				}, nil, true),
87				analysis.TokenFrequency(analysis.TokenStream{
88					&analysis.Token{
89						Start:    0,
90						End:      4,
91						Position: 1,
92						Term:     []byte("cold"),
93					},
94				}, []uint64{0}, true),
95				analysis.TokenFrequency(analysis.TokenStream{
96					&analysis.Token{
97						Start:    0,
98						End:      4,
99						Position: 1,
100						Term:     []byte("dark"),
101					},
102				}, []uint64{1}, true),
103			},
104			Length: []int{
105				1,
106				1,
107				2,
108				1,
109				1,
110			},
111		},
112	}
113
114	// fix up composite fields
115	for _, ar := range results {
116		for i, f := range ar.Document.Fields {
117			for _, cf := range ar.Document.CompositeFields {
118				cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
119			}
120		}
121	}
122
123	return mem.NewFromAnalyzedDocs(results)
124}
125
126func buildMemSegmentMulti() *mem.Segment {
127
128	doc := &document.Document{
129		ID: "a",
130		Fields: []document.Field{
131			document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
132			document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
133			document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
134			document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
135			document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
136		},
137		CompositeFields: []*document.CompositeField{
138			document.NewCompositeField("_all", true, nil, []string{"_id"}),
139		},
140	}
141
142	doc2 := &document.Document{
143		ID: "b",
144		Fields: []document.Field{
145			document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil),
146			document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
147			document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
148			document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
149			document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
150		},
151		CompositeFields: []*document.CompositeField{
152			document.NewCompositeField("_all", true, nil, []string{"_id"}),
153		},
154	}
155
156	// forge analyzed docs
157	results := []*index.AnalysisResult{
158		&index.AnalysisResult{
159			Document: doc,
160			Analyzed: []analysis.TokenFrequencies{
161				analysis.TokenFrequency(analysis.TokenStream{
162					&analysis.Token{
163						Start:    0,
164						End:      1,
165						Position: 1,
166						Term:     []byte("a"),
167					},
168				}, nil, false),
169				analysis.TokenFrequency(analysis.TokenStream{
170					&analysis.Token{
171						Start:    0,
172						End:      3,
173						Position: 1,
174						Term:     []byte("wow"),
175					},
176				}, nil, true),
177				analysis.TokenFrequency(analysis.TokenStream{
178					&analysis.Token{
179						Start:    0,
180						End:      4,
181						Position: 1,
182						Term:     []byte("some"),
183					},
184					&analysis.Token{
185						Start:    5,
186						End:      10,
187						Position: 2,
188						Term:     []byte("thing"),
189					},
190				}, nil, true),
191				analysis.TokenFrequency(analysis.TokenStream{
192					&analysis.Token{
193						Start:    0,
194						End:      4,
195						Position: 1,
196						Term:     []byte("cold"),
197					},
198				}, []uint64{0}, true),
199				analysis.TokenFrequency(analysis.TokenStream{
200					&analysis.Token{
201						Start:    0,
202						End:      4,
203						Position: 1,
204						Term:     []byte("dark"),
205					},
206				}, []uint64{1}, true),
207			},
208			Length: []int{
209				1,
210				1,
211				2,
212				1,
213				1,
214			},
215		},
216		&index.AnalysisResult{
217			Document: doc2,
218			Analyzed: []analysis.TokenFrequencies{
219				analysis.TokenFrequency(analysis.TokenStream{
220					&analysis.Token{
221						Start:    0,
222						End:      1,
223						Position: 1,
224						Term:     []byte("b"),
225					},
226				}, nil, false),
227				analysis.TokenFrequency(analysis.TokenStream{
228					&analysis.Token{
229						Start:    0,
230						End:      3,
231						Position: 1,
232						Term:     []byte("who"),
233					},
234				}, nil, true),
235				analysis.TokenFrequency(analysis.TokenStream{
236					&analysis.Token{
237						Start:    0,
238						End:      4,
239						Position: 1,
240						Term:     []byte("some"),
241					},
242					&analysis.Token{
243						Start:    5,
244						End:      10,
245						Position: 2,
246						Term:     []byte("thing"),
247					},
248				}, nil, true),
249				analysis.TokenFrequency(analysis.TokenStream{
250					&analysis.Token{
251						Start:    0,
252						End:      4,
253						Position: 1,
254						Term:     []byte("cold"),
255					},
256				}, []uint64{0}, true),
257				analysis.TokenFrequency(analysis.TokenStream{
258					&analysis.Token{
259						Start:    0,
260						End:      4,
261						Position: 1,
262						Term:     []byte("dark"),
263					},
264				}, []uint64{1}, true),
265			},
266			Length: []int{
267				1,
268				1,
269				2,
270				1,
271				1,
272			},
273		},
274	}
275
276	// fix up composite fields
277	for _, ar := range results {
278		for i, f := range ar.Document.Fields {
279			for _, cf := range ar.Document.CompositeFields {
280				cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
281			}
282		}
283	}
284
285	segment := mem.NewFromAnalyzedDocs(results)
286
287	return segment
288}
289
290func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) {
291
292	doc := &document.Document{
293		ID: "a",
294		Fields: []document.Field{
295			document.NewTextField("_id", nil, []byte("a")),
296			document.NewTextField("name", nil, []byte("wow")),
297			document.NewTextField("desc", nil, []byte("some thing")),
298			document.NewTextField("tag", []uint64{0}, []byte("cold")),
299		},
300		CompositeFields: []*document.CompositeField{
301			document.NewCompositeField("_all", true, nil, []string{"_id"}),
302		},
303	}
304
305	var fields []string
306	fields = append(fields, "_id")
307	fields = append(fields, "name")
308	fields = append(fields, "desc")
309	fields = append(fields, "tag")
310
311	// forge analyzed docs
312	results := []*index.AnalysisResult{
313		&index.AnalysisResult{
314			Document: doc,
315			Analyzed: []analysis.TokenFrequencies{
316				analysis.TokenFrequency(analysis.TokenStream{
317					&analysis.Token{
318						Start:    0,
319						End:      1,
320						Position: 1,
321						Term:     []byte("a"),
322					},
323				}, nil, false),
324				analysis.TokenFrequency(analysis.TokenStream{
325					&analysis.Token{
326						Start:    0,
327						End:      3,
328						Position: 1,
329						Term:     []byte("wow"),
330					},
331				}, nil, true),
332				analysis.TokenFrequency(analysis.TokenStream{
333					&analysis.Token{
334						Start:    0,
335						End:      4,
336						Position: 1,
337						Term:     []byte("some"),
338					},
339					&analysis.Token{
340						Start:    5,
341						End:      10,
342						Position: 2,
343						Term:     []byte("thing"),
344					},
345				}, nil, true),
346				analysis.TokenFrequency(analysis.TokenStream{
347					&analysis.Token{
348						Start:    0,
349						End:      4,
350						Position: 1,
351						Term:     []byte("cold"),
352					},
353				}, []uint64{0}, true),
354			},
355			Length: []int{
356				1,
357				1,
358				2,
359				1,
360				1,
361			},
362		},
363	}
364
365	// fix up composite fields
366	for _, ar := range results {
367		for i, f := range ar.Document.Fields {
368			for _, cf := range ar.Document.CompositeFields {
369				cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
370			}
371		}
372	}
373
374	return mem.NewFromAnalyzedDocs(results), fields
375}
376