1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zap 16 17import ( 18 "os" 19 "testing" 20 21 "github.com/blevesearch/bleve/analysis" 22 "github.com/blevesearch/bleve/document" 23 "github.com/blevesearch/bleve/index" 24 "github.com/blevesearch/bleve/index/scorch/segment/mem" 25) 26 27func TestBuild(t *testing.T) { 28 _ = os.RemoveAll("/tmp/scorch.zap") 29 30 memSegment := buildMemSegment() 31 err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) 32 if err != nil { 33 t.Fatal(err) 34 } 35} 36 37func buildMemSegment() *mem.Segment { 38 doc := &document.Document{ 39 ID: "a", 40 Fields: []document.Field{ 41 document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), 42 document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 43 document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 44 document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 45 document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 46 }, 47 CompositeFields: []*document.CompositeField{ 48 document.NewCompositeField("_all", true, nil, []string{"_id"}), 49 }, 50 } 51 52 // forge analyzed docs 53 results := []*index.AnalysisResult{ 54 &index.AnalysisResult{ 55 Document: doc, 56 Analyzed: []analysis.TokenFrequencies{ 57 analysis.TokenFrequency(analysis.TokenStream{ 58 &analysis.Token{ 59 Start: 0, 60 End: 1, 61 Position: 1, 62 Term: []byte("a"), 63 }, 64 }, nil, false), 65 analysis.TokenFrequency(analysis.TokenStream{ 66 &analysis.Token{ 67 Start: 0, 68 End: 3, 69 Position: 1, 70 Term: []byte("wow"), 71 }, 72 }, nil, true), 73 analysis.TokenFrequency(analysis.TokenStream{ 74 &analysis.Token{ 75 Start: 0, 76 End: 4, 77 Position: 1, 78 Term: []byte("some"), 79 }, 80 &analysis.Token{ 81 Start: 5, 82 End: 10, 83 Position: 2, 84 Term: []byte("thing"), 85 }, 86 }, nil, true), 87 analysis.TokenFrequency(analysis.TokenStream{ 88 &analysis.Token{ 89 Start: 0, 90 End: 4, 91 Position: 1, 92 Term: []byte("cold"), 93 }, 94 }, []uint64{0}, true), 95 analysis.TokenFrequency(analysis.TokenStream{ 96 &analysis.Token{ 97 Start: 0, 98 End: 4, 99 Position: 1, 100 Term: []byte("dark"), 101 }, 102 }, []uint64{1}, true), 103 }, 104 Length: []int{ 105 1, 106 1, 107 2, 108 1, 109 1, 110 }, 111 }, 112 } 113 114 // fix up composite fields 115 for _, ar := range results { 116 for i, f := range ar.Document.Fields { 117 for _, cf := range ar.Document.CompositeFields { 118 cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) 119 } 120 } 121 } 122 123 return mem.NewFromAnalyzedDocs(results) 124} 125 126func buildMemSegmentMulti() *mem.Segment { 127 128 doc := &document.Document{ 129 ID: "a", 130 Fields: []document.Field{ 131 document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), 132 document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 133 document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 134 document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 135 document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 136 }, 137 CompositeFields: []*document.CompositeField{ 138 document.NewCompositeField("_all", true, nil, []string{"_id"}), 139 }, 140 } 141 142 doc2 := &document.Document{ 143 ID: "b", 144 Fields: []document.Field{ 145 document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), 146 document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 147 document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 148 document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 149 document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), 150 }, 151 CompositeFields: []*document.CompositeField{ 152 document.NewCompositeField("_all", true, nil, []string{"_id"}), 153 }, 154 } 155 156 // forge analyzed docs 157 results := []*index.AnalysisResult{ 158 &index.AnalysisResult{ 159 Document: doc, 160 Analyzed: []analysis.TokenFrequencies{ 161 analysis.TokenFrequency(analysis.TokenStream{ 162 &analysis.Token{ 163 Start: 0, 164 End: 1, 165 Position: 1, 166 Term: []byte("a"), 167 }, 168 }, nil, false), 169 analysis.TokenFrequency(analysis.TokenStream{ 170 &analysis.Token{ 171 Start: 0, 172 End: 3, 173 Position: 1, 174 Term: []byte("wow"), 175 }, 176 }, nil, true), 177 analysis.TokenFrequency(analysis.TokenStream{ 178 &analysis.Token{ 179 Start: 0, 180 End: 4, 181 Position: 1, 182 Term: []byte("some"), 183 }, 184 &analysis.Token{ 185 Start: 5, 186 End: 10, 187 Position: 2, 188 Term: []byte("thing"), 189 }, 190 }, nil, true), 191 analysis.TokenFrequency(analysis.TokenStream{ 192 &analysis.Token{ 193 Start: 0, 194 End: 4, 195 Position: 1, 196 Term: []byte("cold"), 197 }, 198 }, []uint64{0}, true), 199 analysis.TokenFrequency(analysis.TokenStream{ 200 &analysis.Token{ 201 Start: 0, 202 End: 4, 203 Position: 1, 204 Term: []byte("dark"), 205 }, 206 }, []uint64{1}, true), 207 }, 208 Length: []int{ 209 1, 210 1, 211 2, 212 1, 213 1, 214 }, 215 }, 216 &index.AnalysisResult{ 217 Document: doc2, 218 Analyzed: []analysis.TokenFrequencies{ 219 analysis.TokenFrequency(analysis.TokenStream{ 220 &analysis.Token{ 221 Start: 0, 222 End: 1, 223 Position: 1, 224 Term: []byte("b"), 225 }, 226 }, nil, false), 227 analysis.TokenFrequency(analysis.TokenStream{ 228 &analysis.Token{ 229 Start: 0, 230 End: 3, 231 Position: 1, 232 Term: []byte("who"), 233 }, 234 }, nil, true), 235 analysis.TokenFrequency(analysis.TokenStream{ 236 &analysis.Token{ 237 Start: 0, 238 End: 4, 239 Position: 1, 240 Term: []byte("some"), 241 }, 242 &analysis.Token{ 243 Start: 5, 244 End: 10, 245 Position: 2, 246 Term: []byte("thing"), 247 }, 248 }, nil, true), 249 analysis.TokenFrequency(analysis.TokenStream{ 250 &analysis.Token{ 251 Start: 0, 252 End: 4, 253 Position: 1, 254 Term: []byte("cold"), 255 }, 256 }, []uint64{0}, true), 257 analysis.TokenFrequency(analysis.TokenStream{ 258 &analysis.Token{ 259 Start: 0, 260 End: 4, 261 Position: 1, 262 Term: []byte("dark"), 263 }, 264 }, []uint64{1}, true), 265 }, 266 Length: []int{ 267 1, 268 1, 269 2, 270 1, 271 1, 272 }, 273 }, 274 } 275 276 // fix up composite fields 277 for _, ar := range results { 278 for i, f := range ar.Document.Fields { 279 for _, cf := range ar.Document.CompositeFields { 280 cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) 281 } 282 } 283 } 284 285 segment := mem.NewFromAnalyzedDocs(results) 286 287 return segment 288} 289 290func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) { 291 292 doc := &document.Document{ 293 ID: "a", 294 Fields: []document.Field{ 295 document.NewTextField("_id", nil, []byte("a")), 296 document.NewTextField("name", nil, []byte("wow")), 297 document.NewTextField("desc", nil, []byte("some thing")), 298 document.NewTextField("tag", []uint64{0}, []byte("cold")), 299 }, 300 CompositeFields: []*document.CompositeField{ 301 document.NewCompositeField("_all", true, nil, []string{"_id"}), 302 }, 303 } 304 305 var fields []string 306 fields = append(fields, "_id") 307 fields = append(fields, "name") 308 fields = append(fields, "desc") 309 fields = append(fields, "tag") 310 311 // forge analyzed docs 312 results := []*index.AnalysisResult{ 313 &index.AnalysisResult{ 314 Document: doc, 315 Analyzed: []analysis.TokenFrequencies{ 316 analysis.TokenFrequency(analysis.TokenStream{ 317 &analysis.Token{ 318 Start: 0, 319 End: 1, 320 Position: 1, 321 Term: []byte("a"), 322 }, 323 }, nil, false), 324 analysis.TokenFrequency(analysis.TokenStream{ 325 &analysis.Token{ 326 Start: 0, 327 End: 3, 328 Position: 1, 329 Term: []byte("wow"), 330 }, 331 }, nil, true), 332 analysis.TokenFrequency(analysis.TokenStream{ 333 &analysis.Token{ 334 Start: 0, 335 End: 4, 336 Position: 1, 337 Term: []byte("some"), 338 }, 339 &analysis.Token{ 340 Start: 5, 341 End: 10, 342 Position: 2, 343 Term: []byte("thing"), 344 }, 345 }, nil, true), 346 analysis.TokenFrequency(analysis.TokenStream{ 347 &analysis.Token{ 348 Start: 0, 349 End: 4, 350 Position: 1, 351 Term: []byte("cold"), 352 }, 353 }, []uint64{0}, true), 354 }, 355 Length: []int{ 356 1, 357 1, 358 2, 359 1, 360 1, 361 }, 362 }, 363 } 364 365 // fix up composite fields 366 for _, ar := range results { 367 for i, f := range ar.Document.Fields { 368 for _, cf := range ar.Document.CompositeFields { 369 cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) 370 } 371 } 372 } 373 374 return mem.NewFromAnalyzedDocs(results), fields 375} 376