1// Copyright 2012 Google Inc. All rights reserved. 2// Use of this source code is governed by the Apache 2.0 3// license that can be found in the LICENSE file. 4 5package search // import "google.golang.org/appengine/search" 6 7// TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage?? 8// TODO: Index.GetAll (or Iterator.GetAll)? 9// TODO: struct <-> protobuf tests. 10// TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero 11// time.Time)? _MAXIMUM_STRING_LENGTH? 12 13import ( 14 "errors" 15 "fmt" 16 "math" 17 "reflect" 18 "regexp" 19 "strconv" 20 "strings" 21 "time" 22 "unicode/utf8" 23 24 "github.com/golang/protobuf/proto" 25 "golang.org/x/net/context" 26 27 "google.golang.org/appengine" 28 "google.golang.org/appengine/internal" 29 pb "google.golang.org/appengine/internal/search" 30) 31 32const maxDocumentsPerPutDelete = 200 33 34var ( 35 // ErrInvalidDocumentType is returned when methods like Put, Get or Next 36 // are passed a dst or src argument of invalid type. 37 ErrInvalidDocumentType = errors.New("search: invalid document type") 38 39 // ErrNoSuchDocument is returned when no document was found for a given ID. 40 ErrNoSuchDocument = errors.New("search: no such document") 41 42 // ErrTooManyDocuments is returned when the user passes too many documents to 43 // PutMulti or DeleteMulti. 44 ErrTooManyDocuments = fmt.Errorf("search: too many documents given to put or delete (max is %d)", maxDocumentsPerPutDelete) 45) 46 47// Atom is a document field whose contents are indexed as a single indivisible 48// string. 49type Atom string 50 51// HTML is a document field whose contents are indexed as HTML. Only text nodes 52// are indexed: "foo<b>bar" will be treated as "foobar". 53type HTML string 54 55// validIndexNameOrDocID is the Go equivalent of Python's 56// _ValidateVisiblePrintableAsciiNotReserved. 57func validIndexNameOrDocID(s string) bool { 58 if strings.HasPrefix(s, "!") { 59 return false 60 } 61 for _, c := range s { 62 if c < 0x21 || 0x7f <= c { 63 return false 64 } 65 } 66 return true 67} 68 69var ( 70 fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`) 71 languageRE = regexp.MustCompile(`^[a-z]{2}$`) 72) 73 74// validFieldName is the Go equivalent of Python's _CheckFieldName. It checks 75// the validity of both field and facet names. 76func validFieldName(s string) bool { 77 return len(s) <= 500 && fieldNameRE.MatchString(s) 78} 79 80// validDocRank checks that the ranks is in the range [0, 2^31). 81func validDocRank(r int) bool { 82 return 0 <= r && r <= (1<<31-1) 83} 84 85// validLanguage checks that a language looks like ISO 639-1. 86func validLanguage(s string) bool { 87 return languageRE.MatchString(s) 88} 89 90// validFloat checks that f is in the range [-2147483647, 2147483647]. 91func validFloat(f float64) bool { 92 return -(1<<31-1) <= f && f <= (1<<31-1) 93} 94 95// Index is an index of documents. 96type Index struct { 97 spec pb.IndexSpec 98} 99 100// orderIDEpoch forms the basis for populating OrderId on documents. 101var orderIDEpoch = time.Date(2011, 1, 1, 0, 0, 0, 0, time.UTC) 102 103// Open opens the index with the given name. The index is created if it does 104// not already exist. 105// 106// The name is a human-readable ASCII string. It must contain no whitespace 107// characters and not start with "!". 108func Open(name string) (*Index, error) { 109 if !validIndexNameOrDocID(name) { 110 return nil, fmt.Errorf("search: invalid index name %q", name) 111 } 112 return &Index{ 113 spec: pb.IndexSpec{ 114 Name: &name, 115 }, 116 }, nil 117} 118 119// Put saves src to the index. If id is empty, a new ID is allocated by the 120// service and returned. If id is not empty, any existing index entry for that 121// ID is replaced. 122// 123// The ID is a human-readable ASCII string. It must contain no whitespace 124// characters and not start with "!". 125// 126// src must be a non-nil struct pointer or implement the FieldLoadSaver 127// interface. 128func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) { 129 ids, err := x.PutMulti(c, []string{id}, []interface{}{src}) 130 if err != nil { 131 return "", err 132 } 133 return ids[0], nil 134} 135 136// PutMulti is like Put, but is more efficient for adding multiple documents to 137// the index at once. 138// 139// Up to 200 documents can be added at once. ErrTooManyDocuments is returned if 140// you try to add more. 141// 142// ids can either be an empty slice (which means new IDs will be allocated for 143// each of the documents added) or a slice the same size as srcs. 144// 145// The error may be an instance of appengine.MultiError, in which case it will 146// be the same size as srcs and the individual errors inside will correspond 147// with the items in srcs. 148func (x *Index) PutMulti(c context.Context, ids []string, srcs []interface{}) ([]string, error) { 149 if len(ids) != 0 && len(srcs) != len(ids) { 150 return nil, fmt.Errorf("search: PutMulti expects ids and srcs slices of the same length") 151 } 152 if len(srcs) > maxDocumentsPerPutDelete { 153 return nil, ErrTooManyDocuments 154 } 155 156 docs := make([]*pb.Document, len(srcs)) 157 for i, s := range srcs { 158 var err error 159 docs[i], err = saveDoc(s) 160 if err != nil { 161 return nil, err 162 } 163 164 if len(ids) != 0 && ids[i] != "" { 165 if !validIndexNameOrDocID(ids[i]) { 166 return nil, fmt.Errorf("search: invalid ID %q", ids[i]) 167 } 168 docs[i].Id = proto.String(ids[i]) 169 } 170 } 171 172 // spec is modified by Call when applying the current Namespace, so copy it to 173 // avoid retaining the namespace beyond the scope of the Call. 174 spec := x.spec 175 req := &pb.IndexDocumentRequest{ 176 Params: &pb.IndexDocumentParams{ 177 Document: docs, 178 IndexSpec: &spec, 179 }, 180 } 181 res := &pb.IndexDocumentResponse{} 182 if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil { 183 return nil, err 184 } 185 multiErr, hasErr := make(appengine.MultiError, len(res.Status)), false 186 for i, s := range res.Status { 187 if s.GetCode() != pb.SearchServiceError_OK { 188 multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) 189 hasErr = true 190 } 191 } 192 if hasErr { 193 return res.DocId, multiErr 194 } 195 196 if len(res.Status) != len(docs) || len(res.DocId) != len(docs) { 197 return nil, fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs, expected %d)", 198 len(res.Status), len(res.DocId), len(docs)) 199 } 200 return res.DocId, nil 201} 202 203// Get loads the document with the given ID into dst. 204// 205// The ID is a human-readable ASCII string. It must be non-empty, contain no 206// whitespace characters and not start with "!". 207// 208// dst must be a non-nil struct pointer or implement the FieldLoadSaver 209// interface. 210// 211// ErrFieldMismatch is returned when a field is to be loaded into a different 212// type than the one it was stored from, or when a field is missing or 213// unexported in the destination struct. ErrFieldMismatch is only returned if 214// dst is a struct pointer. It is up to the callee to decide whether this error 215// is fatal, recoverable, or ignorable. 216func (x *Index) Get(c context.Context, id string, dst interface{}) error { 217 if id == "" || !validIndexNameOrDocID(id) { 218 return fmt.Errorf("search: invalid ID %q", id) 219 } 220 req := &pb.ListDocumentsRequest{ 221 Params: &pb.ListDocumentsParams{ 222 IndexSpec: &x.spec, 223 StartDocId: proto.String(id), 224 Limit: proto.Int32(1), 225 }, 226 } 227 res := &pb.ListDocumentsResponse{} 228 if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil { 229 return err 230 } 231 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 232 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 233 } 234 if len(res.Document) != 1 || res.Document[0].GetId() != id { 235 return ErrNoSuchDocument 236 } 237 return loadDoc(dst, res.Document[0], nil) 238} 239 240// Delete deletes a document from the index. 241func (x *Index) Delete(c context.Context, id string) error { 242 return x.DeleteMulti(c, []string{id}) 243} 244 245// DeleteMulti deletes multiple documents from the index. 246// 247// The returned error may be an instance of appengine.MultiError, in which case 248// it will be the same size as srcs and the individual errors inside will 249// correspond with the items in srcs. 250func (x *Index) DeleteMulti(c context.Context, ids []string) error { 251 if len(ids) > maxDocumentsPerPutDelete { 252 return ErrTooManyDocuments 253 } 254 255 req := &pb.DeleteDocumentRequest{ 256 Params: &pb.DeleteDocumentParams{ 257 DocId: ids, 258 IndexSpec: &x.spec, 259 }, 260 } 261 res := &pb.DeleteDocumentResponse{} 262 if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil { 263 return err 264 } 265 if len(res.Status) != len(ids) { 266 return fmt.Errorf("search: internal error: wrong number of results (%d, expected %d)", 267 len(res.Status), len(ids)) 268 } 269 multiErr, hasErr := make(appengine.MultiError, len(ids)), false 270 for i, s := range res.Status { 271 if s.GetCode() != pb.SearchServiceError_OK { 272 multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) 273 hasErr = true 274 } 275 } 276 if hasErr { 277 return multiErr 278 } 279 return nil 280} 281 282// List lists all of the documents in an index. The documents are returned in 283// increasing ID order. 284func (x *Index) List(c context.Context, opts *ListOptions) *Iterator { 285 t := &Iterator{ 286 c: c, 287 index: x, 288 count: -1, 289 listInclusive: true, 290 more: moreList, 291 } 292 if opts != nil { 293 t.listStartID = opts.StartID 294 t.limit = opts.Limit 295 t.idsOnly = opts.IDsOnly 296 } 297 return t 298} 299 300func moreList(t *Iterator) error { 301 req := &pb.ListDocumentsRequest{ 302 Params: &pb.ListDocumentsParams{ 303 IndexSpec: &t.index.spec, 304 }, 305 } 306 if t.listStartID != "" { 307 req.Params.StartDocId = &t.listStartID 308 req.Params.IncludeStartDoc = &t.listInclusive 309 } 310 if t.limit > 0 { 311 req.Params.Limit = proto.Int32(int32(t.limit)) 312 } 313 if t.idsOnly { 314 req.Params.KeysOnly = &t.idsOnly 315 } 316 317 res := &pb.ListDocumentsResponse{} 318 if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil { 319 return err 320 } 321 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 322 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 323 } 324 t.listRes = res.Document 325 t.listStartID, t.listInclusive, t.more = "", false, nil 326 if len(res.Document) != 0 && t.limit <= 0 { 327 if id := res.Document[len(res.Document)-1].GetId(); id != "" { 328 t.listStartID, t.more = id, moreList 329 } 330 } 331 return nil 332} 333 334// ListOptions are the options for listing documents in an index. Passing a nil 335// *ListOptions is equivalent to using the default values. 336type ListOptions struct { 337 // StartID is the inclusive lower bound for the ID of the returned 338 // documents. The zero value means all documents will be returned. 339 StartID string 340 341 // Limit is the maximum number of documents to return. The zero value 342 // indicates no limit. 343 Limit int 344 345 // IDsOnly indicates that only document IDs should be returned for the list 346 // operation; no document fields are populated. 347 IDsOnly bool 348} 349 350// Search searches the index for the given query. 351func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator { 352 t := &Iterator{ 353 c: c, 354 index: x, 355 searchQuery: query, 356 more: moreSearch, 357 } 358 if opts != nil { 359 if opts.Cursor != "" { 360 if opts.Offset != 0 { 361 return errIter("at most one of Cursor and Offset may be specified") 362 } 363 t.searchCursor = proto.String(string(opts.Cursor)) 364 } 365 t.limit = opts.Limit 366 t.fields = opts.Fields 367 t.idsOnly = opts.IDsOnly 368 t.sort = opts.Sort 369 t.exprs = opts.Expressions 370 t.refinements = opts.Refinements 371 t.facetOpts = opts.Facets 372 t.searchOffset = opts.Offset 373 t.countAccuracy = opts.CountAccuracy 374 } 375 return t 376} 377 378func moreSearch(t *Iterator) error { 379 // We use per-result (rather than single/per-page) cursors since this 380 // lets us return a Cursor for every iterator document. The two cursor 381 // types are largely interchangeable: a page cursor is the same as the 382 // last per-result cursor in a given search response. 383 req := &pb.SearchRequest{ 384 Params: &pb.SearchParams{ 385 IndexSpec: &t.index.spec, 386 Query: &t.searchQuery, 387 Cursor: t.searchCursor, 388 CursorType: pb.SearchParams_PER_RESULT.Enum(), 389 FieldSpec: &pb.FieldSpec{ 390 Name: t.fields, 391 }, 392 }, 393 } 394 if t.limit > 0 { 395 req.Params.Limit = proto.Int32(int32(t.limit)) 396 } 397 if t.searchOffset > 0 { 398 req.Params.Offset = proto.Int32(int32(t.searchOffset)) 399 t.searchOffset = 0 400 } 401 if t.countAccuracy > 0 { 402 req.Params.MatchedCountAccuracy = proto.Int32(int32(t.countAccuracy)) 403 } 404 if t.idsOnly { 405 req.Params.KeysOnly = &t.idsOnly 406 } 407 if t.sort != nil { 408 if err := sortToProto(t.sort, req.Params); err != nil { 409 return err 410 } 411 } 412 if t.refinements != nil { 413 if err := refinementsToProto(t.refinements, req.Params); err != nil { 414 return err 415 } 416 } 417 for _, e := range t.exprs { 418 req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{ 419 Name: proto.String(e.Name), 420 Expression: proto.String(e.Expr), 421 }) 422 } 423 for _, f := range t.facetOpts { 424 if err := f.setParams(req.Params); err != nil { 425 return fmt.Errorf("bad FacetSearchOption: %v", err) 426 } 427 } 428 // Don't repeat facet search. 429 t.facetOpts = nil 430 431 res := &pb.SearchResponse{} 432 if err := internal.Call(t.c, "search", "Search", req, res); err != nil { 433 return err 434 } 435 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 436 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 437 } 438 t.searchRes = res.Result 439 if len(res.FacetResult) > 0 { 440 t.facetRes = res.FacetResult 441 } 442 t.count = int(*res.MatchedCount) 443 if t.limit > 0 { 444 t.more = nil 445 } else { 446 t.more = moreSearch 447 } 448 return nil 449} 450 451// SearchOptions are the options for searching an index. Passing a nil 452// *SearchOptions is equivalent to using the default values. 453type SearchOptions struct { 454 // Limit is the maximum number of documents to return. The zero value 455 // indicates no limit. 456 Limit int 457 458 // IDsOnly indicates that only document IDs should be returned for the search 459 // operation; no document fields are populated. 460 IDsOnly bool 461 462 // Sort controls the ordering of search results. 463 Sort *SortOptions 464 465 // Fields specifies which document fields to include in the results. If omitted, 466 // all document fields are returned. No more than 100 fields may be specified. 467 Fields []string 468 469 // Expressions specifies additional computed fields to add to each returned 470 // document. 471 Expressions []FieldExpression 472 473 // Facets controls what facet information is returned for these search results. 474 // If no options are specified, no facet results will be returned. 475 Facets []FacetSearchOption 476 477 // Refinements filters the returned documents by requiring them to contain facets 478 // with specific values. Refinements are applied in conjunction for facets with 479 // different names, and in disjunction otherwise. 480 Refinements []Facet 481 482 // Cursor causes the results to commence with the first document after 483 // the document associated with the cursor. 484 Cursor Cursor 485 486 // Offset specifies the number of documents to skip over before returning results. 487 // When specified, Cursor must be nil. 488 Offset int 489 490 // CountAccuracy specifies the maximum result count that can be expected to 491 // be accurate. If zero, the count accuracy defaults to 20. 492 CountAccuracy int 493} 494 495// Cursor represents an iterator's position. 496// 497// The string value of a cursor is web-safe. It can be saved and restored 498// for later use. 499type Cursor string 500 501// FieldExpression defines a custom expression to evaluate for each result. 502type FieldExpression struct { 503 // Name is the name to use for the computed field. 504 Name string 505 506 // Expr is evaluated to provide a custom content snippet for each document. 507 // See https://cloud.google.com/appengine/docs/standard/go/search/options for 508 // the supported expression syntax. 509 Expr string 510} 511 512// FacetSearchOption controls what facet information is returned in search results. 513type FacetSearchOption interface { 514 setParams(*pb.SearchParams) error 515} 516 517// AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet 518// discovery for the search. Automatic facet discovery looks for the facets 519// which appear the most often in the aggregate in the matched documents. 520// 521// The maximum number of facets returned is controlled by facetLimit, and the 522// maximum number of values per facet by facetLimit. A limit of zero indicates 523// a default limit should be used. 524func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption { 525 return &autoFacetOpt{facetLimit, valueLimit} 526} 527 528type autoFacetOpt struct { 529 facetLimit, valueLimit int 530} 531 532const defaultAutoFacetLimit = 10 // As per python runtime search.py. 533 534func (o *autoFacetOpt) setParams(params *pb.SearchParams) error { 535 lim := int32(o.facetLimit) 536 if lim == 0 { 537 lim = defaultAutoFacetLimit 538 } 539 params.AutoDiscoverFacetCount = &lim 540 if o.valueLimit > 0 { 541 params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{ 542 ValueLimit: proto.Int32(int32(o.valueLimit)), 543 } 544 } 545 return nil 546} 547 548// FacetDiscovery returns a FacetSearchOption which selects a facet to be 549// returned with the search results. By default, the most frequently 550// occurring values for that facet will be returned. However, you can also 551// specify a list of particular Atoms or specific Ranges to return. 552func FacetDiscovery(name string, value ...interface{}) FacetSearchOption { 553 return &facetOpt{name, value} 554} 555 556type facetOpt struct { 557 name string 558 values []interface{} 559} 560 561func (o *facetOpt) setParams(params *pb.SearchParams) error { 562 req := &pb.FacetRequest{Name: &o.name} 563 params.IncludeFacet = append(params.IncludeFacet, req) 564 if len(o.values) == 0 { 565 return nil 566 } 567 vtype := reflect.TypeOf(o.values[0]) 568 reqParam := &pb.FacetRequestParam{} 569 for _, v := range o.values { 570 if reflect.TypeOf(v) != vtype { 571 return errors.New("values must all be Atom, or must all be Range") 572 } 573 switch v := v.(type) { 574 case Atom: 575 reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v)) 576 case Range: 577 rng, err := rangeToProto(v) 578 if err != nil { 579 return fmt.Errorf("invalid range: %v", err) 580 } 581 reqParam.Range = append(reqParam.Range, rng) 582 default: 583 return fmt.Errorf("unsupported value type %T", v) 584 } 585 } 586 req.Params = reqParam 587 return nil 588} 589 590// FacetDocumentDepth returns a FacetSearchOption which controls the number of 591// documents to be evaluated with preparing facet results. 592func FacetDocumentDepth(depth int) FacetSearchOption { 593 return facetDepthOpt(depth) 594} 595 596type facetDepthOpt int 597 598func (o facetDepthOpt) setParams(params *pb.SearchParams) error { 599 params.FacetDepth = proto.Int32(int32(o)) 600 return nil 601} 602 603// FacetResult represents the number of times a particular facet and value 604// appeared in the documents matching a search request. 605type FacetResult struct { 606 Facet 607 608 // Count is the number of times this specific facet and value appeared in the 609 // matching documents. 610 Count int 611} 612 613// Range represents a numeric range with inclusive start and exclusive end. 614// Start may be specified as math.Inf(-1) to indicate there is no minimum 615// value, and End may similarly be specified as math.Inf(1); at least one of 616// Start or End must be a finite number. 617type Range struct { 618 Start, End float64 619} 620 621var ( 622 negInf = math.Inf(-1) 623 posInf = math.Inf(1) 624) 625 626// AtLeast returns a Range matching any value greater than, or equal to, min. 627func AtLeast(min float64) Range { 628 return Range{Start: min, End: posInf} 629} 630 631// LessThan returns a Range matching any value less than max. 632func LessThan(max float64) Range { 633 return Range{Start: negInf, End: max} 634} 635 636// SortOptions control the ordering and scoring of search results. 637type SortOptions struct { 638 // Expressions is a slice of expressions representing a multi-dimensional 639 // sort. 640 Expressions []SortExpression 641 642 // Scorer, when specified, will cause the documents to be scored according to 643 // search term frequency. 644 Scorer Scorer 645 646 // Limit is the maximum number of objects to score and/or sort. Limit cannot 647 // be more than 10,000. The zero value indicates a default limit. 648 Limit int 649} 650 651// SortExpression defines a single dimension for sorting a document. 652type SortExpression struct { 653 // Expr is evaluated to provide a sorting value for each document. 654 // See https://cloud.google.com/appengine/docs/standard/go/search/options for 655 // the supported expression syntax. 656 Expr string 657 658 // Reverse causes the documents to be sorted in ascending order. 659 Reverse bool 660 661 // The default value to use when no field is present or the expresion 662 // cannot be calculated for a document. For text sorts, Default must 663 // be of type string; for numeric sorts, float64. 664 Default interface{} 665} 666 667// A Scorer defines how a document is scored. 668type Scorer interface { 669 toProto(*pb.ScorerSpec) 670} 671 672type enumScorer struct { 673 enum pb.ScorerSpec_Scorer 674} 675 676func (e enumScorer) toProto(spec *pb.ScorerSpec) { 677 spec.Scorer = e.enum.Enum() 678} 679 680var ( 681 // MatchScorer assigns a score based on term frequency in a document. 682 MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER} 683 684 // RescoringMatchScorer assigns a score based on the quality of the query 685 // match. It is similar to a MatchScorer but uses a more complex scoring 686 // algorithm based on match term frequency and other factors like field type. 687 // Please be aware that this algorithm is continually refined and can change 688 // over time without notice. This means that the ordering of search results 689 // that use this scorer can also change without notice. 690 RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER} 691) 692 693func sortToProto(sort *SortOptions, params *pb.SearchParams) error { 694 for _, e := range sort.Expressions { 695 spec := &pb.SortSpec{ 696 SortExpression: proto.String(e.Expr), 697 } 698 if e.Reverse { 699 spec.SortDescending = proto.Bool(false) 700 } 701 if e.Default != nil { 702 switch d := e.Default.(type) { 703 case float64: 704 spec.DefaultValueNumeric = &d 705 case string: 706 spec.DefaultValueText = &d 707 default: 708 return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr) 709 } 710 } 711 params.SortSpec = append(params.SortSpec, spec) 712 } 713 714 spec := &pb.ScorerSpec{} 715 if sort.Limit > 0 { 716 spec.Limit = proto.Int32(int32(sort.Limit)) 717 params.ScorerSpec = spec 718 } 719 if sort.Scorer != nil { 720 sort.Scorer.toProto(spec) 721 params.ScorerSpec = spec 722 } 723 724 return nil 725} 726 727func refinementsToProto(refinements []Facet, params *pb.SearchParams) error { 728 for _, r := range refinements { 729 ref := &pb.FacetRefinement{ 730 Name: proto.String(r.Name), 731 } 732 switch v := r.Value.(type) { 733 case Atom: 734 ref.Value = proto.String(string(v)) 735 case Range: 736 rng, err := rangeToProto(v) 737 if err != nil { 738 return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err) 739 } 740 // Unfortunately there are two identical messages for identify Facet ranges. 741 ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End} 742 default: 743 return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v) 744 } 745 params.FacetRefinement = append(params.FacetRefinement, ref) 746 } 747 return nil 748} 749 750func rangeToProto(r Range) (*pb.FacetRange, error) { 751 rng := &pb.FacetRange{} 752 if r.Start != negInf { 753 if !validFloat(r.Start) { 754 return nil, errors.New("invalid value for Start") 755 } 756 rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64)) 757 } else if r.End == posInf { 758 return nil, errors.New("either Start or End must be finite") 759 } 760 if r.End != posInf { 761 if !validFloat(r.End) { 762 return nil, errors.New("invalid value for End") 763 } 764 rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64)) 765 } 766 return rng, nil 767} 768 769func protoToRange(rng *pb.FacetRefinement_Range) Range { 770 r := Range{Start: negInf, End: posInf} 771 if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil { 772 r.Start = x 773 } 774 if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil { 775 r.End = x 776 } 777 return r 778} 779 780// Iterator is the result of searching an index for a query or listing an 781// index. 782type Iterator struct { 783 c context.Context 784 index *Index 785 err error 786 787 listRes []*pb.Document 788 listStartID string 789 listInclusive bool 790 791 searchRes []*pb.SearchResult 792 facetRes []*pb.FacetResult 793 searchQuery string 794 searchCursor *string 795 searchOffset int 796 sort *SortOptions 797 798 fields []string 799 exprs []FieldExpression 800 refinements []Facet 801 facetOpts []FacetSearchOption 802 803 more func(*Iterator) error 804 805 count int 806 countAccuracy int 807 limit int // items left to return; 0 for unlimited. 808 idsOnly bool 809} 810 811// errIter returns an iterator that only returns the given error. 812func errIter(err string) *Iterator { 813 return &Iterator{ 814 err: errors.New(err), 815 } 816} 817 818// Done is returned when a query iteration has completed. 819var Done = errors.New("search: query has no more results") 820 821// Count returns an approximation of the number of documents matched by the 822// query. It is only valid to call for iterators returned by Search. 823func (t *Iterator) Count() int { return t.count } 824 825// fetchMore retrieves more results, if there are no errors or pending results. 826func (t *Iterator) fetchMore() { 827 if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil { 828 t.err = t.more(t) 829 } 830} 831 832// Next returns the ID of the next result. When there are no more results, 833// Done is returned as the error. 834// 835// dst must be a non-nil struct pointer, implement the FieldLoadSaver 836// interface, or be a nil interface value. If a non-nil dst is provided, it 837// will be filled with the indexed fields. dst is ignored if this iterator was 838// created with an IDsOnly option. 839func (t *Iterator) Next(dst interface{}) (string, error) { 840 t.fetchMore() 841 if t.err != nil { 842 return "", t.err 843 } 844 845 var doc *pb.Document 846 var exprs []*pb.Field 847 switch { 848 case len(t.listRes) != 0: 849 doc = t.listRes[0] 850 t.listRes = t.listRes[1:] 851 case len(t.searchRes) != 0: 852 doc = t.searchRes[0].Document 853 exprs = t.searchRes[0].Expression 854 t.searchCursor = t.searchRes[0].Cursor 855 t.searchRes = t.searchRes[1:] 856 default: 857 return "", Done 858 } 859 if doc == nil { 860 return "", errors.New("search: internal error: no document returned") 861 } 862 if !t.idsOnly && dst != nil { 863 if err := loadDoc(dst, doc, exprs); err != nil { 864 return "", err 865 } 866 } 867 return doc.GetId(), nil 868} 869 870// Cursor returns the cursor associated with the current document (that is, 871// the document most recently returned by a call to Next). 872// 873// Passing this cursor in a future call to Search will cause those results 874// to commence with the first document after the current document. 875func (t *Iterator) Cursor() Cursor { 876 if t.searchCursor == nil { 877 return "" 878 } 879 return Cursor(*t.searchCursor) 880} 881 882// Facets returns the facets found within the search results, if any facets 883// were requested in the SearchOptions. 884func (t *Iterator) Facets() ([][]FacetResult, error) { 885 t.fetchMore() 886 if t.err != nil && t.err != Done { 887 return nil, t.err 888 } 889 890 var facets [][]FacetResult 891 for _, f := range t.facetRes { 892 fres := make([]FacetResult, 0, len(f.Value)) 893 for _, v := range f.Value { 894 ref := v.Refinement 895 facet := FacetResult{ 896 Facet: Facet{Name: ref.GetName()}, 897 Count: int(v.GetCount()), 898 } 899 if ref.Value != nil { 900 facet.Value = Atom(*ref.Value) 901 } else { 902 facet.Value = protoToRange(ref.Range) 903 } 904 fres = append(fres, facet) 905 } 906 facets = append(facets, fres) 907 } 908 return facets, nil 909} 910 911// saveDoc converts from a struct pointer or 912// FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf. 913func saveDoc(src interface{}) (*pb.Document, error) { 914 var err error 915 var fields []Field 916 var meta *DocumentMetadata 917 switch x := src.(type) { 918 case FieldLoadSaver: 919 fields, meta, err = x.Save() 920 default: 921 fields, meta, err = saveStructWithMeta(src) 922 } 923 if err != nil { 924 return nil, err 925 } 926 927 fieldsProto, err := fieldsToProto(fields) 928 if err != nil { 929 return nil, err 930 } 931 d := &pb.Document{ 932 Field: fieldsProto, 933 OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())), 934 OrderIdSource: pb.Document_DEFAULTED.Enum(), 935 } 936 if meta != nil { 937 if meta.Rank != 0 { 938 if !validDocRank(meta.Rank) { 939 return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank) 940 } 941 *d.OrderId = int32(meta.Rank) 942 d.OrderIdSource = pb.Document_SUPPLIED.Enum() 943 } 944 if len(meta.Facets) > 0 { 945 facets, err := facetsToProto(meta.Facets) 946 if err != nil { 947 return nil, err 948 } 949 d.Facet = facets 950 } 951 } 952 return d, nil 953} 954 955func fieldsToProto(src []Field) ([]*pb.Field, error) { 956 // Maps to catch duplicate time or numeric fields. 957 timeFields, numericFields := make(map[string]bool), make(map[string]bool) 958 dst := make([]*pb.Field, 0, len(src)) 959 for _, f := range src { 960 if !validFieldName(f.Name) { 961 return nil, fmt.Errorf("search: invalid field name %q", f.Name) 962 } 963 fieldValue := &pb.FieldValue{} 964 switch x := f.Value.(type) { 965 case string: 966 fieldValue.Type = pb.FieldValue_TEXT.Enum() 967 fieldValue.StringValue = proto.String(x) 968 case Atom: 969 fieldValue.Type = pb.FieldValue_ATOM.Enum() 970 fieldValue.StringValue = proto.String(string(x)) 971 case HTML: 972 fieldValue.Type = pb.FieldValue_HTML.Enum() 973 fieldValue.StringValue = proto.String(string(x)) 974 case time.Time: 975 if timeFields[f.Name] { 976 return nil, fmt.Errorf("search: duplicate time field %q", f.Name) 977 } 978 timeFields[f.Name] = true 979 fieldValue.Type = pb.FieldValue_DATE.Enum() 980 fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10)) 981 case float64: 982 if numericFields[f.Name] { 983 return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name) 984 } 985 if !validFloat(x) { 986 return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x) 987 } 988 numericFields[f.Name] = true 989 fieldValue.Type = pb.FieldValue_NUMBER.Enum() 990 fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) 991 case appengine.GeoPoint: 992 if !x.Valid() { 993 return nil, fmt.Errorf( 994 "search: GeoPoint field %q with invalid value %v", 995 f.Name, x) 996 } 997 fieldValue.Type = pb.FieldValue_GEO.Enum() 998 fieldValue.Geo = &pb.FieldValue_Geo{ 999 Lat: proto.Float64(x.Lat), 1000 Lng: proto.Float64(x.Lng), 1001 } 1002 default: 1003 return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value)) 1004 } 1005 if f.Language != "" { 1006 switch f.Value.(type) { 1007 case string, HTML: 1008 if !validLanguage(f.Language) { 1009 return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language) 1010 } 1011 fieldValue.Language = proto.String(f.Language) 1012 default: 1013 return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value) 1014 } 1015 } 1016 if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) { 1017 return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p) 1018 } 1019 dst = append(dst, &pb.Field{ 1020 Name: proto.String(f.Name), 1021 Value: fieldValue, 1022 }) 1023 } 1024 return dst, nil 1025} 1026 1027func facetsToProto(src []Facet) ([]*pb.Facet, error) { 1028 dst := make([]*pb.Facet, 0, len(src)) 1029 for _, f := range src { 1030 if !validFieldName(f.Name) { 1031 return nil, fmt.Errorf("search: invalid facet name %q", f.Name) 1032 } 1033 facetValue := &pb.FacetValue{} 1034 switch x := f.Value.(type) { 1035 case Atom: 1036 if !utf8.ValidString(string(x)) { 1037 return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x) 1038 } 1039 facetValue.Type = pb.FacetValue_ATOM.Enum() 1040 facetValue.StringValue = proto.String(string(x)) 1041 case float64: 1042 if !validFloat(x) { 1043 return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x) 1044 } 1045 facetValue.Type = pb.FacetValue_NUMBER.Enum() 1046 facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) 1047 default: 1048 return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value)) 1049 } 1050 dst = append(dst, &pb.Facet{ 1051 Name: proto.String(f.Name), 1052 Value: facetValue, 1053 }) 1054 } 1055 return dst, nil 1056} 1057 1058// loadDoc converts from protobufs to a struct pointer or 1059// FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's 1060// stored fields and facets, and any document metadata. An additional slice of 1061// fields, exprs, may optionally be provided to contain any derived expressions 1062// requested by the developer. 1063func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) { 1064 fields, err := protoToFields(src.Field) 1065 if err != nil { 1066 return err 1067 } 1068 facets, err := protoToFacets(src.Facet) 1069 if err != nil { 1070 return err 1071 } 1072 if len(exprs) > 0 { 1073 exprFields, err := protoToFields(exprs) 1074 if err != nil { 1075 return err 1076 } 1077 // Mark each field as derived. 1078 for i := range exprFields { 1079 exprFields[i].Derived = true 1080 } 1081 fields = append(fields, exprFields...) 1082 } 1083 meta := &DocumentMetadata{ 1084 Rank: int(src.GetOrderId()), 1085 Facets: facets, 1086 } 1087 switch x := dst.(type) { 1088 case FieldLoadSaver: 1089 return x.Load(fields, meta) 1090 default: 1091 return loadStructWithMeta(dst, fields, meta) 1092 } 1093} 1094 1095func protoToFields(fields []*pb.Field) ([]Field, error) { 1096 dst := make([]Field, 0, len(fields)) 1097 for _, field := range fields { 1098 fieldValue := field.GetValue() 1099 f := Field{ 1100 Name: field.GetName(), 1101 } 1102 switch fieldValue.GetType() { 1103 case pb.FieldValue_TEXT: 1104 f.Value = fieldValue.GetStringValue() 1105 f.Language = fieldValue.GetLanguage() 1106 case pb.FieldValue_ATOM: 1107 f.Value = Atom(fieldValue.GetStringValue()) 1108 case pb.FieldValue_HTML: 1109 f.Value = HTML(fieldValue.GetStringValue()) 1110 f.Language = fieldValue.GetLanguage() 1111 case pb.FieldValue_DATE: 1112 sv := fieldValue.GetStringValue() 1113 millis, err := strconv.ParseInt(sv, 10, 64) 1114 if err != nil { 1115 return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err) 1116 } 1117 f.Value = time.Unix(0, millis*1e6) 1118 case pb.FieldValue_NUMBER: 1119 sv := fieldValue.GetStringValue() 1120 x, err := strconv.ParseFloat(sv, 64) 1121 if err != nil { 1122 return nil, err 1123 } 1124 f.Value = x 1125 case pb.FieldValue_GEO: 1126 geoValue := fieldValue.GetGeo() 1127 geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()} 1128 if !geoPoint.Valid() { 1129 return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint) 1130 } 1131 f.Value = geoPoint 1132 default: 1133 return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType()) 1134 } 1135 dst = append(dst, f) 1136 } 1137 return dst, nil 1138} 1139 1140func protoToFacets(facets []*pb.Facet) ([]Facet, error) { 1141 if len(facets) == 0 { 1142 return nil, nil 1143 } 1144 dst := make([]Facet, 0, len(facets)) 1145 for _, facet := range facets { 1146 facetValue := facet.GetValue() 1147 f := Facet{ 1148 Name: facet.GetName(), 1149 } 1150 switch facetValue.GetType() { 1151 case pb.FacetValue_ATOM: 1152 f.Value = Atom(facetValue.GetStringValue()) 1153 case pb.FacetValue_NUMBER: 1154 sv := facetValue.GetStringValue() 1155 x, err := strconv.ParseFloat(sv, 64) 1156 if err != nil { 1157 return nil, err 1158 } 1159 f.Value = x 1160 default: 1161 return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType()) 1162 } 1163 dst = append(dst, f) 1164 } 1165 return dst, nil 1166} 1167 1168func namespaceMod(m proto.Message, namespace string) { 1169 set := func(s **string) { 1170 if *s == nil { 1171 *s = &namespace 1172 } 1173 } 1174 switch m := m.(type) { 1175 case *pb.IndexDocumentRequest: 1176 set(&m.Params.IndexSpec.Namespace) 1177 case *pb.ListDocumentsRequest: 1178 set(&m.Params.IndexSpec.Namespace) 1179 case *pb.DeleteDocumentRequest: 1180 set(&m.Params.IndexSpec.Namespace) 1181 case *pb.SearchRequest: 1182 set(&m.Params.IndexSpec.Namespace) 1183 } 1184} 1185 1186func init() { 1187 internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name) 1188 internal.NamespaceMods["search"] = namespaceMod 1189} 1190