1// Copyright 2012 Google Inc. All rights reserved.
2// Use of this source code is governed by the Apache 2.0
3// license that can be found in the LICENSE file.
4
5package search // import "google.golang.org/appengine/search"
6
7// TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage??
8// TODO: Index.GetAll (or Iterator.GetAll)?
9// TODO: struct <-> protobuf tests.
10// TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero
11// time.Time)? _MAXIMUM_STRING_LENGTH?
12
13import (
14	"errors"
15	"fmt"
16	"math"
17	"reflect"
18	"regexp"
19	"strconv"
20	"strings"
21	"time"
22	"unicode/utf8"
23
24	"github.com/golang/protobuf/proto"
25	"golang.org/x/net/context"
26
27	"google.golang.org/appengine"
28	"google.golang.org/appengine/internal"
29	pb "google.golang.org/appengine/internal/search"
30)
31
// maxDocumentsPerPutDelete is the largest number of documents that PutMulti
// and DeleteMulti accept in a single call.
const maxDocumentsPerPutDelete = 200

var (
	// ErrInvalidDocumentType is returned when methods like Put, Get or Next
	// are passed a dst or src argument of invalid type.
	ErrInvalidDocumentType = errors.New("search: invalid document type")

	// ErrNoSuchDocument is returned when no document was found for a given ID.
	ErrNoSuchDocument = errors.New("search: no such document")

	// ErrTooManyDocuments is returned when the user passes more than
	// maxDocumentsPerPutDelete documents to PutMulti or DeleteMulti.
	ErrTooManyDocuments = fmt.Errorf("search: too many documents given to put or delete (max is %d)", maxDocumentsPerPutDelete)
)
46
// Atom is a document field whose contents are indexed as a single indivisible
// string. Atom values may also be used as facet values and refinements.
type Atom string

// HTML is a document field whose contents are indexed as HTML. Only text nodes
// are indexed: "foo<b>bar" will be treated as "foobar".
type HTML string
54
// validIndexNameOrDocID reports whether s may be used as an index name or a
// document ID. It is the Go equivalent of Python's
// _ValidateVisiblePrintableAsciiNotReserved: s must not begin with the
// reserved character "!" and may contain only visible, printable ASCII
// characters (0x21 through 0x7e). The empty string passes this check; callers
// that require a non-empty value must test for that themselves.
func validIndexNameOrDocID(s string) bool {
	if len(s) > 0 && s[0] == '!' {
		return false
	}
	// Anything outside the visible ASCII range (whitespace, control
	// characters, DEL, non-ASCII runes) disqualifies the string.
	forbidden := func(r rune) bool { return r < 0x21 || r >= 0x7f }
	return strings.IndexFunc(s, forbidden) < 0
}
68
var (
	// fieldNameRE matches an ASCII letter followed by any number of ASCII
	// letters, digits or underscores.
	fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`)
	// languageRE matches exactly two lowercase ASCII letters.
	languageRE = regexp.MustCompile(`^[a-z]{2}$`)
)

// validFieldName reports whether s is an acceptable field or facet name: at
// most 500 bytes long and matching fieldNameRE. It is the Go equivalent of
// Python's _CheckFieldName.
func validFieldName(s string) bool {
	const maxLen = 500
	if len(s) > maxLen {
		return false
	}
	return fieldNameRE.MatchString(s)
}
79
// validDocRank reports whether the rank r lies in the range [0, 2^31).
func validDocRank(r int) bool {
	const maxRank = 1<<31 - 1
	if r < 0 {
		return false
	}
	return r <= maxRank
}
84
85// validLanguage checks that a language looks like ISO 639-1.
86func validLanguage(s string) bool {
87	return languageRE.MatchString(s)
88}
89
// validFloat reports whether f lies in the range [-2147483647, 2147483647].
// NaN and the infinities are rejected.
func validFloat(f float64) bool {
	const limit = 1<<31 - 1
	if f < -limit {
		return false
	}
	// A NaN fails this comparison as well, so it is reported as invalid.
	return f <= limit
}
94
// Index is an index of documents. Values are obtained from Open.
type Index struct {
	// spec identifies the index in requests to the search service. Open
	// only fills in its Name.
	spec pb.IndexSpec
}
99
// orderIDEpoch forms the basis for populating OrderId on documents: when no
// rank is supplied, saveDoc uses the number of seconds elapsed since this
// instant.
var orderIDEpoch = time.Date(2011, 1, 1, 0, 0, 0, 0, time.UTC)
102
103// Open opens the index with the given name. The index is created if it does
104// not already exist.
105//
106// The name is a human-readable ASCII string. It must contain no whitespace
107// characters and not start with "!".
108func Open(name string) (*Index, error) {
109	if !validIndexNameOrDocID(name) {
110		return nil, fmt.Errorf("search: invalid index name %q", name)
111	}
112	return &Index{
113		spec: pb.IndexSpec{
114			Name: &name,
115		},
116	}, nil
117}
118
119// Put saves src to the index. If id is empty, a new ID is allocated by the
120// service and returned. If id is not empty, any existing index entry for that
121// ID is replaced.
122//
123// The ID is a human-readable ASCII string. It must contain no whitespace
124// characters and not start with "!".
125//
126// src must be a non-nil struct pointer or implement the FieldLoadSaver
127// interface.
128func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) {
129	ids, err := x.PutMulti(c, []string{id}, []interface{}{src})
130	if err != nil {
131		return "", err
132	}
133	return ids[0], nil
134}
135
136// PutMulti is like Put, but is more efficient for adding multiple documents to
137// the index at once.
138//
139// Up to 200 documents can be added at once. ErrTooManyDocuments is returned if
140// you try to add more.
141//
142// ids can either be an empty slice (which means new IDs will be allocated for
143// each of the documents added) or a slice the same size as srcs.
144//
145// The error may be an instance of appengine.MultiError, in which case it will
146// be the same size as srcs and the individual errors inside will correspond
147// with the items in srcs.
148func (x *Index) PutMulti(c context.Context, ids []string, srcs []interface{}) ([]string, error) {
149	if len(ids) != 0 && len(srcs) != len(ids) {
150		return nil, fmt.Errorf("search: PutMulti expects ids and srcs slices of the same length")
151	}
152	if len(srcs) > maxDocumentsPerPutDelete {
153		return nil, ErrTooManyDocuments
154	}
155
156	docs := make([]*pb.Document, len(srcs))
157	for i, s := range srcs {
158		var err error
159		docs[i], err = saveDoc(s)
160		if err != nil {
161			return nil, err
162		}
163
164		if len(ids) != 0 && ids[i] != "" {
165			if !validIndexNameOrDocID(ids[i]) {
166				return nil, fmt.Errorf("search: invalid ID %q", ids[i])
167			}
168			docs[i].Id = proto.String(ids[i])
169		}
170	}
171
172	// spec is modified by Call when applying the current Namespace, so copy it to
173	// avoid retaining the namespace beyond the scope of the Call.
174	spec := x.spec
175	req := &pb.IndexDocumentRequest{
176		Params: &pb.IndexDocumentParams{
177			Document:  docs,
178			IndexSpec: &spec,
179		},
180	}
181	res := &pb.IndexDocumentResponse{}
182	if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil {
183		return nil, err
184	}
185	multiErr, hasErr := make(appengine.MultiError, len(res.Status)), false
186	for i, s := range res.Status {
187		if s.GetCode() != pb.SearchServiceError_OK {
188			multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
189			hasErr = true
190		}
191	}
192	if hasErr {
193		return res.DocId, multiErr
194	}
195
196	if len(res.Status) != len(docs) || len(res.DocId) != len(docs) {
197		return nil, fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs, expected %d)",
198			len(res.Status), len(res.DocId), len(docs))
199	}
200	return res.DocId, nil
201}
202
203// Get loads the document with the given ID into dst.
204//
205// The ID is a human-readable ASCII string. It must be non-empty, contain no
206// whitespace characters and not start with "!".
207//
208// dst must be a non-nil struct pointer or implement the FieldLoadSaver
209// interface.
210//
211// ErrFieldMismatch is returned when a field is to be loaded into a different
212// type than the one it was stored from, or when a field is missing or
213// unexported in the destination struct. ErrFieldMismatch is only returned if
214// dst is a struct pointer. It is up to the callee to decide whether this error
215// is fatal, recoverable, or ignorable.
216func (x *Index) Get(c context.Context, id string, dst interface{}) error {
217	if id == "" || !validIndexNameOrDocID(id) {
218		return fmt.Errorf("search: invalid ID %q", id)
219	}
220	req := &pb.ListDocumentsRequest{
221		Params: &pb.ListDocumentsParams{
222			IndexSpec:  &x.spec,
223			StartDocId: proto.String(id),
224			Limit:      proto.Int32(1),
225		},
226	}
227	res := &pb.ListDocumentsResponse{}
228	if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil {
229		return err
230	}
231	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
232		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
233	}
234	if len(res.Document) != 1 || res.Document[0].GetId() != id {
235		return ErrNoSuchDocument
236	}
237	return loadDoc(dst, res.Document[0], nil)
238}
239
240// Delete deletes a document from the index.
241func (x *Index) Delete(c context.Context, id string) error {
242	return x.DeleteMulti(c, []string{id})
243}
244
245// DeleteMulti deletes multiple documents from the index.
246//
247// The returned error may be an instance of appengine.MultiError, in which case
248// it will be the same size as srcs and the individual errors inside will
249// correspond with the items in srcs.
250func (x *Index) DeleteMulti(c context.Context, ids []string) error {
251	if len(ids) > maxDocumentsPerPutDelete {
252		return ErrTooManyDocuments
253	}
254
255	req := &pb.DeleteDocumentRequest{
256		Params: &pb.DeleteDocumentParams{
257			DocId:     ids,
258			IndexSpec: &x.spec,
259		},
260	}
261	res := &pb.DeleteDocumentResponse{}
262	if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil {
263		return err
264	}
265	if len(res.Status) != len(ids) {
266		return fmt.Errorf("search: internal error: wrong number of results (%d, expected %d)",
267			len(res.Status), len(ids))
268	}
269	multiErr, hasErr := make(appengine.MultiError, len(ids)), false
270	for i, s := range res.Status {
271		if s.GetCode() != pb.SearchServiceError_OK {
272			multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
273			hasErr = true
274		}
275	}
276	if hasErr {
277		return multiErr
278	}
279	return nil
280}
281
282// List lists all of the documents in an index. The documents are returned in
283// increasing ID order.
284func (x *Index) List(c context.Context, opts *ListOptions) *Iterator {
285	t := &Iterator{
286		c:             c,
287		index:         x,
288		count:         -1,
289		listInclusive: true,
290		more:          moreList,
291	}
292	if opts != nil {
293		t.listStartID = opts.StartID
294		t.limit = opts.Limit
295		t.idsOnly = opts.IDsOnly
296	}
297	return t
298}
299
300func moreList(t *Iterator) error {
301	req := &pb.ListDocumentsRequest{
302		Params: &pb.ListDocumentsParams{
303			IndexSpec: &t.index.spec,
304		},
305	}
306	if t.listStartID != "" {
307		req.Params.StartDocId = &t.listStartID
308		req.Params.IncludeStartDoc = &t.listInclusive
309	}
310	if t.limit > 0 {
311		req.Params.Limit = proto.Int32(int32(t.limit))
312	}
313	if t.idsOnly {
314		req.Params.KeysOnly = &t.idsOnly
315	}
316
317	res := &pb.ListDocumentsResponse{}
318	if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil {
319		return err
320	}
321	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
322		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
323	}
324	t.listRes = res.Document
325	t.listStartID, t.listInclusive, t.more = "", false, nil
326	if len(res.Document) != 0 && t.limit <= 0 {
327		if id := res.Document[len(res.Document)-1].GetId(); id != "" {
328			t.listStartID, t.more = id, moreList
329		}
330	}
331	return nil
332}
333
// ListOptions are the options for listing documents in an index. Passing a nil
// *ListOptions is equivalent to using the default values.
type ListOptions struct {
	// StartID is the inclusive lower bound for the ID of the returned
	// documents. The zero value means all documents will be returned.
	StartID string

	// Limit is the maximum number of documents to return. The zero value
	// indicates no limit; a limit is only sent to the service when this
	// value is positive.
	Limit int

	// IDsOnly indicates that only document IDs should be returned for the list
	// operation; no document fields are populated.
	IDsOnly bool
}
349
350// Search searches the index for the given query.
351func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator {
352	t := &Iterator{
353		c:           c,
354		index:       x,
355		searchQuery: query,
356		more:        moreSearch,
357	}
358	if opts != nil {
359		if opts.Cursor != "" {
360			if opts.Offset != 0 {
361				return errIter("at most one of Cursor and Offset may be specified")
362			}
363			t.searchCursor = proto.String(string(opts.Cursor))
364		}
365		t.limit = opts.Limit
366		t.fields = opts.Fields
367		t.idsOnly = opts.IDsOnly
368		t.sort = opts.Sort
369		t.exprs = opts.Expressions
370		t.refinements = opts.Refinements
371		t.facetOpts = opts.Facets
372		t.searchOffset = opts.Offset
373		t.countAccuracy = opts.CountAccuracy
374	}
375	return t
376}
377
378func moreSearch(t *Iterator) error {
379	// We use per-result (rather than single/per-page) cursors since this
380	// lets us return a Cursor for every iterator document. The two cursor
381	// types are largely interchangeable: a page cursor is the same as the
382	// last per-result cursor in a given search response.
383	req := &pb.SearchRequest{
384		Params: &pb.SearchParams{
385			IndexSpec:  &t.index.spec,
386			Query:      &t.searchQuery,
387			Cursor:     t.searchCursor,
388			CursorType: pb.SearchParams_PER_RESULT.Enum(),
389			FieldSpec: &pb.FieldSpec{
390				Name: t.fields,
391			},
392		},
393	}
394	if t.limit > 0 {
395		req.Params.Limit = proto.Int32(int32(t.limit))
396	}
397	if t.searchOffset > 0 {
398		req.Params.Offset = proto.Int32(int32(t.searchOffset))
399		t.searchOffset = 0
400	}
401	if t.countAccuracy > 0 {
402		req.Params.MatchedCountAccuracy = proto.Int32(int32(t.countAccuracy))
403	}
404	if t.idsOnly {
405		req.Params.KeysOnly = &t.idsOnly
406	}
407	if t.sort != nil {
408		if err := sortToProto(t.sort, req.Params); err != nil {
409			return err
410		}
411	}
412	if t.refinements != nil {
413		if err := refinementsToProto(t.refinements, req.Params); err != nil {
414			return err
415		}
416	}
417	for _, e := range t.exprs {
418		req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{
419			Name:       proto.String(e.Name),
420			Expression: proto.String(e.Expr),
421		})
422	}
423	for _, f := range t.facetOpts {
424		if err := f.setParams(req.Params); err != nil {
425			return fmt.Errorf("bad FacetSearchOption: %v", err)
426		}
427	}
428	// Don't repeat facet search.
429	t.facetOpts = nil
430
431	res := &pb.SearchResponse{}
432	if err := internal.Call(t.c, "search", "Search", req, res); err != nil {
433		return err
434	}
435	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
436		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
437	}
438	t.searchRes = res.Result
439	if len(res.FacetResult) > 0 {
440		t.facetRes = res.FacetResult
441	}
442	t.count = int(*res.MatchedCount)
443	if t.limit > 0 {
444		t.more = nil
445	} else {
446		t.more = moreSearch
447	}
448	return nil
449}
450
// SearchOptions are the options for searching an index. Passing a nil
// *SearchOptions is equivalent to using the default values.
type SearchOptions struct {
	// Limit is the maximum number of documents to return. The zero value
	// indicates no limit.
	Limit int

	// IDsOnly indicates that only document IDs should be returned for the search
	// operation; no document fields are populated.
	IDsOnly bool

	// Sort controls the ordering of search results.
	Sort *SortOptions

	// Fields specifies which document fields to include in the results. If omitted,
	// all document fields are returned. No more than 100 fields may be specified.
	Fields []string

	// Expressions specifies additional computed fields to add to each returned
	// document.
	Expressions []FieldExpression

	// Facets controls what facet information is returned for these search results.
	// If no options are specified, no facet results will be returned.
	Facets []FacetSearchOption

	// Refinements filters the returned documents by requiring them to contain facets
	// with specific values. Refinements are applied in conjunction for facets with
	// different names, and in disjunction otherwise.
	Refinements []Facet

	// Cursor causes the results to commence with the first document after
	// the document associated with the cursor. When specified, Offset must
	// be zero.
	Cursor Cursor

	// Offset specifies the number of documents to skip over before returning results.
	// When specified, Cursor must be empty.
	Offset int

	// CountAccuracy specifies the maximum result count that can be expected to
	// be accurate. If zero, the count accuracy defaults to 20.
	CountAccuracy int
}
494
// Cursor represents an iterator's position. Cursors are obtained from
// Iterator.Cursor and passed back through SearchOptions.Cursor.
//
// The string value of a cursor is web-safe. It can be saved and restored
// for later use.
type Cursor string
500
// FieldExpression defines a custom expression to evaluate for each result.
// Both fields are sent to the service as part of the search request.
type FieldExpression struct {
	// Name is the name to use for the computed field.
	Name string

	// Expr is evaluated to provide a custom content snippet for each document.
	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
	// the supported expression syntax.
	Expr string
}
511
// FacetSearchOption controls what facet information is returned in search results.
// Values are created with AutoFacetDiscovery, FacetDiscovery and
// FacetDocumentDepth.
type FacetSearchOption interface {
	// setParams applies the option to the outgoing search parameters,
	// returning an error if the option is invalid.
	setParams(*pb.SearchParams) error
}
516
517// AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet
518// discovery for the search. Automatic facet discovery looks for the facets
519// which appear the most often in the aggregate in the matched documents.
520//
521// The maximum number of facets returned is controlled by facetLimit, and the
522// maximum number of values per facet by facetLimit. A limit of zero indicates
523// a default limit should be used.
524func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption {
525	return &autoFacetOpt{facetLimit, valueLimit}
526}
527
// autoFacetOpt is the FacetSearchOption returned by AutoFacetDiscovery.
type autoFacetOpt struct {
	facetLimit int
	valueLimit int
}

// defaultAutoFacetLimit is the facet count requested when facetLimit is
// zero. As per python runtime search.py.
const defaultAutoFacetLimit = 10
533
534func (o *autoFacetOpt) setParams(params *pb.SearchParams) error {
535	lim := int32(o.facetLimit)
536	if lim == 0 {
537		lim = defaultAutoFacetLimit
538	}
539	params.AutoDiscoverFacetCount = &lim
540	if o.valueLimit > 0 {
541		params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{
542			ValueLimit: proto.Int32(int32(o.valueLimit)),
543		}
544	}
545	return nil
546}
547
548// FacetDiscovery returns a FacetSearchOption which selects a facet to be
549// returned with the search results. By default, the most frequently
550// occurring values for that facet will be returned. However, you can also
551// specify a list of particular Atoms or specific Ranges to return.
552func FacetDiscovery(name string, value ...interface{}) FacetSearchOption {
553	return &facetOpt{name, value}
554}
555
// facetOpt is the FacetSearchOption returned by FacetDiscovery.
type facetOpt struct {
	// name is the name of the facet to include in the results.
	name string
	// values optionally restricts the facet to particular Atom or Range
	// values; it is validated by setParams.
	values []interface{}
}
560
561func (o *facetOpt) setParams(params *pb.SearchParams) error {
562	req := &pb.FacetRequest{Name: &o.name}
563	params.IncludeFacet = append(params.IncludeFacet, req)
564	if len(o.values) == 0 {
565		return nil
566	}
567	vtype := reflect.TypeOf(o.values[0])
568	reqParam := &pb.FacetRequestParam{}
569	for _, v := range o.values {
570		if reflect.TypeOf(v) != vtype {
571			return errors.New("values must all be Atom, or must all be Range")
572		}
573		switch v := v.(type) {
574		case Atom:
575			reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v))
576		case Range:
577			rng, err := rangeToProto(v)
578			if err != nil {
579				return fmt.Errorf("invalid range: %v", err)
580			}
581			reqParam.Range = append(reqParam.Range, rng)
582		default:
583			return fmt.Errorf("unsupported value type %T", v)
584		}
585	}
586	req.Params = reqParam
587	return nil
588}
589
590// FacetDocumentDepth returns a FacetSearchOption which controls the number of
591// documents to be evaluated with preparing facet results.
592func FacetDocumentDepth(depth int) FacetSearchOption {
593	return facetDepthOpt(depth)
594}
595
596type facetDepthOpt int
597
598func (o facetDepthOpt) setParams(params *pb.SearchParams) error {
599	params.FacetDepth = proto.Int32(int32(o))
600	return nil
601}
602
// FacetResult represents the number of times a particular facet and value
// appeared in the documents matching a search request. Values are returned
// by Iterator.Facets.
type FacetResult struct {
	// Facet holds the facet's name and its value, which Iterator.Facets
	// sets to either an Atom or a Range.
	Facet

	// Count is the number of times this specific facet and value appeared in the
	// matching documents.
	Count int
}
612
// Range represents a numeric range with inclusive start and exclusive end.
// Start may be specified as math.Inf(-1) to indicate there is no minimum
// value, and End may similarly be specified as math.Inf(1); at least one of
// Start or End must be a finite number.
type Range struct {
	Start float64
	End   float64
}

// negInf and posInf are the sentinel values marking an unbounded Start and
// an unbounded End respectively.
var (
	negInf = math.Inf(-1)
	posInf = math.Inf(+1)
)

// AtLeast returns a Range matching any value greater than, or equal to, min.
// The returned Range has no upper bound.
func AtLeast(min float64) Range {
	return Range{min, posInf}
}

// LessThan returns a Range matching any value less than max. The returned
// Range has no lower bound.
func LessThan(max float64) Range {
	return Range{negInf, max}
}
635
// SortOptions control the ordering and scoring of search results.
type SortOptions struct {
	// Expressions is a slice of expressions representing a multi-dimensional
	// sort.
	Expressions []SortExpression

	// Scorer, when specified, will cause the documents to be scored according to
	// search term frequency.
	Scorer Scorer

	// Limit is the maximum number of objects to score and/or sort. Limit cannot
	// be more than 10,000. The zero value indicates a default limit; a limit
	// is only sent to the service when this value is positive.
	Limit int
}
650
// SortExpression defines a single dimension for sorting a document.
type SortExpression struct {
	// Expr is evaluated to provide a sorting value for each document.
	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
	// the supported expression syntax.
	Expr string

	// Reverse causes the documents to be sorted in ascending order. When it
	// is false the sort direction is left unset in the request.
	Reverse bool

	// The default value to use when no field is present or the expression
	// cannot be calculated for a document. For text sorts, Default must
	// be of type string; for numeric sorts, float64. Any other non-nil type
	// causes the search to fail with an error.
	Default interface{}
}
666
// A Scorer defines how a document is scored. The available scorers are
// MatchScorer and RescoringMatchScorer.
type Scorer interface {
	// toProto records the scorer in the given ScorerSpec.
	toProto(*pb.ScorerSpec)
}
671
672type enumScorer struct {
673	enum pb.ScorerSpec_Scorer
674}
675
676func (e enumScorer) toProto(spec *pb.ScorerSpec) {
677	spec.Scorer = e.enum.Enum()
678}
679
// The Scorer values available for use in SortOptions.Scorer.
var (
	// MatchScorer assigns a score based on term frequency in a document.
	MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER}

	// RescoringMatchScorer assigns a score based on the quality of the query
	// match. It is similar to a MatchScorer but uses a more complex scoring
	// algorithm based on match term frequency and other factors like field type.
	// Please be aware that this algorithm is continually refined and can change
	// over time without notice. This means that the ordering of search results
	// that use this scorer can also change without notice.
	RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER}
)
692
693func sortToProto(sort *SortOptions, params *pb.SearchParams) error {
694	for _, e := range sort.Expressions {
695		spec := &pb.SortSpec{
696			SortExpression: proto.String(e.Expr),
697		}
698		if e.Reverse {
699			spec.SortDescending = proto.Bool(false)
700		}
701		if e.Default != nil {
702			switch d := e.Default.(type) {
703			case float64:
704				spec.DefaultValueNumeric = &d
705			case string:
706				spec.DefaultValueText = &d
707			default:
708				return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr)
709			}
710		}
711		params.SortSpec = append(params.SortSpec, spec)
712	}
713
714	spec := &pb.ScorerSpec{}
715	if sort.Limit > 0 {
716		spec.Limit = proto.Int32(int32(sort.Limit))
717		params.ScorerSpec = spec
718	}
719	if sort.Scorer != nil {
720		sort.Scorer.toProto(spec)
721		params.ScorerSpec = spec
722	}
723
724	return nil
725}
726
727func refinementsToProto(refinements []Facet, params *pb.SearchParams) error {
728	for _, r := range refinements {
729		ref := &pb.FacetRefinement{
730			Name: proto.String(r.Name),
731		}
732		switch v := r.Value.(type) {
733		case Atom:
734			ref.Value = proto.String(string(v))
735		case Range:
736			rng, err := rangeToProto(v)
737			if err != nil {
738				return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err)
739			}
740			// Unfortunately there are two identical messages for identify Facet ranges.
741			ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End}
742		default:
743			return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v)
744		}
745		params.FacetRefinement = append(params.FacetRefinement, ref)
746	}
747	return nil
748}
749
750func rangeToProto(r Range) (*pb.FacetRange, error) {
751	rng := &pb.FacetRange{}
752	if r.Start != negInf {
753		if !validFloat(r.Start) {
754			return nil, errors.New("invalid value for Start")
755		}
756		rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64))
757	} else if r.End == posInf {
758		return nil, errors.New("either Start or End must be finite")
759	}
760	if r.End != posInf {
761		if !validFloat(r.End) {
762			return nil, errors.New("invalid value for End")
763		}
764		rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64))
765	}
766	return rng, nil
767}
768
769func protoToRange(rng *pb.FacetRefinement_Range) Range {
770	r := Range{Start: negInf, End: posInf}
771	if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil {
772		r.Start = x
773	}
774	if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil {
775		r.End = x
776	}
777	return r
778}
779
// Iterator is the result of searching an index for a query or listing an
// index. Iterators are created by Index.List and Index.Search.
type Iterator struct {
	c     context.Context
	index *Index
	// err, once set, is returned by every subsequent call to Next.
	err error

	// State used by List iterators: the fetched but not yet returned
	// documents, and where the next request should start.
	listRes       []*pb.Document
	listStartID   string
	listInclusive bool

	// State used by Search iterators. searchCursor is sent with the next
	// request and is updated by Next to the cursor of the result returned.
	searchRes    []*pb.SearchResult
	facetRes     []*pb.FacetResult
	searchQuery  string
	searchCursor *string
	searchOffset int
	sort         *SortOptions

	fields      []string
	exprs       []FieldExpression
	refinements []Facet
	facetOpts   []FacetSearchOption

	// more fetches the next batch of results; nil means no further request
	// will be made.
	more func(*Iterator) error

	count         int
	countAccuracy int
	limit         int // items left to return; 0 for unlimited.
	idsOnly       bool
}
810
811// errIter returns an iterator that only returns the given error.
812func errIter(err string) *Iterator {
813	return &Iterator{
814		err: errors.New(err),
815	}
816}
817
// Done is returned by Iterator.Next when a query iteration has completed.
var Done = errors.New("search: query has no more results")
820
821// Count returns an approximation of the number of documents matched by the
822// query. It is only valid to call for iterators returned by Search.
823func (t *Iterator) Count() int { return t.count }
824
825// fetchMore retrieves more results, if there are no errors or pending results.
826func (t *Iterator) fetchMore() {
827	if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil {
828		t.err = t.more(t)
829	}
830}
831
832// Next returns the ID of the next result. When there are no more results,
833// Done is returned as the error.
834//
835// dst must be a non-nil struct pointer, implement the FieldLoadSaver
836// interface, or be a nil interface value. If a non-nil dst is provided, it
837// will be filled with the indexed fields. dst is ignored if this iterator was
838// created with an IDsOnly option.
839func (t *Iterator) Next(dst interface{}) (string, error) {
840	t.fetchMore()
841	if t.err != nil {
842		return "", t.err
843	}
844
845	var doc *pb.Document
846	var exprs []*pb.Field
847	switch {
848	case len(t.listRes) != 0:
849		doc = t.listRes[0]
850		t.listRes = t.listRes[1:]
851	case len(t.searchRes) != 0:
852		doc = t.searchRes[0].Document
853		exprs = t.searchRes[0].Expression
854		t.searchCursor = t.searchRes[0].Cursor
855		t.searchRes = t.searchRes[1:]
856	default:
857		return "", Done
858	}
859	if doc == nil {
860		return "", errors.New("search: internal error: no document returned")
861	}
862	if !t.idsOnly && dst != nil {
863		if err := loadDoc(dst, doc, exprs); err != nil {
864			return "", err
865		}
866	}
867	return doc.GetId(), nil
868}
869
870// Cursor returns the cursor associated with the current document (that is,
871// the document most recently returned by a call to Next).
872//
873// Passing this cursor in a future call to Search will cause those results
874// to commence with the first document after the current document.
875func (t *Iterator) Cursor() Cursor {
876	if t.searchCursor == nil {
877		return ""
878	}
879	return Cursor(*t.searchCursor)
880}
881
882// Facets returns the facets found within the search results, if any facets
883// were requested in the SearchOptions.
884func (t *Iterator) Facets() ([][]FacetResult, error) {
885	t.fetchMore()
886	if t.err != nil && t.err != Done {
887		return nil, t.err
888	}
889
890	var facets [][]FacetResult
891	for _, f := range t.facetRes {
892		fres := make([]FacetResult, 0, len(f.Value))
893		for _, v := range f.Value {
894			ref := v.Refinement
895			facet := FacetResult{
896				Facet: Facet{Name: ref.GetName()},
897				Count: int(v.GetCount()),
898			}
899			if ref.Value != nil {
900				facet.Value = Atom(*ref.Value)
901			} else {
902				facet.Value = protoToRange(ref.Range)
903			}
904			fres = append(fres, facet)
905		}
906		facets = append(facets, fres)
907	}
908	return facets, nil
909}
910
911// saveDoc converts from a struct pointer or
912// FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf.
913func saveDoc(src interface{}) (*pb.Document, error) {
914	var err error
915	var fields []Field
916	var meta *DocumentMetadata
917	switch x := src.(type) {
918	case FieldLoadSaver:
919		fields, meta, err = x.Save()
920	default:
921		fields, meta, err = saveStructWithMeta(src)
922	}
923	if err != nil {
924		return nil, err
925	}
926
927	fieldsProto, err := fieldsToProto(fields)
928	if err != nil {
929		return nil, err
930	}
931	d := &pb.Document{
932		Field:         fieldsProto,
933		OrderId:       proto.Int32(int32(time.Since(orderIDEpoch).Seconds())),
934		OrderIdSource: pb.Document_DEFAULTED.Enum(),
935	}
936	if meta != nil {
937		if meta.Rank != 0 {
938			if !validDocRank(meta.Rank) {
939				return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank)
940			}
941			*d.OrderId = int32(meta.Rank)
942			d.OrderIdSource = pb.Document_SUPPLIED.Enum()
943		}
944		if len(meta.Facets) > 0 {
945			facets, err := facetsToProto(meta.Facets)
946			if err != nil {
947				return nil, err
948			}
949			d.Facet = facets
950		}
951	}
952	return d, nil
953}
954
955func fieldsToProto(src []Field) ([]*pb.Field, error) {
956	// Maps to catch duplicate time or numeric fields.
957	timeFields, numericFields := make(map[string]bool), make(map[string]bool)
958	dst := make([]*pb.Field, 0, len(src))
959	for _, f := range src {
960		if !validFieldName(f.Name) {
961			return nil, fmt.Errorf("search: invalid field name %q", f.Name)
962		}
963		fieldValue := &pb.FieldValue{}
964		switch x := f.Value.(type) {
965		case string:
966			fieldValue.Type = pb.FieldValue_TEXT.Enum()
967			fieldValue.StringValue = proto.String(x)
968		case Atom:
969			fieldValue.Type = pb.FieldValue_ATOM.Enum()
970			fieldValue.StringValue = proto.String(string(x))
971		case HTML:
972			fieldValue.Type = pb.FieldValue_HTML.Enum()
973			fieldValue.StringValue = proto.String(string(x))
974		case time.Time:
975			if timeFields[f.Name] {
976				return nil, fmt.Errorf("search: duplicate time field %q", f.Name)
977			}
978			timeFields[f.Name] = true
979			fieldValue.Type = pb.FieldValue_DATE.Enum()
980			fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10))
981		case float64:
982			if numericFields[f.Name] {
983				return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name)
984			}
985			if !validFloat(x) {
986				return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x)
987			}
988			numericFields[f.Name] = true
989			fieldValue.Type = pb.FieldValue_NUMBER.Enum()
990			fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
991		case appengine.GeoPoint:
992			if !x.Valid() {
993				return nil, fmt.Errorf(
994					"search: GeoPoint field %q with invalid value %v",
995					f.Name, x)
996			}
997			fieldValue.Type = pb.FieldValue_GEO.Enum()
998			fieldValue.Geo = &pb.FieldValue_Geo{
999				Lat: proto.Float64(x.Lat),
1000				Lng: proto.Float64(x.Lng),
1001			}
1002		default:
1003			return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value))
1004		}
1005		if f.Language != "" {
1006			switch f.Value.(type) {
1007			case string, HTML:
1008				if !validLanguage(f.Language) {
1009					return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language)
1010				}
1011				fieldValue.Language = proto.String(f.Language)
1012			default:
1013				return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value)
1014			}
1015		}
1016		if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) {
1017			return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p)
1018		}
1019		dst = append(dst, &pb.Field{
1020			Name:  proto.String(f.Name),
1021			Value: fieldValue,
1022		})
1023	}
1024	return dst, nil
1025}
1026
1027func facetsToProto(src []Facet) ([]*pb.Facet, error) {
1028	dst := make([]*pb.Facet, 0, len(src))
1029	for _, f := range src {
1030		if !validFieldName(f.Name) {
1031			return nil, fmt.Errorf("search: invalid facet name %q", f.Name)
1032		}
1033		facetValue := &pb.FacetValue{}
1034		switch x := f.Value.(type) {
1035		case Atom:
1036			if !utf8.ValidString(string(x)) {
1037				return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x)
1038			}
1039			facetValue.Type = pb.FacetValue_ATOM.Enum()
1040			facetValue.StringValue = proto.String(string(x))
1041		case float64:
1042			if !validFloat(x) {
1043				return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x)
1044			}
1045			facetValue.Type = pb.FacetValue_NUMBER.Enum()
1046			facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
1047		default:
1048			return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value))
1049		}
1050		dst = append(dst, &pb.Facet{
1051			Name:  proto.String(f.Name),
1052			Value: facetValue,
1053		})
1054	}
1055	return dst, nil
1056}
1057
1058// loadDoc converts from protobufs to a struct pointer or
1059// FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's
1060// stored fields and facets, and any document metadata.  An additional slice of
1061// fields, exprs, may optionally be provided to contain any derived expressions
1062// requested by the developer.
1063func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) {
1064	fields, err := protoToFields(src.Field)
1065	if err != nil {
1066		return err
1067	}
1068	facets, err := protoToFacets(src.Facet)
1069	if err != nil {
1070		return err
1071	}
1072	if len(exprs) > 0 {
1073		exprFields, err := protoToFields(exprs)
1074		if err != nil {
1075			return err
1076		}
1077		// Mark each field as derived.
1078		for i := range exprFields {
1079			exprFields[i].Derived = true
1080		}
1081		fields = append(fields, exprFields...)
1082	}
1083	meta := &DocumentMetadata{
1084		Rank:   int(src.GetOrderId()),
1085		Facets: facets,
1086	}
1087	switch x := dst.(type) {
1088	case FieldLoadSaver:
1089		return x.Load(fields, meta)
1090	default:
1091		return loadStructWithMeta(dst, fields, meta)
1092	}
1093}
1094
1095func protoToFields(fields []*pb.Field) ([]Field, error) {
1096	dst := make([]Field, 0, len(fields))
1097	for _, field := range fields {
1098		fieldValue := field.GetValue()
1099		f := Field{
1100			Name: field.GetName(),
1101		}
1102		switch fieldValue.GetType() {
1103		case pb.FieldValue_TEXT:
1104			f.Value = fieldValue.GetStringValue()
1105			f.Language = fieldValue.GetLanguage()
1106		case pb.FieldValue_ATOM:
1107			f.Value = Atom(fieldValue.GetStringValue())
1108		case pb.FieldValue_HTML:
1109			f.Value = HTML(fieldValue.GetStringValue())
1110			f.Language = fieldValue.GetLanguage()
1111		case pb.FieldValue_DATE:
1112			sv := fieldValue.GetStringValue()
1113			millis, err := strconv.ParseInt(sv, 10, 64)
1114			if err != nil {
1115				return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err)
1116			}
1117			f.Value = time.Unix(0, millis*1e6)
1118		case pb.FieldValue_NUMBER:
1119			sv := fieldValue.GetStringValue()
1120			x, err := strconv.ParseFloat(sv, 64)
1121			if err != nil {
1122				return nil, err
1123			}
1124			f.Value = x
1125		case pb.FieldValue_GEO:
1126			geoValue := fieldValue.GetGeo()
1127			geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()}
1128			if !geoPoint.Valid() {
1129				return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint)
1130			}
1131			f.Value = geoPoint
1132		default:
1133			return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType())
1134		}
1135		dst = append(dst, f)
1136	}
1137	return dst, nil
1138}
1139
1140func protoToFacets(facets []*pb.Facet) ([]Facet, error) {
1141	if len(facets) == 0 {
1142		return nil, nil
1143	}
1144	dst := make([]Facet, 0, len(facets))
1145	for _, facet := range facets {
1146		facetValue := facet.GetValue()
1147		f := Facet{
1148			Name: facet.GetName(),
1149		}
1150		switch facetValue.GetType() {
1151		case pb.FacetValue_ATOM:
1152			f.Value = Atom(facetValue.GetStringValue())
1153		case pb.FacetValue_NUMBER:
1154			sv := facetValue.GetStringValue()
1155			x, err := strconv.ParseFloat(sv, 64)
1156			if err != nil {
1157				return nil, err
1158			}
1159			f.Value = x
1160		default:
1161			return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType())
1162		}
1163		dst = append(dst, f)
1164	}
1165	return dst, nil
1166}
1167
1168func namespaceMod(m proto.Message, namespace string) {
1169	set := func(s **string) {
1170		if *s == nil {
1171			*s = &namespace
1172		}
1173	}
1174	switch m := m.(type) {
1175	case *pb.IndexDocumentRequest:
1176		set(&m.Params.IndexSpec.Namespace)
1177	case *pb.ListDocumentsRequest:
1178		set(&m.Params.IndexSpec.Namespace)
1179	case *pb.DeleteDocumentRequest:
1180		set(&m.Params.IndexSpec.Namespace)
1181	case *pb.SearchRequest:
1182		set(&m.Params.IndexSpec.Namespace)
1183	}
1184}
1185
// init hooks this package into the internal App Engine plumbing under the
// "search" service name.
func init() {
	// Hand the internal package the service's error-code name table
	// (presumably used to render error codes as names — confirm in internal).
	internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name)
	// Install namespaceMod as the namespace modifier for "search" requests.
	internal.NamespaceMods["search"] = namespaceMod
}
1190