// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains the infrastructure to create an
// identifier and full-text index for a set of Go files.
//
// Algorithm for identifier index:
// - traverse all .go files of the file tree specified by root
// - for each identifier (word) encountered, collect all occurrences (spots)
//   into a list; this produces a list of spots for each word
// - reduce the lists: from a list of spots to a list of FileRuns,
//   and from a list of FileRuns into a list of PakRuns
// - make a HitList from the PakRuns
//
// Details:
// - keep two lists per word: one containing package-level declarations
//   that have snippets, and one containing all other spots
// - keep the snippets in a separate table indexed by snippet index
//   and store the snippet index in place of the line number in a SpotInfo
//   (the line number for spots with snippets is stored in the snippet)
// - at the end, create lists of alternative spellings for a given
//   word
//
// Algorithm for full text index:
// - concatenate all source code in a byte buffer (in memory)
// - add the files to a file set in lockstep as they are added to the byte
//   buffer such that a byte buffer offset corresponds to the Pos value for
//   that file location
// - create a suffix array from the concatenated sources
//
// String lookup in full text index:
// - use the suffix array to look up a string's offsets - the offsets
//   correspond to the Pos values relative to the file set
// - translate the Pos values back into file and line information and
//   sort the result
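//
// As an illustration (hypothetical occurrences, not actual indexer output),
// two spots for the word "Bar" in files a.go and b.go of package p reduce to
//
//	[]Spot -> []*FileRun{a.go, b.go} -> []*PakRun{p} -> HitList
//
// with each FileRun holding the spots of one file grouped into KindRuns.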

package godoc

import (
	"bufio"
	"bytes"
	"encoding/gob"
	"errors"
	"fmt"
	"go/ast"
	"go/doc"
	"go/parser"
	"go/token"
	"index/suffixarray"
	"io"
	"log"
	"os"
	pathpkg "path"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode"

	"golang.org/x/tools/godoc/util"
	"golang.org/x/tools/godoc/vfs"
)

// ----------------------------------------------------------------------------
// InterfaceSlice is a helper type for sorting interface
// slices according to some slice-specific sort criteria.

type comparer func(x, y interface{}) bool

type interfaceSlice struct {
	slice []interface{}
	less  comparer
}

// ----------------------------------------------------------------------------
// RunList

// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort criteria) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type RunList []interface{}

func (h RunList) sort(less comparer) {
	sort.Sort(&interfaceSlice{h, less})
}

func (p *interfaceSlice) Len() int           { return len(p.slice) }
func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
func (p *interfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }

// Compress entries which are the same according to a sort criterion
// (specified by less) into "runs".
func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
	if len(h) == 0 {
		return nil
	}
	// len(h) > 0

	// create runs of entries with equal values
	h.sort(less)

	// for each run, make a new run object and collect them in a new RunList
	var hh RunList
	i, x := 0, h[0]
	for j, y := range h {
		if less(x, y) {
			hh = append(hh, newRun(h[i:j]))
			i, x = j, h[j] // start a new run
		}
	}
	// add final run, if any
	if i < len(h) {
		hh = append(hh, newRun(h[i:]))
	}

	return hh
}
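
// As an example (hypothetical entries, not from the indexer): reducing the
// RunList {a,1} {b,2} {a,3} with a less function that compares only the first
// components and a newRun function that collects the second components yields
// two runs, one holding the values for a ({1, 3}) and one holding the value
// for b ({2}).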

// ----------------------------------------------------------------------------
// KindRun

// Debugging support. Disable to see multiple entries per line.
const removeDuplicates = true

// A KindRun is a run of SpotInfos of the same kind in a given file.
// The kind (3 bits) is stored in each SpotInfo element; to find the
// kind of a KindRun, look at any of its elements.
type KindRun []SpotInfo

// KindRuns are sorted by line number or index. Since the isIndex bit
// is always the same for all infos in one list we can compare lori's.
func (k KindRun) Len() int           { return len(k) }
func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }

// FileRun contents are sorted by Kind for the reduction into KindRuns.
func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }

// newKindRun allocates a new KindRun from the SpotInfo run h.
func newKindRun(h RunList) interface{} {
	run := make(KindRun, len(h))
	for i, x := range h {
		run[i] = x.(SpotInfo)
	}

	// Spots were sorted by file and kind to create this run.
	// Within this run, sort them by line number or index.
	sort.Sort(run)

	if removeDuplicates {
		// Since both the lori and kind fields must be
		// the same for duplicates, and since the isIndex
		// bit is always the same for all infos in one
		// list, we can simply compare the entire info.
		k := 0
		prev := SpotInfo(1<<32 - 1) // an unlikely value
		for _, x := range run {
			if x != prev {
				run[k] = x
				k++
				prev = x
			}
		}
		run = run[0:k]
	}

	return run
}

// ----------------------------------------------------------------------------
// FileRun

// A Pak describes a Go package.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}

// Paks are sorted by name (primary key) and by import path (secondary key).
func (p *Pak) less(q *Pak) bool {
	return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
}

// A File describes a Go file.
type File struct {
	Name string // directory-local file name
	Pak  *Pak   // the package to which the file belongs
}

// Path returns the file path of f.
func (f *File) Path() string {
	return pathpkg.Join(f.Pak.Path, f.Name)
}

// A Spot describes a single occurrence of a word.
type Spot struct {
	File *File
	Info SpotInfo
}

// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
	File   *File
	Groups []KindRun
}

// Spots are sorted by file path for the reduction into FileRuns.
func lessSpot(x, y interface{}) bool {
	fx := x.(Spot).File
	fy := y.(Spot).File
	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
	px := fx.Pak.Path
	py := fy.Pak.Path
	return px < py || px == py && fx.Name < fy.Name
}

// newFileRun allocates a new FileRun from the Spot run h.
func newFileRun(h RunList) interface{} {
	file := h[0].(Spot).File

	// reduce the list of Spots into a list of KindRuns
	h1 := make(RunList, len(h))
	for i, x := range h {
		h1[i] = x.(Spot).Info
	}
	h2 := h1.reduce(lessKind, newKindRun)

	// create the FileRun
	groups := make([]KindRun, len(h2))
	for i, x := range h2 {
		groups[i] = x.(KindRun)
	}
	return &FileRun{file, groups}
}

// ----------------------------------------------------------------------------
// PakRun

// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
	Pak   *Pak
	Files []*FileRun
}

// Sorting support for files within a PakRun.
func (p *PakRun) Len() int           { return len(p.Files) }
func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }

// FileRuns are sorted by package for the reduction into PakRuns.
func lessFileRun(x, y interface{}) bool {
	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
}

// newPakRun allocates a new PakRun from the *FileRun run h.
func newPakRun(h RunList) interface{} {
	pak := h[0].(*FileRun).File.Pak
	files := make([]*FileRun, len(h))
	for i, x := range h {
		files[i] = x.(*FileRun)
	}
	run := &PakRun{pak, files}
	sort.Sort(run) // files were sorted by package; sort them by file now
	return run
}

// ----------------------------------------------------------------------------
// HitList

// A HitList describes a list of PakRuns.
type HitList []*PakRun

// PakRuns are sorted by package.
func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }

func reduce(h0 RunList) HitList {
	// reduce a list of Spots into a list of FileRuns
	h1 := h0.reduce(lessSpot, newFileRun)
	// reduce a list of FileRuns into a list of PakRuns
	h2 := h1.reduce(lessFileRun, newPakRun)
	// sort the list of PakRuns by package
	h2.sort(lessPakRun)
	// create a HitList
	h := make(HitList, len(h2))
	for i, p := range h2 {
		h[i] = p.(*PakRun)
	}
	return h
}

// filter returns a new HitList that keeps only the PakRuns
// from h whose package has the given name.
func (h HitList) filter(pakname string) HitList {
	var hh HitList
	for _, p := range h {
		if p.Pak.Name == pakname {
			hh = append(hh, p)
		}
	}
	return hh
}

// ----------------------------------------------------------------------------
// AltWords

type wordPair struct {
	canon string // canonical word spelling (all lowercase)
	alt   string // alternative spelling
}

// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
	Canon string   // canonical word spelling (all lowercase)
	Alts  []string // alternative spelling for the same word
}

// wordPairs are sorted by their canonical spelling.
func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }

// newAltWords allocates a new AltWords from the *wordPair run h.
func newAltWords(h RunList) interface{} {
	canon := h[0].(*wordPair).canon
	alts := make([]string, len(h))
	for i, x := range h {
		alts[i] = x.(*wordPair).alt
	}
	return &AltWords{canon, alts}
}

func (a *AltWords) filter(s string) *AltWords {
	var alts []string
	for _, w := range a.Alts {
		if w != s {
			alts = append(alts, w)
		}
	}
	if len(alts) > 0 {
		return &AltWords{a.Canon, alts}
	}
	return nil
}
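
// For example (hypothetical index contents): if the spellings "Fprintf" and
// "fprintf" both occur in the indexed sources, the canonical spelling
// "fprintf" maps to &AltWords{Canon: "fprintf", Alts: []string{"Fprintf", "fprintf"}}
// (in some order), and filter("Fprintf") returns an AltWords holding only the
// remaining spelling "fprintf".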

// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}

// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong.
type byImportCount struct {
	Idents      []Ident
	ImportCount map[string]int
}

func (ic byImportCount) Len() int {
	return len(ic.Idents)
}

func (ic byImportCount) Less(i, j int) bool {
	ri := ic.ImportCount[ic.Idents[i].Path]
	rj := ic.ImportCount[ic.Idents[j].Path]
	if ri == rj {
		return ic.Idents[i].Path < ic.Idents[j].Path
	}
	return ri > rj
}

func (ic byImportCount) Swap(i, j int) {
	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
}

func (ic byImportCount) String() string {
	buf := bytes.NewBuffer([]byte("["))
	for _, v := range ic.Idents {
		buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
	}
	buf.WriteString("\n]")
	return buf.String()
}

// filter creates a new Ident list where the results match the given
// package name.
func (ic byImportCount) filter(pakname string) []Ident {
	if ic.Idents == nil {
		return nil
	}
	var res []Ident
	for _, i := range ic.Idents {
		if i.Package == pakname {
			res = append(res, i)
		}
	}
	return res
}

// top returns the top n identifiers.
func (ic byImportCount) top(n int) []Ident {
	if len(ic.Idents) > n {
		return ic.Idents[:n]
	}
	return ic.Idents
}

// ----------------------------------------------------------------------------
// Indexer

type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}

// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}

// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // send pre fs.Open; receive on close

	mu            sync.Mutex              // guards all the following
	sources       bytes.Buffer            // concatenated sources
	strings       map[string]string       // interned strings
	packages      map[Pak]*Pak            // interned *Paks
	words         map[string]*IndexResult // RunLists of Spots
	snippets      []*Snippet              // indices are stored in SpotInfos
	current       *token.File             // last file added to file set
	file          *File                   // AST for current file
	decl          ast.Decl                // AST for current decl
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                 // package path ("net/http") => count
	packagePath   map[string]map[string]bool     // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind
	idents        map[SpotKind]map[string][]Ident // kind => name => list of Idents
}

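// intern returns a single shared copy of s: the first occurrence of a string
// is stored in x.strings and reused for every later occurrence, so identical
// identifier names seen across many files share one string value.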
func (x *Indexer) intern(s string) string {
	if s, ok := x.strings[s]; ok {
		return s
	}
	x.strings[s] = s
	return s
}

func (x *Indexer) lookupPackage(path, name string) *Pak {
	// In the source directory tree, more than one package may
	// live in the same directory. For the packages map, construct
	// a key that includes both the directory path and the package
	// name.
	key := Pak{Path: x.intern(path), Name: x.intern(name)}
	pak := x.packages[key]
	if pak == nil {
		pak = &key
		x.packages[key] = pak
	}
	return pak
}

func (x *Indexer) addSnippet(s *Snippet) int {
	index := len(x.snippets)
	x.snippets = append(x.snippets, s)
	return index
}

func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
	if id == nil {
		return
	}
	name := x.intern(id.Name)

	switch kind {
	case TypeDecl, FuncDecl, ConstDecl, VarDecl:
		x.curPkgExports[name] = kind
	}

	lists, found := x.words[name]
	if !found {
		lists = new(IndexResult)
		x.words[name] = lists
	}

	if kind == Use || x.decl == nil {
		if x.c.IndexGoCode {
			// not a declaration or no snippet required
			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
			lists.Others = append(lists.Others, Spot{x.file, info})
		}
	} else {
		// a declaration with snippet
		index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
		info := makeSpotInfo(kind, index, true)
		lists.Decls = append(lists.Decls, Spot{x.file, info})
	}

	x.stats.Spots++
}

func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
	for _, f := range flist.List {
		x.decl = nil // no snippets for fields
		for _, name := range f.Names {
			x.visitIdent(kind, name)
		}
		ast.Walk(x, f.Type)
		// ignore tag - not indexed at the moment
	}
}

func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
	switch n := spec.(type) {
	case *ast.ImportSpec:
		x.visitIdent(ImportDecl, n.Name)
		if n.Path != nil {
			if imp, err := strconv.Unquote(n.Path.Value); err == nil {
				x.importCount[x.intern(imp)]++
			}
		}

	case *ast.ValueSpec:
		for _, n := range n.Names {
			x.visitIdent(kind, n)
		}
		ast.Walk(x, n.Type)
		for _, v := range n.Values {
			ast.Walk(x, v)
		}

	case *ast.TypeSpec:
		x.visitIdent(TypeDecl, n.Name)
		ast.Walk(x, n.Type)
	}
}

func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
	kind := VarDecl
	if decl.Tok == token.CONST {
		kind = ConstDecl
	}
	x.decl = decl
	for _, s := range decl.Specs {
		x.visitSpec(kind, s)
	}
}

func (x *Indexer) Visit(node ast.Node) ast.Visitor {
	switch n := node.(type) {
	case nil:
		// nothing to do

	case *ast.Ident:
		x.visitIdent(Use, n)

	case *ast.FieldList:
		x.visitFieldList(VarDecl, n)

	case *ast.InterfaceType:
		x.visitFieldList(MethodDecl, n.Methods)

	case *ast.DeclStmt:
		// local declarations should only be *ast.GenDecls;
		// ignore incorrect ASTs
		if decl, ok := n.Decl.(*ast.GenDecl); ok {
			x.decl = nil // no snippets for local declarations
			x.visitGenDecl(decl)
		}

	case *ast.GenDecl:
		x.decl = n
		x.visitGenDecl(n)

	case *ast.FuncDecl:
		kind := FuncDecl
		if n.Recv != nil {
			kind = MethodDecl
			ast.Walk(x, n.Recv)
		}
		x.decl = n
		x.visitIdent(kind, n.Name)
		ast.Walk(x, n.Type)
		if n.Body != nil {
			ast.Walk(x, n.Body)
		}

	case *ast.File:
		x.decl = nil
		x.visitIdent(PackageClause, n.Name)
		for _, d := range n.Decls {
			ast.Walk(x, d)
		}

	default:
		return x
	}

	return nil
}

// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
	defer f.Close()

	// The file set's base offset and x.sources size must be in lock-step;
	// this permits the direct mapping of suffix array lookup results to
	// corresponding Pos values.
	//
	// When a file is added to the file set, its offset base increases by
	// the size of the file + 1; and the initial base offset is 1. Add an
	// extra byte to the sources here.
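	//
	// For example (illustrative sizes): with an empty index the base is 1 and
	// x.sources is empty; the 0 byte written below makes x.sources 1 byte long.
	// After a 100-byte file is parsed, the next base is 102 while x.sources
	// holds 101 bytes, so the 0 byte written on the next call restores the
	// invariant x.sources.Len() == x.fset.Base() checked below.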
	x.sources.WriteByte(0)

	// If the sources length doesn't match the file set base at this point
	// the file set implementation changed or we have another error.
	base := x.fset.Base()
	if x.sources.Len() != base {
		panic("internal error: file base incorrect")
	}

	// append file contents (src) to x.sources
	if _, err := x.sources.ReadFrom(f); err == nil {
		src := x.sources.Bytes()[base:]

		if goFile {
			// parse the file and in the process add it to the file set
			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
				return
			}
			// file has parse errors, and the AST may be incorrect -
			// set lines information explicitly and index as ordinary
			// text file (cannot fall through to the text case below
			// because the file has already been added to the file set
			// by the parser)
			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
			file.SetLinesForContent(src)
			ast = nil
			return
		}

		if util.IsText(src) {
			// only add the file to the file set (for the full text index)
			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
			file.SetLinesForContent(src)
			return
		}
	}

	// discard possibly added data
	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
	return
}

// Design note: Using an explicit white list of permitted files for indexing
// makes sure that the important files are included and massively reduces the
// number of files to index. The advantage over a blacklist is that unexpected
// (non-blacklisted) files won't suddenly explode the index.

// Files are whitelisted if they have a file name or extension
// present as key in whitelisted.
var whitelisted = map[string]bool{
	".bash":        true,
	".c":           true,
	".cc":          true,
	".cpp":         true,
	".cxx":         true,
	".css":         true,
	".go":          true,
	".goc":         true,
	".h":           true,
	".hh":          true,
	".hpp":         true,
	".hxx":         true,
	".html":        true,
	".js":          true,
	".out":         true,
	".py":          true,
	".s":           true,
	".sh":          true,
	".txt":         true,
	".xml":         true,
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}

// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file.
func isWhitelisted(filename string) bool {
	key := pathpkg.Ext(filename)
	if key == "" {
		// file has no extension - use entire filename
		key = filename
	}
	return whitelisted[key]
}
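
// For example, isWhitelisted("main.go") and isWhitelisted("Makefile") are true
// (extension and exact-name matches, respectively), while a name such as
// "image.png" is not whitelisted and is skipped by the full text indexer.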

func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
	pkgName := x.intern(astFile.Name.Name)
	if pkgName == "main" {
		return
	}
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	astPkg := ast.Package{
		Name: pkgName,
		Files: map[string]*ast.File{
			filename: astFile,
		},
	}
	var m doc.Mode
	docPkg := doc.New(&astPkg, dirname, m)
	addIdent := func(sk SpotKind, name string, docstr string) {
		if x.idents[sk] == nil {
			x.idents[sk] = make(map[string][]Ident)
		}
		name = x.intern(name)
		x.idents[sk][name] = append(x.idents[sk][name], Ident{
			Path:    pkgPath,
			Package: pkgName,
			Name:    name,
			Doc:     doc.Synopsis(docstr),
		})
	}

	if x.idents[PackageClause] == nil {
		x.idents[PackageClause] = make(map[string][]Ident)
	}
	// List of words under which the package identifier will be stored.
	// This includes the package name and the components of the directory
	// in which it resides.
	words := strings.Split(pathpkg.Dir(pkgPath), "/")
	if words[0] == "." {
		words = []string{}
	}
	name := x.intern(docPkg.Name)
	synopsis := doc.Synopsis(docPkg.Doc)
	words = append(words, name)
	pkgIdent := Ident{
		Path:    pkgPath,
		Package: pkgName,
		Name:    name,
		Doc:     synopsis,
	}
	for _, word := range words {
		word = x.intern(word)
		found := false
		pkgs := x.idents[PackageClause][word]
		for i, p := range pkgs {
			if p.Path == pkgPath {
				if docPkg.Doc != "" {
					p.Doc = synopsis
					pkgs[i] = p
				}
				found = true
				break
			}
		}
		if !found {
			x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
		}
	}

	for _, c := range docPkg.Consts {
		for _, name := range c.Names {
			addIdent(ConstDecl, name, c.Doc)
		}
	}
	for _, t := range docPkg.Types {
		addIdent(TypeDecl, t.Name, t.Doc)
		for _, c := range t.Consts {
			for _, name := range c.Names {
				addIdent(ConstDecl, name, c.Doc)
			}
		}
		for _, v := range t.Vars {
			for _, name := range v.Names {
				addIdent(VarDecl, name, v.Doc)
			}
		}
		for _, f := range t.Funcs {
			addIdent(FuncDecl, f.Name, f.Doc)
		}
		for _, f := range t.Methods {
			addIdent(MethodDecl, f.Name, f.Doc)
			// Change the name of methods to be "<typename>.<methodname>".
			// They will still be indexed as <methodname>.
			idents := x.idents[MethodDecl][f.Name]
			idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
		}
	}
	for _, v := range docPkg.Vars {
		for _, name := range v.Names {
			addIdent(VarDecl, name, v.Doc)
		}
	}
	for _, f := range docPkg.Funcs {
		addIdent(FuncDecl, f.Name, f.Doc)
	}
}

func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
	pkgName := astFile.Name.Name

	if x.c.IndexGoCode {
		x.current = file
		pak := x.lookupPackage(dirname, pkgName)
		x.file = &File{filename, pak}
		ast.Walk(x, astFile)
	}

	if x.c.IndexDocs {
		// Test files are already filtered out in visitFile if IndexGoCode and
		// IndexFullText are false.  Otherwise, check here.
		isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
			(strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
		if !isTestFile {
			x.indexDocs(dirname, filename, astFile)
		}
	}

	ppKey := x.intern(pkgName)
	if _, ok := x.packagePath[ppKey]; !ok {
		x.packagePath[ppKey] = make(map[string]bool)
	}
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	x.packagePath[ppKey][pkgPath] = true

	// Merge in exported symbols found walking this file into
	// the map for that package.
	if len(x.curPkgExports) > 0 {
		dest, ok := x.exports[pkgPath]
		if !ok {
			dest = make(map[string]SpotKind)
			x.exports[pkgPath] = dest
		}
		for k, v := range x.curPkgExports {
			dest[k] = v
		}
	}
}

func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
	if fi.IsDir() || !x.c.IndexEnabled {
		return
	}

	filename := pathpkg.Join(dirname, fi.Name())
	goFile := isGoFile(fi)

	switch {
	case x.c.IndexFullText:
		if !isWhitelisted(fi.Name()) {
			return
		}
	case x.c.IndexGoCode:
		if !goFile {
			return
		}
	case x.c.IndexDocs:
		if !goFile ||
			strings.HasSuffix(fi.Name(), "_test.go") ||
			strings.HasPrefix(dirname, "/test/") {
			return
		}
	default:
		// No indexing turned on.
		return
	}

	x.fsOpenGate <- true
	defer func() { <-x.fsOpenGate }()

	// open file
	f, err := x.c.fs.Open(filename)
	if err != nil {
		return
	}

	x.mu.Lock()
	defer x.mu.Unlock()

	x.throttle.Throttle()

	x.curPkgExports = make(map[string]SpotKind)
	file, fast := x.addFile(f, filename, goFile)
	if file == nil {
		return // addFile failed
	}

	if fast != nil {
		x.indexGoFile(dirname, fi.Name(), file, fast)
	}

	// update statistics
	x.stats.Bytes += file.Size()
	x.stats.Files++
	x.stats.Lines += file.LineCount()
}

// indexOptions contains information that affects the contents of an index.
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum number of results for indexing.
	// The default is 1000.
	MaxResults int
}

// ----------------------------------------------------------------------------
// Index

type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}

type Index struct {
	fset        *token.FileSet           // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult // maps words to hit lists
	alts        map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet               // all snippets, indexed by snippet index
	stats       Statistics
	importCount map[string]int                 // package path ("net/http") => count
	packagePath map[string]map[string]bool     // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident
	opts        indexOptions
}

func canonical(w string) string { return strings.ToLower(w) }

// Somewhat arbitrary, but low enough not to hurt disk-based filesystems by
// consuming too many file descriptors, since some systems have limits as low
// as 256 or 512. Go should have a built-in way to cap fd usage under the ulimit.
const (
	maxOpenFiles = 200
	maxOpenDirs  = 50
)

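// throttle returns the fraction of time the indexer should be running, in the
// range (0, 1]: Corpus.IndexThrottle if it is in that range, 0.9 by default
// when it is unset or non-positive, and 1.0 (run flat out) when it is set
// above 1.0.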
func (c *Corpus) throttle() float64 {
	if c.IndexThrottle <= 0 {
		return 0.9
	}
	if c.IndexThrottle > 1.0 {
		return 1.0
	}
	return c.IndexThrottle
}

// NewIndex creates a new index for the .go files provided by the corpus.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}

var ErrFileIndexVersion = errors.New("file index version out of date")

const fileIndexVersion = 3

// fileIndex is the subset of Index that's gob-encoded for use by
// Index.WriteTo and Index.ReadFrom.
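//
// The stream written by WriteTo is the gob-encoded fileIndex followed, when
// Fulltext is set, by the gob-encoded file set and the suffix array data.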
type fileIndex struct {
	Version     int
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}

func (x *fileIndex) Write(w io.Writer) error {
	return gob.NewEncoder(w).Encode(x)
}

func (x *fileIndex) Read(r io.Reader) error {
	return gob.NewDecoder(r).Decode(x)
}

// WriteTo writes the index x to w.
func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
	w = countingWriter{&n, w}
	fulltext := false
	if x.suffixes != nil {
		fulltext = true
	}
	fx := fileIndex{
		Version:     fileIndexVersion,
		Words:       x.words,
		Alts:        x.alts,
		Snippets:    x.snippets,
		Fulltext:    fulltext,
		Stats:       x.stats,
		ImportCount: x.importCount,
		PackagePath: x.packagePath,
		Exports:     x.exports,
		Idents:      x.idents,
		Opts:        x.opts,
	}
	if err := fx.Write(w); err != nil {
		return 0, err
	}
	if fulltext {
		encode := func(x interface{}) error {
			return gob.NewEncoder(w).Encode(x)
		}
		if err := x.fset.Write(encode); err != nil {
			return 0, err
		}
		if err := x.suffixes.Write(w); err != nil {
			return 0, err
		}
	}
	return n, nil
}

// ReadFrom reads the index from r into x; x must not be nil.
// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
// If the index is from an old version, the error is ErrFileIndexVersion.
func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
	// We use the ability to read bytes as a plausible surrogate for buffering.
	if _, ok := r.(io.ByteReader); !ok {
		r = bufio.NewReader(r)
	}
	r = countingReader{&n, r.(byteReader)}
	var fx fileIndex
	if err := fx.Read(r); err != nil {
		return n, err
	}
	if fx.Version != fileIndexVersion {
		return 0, ErrFileIndexVersion
	}
	x.words = fx.Words
	x.alts = fx.Alts
	x.snippets = fx.Snippets
	x.stats = fx.Stats
	x.importCount = fx.ImportCount
	x.packagePath = fx.PackagePath
	x.exports = fx.Exports
	x.idents = fx.Idents
	x.opts = fx.Opts
	if fx.Fulltext {
		x.fset = token.NewFileSet()
		decode := func(x interface{}) error {
			return gob.NewDecoder(r).Decode(x)
		}
		if err := x.fset.Read(decode); err != nil {
			return n, err
		}
		x.suffixes = new(suffixarray.Index)
		if err := x.suffixes.Read(r); err != nil {
			return n, err
		}
	}
	return n, nil
}

// Stats returns index statistics.
func (x *Index) Stats() Statistics {
	return x.stats
}

// ImportCount returns a map from import paths to how many times they were seen.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}

// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}

// Exports returns a map from full package path to exported
// symbol name to its type.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}

// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}

func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
	match = x.words[w]
	alt = x.alts[canonical(w)]
	// remove current spelling from alternatives
	// (if there is no match, the alternatives do
	// not contain the current spelling)
	if match != nil && alt != nil {
		alt = alt.filter(w)
	}
	return
}

// isIdentifier reports whether s is a Go identifier.
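// For example, isIdentifier("fmt"), isIdentifier("_x9") and
// isIdentifier("ListenAndServe") are true, while isIdentifier("net/http"),
// isIdentifier("9lives") and isIdentifier("") are false.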
func isIdentifier(s string) bool {
	for i, ch := range s {
		if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
			continue
		}
		return false
	}
	return len(s) > 0
}

// For a given query, which is either a single identifier or a qualified
// identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
// list of alternative spellings, and identifiers, if any. Any and all results
// may be nil.  If the query syntax is wrong, an error is reported.
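//
// For example (assuming the index contains these identifiers):
//
//	result, err := x.Lookup("Fprintf")     // simple identifier
//	result, err = x.Lookup("fmt.Fprintf")  // qualified identifier, hits filtered by package name
//
// whereas x.Lookup("fmt.Fprintf.x") returns an error because the query has
// more than two parts.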
func (x *Index) Lookup(query string) (*SearchResult, error) {
	ss := strings.Split(query, ".")

	// check query syntax
	for _, s := range ss {
		if !isIdentifier(s) {
			return nil, errors.New("all query parts must be identifiers")
		}
	}
	rslt := &SearchResult{
		Query:  query,
		Idents: make(map[SpotKind][]Ident, 5),
	}
	// handle simple and qualified identifiers
	switch len(ss) {
	case 1:
		ident := ss[0]
		rslt.Hit, rslt.Alt = x.lookupWord(ident)
		if rslt.Hit != nil {
			// found a match - filter packages with same name
			// for the list of packages called ident, if any
			rslt.Pak = rslt.Hit.Others.filter(ident)
		}
		for k, v := range x.idents {
			const rsltLimit = 50
			ids := byImportCount{v[ident], x.importCount}
			rslt.Idents[k] = ids.top(rsltLimit)
		}

	case 2:
		pakname, ident := ss[0], ss[1]
		rslt.Hit, rslt.Alt = x.lookupWord(ident)
		if rslt.Hit != nil {
			// found a match - filter by package name
			// (no paks - package names are not qualified)
			decls := rslt.Hit.Decls.filter(pakname)
			others := rslt.Hit.Others.filter(pakname)
			rslt.Hit = &LookupResult{decls, others}
		}
		for k, v := range x.idents {
			ids := byImportCount{v[ident], x.importCount}
			rslt.Idents[k] = ids.filter(pakname)
		}

	default:
		return nil, errors.New("query is not a (qualified) identifier")
	}

	return rslt, nil
}

func (x *Index) Snippet(i int) *Snippet {
	// handle illegal snippet indices gracefully
	if 0 <= i && i < len(x.snippets) {
		return x.snippets[i]
	}
	return nil
}

type positionList []struct {
	filename string
	line     int
}

func (list positionList) Len() int           { return len(list) }
func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }

// unique returns the list sorted and with duplicate entries removed
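// (for example, unique([]int{7, 3, 7, 1}) returns []int{1, 3, 7}).
// It sorts and compacts list in place, reusing its underlying array.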
func unique(list []int) []int {
	sort.Ints(list)
	var last int
	i := 0
	for _, x := range list {
		if i == 0 || x != last {
			last = x
			list[i] = x
			i++
		}
	}
	return list[0:i]
}

// A FileLines value specifies a file and line numbers within that file.
type FileLines struct {
	Filename string
	Lines    []int
}

// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string
	var lines []int
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	addLines()

	return
}

// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked on.
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}

// feedDirnames feeds the directory names of all directories
// under the corpus's file system tree to channel ch.
//
func (c *Corpus) feedDirnames(ch chan<- string) {
	if dir, _ := c.fsTree.Get(); dir != nil {
		for d := range dir.(*Directory).iter(false) {
			ch <- d.Path
		}
	}
}

// fsDirnames() returns a channel sending all directory names
// of all the file systems under godoc's observation.
//
func (c *Corpus) fsDirnames() <-chan string {
	ch := make(chan string, 256) // buffered for fewer context switches
	go func() {
		c.feedDirnames(ch)
		close(ch)
	}()
	return ch
}

// CompatibleWith reports whether the Index x is compatible with the corpus
// indexing options set in c.
func (x *Index) CompatibleWith(c *Corpus) bool {
	return x.opts.Docs == c.IndexDocs &&
		x.opts.GoCode == c.IndexGoCode &&
		x.opts.FullText == c.IndexFullText &&
		x.opts.MaxResults == c.MaxResults
}

func (c *Corpus) readIndex(filenames string) error {
	matches, err := filepath.Glob(filenames)
	if err != nil {
		return err
	} else if matches == nil {
		return fmt.Errorf("no index files match %q", filenames)
	}
	sort.Strings(matches) // make sure files are in the right order
	files := make([]io.Reader, 0, len(matches))
	for _, filename := range matches {
		f, err := os.Open(filename)
		if err != nil {
			return err
		}
		defer f.Close()
		files = append(files, f)
	}
	return c.ReadIndexFrom(io.MultiReader(files...))
}

// ReadIndexFrom sets the current index from the serialized version found in r.
func (c *Corpus) ReadIndexFrom(r io.Reader) error {
	x := new(Index)
	if _, err := x.ReadFrom(r); err != nil {
		return err
	}
	if !x.CompatibleWith(c) {
		return fmt.Errorf("index file options are incompatible: %v", x.opts)
	}
	c.searchIndex.Set(x)
	return nil
}

func (c *Corpus) UpdateIndex() {
	if c.Verbose {
		log.Printf("updating index...")
	}
	start := time.Now()
	index := c.NewIndex()
	stop := time.Now()
	c.searchIndex.Set(index)
	if c.Verbose {
		secs := stop.Sub(start).Seconds()
		stats := index.Stats()
		log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
			secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
	}
	memstats := new(runtime.MemStats)
	runtime.ReadMemStats(memstats)
	if c.Verbose {
		log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
	}
	runtime.GC()
	runtime.ReadMemStats(memstats)
	if c.Verbose {
		log.Printf("after  GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
	}
}

// RunIndexer runs forever, indexing.
func (c *Corpus) RunIndexer() {
	// initialize the index from disk if possible
	if c.IndexFiles != "" {
		c.initFSTree()
		if err := c.readIndex(c.IndexFiles); err != nil {
			log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
		}
		return
	}

	// Repeatedly update the package directory tree and index.
	for {
		c.initFSTree()
		c.UpdateIndex()
		if c.IndexInterval < 0 {
			return
		}
		delay := 5 * time.Minute // by default, reindex every 5 minutes
		if c.IndexInterval > 0 {
			delay = c.IndexInterval
		}
		time.Sleep(delay)
	}
}

type countingWriter struct {
	n *int64
	w io.Writer
}

func (c countingWriter) Write(p []byte) (n int, err error) {
	n, err = c.w.Write(p)
	*c.n += int64(n)
	return
}

type byteReader interface {
	io.Reader
	io.ByteReader
}

type countingReader struct {
	n *int64
	r byteReader
}

func (c countingReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	*c.n += int64(n)
	return
}

func (c countingReader) ReadByte() (b byte, err error) {
	b, err = c.r.ReadByte()
	*c.n += 1
	return
}