1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package stringset provides a way to represent a collection of strings
6// compactly.
7package stringset
8
9import "sort"
10
11// A Set holds a collection of strings that can be looked up by an index number.
12type Set struct {
13	// These fields are exported to allow for code generation.
14
15	Data  string
16	Index []uint16
17}
18
19// Elem returns the string with index i. It panics if i is out of range.
20func (s *Set) Elem(i int) string {
21	return s.Data[s.Index[i]:s.Index[i+1]]
22}
23
24// Len returns the number of strings in the set.
25func (s *Set) Len() int {
26	return len(s.Index) - 1
27}
28
29// Search returns the index of the given string or -1 if it is not in the set.
30// The Set must have been created with strings in sorted order.
31func Search(s *Set, str string) int {
32	// TODO: optimize this if it gets used a lot.
33	n := len(s.Index) - 1
34	p := sort.Search(n, func(i int) bool {
35		return s.Elem(i) >= str
36	})
37	if p == n || str != s.Elem(p) {
38		return -1
39	}
40	return p
41}
42
43// A Builder constructs Sets.
44type Builder struct {
45	set   Set
46	index map[string]int
47}
48
49// NewBuilder returns a new and initialized Builder.
50func NewBuilder() *Builder {
51	return &Builder{
52		set: Set{
53			Index: []uint16{0},
54		},
55		index: map[string]int{},
56	}
57}
58
59// Set creates the set created so far.
60func (b *Builder) Set() Set {
61	return b.set
62}
63
64// Index returns the index for the given string, which must have been added
65// before.
66func (b *Builder) Index(s string) int {
67	return b.index[s]
68}
69
70// Add adds a string to the index. Strings that are added by a single Add will
71// be stored together, unless they match an existing string.
72func (b *Builder) Add(ss ...string) {
73	// First check if the string already exists.
74	for _, s := range ss {
75		if _, ok := b.index[s]; ok {
76			continue
77		}
78		b.index[s] = len(b.set.Index) - 1
79		b.set.Data += s
80		x := len(b.set.Data)
81		if x > 0xFFFF {
82			panic("Index too > 0xFFFF")
83		}
84		b.set.Index = append(b.set.Index, uint16(x))
85	}
86}
87