1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9// This program generates table.go and table_test.go based on the authoritative
10// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat
11//
12// The version is derived from
13// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat
14// and a human-readable form is at
15// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
16//
17// To fetch a particular git revision, such as 5c70ccd250, pass
18// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
19// and -version "an explicit version string".
20
21import (
22	"bufio"
23	"bytes"
24	"flag"
25	"fmt"
26	"go/format"
27	"io"
28	"io/ioutil"
29	"net/http"
30	"os"
31	"regexp"
32	"sort"
33	"strings"
34
35	"golang.org/x/net/idna"
36)
37
// Bit widths of the fields packed into each entry of the generated nodes and
// children tables. main1 checks both sums before generating anything, and
// printReal copies these values into the generated table.go.
const (
	// The sum of these four values must be no greater than 32, because every
	// entry of the generated nodes table is a single uint32.
	nodesBitsChildren   = 10
	nodesBitsICANN      = 1
	nodesBitsTextOffset = 15
	nodesBitsTextLength = 6

	// The sum of these four values must be no greater than 32, because every
	// entry of the generated children table is a single uint32.
	childrenBitsWildcard = 1
	childrenBitsNodeType = 2
	childrenBitsHi       = 14
	childrenBitsLo       = 14
)
51
// High-water marks recorded while the tables are built. printReal writes them
// as trailing comments in table.go, each next to the capacity allowed by the
// corresponding bit width:
//
//   - maxChildren is the largest children-table index handed out by
//     assignIndexes.
//   - maxTextOffset and maxTextLength are the largest label offset and label
//     length seen by printReal when locating labels in the combined text.
//   - maxHi and maxLo are the largest high (exclusive) and low (inclusive)
//     nodes indexes encoded by assignIndexes.
var (
	maxChildren   int
	maxTextOffset int
	maxTextLength int
	maxHi         uint32
	maxLo         uint32
)
59
// max returns whichever of a and b is larger.
func max(a, b int) int {
	larger := a
	if b > larger {
		larger = b
	}
	return larger
}
66
// u32max returns whichever of a and b is larger.
func u32max(a, b uint32) uint32 {
	larger := a
	if b > larger {
		larger = b
	}
	return larger
}
73
// Node types stored in the children table. The first three values are also
// written into the generated table.go by printReal.
const (
	// nodeTypeNormal is used for a plain rule (no "*." or "!" prefix).
	nodeTypeNormal = 0
	// nodeTypeException is used for a rule written with a leading "!".
	nodeTypeException = 1
	// nodeTypeParentOnly is the type every node starts with (see node.child);
	// it is kept by nodes that are never the final label of a plain or
	// exception rule.
	nodeTypeParentOnly = 2
	// numNodeType is the number of node types. assignIndexes adds it to a
	// childless node's type to select the wildcard variant of the pre-defined
	// children entries.
	numNodeType = 3
)
80
81func nodeTypeStr(n int) string {
82	switch n {
83	case nodeTypeNormal:
84		return "+"
85	case nodeTypeException:
86		return "!"
87	case nodeTypeParentOnly:
88		return "o"
89	}
90	panic("unreachable")
91}
92
const (
	// defaultURL is the default value of the -url flag: the location the
	// public suffix list is downloaded from.
	defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat"
	// gitCommitURL is the GitHub API endpoint that gitCommit queries to find
	// the commit SHA and date used in the default version string.
	gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
)
97
// Package-level state shared between main1 and the print functions:
//
//   - labelEncoding maps each label to its packed (text offset, text length)
//     value; it is filled in by printReal.
//   - labelsList is the sorted list of distinct labels, built by main1 from
//     labelsMap.
//   - labelsMap is the set of labels seen while reading the rules.
//   - rules holds every accepted rule, in input order, after conversion to
//     ASCII by idna.ToASCII.
//   - numICANNRules is len(rules) at the point the "END ICANN DOMAINS" marker
//     is read.
var (
	labelEncoding = map[string]uint32{}
	labelsList    = []string{}
	labelsMap     = map[string]bool{}
	rules         = []string{}
	numICANNRules = 0

	// validSuffixRE is used to check that the entries in the public suffix
	// list are in canonical form (after Punycode encoding). Specifically,
	// capital letters are not allowed.
	validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)

	// shaRE and dateRE pull the first commit SHA and the first committer date
	// out of the response body fetched from gitCommitURL (see gitCommit).
	shaRE  = regexp.MustCompile(`"sha":"([^"]+)"`)
	dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`)

	// Command-line flags; they are parsed at the start of main1.
	comments = flag.Bool("comments", false, "generate table.go comments, for debugging")
	subset   = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
	url      = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead")
	v        = flag.Bool("v", false, "verbose output (to stderr)")
	version  = flag.String("version", "", "the effective_tld_names.dat version")
)
119
120func main() {
121	if err := main1(); err != nil {
122		fmt.Fprintln(os.Stderr, err)
123		os.Exit(1)
124	}
125}
126
127func main1() error {
128	flag.Parse()
129	if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
130		return fmt.Errorf("not enough bits to encode the nodes table")
131	}
132	if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
133		return fmt.Errorf("not enough bits to encode the children table")
134	}
135	if *version == "" {
136		if *url != defaultURL {
137			return fmt.Errorf("-version was not specified, and the -url is not the default one")
138		}
139		sha, date, err := gitCommit()
140		if err != nil {
141			return err
142		}
143		*version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date)
144	}
145	var r io.Reader = os.Stdin
146	if *url != "" {
147		res, err := http.Get(*url)
148		if err != nil {
149			return err
150		}
151		if res.StatusCode != http.StatusOK {
152			return fmt.Errorf("bad GET status for %s: %s", *url, res.Status)
153		}
154		r = res.Body
155		defer res.Body.Close()
156	}
157
158	var root node
159	icann := false
160	br := bufio.NewReader(r)
161	for {
162		s, err := br.ReadString('\n')
163		if err != nil {
164			if err == io.EOF {
165				break
166			}
167			return err
168		}
169		s = strings.TrimSpace(s)
170		if strings.Contains(s, "BEGIN ICANN DOMAINS") {
171			if len(rules) != 0 {
172				return fmt.Errorf(`expected no rules before "BEGIN ICANN DOMAINS"`)
173			}
174			icann = true
175			continue
176		}
177		if strings.Contains(s, "END ICANN DOMAINS") {
178			icann, numICANNRules = false, len(rules)
179			continue
180		}
181		if s == "" || strings.HasPrefix(s, "//") {
182			continue
183		}
184		s, err = idna.ToASCII(s)
185		if err != nil {
186			return err
187		}
188		if !validSuffixRE.MatchString(s) {
189			return fmt.Errorf("bad publicsuffix.org list data: %q", s)
190		}
191
192		if *subset {
193			switch {
194			case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
195			case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
196			case s == "ao" || strings.HasSuffix(s, ".ao"):
197			case s == "ar" || strings.HasSuffix(s, ".ar"):
198			case s == "arpa" || strings.HasSuffix(s, ".arpa"):
199			case s == "cy" || strings.HasSuffix(s, ".cy"):
200			case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
201			case s == "jp":
202			case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
203			case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
204			case s == "om" || strings.HasSuffix(s, ".om"):
205			case s == "uk" || strings.HasSuffix(s, ".uk"):
206			case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
207			case s == "tw" || strings.HasSuffix(s, ".tw"):
208			case s == "zw" || strings.HasSuffix(s, ".zw"):
209			case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
210				// xn--p1ai is Russian-Cyrillic "рф".
211			default:
212				continue
213			}
214		}
215
216		rules = append(rules, s)
217
218		nt, wildcard := nodeTypeNormal, false
219		switch {
220		case strings.HasPrefix(s, "*."):
221			s, nt = s[2:], nodeTypeParentOnly
222			wildcard = true
223		case strings.HasPrefix(s, "!"):
224			s, nt = s[1:], nodeTypeException
225		}
226		labels := strings.Split(s, ".")
227		for n, i := &root, len(labels)-1; i >= 0; i-- {
228			label := labels[i]
229			n = n.child(label)
230			if i == 0 {
231				if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
232					n.nodeType = nt
233				}
234				n.icann = n.icann && icann
235				n.wildcard = n.wildcard || wildcard
236			}
237			labelsMap[label] = true
238		}
239	}
240	labelsList = make([]string, 0, len(labelsMap))
241	for label := range labelsMap {
242		labelsList = append(labelsList, label)
243	}
244	sort.Strings(labelsList)
245
246	if err := generate(printReal, &root, "table.go"); err != nil {
247		return err
248	}
249	if err := generate(printTest, &root, "table_test.go"); err != nil {
250		return err
251	}
252	return nil
253}
254
255func generate(p func(io.Writer, *node) error, root *node, filename string) error {
256	buf := new(bytes.Buffer)
257	if err := p(buf, root); err != nil {
258		return err
259	}
260	b, err := format.Source(buf.Bytes())
261	if err != nil {
262		return err
263	}
264	return ioutil.WriteFile(filename, b, 0644)
265}
266
267func gitCommit() (sha, date string, retErr error) {
268	res, err := http.Get(gitCommitURL)
269	if err != nil {
270		return "", "", err
271	}
272	if res.StatusCode != http.StatusOK {
273		return "", "", fmt.Errorf("bad GET status for %s: %s", gitCommitURL, res.Status)
274	}
275	defer res.Body.Close()
276	b, err := ioutil.ReadAll(res.Body)
277	if err != nil {
278		return "", "", err
279	}
280	if m := shaRE.FindSubmatch(b); m != nil {
281		sha = string(m[1])
282	}
283	if m := dateRE.FindSubmatch(b); m != nil {
284		date = string(m[1])
285	}
286	if sha == "" || date == "" {
287		retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL)
288	}
289	return sha, date, retErr
290}
291
292func printTest(w io.Writer, n *node) error {
293	fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
294	fmt.Fprintf(w, "package publicsuffix\n\nconst numICANNRules = %d\n\nvar rules = [...]string{\n", numICANNRules)
295	for _, rule := range rules {
296		fmt.Fprintf(w, "%q,\n", rule)
297	}
298	fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
299	if err := n.walk(w, printNodeLabel); err != nil {
300		return err
301	}
302	fmt.Fprintf(w, "}\n")
303	return nil
304}
305
306func printReal(w io.Writer, n *node) error {
307	const header = `// generated by go run gen.go; DO NOT EDIT
308
309package publicsuffix
310
311const version = %q
312
313const (
314	nodesBitsChildren   = %d
315	nodesBitsICANN      = %d
316	nodesBitsTextOffset = %d
317	nodesBitsTextLength = %d
318
319	childrenBitsWildcard = %d
320	childrenBitsNodeType = %d
321	childrenBitsHi       = %d
322	childrenBitsLo       = %d
323)
324
325const (
326	nodeTypeNormal     = %d
327	nodeTypeException  = %d
328	nodeTypeParentOnly = %d
329)
330
331// numTLD is the number of top level domains.
332const numTLD = %d
333
334`
335	fmt.Fprintf(w, header, *version,
336		nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
337		childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
338		nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
339
340	text := combineText(labelsList)
341	if text == "" {
342		return fmt.Errorf("internal error: makeText returned no text")
343	}
344	for _, label := range labelsList {
345		offset, length := strings.Index(text, label), len(label)
346		if offset < 0 {
347			return fmt.Errorf("internal error: could not find %q in text %q", label, text)
348		}
349		maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
350		if offset >= 1<<nodesBitsTextOffset {
351			return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
352		}
353		if length >= 1<<nodesBitsTextLength {
354			return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
355		}
356		labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
357	}
358	fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
359	for len(text) > 0 {
360		n, plus := len(text), ""
361		if n > 64 {
362			n, plus = 64, " +"
363		}
364		fmt.Fprintf(w, "%q%s\n", text[:n], plus)
365		text = text[n:]
366	}
367
368	if err := n.walk(w, assignIndexes); err != nil {
369		return err
370	}
371
372	fmt.Fprintf(w, `
373
374// nodes is the list of nodes. Each node is represented as a uint32, which
375// encodes the node's children, wildcard bit and node type (as an index into
376// the children array), ICANN bit and text.
377//
378// If the table was generated with the -comments flag, there is a //-comment
379// after each node's data. In it is the nodes-array indexes of the children,
380// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
381// nodeType is printed as + for normal, ! for exception, and o for parent-only
382// nodes that have children but don't match a domain label in their own right.
383// An I denotes an ICANN domain.
384//
385// The layout within the uint32, from MSB to LSB, is:
386//	[%2d bits] unused
387//	[%2d bits] children index
388//	[%2d bits] ICANN bit
389//	[%2d bits] text index
390//	[%2d bits] text length
391var nodes = [...]uint32{
392`,
393		32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
394		nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
395	if err := n.walk(w, printNode); err != nil {
396		return err
397	}
398	fmt.Fprintf(w, `}
399
400// children is the list of nodes' children, the parent's wildcard bit and the
401// parent's node type. If a node has no children then their children index
402// will be in the range [0, 6), depending on the wildcard bit and node type.
403//
404// The layout within the uint32, from MSB to LSB, is:
405//	[%2d bits] unused
406//	[%2d bits] wildcard bit
407//	[%2d bits] node type
408//	[%2d bits] high nodes index (exclusive) of children
409//	[%2d bits] low nodes index (inclusive) of children
410var children=[...]uint32{
411`,
412		32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
413		childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
414	for i, c := range childrenEncoding {
415		s := "---------------"
416		lo := c & (1<<childrenBitsLo - 1)
417		hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
418		if lo != hi {
419			s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
420		}
421		nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
422		wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
423		if *comments {
424			fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
425				c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
426		} else {
427			fmt.Fprintf(w, "0x%x,\n", c)
428		}
429	}
430	fmt.Fprintf(w, "}\n\n")
431	fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
432	fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
433	fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
434	fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
435	fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
436	return nil
437}
438
// node is one label in the tree of public suffix rules built by main1. The
// root node has an empty label; its children are the top level domains.
type node struct {
	label    string // this node's domain label
	nodeType int    // one of the nodeType constants; starts as nodeTypeParentOnly
	icann    bool   // starts true; cleared by a rule for this node read outside the ICANN section
	wildcard bool   // set when a "*." rule names this node as the wildcard's parent
	// nodesIndex and childrenIndex are the index of this node in the nodes
	// and the index of its children offset/length in the children arrays.
	nodesIndex, childrenIndex int
	// firstChild is the index of this node's first child, or zero if this
	// node has no children.
	firstChild int
	// children are the node's children, in strictly increasing node label order.
	children []*node
}
453
454func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
455	if err := f(w, n); err != nil {
456		return err
457	}
458	for _, c := range n.children {
459		if err := c.walk(w, f); err != nil {
460			return err
461		}
462	}
463	return nil
464}
465
466// child returns the child of n with the given label. The child is created if
467// it did not exist beforehand.
468func (n *node) child(label string) *node {
469	for _, c := range n.children {
470		if c.label == label {
471			return c
472		}
473	}
474	c := &node{
475		label:    label,
476		nodeType: nodeTypeParentOnly,
477		icann:    true,
478	}
479	n.children = append(n.children, c)
480	sort.Sort(byLabel(n.children))
481	return c
482}
483
484type byLabel []*node
485
486func (b byLabel) Len() int           { return len(b) }
487func (b byLabel) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
488func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
489
// nextNodesIndex is the next unused index in the generated nodes array.
// assignIndexes advances it by one for every child it numbers.
var nextNodesIndex int

// childrenEncoding are the encoded entries in the generated children array.
// All these pre-defined entries have no children. Their positions matter:
// assignIndexes gives a childless node the children index nodeType, plus
// numNodeType if the node's wildcard bit is set.
var childrenEncoding = []uint32{
	0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
	1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
	2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
	4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
	5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
	6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
}

// firstCallToAssignIndexes is true until assignIndexes has handled its first
// node with children. That node (the root, when the walk starts there) gets
// no entry in the children array.
var firstCallToAssignIndexes = true
504
505func assignIndexes(w io.Writer, n *node) error {
506	if len(n.children) != 0 {
507		// Assign nodesIndex.
508		n.firstChild = nextNodesIndex
509		for _, c := range n.children {
510			c.nodesIndex = nextNodesIndex
511			nextNodesIndex++
512		}
513
514		// The root node's children is implicit.
515		if firstCallToAssignIndexes {
516			firstCallToAssignIndexes = false
517			return nil
518		}
519
520		// Assign childrenIndex.
521		maxChildren = max(maxChildren, len(childrenEncoding))
522		if len(childrenEncoding) >= 1<<nodesBitsChildren {
523			return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
524		}
525		n.childrenIndex = len(childrenEncoding)
526		lo := uint32(n.firstChild)
527		hi := lo + uint32(len(n.children))
528		maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
529		if lo >= 1<<childrenBitsLo {
530			return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
531		}
532		if hi >= 1<<childrenBitsHi {
533			return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
534		}
535		enc := hi<<childrenBitsLo | lo
536		enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
537		if n.wildcard {
538			enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
539		}
540		childrenEncoding = append(childrenEncoding, enc)
541	} else {
542		n.childrenIndex = n.nodeType
543		if n.wildcard {
544			n.childrenIndex += numNodeType
545		}
546	}
547	return nil
548}
549
550func printNode(w io.Writer, n *node) error {
551	for _, c := range n.children {
552		s := "---------------"
553		if len(c.children) != 0 {
554			s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
555		}
556		encoding := labelEncoding[c.label]
557		if c.icann {
558			encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
559		}
560		encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
561		if *comments {
562			fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
563				encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
564				nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
565			)
566		} else {
567			fmt.Fprintf(w, "0x%x,\n", encoding)
568		}
569	}
570	return nil
571}
572
573func printNodeLabel(w io.Writer, n *node) error {
574	for _, c := range n.children {
575		fmt.Fprintf(w, "%q,\n", c.label)
576	}
577	return nil
578}
579
// icannStr returns the marker printed for a node's ICANN bit in the -comments
// output: "I" when the bit is set, a single space otherwise.
func icannStr(icann bool) string {
	marker := " "
	if icann {
		marker = "I"
	}
	return marker
}
586
// wildcardStr returns the marker printed for a wildcard bit in the -comments
// output: "*" when the bit is set, a single space otherwise.
func wildcardStr(wildcard bool) string {
	marker := " "
	if wildcard {
		marker = "*"
	}
	return marker
}
593
594// combineText combines all the strings in labelsList to form one giant string.
595// Overlapping strings will be merged: "arpa" and "parliament" could yield
596// "arparliament".
597func combineText(labelsList []string) string {
598	beforeLength := 0
599	for _, s := range labelsList {
600		beforeLength += len(s)
601	}
602
603	text := crush(removeSubstrings(labelsList))
604	if *v {
605		fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
606	}
607	return text
608}
609
// byLength implements sort.Interface, ordering strings by increasing length.
type byLength []string

// Len returns the number of strings.
func (s byLength) Len() int {
	return len(s)
}

// Swap exchanges the strings at positions i and j.
func (s byLength) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the string at position i is shorter than the one at j.
func (s byLength) Less(i, j int) bool {
	return len(s[j]) > len(s[i])
}

// removeSubstrings returns a copy of its input with any strings removed
// that are substrings of other provided strings. The input slice itself is
// left untouched, and the result is in lexicographic order.
func removeSubstrings(input []string) []string {
	// Work on a private copy, shortest strings first.
	ss := make([]string, len(input))
	copy(ss, input)
	sort.Sort(byLength(ss))

	// A string can only be contained in one that sorts after it, i.e. one of
	// equal or greater length. Contained strings are blanked out.
	for i := range ss {
		for j := i + 1; j < len(ss); j++ {
			if strings.Contains(ss[j], ss[i]) {
				ss[i] = ""
				break
			}
		}
	}

	// After a lexicographic sort the blanked entries are all at the front;
	// skip past them.
	sort.Strings(ss)
	firstKept := 0
	for firstKept < len(ss) && ss[firstKept] == "" {
		firstKept++
	}
	return ss[firstKept:]
}
641
642// crush combines a list of strings, taking advantage of overlaps. It returns a
643// single string that contains each input string as a substring.
644func crush(ss []string) string {
645	maxLabelLen := 0
646	for _, s := range ss {
647		if maxLabelLen < len(s) {
648			maxLabelLen = len(s)
649		}
650	}
651
652	for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
653		prefixes := makePrefixMap(ss, prefixLen)
654		for i, s := range ss {
655			if len(s) <= prefixLen {
656				continue
657			}
658			mergeLabel(ss, i, prefixLen, prefixes)
659		}
660	}
661
662	return strings.Join(ss, "")
663}
664
665// mergeLabel merges the label at ss[i] with the first available matching label
666// in prefixMap, where the last "prefixLen" characters in ss[i] match the first
667// "prefixLen" characters in the matching label.
668// It will merge ss[i] repeatedly until no more matches are available.
669// All matching labels merged into ss[i] are replaced by "".
670func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
671	s := ss[i]
672	suffix := s[len(s)-prefixLen:]
673	for _, j := range prefixes[suffix] {
674		// Empty strings mean "already used." Also avoid merging with self.
675		if ss[j] == "" || i == j {
676			continue
677		}
678		if *v {
679			fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
680				prefixLen, i, j, ss[i], ss[j], suffix)
681		}
682		ss[i] += ss[j][prefixLen:]
683		ss[j] = ""
684		// ss[i] has a new suffix, so merge again if possible.
685		// Note: we only have to merge again at the same prefix length. Shorter
686		// prefix lengths will be handled in the next iteration of crush's for loop.
687		// Can there be matches for longer prefix lengths, introduced by the merge?
688		// I believe that any such matches would by necessity have been eliminated
689		// during substring removal or merged at a higher prefix length. For
690		// instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
691		// would yield "abcde", which could be merged with "bcdef." However, in
692		// practice "cde" would already have been elimintated by removeSubstrings.
693		mergeLabel(ss, i, prefixLen, prefixes)
694		return
695	}
696}
697
// prefixMap maps a prefix to the strings that start with it. The strings are
// identified by their indexes in a slice of strings kept elsewhere.
type prefixMap map[string][]int

// makePrefixMap builds the prefixMap for the first prefixLen bytes of each
// string in ss. Strings no longer than prefixLen are left out: a match on a
// string's full length is a substring match, which removeSubstrings handles.
func makePrefixMap(ss []string, prefixLen int) prefixMap {
	byPrefix := prefixMap{}
	for i := range ss {
		if len(ss[i]) <= prefixLen {
			continue
		}
		key := ss[i][:prefixLen]
		byPrefix[key] = append(byPrefix[key], i)
	}
	return byPrefix
}
718