1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ignore
6// +build ignore
7
8package main
9
10// This program generates table.go and table_test.go based on the authoritative
11// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat
12//
13// The version is derived from
14// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat
15// and a human-readable form is at
16// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
17//
18// To fetch a particular git revision, such as 5c70ccd250, pass
19// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
20// and -version "an explicit version string".
21
22import (
23	"bufio"
24	"bytes"
25	"flag"
26	"fmt"
27	"go/format"
28	"io"
29	"io/ioutil"
30	"net/http"
31	"os"
32	"regexp"
33	"sort"
34	"strings"
35
36	"golang.org/x/net/idna"
37)
38
39const (
40	// These sum of these four values must be no greater than 32.
41	nodesBitsChildren   = 10
42	nodesBitsICANN      = 1
43	nodesBitsTextOffset = 15
44	nodesBitsTextLength = 6
45
46	// These sum of these four values must be no greater than 32.
47	childrenBitsWildcard = 1
48	childrenBitsNodeType = 2
49	childrenBitsHi       = 14
50	childrenBitsLo       = 14
51)
52
53var (
54	maxChildren   int
55	maxTextOffset int
56	maxTextLength int
57	maxHi         uint32
58	maxLo         uint32
59)
60
61func max(a, b int) int {
62	if a < b {
63		return b
64	}
65	return a
66}
67
68func u32max(a, b uint32) uint32 {
69	if a < b {
70		return b
71	}
72	return a
73}
74
75const (
76	nodeTypeNormal     = 0
77	nodeTypeException  = 1
78	nodeTypeParentOnly = 2
79	numNodeType        = 3
80)
81
82func nodeTypeStr(n int) string {
83	switch n {
84	case nodeTypeNormal:
85		return "+"
86	case nodeTypeException:
87		return "!"
88	case nodeTypeParentOnly:
89		return "o"
90	}
91	panic("unreachable")
92}
93
94const (
95	defaultURL   = "https://publicsuffix.org/list/effective_tld_names.dat"
96	gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
97)
98
99var (
100	labelEncoding = map[string]uint32{}
101	labelsList    = []string{}
102	labelsMap     = map[string]bool{}
103	rules         = []string{}
104	numICANNRules = 0
105
106	// validSuffixRE is used to check that the entries in the public suffix
107	// list are in canonical form (after Punycode encoding). Specifically,
108	// capital letters are not allowed.
109	validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)
110
111	shaRE  = regexp.MustCompile(`"sha":"([^"]+)"`)
112	dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`)
113
114	comments = flag.Bool("comments", false, "generate table.go comments, for debugging")
115	subset   = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
116	url      = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead")
117	v        = flag.Bool("v", false, "verbose output (to stderr)")
118	version  = flag.String("version", "", "the effective_tld_names.dat version")
119)
120
121func main() {
122	if err := main1(); err != nil {
123		fmt.Fprintln(os.Stderr, err)
124		os.Exit(1)
125	}
126}
127
128func main1() error {
129	flag.Parse()
130	if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
131		return fmt.Errorf("not enough bits to encode the nodes table")
132	}
133	if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
134		return fmt.Errorf("not enough bits to encode the children table")
135	}
136	if *version == "" {
137		if *url != defaultURL {
138			return fmt.Errorf("-version was not specified, and the -url is not the default one")
139		}
140		sha, date, err := gitCommit()
141		if err != nil {
142			return err
143		}
144		*version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date)
145	}
146	var r io.Reader = os.Stdin
147	if *url != "" {
148		res, err := http.Get(*url)
149		if err != nil {
150			return err
151		}
152		if res.StatusCode != http.StatusOK {
153			return fmt.Errorf("bad GET status for %s: %s", *url, res.Status)
154		}
155		r = res.Body
156		defer res.Body.Close()
157	}
158
159	var root node
160	icann := false
161	br := bufio.NewReader(r)
162	for {
163		s, err := br.ReadString('\n')
164		if err != nil {
165			if err == io.EOF {
166				break
167			}
168			return err
169		}
170		s = strings.TrimSpace(s)
171		if strings.Contains(s, "BEGIN ICANN DOMAINS") {
172			if len(rules) != 0 {
173				return fmt.Errorf(`expected no rules before "BEGIN ICANN DOMAINS"`)
174			}
175			icann = true
176			continue
177		}
178		if strings.Contains(s, "END ICANN DOMAINS") {
179			icann, numICANNRules = false, len(rules)
180			continue
181		}
182		if s == "" || strings.HasPrefix(s, "//") {
183			continue
184		}
185		s, err = idna.ToASCII(s)
186		if err != nil {
187			return err
188		}
189		if !validSuffixRE.MatchString(s) {
190			return fmt.Errorf("bad publicsuffix.org list data: %q", s)
191		}
192
193		if *subset {
194			switch {
195			case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
196			case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
197			case s == "ao" || strings.HasSuffix(s, ".ao"):
198			case s == "ar" || strings.HasSuffix(s, ".ar"):
199			case s == "arpa" || strings.HasSuffix(s, ".arpa"):
200			case s == "cy" || strings.HasSuffix(s, ".cy"):
201			case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
202			case s == "jp":
203			case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
204			case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
205			case s == "om" || strings.HasSuffix(s, ".om"):
206			case s == "uk" || strings.HasSuffix(s, ".uk"):
207			case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
208			case s == "tw" || strings.HasSuffix(s, ".tw"):
209			case s == "zw" || strings.HasSuffix(s, ".zw"):
210			case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
211				// xn--p1ai is Russian-Cyrillic "рф".
212			default:
213				continue
214			}
215		}
216
217		rules = append(rules, s)
218
219		nt, wildcard := nodeTypeNormal, false
220		switch {
221		case strings.HasPrefix(s, "*."):
222			s, nt = s[2:], nodeTypeParentOnly
223			wildcard = true
224		case strings.HasPrefix(s, "!"):
225			s, nt = s[1:], nodeTypeException
226		}
227		labels := strings.Split(s, ".")
228		for n, i := &root, len(labels)-1; i >= 0; i-- {
229			label := labels[i]
230			n = n.child(label)
231			if i == 0 {
232				if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
233					n.nodeType = nt
234				}
235				n.icann = n.icann && icann
236				n.wildcard = n.wildcard || wildcard
237			}
238			labelsMap[label] = true
239		}
240	}
241	labelsList = make([]string, 0, len(labelsMap))
242	for label := range labelsMap {
243		labelsList = append(labelsList, label)
244	}
245	sort.Strings(labelsList)
246
247	if err := generate(printReal, &root, "table.go"); err != nil {
248		return err
249	}
250	if err := generate(printTest, &root, "table_test.go"); err != nil {
251		return err
252	}
253	return nil
254}
255
256func generate(p func(io.Writer, *node) error, root *node, filename string) error {
257	buf := new(bytes.Buffer)
258	if err := p(buf, root); err != nil {
259		return err
260	}
261	b, err := format.Source(buf.Bytes())
262	if err != nil {
263		return err
264	}
265	return ioutil.WriteFile(filename, b, 0644)
266}
267
268func gitCommit() (sha, date string, retErr error) {
269	res, err := http.Get(gitCommitURL)
270	if err != nil {
271		return "", "", err
272	}
273	if res.StatusCode != http.StatusOK {
274		return "", "", fmt.Errorf("bad GET status for %s: %s", gitCommitURL, res.Status)
275	}
276	defer res.Body.Close()
277	b, err := ioutil.ReadAll(res.Body)
278	if err != nil {
279		return "", "", err
280	}
281	if m := shaRE.FindSubmatch(b); m != nil {
282		sha = string(m[1])
283	}
284	if m := dateRE.FindSubmatch(b); m != nil {
285		date = string(m[1])
286	}
287	if sha == "" || date == "" {
288		retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL)
289	}
290	return sha, date, retErr
291}
292
293func printTest(w io.Writer, n *node) error {
294	fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
295	fmt.Fprintf(w, "package publicsuffix\n\nconst numICANNRules = %d\n\nvar rules = [...]string{\n", numICANNRules)
296	for _, rule := range rules {
297		fmt.Fprintf(w, "%q,\n", rule)
298	}
299	fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
300	if err := n.walk(w, printNodeLabel); err != nil {
301		return err
302	}
303	fmt.Fprintf(w, "}\n")
304	return nil
305}
306
307func printReal(w io.Writer, n *node) error {
308	const header = `// generated by go run gen.go; DO NOT EDIT
309
310package publicsuffix
311
312const version = %q
313
314const (
315	nodesBitsChildren   = %d
316	nodesBitsICANN      = %d
317	nodesBitsTextOffset = %d
318	nodesBitsTextLength = %d
319
320	childrenBitsWildcard = %d
321	childrenBitsNodeType = %d
322	childrenBitsHi       = %d
323	childrenBitsLo       = %d
324)
325
326const (
327	nodeTypeNormal     = %d
328	nodeTypeException  = %d
329	nodeTypeParentOnly = %d
330)
331
332// numTLD is the number of top level domains.
333const numTLD = %d
334
335`
336	fmt.Fprintf(w, header, *version,
337		nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
338		childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
339		nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
340
341	text := combineText(labelsList)
342	if text == "" {
343		return fmt.Errorf("internal error: makeText returned no text")
344	}
345	for _, label := range labelsList {
346		offset, length := strings.Index(text, label), len(label)
347		if offset < 0 {
348			return fmt.Errorf("internal error: could not find %q in text %q", label, text)
349		}
350		maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
351		if offset >= 1<<nodesBitsTextOffset {
352			return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
353		}
354		if length >= 1<<nodesBitsTextLength {
355			return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
356		}
357		labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
358	}
359	fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
360	for len(text) > 0 {
361		n, plus := len(text), ""
362		if n > 64 {
363			n, plus = 64, " +"
364		}
365		fmt.Fprintf(w, "%q%s\n", text[:n], plus)
366		text = text[n:]
367	}
368
369	if err := n.walk(w, assignIndexes); err != nil {
370		return err
371	}
372
373	fmt.Fprintf(w, `
374
375// nodes is the list of nodes. Each node is represented as a uint32, which
376// encodes the node's children, wildcard bit and node type (as an index into
377// the children array), ICANN bit and text.
378//
379// If the table was generated with the -comments flag, there is a //-comment
380// after each node's data. In it is the nodes-array indexes of the children,
381// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
382// nodeType is printed as + for normal, ! for exception, and o for parent-only
383// nodes that have children but don't match a domain label in their own right.
384// An I denotes an ICANN domain.
385//
386// The layout within the uint32, from MSB to LSB, is:
387//	[%2d bits] unused
388//	[%2d bits] children index
389//	[%2d bits] ICANN bit
390//	[%2d bits] text index
391//	[%2d bits] text length
392var nodes = [...]uint32{
393`,
394		32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
395		nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
396	if err := n.walk(w, printNode); err != nil {
397		return err
398	}
399	fmt.Fprintf(w, `}
400
401// children is the list of nodes' children, the parent's wildcard bit and the
402// parent's node type. If a node has no children then their children index
403// will be in the range [0, 6), depending on the wildcard bit and node type.
404//
405// The layout within the uint32, from MSB to LSB, is:
406//	[%2d bits] unused
407//	[%2d bits] wildcard bit
408//	[%2d bits] node type
409//	[%2d bits] high nodes index (exclusive) of children
410//	[%2d bits] low nodes index (inclusive) of children
411var children=[...]uint32{
412`,
413		32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
414		childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
415	for i, c := range childrenEncoding {
416		s := "---------------"
417		lo := c & (1<<childrenBitsLo - 1)
418		hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
419		if lo != hi {
420			s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
421		}
422		nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
423		wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
424		if *comments {
425			fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
426				c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
427		} else {
428			fmt.Fprintf(w, "0x%x,\n", c)
429		}
430	}
431	fmt.Fprintf(w, "}\n\n")
432	fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
433	fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
434	fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
435	fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
436	fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
437	return nil
438}
439
440type node struct {
441	label    string
442	nodeType int
443	icann    bool
444	wildcard bool
445	// nodesIndex and childrenIndex are the index of this node in the nodes
446	// and the index of its children offset/length in the children arrays.
447	nodesIndex, childrenIndex int
448	// firstChild is the index of this node's first child, or zero if this
449	// node has no children.
450	firstChild int
451	// children are the node's children, in strictly increasing node label order.
452	children []*node
453}
454
455func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
456	if err := f(w, n); err != nil {
457		return err
458	}
459	for _, c := range n.children {
460		if err := c.walk(w, f); err != nil {
461			return err
462		}
463	}
464	return nil
465}
466
467// child returns the child of n with the given label. The child is created if
468// it did not exist beforehand.
469func (n *node) child(label string) *node {
470	for _, c := range n.children {
471		if c.label == label {
472			return c
473		}
474	}
475	c := &node{
476		label:    label,
477		nodeType: nodeTypeParentOnly,
478		icann:    true,
479	}
480	n.children = append(n.children, c)
481	sort.Sort(byLabel(n.children))
482	return c
483}
484
485type byLabel []*node
486
487func (b byLabel) Len() int           { return len(b) }
488func (b byLabel) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
489func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
490
491var nextNodesIndex int
492
493// childrenEncoding are the encoded entries in the generated children array.
494// All these pre-defined entries have no children.
495var childrenEncoding = []uint32{
496	0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
497	1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
498	2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
499	4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
500	5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
501	6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
502}
503
504var firstCallToAssignIndexes = true
505
506func assignIndexes(w io.Writer, n *node) error {
507	if len(n.children) != 0 {
508		// Assign nodesIndex.
509		n.firstChild = nextNodesIndex
510		for _, c := range n.children {
511			c.nodesIndex = nextNodesIndex
512			nextNodesIndex++
513		}
514
515		// The root node's children is implicit.
516		if firstCallToAssignIndexes {
517			firstCallToAssignIndexes = false
518			return nil
519		}
520
521		// Assign childrenIndex.
522		maxChildren = max(maxChildren, len(childrenEncoding))
523		if len(childrenEncoding) >= 1<<nodesBitsChildren {
524			return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
525		}
526		n.childrenIndex = len(childrenEncoding)
527		lo := uint32(n.firstChild)
528		hi := lo + uint32(len(n.children))
529		maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
530		if lo >= 1<<childrenBitsLo {
531			return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
532		}
533		if hi >= 1<<childrenBitsHi {
534			return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
535		}
536		enc := hi<<childrenBitsLo | lo
537		enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
538		if n.wildcard {
539			enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
540		}
541		childrenEncoding = append(childrenEncoding, enc)
542	} else {
543		n.childrenIndex = n.nodeType
544		if n.wildcard {
545			n.childrenIndex += numNodeType
546		}
547	}
548	return nil
549}
550
551func printNode(w io.Writer, n *node) error {
552	for _, c := range n.children {
553		s := "---------------"
554		if len(c.children) != 0 {
555			s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
556		}
557		encoding := labelEncoding[c.label]
558		if c.icann {
559			encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
560		}
561		encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
562		if *comments {
563			fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
564				encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
565				nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
566			)
567		} else {
568			fmt.Fprintf(w, "0x%x,\n", encoding)
569		}
570	}
571	return nil
572}
573
574func printNodeLabel(w io.Writer, n *node) error {
575	for _, c := range n.children {
576		fmt.Fprintf(w, "%q,\n", c.label)
577	}
578	return nil
579}
580
581func icannStr(icann bool) string {
582	if icann {
583		return "I"
584	}
585	return " "
586}
587
588func wildcardStr(wildcard bool) string {
589	if wildcard {
590		return "*"
591	}
592	return " "
593}
594
595// combineText combines all the strings in labelsList to form one giant string.
596// Overlapping strings will be merged: "arpa" and "parliament" could yield
597// "arparliament".
598func combineText(labelsList []string) string {
599	beforeLength := 0
600	for _, s := range labelsList {
601		beforeLength += len(s)
602	}
603
604	text := crush(removeSubstrings(labelsList))
605	if *v {
606		fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
607	}
608	return text
609}
610
611type byLength []string
612
613func (s byLength) Len() int           { return len(s) }
614func (s byLength) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
615func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
616
617// removeSubstrings returns a copy of its input with any strings removed
618// that are substrings of other provided strings.
619func removeSubstrings(input []string) []string {
620	// Make a copy of input.
621	ss := append(make([]string, 0, len(input)), input...)
622	sort.Sort(byLength(ss))
623
624	for i, shortString := range ss {
625		// For each string, only consider strings higher than it in sort order, i.e.
626		// of equal length or greater.
627		for _, longString := range ss[i+1:] {
628			if strings.Contains(longString, shortString) {
629				ss[i] = ""
630				break
631			}
632		}
633	}
634
635	// Remove the empty strings.
636	sort.Strings(ss)
637	for len(ss) > 0 && ss[0] == "" {
638		ss = ss[1:]
639	}
640	return ss
641}
642
643// crush combines a list of strings, taking advantage of overlaps. It returns a
644// single string that contains each input string as a substring.
645func crush(ss []string) string {
646	maxLabelLen := 0
647	for _, s := range ss {
648		if maxLabelLen < len(s) {
649			maxLabelLen = len(s)
650		}
651	}
652
653	for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
654		prefixes := makePrefixMap(ss, prefixLen)
655		for i, s := range ss {
656			if len(s) <= prefixLen {
657				continue
658			}
659			mergeLabel(ss, i, prefixLen, prefixes)
660		}
661	}
662
663	return strings.Join(ss, "")
664}
665
666// mergeLabel merges the label at ss[i] with the first available matching label
667// in prefixMap, where the last "prefixLen" characters in ss[i] match the first
668// "prefixLen" characters in the matching label.
669// It will merge ss[i] repeatedly until no more matches are available.
670// All matching labels merged into ss[i] are replaced by "".
671func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
672	s := ss[i]
673	suffix := s[len(s)-prefixLen:]
674	for _, j := range prefixes[suffix] {
675		// Empty strings mean "already used." Also avoid merging with self.
676		if ss[j] == "" || i == j {
677			continue
678		}
679		if *v {
680			fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
681				prefixLen, i, j, ss[i], ss[j], suffix)
682		}
683		ss[i] += ss[j][prefixLen:]
684		ss[j] = ""
685		// ss[i] has a new suffix, so merge again if possible.
686		// Note: we only have to merge again at the same prefix length. Shorter
687		// prefix lengths will be handled in the next iteration of crush's for loop.
688		// Can there be matches for longer prefix lengths, introduced by the merge?
689		// I believe that any such matches would by necessity have been eliminated
690		// during substring removal or merged at a higher prefix length. For
691		// instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
692		// would yield "abcde", which could be merged with "bcdef." However, in
693		// practice "cde" would already have been elimintated by removeSubstrings.
694		mergeLabel(ss, i, prefixLen, prefixes)
695		return
696	}
697}
698
699// prefixMap maps from a prefix to a list of strings containing that prefix. The
700// list of strings is represented as indexes into a slice of strings stored
701// elsewhere.
702type prefixMap map[string][]int
703
704// makePrefixMap constructs a prefixMap from a slice of strings.
705func makePrefixMap(ss []string, prefixLen int) prefixMap {
706	prefixes := make(prefixMap)
707	for i, s := range ss {
708		// We use < rather than <= because if a label matches on a prefix equal to
709		// its full length, that's actually a substring match handled by
710		// removeSubstrings.
711		if prefixLen < len(s) {
712			prefix := s[:prefixLen]
713			prefixes[prefix] = append(prefixes[prefix], i)
714		}
715	}
716
717	return prefixes
718}
719