1package textseg
2
3import (
4	"bufio"
5	"bytes"
6)
7
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
//
// Each returned token is an independent copy of the token bytes, so the
// result remains valid after scanning has finished and does not share
// storage with buf or with the scanner used internally.
//
// If scanning stops because of an error, the tokens recognized before the
// error are returned along with that error.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
	scanner := bufio.NewScanner(bytes.NewReader(buf))
	scanner.Split(splitFunc)
	var ret [][]byte
	for scanner.Scan() {
		// scanner.Bytes() points into the scanner's internal buffer, which
		// may be overwritten by a later call to Scan. Retaining it directly
		// would corrupt earlier tokens once the buffer is shifted or
		// refilled, so take a private copy of every token.
		raw := scanner.Bytes()
		tok := make([]byte, len(raw))
		copy(tok, raw)
		ret = append(ret, tok)
	}
	return ret, scanner.Err()
}
19
// TokenCount reports how many tokens the given bufio.SplitFunc recognizes in
// buf. The token contents themselves are discarded; only the tally is kept.
//
// If scanning stops because of an error, the number of tokens recognized
// before the error is returned along with that error.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
	sc := bufio.NewScanner(bytes.NewReader(buf))
	sc.Split(splitFunc)

	count := 0
	for more := sc.Scan(); more; more = sc.Scan() {
		count++
	}
	return count, sc.Err()
}
31