// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package testing

import (
	"flag"
	"fmt"
	"internal/race"
	"internal/sysinfo"
	"io"
	"math"
	"os"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode"
)

func initBenchmarkFlags() {
	matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
	benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
	flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`")
}

var (
	matchBenchmarks *string
	benchmarkMemory *bool

	benchTime = durationOrCountFlag{d: 1 * time.Second} // changed during test of testing package
)

type durationOrCountFlag struct {
	d         time.Duration
	n         int
	allowZero bool
}

func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

func (f *durationOrCountFlag) Set(s string) error {
	if strings.HasSuffix(s, "x") {
		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n)}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d}
	return nil
}
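
// Illustrative sketch (not part of the original source): the -test.benchtime
// flag (surfaced by "go test" as -benchtime) accepts either a duration or an
// iteration count with an "x" suffix, both handled by Set above:
//
//	go test -bench=. -benchtime=2s    // run each benchmark for about 2 seconds
//	go test -bench=. -benchtime=100x  // run each benchmark exactly 100 times
//
// Zero values are rejected unless allowZero is set.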

// Global lock to ensure only one benchmark runs at a time.
var benchmarkLock sync.Mutex

// Used for every benchmark for measuring memory.
var memStats runtime.MemStats

// InternalBenchmark is an internal type but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string
	F    func(b *B)
}

// B is a type passed to Benchmark functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the methods
// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
// only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to standard output when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	context          *benchContext
	N                int
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)
	benchTime        durationOrCountFlag
	bytes            int64
	missingBytes     bool // one of the subbenchmarks does not have bytes set.
	timerOn          bool
	showAllocResult  bool
	result           BenchmarkResult
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64
	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric.
	extra map[string]float64
}
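
// Illustrative sketch (not part of the original source): a minimal benchmark as
// it appears in a user's _test.go file. The body runs b.N times; the framework
// grows N until the benchmark runs long enough to time reliably. fib is a
// hypothetical function used only for illustration.
//
//	func BenchmarkFib10(b *testing.B) {
//		for i := 0; i < b.N; i++ {
//			fib(10)
//		}
//	}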

// StartTimer starts timing a test. This function is called automatically
// before a benchmark starts, but it can also be used to resume timing after
// a call to StopTimer.
func (b *B) StartTimer() {
	if !b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
		b.timerOn = true
	}
}

// StopTimer stops timing a test. This can be used to pause the timer
// while performing complex initialization that you don't
// want to measure.
func (b *B) StopTimer() {
	if b.timerOn {
		b.duration += time.Since(b.start)
		runtime.ReadMemStats(&memStats)
		b.netAllocs += memStats.Mallocs - b.startAllocs
		b.netBytes += memStats.TotalAlloc - b.startBytes
		b.timerOn = false
	}
}

// ResetTimer zeroes the elapsed benchmark time and memory allocation counters
// and deletes user-reported metrics.
// It does not affect whether the timer is running.
func (b *B) ResetTimer() {
	if b.extra == nil {
		// Allocate the extra map before reading memory stats.
		// Pre-size it to make more allocation unlikely.
		b.extra = make(map[string]float64, 16)
	} else {
		for k := range b.extra {
			delete(b.extra, k)
		}
	}
	if b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
	}
	b.duration = 0
	b.netAllocs = 0
	b.netBytes = 0
}
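
// Illustrative sketch (not part of the original source): StopTimer, StartTimer,
// and ResetTimer exclude setup work from the measurement. buildLargeInput,
// cloneInput, and process are hypothetical helpers.
//
//	func BenchmarkProcess(b *testing.B) {
//		data := buildLargeInput() // one-time setup, excluded via ResetTimer
//		b.ResetTimer()
//		for i := 0; i < b.N; i++ {
//			b.StopTimer()
//			input := cloneInput(data) // per-iteration setup, excluded
//			b.StartTimer()
//			process(input)
//		}
//	}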

// SetBytes records the number of bytes processed in a single operation.
// If this is called, the benchmark will report ns/op and MB/s.
func (b *B) SetBytes(n int64) { b.bytes = n }

// ReportAllocs enables malloc statistics for this benchmark.
// It is equivalent to setting -test.benchmem, but it only affects the
// benchmark function that calls ReportAllocs.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}
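
// Illustrative sketch (not part of the original source): SetBytes enables the
// MB/s column and ReportAllocs enables the B/op and allocs/op columns for this
// benchmark without passing -test.benchmem. encode is a hypothetical function.
//
//	func BenchmarkEncode(b *testing.B) {
//		buf := make([]byte, 64<<10)
//		b.SetBytes(int64(len(buf)))
//		b.ReportAllocs()
//		for i := 0; i < b.N; i++ {
//			encode(buf)
//		}
//	}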

// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	defer b.runCleanup(normalPanic)
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	b.raceErrors = -race.Errors()
	b.N = n
	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration
	b.raceErrors += race.Errors()
	if b.raceErrors > 0 {
		b.Errorf("race detected during execution of benchmark")
	}
}

func min(x, y int64) int64 {
	if x > y {
		return y
	}
	return x
}

func max(x, y int64) int64 {
	if x < y {
		return y
	}
	return x
}

// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if atomic.LoadInt32(&b.hasSub) != 0 || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
		}
		return false
	}
	return true
}

var labelsOnce sync.Once

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
func (b *B) run() {
	labelsOnce.Do(func() {
		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
		if b.importPath != "" {
			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
		}
		if cpu := sysinfo.CPU.Name(); cpu != "" {
			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
		}
	})
	if b.context != nil {
		// Running go test --test.bench
		b.context.processBench(b) // Must call doBench.
	} else {
		// Running func Benchmark.
		b.doBench()
	}
}

func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	if b.benchTime.n > 0 {
		// We already ran a single iteration in run1.
		// If -benchtime=1x was requested, use that result.
		// See https://golang.org/issue/32051.
		if b.benchTime.n > 1 {
			b.runN(b.benchTime.n)
		}
	} else {
		d := b.benchTime.d
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict required iterations.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid div by zero.
				prevns = 1
			}
			// Order of operations matters.
			// For very fast benchmarks, prevIters ~= prevns.
			// If you divide first, you get 0 or 1,
			// which can hide an order of magnitude in execution time.
			// So multiply first, then divide.
			n = goalns * prevIters / prevns
			// Run more iterations than we think we'll need (1.2x).
			n += n / 5
			// Don't grow too fast in case we had timing errors previously.
			n = min(n, 100*last)
			// Be sure to run at least one more than last time.
			n = max(n, last+1)
			// Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.)
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
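
// Worked example (illustrative, not part of the original source) of the
// iteration-prediction loop above, assuming the default -benchtime=1s and a
// body that takes about 1ms per iteration:
//
//	after run1:  N=1,    duration~1ms   -> predict 1e9*1/1e6 = 1000, +20% = 1200,
//	                                       capped at 100*last = 100  -> runN(100)
//	next round:  N=100,  duration~100ms -> predict 1000, +20% = 1200,
//	                                       caps not binding          -> runN(1200)
//	next check:  N=1200, duration~1.2s  -> duration >= 1s, loop exits.
//
// Multiplying before dividing and capping growth at 100x keep the estimate
// stable for very fast or noisy benchmark bodies.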

// ReportMetric adds "n unit" to the reported benchmark results.
// If the metric is per-iteration, the caller should divide by b.N,
// and by convention units should end in "/op".
// ReportMetric overrides any previously reported value for the same unit.
// ReportMetric panics if unit is the empty string or if unit contains
// any whitespace.
// If unit is a unit normally reported by the benchmark framework itself
// (such as "allocs/op"), ReportMetric will override that metric.
// Setting "ns/op" to 0 will suppress that built-in metric.
func (b *B) ReportMetric(n float64, unit string) {
	if unit == "" {
		panic("metric unit must not be empty")
	}
	if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
		panic("metric unit must not contain whitespace")
	}
	b.extra[unit] = n
}
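
// Illustrative sketch (not part of the original source): reporting a custom
// per-operation metric with ReportMetric. The metric name and the doRequest
// helper are hypothetical.
//
//	func BenchmarkRequests(b *testing.B) {
//		var retries int
//		for i := 0; i < b.N; i++ {
//			retries += doRequest()
//		}
//		b.ReportMetric(float64(retries)/float64(b.N), "retries/op")
//		// Setting a built-in metric to 0 suppresses it, per the doc comment above:
//		// b.ReportMetric(0, "ns/op")
//	}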

// BenchmarkResult contains the results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics reported by ReportMetric.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric,
// which is calculated as r.MemAllocs / r.N.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric,
// which is calculated as r.MemBytes / r.N.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a summary of the benchmark results.
// It follows the benchmark result line format from
// https://golang.org/design/14313-benchmark-format, not including the
// benchmark name.
// Extra metrics override built-in metrics of the same name.
// String does not include allocs/op or B/op, since those are reported
// by MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float.
	ns, ok := r.Extra["ns/op"]
	if !ok {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that aren't represented in the standard
	// metrics.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	sort.Strings(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

func prettyPrint(w io.Writer, x float64, unit string) {
	// Print all numbers with 10 places before the decimal point
	// and small numbers with four sig figs. Field widths are
	// chosen to fit the whole part in 10 places while aligning
	// the decimal point of all fractional formats.
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns full name of benchmark including procs suffix.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

type benchContext struct {
	match *matcher

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}

func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	ctx := &benchContext{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
				ctx.maxLen = l
			}
		}
	}
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		context:   ctx,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
			}
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
		}
	}
}

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	atomic.StoreInt32(&b.hasSub, 1)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.context != nil {
		benchName, ok, partial = b.context.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		context:    b.context,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		atomic.StoreInt32(&sub.hasSub, 1)
	}

	if b.chatty != nil {
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPU.Name(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		fmt.Println(benchName)
	}

	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}
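
// Illustrative sketch (not part of the original source): using Run to build a
// family of sub-benchmarks over different input sizes. makeInput and sortCopy
// are hypothetical helpers; fmt is the standard library package.
//
//	func BenchmarkSort(b *testing.B) {
//		for _, size := range []int{100, 10000, 1000000} {
//			b.Run(fmt.Sprintf("n=%d", size), func(b *testing.B) {
//				data := makeInput(size)
//				b.ResetTimer()
//				for i := 0; i < b.N; i++ {
//					sortCopy(data)
//				}
//			})
//		}
//	}
//
// Each sub-benchmark is reported as BenchmarkSort/n=100 and so on; the parent
// runs once with N=1 and is not measured itself.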

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
func (b *B) add(other BenchmarkResult) {
	r := &b.result
	// The aggregated BenchmarkResults resemble running all subbenchmarks
	// in sequence in a single benchmark.
	r.N = 1
	r.T += time.Duration(other.NsPerOp())
	if other.Bytes == 0 {
		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
		// set it.
		b.missingBytes = true
		r.Bytes = 0
	}
	if !b.missingBytes {
		r.Bytes += other.Bytes
	}
	r.MemAllocs += uint64(other.AllocsPerOp())
	r.MemBytes += uint64(other.AllocedBytesPerOp())
}

// trimOutput shortens the output from a benchmark, which can be very long.
func (b *B) trimOutput() {
	// The output is likely to appear multiple times because the benchmark
	// is run multiple times, but at least it will be seen. This is not a big deal
	// because benchmarks rarely print, but just in case, we trim it if it's too long.
	const maxNewlines = 10
	for nlCount, j := 0, 0; j < len(b.output); j++ {
		if b.output[j] == '\n' {
			nlCount++
			if nlCount >= maxNewlines {
				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
				break
			}
		}
	}
}

// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *uint64 // shared iteration counter between all worker goroutines
	grain   uint64  // acquire that many iterations from globalN at once
	cache   uint64  // local cache of acquired iterations
	bN      uint64  // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		n := atomic.AddUint64(pb.globalN, pb.grain)
		if n <= pb.bN {
			pb.cache = pb.grain
		} else if n < pb.bN+pb.grain {
			pb.cache = pb.bN + pb.grain - n
		} else {
			return false
		}
	}
	pb.cache--
	return true
}
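
// Worked example (illustrative, not part of the original source): with
// b.N = 1000 and grain = 100, each worker's first Next call does
// atomic.AddUint64(globalN, 100) and caches a batch of 100 iterations,
// consuming one per call. The worker whose Add pushes the counter past 1000
// receives only the remainder (bN+grain-n), and any Add beyond that returns
// false, so the workers collectively execute exactly b.N iterations while
// synchronizing only once per batch rather than once per iteration.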

// RunParallel runs a benchmark in parallel.
// It creates multiple goroutines and distributes b.N iterations among them.
// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
// RunParallel is usually used with the go test -cpu flag.
//
// The body function will be run in each goroutine. It should set up any
// goroutine-local state and then iterate until pb.Next returns false.
// It should not use the StartTimer, StopTimer, or ResetTimer functions,
// because they have global effect. It should also not call Run.
func (b *B) RunParallel(body func(*PB)) {
	if b.N == 0 {
		return // Nothing to do when probing.
	}
	// Calculate grain size as number of iterations that take ~100µs.
	// 100µs is enough to amortize the overhead and provide sufficient
	// dynamic load balancing.
	grain := uint64(0)
	if b.previousN > 0 && b.previousDuration > 0 {
		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
	}
	if grain < 1 {
		grain = 1
	}
	// We expect the inner loop and function call to take at least 10ns,
	// so do not do more than 100µs/10ns=1e4 iterations.
	if grain > 1e4 {
		grain = 1e4
	}

	n := uint64(0)
	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
	var wg sync.WaitGroup
	wg.Add(numProcs)
	for p := 0; p < numProcs; p++ {
		go func() {
			defer wg.Done()
			pb := &PB{
				globalN: &n,
				grain:   grain,
				bN:      uint64(b.N),
			}
			body(pb)
		}()
	}
	wg.Wait()
	if n <= uint64(b.N) && !b.Failed() {
		b.Fatal("RunParallel: body exited without pb.Next() == false")
	}
}
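
// Illustrative sketch (not part of the original source), assuming the standard
// bytes and text/template packages: a typical RunParallel body sets up
// goroutine-local state and then loops on pb.Next.
//
//	func BenchmarkTemplateParallel(b *testing.B) {
//		templ := template.Must(template.New("test").Parse("Hello, {{.}}!"))
//		b.RunParallel(func(pb *testing.PB) {
//			// Each goroutine gets its own buffer; templ is shared read-only state.
//			var buf bytes.Buffer
//			for pb.Next() {
//				buf.Reset()
//				templ.Execute(&buf, "World")
//			}
//		})
//	}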

// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
// There is usually no need to call SetParallelism for CPU-bound benchmarks.
// If p is less than 1, this call will have no effect.
func (b *B) SetParallelism(p int) {
	if p >= 1 {
		b.parallelism = p
	}
}

// Benchmark benchmarks a single function. It is useful for creating
// custom benchmarks that do not use the "go test" command.
//
// If f depends on testing flags, then Init must be used to register
// those flags before calling Benchmark and before calling flag.Parse.
//
// If f calls Run, the result will be an estimate of running all its
// subbenchmarks that don't call Run in sequence in a single benchmark.
func Benchmark(f func(b *B)) BenchmarkResult {
	b := &B{
		common: common{
			signal: make(chan bool),
			w:      discard{},
		},
		benchFunc: f,
		benchTime: benchTime,
	}
	if b.run1() {
		b.run()
	}
	return b.result
}
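
// Illustrative sketch (not part of the original source): driving a benchmark
// outside "go test" with the exported Benchmark function. work is a
// hypothetical function being measured.
//
//	res := testing.Benchmark(func(b *testing.B) {
//		for i := 0; i < b.N; i++ {
//			work()
//		}
//	})
//	fmt.Println(res.String())
//	fmt.Println(res.MemString())
//
// Because runN always records allocation counts, MemString is meaningful here
// even without -test.benchmem; that flag only controls whether "go test"
// prints the memory columns.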

type discard struct{}

func (discard) Write(b []byte) (n int, err error) { return len(b), nil }
