1/*
2 *
3 * Copyright 2017 gRPC authors.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19// Package stats tracks the statistics associated with benchmark runs.
20package stats
21
22import (
23	"bytes"
24	"fmt"
25	"log"
26	"math"
27	"runtime"
28	"sort"
29	"strconv"
30	"sync"
31	"time"
32
33	"google.golang.org/grpc"
34)
35
// FeatureIndex is an enum for features that usually differ across individual
// benchmark runs in a single execution. These are usually configured by the
// user through command line flags. Values of this type are used as positions
// in the boolean 'wantFeatures' slices accepted by Features.SharedFeatures and
// Features.PrintableName.
type FeatureIndex int

// FeatureIndex enum values corresponding to individually settable features.
// Each value selects the Features field named in its comment.
const (
	// EnableTraceIndex selects Features.EnableTrace.
	EnableTraceIndex FeatureIndex = iota
	// ReadLatenciesIndex selects Features.Latency.
	ReadLatenciesIndex
	// ReadKbpsIndex selects Features.Kbps.
	ReadKbpsIndex
	// ReadMTUIndex selects Features.MTU.
	ReadMTUIndex
	// MaxConcurrentCallsIndex selects Features.MaxConcurrentCalls.
	MaxConcurrentCallsIndex
	// ReqSizeBytesIndex selects Features.ReqSizeBytes.
	ReqSizeBytesIndex
	// RespSizeBytesIndex selects Features.RespSizeBytes.
	RespSizeBytesIndex
	// ReqPayloadCurveIndex selects Features.ReqPayloadCurve.
	ReqPayloadCurveIndex
	// RespPayloadCurveIndex selects Features.RespPayloadCurve.
	RespPayloadCurveIndex
	// CompModesIndex selects Features.ModeCompressor.
	CompModesIndex
	// EnableChannelzIndex selects Features.EnableChannelz.
	EnableChannelzIndex
	// EnablePreloaderIndex selects Features.EnablePreloader.
	EnablePreloaderIndex

	// MaxFeatureIndex is a placeholder to indicate the total number of feature
	// indices we have. Any new feature indices should be added above this.
	MaxFeatureIndex
)
60
// Features represent configured options for a specific benchmark run. This is
// usually constructed from command line arguments passed by the caller. See
// benchmark/benchmain/main.go for defined command line flags. This is also
// part of the BenchResults struct which is serialized and written to a file.
type Features struct {
	// NetworkMode is the network mode used for this benchmark run. Could be
	// one of Local, LAN, WAN or Longhaul.
	NetworkMode string
	// UseBufConn indicates whether an in-memory connection was used for this
	// benchmark run instead of system network I/O.
	UseBufConn bool
	// EnableKeepalive indicates if keepalives were enabled on the connections
	// used in this benchmark run.
	EnableKeepalive bool
	// BenchTime indicates the duration of the benchmark run.
	BenchTime time.Duration

	// Features defined above are usually the same for all benchmark runs in a
	// particular invocation, while the features defined below could vary from
	// run to run based on the configured command line. These features have a
	// corresponding FeatureIndex value which is used for a variety of reasons.

	// EnableTrace indicates if tracing was enabled.
	EnableTrace bool
	// Latency is the simulated one-way network latency used.
	Latency time.Duration
	// Kbps is the simulated network throughput used.
	Kbps int
	// MTU is the simulated network MTU used.
	MTU int
	// MaxConcurrentCalls is the number of concurrent RPCs made during this
	// benchmark run.
	MaxConcurrentCalls int
	// ReqSizeBytes is the request size in bytes used in this benchmark run.
	// Unused if ReqPayloadCurve is non-nil.
	ReqSizeBytes int
	// RespSizeBytes is the response size in bytes used in this benchmark run.
	// Unused if RespPayloadCurve is non-nil.
	RespSizeBytes int
	// ReqPayloadCurve is a histogram representing the shape that a random
	// distribution of request payloads should take.
	ReqPayloadCurve *PayloadCurve
	// RespPayloadCurve is a histogram representing the shape that a random
	// distribution of response payloads should take.
	RespPayloadCurve *PayloadCurve
	// ModeCompressor represents the compressor mode used.
	ModeCompressor string
	// EnableChannelz indicates if channelz was turned on.
	EnableChannelz bool
	// EnablePreloader indicates if preloading was turned on.
	EnablePreloader bool
}
113
114// String returns all the feature values as a string.
115func (f Features) String() string {
116	var reqPayloadString, respPayloadString string
117	if f.ReqPayloadCurve != nil {
118		reqPayloadString = fmt.Sprintf("reqPayloadCurve_%s", f.ReqPayloadCurve.ShortHash())
119	} else {
120		reqPayloadString = fmt.Sprintf("reqSize_%vB", f.ReqSizeBytes)
121	}
122	if f.RespPayloadCurve != nil {
123		respPayloadString = fmt.Sprintf("respPayloadCurve_%s", f.RespPayloadCurve.ShortHash())
124	} else {
125		respPayloadString = fmt.Sprintf("respSize_%vB", f.RespSizeBytes)
126	}
127	return fmt.Sprintf("networkMode_%v-bufConn_%v-keepalive_%v-benchTime_%v-"+
128		"trace_%v-latency_%v-kbps_%v-MTU_%v-maxConcurrentCalls_%v-%s-%s-"+
129		"compressor_%v-channelz_%v-preloader_%v",
130		f.NetworkMode, f.UseBufConn, f.EnableKeepalive, f.BenchTime, f.EnableTrace,
131		f.Latency, f.Kbps, f.MTU, f.MaxConcurrentCalls, reqPayloadString,
132		respPayloadString, f.ModeCompressor, f.EnableChannelz, f.EnablePreloader)
133}
134
135// SharedFeatures returns the shared features as a pretty printable string.
136// 'wantFeatures' is a bitmask of wanted features, indexed by FeaturesIndex.
137func (f Features) SharedFeatures(wantFeatures []bool) string {
138	var b bytes.Buffer
139	if f.NetworkMode != "" {
140		b.WriteString(fmt.Sprintf("Network: %v\n", f.NetworkMode))
141	}
142	if f.UseBufConn {
143		b.WriteString(fmt.Sprintf("UseBufConn: %v\n", f.UseBufConn))
144	}
145	if f.EnableKeepalive {
146		b.WriteString(fmt.Sprintf("EnableKeepalive: %v\n", f.EnableKeepalive))
147	}
148	b.WriteString(fmt.Sprintf("BenchTime: %v\n", f.BenchTime))
149	f.partialString(&b, wantFeatures, ": ", "\n")
150	return b.String()
151}
152
153// PrintableName returns a one line name which includes the features specified
154// by 'wantFeatures' which is a bitmask of wanted features, indexed by
155// FeaturesIndex.
156func (f Features) PrintableName(wantFeatures []bool) string {
157	var b bytes.Buffer
158	f.partialString(&b, wantFeatures, "_", "-")
159	return b.String()
160}
161
162// partialString writes features specified by 'wantFeatures' to the provided
163// bytes.Buffer.
164func (f Features) partialString(b *bytes.Buffer, wantFeatures []bool, sep, delim string) {
165	for i, sf := range wantFeatures {
166		if sf {
167			switch FeatureIndex(i) {
168			case EnableTraceIndex:
169				b.WriteString(fmt.Sprintf("Trace%v%v%v", sep, f.EnableTrace, delim))
170			case ReadLatenciesIndex:
171				b.WriteString(fmt.Sprintf("Latency%v%v%v", sep, f.Latency, delim))
172			case ReadKbpsIndex:
173				b.WriteString(fmt.Sprintf("Kbps%v%v%v", sep, f.Kbps, delim))
174			case ReadMTUIndex:
175				b.WriteString(fmt.Sprintf("MTU%v%v%v", sep, f.MTU, delim))
176			case MaxConcurrentCallsIndex:
177				b.WriteString(fmt.Sprintf("Callers%v%v%v", sep, f.MaxConcurrentCalls, delim))
178			case ReqSizeBytesIndex:
179				b.WriteString(fmt.Sprintf("ReqSize%v%vB%v", sep, f.ReqSizeBytes, delim))
180			case RespSizeBytesIndex:
181				b.WriteString(fmt.Sprintf("RespSize%v%vB%v", sep, f.RespSizeBytes, delim))
182			case ReqPayloadCurveIndex:
183				if f.ReqPayloadCurve != nil {
184					b.WriteString(fmt.Sprintf("ReqPayloadCurve%vSHA-256:%v%v", sep, f.ReqPayloadCurve.Hash(), delim))
185				}
186			case RespPayloadCurveIndex:
187				if f.RespPayloadCurve != nil {
188					b.WriteString(fmt.Sprintf("RespPayloadCurve%vSHA-256:%v%v", sep, f.RespPayloadCurve.Hash(), delim))
189				}
190			case CompModesIndex:
191				b.WriteString(fmt.Sprintf("Compressor%v%v%v", sep, f.ModeCompressor, delim))
192			case EnableChannelzIndex:
193				b.WriteString(fmt.Sprintf("Channelz%v%v%v", sep, f.EnableChannelz, delim))
194			case EnablePreloaderIndex:
195				b.WriteString(fmt.Sprintf("Preloader%v%v%v", sep, f.EnablePreloader, delim))
196			default:
197				log.Fatalf("Unknown feature index %v. maxFeatureIndex is %v", i, MaxFeatureIndex)
198			}
199		}
200	}
201}
202
// BenchResults records features and results of a benchmark run. A collection
// of these structs is usually serialized and written to a file after a
// benchmark execution, and could later be read for pretty-printing or
// comparison with other benchmark results.
type BenchResults struct {
	// GoVersion is the version of the compiler the benchmark was compiled with.
	GoVersion string
	// GrpcVersion is the gRPC version being benchmarked.
	GrpcVersion string
	// RunMode is the workload mode for this benchmark run. This could be unary,
	// stream or unconstrained.
	RunMode string
	// Features represents the configured feature options for this run.
	Features Features
	// SharedFeatures represents the features which were shared across all
	// benchmark runs during one execution. It is a slice indexed by
	// 'FeatureIndex' and a value of true indicates that the associated
	// feature is shared across all runs.
	SharedFeatures []bool
	// Data contains the statistical data of interest from the benchmark run.
	Data RunData
}
225
// RunData contains statistical data of interest from a benchmark run.
type RunData struct {
	// TotalOps is the number of operations executed during this benchmark run.
	// Only makes sense for unary and streaming workloads.
	TotalOps uint64
	// SendOps is the number of send operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	SendOps uint64
	// RecvOps is the number of receive operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	RecvOps uint64
	// AllocedBytes is the average memory allocation in bytes per operation.
	AllocedBytes float64
	// Allocs is the average number of memory allocations per operation.
	Allocs float64
	// ReqT is the average request throughput associated with this run, in
	// bits per second.
	ReqT float64
	// RespT is the average response throughput associated with this run, in
	// bits per second.
	RespT float64

	// We store different latencies associated with each run. These latencies are
	// only computed for unary and stream workloads as they are not very useful
	// for unconstrained workloads.

	// Fiftieth is the 50th percentile latency.
	Fiftieth time.Duration
	// Ninetieth is the 90th percentile latency.
	Ninetieth time.Duration
	// NinetyNinth is the 99th percentile latency.
	NinetyNinth time.Duration
	// Average is the average latency.
	Average time.Duration
}
259
// durationSlice is a slice of durations that can be sorted in ascending order
// with the sort package.
type durationSlice []time.Duration

// Len reports how many durations the slice holds.
func (ds durationSlice) Len() int {
	return len(ds)
}

// Swap exchanges the durations stored at positions i and j.
func (ds durationSlice) Swap(i, j int) {
	ds[j], ds[i] = ds[i], ds[j]
}

// Less reports whether the duration at position i is strictly shorter than
// the one at position j.
func (ds durationSlice) Less(i, j int) bool {
	return ds[j] > ds[i]
}
265
// Stats is a helper for gathering statistics about individual benchmark runs.
//
// mu is locked by the exported methods while they read or update the other
// fields. numBuckets is the upper limit on the number of buckets used for the
// latency histogram. hw holds the durations and histogram of the run in
// progress and is replaced with a fresh value when a run ends. results holds
// one entry per started run. startMS and stopMS are the memory statistics
// captured when the current run was started and ended, respectively.
type Stats struct {
	mu         sync.Mutex
	numBuckets int
	hw         *histWrapper
	results    []BenchResults
	startMS    runtime.MemStats
	stopMS     runtime.MemStats
}
275
// histWrapper bundles the latency data collected for a single benchmark run.
//
// durations holds the raw per-operation durations added through
// Stats.AddDuration. unit and histogram are filled in by
// Stats.computeLatencies: unit is the time unit used when printing latencies,
// and histogram is built from durations. histogram stays nil when no
// durations were recorded.
type histWrapper struct {
	unit      time.Duration
	histogram *Histogram
	durations durationSlice
}
281
282// NewStats creates a new Stats instance. If numBuckets is not positive, the
283// default value (16) will be used.
284func NewStats(numBuckets int) *Stats {
285	if numBuckets <= 0 {
286		numBuckets = 16
287	}
288	// Use one more bucket for the last unbounded bucket.
289	s := &Stats{numBuckets: numBuckets + 1}
290	s.hw = &histWrapper{}
291	return s
292}
293
294// StartRun is to be invoked to indicate the start of a new benchmark run.
295func (s *Stats) StartRun(mode string, f Features, sf []bool) {
296	s.mu.Lock()
297	defer s.mu.Unlock()
298
299	runtime.ReadMemStats(&s.startMS)
300	s.results = append(s.results, BenchResults{
301		GoVersion:      runtime.Version(),
302		GrpcVersion:    grpc.Version,
303		RunMode:        mode,
304		Features:       f,
305		SharedFeatures: sf,
306	})
307}
308
309// EndRun is to be invoked to indicate the end of the ongoing benchmark run. It
310// computes a bunch of stats and dumps them to stdout.
311func (s *Stats) EndRun(count uint64) {
312	s.mu.Lock()
313	defer s.mu.Unlock()
314
315	runtime.ReadMemStats(&s.stopMS)
316	r := &s.results[len(s.results)-1]
317	r.Data = RunData{
318		TotalOps:     count,
319		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64(count),
320		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64(count),
321		ReqT:         float64(count) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
322		RespT:        float64(count) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
323	}
324	s.computeLatencies(r)
325	s.dump(r)
326	s.hw = &histWrapper{}
327}
328
329// EndUnconstrainedRun is similar to EndRun, but is to be used for
330// unconstrained workloads.
331func (s *Stats) EndUnconstrainedRun(req uint64, resp uint64) {
332	s.mu.Lock()
333	defer s.mu.Unlock()
334
335	runtime.ReadMemStats(&s.stopMS)
336	r := &s.results[len(s.results)-1]
337	r.Data = RunData{
338		SendOps:      req,
339		RecvOps:      resp,
340		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64((req+resp)/2),
341		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64((req+resp)/2),
342		ReqT:         float64(req) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
343		RespT:        float64(resp) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
344	}
345	s.computeLatencies(r)
346	s.dump(r)
347	s.hw = &histWrapper{}
348}
349
350// AddDuration adds an elapsed duration per operation to the stats. This is
351// used by unary and stream modes where request and response stats are equal.
352func (s *Stats) AddDuration(d time.Duration) {
353	s.mu.Lock()
354	defer s.mu.Unlock()
355
356	s.hw.durations = append(s.hw.durations, d)
357}
358
359// GetResults returns the results from all benchmark runs.
360func (s *Stats) GetResults() []BenchResults {
361	s.mu.Lock()
362	defer s.mu.Unlock()
363
364	return s.results
365}
366
367// computeLatencies computes percentile latencies based on durations stored in
368// the stats object and updates the corresponding fields in the result object.
369func (s *Stats) computeLatencies(result *BenchResults) {
370	if len(s.hw.durations) == 0 {
371		return
372	}
373	sort.Sort(s.hw.durations)
374	minDuration := int64(s.hw.durations[0])
375	maxDuration := int64(s.hw.durations[len(s.hw.durations)-1])
376
377	// Use the largest unit that can represent the minimum time duration.
378	s.hw.unit = time.Nanosecond
379	for _, u := range []time.Duration{time.Microsecond, time.Millisecond, time.Second} {
380		if minDuration <= int64(u) {
381			break
382		}
383		s.hw.unit = u
384	}
385
386	numBuckets := s.numBuckets
387	if n := int(maxDuration - minDuration + 1); n < numBuckets {
388		numBuckets = n
389	}
390	s.hw.histogram = NewHistogram(HistogramOptions{
391		NumBuckets: numBuckets,
392		// max-min(lower bound of last bucket) = (1 + growthFactor)^(numBuckets-2) * baseBucketSize.
393		GrowthFactor:   math.Pow(float64(maxDuration-minDuration), 1/float64(numBuckets-2)) - 1,
394		BaseBucketSize: 1.0,
395		MinValue:       minDuration,
396	})
397	for _, d := range s.hw.durations {
398		s.hw.histogram.Add(int64(d))
399	}
400	result.Data.Fiftieth = s.hw.durations[max(s.hw.histogram.Count*int64(50)/100-1, 0)]
401	result.Data.Ninetieth = s.hw.durations[max(s.hw.histogram.Count*int64(90)/100-1, 0)]
402	result.Data.NinetyNinth = s.hw.durations[max(s.hw.histogram.Count*int64(99)/100-1, 0)]
403	result.Data.Average = time.Duration(float64(s.hw.histogram.Sum) / float64(s.hw.histogram.Count))
404}
405
406// dump returns a printable version.
407func (s *Stats) dump(result *BenchResults) {
408	var b bytes.Buffer
409
410	// Go and gRPC version information.
411	b.WriteString(fmt.Sprintf("%s/grpc%s\n", result.GoVersion, result.GrpcVersion))
412
413	// This prints the run mode and all features of the bench on a line.
414	b.WriteString(fmt.Sprintf("%s-%s:\n", result.RunMode, result.Features.String()))
415
416	unit := s.hw.unit
417	tUnit := fmt.Sprintf("%v", unit)[1:] // stores one of s, ms, μs, ns
418
419	if l := result.Data.Fiftieth; l != 0 {
420		b.WriteString(fmt.Sprintf("50_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
421	}
422	if l := result.Data.Ninetieth; l != 0 {
423		b.WriteString(fmt.Sprintf("90_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
424	}
425	if l := result.Data.NinetyNinth; l != 0 {
426		b.WriteString(fmt.Sprintf("99_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
427	}
428	if l := result.Data.Average; l != 0 {
429		b.WriteString(fmt.Sprintf("Avg_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
430	}
431	b.WriteString(fmt.Sprintf("Bytes/op: %v\t", result.Data.AllocedBytes))
432	b.WriteString(fmt.Sprintf("Allocs/op: %v\t\n", result.Data.Allocs))
433
434	// This prints the histogram stats for the latency.
435	if s.hw.histogram == nil {
436		b.WriteString("Histogram (empty)\n")
437	} else {
438		b.WriteString(fmt.Sprintf("Histogram (unit: %s)\n", tUnit))
439		s.hw.histogram.PrintWithUnit(&b, float64(unit))
440	}
441
442	// Print throughput data.
443	req := result.Data.SendOps
444	if req == 0 {
445		req = result.Data.TotalOps
446	}
447	resp := result.Data.RecvOps
448	if resp == 0 {
449		resp = result.Data.TotalOps
450	}
451	b.WriteString(fmt.Sprintf("Number of requests:  %v\tRequest throughput:  %v bit/s\n", req, result.Data.ReqT))
452	b.WriteString(fmt.Sprintf("Number of responses: %v\tResponse throughput: %v bit/s\n", resp, result.Data.RespT))
453	fmt.Println(b.String())
454}
455
// max returns the larger of the two int64 values a and b.
func max(a, b int64) int64 {
	if b >= a {
		return b
	}
	return a
}
462