// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package profile

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)

// Compact performs garbage collection on a profile to remove any
// unreferenced fields. This is useful to reduce the size of a profile
// after samples or locations have been removed.
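//
// A minimal usage sketch (p is assumed to be an already-parsed *Profile
// from which some samples have been dropped):
//
//	p.Sample = p.Sample[:len(p.Sample)/2]
//	p = p.Compact() // drops locations, functions and mappings no longer referenced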
func (p *Profile) Compact() *Profile {
	p, _ = Merge([]*Profile{p})
	return p
}

// Merge merges all the profiles in srcs into a single Profile.
// It returns a new profile independent of the input profiles. The merged
// profile is compacted to eliminate unused samples, locations,
// functions and mappings. Profiles must have identical sample and
// period types or the merge will fail. profile.Period of the
// resulting profile will be the maximum of all profiles, and
// profile.TimeNanos will be the earliest nonzero one. Merges are
// associative with the caveat of the first profile having some
// specialization in how headers are combined. There may be other
// subtleties now or in the future regarding associativity.
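//
// A minimal usage sketch (p1 and p2 are assumed to be already-parsed
// profiles with matching sample and period types):
//
//	merged, err := Merge([]*Profile{p1, p2})
//	if err != nil {
//		// the profiles had incompatible sample or period types
//	}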
func Merge(srcs []*Profile) (*Profile, error) {
	if len(srcs) == 0 {
		return nil, fmt.Errorf("no profiles to merge")
	}
	p, err := combineHeaders(srcs)
	if err != nil {
		return nil, err
	}

	pm := &profileMerger{
		p:         p,
		samples:   make(map[sampleKey]*Sample, len(srcs[0].Sample)),
		locations: make(map[locationKey]*Location, len(srcs[0].Location)),
		functions: make(map[functionKey]*Function, len(srcs[0].Function)),
		mappings:  make(map[mappingKey]*Mapping, len(srcs[0].Mapping)),
	}

	for _, src := range srcs {
		// Clear the profile-specific hash tables
		pm.locationsByID = make(map[uint64]*Location, len(src.Location))
		pm.functionsByID = make(map[uint64]*Function, len(src.Function))
		pm.mappingsByID = make(map[uint64]mapInfo, len(src.Mapping))

		if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
			// The Mapping list has the property that the first mapping
			// represents the main binary. Take the first Mapping we see,
			// otherwise the operations below will add mappings in an
			// arbitrary order.
			pm.mapMapping(src.Mapping[0])
		}

		for _, s := range src.Sample {
			if !isZeroSample(s) {
				pm.mapSample(s)
			}
		}
	}

	for _, s := range p.Sample {
		if isZeroSample(s) {
			// If there are any zero samples, re-merge the profile to GC
			// them.
			return Merge([]*Profile{p})
		}
	}

	return p, nil
}

// Normalize normalizes the source profile by multiplying each value in the
// profile by the ratio of the sum of the base profile's values of that sample
// type to the sum of the source profile's values of that sample type.
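//
// For example, if pb's values for a sample type sum to 100 and p's values
// for that type sum to 400, every value of that type in p is scaled by
// 100/400 = 0.25. A minimal usage sketch (p and pb are assumed to be
// already-parsed, compatible profiles):
//
//	if err := p.Normalize(pb); err != nil {
//		// the profiles were not compatible
//	}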
func (p *Profile) Normalize(pb *Profile) error {

	if err := p.compatible(pb); err != nil {
		return err
	}

	baseVals := make([]int64, len(p.SampleType))
	for _, s := range pb.Sample {
		for i, v := range s.Value {
			baseVals[i] += v
		}
	}

	srcVals := make([]int64, len(p.SampleType))
	for _, s := range p.Sample {
		for i, v := range s.Value {
			srcVals[i] += v
		}
	}

	normScale := make([]float64, len(baseVals))
	for i := range baseVals {
		if srcVals[i] == 0 {
			normScale[i] = 0.0
		} else {
			normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
		}
	}
	p.ScaleN(normScale)
	return nil
}

func isZeroSample(s *Sample) bool {
	for _, v := range s.Value {
		if v != 0 {
			return false
		}
	}
	return true
}

type profileMerger struct {
	p *Profile

	// Memoization tables within a profile.
	locationsByID map[uint64]*Location
	functionsByID map[uint64]*Function
	mappingsByID  map[uint64]mapInfo

	// Memoization tables for profile entities.
	samples   map[sampleKey]*Sample
	locations map[locationKey]*Location
	functions map[functionKey]*Function
	mappings  map[mappingKey]*Mapping
}

type mapInfo struct {
	m      *Mapping
	offset int64
}

func (pm *profileMerger) mapSample(src *Sample) *Sample {
	s := &Sample{
		Location: make([]*Location, len(src.Location)),
		Value:    make([]int64, len(src.Value)),
		Label:    make(map[string][]string, len(src.Label)),
		NumLabel: make(map[string][]int64, len(src.NumLabel)),
		NumUnit:  make(map[string][]string, len(src.NumLabel)),
	}
	for i, l := range src.Location {
		s.Location[i] = pm.mapLocation(l)
	}
	for k, v := range src.Label {
		vv := make([]string, len(v))
		copy(vv, v)
		s.Label[k] = vv
	}
	for k, v := range src.NumLabel {
		u := src.NumUnit[k]
		vv := make([]int64, len(v))
		uu := make([]string, len(u))
		copy(vv, v)
		copy(uu, u)
		s.NumLabel[k] = vv
		s.NumUnit[k] = uu
	}
	// Check memoization table. Must be done on the remapped location to
	// account for the remapped mapping. Add current values to the
	// existing sample.
	k := s.key()
	if ss, ok := pm.samples[k]; ok {
		for i, v := range src.Value {
			ss.Value[i] += v
		}
		return ss
	}
	copy(s.Value, src.Value)
	pm.samples[k] = s
	pm.p.Sample = append(pm.p.Sample, s)
	return s
}

// key generates sampleKey to be used as a key for maps.
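// Location IDs are encoded in hexadecimal and joined with "|"; for
// illustration, locations with IDs 10 and 17 contribute "a|11". Label and
// numeric-label strings are sorted so that key equality does not depend on
// map iteration order.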
func (sample *Sample) key() sampleKey {
	ids := make([]string, len(sample.Location))
	for i, l := range sample.Location {
		ids[i] = strconv.FormatUint(l.ID, 16)
	}

	labels := make([]string, 0, len(sample.Label))
	for k, v := range sample.Label {
		labels = append(labels, fmt.Sprintf("%q%q", k, v))
	}
	sort.Strings(labels)

	numlabels := make([]string, 0, len(sample.NumLabel))
	for k, v := range sample.NumLabel {
		numlabels = append(numlabels, fmt.Sprintf("%q%x%x", k, v, sample.NumUnit[k]))
	}
	sort.Strings(numlabels)

	return sampleKey{
		strings.Join(ids, "|"),
		strings.Join(labels, ""),
		strings.Join(numlabels, ""),
	}
}

type sampleKey struct {
	locations string
	labels    string
	numlabels string
}

func (pm *profileMerger) mapLocation(src *Location) *Location {
	if src == nil {
		return nil
	}

	if l, ok := pm.locationsByID[src.ID]; ok {
		return l
	}

	mi := pm.mapMapping(src.Mapping)
	l := &Location{
		ID:       uint64(len(pm.p.Location) + 1),
		Mapping:  mi.m,
		Address:  uint64(int64(src.Address) + mi.offset),
		Line:     make([]Line, len(src.Line)),
		IsFolded: src.IsFolded,
	}
	for i, ln := range src.Line {
		l.Line[i] = pm.mapLine(ln)
	}
	// Check memoization table. Must be done on the remapped location to
	// account for the remapped mapping ID.
	k := l.key()
	if ll, ok := pm.locations[k]; ok {
		pm.locationsByID[src.ID] = ll
		return ll
	}
	pm.locationsByID[src.ID] = l
	pm.locations[k] = l
	pm.p.Location = append(pm.p.Location, l)
	return l
}

// key generates locationKey to be used as a key for maps.
func (l *Location) key() locationKey {
	key := locationKey{
		addr:     l.Address,
		isFolded: l.IsFolded,
	}
	if l.Mapping != nil {
		// Normalizes address to handle address space randomization.
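		// For illustration, an address of 0x4005d0 inside a mapping that
		// starts at 0x400000 contributes 0x5d0, so the same code mapped at
		// a different base address still produces the same key.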
		key.addr -= l.Mapping.Start
		key.mappingID = l.Mapping.ID
	}
	lines := make([]string, len(l.Line)*2)
	for i, line := range l.Line {
		if line.Function != nil {
			lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
		}
		lines[i*2+1] = strconv.FormatInt(line.Line, 16)
	}
	key.lines = strings.Join(lines, "|")
	return key
}

type locationKey struct {
	addr, mappingID uint64
	lines           string
	isFolded        bool
}

func (pm *profileMerger) mapMapping(src *Mapping) mapInfo {
	if src == nil {
		return mapInfo{}
	}

	if mi, ok := pm.mappingsByID[src.ID]; ok {
		return mi
	}

	// Check memoization tables.
	mk := src.key()
	if m, ok := pm.mappings[mk]; ok {
		mi := mapInfo{m, int64(m.Start) - int64(src.Start)}
		pm.mappingsByID[src.ID] = mi
		return mi
	}
	m := &Mapping{
		ID:              uint64(len(pm.p.Mapping) + 1),
		Start:           src.Start,
		Limit:           src.Limit,
		Offset:          src.Offset,
		File:            src.File,
		BuildID:         src.BuildID,
		HasFunctions:    src.HasFunctions,
		HasFilenames:    src.HasFilenames,
		HasLineNumbers:  src.HasLineNumbers,
		HasInlineFrames: src.HasInlineFrames,
	}
	pm.p.Mapping = append(pm.p.Mapping, m)

	// Update memoization tables.
	pm.mappings[mk] = m
	mi := mapInfo{m, 0}
	pm.mappingsByID[src.ID] = mi
	return mi
}

// key generates encoded strings of Mapping to be used as a key for
// maps.
func (m *Mapping) key() mappingKey {
	// Normalize addresses to handle address space randomization.
	// Round up to next 4K boundary to avoid minor discrepancies.
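	// For illustration, a mapping spanning 0x1234 bytes and one spanning
	// 0x1800 bytes both key on a rounded size of 0x2000, so small
	// differences in reported extents do not prevent merging.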
	const mapsizeRounding = 0x1000

	size := m.Limit - m.Start
	size = size + mapsizeRounding - 1
	size = size - (size % mapsizeRounding)
	key := mappingKey{
		size:   size,
		offset: m.Offset,
	}

	switch {
	case m.BuildID != "":
		key.buildIDOrFile = m.BuildID
	case m.File != "":
		key.buildIDOrFile = m.File
	default:
		// A mapping containing neither build ID nor file name is a fake mapping. A
		// key with empty buildIDOrFile is used for fake mappings so that they are
		// treated as the same mapping during merging.
	}
	return key
}

type mappingKey struct {
	size, offset  uint64
	buildIDOrFile string
}

func (pm *profileMerger) mapLine(src Line) Line {
	ln := Line{
		Function: pm.mapFunction(src.Function),
		Line:     src.Line,
	}
	return ln
}

func (pm *profileMerger) mapFunction(src *Function) *Function {
	if src == nil {
		return nil
	}
	if f, ok := pm.functionsByID[src.ID]; ok {
		return f
	}
	k := src.key()
	if f, ok := pm.functions[k]; ok {
		pm.functionsByID[src.ID] = f
		return f
	}
	f := &Function{
		ID:         uint64(len(pm.p.Function) + 1),
		Name:       src.Name,
		SystemName: src.SystemName,
		Filename:   src.Filename,
		StartLine:  src.StartLine,
	}
	pm.functions[k] = f
	pm.functionsByID[src.ID] = f
	pm.p.Function = append(pm.p.Function, f)
	return f
}

// key generates functionKey to be used as a key for maps.
func (f *Function) key() functionKey {
	return functionKey{
		f.StartLine,
		f.Name,
		f.SystemName,
		f.Filename,
	}
}

type functionKey struct {
	startLine                  int64
	name, systemName, fileName string
}

// combineHeaders checks that all profiles can be merged and returns
// their combined profile.
func combineHeaders(srcs []*Profile) (*Profile, error) {
	for _, s := range srcs[1:] {
		if err := srcs[0].compatible(s); err != nil {
			return nil, err
		}
	}

	var timeNanos, durationNanos, period int64
	var comments []string
	seenComments := map[string]bool{}
	var defaultSampleType string
	for _, s := range srcs {
		if timeNanos == 0 || s.TimeNanos < timeNanos {
			timeNanos = s.TimeNanos
		}
		durationNanos += s.DurationNanos
		if period == 0 || period < s.Period {
			period = s.Period
		}
		for _, c := range s.Comments {
			if seen := seenComments[c]; !seen {
				comments = append(comments, c)
				seenComments[c] = true
			}
		}
		if defaultSampleType == "" {
			defaultSampleType = s.DefaultSampleType
		}
	}

	p := &Profile{
		SampleType: make([]*ValueType, len(srcs[0].SampleType)),

		DropFrames: srcs[0].DropFrames,
		KeepFrames: srcs[0].KeepFrames,

		TimeNanos:     timeNanos,
		DurationNanos: durationNanos,
		PeriodType:    srcs[0].PeriodType,
		Period:        period,

		Comments:          comments,
		DefaultSampleType: defaultSampleType,
	}
	copy(p.SampleType, srcs[0].SampleType)
	return p, nil
}

// compatible determines if two profiles can be compared/merged.
// It returns nil if the profiles are compatible; otherwise it returns an
// error with details on the incompatibility.
func (p *Profile) compatible(pb *Profile) error {
	if !equalValueType(p.PeriodType, pb.PeriodType) {
		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
	}

	if len(p.SampleType) != len(pb.SampleType) {
		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
	}

	for i := range p.SampleType {
		if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
		}
	}
	return nil
}

// equalValueType returns true if the two value types are semantically
// equal. It ignores the internal fields used during encode/decode.
func equalValueType(st1, st2 *ValueType) bool {
	return st1.Type == st2.Type && st1.Unit == st2.Unit
}