1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// This file implements parsers to convert java legacy profiles into
16// the profile.proto format.
17
18package profile
19
20import (
21	"bytes"
22	"fmt"
23	"io"
24	"path/filepath"
25	"regexp"
26	"strconv"
27	"strings"
28)
29
// Regular expressions used to recognize the textual sections of legacy
// java profiles (header attributes, samples and locations).
var (
	// attributeRx matches a "name=value" attribute. Group 1 is the name
	// and group 2 the value; each is a run of word characters and
	// spaces. The pattern is not anchored.
	attributeRx            = regexp.MustCompile(`([\w ]+)=([\w ]+)`)
	// javaSampleRx matches a sample line: two decimal numbers (groups 1
	// and 2), an "@" separator, and a run of spaces, 'x' and lowercase
	// hex digits holding the addresses (group 3).
	javaSampleRx           = regexp.MustCompile(` *(\d+) +(\d+) +@ +([ x0-9a-f]*)`)
	// javaLocationRx matches a whole location line: a "0x"-prefixed hex
	// address (group 1, without the prefix) followed by whitespace and a
	// free-form description (group 2).
	javaLocationRx         = regexp.MustCompile(`^\s*0x([[:xdigit:]]+)\s+(.*)\s*$`)
	// javaLocationFileLineRx matches a description of the form
	// "function (file:line)". Groups are function, file and a possibly
	// negative line number.
	javaLocationFileLineRx = regexp.MustCompile(`^(.*)\s+\((.+):(-?[[:digit:]]+)\)$`)
	// javaLocationPathRx matches a description of the form
	// "function (path)". Groups are function and the parenthesized text.
	javaLocationPathRx     = regexp.MustCompile(`^(.*)\s+\((.*)\)$`)
)
37
38// javaCPUProfile returns a new Profile from profilez data.
39// b is the profile bytes after the header, period is the profiling
40// period, and parse is a function to parse 8-byte chunks from the
41// profile in its native endianness.
42func javaCPUProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
43	p := &Profile{
44		Period:     period * 1000,
45		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
46		SampleType: []*ValueType{{Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}},
47	}
48	var err error
49	var locs map[uint64]*Location
50	if b, locs, err = parseCPUSamples(b, parse, false, p); err != nil {
51		return nil, err
52	}
53
54	if err = parseJavaLocations(b, locs, p); err != nil {
55		return nil, err
56	}
57
58	// Strip out addresses for better merge.
59	if err = p.Aggregate(true, true, true, true, false); err != nil {
60		return nil, err
61	}
62
63	return p, nil
64}
65
66// parseJavaProfile returns a new profile from heapz or contentionz
67// data. b is the profile bytes after the header.
68func parseJavaProfile(b []byte) (*Profile, error) {
69	h := bytes.SplitAfterN(b, []byte("\n"), 2)
70	if len(h) < 2 {
71		return nil, errUnrecognized
72	}
73
74	p := &Profile{
75		PeriodType: &ValueType{},
76	}
77	header := string(bytes.TrimSpace(h[0]))
78
79	var err error
80	var pType string
81	switch header {
82	case "--- heapz 1 ---":
83		pType = "heap"
84	case "--- contentionz 1 ---":
85		pType = "contention"
86	default:
87		return nil, errUnrecognized
88	}
89
90	if b, err = parseJavaHeader(pType, h[1], p); err != nil {
91		return nil, err
92	}
93	var locs map[uint64]*Location
94	if b, locs, err = parseJavaSamples(pType, b, p); err != nil {
95		return nil, err
96	}
97	if err = parseJavaLocations(b, locs, p); err != nil {
98		return nil, err
99	}
100
101	// Strip out addresses for better merge.
102	if err = p.Aggregate(true, true, true, true, false); err != nil {
103		return nil, err
104	}
105
106	return p, nil
107}
108
109// parseJavaHeader parses the attribute section on a java profile and
110// populates a profile. Returns the remainder of the buffer after all
111// attributes.
112func parseJavaHeader(pType string, b []byte, p *Profile) ([]byte, error) {
113	nextNewLine := bytes.IndexByte(b, byte('\n'))
114	for nextNewLine != -1 {
115		line := string(bytes.TrimSpace(b[0:nextNewLine]))
116		if line != "" {
117			h := attributeRx.FindStringSubmatch(line)
118			if h == nil {
119				// Not a valid attribute, exit.
120				return b, nil
121			}
122
123			attribute, value := strings.TrimSpace(h[1]), strings.TrimSpace(h[2])
124			var err error
125			switch pType + "/" + attribute {
126			case "heap/format", "cpu/format", "contention/format":
127				if value != "java" {
128					return nil, errUnrecognized
129				}
130			case "heap/resolution":
131				p.SampleType = []*ValueType{
132					{Type: "inuse_objects", Unit: "count"},
133					{Type: "inuse_space", Unit: value},
134				}
135			case "contention/resolution":
136				p.SampleType = []*ValueType{
137					{Type: "contentions", Unit: "count"},
138					{Type: "delay", Unit: value},
139				}
140			case "contention/sampling period":
141				p.PeriodType = &ValueType{
142					Type: "contentions", Unit: "count",
143				}
144				if p.Period, err = strconv.ParseInt(value, 0, 64); err != nil {
145					return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
146				}
147			case "contention/ms since reset":
148				millis, err := strconv.ParseInt(value, 0, 64)
149				if err != nil {
150					return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
151				}
152				p.DurationNanos = millis * 1000 * 1000
153			default:
154				return nil, errUnrecognized
155			}
156		}
157		// Grab next line.
158		b = b[nextNewLine+1:]
159		nextNewLine = bytes.IndexByte(b, byte('\n'))
160	}
161	return b, nil
162}
163
164// parseJavaSamples parses the samples from a java profile and
165// populates the Samples in a profile. Returns the remainder of the
166// buffer after the samples.
167func parseJavaSamples(pType string, b []byte, p *Profile) ([]byte, map[uint64]*Location, error) {
168	nextNewLine := bytes.IndexByte(b, byte('\n'))
169	locs := make(map[uint64]*Location)
170	for nextNewLine != -1 {
171		line := string(bytes.TrimSpace(b[0:nextNewLine]))
172		if line != "" {
173			sample := javaSampleRx.FindStringSubmatch(line)
174			if sample == nil {
175				// Not a valid sample, exit.
176				return b, locs, nil
177			}
178
179			// Java profiles have data/fields inverted compared to other
180			// profile types.
181			var err error
182			value1, value2, value3 := sample[2], sample[1], sample[3]
183			addrs, err := parseHexAddresses(value3)
184			if err != nil {
185				return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
186			}
187
188			var sloc []*Location
189			for _, addr := range addrs {
190				loc := locs[addr]
191				if locs[addr] == nil {
192					loc = &Location{
193						Address: addr,
194					}
195					p.Location = append(p.Location, loc)
196					locs[addr] = loc
197				}
198				sloc = append(sloc, loc)
199			}
200			s := &Sample{
201				Value:    make([]int64, 2),
202				Location: sloc,
203			}
204
205			if s.Value[0], err = strconv.ParseInt(value1, 0, 64); err != nil {
206				return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
207			}
208			if s.Value[1], err = strconv.ParseInt(value2, 0, 64); err != nil {
209				return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
210			}
211
212			switch pType {
213			case "heap":
214				const javaHeapzSamplingRate = 524288 // 512K
215				if s.Value[0] == 0 {
216					return nil, nil, fmt.Errorf("parsing sample %s: second value must be non-zero", line)
217				}
218				s.NumLabel = map[string][]int64{"bytes": {s.Value[1] / s.Value[0]}}
219				s.Value[0], s.Value[1] = scaleHeapSample(s.Value[0], s.Value[1], javaHeapzSamplingRate)
220			case "contention":
221				if period := p.Period; period != 0 {
222					s.Value[0] = s.Value[0] * p.Period
223					s.Value[1] = s.Value[1] * p.Period
224				}
225			}
226			p.Sample = append(p.Sample, s)
227		}
228		// Grab next line.
229		b = b[nextNewLine+1:]
230		nextNewLine = bytes.IndexByte(b, byte('\n'))
231	}
232	return b, locs, nil
233}
234
235// parseJavaLocations parses the location information in a java
236// profile and populates the Locations in a profile. It uses the
237// location addresses from the profile as both the ID of each
238// location.
239func parseJavaLocations(b []byte, locs map[uint64]*Location, p *Profile) error {
240	r := bytes.NewBuffer(b)
241	fns := make(map[string]*Function)
242	for {
243		line, err := r.ReadString('\n')
244		if err != nil {
245			if err != io.EOF {
246				return err
247			}
248			if line == "" {
249				break
250			}
251		}
252
253		if line = strings.TrimSpace(line); line == "" {
254			continue
255		}
256
257		jloc := javaLocationRx.FindStringSubmatch(line)
258		if len(jloc) != 3 {
259			continue
260		}
261		addr, err := strconv.ParseUint(jloc[1], 16, 64)
262		if err != nil {
263			return fmt.Errorf("parsing sample %s: %v", line, err)
264		}
265		loc := locs[addr]
266		if loc == nil {
267			// Unused/unseen
268			continue
269		}
270		var lineFunc, lineFile string
271		var lineNo int64
272
273		if fileLine := javaLocationFileLineRx.FindStringSubmatch(jloc[2]); len(fileLine) == 4 {
274			// Found a line of the form: "function (file:line)"
275			lineFunc, lineFile = fileLine[1], fileLine[2]
276			if n, err := strconv.ParseInt(fileLine[3], 10, 64); err == nil && n > 0 {
277				lineNo = n
278			}
279		} else if filePath := javaLocationPathRx.FindStringSubmatch(jloc[2]); len(filePath) == 3 {
280			// If there's not a file:line, it's a shared library path.
281			// The path isn't interesting, so just give the .so.
282			lineFunc, lineFile = filePath[1], filepath.Base(filePath[2])
283		} else if strings.Contains(jloc[2], "generated stub/JIT") {
284			lineFunc = "STUB"
285		} else {
286			// Treat whole line as the function name. This is used by the
287			// java agent for internal states such as "GC" or "VM".
288			lineFunc = jloc[2]
289		}
290		fn := fns[lineFunc]
291
292		if fn == nil {
293			fn = &Function{
294				Name:       lineFunc,
295				SystemName: lineFunc,
296				Filename:   lineFile,
297			}
298			fns[lineFunc] = fn
299			p.Function = append(p.Function, fn)
300		}
301		loc.Line = []Line{
302			{
303				Function: fn,
304				Line:     lineNo,
305			},
306		}
307		loc.Address = 0
308	}
309
310	p.remapLocationIDs()
311	p.remapFunctionIDs()
312	p.remapMappingIDs()
313
314	return nil
315}
316