1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <signal.h>
20 #include <sys/mman.h>
21 #include <sys/prctl.h>
22 #include <sys/utsname.h>
23 #include <time.h>
24 #include <unistd.h>
25 #include <set>
26 #include <string>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <vector>
30
31 #include <android-base/logging.h>
32 #include <android-base/file.h>
33 #include <android-base/parseint.h>
34 #include <android-base/strings.h>
35 #include <android-base/unique_fd.h>
36 #if defined(__ANDROID__)
37 #include <android-base/properties.h>
38 #endif
39
40 #include "CallChainJoiner.h"
41 #include "command.h"
42 #include "environment.h"
43 #include "ETMRecorder.h"
44 #include "event_selection_set.h"
45 #include "event_type.h"
46 #include "IOEventLoop.h"
47 #include "JITDebugReader.h"
48 #include "OfflineUnwinder.h"
49 #include "read_apk.h"
50 #include "read_elf.h"
51 #include "record.h"
52 #include "record_file.h"
53 #include "thread_tree.h"
54 #include "tracing.h"
55 #include "utils.h"
56 #include "workload.h"
57
using namespace simpleperf;

// Event type recorded when the user passes no -e/--group option.
static std::string default_measured_event_type = "cpu-cycles";

// Maps -j/-b branch filter names to PERF_SAMPLE_BRANCH_* bits; the selected
// bits are OR-ed together into branch_sampling_.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps --clockid argument values to clockid constants. The special value
// "perf" is handled separately in option parsing (no clockid lookup).
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

// Currently, the record buffer size in user-space is set to match the kernel buffer size on a
// 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
// For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;

// Default aux buffer size for hardware tracing events (cs-etm); can be
// overridden with --aux-buffer-size.
static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;
97
// Timestamps (from GetSystemClock()) marking the boundaries of each recording
// phase. PostProcessRecording() logs the differences divided by 1e6 as
// milliseconds, so the stored values are in nanoseconds.
struct TimeStat {
  uint64_t prepare_recording_time = 0;  // before PrepareRecording()
  uint64_t start_recording_time = 0;    // before DoRecording()
  uint64_t stop_recording_time = 0;     // after the IO event loop exits
  uint64_t finish_recording_time = 0;   // after flushing mapped event data
  uint64_t post_process_time = 0;       // after closing/streaming perf.data
};
105
106 class RecordCommand : public Command {
107 public:
RecordCommand()108 RecordCommand()
109 : Command(
110 "record", "record sampling info in perf.data",
111 // clang-format off
112 "Usage: simpleperf record [options] [--] [command [command-args]]\n"
113 " Gather sampling information of running [command]. And -a/-p/-t option\n"
114 " can be used to change target of sampling information.\n"
115 " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
116 "Select monitored threads:\n"
117 "-a System-wide collection.\n"
118 #if defined(__ANDROID__)
119 "--app package_name Profile the process of an Android application.\n"
120 " On non-rooted devices, the app must be debuggable,\n"
121 " because we use run-as to switch to the app's context.\n"
122 #endif
123 "-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n"
124 " with -a.\n"
125 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
126 "\n"
127 "Select monitored event types:\n"
128 "-e event1[:modifier1],event2[:modifier2],...\n"
129 " Select a list of events to record. An event can be:\n"
130 " 1) an event name listed in `simpleperf list`;\n"
131 " 2) a raw PMU event in rN format. N is a hex number.\n"
132 " For example, r1b selects event number 0x1b.\n"
133 " Modifiers can be added to define how the event should be\n"
134 " monitored. Possible modifiers are:\n"
135 " u - monitor user space events only\n"
136 " k - monitor kernel space events only\n"
137 "--group event1[:modifier],event2[:modifier2],...\n"
138 " Similar to -e option. But events specified in the same --group\n"
139 " option are monitored as a group, and scheduled in and out at the\n"
140 " same time.\n"
141 "--trace-offcpu Generate samples when threads are scheduled off cpu.\n"
142 " Similar to \"-c 1 -e sched:sched_switch\".\n"
143 "\n"
144 "Select monitoring options:\n"
145 "-f freq Set event sample frequency. It means recording at most [freq]\n"
146 " samples every second. For non-tracepoint events, the default\n"
147 " option is -f 4000. A -f/-c option affects all event types\n"
148 " following it until meeting another -f/-c option. For example,\n"
149 " for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
150 " has sample freq 1000, sched:sched_switch event has sample period 1.\n"
151 "-c count Set event sample period. It means recording one sample when\n"
152 " [count] events happen. For tracepoint events, the default option\n"
153 " is -c 1.\n"
154 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
155 " Enable call graph recording. Use frame pointer or dwarf debug\n"
156 " frame as the method to parse call graph in stack.\n"
157 " Default is dwarf,65528.\n"
158 "-g Same as '--call-graph dwarf'.\n"
159 "--clockid clock_id Generate timestamps of samples using selected clock.\n"
160 " Possible values are: realtime, monotonic,\n"
161 " monotonic_raw, boottime, perf. If supported, default\n"
162 " is monotonic, otherwise is perf.\n"
163 "--cpu cpu_item1,cpu_item2,...\n"
164 " Collect samples only on the selected cpus. cpu_item can be cpu\n"
165 " number like 1, or cpu range like 0-3.\n"
166 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
167 " [command]. Here time_in_sec may be any positive\n"
168 " floating point number.\n"
169 "-j branch_filter1,branch_filter2,...\n"
170 " Enable taken branch stack sampling. Each sample captures a series\n"
171 " of consecutive taken branches.\n"
172 " The following filters are defined:\n"
173 " any: any type of branch\n"
174 " any_call: any function call or system call\n"
175 " any_ret: any function return or system call return\n"
176 " ind_call: any indirect branch\n"
177 " u: only when the branch target is at the user level\n"
178 " k: only when the branch target is in the kernel\n"
179 " This option requires at least one branch type among any, any_call,\n"
180 " any_ret, ind_call.\n"
181 "-b Enable taken branch stack sampling. Same as '-j any'.\n"
182 "-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
183 " the kernel. It should be a power of 2. If not set, the max\n"
184 " possible value <= 1024 will be used.\n"
185 "--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n"
186 " Need to be power of 2 and page size aligned.\n"
187 " Used memory size is (buffer_size * (cpu_count + 1).\n"
188 " Default is 4M.\n"
189 "--no-inherit Don't record created child threads/processes.\n"
190 "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n"
191 " percent is in range [1-100], default is 25.\n"
192 "--include-filter binary1,binary2,...\n"
193 " Trace only selected binaries in cs-etm instruction tracing.\n"
194 " Each entry is a binary path.\n"
195 "\n"
196 "Dwarf unwinding options:\n"
197 "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
198 " stack will be recorded in perf.data and unwound while\n"
199 " recording by default. Use --post-unwind=yes to switch\n"
200 " to unwind after recording.\n"
201 "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
202 " will be unwound by default. Use this option to disable the\n"
203 " unwinding of the user's stack.\n"
204 "--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
205 " callchain joiner is used to break the 64k stack limit\n"
206 " and build more complete call graphs. However, the built\n"
207 " call graphs may not be correct in all cases.\n"
208 "--callchain-joiner-min-matching-nodes count\n"
209 " When callchain joiner is used, set the matched nodes needed to join\n"
210 " callchains. The count should be >= 1. By default it is 1.\n"
211 "--no-cut-samples Simpleperf uses a record buffer to cache records received from the kernel.\n"
212 " When the available space in the buffer reaches low level, it cuts part of\n"
213 " the stack data in samples. When the available space reaches critical level,\n"
214 " it drops all samples. This option makes simpleperf not cut samples when the\n"
215 " available space reaches low level.\n"
216 "\n"
217 "Recording file options:\n"
218 "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
219 " kernel symbols will be dumped when needed.\n"
220 "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
221 " dumped in perf.data, to support reporting in another\n"
222 " environment.\n"
223 "-o record_file_name Set record file name, default is perf.data.\n"
224 "--size-limit SIZE[K|M|G] Stop recording after SIZE bytes of records.\n"
225 " Default is unlimited.\n"
226 "--symfs <dir> Look for files with symbols relative to this directory.\n"
227 " This option is used to provide files with symbol table and\n"
228 " debug information, which are used for unwinding and dumping symbols.\n"
229 "\n"
230 "Other options:\n"
231 "--exit-with-parent Stop recording when the process starting\n"
232 " simpleperf dies.\n"
233 "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
234 " <fd_no>, then close <fd_no>.\n"
235 "--stdio-controls-profiling Use stdin/stdout to pause/resume profiling.\n"
236 #if defined(__ANDROID__)
237 "--in-app We are already running in the app's context.\n"
238 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
239 #endif
240 #if 0
241 // Below options are only used internally and shouldn't be visible to the public.
242 "--out-fd <fd> Write perf.data to a file descriptor.\n"
243 "--stop-signal-fd <fd> Stop recording when fd is readable.\n"
244 #endif
245 // clang-format on
246 ),
247 system_wide_collection_(false),
248 branch_sampling_(0),
249 fp_callchain_sampling_(false),
250 dwarf_callchain_sampling_(false),
251 dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
252 unwind_dwarf_callchain_(true),
253 post_unwind_(false),
254 child_inherit_(true),
255 duration_in_sec_(0),
256 can_dump_kernel_symbols_(true),
257 dump_symbols_(true),
258 event_selection_set_(false),
259 mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
260 record_filename_("perf.data"),
261 sample_record_count_(0),
262 lost_record_count_(0),
263 in_app_context_(false),
264 trace_offcpu_(false),
265 exclude_kernel_callchain_(false),
266 allow_callchain_joiner_(true),
267 callchain_joiner_min_matching_nodes_(1u),
268 last_record_timestamp_(0u) {
269 // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
270 // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
271 // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
272 // finish properly.
273 signal(SIGPIPE, SIG_IGN);
274 }
275
276 bool Run(const std::vector<std::string>& args);
277
278 private:
279 bool ParseOptions(const std::vector<std::string>& args,
280 std::vector<std::string>* non_option_args);
281 bool AdjustPerfEventLimit();
282 bool PrepareRecording(Workload* workload);
283 bool DoRecording(Workload* workload);
284 bool PostProcessRecording(const std::vector<std::string>& args);
285 bool TraceOffCpu();
286 bool SetEventSelectionFlags();
287 bool CreateAndInitRecordFile();
288 std::unique_ptr<RecordFileWriter> CreateRecordFile(
289 const std::string& filename);
290 bool DumpKernelSymbol();
291 bool DumpTracingData();
292 bool DumpKernelMaps();
293 bool DumpUserSpaceMaps();
294 bool DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids);
295 bool DumpAuxTraceInfo();
296 bool ProcessRecord(Record* record);
297 bool ShouldOmitRecord(Record* record);
298 bool DumpMapsForRecord(Record* record);
299 bool SaveRecordForPostUnwinding(Record* record);
300 bool SaveRecordAfterUnwinding(Record* record);
301 bool SaveRecordWithoutUnwinding(Record* record);
302 bool ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records);
303 bool ProcessControlCmd(IOEventLoop* loop);
304
305 void UpdateRecord(Record* record);
306 bool UnwindRecord(SampleRecord& r);
307 bool PostUnwindRecords();
308 bool JoinCallChains();
309 bool DumpAdditionalFeatures(const std::vector<std::string>& args);
310 bool DumpBuildIdFeature();
311 bool DumpFileFeature();
312 bool DumpMetaInfoFeature(bool kernel_symbols_available);
313 void CollectHitFileInfo(const SampleRecord& r);
314
315 std::unique_ptr<SampleSpeed> sample_speed_;
316 bool system_wide_collection_;
317 uint64_t branch_sampling_;
318 bool fp_callchain_sampling_;
319 bool dwarf_callchain_sampling_;
320 uint32_t dump_stack_size_in_dwarf_sampling_;
321 bool unwind_dwarf_callchain_;
322 bool post_unwind_;
323 std::unique_ptr<OfflineUnwinder> offline_unwinder_;
324 bool child_inherit_;
325 double duration_in_sec_;
326 bool can_dump_kernel_symbols_;
327 bool dump_symbols_;
328 std::string clockid_;
329 std::vector<int> cpus_;
330 EventSelectionSet event_selection_set_;
331
332 std::pair<size_t, size_t> mmap_page_range_;
333 size_t aux_buffer_size_ = kDefaultAuxBufferSize;
334
335 ThreadTree thread_tree_;
336 std::string record_filename_;
337 android::base::unique_fd out_fd_;
338 std::unique_ptr<RecordFileWriter> record_file_writer_;
339 android::base::unique_fd stop_signal_fd_;
340
341 uint64_t sample_record_count_;
342 uint64_t lost_record_count_;
343 android::base::unique_fd start_profiling_fd_;
344 bool stdio_controls_profiling_ = false;
345
346 std::string app_package_name_;
347 bool in_app_context_;
348 bool trace_offcpu_;
349 bool exclude_kernel_callchain_;
350 uint64_t size_limit_in_bytes_ = 0;
351 uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
352 size_t cpu_time_max_percent_ = 25;
353
354 // For CallChainJoiner
355 bool allow_callchain_joiner_;
356 size_t callchain_joiner_min_matching_nodes_;
357 std::unique_ptr<CallChainJoiner> callchain_joiner_;
358 bool allow_cutting_samples_ = true;
359
360 std::unique_ptr<JITDebugReader> jit_debug_reader_;
361 uint64_t last_record_timestamp_; // used to insert Mmap2Records for JIT debug info
362 TimeStat time_stat_;
363 EventAttrWithId dumping_attr_id_;
364 // In system wide recording, record if we have dumped map info for a process.
365 std::unordered_set<pid_t> dumped_processes_;
366 };
367
// Entry point of `simpleperf record`. Parses options, optionally re-launches
// in the target app's context, then runs the three recording phases
// (PrepareRecording / DoRecording / PostProcessRecording), stamping
// time_stat_ between them. Returns false on the first failure.
bool RecordCommand::Run(const std::vector<std::string>& args) {
  // Fix the reported architecture for the whole run.
  ScopedCurrentArch scoped_arch(GetMachineArch());
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  if (!AdjustPerfEventLimit()) {
    return false;
  }
  // Temp files are created in the output file's directory.
  ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
    // it will be impossible when using --app. So don't switch to app's context when we are
    // root.
    if (!IsRoot()) {
      return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                             record_filename_, true);
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  // Phase timestamps; PostProcessRecording() logs the elapsed times.
  time_stat_.prepare_recording_time = GetSystemClock();
  if (!PrepareRecording(workload.get())) {
    return false;
  }
  time_stat_.start_recording_time = GetSystemClock();
  if (!DoRecording(workload.get())) {
    return false;
  }
  return PostProcessRecording(args);
}
409
// Sets up everything the event loop needs: default event type, monitored
// targets, perf event files and their mapped buffers, the output file, and
// the IO events (exit signals, --duration timer, stdin control, JIT debug
// info callbacks) that drive recording. Returns false on any failure.
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    offline_unwinder_ = OfflineUnwinder::Create(false);
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_,
                                                false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 selects the whole system (perf_event_open semantics).
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
      if (event_selection_set_.HasInplaceSampler()) {
        // Start worker early, because the worker process has to setup inplace-sampler server
        // before we try to connect it.
        if (!workload->Start()) {
          return false;
        }
      }
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  // Profiling JITed/interpreted Java code is supported starting from Android P.
  // Also support profiling art interpreter on host.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
    // the debug-unwind cmd.
    bool keep_symfiles = dwarf_callchain_sampling_ && !unwind_dwarf_callchain_;
    bool sync_with_records = clockid_ == "monotonic";
    jit_debug_reader_.reset(new JITDebugReader(keep_symfiles, sync_with_records));
    // To profile java code, need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  size_t record_buffer_size = system_wide_collection_ ? kSystemWideRecordBufferSize
                                                      : kRecordBufferSize;
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_cutting_samples_)) {
    return false;
  }
  // Every record read from the mapped buffers flows through ProcessRecord().
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() {
    return loop->ExitLoop();
  };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback)) {
      return false;
    }
  }
  // --stop-signal-fd: stop recording when the fd becomes readable.
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }

  // --duration: exit the loop after the requested time.
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  // --stdio-controls-profiling: accept pause/resume commands on stdin (fd 0).
  if (stdio_controls_profiling_) {
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(debug_info, sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      // Monitor JIT debug info in all monitored processes, including the
      // processes that own monitored threads.
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  return true;
}
568
// Starts the workload (if not already started), notifies any waiting
// controller that profiling began, then runs the IO event loop until it is
// exited (signal, --duration timer, stop fd, or no more targets) and flushes
// the remaining mapped event data. Stamps stop/finish times in time_stat_.
bool RecordCommand::DoRecording(Workload* workload) {
  // Write records in mapped buffers of perf_event_files to output file while workload is running.
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (start_profiling_fd_.get() != -1) {
    // --start_profiling_fd: tell the waiting process we started, then close the fd.
    // Failure to write is logged but not fatal.
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    start_profiling_fd_.reset();
  }
  if (stdio_controls_profiling_) {
    // --stdio-controls-profiling: signal readiness on stdout.
    printf("started\n");
    fflush(stdout);
  }
  if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    return false;
  }
  time_stat_.stop_recording_time = GetSystemClock();
  // Drain any records still buffered after the loop exits.
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }
  time_stat_.finish_recording_time = GetSystemClock();
  return true;
}
594
WriteRecordDataToOutFd(const std::string & in_filename,android::base::unique_fd out_fd)595 static bool WriteRecordDataToOutFd(const std::string& in_filename, android::base::unique_fd out_fd) {
596 android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
597 if (in_fd == -1) {
598 PLOG(ERROR) << "Failed to open " << in_filename;
599 return false;
600 }
601 char buf[8192];
602 while (true) {
603 ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
604 if (n < 0) {
605 PLOG(ERROR) << "Failed to read " << in_filename;
606 return false;
607 }
608 if (n == 0) {
609 break;
610 }
611 if (!android::base::WriteFully(out_fd, buf, n)) {
612 PLOG(ERROR) << "Failed to write to out_fd";
613 return false;
614 }
615 }
616 unlink(in_filename.c_str());
617 return true;
618 }
619
// Finalizes a recording session: post-unwinds dwarf callchains (if deferred),
// joins callchains, writes the remaining feature sections, closes the record
// file, optionally streams it to --out-fd, and prints summary statistics
// (sample/lost counts, a warning when >= 10% of samples were lost, and phase
// timing at debug level).
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 2. Optionally join Callchains.
  if (callchain_joiner_) {
    // Best effort: the return value is deliberately ignored, so a join
    // failure doesn't discard the recording.
    JoinCallChains();
  }

  // 3. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  // --out-fd: stream the finished file to the given descriptor.
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 4. Show brief record result.
  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size;
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size;
    }
  } else {
    std::string cut_samples;
    if (record_stat.cut_stack_samples > 0) {
      cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples);
    }
    // Total lost = kernel-reported lost + user-space drops.
    lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples;
    LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples
              << ". Samples lost: " << lost_record_count_ << ".";
    LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, "
               << record_stat.lost_non_samples << " non samples, cut stack of "
               << record_stat.cut_stack_samples << " samples.";
    if (sample_record_count_ + lost_record_count_ != 0) {
      double lost_percent =
          static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_);
      constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
      if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                     << "consider increasing mmap_pages(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  // Phase timestamps are divided by 1e6 to report milliseconds.
  LOG(DEBUG) << "Prepare recording time "
             << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6
             << " ms, recording time "
             << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e6
             << " ms, stop recording time "
             << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e6
             << " ms, post process time "
             << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e6 << " ms.";
  return true;
}
688
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)689 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
690 std::vector<std::string>* non_option_args) {
691 std::vector<size_t> wait_setting_speed_event_groups_;
692 size_t i;
693 for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
694 if (args[i] == "-a") {
695 system_wide_collection_ = true;
696 } else if (args[i] == "--app") {
697 if (!NextArgumentOrError(args, &i)) {
698 return false;
699 }
700 app_package_name_ = args[i];
701 } else if (args[i] == "--aux-buffer-size") {
702 if (!GetUintOption(args, &i, &aux_buffer_size_, 0, std::numeric_limits<size_t>::max(),
703 true)) {
704 return false;
705 }
706 if (!IsPowerOfTwo(aux_buffer_size_) || aux_buffer_size_ % sysconf(_SC_PAGE_SIZE)) {
707 LOG(ERROR) << "invalid aux buffer size: " << args[i];
708 return false;
709 }
710 } else if (args[i] == "-b") {
711 branch_sampling_ = branch_sampling_type_map["any"];
712 } else if (args[i] == "-c" || args[i] == "-f") {
713 uint64_t value;
714 if (!GetUintOption(args, &i, &value, 1)) {
715 return false;
716 }
717 if (args[i-1] == "-c") {
718 sample_speed_.reset(new SampleSpeed(0, value));
719 } else {
720 if (value >= INT_MAX) {
721 LOG(ERROR) << "sample freq can't be bigger than INT_MAX.";
722 return false;
723 }
724 sample_speed_.reset(new SampleSpeed(value, 0));
725 max_sample_freq_ = std::max(max_sample_freq_, value);
726 }
727 for (auto group_id : wait_setting_speed_event_groups_) {
728 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
729 }
730 wait_setting_speed_event_groups_.clear();
731
732 } else if (args[i] == "--call-graph") {
733 if (!NextArgumentOrError(args, &i)) {
734 return false;
735 }
736 std::vector<std::string> strs = android::base::Split(args[i], ",");
737 if (strs[0] == "fp") {
738 fp_callchain_sampling_ = true;
739 dwarf_callchain_sampling_ = false;
740 } else if (strs[0] == "dwarf") {
741 fp_callchain_sampling_ = false;
742 dwarf_callchain_sampling_ = true;
743 if (strs.size() > 1) {
744 uint64_t size;
745 if (!android::base::ParseUint(strs[1], &size)) {
746 LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
747 return false;
748 }
749 if ((size & 7) != 0) {
750 LOG(ERROR) << "dump stack size " << size
751 << " is not 8-byte aligned.";
752 return false;
753 }
754 if (size >= MAX_DUMP_STACK_SIZE) {
755 LOG(ERROR) << "dump stack size " << size
756 << " is bigger than max allowed size "
757 << MAX_DUMP_STACK_SIZE << ".";
758 return false;
759 }
760 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
761 }
762 } else {
763 LOG(ERROR) << "unexpected argument for --call-graph option: "
764 << args[i];
765 return false;
766 }
767 } else if (args[i] == "--clockid") {
768 if (!NextArgumentOrError(args, &i)) {
769 return false;
770 }
771 if (args[i] != "perf") {
772 if (!IsSettingClockIdSupported()) {
773 LOG(ERROR) << "Setting clockid is not supported by the kernel.";
774 return false;
775 }
776 if (clockid_map.find(args[i]) == clockid_map.end()) {
777 LOG(ERROR) << "Invalid clockid: " << args[i];
778 return false;
779 }
780 }
781 clockid_ = args[i];
782 } else if (args[i] == "--cpu") {
783 if (!NextArgumentOrError(args, &i)) {
784 return false;
785 }
786 cpus_ = GetCpusFromString(args[i]);
787 } else if (args[i] == "--cpu-percent") {
788 if (!GetUintOption(args, &i, &cpu_time_max_percent_, 1, 100)) {
789 return false;
790 }
791 } else if (args[i] == "--duration") {
792 if (!GetDoubleOption(args, &i, &duration_in_sec_, 1e-9)) {
793 return false;
794 }
795 } else if (args[i] == "-e") {
796 if (!NextArgumentOrError(args, &i)) {
797 return false;
798 }
799 std::vector<std::string> event_types = android::base::Split(args[i], ",");
800 for (auto& event_type : event_types) {
801 size_t group_id;
802 if (!event_selection_set_.AddEventType(event_type, &group_id)) {
803 return false;
804 }
805 if (sample_speed_) {
806 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
807 } else {
808 wait_setting_speed_event_groups_.push_back(group_id);
809 }
810 }
811 } else if (args[i] == "--exit-with-parent") {
812 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
813 } else if (args[i] == "-g") {
814 fp_callchain_sampling_ = false;
815 dwarf_callchain_sampling_ = true;
816 } else if (args[i] == "--group") {
817 if (!NextArgumentOrError(args, &i)) {
818 return false;
819 }
820 std::vector<std::string> event_types = android::base::Split(args[i], ",");
821 size_t group_id;
822 if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
823 return false;
824 }
825 if (sample_speed_) {
826 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
827 } else {
828 wait_setting_speed_event_groups_.push_back(group_id);
829 }
830 } else if (args[i] == "--in-app") {
831 in_app_context_ = true;
832 } else if (args[i] == "--include-filter") {
833 if (!NextArgumentOrError(args, &i)) {
834 return false;
835 }
836 event_selection_set_.SetIncludeFilters(android::base::Split(args[i], ","));
837 } else if (args[i] == "-j") {
838 if (!NextArgumentOrError(args, &i)) {
839 return false;
840 }
841 std::vector<std::string> branch_sampling_types =
842 android::base::Split(args[i], ",");
843 for (auto& type : branch_sampling_types) {
844 auto it = branch_sampling_type_map.find(type);
845 if (it == branch_sampling_type_map.end()) {
846 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
847 return false;
848 }
849 branch_sampling_ |= it->second;
850 }
851 } else if (args[i] == "-m") {
852 uint64_t pages;
853 if (!GetUintOption(args, &i, &pages)) {
854 return false;
855 }
856 if (!IsPowerOfTwo(pages)) {
857 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
858 return false;
859 }
860 mmap_page_range_.first = mmap_page_range_.second = pages;
861 } else if (args[i] == "--no-dump-kernel-symbols") {
862 can_dump_kernel_symbols_ = false;
863 } else if (args[i] == "--no-dump-symbols") {
864 dump_symbols_ = false;
865 } else if (args[i] == "--no-inherit") {
866 child_inherit_ = false;
867 } else if (args[i] == "--no-unwind") {
868 unwind_dwarf_callchain_ = false;
869 } else if (args[i] == "--no-callchain-joiner") {
870 allow_callchain_joiner_ = false;
871 } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
872 if (!GetUintOption(args, &i, &callchain_joiner_min_matching_nodes_, 1)) {
873 return false;
874 }
875 } else if (args[i] == "--no-cut-samples") {
876 allow_cutting_samples_ = false;
877 } else if (args[i] == "-o") {
878 if (!NextArgumentOrError(args, &i)) {
879 return false;
880 }
881 record_filename_ = args[i];
882 } else if (args[i] == "--out-fd") {
883 int fd;
884 if (!GetUintOption(args, &i, &fd)) {
885 return false;
886 }
887 out_fd_.reset(fd);
888 } else if (args[i] == "-p") {
889 if (!NextArgumentOrError(args, &i)) {
890 return false;
891 }
892 std::set<pid_t> pids;
893 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
894 return false;
895 }
896 event_selection_set_.AddMonitoredProcesses(pids);
897 } else if (android::base::StartsWith(args[i], "--post-unwind")) {
898 if (args[i] == "--post-unwind" || args[i] == "--post-unwind=yes") {
899 post_unwind_ = true;
900 } else if (args[i] == "--post-unwind=no") {
901 post_unwind_ = false;
902 } else {
903 LOG(ERROR) << "unexpected option " << args[i];
904 return false;
905 }
906 } else if (args[i] == "--size-limit") {
907 if (!GetUintOption(args, &i, &size_limit_in_bytes_, 1, std::numeric_limits<uint64_t>::max(),
908 true)) {
909 return false;
910 }
911 } else if (args[i] == "--start_profiling_fd") {
912 int fd;
913 if (!GetUintOption(args, &i, &fd)) {
914 return false;
915 }
916 start_profiling_fd_.reset(fd);
917 } else if (args[i] == "--stdio-controls-profiling") {
918 stdio_controls_profiling_ = true;
919 } else if (args[i] == "--stop-signal-fd") {
920 int fd;
921 if (!GetUintOption(args, &i, &fd)) {
922 return false;
923 }
924 stop_signal_fd_.reset(fd);
925 } else if (args[i] == "--symfs") {
926 if (!NextArgumentOrError(args, &i)) {
927 return false;
928 }
929 if (!Dso::SetSymFsDir(args[i])) {
930 return false;
931 }
932 } else if (args[i] == "-t") {
933 if (!NextArgumentOrError(args, &i)) {
934 return false;
935 }
936 std::set<pid_t> tids;
937 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
938 return false;
939 }
940 event_selection_set_.AddMonitoredThreads(tids);
941 } else if (args[i] == "--trace-offcpu") {
942 trace_offcpu_ = true;
943 } else if (args[i] == "--tracepoint-events") {
944 if (!NextArgumentOrError(args, &i)) {
945 return false;
946 }
947 if (!SetTracepointEventsFilePath(args[i])) {
948 return false;
949 }
950 } else if (args[i] == "--") {
951 i++;
952 break;
953 } else {
954 ReportUnknownOption(args, i);
955 return false;
956 }
957 }
958
959 if (!dwarf_callchain_sampling_) {
960 if (!unwind_dwarf_callchain_) {
961 LOG(ERROR)
962 << "--no-unwind is only used with `--call-graph dwarf` option.";
963 return false;
964 }
965 unwind_dwarf_callchain_ = false;
966 }
967 if (post_unwind_) {
968 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
969 post_unwind_ = false;
970 }
971 }
972
973 if (fp_callchain_sampling_) {
974 if (GetBuildArch() == ARCH_ARM) {
975 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
976 << "consider using `-g` option or profiling on aarch64 architecture.";
977 }
978 }
979
980 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
981 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
982 "used at the same time.";
983 return false;
984 }
985
986 if (system_wide_collection_ && !IsRoot()) {
987 LOG(ERROR) << "System wide profiling needs root privilege.";
988 return false;
989 }
990
991 if (dump_symbols_ && can_dump_kernel_symbols_) {
992 // No need to dump kernel symbols as we will dump all required symbols.
993 can_dump_kernel_symbols_ = false;
994 }
995 if (clockid_.empty()) {
996 clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
997 }
998
999 non_option_args->clear();
1000 for (; i < args.size(); ++i) {
1001 non_option_args->push_back(args[i]);
1002 }
1003 return true;
1004 }
1005
AdjustPerfEventLimit()1006 bool RecordCommand::AdjustPerfEventLimit() {
1007 bool set_prop = false;
1008 // 1. Adjust max_sample_rate.
1009 uint64_t cur_max_freq;
1010 if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
1011 !SetMaxSampleFrequency(max_sample_freq_)) {
1012 set_prop = true;
1013 }
1014 // 2. Adjust perf_cpu_time_max_percent.
1015 size_t cur_percent;
1016 if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
1017 !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
1018 set_prop = true;
1019 }
1020 // 3. Adjust perf_event_mlock_kb.
1021 long cpus = sysconf(_SC_NPROCESSORS_CONF);
1022 uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
1023 if (event_selection_set_.HasAuxTrace()) {
1024 mlock_kb += cpus * aux_buffer_size_ / 1024;
1025 }
1026 uint64_t cur_mlock_kb;
1027 if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
1028 !SetPerfEventMlockKb(mlock_kb)) {
1029 set_prop = true;
1030 }
1031
1032 if (GetAndroidVersion() >= kAndroidVersionP + 1 && set_prop && !in_app_context_) {
1033 return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
1034 std::max(mlock_kb, cur_mlock_kb));
1035 }
1036 return true;
1037 }
1038
TraceOffCpu()1039 bool RecordCommand::TraceOffCpu() {
1040 if (FindEventTypeByName("sched:sched_switch") == nullptr) {
1041 LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
1042 return false;
1043 }
1044 for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
1045 if (event_type->name == "sched:sched_switch") {
1046 LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
1047 return false;
1048 }
1049 }
1050 if (!IsDumpingRegsForTracepointEventsSupported()) {
1051 LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
1052 return false;
1053 }
1054 return event_selection_set_.AddEventType("sched:sched_switch");
1055 }
1056
// Apply the parsed command-line options to the selected perf events.
bool RecordCommand::SetEventSelectionFlags() {
  // Attach sample ids to all records so each record can be matched to the
  // event that produced it.
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  // fp sampling takes precedence over dwarf sampling; dwarf sampling also
  // needs the user stack dump size chosen during option parsing.
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    if (!event_selection_set_.EnableDwarfCallChainSampling(
            dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  // "perf" means the kernel's default clock; any other value was validated
  // against clockid_map during option parsing, so the lookup cannot miss.
  if (clockid_ != "perf") {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}
1076
// Create the output perf.data file and write all up-front records
// (kernel symbols, tracing data, kernel and user space maps, aux trace info).
bool RecordCommand::CreateAndInitRecordFile() {
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use first perf_event_attr and first event id to dump mmap and comm records.
  dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0];
  return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps() &&
         DumpAuxTraceInfo();
}
1087
CreateRecordFile(const std::string & filename)1088 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
1089 const std::string& filename) {
1090 std::unique_ptr<RecordFileWriter> writer =
1091 RecordFileWriter::CreateInstance(filename);
1092 if (writer == nullptr) {
1093 return nullptr;
1094 }
1095
1096 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
1097 return nullptr;
1098 }
1099 return writer;
1100 }
1101
DumpKernelSymbol()1102 bool RecordCommand::DumpKernelSymbol() {
1103 if (can_dump_kernel_symbols_) {
1104 std::string kallsyms;
1105 if (event_selection_set_.NeedKernelSymbol() &&
1106 CheckKernelSymbolAddresses()) {
1107 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
1108 PLOG(ERROR) << "failed to read /proc/kallsyms";
1109 return false;
1110 }
1111 KernelSymbolRecord r(kallsyms);
1112 if (!ProcessRecord(&r)) {
1113 return false;
1114 }
1115 }
1116 }
1117 return true;
1118 }
1119
DumpTracingData()1120 bool RecordCommand::DumpTracingData() {
1121 std::vector<const EventType*> tracepoint_event_types =
1122 event_selection_set_.GetTracepointEvents();
1123 if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
1124 return true; // No need to dump tracing data, or can't do it.
1125 }
1126 std::vector<char> tracing_data;
1127 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
1128 return false;
1129 }
1130 TracingDataRecord record(tracing_data);
1131 if (!ProcessRecord(&record)) {
1132 return false;
1133 }
1134 return true;
1135 }
1136
DumpKernelMaps()1137 bool RecordCommand::DumpKernelMaps() {
1138 KernelMmap kernel_mmap;
1139 std::vector<KernelMmap> module_mmaps;
1140 GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
1141
1142 MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
1143 kernel_mmap.len, 0, kernel_mmap.filepath, dumping_attr_id_.ids[0]);
1144 if (!ProcessRecord(&mmap_record)) {
1145 return false;
1146 }
1147 for (auto& module_mmap : module_mmaps) {
1148 MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, module_mmap.start_addr,
1149 module_mmap.len, 0, module_mmap.filepath, dumping_attr_id_.ids[0]);
1150 if (!ProcessRecord(&mmap_record)) {
1151 return false;
1152 }
1153 }
1154 return true;
1155 }
1156
DumpUserSpaceMaps()1157 bool RecordCommand::DumpUserSpaceMaps() {
1158 // For system_wide profiling, maps of a process is dumped when needed (first time a sample hits
1159 // that process).
1160 if (system_wide_collection_) {
1161 return true;
1162 }
1163 // Map from process id to a set of thread ids in that process.
1164 std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
1165 for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
1166 std::vector<pid_t> tids = GetThreadsInProcess(pid);
1167 process_map[pid].insert(tids.begin(), tids.end());
1168 }
1169 for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
1170 pid_t pid;
1171 if (GetProcessForThread(tid, &pid)) {
1172 process_map[pid].insert(tid);
1173 }
1174 }
1175
1176 // Dump each process.
1177 for (auto& pair : process_map) {
1178 if (!DumpProcessMaps(pair.first, pair.second)) {
1179 return false;
1180 }
1181 }
1182 return true;
1183 }
1184
// Dump mmap records, the process name and the given threads' names for one
// process. Returning true when the process has already exited is deliberate:
// a vanished process is not a recording failure.
bool RecordCommand::DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids) {
  // Dump mmap records.
  std::vector<ThreadMmap> thread_mmaps;
  if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
    // The process may exit before we get its info.
    return true;
  }
  const perf_event_attr& attr = *dumping_attr_id_.attr;
  uint64_t event_id = dumping_attr_id_.ids[0];
  for (const auto& map : thread_mmaps) {
    // Skip non-executable maps unless the event selection needs them.
    if (!(map.prot & PROT_EXEC) && !event_selection_set_.RecordNotExecutableMaps()) {
      continue;
    }
    // Use last_record_timestamp_ so these synthesized records sort correctly
    // relative to already-captured samples.
    Mmap2Record record(attr, false, pid, pid, map.start_addr, map.len,
                       map.pgoff, map.prot, map.name, event_id, last_record_timestamp_);
    if (!ProcessRecord(&record)) {
      return false;
    }
  }
  // Dump process name.
  std::string name = GetCompleteProcessName(pid);
  if (!name.empty()) {
    CommRecord record(attr, pid, pid, name, event_id, last_record_timestamp_);
    if (!ProcessRecord(&record)) {
      return false;
    }
  }
  // Dump thread info. The main thread (tid == pid) was already named above.
  for (const auto& tid : tids) {
    if (tid != pid && GetThreadName(tid, &name)) {
      CommRecord comm_record(attr, pid, tid, name, event_id, last_record_timestamp_);
      if (!ProcessRecord(&comm_record)) {
        return false;
      }
    }
  }
  return true;
}
1223
// Central sink for every captured or synthesized record. The statement order
// here is load-bearing: records are rewritten before filtering, and the
// timestamp watermark advances before any lazy map dumping that relies on it.
bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }
  // Honor --size-limit by exiting the event loop once the data section grows
  // past the limit; this ends recording rather than failing it.
  if (size_limit_in_bytes_ > 0u) {
    if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
      return event_selection_set_.GetIOEventLoop()->ExitLoop();
    }
  }
  // Let the JIT debug reader observe records (e.g. to sync with timestamps).
  if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
    return false;
  }
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_ && !DumpMapsForRecord(record)) {
    return false;
  }
  // Dispatch to one of three save strategies chosen by the unwind options.
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}
1250
DumpAuxTraceInfo()1251 bool RecordCommand::DumpAuxTraceInfo() {
1252 if (event_selection_set_.HasAuxTrace()) {
1253 AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
1254 return ProcessRecord(&auxtrace_info);
1255 }
1256 return true;
1257 }
1258
// True when a user-space mmap record's backing file exists only in memory
// (so no symbols can ever be read from disk for it). Kernel maps are always
// considered file-backed.
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  if (record->InKernel()) {
    return false;
  }
  return MappedFileOnlyExistInMemory(record->filename);
}
1263
ShouldOmitRecord(Record * record)1264 bool RecordCommand::ShouldOmitRecord(Record* record) {
1265 if (jit_debug_reader_) {
1266 // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
1267 // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
1268 // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
1269 // entries for unwinding, as in http://b/77236599. So it is better to remove
1270 // dalvik-jit-code-cache and other maps that only exist in memory.
1271 switch (record->type()) {
1272 case PERF_RECORD_MMAP:
1273 return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
1274 case PERF_RECORD_MMAP2:
1275 return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
1276 }
1277 }
1278 return false;
1279 }
1280
DumpMapsForRecord(Record * record)1281 bool RecordCommand::DumpMapsForRecord(Record* record) {
1282 if (record->type() == PERF_RECORD_SAMPLE) {
1283 pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
1284 if (dumped_processes_.find(pid) == dumped_processes_.end()) {
1285 // Dump map info and all thread names for that process.
1286 std::vector<pid_t> tids = GetThreadsInProcess(pid);
1287 if (!tids.empty() &&
1288 !DumpProcessMaps(pid, std::unordered_set<pid_t>(tids.begin(), tids.end()))) {
1289 return false;
1290 }
1291 dumped_processes_.insert(pid);
1292 }
1293 }
1294 return true;
1295 }
1296
SaveRecordForPostUnwinding(Record * record)1297 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
1298 if (!record_file_writer_->WriteRecord(*record)) {
1299 LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
1300 << "--no-post-unwind option.";
1301 return false;
1302 }
1303 return true;
1304 }
1305
// Unwind dwarf callchains in sample records while recording, then write the
// (much smaller) unwound records to the output file.
bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
    // to adjust callchains generated by dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
    // chain.
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    // Accumulate kernel-reported lost-sample counts for the final summary.
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  } else {
    // Non-sample records (mmap/comm/...) keep the in-memory thread tree in
    // sync so subsequent unwinding sees up-to-date maps.
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}
1329
SaveRecordWithoutUnwinding(Record * record)1330 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
1331 if (record->type() == PERF_RECORD_SAMPLE) {
1332 auto& r = *static_cast<SampleRecord*>(record);
1333 if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
1334 r.AdjustCallChainGeneratedByKernel();
1335 }
1336 if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1337 // If current record contains no user callchain, skip it.
1338 return true;
1339 }
1340 sample_record_count_++;
1341 } else if (record->type() == PERF_RECORD_LOST) {
1342 lost_record_count_ += static_cast<LostRecord*>(record)->lost;
1343 }
1344 return record_file_writer_->WriteRecord(*record);
1345 }
1346
// Turn JIT/dex debug info reported by JITDebugReader into synthesized mmap
// records, so samples landing in jitted code or extracted dex files can be
// attributed correctly.
bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info,
                                        bool sync_kernel_records) {
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      // When the reader can sync with record timestamps, use the info's own
      // timestamp; otherwise fall back to the latest record timestamp seen.
      uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp
                                                               : last_record_timestamp_;
      Mmap2Record record(*attr_id.attr, false, info.pid, info.pid,
                         info.jit_code_addr, info.jit_code_len, 0, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, attr_id.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      // Dex file info: emit a map for the extracted dex file when present,
      // and register the dex file offset either way.
      if (info.extracted_dex_file_map) {
        ThreadMmap& map = *info.extracted_dex_file_map;
        uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp
                                                                 : last_record_timestamp_;
        Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len,
                           map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So process existing samples each time generating new JIT maps. We prefer
  // to process samples after processing JIT maps. Because some of the samples may hit the new JIT
  // maps, and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}
1383
ProcessControlCmd(IOEventLoop * loop)1384 bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
1385 char* line = nullptr;
1386 size_t line_length = 0;
1387 if (getline(&line, &line_length, stdin) == -1) {
1388 free(line);
1389 // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
1390 // So we may see EOF of stdin.
1391 return loop->ExitLoop();
1392 }
1393 std::string cmd = android::base::Trim(line);
1394 free(line);
1395 LOG(DEBUG) << "process control cmd: " << cmd;
1396 bool result = false;
1397 if (cmd == "pause") {
1398 result = event_selection_set_.SetEnableEvents(false);
1399 } else if (cmd == "resume") {
1400 result = event_selection_set_.SetEnableEvents(true);
1401 } else {
1402 LOG(ERROR) << "unknown control cmd: " << cmd;
1403 }
1404 printf("%s\n", result ? "ok" : "error");
1405 fflush(stdout);
1406 return result;
1407 }
1408
1409 template <class RecordType>
UpdateMmapRecordForEmbeddedPath(RecordType & r,bool has_prot,uint32_t prot)1410 void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
1411 if (r.InKernel()) {
1412 return;
1413 }
1414 std::string filename = r.filename;
1415 bool name_changed = false;
1416 // Some vdex files in map files are marked with deleted flag, but they exist in the file system.
1417 // It may be because a new file is used to replace the old one, but still worth to try.
1418 if (android::base::EndsWith(filename, " (deleted)")) {
1419 filename.resize(filename.size() - 10);
1420 name_changed = true;
1421 }
1422 if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
1423 // For the case of a shared library "foobar.so" embedded
1424 // inside an APK, we rewrite the original MMAP from
1425 // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
1426 // so as to make the library name explicit. This update is
1427 // done here (as part of the record operation) as opposed to
1428 // on the host during the report, since we want to report
1429 // the correct library name even if the the APK in question
1430 // is not present on the host. The new offset W is
1431 // calculated to be with respect to the start of foobar.so,
1432 // not to the start of path.apk.
1433 EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
1434 if (ee != nullptr) {
1435 // Compute new offset relative to start of elf in APK.
1436 auto data = *r.data;
1437 data.pgoff -= ee->entry_offset();
1438 r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
1439 return;
1440 }
1441 }
1442 std::string zip_path;
1443 std::string entry_name;
1444 if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
1445 filename = GetUrlInApk(zip_path, entry_name);
1446 name_changed = true;
1447 }
1448 if (name_changed) {
1449 auto data = *r.data;
1450 r.SetDataAndFilename(data, filename);
1451 }
1452 }
1453
UpdateRecord(Record * record)1454 void RecordCommand::UpdateRecord(Record* record) {
1455 if (record->type() == PERF_RECORD_MMAP) {
1456 UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
1457 } else if (record->type() == PERF_RECORD_MMAP2) {
1458 auto r = static_cast<Mmap2Record*>(record);
1459 UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
1460 } else if (record->type() == PERF_RECORD_COMM) {
1461 auto r = static_cast<CommRecord*>(record);
1462 if (r->data->pid == r->data->tid) {
1463 std::string s = GetCompleteProcessName(r->data->pid);
1464 if (!s.empty()) {
1465 r->SetCommandName(s);
1466 }
1467 }
1468 }
1469 }
1470
// Unwind the dwarf callchain of one sample and replace its reg/stack data
// with the resulting call chain. Samples lacking regs or stack data pass
// through unchanged.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
      (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) &&
      (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread =
        thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info
    // from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    // Feed ips/sps to the callchain joiner (when enabled) so broken chains
    // can be joined after recording.
    if (callchain_joiner_) {
      return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                             CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
    }
  }
  return true;
}
1505
// After recording with --post-unwind: replay the raw record file through the
// unwinder, rewriting record_filename_ with unwound (much smaller) samples.
bool RecordCommand::PostUnwindRecords() {
  // 1. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 2. Read records from the temporary file, and write unwound records back to record_filename_.
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!record_file_writer_) {
    return false;
  }
  // Reset the counters: SaveRecordAfterUnwinding() recounts samples and lost
  // records while replaying, so the totals reflect the rewritten file.
  sample_record_count_ = 0;
  lost_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback);
}
1533
// Rewrite record_filename_ with joined call chains from the callchain joiner.
// Relies on the joiner emitting chains in the same order the samples were
// added, which the CHECK_EQs below verify per record.
bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!reader || !record_file_writer_) {
    return false;
  }

  auto record_callback = [&](std::unique_ptr<Record> r) {
    // Non-sample records and samples without user callchains pass through
    // unchanged; only samples with user chains consume a joined chain.
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    // The joined chain must correspond to this exact sample.
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback);
}
1581
// Write the feature sections of perf.data after recording finishes: build
// ids, file/symbol info, os release, arch, cmdline, meta info, and optionally
// branch-stack and aux-trace features.
bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  if (CheckKernelSymbolAddresses()) {
    Dso::ReadKernelSymbolsFromProc();
    kernel_symbols_available = true;
  }
  std::vector<uint64_t> auxtrace_offset;
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    } else if (r->type() == PERF_RECORD_AUXTRACE) {
      // Record where each aux trace data block starts (the record header
      // precedes the data, hence the subtraction).
      auto auxtrace = static_cast<const AuxTraceRecord*>(r);
      auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size());
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Base features written unconditionally below: build id, file, os release,
  // arch, cmdline, meta info. Keep this count in sync with the writes.
  size_t feature_count = 6;
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  // Store the full command line so reports can show how the file was made.
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}
1660
DumpBuildIdFeature()1661 bool RecordCommand::DumpBuildIdFeature() {
1662 std::vector<BuildIdRecord> build_id_records;
1663 BuildId build_id;
1664 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1665 for (Dso* dso : dso_v) {
1666 // For aux tracing, we don't know which binaries are traced.
1667 // So dump build ids for all binaries.
1668 if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
1669 continue;
1670 }
1671 if (dso->type() == DSO_KERNEL) {
1672 if (!GetKernelBuildId(&build_id)) {
1673 continue;
1674 }
1675 build_id_records.push_back(
1676 BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
1677 } else if (dso->type() == DSO_KERNEL_MODULE) {
1678 std::string path = dso->Path();
1679 std::string module_name = basename(&path[0]);
1680 if (android::base::EndsWith(module_name, ".ko")) {
1681 module_name = module_name.substr(0, module_name.size() - 3);
1682 }
1683 if (!GetModuleBuildId(module_name, &build_id)) {
1684 LOG(DEBUG) << "can't read build_id for module " << module_name;
1685 continue;
1686 }
1687 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
1688 } else if (dso->type() == DSO_ELF_FILE) {
1689 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
1690 continue;
1691 }
1692 if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) {
1693 LOG(DEBUG) << "Can't read build_id from file " << dso->Path();
1694 continue;
1695 }
1696 build_id_records.push_back(
1697 BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
1698 }
1699 }
1700 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
1701 return false;
1702 }
1703 return true;
1704 }
1705
DumpFileFeature()1706 bool RecordCommand::DumpFileFeature() {
1707 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1708 return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos());
1709 }
1710
DumpMetaInfoFeature(bool kernel_symbols_available)1711 bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
1712 std::unordered_map<std::string, std::string> info_map;
1713 info_map["simpleperf_version"] = GetSimpleperfVersion();
1714 info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
1715 info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
1716 // By storing event types information in perf.data, the readers of perf.data have the same
1717 // understanding of event types, even if they are on another machine.
1718 info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
1719 #if defined(__ANDROID__)
1720 info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
1721 android::base::GetProperty("ro.product.manufacturer", "").c_str(),
1722 android::base::GetProperty("ro.product.model", "").c_str(),
1723 android::base::GetProperty("ro.product.name", "").c_str());
1724 info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
1725 if (!app_package_name_.empty()) {
1726 info_map["app_package_name"] = app_package_name_;
1727 }
1728 #endif
1729 info_map["clockid"] = clockid_;
1730 info_map["timestamp"] = std::to_string(time(nullptr));
1731 info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
1732 return record_file_writer_->WriteMetaInfoFeature(info_map);
1733 }
1734
// Mark the dsos (and, with --dump-symbols, the symbols) hit by a sample so
// that only referenced file/symbol info is dumped into perf.data later.
// Walks both the sample ip and, when present, the full callchain.
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    // FindSymbol may redirect `dso` (it takes &dso), so tag the symbol on the
    // dso it actually resolved to.
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    // Context markers (>= PERF_CONTEXT_MAX) switch the kernel/user mode used
    // to look up subsequent callchain entries; start from the sample's mode.
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      if (ip >= PERF_CONTEXT_MAX) {
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        // Same marking logic as for the sample ip above, applied per frame.
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
          dso->CreateDumpId();
        }
      }
    }
  }
}
1791
RegisterRecordCommand()1792 void RegisterRecordCommand() {
1793 RegisterCommand("record",
1794 [] { return std::unique_ptr<Command>(new RecordCommand()); });
1795 }
1796