1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/task_manager/sampling/shared_sampler.h"
6
7 #include <windows.h>
8 #include <winternl.h>
9
10 #include <algorithm>
11
12 #include "base/bind.h"
13 #include "base/bit_cast.h"
14 #include "base/command_line.h"
15 #include "base/path_service.h"
16 #include "base/time/time.h"
17 #include "chrome/browser/task_manager/sampling/shared_sampler_win_defines.h"
18 #include "chrome/browser/task_manager/task_manager_observer.h"
19 #include "chrome/common/chrome_constants.h"
20 #include "content/public/browser/browser_thread.h"
21
22 // ntstatus.h conflicts with windows.h so define this locally.
23 #define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
24 #define STATUS_BUFFER_TOO_SMALL ((NTSTATUS)0xC0000023L)
25 #define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L)
26
27 namespace task_manager {
28
29 static SharedSampler::QuerySystemInformationForTest
30 g_query_system_information_for_test = nullptr;
31
32 // static
SetQuerySystemInformationForTest(QuerySystemInformationForTest query_system_information)33 void SharedSampler::SetQuerySystemInformationForTest(
34 QuerySystemInformationForTest query_system_information) {
35 g_query_system_information_for_test = query_system_information;
36 }
37
38 namespace {
39
40 // Simple memory buffer wrapper for passing the data out of
41 // QuerySystemProcessInformation.
42 class ByteBuffer {
43 public:
ByteBuffer(size_t capacity)44 explicit ByteBuffer(size_t capacity)
45 : size_(0), capacity_(0) {
46 if (capacity > 0)
47 grow(capacity);
48 }
49
~ByteBuffer()50 ~ByteBuffer() {}
51
data()52 BYTE* data() { return data_.get(); }
53
size()54 size_t size() { return size_; }
55
set_size(size_t new_size)56 void set_size(size_t new_size) {
57 DCHECK_LE(new_size, capacity_);
58 size_ = new_size;
59 }
60
capacity()61 size_t capacity() { return capacity_; }
62
grow(size_t new_capacity)63 void grow(size_t new_capacity) {
64 DCHECK_GT(new_capacity, capacity_);
65 capacity_ = new_capacity;
66 data_.reset(new BYTE[new_capacity]);
67 }
68
69 private:
70 std::unique_ptr<BYTE[]> data_;
71 size_t size_;
72 size_t capacity_;
73
74 DISALLOW_COPY_AND_ASSIGN(ByteBuffer);
75 };
76
77 // Wrapper for NtQuerySystemProcessInformation with buffer reallocation logic.
QuerySystemProcessInformation(ByteBuffer * buffer)78 bool QuerySystemProcessInformation(ByteBuffer* buffer) {
79 HMODULE ntdll = ::GetModuleHandle(L"ntdll.dll");
80 if (!ntdll) {
81 NOTREACHED();
82 return false;
83 }
84
85 NTQUERYSYSTEMINFORMATION nt_query_system_information_ptr =
86 reinterpret_cast<NTQUERYSYSTEMINFORMATION>(
87 ::GetProcAddress(ntdll, "NtQuerySystemInformation"));
88 if (!nt_query_system_information_ptr) {
89 NOTREACHED();
90 return false;
91 }
92
93 NTSTATUS result;
94
95 // There is a potential race condition between growing the buffer and new
96 // processes being created. Try a few times before giving up.
97 for (int i = 0; i < 10; i++) {
98 ULONG data_size = 0;
99 ULONG buffer_size = static_cast<ULONG>(buffer->capacity());
100
101 if (g_query_system_information_for_test) {
102 data_size =
103 g_query_system_information_for_test(buffer->data(), buffer_size);
104 result =
105 (data_size > buffer_size) ? STATUS_BUFFER_TOO_SMALL : STATUS_SUCCESS;
106 } else {
107 result = nt_query_system_information_ptr(
108 SystemProcessInformation, buffer->data(), buffer_size, &data_size);
109 }
110
111 if (result == STATUS_SUCCESS) {
112 buffer->set_size(data_size);
113 break;
114 }
115
116 if (result == STATUS_INFO_LENGTH_MISMATCH ||
117 result == STATUS_BUFFER_TOO_SMALL) {
118 // Insufficient buffer. Grow to the returned |data_size| plus 10% extra
119 // to avoid frequent reallocations and try again.
120 DCHECK_GT(data_size, buffer_size);
121 buffer->grow(static_cast<ULONG>(data_size * 1.1));
122 } else {
123 // An error other than the two above.
124 break;
125 }
126 }
127
128 return result == STATUS_SUCCESS;
129 }
130
131 // Per-thread data extracted from SYSTEM_THREAD_INFORMATION and stored in a
132 // snapshot. This structure is accessed only on the worker thread.
133 struct ThreadData {
134 base::PlatformThreadId thread_id;
135 ULONG context_switches;
136 };
137
138 // Per-process data extracted from SYSTEM_PROCESS_INFORMATION and stored in a
139 // snapshot. This structure is accessed only on the worker thread.
140 struct ProcessData {
141 ProcessData() = default;
142 ProcessData(ProcessData&&) = default;
143
144 int64_t hard_fault_count;
145 base::Time start_time;
146 base::TimeDelta cpu_time;
147 std::vector<ThreadData> threads;
148
149 private:
150 DISALLOW_COPY_AND_ASSIGN(ProcessData);
151 };
152
153 typedef std::map<base::ProcessId, ProcessData> ProcessDataMap;
154
CountContextSwitchesDelta(const ProcessData & prev_process_data,const ProcessData & new_process_data)155 ULONG CountContextSwitchesDelta(const ProcessData& prev_process_data,
156 const ProcessData& new_process_data) {
157 // This one pass algorithm relies on the threads vectors to be
158 // ordered by thread_id.
159 ULONG delta = 0;
160 size_t prev_index = 0;
161
162 for (const auto& new_thread : new_process_data.threads) {
163 ULONG prev_thread_context_switches = 0;
164
165 // Iterate over the process threads from the previous snapshot skipping
166 // threads that don't exist anymore. Please note that this iteration starts
167 // from the last known prev_index and goes until a previous snapshot's
168 // thread ID >= the current snapshot's thread ID. So the overall algorithm
169 // is linear.
170 for (; prev_index < prev_process_data.threads.size(); ++prev_index) {
171 const auto& prev_thread = prev_process_data.threads[prev_index];
172 if (prev_thread.thread_id == new_thread.thread_id) {
173 // Threads match between two snapshots. Use the previous snapshot
174 // thread's context_switches to subtract from the delta.
175 prev_thread_context_switches = prev_thread.context_switches;
176 ++prev_index;
177 break;
178 }
179
180 if (prev_thread.thread_id > new_thread.thread_id) {
181 // This is due to a new thread that didn't exist in the previous
182 // snapshot. Keep the zero value of |prev_thread_context_switches| which
183 // essentially means the entire number of context switches of the new
184 // thread will be added to the delta.
185 break;
186 }
187 }
188
189 delta += new_thread.context_switches - prev_thread_context_switches;
190 }
191
192 return delta;
193 }
194
195 // Seeks a matching ProcessData by Process ID in a previous snapshot.
196 // This uses the fact that ProcessDataMap entries are ordered by Process ID.
SeekInPreviousSnapshot(base::ProcessId process_id,ProcessDataMap::const_iterator * iter_to_advance,const ProcessDataMap::const_iterator & range_end)197 const ProcessData* SeekInPreviousSnapshot(
198 base::ProcessId process_id, ProcessDataMap::const_iterator* iter_to_advance,
199 const ProcessDataMap::const_iterator& range_end) {
200 for (; *iter_to_advance != range_end; ++(*iter_to_advance)) {
201 if ((*iter_to_advance)->first == process_id) {
202 return &((*iter_to_advance)++)->second;
203 }
204 if ((*iter_to_advance)->first > process_id)
205 break;
206 }
207
208 return nullptr;
209 }
210
211 // A wrapper function converting ticks (in units of 100 ns) to Time.
ConvertTicksToTime(uint64_t ticks)212 base::Time ConvertTicksToTime(uint64_t ticks) {
213 FILETIME ft = bit_cast<FILETIME, uint64_t>(ticks);
214 return base::Time::FromFileTime(ft);
215 }
216
217 // A wrapper function converting ticks (in units of 100 ns) to TimeDelta.
ConvertTicksToTimeDelta(uint64_t ticks)218 base::TimeDelta ConvertTicksToTimeDelta(uint64_t ticks) {
219 return base::TimeDelta::FromMicroseconds(ticks / 10);
220 }
221
222 } // namespace
223
224 // ProcessDataSnapshot gets created and accessed only on the worker thread.
225 // This is used to calculate metrics like Idle Wakeups / sec that require
226 // a delta between two snapshots.
227 // Please note that ProcessDataSnapshot has to be outside of anonymous namespace
228 // in order to match the declaration in shared_sampler.h.
229 struct ProcessDataSnapshot {
230 ProcessDataMap processes;
231 base::TimeTicks timestamp;
232 };
233
SharedSampler(const scoped_refptr<base::SequencedTaskRunner> & blocking_pool_runner)234 SharedSampler::SharedSampler(
235 const scoped_refptr<base::SequencedTaskRunner>& blocking_pool_runner)
236 : refresh_flags_(0), previous_buffer_size_(0),
237 supported_image_names_(GetSupportedImageNames()),
238 blocking_pool_runner_(blocking_pool_runner) {
239 DCHECK(blocking_pool_runner.get());
240
241 // This object will be created on the UI thread, however the sequenced checker
242 // will be used to assert we're running the expensive operations on one of the
243 // blocking pool threads.
244 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
245 worker_pool_sequenced_checker_.DetachFromSequence();
246 }
247
~SharedSampler()248 SharedSampler::~SharedSampler() {}
249
GetSupportedFlags() const250 int64_t SharedSampler::GetSupportedFlags() const {
251 return REFRESH_TYPE_IDLE_WAKEUPS | REFRESH_TYPE_START_TIME |
252 REFRESH_TYPE_CPU_TIME | REFRESH_TYPE_HARD_FAULTS;
253 }
254
RegisterCallback(base::ProcessId process_id,OnSamplingCompleteCallback on_sampling_complete)255 void SharedSampler::RegisterCallback(
256 base::ProcessId process_id,
257 OnSamplingCompleteCallback on_sampling_complete) {
258 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
259
260 if (process_id == 0)
261 return;
262
263 bool result =
264 callbacks_map_.emplace(process_id, std::move(on_sampling_complete))
265 .second;
266 DCHECK(result);
267 }
268
UnregisterCallback(base::ProcessId process_id)269 void SharedSampler::UnregisterCallback(base::ProcessId process_id) {
270 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
271
272 if (process_id == 0)
273 return;
274
275 callbacks_map_.erase(process_id);
276
277 if (callbacks_map_.empty())
278 ClearState();
279 }
280
Refresh(base::ProcessId process_id,int64_t refresh_flags)281 void SharedSampler::Refresh(base::ProcessId process_id, int64_t refresh_flags) {
282 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
283 DCHECK_NE(0, refresh_flags & GetSupportedFlags());
284
285 if (process_id == 0)
286 return;
287
288 DCHECK(callbacks_map_.find(process_id) != callbacks_map_.end());
289
290 if (refresh_flags_ == 0) {
291 base::PostTaskAndReplyWithResult(
292 blocking_pool_runner_.get(), FROM_HERE,
293 base::BindOnce(&SharedSampler::RefreshOnWorkerThread, this),
294 base::BindOnce(&SharedSampler::OnRefreshDone, this));
295 } else {
296 // http://crbug.com/678471
297 // A group of consecutive Refresh calls should all specify the same refresh
298 // flags. Rarely RefreshOnWorkerThread could take a long time (> 1 sec),
299 // long enough for a next refresh cycle to start before results are ready
300 // from a previous cycle. In that case refresh_flags_ would still remain
301 // set to the previous cycle refresh flags which might be different than
302 // this cycle refresh flags if a column was added or removed between the two
303 // cycles. The worst that could happen in that condition is that results for
304 // a newly added column would be missing for one extra refresh cycle.
305 }
306
307 refresh_flags_ |= refresh_flags;
308 }
309
ClearState()310 void SharedSampler::ClearState() {
311 previous_snapshot_.reset();
312 }
313
RefreshOnWorkerThread()314 SharedSampler::AllSamplingResults SharedSampler::RefreshOnWorkerThread() {
315 DCHECK(worker_pool_sequenced_checker_.CalledOnValidSequence());
316
317 AllSamplingResults results;
318
319 std::unique_ptr<ProcessDataSnapshot> snapshot = CaptureSnapshot();
320 if (snapshot) {
321 if (previous_snapshot_) {
322 results = MakeResultsFromTwoSnapshots(*previous_snapshot_, *snapshot);
323 } else {
324 results = MakeResultsFromSnapshot(*snapshot);
325 }
326
327 previous_snapshot_ = std::move(snapshot);
328 } else {
329 // Failed to get snapshot. This is unlikely.
330 ClearState();
331 }
332
333 return results;
334 }
335
336 /* static */
GetSupportedImageNames()337 std::vector<base::FilePath> SharedSampler::GetSupportedImageNames() {
338 const wchar_t kNacl64Exe[] = L"nacl64.exe";
339
340 std::vector<base::FilePath> supported_names;
341
342 base::FilePath current_exe;
343 if (base::PathService::Get(base::FILE_EXE, ¤t_exe))
344 supported_names.push_back(current_exe.BaseName());
345
346 supported_names.push_back(
347 base::FilePath(chrome::kBrowserProcessExecutableName));
348 supported_names.push_back(base::FilePath(kNacl64Exe));
349
350 return supported_names;
351 }
352
IsSupportedImageName(base::FilePath::StringPieceType image_name) const353 bool SharedSampler::IsSupportedImageName(
354 base::FilePath::StringPieceType image_name) const {
355 for (const base::FilePath& supported_name : supported_image_names_) {
356 if (base::FilePath::CompareEqualIgnoreCase(image_name,
357 supported_name.value()))
358 return true;
359 }
360
361 return false;
362 }
363
CaptureSnapshot()364 std::unique_ptr<ProcessDataSnapshot> SharedSampler::CaptureSnapshot() {
365 DCHECK(worker_pool_sequenced_checker_.CalledOnValidSequence());
366
367 // Preallocate the buffer with the size determined on the previous call to
368 // QuerySystemProcessInformation. This should be sufficient most of the time.
369 // QuerySystemProcessInformation will grow the buffer if necessary.
370 ByteBuffer data_buffer(previous_buffer_size_);
371
372 if (!QuerySystemProcessInformation(&data_buffer))
373 return std::unique_ptr<ProcessDataSnapshot>();
374
375 previous_buffer_size_ = data_buffer.capacity();
376
377 std::unique_ptr<ProcessDataSnapshot> snapshot(new ProcessDataSnapshot);
378 snapshot->timestamp = base::TimeTicks::Now();
379
380 for (size_t offset = 0; offset < data_buffer.size(); ) {
381 const auto* pi = reinterpret_cast<const SYSTEM_PROCESS_INFORMATION*>(
382 data_buffer.data() + offset);
383
384 // Validate that the offset is valid and all needed data is within
385 // the buffer boundary.
386 if (offset + sizeof(SYSTEM_PROCESS_INFORMATION) > data_buffer.size())
387 break;
388 if (pi->NumberOfThreads > 0 &&
389 (offset + sizeof(SYSTEM_PROCESS_INFORMATION) +
390 (pi->NumberOfThreads - 1) * sizeof(SYSTEM_THREAD_INFORMATION) >
391 data_buffer.size())) {
392 break;
393 }
394
395 if (pi->ImageName.Buffer) {
396 // Validate that the image name is within the buffer boundary.
397 // ImageName.Length seems to be in bytes rather than characters.
398 ULONG image_name_offset =
399 reinterpret_cast<BYTE*>(pi->ImageName.Buffer) - data_buffer.data();
400 if (image_name_offset + pi->ImageName.Length > data_buffer.size())
401 break;
402
403 // Check if this is a chrome process. Ignore all other processes.
404 if (IsSupportedImageName(pi->ImageName.Buffer)) {
405 // Collect enough data to be able to do a diff between two snapshots.
406 // Some threads might stop or new threads might be created between two
407 // snapshots. If a thread with a large number of context switches gets
408 // terminated the total number of context switches for the process might
409 // go down and the delta would be negative.
410 // To avoid that we need to compare thread IDs between two snapshots and
411 // not count context switches for threads that are missing in the most
412 // recent snapshot.
413 ProcessData process_data;
414 process_data.hard_fault_count = pi->HardFaultCount;
415 process_data.start_time = ConvertTicksToTime(pi->CreateTime);
416 process_data.cpu_time =
417 ConvertTicksToTimeDelta(pi->KernelTime + pi->UserTime);
418
419 // Iterate over threads and store each thread's ID and number of context
420 // switches.
421 for (ULONG thread_index = 0; thread_index < pi->NumberOfThreads;
422 ++thread_index) {
423 const SYSTEM_THREAD_INFORMATION* ti = &pi->Threads[thread_index];
424 if (ti->ClientId.UniqueProcess != pi->ProcessId)
425 continue;
426
427 ThreadData thread_data;
428 thread_data.thread_id = static_cast<base::PlatformThreadId>(
429 reinterpret_cast<uintptr_t>(ti->ClientId.UniqueThread));
430 thread_data.context_switches = ti->ContextSwitchCount;
431 process_data.threads.push_back(thread_data);
432 }
433
434 // Order thread data by thread ID to help diff two snapshots.
435 std::sort(process_data.threads.begin(), process_data.threads.end(),
436 [](const ThreadData& l, const ThreadData r) {
437 return l.thread_id < r.thread_id;
438 });
439
440 base::ProcessId process_id = static_cast<base::ProcessId>(
441 reinterpret_cast<uintptr_t>(pi->ProcessId));
442 bool inserted = snapshot->processes.insert(
443 std::make_pair(process_id, std::move(process_data))).second;
444 DCHECK(inserted);
445 }
446 }
447
448 // Check for end of the list.
449 if (!pi->NextEntryOffset)
450 break;
451
452 // Jump to the next entry.
453 offset += pi->NextEntryOffset;
454 }
455
456 return snapshot;
457 }
458
MakeResultsFromTwoSnapshots(const ProcessDataSnapshot & prev_snapshot,const ProcessDataSnapshot & snapshot)459 SharedSampler::AllSamplingResults SharedSampler::MakeResultsFromTwoSnapshots(
460 const ProcessDataSnapshot& prev_snapshot,
461 const ProcessDataSnapshot& snapshot) {
462 // Time delta in seconds.
463 double time_delta = (snapshot.timestamp - prev_snapshot.timestamp)
464 .InSecondsF();
465
466 // Iterate over processes in both snapshots in parallel. This algorithm relies
467 // on map entries being ordered by Process ID.
468 ProcessDataMap::const_iterator prev_iter = prev_snapshot.processes.begin();
469
470 AllSamplingResults results;
471 results.reserve(snapshot.processes.size());
472 for (const auto& current_entry : snapshot.processes) {
473 base::ProcessId process_id = current_entry.first;
474 const ProcessData& process = current_entry.second;
475
476 const ProcessData* prev_snapshot_process = SeekInPreviousSnapshot(
477 process_id, &prev_iter, prev_snapshot.processes.end());
478
479 // Delta between the old snapshot and the new snapshot.
480 int64_t hard_faults_delta = 0;
481 int idle_wakeups_delta;
482
483 if (prev_snapshot_process) {
484 hard_faults_delta =
485 process.hard_fault_count - prev_snapshot_process->hard_fault_count;
486 // Processes match between two snapshots. Diff context switches.
487 idle_wakeups_delta =
488 CountContextSwitchesDelta(*prev_snapshot_process, process);
489 } else {
490 // Process is missing in the previous snapshot.
491 // Use entire number of context switches of the current process.
492 idle_wakeups_delta = CountContextSwitchesDelta(ProcessData(), process);
493 }
494
495 ProcessIdAndSamplingResult result;
496 result.process_id = process_id;
497 result.data.hard_faults_per_second =
498 static_cast<int>(round(hard_faults_delta / time_delta));
499 result.data.idle_wakeups_per_second =
500 static_cast<int>(round(idle_wakeups_delta / time_delta));
501 result.data.start_time = process.start_time;
502 result.data.cpu_time = process.cpu_time;
503 results.push_back(result);
504 }
505
506 return results;
507 }
508
MakeResultsFromSnapshot(const ProcessDataSnapshot & snapshot)509 SharedSampler::AllSamplingResults SharedSampler::MakeResultsFromSnapshot(
510 const ProcessDataSnapshot& snapshot) {
511 AllSamplingResults results;
512 results.reserve(snapshot.processes.size());
513 for (const auto& pair : snapshot.processes) {
514 ProcessIdAndSamplingResult result;
515 result.process_id = pair.first;
516 // Use 0 for Idle Wakeups / sec in this case. This is consistent with
517 // ProcessMetrics::CalculateIdleWakeupsPerSecond implementation.
518 result.data.hard_faults_per_second = 0;
519 result.data.idle_wakeups_per_second = 0;
520 result.data.start_time = pair.second.start_time;
521 result.data.cpu_time = pair.second.cpu_time;
522 results.push_back(result);
523 }
524 return results;
525 }
526
OnRefreshDone(AllSamplingResults refresh_results)527 void SharedSampler::OnRefreshDone(AllSamplingResults refresh_results) {
528 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
529 DCHECK_NE(0, refresh_flags_);
530
531 size_t result_index = 0;
532
533 for (const auto& callback_entry : callbacks_map_) {
534 base::ProcessId process_id = callback_entry.first;
535 SamplingResult process_result;
536
537 // Match refresh result by |process_id|.
538 // This relies on refresh results being ordered by Process ID.
539 // Please note that |refresh_results| might contain some extra entries that
540 // don't exist in |callbacks_map_| if there is more than one instance of
541 // Chrome. It might be missing some entries too if there is a race condition
542 // between getting process information on the worker thread and adding a
543 // corresponding TaskGroup to the task manager.
544 for (; result_index < refresh_results.size(); ++result_index) {
545 const auto& result = refresh_results[result_index];
546 if (result.process_id == process_id) {
547 // Data matched in |refresh_results|.
548 process_result = std::move(result.data);
549 ++result_index;
550 break;
551 }
552
553 if (result.process_id > process_id) {
554 // An entry corresponding to |process_id| is missing. See above.
555 break;
556 }
557 }
558
559 callback_entry.second.Run(std::move(process_result));
560 }
561
562 // Reset refresh_results_ to trigger RefreshOnWorkerThread next time Refresh
563 // is called.
564 refresh_flags_ = 0;
565 }
566
567 } // namespace task_manager
568