1 /*
2 * SystemMonitor.cpp
3 *
4 * This source file is part of the FoundationDB open source project
5 *
6 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 #include "flow/flow.h"
22 #include "flow/Platform.h"
23 #include "flow/TDMetric.actor.h"
24 #include "flow/SystemMonitor.h"
25
26 #if defined(ALLOC_INSTRUMENTATION) && defined(__linux__)
27 #include <cxxabi.h>
28 #endif
29
30 SystemMonitorMachineState machineState;
31
initializeSystemMonitorMachineState(SystemMonitorMachineState machineState)32 void initializeSystemMonitorMachineState(SystemMonitorMachineState machineState) {
33 ::machineState = machineState;
34
35 ASSERT(g_network);
36 ::machineState.monitorStartTime = now();
37 }
38
systemMonitor()39 void systemMonitor() {
40 static StatisticsState statState = StatisticsState();
41 customSystemMonitor("ProcessMetrics", &statState, true );
42 }
43
getSystemStatistics()44 SystemStatistics getSystemStatistics() {
45 static StatisticsState statState = StatisticsState();
46 const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
47 return getSystemStatistics(
48 machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState.systemState, false);
49 }
50
// TRACEALLOCATOR(size): expands to a complete "MemSample" TraceEvent expression reporting
// the approximate unused memory of FastAllocator<size>. "Count" is that byte figure divided
// by `size`; "Hash" is the literal "FastAllocatedUnused" followed by the stringified size.
// The expansion carries no trailing semicolon; the call site supplies it.
#define TRACEALLOCATOR(size) \
	TraceEvent("MemSample") \
	    .detail("Count", FastAllocator<size>::getApproximateMemoryUnused() / size) \
	    .detail("TotalSize", FastAllocator<size>::getApproximateMemoryUnused()) \
	    .detail("SampleCount", 1) \
	    .detail("Hash", "FastAllocatedUnused" #size) \
	    .detail("Bt", "na")

// DETAILALLOCATORMEMUSAGE(size): expands to three chained detail(...) calls WITHOUT a
// leading '.', so it is written as `.DETAILALLOCATORMEMUSAGE(N)` inside a TraceEvent chain.
// Each field name is suffixed with the stringified size.
#define DETAILALLOCATORMEMUSAGE(size) \
	detail("TotalMemory" #size, FastAllocator<size>::getTotalMemory()) \
	    .detail("ApproximateUnusedMemory" #size, FastAllocator<size>::getApproximateMemoryUnused()) \
	    .detail("ActiveThreads" #size, FastAllocator<size>::getActiveThreads())
53
customSystemMonitor(std::string eventName,StatisticsState * statState,bool machineMetrics)54 SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *statState, bool machineMetrics) {
55 const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
56 SystemStatistics currentStats = getSystemStatistics(machineState.folder.present() ? machineState.folder.get() : "",
57 &ipAddr, &statState->systemState, true);
58 NetworkData netData;
59 netData.init();
60 if (!DEBUG_DETERMINISM && currentStats.initialized) {
61 {
62 TraceEvent e(eventName.c_str());
63 e
64 .detail("Elapsed", currentStats.elapsed)
65 .detail("CPUSeconds", currentStats.processCPUSeconds)
66 .detail("MainThreadCPUSeconds", currentStats.mainThreadCPUSeconds)
67 .detail("UptimeSeconds", now() - machineState.monitorStartTime)
68 .detail("Memory", currentStats.processMemory)
69 .detail("ResidentMemory", currentStats.processResidentMemory)
70 .detail("UnusedAllocatedMemory", getTotalUnusedAllocatedMemory())
71 .detail("MbpsSent", ((netData.bytesSent - statState->networkState.bytesSent) * 8e-6) / currentStats.elapsed)
72 .detail("MbpsReceived", ((netData.bytesReceived - statState->networkState.bytesReceived) * 8e-6) / currentStats.elapsed)
73 .detail("DiskTotalBytes", currentStats.processDiskTotalBytes)
74 .detail("DiskFreeBytes", currentStats.processDiskFreeBytes)
75 .detail("DiskQueueDepth", currentStats.processDiskQueueDepth)
76 .detail("DiskIdleSeconds", currentStats.processDiskIdleSeconds)
77 .detail("DiskReads", currentStats.processDiskRead)
78 .detail("DiskWrites", currentStats.processDiskWrite)
79 .detail("DiskReadsCount", currentStats.processDiskReadCount)
80 .detail("DiskWritesCount", currentStats.processDiskWriteCount)
81 .detail("DiskWriteSectors", currentStats.processDiskWriteSectors)
82 .detail("DiskReadSectors", currentStats.processDiskReadSectors)
83 .detail("FileWrites", netData.countFileLogicalWrites - statState->networkState.countFileLogicalWrites)
84 .detail("FileReads", netData.countFileLogicalReads - statState->networkState.countFileLogicalReads)
85 .detail("CacheReadBytes", netData.countFileCacheReadBytes - statState->networkState.countFileCacheReadBytes)
86 .detail("CacheFinds", netData.countFileCacheFinds - statState->networkState.countFileCacheFinds)
87 .detail("CacheWritesBlocked", netData.countFileCacheWritesBlocked - statState->networkState.countFileCacheWritesBlocked)
88 .detail("CacheReadsBlocked", netData.countFileCacheReadsBlocked - statState->networkState.countFileCacheReadsBlocked)
89 .detail("CachePageReadsMerged", netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
90 .detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
91 .detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
92 .detail("ZoneID", machineState.zoneId)
93 .detail("MachineID", machineState.machineId)
94 .detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
95 .detail("AIOCollectCount", netData.countAIOCollect - statState->networkState.countAIOCollect)
96 .detail("AIOSubmitLag", (g_network->networkMetrics.secSquaredSubmit - statState->networkMetricsState.secSquaredSubmit) / currentStats.elapsed)
97 .detail("AIODiskStall", (g_network->networkMetrics.secSquaredDiskStall - statState->networkMetricsState.secSquaredDiskStall) / currentStats.elapsed)
98 .detail("CurrentConnections", netData.countConnEstablished - netData.countConnClosedWithError - netData.countConnClosedWithoutError)
99 .detail("ConnectionsEstablished", (double) (netData.countConnEstablished - statState->networkState.countConnEstablished) / currentStats.elapsed)
100 .detail("ConnectionsClosed", ((netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) + (netData.countConnClosedWithoutError - statState->networkState.countConnClosedWithoutError)) / currentStats.elapsed)
101 .detail("ConnectionErrors", (netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) / currentStats.elapsed)
102 .trackLatest(eventName.c_str());
103
104 TraceEvent("MemoryMetrics")
105 .DETAILALLOCATORMEMUSAGE(16)
106 .DETAILALLOCATORMEMUSAGE(32)
107 .DETAILALLOCATORMEMUSAGE(64)
108 .DETAILALLOCATORMEMUSAGE(128)
109 .DETAILALLOCATORMEMUSAGE(256)
110 .DETAILALLOCATORMEMUSAGE(512)
111 .DETAILALLOCATORMEMUSAGE(1024)
112 .DETAILALLOCATORMEMUSAGE(2048)
113 .DETAILALLOCATORMEMUSAGE(4096)
114 .DETAILALLOCATORMEMUSAGE(8192)
115 .detail("HugeArenaMemory", g_hugeArenaMemory);
116
117 TraceEvent n("NetworkMetrics");
118 n
119 .detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)
120 .detail("WontSleep", netData.countWontSleep - statState->networkState.countWontSleep)
121 .detail("Yields", netData.countYields - statState->networkState.countYields)
122 .detail("YieldCalls", netData.countYieldCalls - statState->networkState.countYieldCalls)
123 .detail("YieldCallsTrue", netData.countYieldCallsTrue - statState->networkState.countYieldCallsTrue)
124 .detail("SlowTaskSignals", netData.countSlowTaskSignals - statState->networkState.countSlowTaskSignals)
125 .detail("YieldBigStack", netData.countYieldBigStack - statState->networkState.countYieldBigStack)
126 .detail("RunLoopIterations", netData.countRunLoop - statState->networkState.countRunLoop)
127 .detail("TimersExecuted", netData.countTimers - statState->networkState.countTimers)
128 .detail("TasksExecuted", netData.countTasks - statState->networkState.countTasks)
129 .detail("ASIOEventsProcessed", netData.countASIOEvents - statState->networkState.countASIOEvents)
130 .detail("ReadCalls", netData.countReads - statState->networkState.countReads)
131 .detail("WriteCalls", netData.countWrites - statState->networkState.countWrites)
132 .detail("ReadProbes", netData.countReadProbes - statState->networkState.countReadProbes)
133 .detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
134 .detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
135 .detail("PacketsGenerated", netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
136 .detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock);
137
138 for (int i = 0; i<NetworkMetrics::SLOW_EVENT_BINS; i++)
139 if (int c = g_network->networkMetrics.countSlowEvents[i] - statState->networkMetricsState.countSlowEvents[i])
140 n.detail(format("SlowTask%dM", 1 << i).c_str(), c);
141 for (int i = 0; i<NetworkMetrics::PRIORITY_BINS; i++)
142 if (double x = g_network->networkMetrics.secSquaredPriorityBlocked[i] - statState->networkMetricsState.secSquaredPriorityBlocked[i])
143 n.detail(format("S2Pri%d", g_network->networkMetrics.priorityBins[i]).c_str(), x);
144 }
145
146 if(machineMetrics) {
147 TraceEvent("MachineMetrics").detail("Elapsed", currentStats.elapsed)
148 .detail("MbpsSent", currentStats.machineMegabitsSent / currentStats.elapsed)
149 .detail("MbpsReceived", currentStats.machineMegabitsReceived / currentStats.elapsed)
150 .detail("OutSegs", currentStats.machineOutSegs)
151 .detail("RetransSegs", currentStats.machineRetransSegs)
152 .detail("CPUSeconds", currentStats.machineCPUSeconds)
153 .detail("TotalMemory", currentStats.machineTotalRAM)
154 .detail("CommittedMemory", currentStats.machineCommittedRAM)
155 .detail("AvailableMemory", currentStats.machineAvailableRAM)
156 .detail("ZoneID", machineState.zoneId)
157 .detail("MachineID", machineState.machineId)
158 .trackLatest("MachineMetrics");
159 }
160 }
161
162 #ifdef ALLOC_INSTRUMENTATION
163 {
164 static double firstTime = 0.0;
165 if(firstTime == 0.0) firstTime = now();
166 if( now() - firstTime > 10 || g_network->isSimulated() ) {
167 firstTime = now();
168 std::vector< std::pair<std::string, const char*> > typeNames;
169 for( auto i = allocInstr.begin(); i != allocInstr.end(); ++i ) {
170 std::string s;
171 #ifdef __linux__
172 char *demangled = abi::__cxa_demangle(i->first, NULL, NULL, NULL);
173 if (demangled) {
174 s = demangled;
175 if (StringRef(s).startsWith(LiteralStringRef("(anonymous namespace)::")))
176 s = s.substr(LiteralStringRef("(anonymous namespace)::").size());
177 free(demangled);
178 } else
179 s = i->first;
180 #else
181 s = i->first;
182 if (StringRef(s).startsWith(LiteralStringRef("class `anonymous namespace'::")))
183 s = s.substr(LiteralStringRef("class `anonymous namespace'::").size());
184 else if (StringRef(s).startsWith(LiteralStringRef("class ")))
185 s = s.substr(LiteralStringRef("class ").size());
186 else if (StringRef(s).startsWith(LiteralStringRef("struct ")))
187 s = s.substr(LiteralStringRef("struct ").size());
188 #endif
189 typeNames.push_back( std::make_pair(s, i->first) );
190 }
191 std::sort(typeNames.begin(), typeNames.end());
192 for(int i=0; i<typeNames.size(); i++) {
193 const char* n = typeNames[i].second;
194 auto& f = allocInstr[n];
195 if(f.maxAllocated > 10000)
196 TraceEvent("AllocInstrument").detail("CurrentAlloc", f.allocCount-f.deallocCount)
197 .detail("Name", typeNames[i].first.c_str());
198 }
199
200 std::unordered_map<uint32_t, BackTraceAccount> traceCounts;
201 size_t memSampleSize;
202 memSample_entered = true;
203 {
204 ThreadSpinLockHolder holder(memLock);
205 traceCounts = backTraceLookup;
206 memSampleSize = memSample.size();
207 }
208 memSample_entered = false;
209
210 uint64_t totalSize = 0;
211 uint64_t totalCount = 0;
212 for( auto i = traceCounts.begin(); i != traceCounts.end(); ++i ) {
213 char buf[1024];
214 std::vector<void *> *frames = i->second.backTrace;
215 std::string backTraceStr;
216 #if defined(_WIN32)
217 for (int j = 1; j < frames->size(); j++) {
218 _snprintf(buf, 1024, "%p ", frames->at(j));
219 backTraceStr += buf;
220 }
221 #else
222 backTraceStr = platform::format_backtrace(&(*frames)[0], frames->size());
223 #endif
224
225 TraceEvent("MemSample")
226 .detail("Count", (int64_t)i->second.count)
227 .detail("TotalSize", i->second.totalSize)
228 .detail("SampleCount", i->second.sampleCount)
229 .detail("Hash", format("%lld", i->first))
230 .detail("Bt", backTraceStr);
231
232 totalSize += i->second.totalSize;
233 totalCount += i->second.count;
234 }
235
236 TraceEvent("MemSampleSummary")
237 .detail("InverseByteSampleRatio", SAMPLE_BYTES)
238 .detail("MemorySamples", memSampleSize)
239 .detail("BackTraces", traceCounts.size())
240 .detail("TotalSize", totalSize)
241 .detail("TotalCount", totalCount);
242
243 TraceEvent("MemSample")
244 .detail("Count", traceCounts.size())
245 .detail("TotalSize", traceCounts.size() * ((int)(sizeof(uint32_t) + sizeof(size_t) + sizeof(size_t))))
246 .detail("SampleCount", traceCounts.size())
247 .detail("Hash", "backTraces")
248 .detail("Bt", "na");
249
250 TraceEvent("MemSample")
251 .detail("Count", memSampleSize)
252 .detail("TotalSize", memSampleSize * ((int)(sizeof(void*) + sizeof(uint32_t) + sizeof(size_t))))
253 .detail("SampleCount", memSampleSize)
254 .detail("Hash", "memSamples")
255 .detail("Bt", "na");
256 TRACEALLOCATOR(16);
257 TRACEALLOCATOR(32);
258 TRACEALLOCATOR(64);
259 TRACEALLOCATOR(128);
260 TRACEALLOCATOR(256);
261 TRACEALLOCATOR(512);
262 TRACEALLOCATOR(1024);
263 TRACEALLOCATOR(2048);
264 TRACEALLOCATOR(4096);
265 TRACEALLOCATOR(8192);
266 }
267 }
268 #endif
269 statState->networkMetricsState = g_network->networkMetrics;
270 statState->networkState = netData;
271 return currentStats;
272 }
273