1 /*
2  * SystemMonitor.cpp
3  *
4  * This source file is part of the FoundationDB open source project
5  *
6  * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 #include "flow/flow.h"
22 #include "flow/Platform.h"
23 #include "flow/TDMetric.actor.h"
24 #include "flow/SystemMonitor.h"
25 
26 #if defined(ALLOC_INSTRUMENTATION) && defined(__linux__)
27 #include <cxxabi.h>
28 #endif
29 
30 SystemMonitorMachineState machineState;
31 
initializeSystemMonitorMachineState(SystemMonitorMachineState machineState)32 void initializeSystemMonitorMachineState(SystemMonitorMachineState machineState) {
33 	::machineState = machineState;
34 
35 	ASSERT(g_network);
36 	::machineState.monitorStartTime = now();
37 }
38 
systemMonitor()39 void systemMonitor() {
40 	static StatisticsState statState = StatisticsState();
41 	customSystemMonitor("ProcessMetrics", &statState, true );
42 }
43 
getSystemStatistics()44 SystemStatistics getSystemStatistics() {
45 	static StatisticsState statState = StatisticsState();
46 	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
47 	return getSystemStatistics(
48 		machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState.systemState, false);
49 }
50 
// Expands to a "MemSample" TraceEvent expression (no trailing semicolon) reporting the
// value of FastAllocator<size>::getApproximateMemoryUnused(), both divided by `size`
// ("Count") and as-is ("TotalSize"). The "Hash" field is "FastAllocatedUnused"
// followed by the stringized size.
#define TRACEALLOCATOR( size ) \
	TraceEvent("MemSample") \
		.detail("Count", FastAllocator<size>::getApproximateMemoryUnused()/size) \
		.detail("TotalSize", FastAllocator<size>::getApproximateMemoryUnused()) \
		.detail("SampleCount", 1) \
		.detail("Hash", "FastAllocatedUnused" #size ) \
		.detail("Bt", "na")

// Expands to three chained detail(...) calls, without a leading '.', so it is used as
// `.DETAILALLOCATORMEMUSAGE(n)` inside a TraceEvent chain. Each field name is suffixed
// with the stringized size.
#define DETAILALLOCATORMEMUSAGE( size ) \
	detail("TotalMemory"#size, FastAllocator<size>::getTotalMemory()) \
		.detail("ApproximateUnusedMemory"#size, FastAllocator<size>::getApproximateMemoryUnused()) \
		.detail("ActiveThreads"#size, FastAllocator<size>::getActiveThreads())
53 
customSystemMonitor(std::string eventName,StatisticsState * statState,bool machineMetrics)54 SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *statState, bool machineMetrics) {
55 	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
56 	SystemStatistics currentStats = getSystemStatistics(machineState.folder.present() ? machineState.folder.get() : "",
57 	                                                    &ipAddr, &statState->systemState, true);
58 	NetworkData netData;
59 	netData.init();
60 	if (!DEBUG_DETERMINISM && currentStats.initialized) {
61 		{
62 			TraceEvent e(eventName.c_str());
63 			e
64 				.detail("Elapsed", currentStats.elapsed)
65 				.detail("CPUSeconds", currentStats.processCPUSeconds)
66 				.detail("MainThreadCPUSeconds", currentStats.mainThreadCPUSeconds)
67 				.detail("UptimeSeconds", now() - machineState.monitorStartTime)
68 				.detail("Memory", currentStats.processMemory)
69 				.detail("ResidentMemory", currentStats.processResidentMemory)
70 				.detail("UnusedAllocatedMemory", getTotalUnusedAllocatedMemory())
71 				.detail("MbpsSent", ((netData.bytesSent - statState->networkState.bytesSent) * 8e-6) / currentStats.elapsed)
72 				.detail("MbpsReceived", ((netData.bytesReceived - statState->networkState.bytesReceived) * 8e-6) / currentStats.elapsed)
73 				.detail("DiskTotalBytes", currentStats.processDiskTotalBytes)
74 				.detail("DiskFreeBytes", currentStats.processDiskFreeBytes)
75 				.detail("DiskQueueDepth", currentStats.processDiskQueueDepth)
76 				.detail("DiskIdleSeconds", currentStats.processDiskIdleSeconds)
77 				.detail("DiskReads", currentStats.processDiskRead)
78 				.detail("DiskWrites", currentStats.processDiskWrite)
79 				.detail("DiskReadsCount", currentStats.processDiskReadCount)
80 				.detail("DiskWritesCount", currentStats.processDiskWriteCount)
81 				.detail("DiskWriteSectors", currentStats.processDiskWriteSectors)
82 				.detail("DiskReadSectors", currentStats.processDiskReadSectors)
83 				.detail("FileWrites", netData.countFileLogicalWrites - statState->networkState.countFileLogicalWrites)
84 				.detail("FileReads", netData.countFileLogicalReads - statState->networkState.countFileLogicalReads)
85 				.detail("CacheReadBytes", netData.countFileCacheReadBytes - statState->networkState.countFileCacheReadBytes)
86 				.detail("CacheFinds", netData.countFileCacheFinds - statState->networkState.countFileCacheFinds)
87 				.detail("CacheWritesBlocked", netData.countFileCacheWritesBlocked - statState->networkState.countFileCacheWritesBlocked)
88 				.detail("CacheReadsBlocked", netData.countFileCacheReadsBlocked - statState->networkState.countFileCacheReadsBlocked)
89 				.detail("CachePageReadsMerged", netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
90 				.detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
91 				.detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
92 				.detail("ZoneID", machineState.zoneId)
93 				.detail("MachineID", machineState.machineId)
94 				.detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
95 				.detail("AIOCollectCount", netData.countAIOCollect - statState->networkState.countAIOCollect)
96 				.detail("AIOSubmitLag", (g_network->networkMetrics.secSquaredSubmit - statState->networkMetricsState.secSquaredSubmit) / currentStats.elapsed)
97 				.detail("AIODiskStall", (g_network->networkMetrics.secSquaredDiskStall - statState->networkMetricsState.secSquaredDiskStall) / currentStats.elapsed)
98 				.detail("CurrentConnections", netData.countConnEstablished - netData.countConnClosedWithError - netData.countConnClosedWithoutError)
99 				.detail("ConnectionsEstablished", (double) (netData.countConnEstablished - statState->networkState.countConnEstablished) / currentStats.elapsed)
100 				.detail("ConnectionsClosed", ((netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) + (netData.countConnClosedWithoutError - statState->networkState.countConnClosedWithoutError)) / currentStats.elapsed)
101 				.detail("ConnectionErrors", (netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) / currentStats.elapsed)
102 				.trackLatest(eventName.c_str());
103 
104 			TraceEvent("MemoryMetrics")
105 				.DETAILALLOCATORMEMUSAGE(16)
106 				.DETAILALLOCATORMEMUSAGE(32)
107 				.DETAILALLOCATORMEMUSAGE(64)
108 				.DETAILALLOCATORMEMUSAGE(128)
109 				.DETAILALLOCATORMEMUSAGE(256)
110 				.DETAILALLOCATORMEMUSAGE(512)
111 				.DETAILALLOCATORMEMUSAGE(1024)
112 				.DETAILALLOCATORMEMUSAGE(2048)
113 				.DETAILALLOCATORMEMUSAGE(4096)
114 				.DETAILALLOCATORMEMUSAGE(8192)
115 				.detail("HugeArenaMemory", g_hugeArenaMemory);
116 
117 			TraceEvent n("NetworkMetrics");
118 			n
119 				.detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)
120 				.detail("WontSleep", netData.countWontSleep - statState->networkState.countWontSleep)
121 				.detail("Yields", netData.countYields - statState->networkState.countYields)
122 				.detail("YieldCalls", netData.countYieldCalls - statState->networkState.countYieldCalls)
123 				.detail("YieldCallsTrue", netData.countYieldCallsTrue - statState->networkState.countYieldCallsTrue)
124 				.detail("SlowTaskSignals", netData.countSlowTaskSignals - statState->networkState.countSlowTaskSignals)
125 				.detail("YieldBigStack", netData.countYieldBigStack - statState->networkState.countYieldBigStack)
126 				.detail("RunLoopIterations", netData.countRunLoop - statState->networkState.countRunLoop)
127 				.detail("TimersExecuted", netData.countTimers - statState->networkState.countTimers)
128 				.detail("TasksExecuted", netData.countTasks - statState->networkState.countTasks)
129 				.detail("ASIOEventsProcessed", netData.countASIOEvents - statState->networkState.countASIOEvents)
130 				.detail("ReadCalls", netData.countReads - statState->networkState.countReads)
131 				.detail("WriteCalls", netData.countWrites - statState->networkState.countWrites)
132 				.detail("ReadProbes", netData.countReadProbes - statState->networkState.countReadProbes)
133 				.detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
134 				.detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
135 				.detail("PacketsGenerated", netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
136 				.detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock);
137 
138 			for (int i = 0; i<NetworkMetrics::SLOW_EVENT_BINS; i++)
139 				if (int c = g_network->networkMetrics.countSlowEvents[i] - statState->networkMetricsState.countSlowEvents[i])
140 					n.detail(format("SlowTask%dM", 1 << i).c_str(), c);
141 			for (int i = 0; i<NetworkMetrics::PRIORITY_BINS; i++)
142 				if (double x = g_network->networkMetrics.secSquaredPriorityBlocked[i] - statState->networkMetricsState.secSquaredPriorityBlocked[i])
143 					n.detail(format("S2Pri%d", g_network->networkMetrics.priorityBins[i]).c_str(), x);
144 		}
145 
146 		if(machineMetrics) {
147 			TraceEvent("MachineMetrics").detail("Elapsed", currentStats.elapsed)
148 				.detail("MbpsSent", currentStats.machineMegabitsSent / currentStats.elapsed)
149 				.detail("MbpsReceived", currentStats.machineMegabitsReceived / currentStats.elapsed)
150 				.detail("OutSegs", currentStats.machineOutSegs)
151 				.detail("RetransSegs", currentStats.machineRetransSegs)
152 				.detail("CPUSeconds", currentStats.machineCPUSeconds)
153 				.detail("TotalMemory", currentStats.machineTotalRAM)
154 				.detail("CommittedMemory", currentStats.machineCommittedRAM)
155 				.detail("AvailableMemory", currentStats.machineAvailableRAM)
156 				.detail("ZoneID", machineState.zoneId)
157 				.detail("MachineID", machineState.machineId)
158 				.trackLatest("MachineMetrics");
159 		}
160 	}
161 
162 #ifdef ALLOC_INSTRUMENTATION
163 	{
164 		static double firstTime = 0.0;
165 		if(firstTime == 0.0) firstTime = now();
166 		if( now() - firstTime > 10 || g_network->isSimulated() ) {
167 			firstTime = now();
168 			std::vector< std::pair<std::string, const char*> > typeNames;
169 			for( auto i = allocInstr.begin(); i != allocInstr.end(); ++i ) {
170 				std::string s;
171 #ifdef __linux__
172 				char *demangled = abi::__cxa_demangle(i->first, NULL, NULL, NULL);
173 				if (demangled) {
174 					s = demangled;
175 					if (StringRef(s).startsWith(LiteralStringRef("(anonymous namespace)::")))
176 						s = s.substr(LiteralStringRef("(anonymous namespace)::").size());
177 					free(demangled);
178 				} else
179 					s = i->first;
180 #else
181 				s = i->first;
182 				if (StringRef(s).startsWith(LiteralStringRef("class `anonymous namespace'::")))
183 					s = s.substr(LiteralStringRef("class `anonymous namespace'::").size());
184 				else if (StringRef(s).startsWith(LiteralStringRef("class ")))
185 					s = s.substr(LiteralStringRef("class ").size());
186 				else if (StringRef(s).startsWith(LiteralStringRef("struct ")))
187 					s = s.substr(LiteralStringRef("struct ").size());
188 #endif
189 				typeNames.push_back( std::make_pair(s, i->first) );
190 			}
191 			std::sort(typeNames.begin(), typeNames.end());
192 			for(int i=0; i<typeNames.size(); i++) {
193 				const char* n = typeNames[i].second;
194 				auto& f = allocInstr[n];
195 				if(f.maxAllocated > 10000)
196 					TraceEvent("AllocInstrument").detail("CurrentAlloc", f.allocCount-f.deallocCount)
197 						.detail("Name", typeNames[i].first.c_str());
198 			}
199 
200 			std::unordered_map<uint32_t, BackTraceAccount> traceCounts;
201 			size_t memSampleSize;
202 			memSample_entered = true;
203 			{
204 				ThreadSpinLockHolder holder(memLock);
205 				traceCounts = backTraceLookup;
206 				memSampleSize = memSample.size();
207 			}
208 			memSample_entered = false;
209 
210 			uint64_t totalSize = 0;
211 			uint64_t totalCount = 0;
212 			for( auto i = traceCounts.begin(); i != traceCounts.end(); ++i ) {
213 				char buf[1024];
214 				std::vector<void *> *frames = i->second.backTrace;
215 				std::string backTraceStr;
216 #if defined(_WIN32)
217 				for (int j = 1; j < frames->size(); j++) {
218 					_snprintf(buf, 1024, "%p ", frames->at(j));
219 					backTraceStr += buf;
220 				}
221 #else
222 				backTraceStr = platform::format_backtrace(&(*frames)[0], frames->size());
223 #endif
224 
225 				TraceEvent("MemSample")
226 					.detail("Count", (int64_t)i->second.count)
227 					.detail("TotalSize", i->second.totalSize)
228 					.detail("SampleCount", i->second.sampleCount)
229 					.detail("Hash", format("%lld", i->first))
230 					.detail("Bt", backTraceStr);
231 
232 				totalSize += i->second.totalSize;
233 				totalCount += i->second.count;
234 			}
235 
236 			TraceEvent("MemSampleSummary")
237 				.detail("InverseByteSampleRatio", SAMPLE_BYTES)
238 				.detail("MemorySamples", memSampleSize)
239 				.detail("BackTraces", traceCounts.size())
240 				.detail("TotalSize", totalSize)
241 				.detail("TotalCount", totalCount);
242 
243 			TraceEvent("MemSample")
244 				.detail("Count", traceCounts.size())
245 				.detail("TotalSize", traceCounts.size() * ((int)(sizeof(uint32_t) + sizeof(size_t) + sizeof(size_t))))
246 				.detail("SampleCount", traceCounts.size())
247 				.detail("Hash", "backTraces")
248 				.detail("Bt", "na");
249 
250 			TraceEvent("MemSample")
251 				.detail("Count", memSampleSize)
252 				.detail("TotalSize", memSampleSize * ((int)(sizeof(void*) + sizeof(uint32_t) + sizeof(size_t))))
253 				.detail("SampleCount", memSampleSize)
254 				.detail("Hash", "memSamples")
255 				.detail("Bt", "na");
256 			TRACEALLOCATOR(16);
257 			TRACEALLOCATOR(32);
258 			TRACEALLOCATOR(64);
259 			TRACEALLOCATOR(128);
260 			TRACEALLOCATOR(256);
261 			TRACEALLOCATOR(512);
262 			TRACEALLOCATOR(1024);
263 			TRACEALLOCATOR(2048);
264 			TRACEALLOCATOR(4096);
265 			TRACEALLOCATOR(8192);
266 		}
267 	}
268 #endif
269 	statState->networkMetricsState = g_network->networkMetrics;
270 	statState->networkState = netData;
271 	return currentStats;
272 }
273