1// Copyright 2018 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package procfs
15
16// While implementing parsing of /proc/[pid]/mountstats, this blog was used
17// heavily as a reference:
18//   https://utcc.utoronto.ca/~cks/space/blog/linux/NFSMountstatsIndex
19//
20// Special thanks to Chris Siebenmann for all of his posts explaining the
21// various statistics available for NFS.
22
23import (
24	"bufio"
25	"fmt"
26	"io"
27	"strconv"
28	"strings"
29	"time"
30)
31
// Constants shared between multiple functions.
const (
	// Number of whitespace-separated fields in a device entry that carries no
	// statistics: "device [device] mounted on [mount] with fstype [type]".
	// An entry with more fields is treated as also carrying a "statvers="
	// token in the field that follows.
	deviceEntryLen = 8

	// Exact number of integer fields expected on the "bytes:" and "events:"
	// lines, not counting the line label itself.
	fieldBytesLen  = 8
	fieldEventsLen = 27

	// Statistics versions for which transport statistics can be parsed.
	statVersion10 = "1.0"
	statVersion11 = "1.1"

	// Exact number of integer fields expected after the protocol name on an
	// "xprt:" line when the statistics version is 1.0.
	fieldTransport10TCPLen = 10
	fieldTransport10UDPLen = 7

	// Exact number of integer fields expected after the protocol name on an
	// "xprt:" line when the statistics version is 1.1.
	fieldTransport11TCPLen = 13
	fieldTransport11UDPLen = 10
)
48
// A Mount is a device mount parsed from /proc/[pid]/mountstats.
//
// Device, Mount and Type are taken from the "device ... mounted on ... with
// fstype ..." line of the entry.
type Mount struct {
	// Name of the device.
	Device string
	// The mount point of the device.
	Mount string
	// The filesystem type used by the device.
	Type string
	// If available additional statistics related to this Mount.
	// Use a type assertion to determine if additional statistics are available.
	// Left nil when the device entry carries no statistics.
	Stats MountStats
}
61
// A MountStats is a type which contains detailed statistics for a specific
// type of Mount.
//
// The single method is unexported, so only types declared in this package
// can satisfy the interface.
type MountStats interface {
	// mountStats is a marker method; it carries no behavior.
	mountStats()
}
67
// A MountStatsNFS is a MountStats implementation for NFSv3 and v4 mounts.
type MountStatsNFS struct {
	// The version of statistics provided, as found after the "statvers="
	// prefix on the device line.
	StatVersion string
	// The mount options of the NFS mount, parsed from the "opts:" line.
	// Options given without a value map to an empty string.
	Opts map[string]string
	// The age of the NFS mount, parsed from the "age:" line (in seconds).
	Age time.Duration
	// Statistics related to byte counters for various operations.
	Bytes NFSBytesStats
	// Statistics related to various NFS event occurrences.
	Events NFSEventsStats
	// Statistics broken down by filesystem operation.
	Operations []NFSOperationStats
	// Statistics about the NFS RPC transport.
	Transport NFSTransportStats
}

// mountStats implements MountStats. It is intentionally empty and exists only
// so that MountStatsNFS satisfies the interface.
func (m MountStatsNFS) mountStats() {}
88
// A NFSBytesStats contains statistics about the number of bytes read and written
// by an NFS client to and from an NFS server.
//
// The fields are listed in the same order as the integers on the "bytes:"
// line they are parsed from.
type NFSBytesStats struct {
	// Number of bytes read using the read() syscall.
	Read uint64
	// Number of bytes written using the write() syscall.
	Write uint64
	// Number of bytes read using the read() syscall in O_DIRECT mode.
	DirectRead uint64
	// Number of bytes written using the write() syscall in O_DIRECT mode.
	DirectWrite uint64
	// Number of bytes read from the NFS server, in total.
	ReadTotal uint64
	// Number of bytes written to the NFS server, in total.
	WriteTotal uint64
	// Number of pages read directly via mmap()'d files.
	ReadPages uint64
	// Number of pages written directly via mmap()'d files.
	WritePages uint64
}
109
// A NFSEventsStats contains statistics about NFS event occurrences.
//
// The fields are listed in the same order as the integers on the "events:"
// line they are parsed from.
type NFSEventsStats struct {
	// Number of times cached inode attributes are re-validated from the server.
	InodeRevalidate uint64
	// Number of times cached dentry nodes are re-validated from the server.
	DnodeRevalidate uint64
	// Number of times an inode cache is cleared.
	DataInvalidate uint64
	// Number of times cached inode attributes are invalidated.
	AttributeInvalidate uint64
	// Number of times files or directories have been open()'d.
	VFSOpen uint64
	// Number of times a directory lookup has occurred.
	VFSLookup uint64
	// Number of times permissions have been checked.
	VFSAccess uint64
	// Number of updates (and potential writes) to pages.
	VFSUpdatePage uint64
	// Number of pages read directly via mmap()'d files.
	VFSReadPage uint64
	// Number of times a group of pages have been read.
	VFSReadPages uint64
	// Number of pages written directly via mmap()'d files.
	VFSWritePage uint64
	// Number of times a group of pages have been written.
	VFSWritePages uint64
	// Number of times directory entries have been read with getdents().
	VFSGetdents uint64
	// Number of times attributes have been set on inodes.
	VFSSetattr uint64
	// Number of pending writes that have been forcefully flushed to the server.
	VFSFlush uint64
	// Number of times fsync() has been called on directories and files.
	VFSFsync uint64
	// Number of times locking has been attempted on a file.
	VFSLock uint64
	// Number of times files have been closed and released.
	VFSFileRelease uint64
	// Meaning unknown; possibly unused.
	CongestionWait uint64
	// Number of times files have been truncated.
	Truncation uint64
	// Number of times a file has been grown due to writes beyond its existing end.
	WriteExtension uint64
	// Number of times a file was removed while still open by another process.
	SillyRename uint64
	// Number of times the NFS server gave less data than expected while reading.
	ShortRead uint64
	// Number of times the NFS server wrote less data than expected while writing.
	ShortWrite uint64
	// Number of times the NFS server indicated EJUKEBOX; retrieving data from
	// offline storage.
	JukeboxDelay uint64
	// Number of NFS v4.1+ pNFS reads.
	PNFSRead uint64
	// Number of NFS v4.1+ pNFS writes.
	PNFSWrite uint64
}
168
// A NFSOperationStats contains statistics for a single operation.
//
// Each value is parsed from one line of the per-operation statistics section,
// which has the form "NAME: n n n ...".
type NFSOperationStats struct {
	// The name of the operation, with the trailing ":" removed.
	Operation string
	// Number of requests performed for this operation.
	Requests uint64
	// Number of times an actual RPC request has been transmitted for this operation.
	Transmissions uint64
	// Number of times a request has had a major timeout.
	MajorTimeouts uint64
	// Number of bytes sent for this operation, including RPC headers and payload.
	BytesSent uint64
	// Number of bytes received for this operation, including RPC headers and payload.
	BytesReceived uint64
	// Duration all requests spent queued for transmission before they were sent.
	CumulativeQueueMilliseconds uint64
	// Duration it took to get a reply back after the request was transmitted.
	CumulativeTotalResponseMilliseconds uint64
	// Duration from when a request was enqueued to when it was completely handled.
	CumulativeTotalRequestMilliseconds uint64
	// The count of operations that complete with tk_status < 0.  These statuses usually indicate error conditions.
	// Left at zero when the line does not include this ninth counter.
	Errors uint64
}
192
// A NFSTransportStats contains statistics for the NFS mount RPC requests and
// responses.
//
// The values are parsed from the "xprt:" line. For UDP mounts that line has
// no connection count, connect idle time or idle time, so Connect,
// ConnectIdleTime and IdleTimeSeconds are set to zero.
type NFSTransportStats struct {
	// The transport protocol used for the NFS mount.
	Protocol string
	// The local port used for the NFS mount.
	Port uint64
	// Number of times the client has had to establish a connection from scratch
	// to the NFS server.
	Bind uint64
	// Number of times the client has made a TCP connection to the NFS server.
	Connect uint64
	// Duration (in jiffies, a kernel internal unit of time) the NFS mount has
	// spent waiting for connections to the server to be established.
	ConnectIdleTime uint64
	// Duration since the NFS mount last saw any RPC traffic.
	IdleTimeSeconds uint64
	// Number of RPC requests for this mount sent to the NFS server.
	Sends uint64
	// Number of RPC responses for this mount received from the NFS server.
	Receives uint64
	// Number of times the NFS server sent a response with a transaction ID
	// unknown to this client.
	BadTransactionIDs uint64
	// A running counter, incremented on each request as the current difference
	// between sends and receives.
	CumulativeActiveRequests uint64
	// A running counter, incremented on each request by the current backlog
	// queue size.
	CumulativeBacklog uint64

	// Stats below only available with stat version 1.1; they are zero for
	// stat version 1.0.

	// Maximum number of simultaneously active RPC requests ever used.
	MaximumRPCSlotsUsed uint64
	// A running counter, incremented on each request as the current size of the
	// sending queue.
	CumulativeSendingQueue uint64
	// A running counter, incremented on each request as the current size of the
	// pending queue.
	CumulativePendingQueue uint64
}
235
236// parseMountStats parses a /proc/[pid]/mountstats file and returns a slice
237// of Mount structures containing detailed information about each mount.
238// If available, statistics for each mount are parsed as well.
239func parseMountStats(r io.Reader) ([]*Mount, error) {
240	const (
241		device            = "device"
242		statVersionPrefix = "statvers="
243
244		nfs3Type = "nfs"
245		nfs4Type = "nfs4"
246	)
247
248	var mounts []*Mount
249
250	s := bufio.NewScanner(r)
251	for s.Scan() {
252		// Only look for device entries in this function
253		ss := strings.Fields(string(s.Bytes()))
254		if len(ss) == 0 || ss[0] != device {
255			continue
256		}
257
258		m, err := parseMount(ss)
259		if err != nil {
260			return nil, err
261		}
262
263		// Does this mount also possess statistics information?
264		if len(ss) > deviceEntryLen {
265			// Only NFSv3 and v4 are supported for parsing statistics
266			if m.Type != nfs3Type && m.Type != nfs4Type {
267				return nil, fmt.Errorf("cannot parse MountStats for fstype %q", m.Type)
268			}
269
270			statVersion := strings.TrimPrefix(ss[8], statVersionPrefix)
271
272			stats, err := parseMountStatsNFS(s, statVersion)
273			if err != nil {
274				return nil, err
275			}
276
277			m.Stats = stats
278		}
279
280		mounts = append(mounts, m)
281	}
282
283	return mounts, s.Err()
284}
285
286// parseMount parses an entry in /proc/[pid]/mountstats in the format:
287//   device [device] mounted on [mount] with fstype [type]
288func parseMount(ss []string) (*Mount, error) {
289	if len(ss) < deviceEntryLen {
290		return nil, fmt.Errorf("invalid device entry: %v", ss)
291	}
292
293	// Check for specific words appearing at specific indices to ensure
294	// the format is consistent with what we expect
295	format := []struct {
296		i int
297		s string
298	}{
299		{i: 0, s: "device"},
300		{i: 2, s: "mounted"},
301		{i: 3, s: "on"},
302		{i: 5, s: "with"},
303		{i: 6, s: "fstype"},
304	}
305
306	for _, f := range format {
307		if ss[f.i] != f.s {
308			return nil, fmt.Errorf("invalid device entry: %v", ss)
309		}
310	}
311
312	return &Mount{
313		Device: ss[1],
314		Mount:  ss[4],
315		Type:   ss[7],
316	}, nil
317}
318
319// parseMountStatsNFS parses a MountStatsNFS by scanning additional information
320// related to NFS statistics.
321func parseMountStatsNFS(s *bufio.Scanner, statVersion string) (*MountStatsNFS, error) {
322	// Field indicators for parsing specific types of data
323	const (
324		fieldOpts       = "opts:"
325		fieldAge        = "age:"
326		fieldBytes      = "bytes:"
327		fieldEvents     = "events:"
328		fieldPerOpStats = "per-op"
329		fieldTransport  = "xprt:"
330	)
331
332	stats := &MountStatsNFS{
333		StatVersion: statVersion,
334	}
335
336	for s.Scan() {
337		ss := strings.Fields(string(s.Bytes()))
338		if len(ss) == 0 {
339			break
340		}
341
342		switch ss[0] {
343		case fieldOpts:
344			if len(ss) < 2 {
345				return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
346			}
347			if stats.Opts == nil {
348				stats.Opts = map[string]string{}
349			}
350			for _, opt := range strings.Split(ss[1], ",") {
351				split := strings.Split(opt, "=")
352				if len(split) == 2 {
353					stats.Opts[split[0]] = split[1]
354				} else {
355					stats.Opts[opt] = ""
356				}
357			}
358		case fieldAge:
359			if len(ss) < 2 {
360				return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
361			}
362			// Age integer is in seconds
363			d, err := time.ParseDuration(ss[1] + "s")
364			if err != nil {
365				return nil, err
366			}
367
368			stats.Age = d
369		case fieldBytes:
370			if len(ss) < 2 {
371				return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
372			}
373			bstats, err := parseNFSBytesStats(ss[1:])
374			if err != nil {
375				return nil, err
376			}
377
378			stats.Bytes = *bstats
379		case fieldEvents:
380			if len(ss) < 2 {
381				return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
382			}
383			estats, err := parseNFSEventsStats(ss[1:])
384			if err != nil {
385				return nil, err
386			}
387
388			stats.Events = *estats
389		case fieldTransport:
390			if len(ss) < 3 {
391				return nil, fmt.Errorf("not enough information for NFS transport stats: %v", ss)
392			}
393
394			tstats, err := parseNFSTransportStats(ss[1:], statVersion)
395			if err != nil {
396				return nil, err
397			}
398
399			stats.Transport = *tstats
400		}
401
402		// When encountering "per-operation statistics", we must break this
403		// loop and parse them separately to ensure we can terminate parsing
404		// before reaching another device entry; hence why this 'if' statement
405		// is not just another switch case
406		if ss[0] == fieldPerOpStats {
407			break
408		}
409	}
410
411	if err := s.Err(); err != nil {
412		return nil, err
413	}
414
415	// NFS per-operation stats appear last before the next device entry
416	perOpStats, err := parseNFSOperationStats(s)
417	if err != nil {
418		return nil, err
419	}
420
421	stats.Operations = perOpStats
422
423	return stats, nil
424}
425
426// parseNFSBytesStats parses a NFSBytesStats line using an input set of
427// integer fields.
428func parseNFSBytesStats(ss []string) (*NFSBytesStats, error) {
429	if len(ss) != fieldBytesLen {
430		return nil, fmt.Errorf("invalid NFS bytes stats: %v", ss)
431	}
432
433	ns := make([]uint64, 0, fieldBytesLen)
434	for _, s := range ss {
435		n, err := strconv.ParseUint(s, 10, 64)
436		if err != nil {
437			return nil, err
438		}
439
440		ns = append(ns, n)
441	}
442
443	return &NFSBytesStats{
444		Read:        ns[0],
445		Write:       ns[1],
446		DirectRead:  ns[2],
447		DirectWrite: ns[3],
448		ReadTotal:   ns[4],
449		WriteTotal:  ns[5],
450		ReadPages:   ns[6],
451		WritePages:  ns[7],
452	}, nil
453}
454
455// parseNFSEventsStats parses a NFSEventsStats line using an input set of
456// integer fields.
457func parseNFSEventsStats(ss []string) (*NFSEventsStats, error) {
458	if len(ss) != fieldEventsLen {
459		return nil, fmt.Errorf("invalid NFS events stats: %v", ss)
460	}
461
462	ns := make([]uint64, 0, fieldEventsLen)
463	for _, s := range ss {
464		n, err := strconv.ParseUint(s, 10, 64)
465		if err != nil {
466			return nil, err
467		}
468
469		ns = append(ns, n)
470	}
471
472	return &NFSEventsStats{
473		InodeRevalidate:     ns[0],
474		DnodeRevalidate:     ns[1],
475		DataInvalidate:      ns[2],
476		AttributeInvalidate: ns[3],
477		VFSOpen:             ns[4],
478		VFSLookup:           ns[5],
479		VFSAccess:           ns[6],
480		VFSUpdatePage:       ns[7],
481		VFSReadPage:         ns[8],
482		VFSReadPages:        ns[9],
483		VFSWritePage:        ns[10],
484		VFSWritePages:       ns[11],
485		VFSGetdents:         ns[12],
486		VFSSetattr:          ns[13],
487		VFSFlush:            ns[14],
488		VFSFsync:            ns[15],
489		VFSLock:             ns[16],
490		VFSFileRelease:      ns[17],
491		CongestionWait:      ns[18],
492		Truncation:          ns[19],
493		WriteExtension:      ns[20],
494		SillyRename:         ns[21],
495		ShortRead:           ns[22],
496		ShortWrite:          ns[23],
497		JukeboxDelay:        ns[24],
498		PNFSRead:            ns[25],
499		PNFSWrite:           ns[26],
500	}, nil
501}
502
503// parseNFSOperationStats parses a slice of NFSOperationStats by scanning
504// additional information about per-operation statistics until an empty
505// line is reached.
506func parseNFSOperationStats(s *bufio.Scanner) ([]NFSOperationStats, error) {
507	const (
508		// Minimum number of expected fields in each per-operation statistics set
509		minFields = 9
510	)
511
512	var ops []NFSOperationStats
513
514	for s.Scan() {
515		ss := strings.Fields(string(s.Bytes()))
516		if len(ss) == 0 {
517			// Must break when reading a blank line after per-operation stats to
518			// enable top-level function to parse the next device entry
519			break
520		}
521
522		if len(ss) < minFields {
523			return nil, fmt.Errorf("invalid NFS per-operations stats: %v", ss)
524		}
525
526		// Skip string operation name for integers
527		ns := make([]uint64, 0, minFields-1)
528		for _, st := range ss[1:] {
529			n, err := strconv.ParseUint(st, 10, 64)
530			if err != nil {
531				return nil, err
532			}
533
534			ns = append(ns, n)
535		}
536
537		opStats := NFSOperationStats{
538			Operation:                           strings.TrimSuffix(ss[0], ":"),
539			Requests:                            ns[0],
540			Transmissions:                       ns[1],
541			MajorTimeouts:                       ns[2],
542			BytesSent:                           ns[3],
543			BytesReceived:                       ns[4],
544			CumulativeQueueMilliseconds:         ns[5],
545			CumulativeTotalResponseMilliseconds: ns[6],
546			CumulativeTotalRequestMilliseconds:  ns[7],
547		}
548
549		if len(ns) > 8 {
550			opStats.Errors = ns[8]
551		}
552
553		ops = append(ops, opStats)
554	}
555
556	return ops, s.Err()
557}
558
559// parseNFSTransportStats parses a NFSTransportStats line using an input set of
560// integer fields matched to a specific stats version.
561func parseNFSTransportStats(ss []string, statVersion string) (*NFSTransportStats, error) {
562	// Extract the protocol field. It is the only string value in the line
563	protocol := ss[0]
564	ss = ss[1:]
565
566	switch statVersion {
567	case statVersion10:
568		var expectedLength int
569		if protocol == "tcp" {
570			expectedLength = fieldTransport10TCPLen
571		} else if protocol == "udp" {
572			expectedLength = fieldTransport10UDPLen
573		} else {
574			return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.0 statement: %v", protocol, ss)
575		}
576		if len(ss) != expectedLength {
577			return nil, fmt.Errorf("invalid NFS transport stats 1.0 statement: %v", ss)
578		}
579	case statVersion11:
580		var expectedLength int
581		if protocol == "tcp" {
582			expectedLength = fieldTransport11TCPLen
583		} else if protocol == "udp" {
584			expectedLength = fieldTransport11UDPLen
585		} else {
586			return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.1 statement: %v", protocol, ss)
587		}
588		if len(ss) != expectedLength {
589			return nil, fmt.Errorf("invalid NFS transport stats 1.1 statement: %v", ss)
590		}
591	default:
592		return nil, fmt.Errorf("unrecognized NFS transport stats version: %q", statVersion)
593	}
594
595	// Allocate enough for v1.1 stats since zero value for v1.1 stats will be okay
596	// in a v1.0 response. Since the stat length is bigger for TCP stats, we use
597	// the TCP length here.
598	//
599	// Note: slice length must be set to length of v1.1 stats to avoid a panic when
600	// only v1.0 stats are present.
601	// See: https://github.com/prometheus/node_exporter/issues/571.
602	ns := make([]uint64, fieldTransport11TCPLen)
603	for i, s := range ss {
604		n, err := strconv.ParseUint(s, 10, 64)
605		if err != nil {
606			return nil, err
607		}
608
609		ns[i] = n
610	}
611
612	// The fields differ depending on the transport protocol (TCP or UDP)
613	// From https://utcc.utoronto.ca/%7Ecks/space/blog/linux/NFSMountstatsXprt
614	//
615	// For the udp RPC transport there is no connection count, connect idle time,
616	// or idle time (fields #3, #4, and #5); all other fields are the same. So
617	// we set them to 0 here.
618	if protocol == "udp" {
619		ns = append(ns[:2], append(make([]uint64, 3), ns[2:]...)...)
620	}
621
622	return &NFSTransportStats{
623		Protocol:                 protocol,
624		Port:                     ns[0],
625		Bind:                     ns[1],
626		Connect:                  ns[2],
627		ConnectIdleTime:          ns[3],
628		IdleTimeSeconds:          ns[4],
629		Sends:                    ns[5],
630		Receives:                 ns[6],
631		BadTransactionIDs:        ns[7],
632		CumulativeActiveRequests: ns[8],
633		CumulativeBacklog:        ns[9],
634		MaximumRPCSlotsUsed:      ns[10],
635		CumulativeSendingQueue:   ns[11],
636		CumulativePendingQueue:   ns[12],
637	}, nil
638}
639