1// Copyright 2015 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// +build !noedac
15
16package collector
17
18import (
19	"fmt"
20	"path/filepath"
21	"regexp"
22
23	"github.com/go-kit/kit/log"
24	"github.com/prometheus/client_golang/prometheus"
25)
26
// edacSubsystem is the subsystem component handed to prometheus.BuildFQName
// when the names of the EDAC metrics are built.
const edacSubsystem = "edac"
30
// edacMemControllerRE captures, as its first submatch, the digits that follow
// "mc" in a memory controller path (.../devices/system/edac/mc/mc<N>).
var edacMemControllerRE = regexp.MustCompile(`.*devices/system/edac/mc/mc([0-9]*)`)

// edacMemCsrowRE captures, as its first submatch, the digits that follow
// "csrow" in a csrow path (.../devices/system/edac/mc/mc<N>/csrow<M>).
var edacMemCsrowRE = regexp.MustCompile(`.*devices/system/edac/mc/mc[0-9]*/csrow([0-9]*)`)
35
36type edacCollector struct {
37	ceCount      *prometheus.Desc
38	ueCount      *prometheus.Desc
39	csRowCECount *prometheus.Desc
40	csRowUECount *prometheus.Desc
41	logger       log.Logger
42}
43
44func init() {
45	registerCollector("edac", defaultEnabled, NewEdacCollector)
46}
47
48// NewEdacCollector returns a new Collector exposing edac stats.
49func NewEdacCollector(logger log.Logger) (Collector, error) {
50	return &edacCollector{
51		ceCount: prometheus.NewDesc(
52			prometheus.BuildFQName(namespace, edacSubsystem, "correctable_errors_total"),
53			"Total correctable memory errors.",
54			[]string{"controller"}, nil,
55		),
56		ueCount: prometheus.NewDesc(
57			prometheus.BuildFQName(namespace, edacSubsystem, "uncorrectable_errors_total"),
58			"Total uncorrectable memory errors.",
59			[]string{"controller"}, nil,
60		),
61		csRowCECount: prometheus.NewDesc(
62			prometheus.BuildFQName(namespace, edacSubsystem, "csrow_correctable_errors_total"),
63			"Total correctable memory errors for this csrow.",
64			[]string{"controller", "csrow"}, nil,
65		),
66		csRowUECount: prometheus.NewDesc(
67			prometheus.BuildFQName(namespace, edacSubsystem, "csrow_uncorrectable_errors_total"),
68			"Total uncorrectable memory errors for this csrow.",
69			[]string{"controller", "csrow"}, nil,
70		),
71		logger: logger,
72	}, nil
73}
74
75func (c *edacCollector) Update(ch chan<- prometheus.Metric) error {
76	memControllers, err := filepath.Glob(sysFilePath("devices/system/edac/mc/mc[0-9]*"))
77	if err != nil {
78		return err
79	}
80	for _, controller := range memControllers {
81		controllerMatch := edacMemControllerRE.FindStringSubmatch(controller)
82		if controllerMatch == nil {
83			return fmt.Errorf("controller string didn't match regexp: %s", controller)
84		}
85		controllerNumber := controllerMatch[1]
86
87		value, err := readUintFromFile(filepath.Join(controller, "ce_count"))
88		if err != nil {
89			return fmt.Errorf("couldn't get ce_count for controller %s: %s", controllerNumber, err)
90		}
91		ch <- prometheus.MustNewConstMetric(
92			c.ceCount, prometheus.CounterValue, float64(value), controllerNumber)
93
94		value, err = readUintFromFile(filepath.Join(controller, "ce_noinfo_count"))
95		if err != nil {
96			return fmt.Errorf("couldn't get ce_noinfo_count for controller %s: %s", controllerNumber, err)
97		}
98		ch <- prometheus.MustNewConstMetric(
99			c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown")
100
101		value, err = readUintFromFile(filepath.Join(controller, "ue_count"))
102		if err != nil {
103			return fmt.Errorf("couldn't get ue_count for controller %s: %s", controllerNumber, err)
104		}
105		ch <- prometheus.MustNewConstMetric(
106			c.ueCount, prometheus.CounterValue, float64(value), controllerNumber)
107
108		value, err = readUintFromFile(filepath.Join(controller, "ue_noinfo_count"))
109		if err != nil {
110			return fmt.Errorf("couldn't get ue_noinfo_count for controller %s: %s", controllerNumber, err)
111		}
112		ch <- prometheus.MustNewConstMetric(
113			c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown")
114
115		// For each controller, walk the csrow directories.
116		csrows, err := filepath.Glob(controller + "/csrow[0-9]*")
117		if err != nil {
118			return err
119		}
120		for _, csrow := range csrows {
121			csrowMatch := edacMemCsrowRE.FindStringSubmatch(csrow)
122			if csrowMatch == nil {
123				return fmt.Errorf("csrow string didn't match regexp: %s", csrow)
124			}
125			csrowNumber := csrowMatch[1]
126
127			value, err = readUintFromFile(filepath.Join(csrow, "ce_count"))
128			if err != nil {
129				return fmt.Errorf("couldn't get ce_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err)
130			}
131			ch <- prometheus.MustNewConstMetric(
132				c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber)
133
134			value, err = readUintFromFile(filepath.Join(csrow, "ue_count"))
135			if err != nil {
136				return fmt.Errorf("couldn't get ue_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err)
137			}
138			ch <- prometheus.MustNewConstMetric(
139				c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber)
140		}
141	}
142
143	return err
144}
145