// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !noedac

package collector

import (
	"fmt"
	"path/filepath"
	"regexp"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	// edacSubsystem is the subsystem component passed to
	// prometheus.BuildFQName for every metric of this collector.
	edacSubsystem = "edac"
)

var (
	// edacMemControllerRE matches a memory controller directory path; capture
	// group 1 is the controller number used as the "controller" label value.
	edacMemControllerRE = regexp.MustCompile(`.*devices/system/edac/mc/mc([0-9]*)`)
	// edacMemCsrowRE matches a csrow directory path below a memory controller;
	// capture group 1 is the csrow number used as the "csrow" label value.
	edacMemCsrowRE = regexp.MustCompile(`.*devices/system/edac/mc/mc[0-9]*/csrow([0-9]*)`)
)

// edacCollector holds the metric descriptors for the EDAC error counters
// emitted by Update.
type edacCollector struct {
	// ceCount describes the per-controller correctable error counter.
	ceCount *prometheus.Desc
	// ueCount describes the per-controller uncorrectable error counter.
	ueCount *prometheus.Desc
	// csRowCECount describes the per-csrow correctable error counter.
	csRowCECount *prometheus.Desc
	// csRowUECount describes the per-csrow uncorrectable error counter.
	csRowUECount *prometheus.Desc
	// logger is the logger handed to NewEdacCollector.
	logger log.Logger
}

// init registers the collector under the name "edac", passing defaultEnabled
// and NewEdacCollector as its factory.
func init() {
	registerCollector("edac", defaultEnabled, NewEdacCollector)
}

// NewEdacCollector returns a new Collector exposing edac stats.
49func NewEdacCollector(logger log.Logger) (Collector, error) { 50 return &edacCollector{ 51 ceCount: prometheus.NewDesc( 52 prometheus.BuildFQName(namespace, edacSubsystem, "correctable_errors_total"), 53 "Total correctable memory errors.", 54 []string{"controller"}, nil, 55 ), 56 ueCount: prometheus.NewDesc( 57 prometheus.BuildFQName(namespace, edacSubsystem, "uncorrectable_errors_total"), 58 "Total uncorrectable memory errors.", 59 []string{"controller"}, nil, 60 ), 61 csRowCECount: prometheus.NewDesc( 62 prometheus.BuildFQName(namespace, edacSubsystem, "csrow_correctable_errors_total"), 63 "Total correctable memory errors for this csrow.", 64 []string{"controller", "csrow"}, nil, 65 ), 66 csRowUECount: prometheus.NewDesc( 67 prometheus.BuildFQName(namespace, edacSubsystem, "csrow_uncorrectable_errors_total"), 68 "Total uncorrectable memory errors for this csrow.", 69 []string{"controller", "csrow"}, nil, 70 ), 71 logger: logger, 72 }, nil 73} 74 75func (c *edacCollector) Update(ch chan<- prometheus.Metric) error { 76 memControllers, err := filepath.Glob(sysFilePath("devices/system/edac/mc/mc[0-9]*")) 77 if err != nil { 78 return err 79 } 80 for _, controller := range memControllers { 81 controllerMatch := edacMemControllerRE.FindStringSubmatch(controller) 82 if controllerMatch == nil { 83 return fmt.Errorf("controller string didn't match regexp: %s", controller) 84 } 85 controllerNumber := controllerMatch[1] 86 87 value, err := readUintFromFile(filepath.Join(controller, "ce_count")) 88 if err != nil { 89 return fmt.Errorf("couldn't get ce_count for controller %s: %s", controllerNumber, err) 90 } 91 ch <- prometheus.MustNewConstMetric( 92 c.ceCount, prometheus.CounterValue, float64(value), controllerNumber) 93 94 value, err = readUintFromFile(filepath.Join(controller, "ce_noinfo_count")) 95 if err != nil { 96 return fmt.Errorf("couldn't get ce_noinfo_count for controller %s: %s", controllerNumber, err) 97 } 98 ch <- prometheus.MustNewConstMetric( 99 
c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown") 100 101 value, err = readUintFromFile(filepath.Join(controller, "ue_count")) 102 if err != nil { 103 return fmt.Errorf("couldn't get ue_count for controller %s: %s", controllerNumber, err) 104 } 105 ch <- prometheus.MustNewConstMetric( 106 c.ueCount, prometheus.CounterValue, float64(value), controllerNumber) 107 108 value, err = readUintFromFile(filepath.Join(controller, "ue_noinfo_count")) 109 if err != nil { 110 return fmt.Errorf("couldn't get ue_noinfo_count for controller %s: %s", controllerNumber, err) 111 } 112 ch <- prometheus.MustNewConstMetric( 113 c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown") 114 115 // For each controller, walk the csrow directories. 116 csrows, err := filepath.Glob(controller + "/csrow[0-9]*") 117 if err != nil { 118 return err 119 } 120 for _, csrow := range csrows { 121 csrowMatch := edacMemCsrowRE.FindStringSubmatch(csrow) 122 if csrowMatch == nil { 123 return fmt.Errorf("csrow string didn't match regexp: %s", csrow) 124 } 125 csrowNumber := csrowMatch[1] 126 127 value, err = readUintFromFile(filepath.Join(csrow, "ce_count")) 128 if err != nil { 129 return fmt.Errorf("couldn't get ce_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err) 130 } 131 ch <- prometheus.MustNewConstMetric( 132 c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber) 133 134 value, err = readUintFromFile(filepath.Join(csrow, "ue_count")) 135 if err != nil { 136 return fmt.Errorf("couldn't get ue_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err) 137 } 138 ch <- prometheus.MustNewConstMetric( 139 c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber) 140 } 141 } 142 143 return err 144} 145