1#!/bin/sh
2# weird shebang? See below: "interpreter selection"
3
4"""true"
5: <<=cut
6
7=head1 NAME
8
9olsrd - Monitor the state of an OLSR-based routing network
10
11
12=head1 APPLICABLE SYSTEMS
13
14Information is parsed from the output of "txtinfo" plugin for olsrd.
15
16
17=head1 CONFIGURATION
18
19Environment variables:
20
21    * OLSRD_HOST: name or IP of the host running the txtinfo plugin (default: localhost)
    * OLSRD_TXTINFO_PORT: the port that the txtinfo plugin is listening on (default: 2006)
23    * OLSRD_BIN_PATH: name of the olsrd binary (only used for 'autoconf', default: /usr/sbin/olsrd)
24    * MICROPYTHON_HEAP: adjust this parameter for micropython if your olsr network contains
25      more than a few thousand nodes (default: 512k)
26
27=head1 USAGE
28
29Collect basic information about the neighbours of an OLSR node:
30
31    * link quality
32    * neighbour link quality
33    * number of nodes reachable behind each neighbour
34    * ping times of direct neighbours
35
36This plugin works with the following python interpreters:
37
38    * Python 2
39    * Python 3
40    * micropython (e.g. OpenWrt)
41
42
43=head1 VERSION
44
45  0.4
46
47
48=head1 AUTHOR
49
50Lars Kruse <devel@sumpfralle.de>
51
52
53=head1 LICENSE
54
55GPLv3 or above
56
57
58=head1 MAGIC MARKERS
59
60  #%# family=auto
61  #%# capabilities=autoconf
62
63=cut
64
65
66# ****************** Interpreter Selection ***************
# This unbelievably dirty hack makes it possible to find a suitable python interpreter.
68# This is specifically useful for OpenWRT where typically only micropython is available.
69#
70# Additionally we need to run micropython with additional startup options.
# This is necessary because we need more than the default heap size of 128k (this default is
# sufficient for only 400 olsr nodes).
73#
74# This "execution hack" works as follows:
75#   * the script is executed by busybox ash or another shell
76#   * the above line (three quotes before and one quote after 'true') evaluates differently for
77#     shell and python:
78#       * shell: run "true" (i.e. nothing happens)
79#       * python: ignore everything up to the next three consecutive quotes
# Thus we may place shell code here that will take care of selecting an interpreter.
81
# Prefer micropython if it is available - otherwise fall back to any python (2 or 3).
#
# The POSIX builtin "command -v" is used for locating the interpreters, since "which" is an
# external tool that is not guaranteed to exist on minimal systems.
# The interpreter is started via "exec": it replaces the shell process, thus its exit status is
# the exit status of the plugin and the shell never reads the python code below.
if MICROPYTHON_BIN=$(command -v micropython); then
    # micropython needs a bigger heap than its default (see the comment block above)
    exec "$MICROPYTHON_BIN" -X "heapsize=${MICROPYTHON_HEAP:-512k}" "$0" "$@"
fi
# "python" is tried first in order to keep the behaviour of existing installations. The other
# names are used on systems that do not ship an unversioned "python" executable.
for PYTHON_BIN in python python3 python2; do
    if command -v "$PYTHON_BIN" >/dev/null 2>&1; then
        exec "$PYTHON_BIN" "$0" "$@"
    fi
done
echo "olsrd: no python interpreter found (tried: micropython, python, python3, python2)" >&2
# 127 is the conventional exit status for "command not found"
exit 127
90
91# For shell: ignore everything starting from here until the last line of this file.
92# This is necessary for syntax checkers that try to complain about invalid shell syntax below.
93true <<EOF
94"""
95
96
97import os
98import os.path
99import socket
100import sys
101
102
# version string reported by the "version" action
plugin_version = "0.4"

# Graph-level munin configuration of the link quality graphs.
# Placeholder: {title} (the graph title).
LQ_GRAPH_CONFIG = """
graph_title     {title}
graph_vlabel    Link Quality (-) / Neighbour Link Quality (+)
graph_category  network
graph_info      OLSR estimates the quality of a connection by the ratio of successfully received \
(link quality) and transmitted (neighbour link quality) hello packets.
"""

# Field-level munin configuration for one pair of "lq" / "nlq" fields.
# The "nlq" field is not drawn on its own ("graph no"): it is referenced as the "negative"
# counterpart of the "lq" field.
# Placeholders: {suffix} (appended to the field names "lq" and "nlq"), {label}, {draw_type}.
LQ_VALUES_CONFIG = """
nlq{suffix}.label none
nlq{suffix}.type GAUGE
nlq{suffix}.graph no
nlq{suffix}.draw {draw_type}
nlq{suffix}.min 0
lq{suffix}.label {label}
lq{suffix}.type GAUGE
lq{suffix}.draw {draw_type}
lq{suffix}.negative nlq{suffix}
lq{suffix}.min 0
"""

# Graph-level munin configuration of the "reachable nodes" graph (no placeholders - it is
# printed without being formatted).
NEIGHBOUR_COUNT_CONFIG = """
graph_title     Reachable nodes via neighbours
graph_vlabel    Number of Nodes
graph_category  network
graph_info      Count the number of locally known routes passing through each direct neighbour. \
This number is a good approximation for the number of mesh nodes reachable via this specific \
neighbour. MIDs (alternative addresses of an OLSR node) and HNAs (host network announcements) are \
ignored.
"""

# Field-level munin configuration for the route counter of one neighbour.
# Placeholders: {host_fieldname} (part of the field name), {host} (label), {draw_type}.
NEIGHBOUR_COUNT_VALUE = """
neighbour_{host_fieldname}.label {host}
neighbour_{host_fieldname}.type GAUGE
neighbour_{host_fieldname}.draw {draw_type}
neighbour_{host_fieldname}.min 0
"""

# Graph-level munin configuration of the ping graphs.
# Placeholder: {title} (the graph title).
NEIGHBOUR_PING_CONFIG = """
graph_title     {title}
graph_vlabel    roundtrip time (ms)
graph_category  network
graph_info      This graph shows ping RTT statistics.
graph_args      --base 1000 --lower-limit 0
graph_scale     no
"""

# Field-level munin configuration for the ping time of one neighbour.
# Placeholders: {host_fieldname} (part of the field name), {host} (label).
NEIGHBOUR_PING_VALUE = """neighbour_{host_fieldname}.label {host}"""

# micropython (as of 2015) does not contain "os.linesep"
# Thus a plain newline is used whenever the attribute is missing.
LINESEP = getattr(os, "linesep", "\n")
156
157
def get_clean_fieldname(name):
    """Turn an arbitrary string (e.g. an IP address) into a fragment of a munin field name.

    ASCII letters and digits are kept, every other character is replaced with an underscore.
    This applies to the first character, too: previously the first character was copied
    unconditionally, thus a leading ":" (e.g. of an IPv6 address) ended up in the field name.

    The result may start with a digit. All users of this function within this file prepend a
    prefix (e.g. "lq_", "neighbour_" or "host_"), thus the result is never used as the start of a
    field name.

    @param name: the string to be sanitized
    @return: a string of the same length containing only ASCII letters, digits and underscores
    """
    chars = []
    for char in name:
        # explicit ASCII ranges: "char.lower()" would let some non-ASCII characters pass
        if ("a" <= char <= "z") or ("A" <= char <= "Z") or ("0" <= char <= "9"):
            chars.append(char)
        else:
            chars.append("_")
    return "".join(chars)
166
167
def query_olsrd_txtservice(section=""):
    """Request one section from the txtinfo plugin of olsrd and yield its content lines.

    The service is reached via the host and port given by the environment variables
    OLSRD_HOST (default: localhost) and OLSRD_TXTINFO_PORT (default: 2006). The connection
    timeout is one second.

    The response is expected to consist of a header (terminated by an empty line) followed by a
    body. The first two body lines (table headers) and empty lines are skipped.

    The socket and its file object are closed in any case: after the last line, after an error
    and when the generator is discarded before being exhausted.

    @param section: name of the requested section (e.g. "lin", "rou", "mid" or "hna")
    @return: generator yielding the stripped, non-empty content lines
    """
    host = os.getenv("OLSRD_HOST", "localhost")
    port = os.getenv("OLSRD_TXTINFO_PORT", "2006")
    conn = socket.create_connection((host, port), 1.0)
    try:
        try:
            # Python3
            request = bytes("/%s" % section, "ascii")
        except TypeError:
            # Python2
            request = bytes("/%s" % section)
        conn.sendall(request)
        fconn = conn.makefile()
        try:
            in_header = True
            in_body_count = 0
            for line in fconn.readlines():
                if in_header:
                    if not line.strip():
                        # the empty line marks the end of the header
                        in_header = False
                    # ignore header lines (nothing to be done)
                else:
                    # skip the first two body lines - they are table headers
                    if in_body_count >= 2:
                        line = line.strip()
                        if line:
                            yield line
                    in_body_count += 1
        finally:
            fconn.close()
    finally:
        conn.close()
197
198
def get_address_device_mapping():
    """Map every alternative address (MID) of an OLSR node to the main address of that node.

    The "mid" section of the txtinfo service is parsed. The first token of each line is the main
    address, the remainder of the line contains its alternative addresses:
        192.168.2.171   192.168.22.171;192.168.12.171
    Since olsr v0.9.5 the alternative addresses are separated by whitespace instead:
        192.168.2.171   192.168.22.171    192.168.12.171

    @return: dictionary (alternative address -> main address)
    """
    main_address_by_alias = {}
    for entry in query_olsrd_txtservice("mid"):
        main_address, alias_field = entry.split(None, 1)
        # turn the old separator (semicolon) into whitespace - thus both formats are handled
        for alias in " ".join(alias_field.split(";")).split():
            main_address_by_alias[alias] = main_address
    return main_address_by_alias
210
211
def count_routes_by_neighbour(address_mapping, ignore_list):
    """Count the single-host routes passing through each neighbour.

    The "rou" section of the txtinfo service is parsed. Example line:
        192.168.1.79/32 192.168.12.38   4       4.008   wlan0

    @param address_mapping: dictionary (alternative address -> main address); routes towards an
        alternative address are not counted and a gateway given by its alternative address is
        counted for its main address
    @param ignore_list: route targets (including the "/32" suffix) that are not counted
    @return: dictionary (main address of the neighbour -> number of routes)
    """
    counters = {}
    for route in query_olsrd_txtservice("rou"):
        fields = route.split()
        destination, gateway = fields[0], fields[1]
        # only single-host routes are of interest
        if not destination.endswith("/32"):
            continue
        # skip MIDs (only real nodes are counted) and explicitly ignored targets
        if (destination[:-3] in address_mapping) or (destination in ignore_list):
            continue
        # account the route for the main address of the neighbour
        neighbour = address_mapping.get(gateway, gateway)
        counters[neighbour] = counters.get(neighbour, 0) + 1
    return counters
232
233
def get_olsr_links():
    """Collect the details of all links to direct neighbours.

    The "lin" section of the txtinfo service is combined with the MID mapping and the number of
    routes passing through each neighbour (HNA targets are not counted).

    @return: list of dictionaries sorted by the "remote" address; keys of each dictionary:
        "local", "remote" (main address of the neighbour), "hysterese", "lq", "nlq", "cost"
        (floats) and "route_count"
    """
    aliases = get_address_device_mapping()
    announced_targets = []
    for hna_line in query_olsrd_txtservice("hna"):
        announced_targets.append(hna_line.split()[0])
    routes_per_neighbour = count_routes_by_neighbour(aliases, announced_targets)
    links = []
    for line in query_olsrd_txtservice("lin"):
        fields = line.split()
        # an infinite cost is reported as "INFINITE": "float" understands only "inf"
        if fields[-1] == "INFINITE":
            fields[-1] = "inf"
        local_address, remote_address = fields[0], fields[1]
        # use the main address of the neighbour (in case of an MID)
        main_address = aliases.get(remote_address, remote_address)
        link = {"local": local_address, "remote": main_address}
        # the four numeric columns follow the two addresses
        for offset, key in enumerate(("hysterese", "lq", "nlq", "cost")):
            link[key] = float(fields[offset + 2])
        link["route_count"] = routes_per_neighbour.get(main_address, 0)
        links.append(link)
    return sorted(links, key=lambda item: item["remote"])
257
258
def _read_file(filename):
    """Read a text file and return its content as a list of lines.

    The content is split at LINESEP, thus a trailing line separator results in an empty string
    being the last item.
    The file is closed explicitly (via "with") instead of relying on the garbage collection of
    the file object.

    @param filename: path of the file to be read
    @return: list of lines (without line separators); an empty list if an OSError occurs
    """
    try:
        with open(filename, "r") as source:
            return source.read().split(LINESEP)
    except OSError:
        return []
264
265
def get_ping_times(hosts):
    """Ping every host once and return the average roundtrip times.

    The pings are executed by a single shell command that writes one line per host into a
    temporary file. Example line for a reachable host:
        192.168.2.41 round-trip min/avg/max = 4.226/4.226/4.226 ms
    The line of an unreachable host contains only the host itself.

    The host names are passed to the shell within single quotes, thus special characters of the
    shell are not interpreted. The temporary file is removed in any case. Lines with an
    unexpected format are skipped (the host is treated like an unreachable one).

    @param hosts: list of host names or addresses
    @return: dictionary (host -> average roundtrip time as float); hosts without a result are
        missing; an empty dictionary if the shell command failed
    """
    tempfile = "/tmp/munin-olsrd-{pid}.tmp".format(pid=os.getpid())
    # a single quote within a host is replaced with: end of quoting, quoted quote, start of quoting
    quoted_hosts = " ".join("'" + host.replace("'", "'\"'\"'") + "'" for host in hosts)
    command = ('for host in {hosts}; do echo -n "$host "; '
               'ping -c 1 -w 1 "$host" | grep /avg/ || echo; done >{tempfile}'
               .format(hosts=quoted_hosts, tempfile=tempfile))
    try:
        # micropython supports only "os.system" (as of 2015) - thus we need to stick with it for
        # OpenWrt.
        if os.system(command) != 0:
            return {}
        lines = _read_file(tempfile)
    finally:
        try:
            os.unlink(tempfile)
        except OSError:
            # the file may be missing, if the shell failed to create it
            pass
    result = {}
    for line in lines:
        tokens = line.split()
        if len(tokens) > 1:
            try:
                # the token before the unit ("ms") contains the times separated by slashes
                result[tokens[0]] = float(tokens[-2].split("/")[1])
            except (IndexError, ValueError):
                # unexpected output of "ping": no value for this host
                continue
    return result
288
289
def do_config():
    """Print the munin configuration of all graphs (the "config" action).

    The following multigraphs are described:
        * olsr_link_quality: overview and one subgraph ("host_*") for every neighbour
        * olsr_neighbour_link_count: number of routes passing through each neighbour
        * olsr_neighbour_ping: overview and one subgraph ("host_*") for every neighbour

    The titles of the subgraphs contain the address of the neighbour. Only the names of graphs
    and fields use the sanitized form of the address (see "get_clean_fieldname").
    """
    links = list(get_olsr_links())

    # link quality with regard to neighbours
    print("multigraph olsr_link_quality")
    print(LQ_GRAPH_CONFIG.format(title="OLSR Link Quality"))
    for link in links:
        print(LQ_VALUES_CONFIG.format(
            label=link["remote"],
            suffix="_{host}".format(host=get_clean_fieldname(link["remote"])),
            draw_type="AREASTACK"))
    # link quality - single subgraphs
    for link in links:
        print("multigraph olsr_link_quality.host_{remote}"
              .format(remote=get_clean_fieldname(link["remote"])))
        title = "Link Quality towards {host}".format(host=link["remote"])
        print(LQ_GRAPH_CONFIG.format(title=title))
        print(LQ_VALUES_CONFIG.format(label="Link Quality", suffix="", draw_type="AREA"))

    # link count ("number of nodes behind each neighbour")
    print("multigraph olsr_neighbour_link_count")
    print(NEIGHBOUR_COUNT_CONFIG)
    for link in links:
        print(NEIGHBOUR_COUNT_VALUE
              .format(host=link["remote"], host_fieldname=get_clean_fieldname(link["remote"]),
                      draw_type="AREASTACK"))

    # neighbour ping
    print("multigraph olsr_neighbour_ping")
    print(NEIGHBOUR_PING_CONFIG.format(title="Ping time of neighbours"))
    for link in links:
        print(NEIGHBOUR_PING_VALUE
              .format(host=link["remote"], host_fieldname=get_clean_fieldname(link["remote"])))
    # neighbour pings - single subgraphs
    for link in links:
        remote = get_clean_fieldname(link["remote"])
        print("multigraph olsr_neighbour_ping.host_{remote}".format(remote=remote))
        # the title is shown to the user: use the real address instead of the sanitized one
        # (consistent with the titles of the link quality subgraphs)
        title = "Ping time of {host}".format(host=link["remote"])
        print(NEIGHBOUR_PING_CONFIG.format(title=title))
        print(NEIGHBOUR_PING_VALUE.format(host=link["remote"], host_fieldname=remote))
329
330
def do_fetch():
    """Print the current values of all graphs (the "fetch" action).

    The order of the output follows the configuration: link quality (overview and one subgraph
    for every neighbour), number of routes via each neighbour and finally the ping times
    (overview and one subgraph for every neighbour). A missing ping time is reported as "U".
    """
    links = list(get_olsr_links())
    # combine every link with the sanitized form of its remote address
    named_links = [(get_clean_fieldname(link["remote"]), link) for link in links]

    # link quality (ETX): overview of all neighbours
    print("multigraph olsr_link_quality")
    for fieldname, link in named_links:
        print("lq_{remote}.value {lq:f}".format(remote=fieldname, lq=link["lq"]))
        print("nlq_{remote}.value {nlq:f}".format(remote=fieldname, nlq=link["nlq"]))
    # link quality (ETX): one detailed graph for each neighbour
    for fieldname, link in named_links:
        print("multigraph olsr_link_quality.host_{remote}".format(remote=fieldname))
        print("lq.value {lq:f}".format(lq=link["lq"]))
        print("nlq.value {nlq:f}".format(nlq=link["nlq"]))

    # number of links/nodes behind each neighbour
    print("multigraph olsr_neighbour_link_count")
    for fieldname, link in named_links:
        print("neighbour_{host_fieldname}.value {value}"
              .format(host_fieldname=fieldname, value=link["route_count"]))

    # ping roundtrip times: overview of all neighbours
    print("multigraph olsr_neighbour_ping")
    ping_times = get_ping_times([link["remote"] for link in links])
    ping_values = []
    for fieldname, link in named_links:
        ping_time = ping_times.get(link["remote"], None)
        if ping_time is None:
            # "U" is the representation of an unknown value
            ping_values.append((fieldname, "U"))
        else:
            ping_values.append((fieldname, "{:.4f}".format(ping_time)))
    for fieldname, value in ping_values:
        print("neighbour_{remote}.value {value}".format(remote=fieldname, value=value))
    # ping roundtrip times: one detailed graph for each neighbour
    for fieldname, value in ping_values:
        print("multigraph olsr_neighbour_ping.host_{remote}".format(remote=fieldname))
        print("neighbour_{remote}.value {value}".format(remote=fieldname, value=value))
371
372
if __name__ == "__main__":
    # The first argument selects the action. A missing argument is handled like an empty one:
    # both result in the values being printed ("fetch").
    action = sys.argv[1] if len(sys.argv) > 1 else ""

    if action not in ("", "config", "autoconf", "version"):
        # unknown argument
        sys.stderr.write("Unknown argument{eol}".format(eol=LINESEP))
        sys.exit(1)

    if action == "config":
        do_config()
        # munin may ask for the values being delivered along with the configuration
        if os.getenv("MUNIN_CAP_DIRTYCONFIG") == "1":
            do_fetch()
        sys.exit(0)

    if action == "autoconf":
        # the plugin is considered usable, if the olsrd binary exists
        olsrd_binary = os.getenv('OLSRD_BIN_PATH', '/usr/sbin/olsrd')
        print('yes' if os.path.exists(olsrd_binary) else 'no')
        sys.exit(0)

    if action == "version":
        print('olsrd Munin plugin, version %s' % plugin_version)
        sys.exit(0)

    do_fetch()
399
400# final marker for shell / python hybrid script (see "Interpreter Selection")
401EOF = True
402EOF
403