xref: /freebsd/tools/pkgbase/metalog_reader.lua (revision 6419bb52)
1#!/usr/libexec/flua
2
3-- SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4--
5-- Copyright(c) 2020 The FreeBSD Foundation.
6--
7-- Redistribution and use in source and binary forms, with or without
8-- modification, are permitted provided that the following conditions
9-- are met:
10-- 1. Redistributions of source code must retain the above copyright
11--    notice, this list of conditions and the following disclaimer.
12-- 2. Redistributions in binary form must reproduce the above copyright
13--    notice, this list of conditions and the following disclaimer in the
14--    documentation and/or other materials provided with the distribution.
15--
16-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19-- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26-- SUCH DAMAGE.
27
28-- $FreeBSD$
29
--- Parse the command line, analyse the METALOG file and print the report.
-- Report modes: `-a` prints the package report followed by the lint report,
-- `-c` prints the lint report only and `-p` prints the package list.  When
-- several are given, a set `-a` wins over `-c`, which wins over `-p`; a later
-- `-c` or `-p` clears an earlier `-a`.  When no mode is given at all the full
-- report is printed, as the help text documents, even if modifier flags such
-- as `-v` or `-Wcheck-notagdir` are present.
-- `-h` and every usage error end the script through usage().
--- @param args table sequence of command-line argument strings
function main(args)
	if #args == 0 then usage() end

	local filename
	local printall, checkonly, pkgonly = false, false, false
	-- modifiers that are only recognised directly after -p
	local pkgflags = {
		['-count'] = false,
		['-size'] = false,
		['-fsetuid'] = false,
		['-fsetgid'] = false,
		['-fsetid'] = false,
	}
	local verbose = false
	local w_notagdirs = false

	local i = 1
	while i <= #args do
		local a = args[i]
		if a == '-h' then
			usage(true)
		elseif a == '-a' then
			printall = true
		elseif a == '-c' then
			printall = false
			checkonly = true
		elseif a == '-p' then
			printall = false
			pkgonly = true
			-- consume the run of -p modifiers that immediately follows;
			-- the first argument that is not one of them is left for the
			-- outer loop
			while i < #args and pkgflags[args[i+1]] ~= nil do
				i = i+1
				pkgflags[args[i]] = true
			end
		elseif a == '-v' then
			verbose = true
		elseif a == '-Wcheck-notagdir' then
			w_notagdirs = true
		elseif a:match('^%-') then
			io.stderr:write('Unknown argument '..a..'.\n')
			usage()
		else
			-- the last non-option argument wins
			filename = a
		end
		i = i+1
	end

	if filename == nil then
		io.stderr:write('Missing filename.\n')
		usage()
	end

	-- no mode selected: fall back to the documented default (-a)
	if not (printall or checkonly or pkgonly) then
		printall = true
	end

	local sess = Analysis_session(filename, verbose, w_notagdirs)

	if printall then
		io.write('--- PACKAGE REPORTS ---\n')
		io.write(sess.pkg_report_full())
		io.write('--- LINTING REPORTS ---\n')
		print_lints(sess)
	elseif checkonly then
		print_lints(sess)
	else
		-- pkgonly: collect the requested predicates as a proper sequence
		local filters = {}
		if pkgflags['-fsetuid'] then
			filters[#filters+1] = sess.pkg_issetuid
		end
		if pkgflags['-fsetgid'] then
			filters[#filters+1] = sess.pkg_issetgid
		end
		if pkgflags['-fsetid'] then
			filters[#filters+1] = sess.pkg_issetid
		end
		io.write(sess.pkg_report_simple(
			pkgflags['-count'], pkgflags['-size'], filters))
	end
end
107
--- Print the command synopsis and terminate the script.
-- With a true `man`, a blank line, the synopsis and the full help text are
-- written to stdout and os.exit() is called without a status argument.
-- Otherwise only the synopsis is written to stderr and the script exits with
-- status 1.
--- @param man boolean
function usage(man)
	local synopsis = 'Usage: '..arg[0].. ' [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] metalog-path \n'

	if not man then
		io.stderr:write(synopsis)
		os.exit(1)
	end

	local help = [[

The script reads METALOG file created by pkgbase (make packages) and generates
reports about the installed system and issues.  It accepts an mtree file in a
format that's returned by `mtree -c | mtree -C`

  Options:
  -a         prints all scan results. this is the default option if no option
             is provided.
  -c         lints the file and gives warnings/errors, including duplication
             and conflicting metadata
      -Wcheck-notagdir    entries with dir type and no tags will be also
                          included the first time they appear
  -p         list all package names found in the file as exactly specified by
             `tags=package=...`
      -count       display the number of files of the package
      -size        display the size of the package
      -fsetgid     only include packages with setgid files
      -fsetuid     only include packages with setuid files
      -fsetid      only include packages with setgid or setuid files
  -v          verbose mode
  -h          help page

]]
	io.write('\n', synopsis, help)
	os.exit()
end
145
--- Write every lint report of a session to stdout.
-- The duplicate-entry report is produced and printed first, then the inode
-- report; within each report the warning text precedes the error text.
--- @param sess Analysis_session
function print_lints(sess)
	local warnings, errors = sess.dup_report()
	io.write(warnings, errors)

	warnings, errors = sess.inode_report()
	io.write(warnings, errors)
end
155
--- Iterate over a table in ascending key order.
-- The keys are collected with raw next() and sorted with table.sort's default
-- comparison when the iterator is created.  Each call of the returned
-- function yields the next key and the value currently stored under it, and
-- nil once the keys are exhausted.
--- @param t table
function sortedPairs(t)
	local keys = {}
	local key = next(t)
	while key ~= nil do
		keys[#keys+1] = key
		key = next(t, key)
	end
	table.sort(keys)

	local pos = 0
	return function()
		pos = pos + 1
		local current = keys[pos]
		return current, t[current]
	end
end
167
--- Build a new table holding f(value) under every key of t.
-- The input table is left untouched; only the first result of f is stored.
--- @param t table <T, U>
--- @param f function <U -> U>
function table_map(t, f)
	local mapped = {}
	for key, value in pairs(t) do
		local converted = f(value)
		mapped[key] = converted
	end
	return mapped
end
175
--- @class MetalogRow
-- a table containing a file's info, parsed from one line of a METALOG file
-- all attribute values in the table are strings
-- sample output:
--	{
--		filename = ./usr/share/man/man3/inet6_rthdr_segments.3.gz
--		linenum = 5
--		attrs = {
--			gname = 'wheel'
--			uname = 'root'
--			mode = '0444'
--			size = '1166'
--			time = nil
--			type = 'file'
--			tags = 'package=clibs,debug'
--		}
--	}
-- Leading whitespace before the path is ignored.  A line that carries only a
-- path yields an empty attrs table, tokens without '=' are skipped and
-- 'key=' stores an empty string, instead of raising an error.
--- @param line string one METALOG entry line
--- @param lineno number line number of the entry, stored as `linenum`
function MetalogRow(line, lineno)
	local attrs = {}
	-- the path is the first run of non-blank characters; everything after it
	-- is the (possibly empty) attribute list
	local filename, rest = line:match('^%s*(%S+)(.*)$')
	-- mtree file has space escaped as '\\040', not affecting splitting
	-- string by space
	for attrpair in (rest or ''):gmatch('[^ ]+') do
		-- split at the first '=' only: values such as 'package=clibs,debug'
		-- contain '=' themselves
		local k, v = attrpair:match('^(.-)=(.*)$')
		if k ~= nil then
			attrs[k] = v
		end
	end
	return {
		filename = filename,
		linenum = lineno,
		attrs = attrs,
	}
end
208
-- Check whether every row of an array of MetalogRows is equivalent to the
-- first row.  Returns true when they are; otherwise returns false followed by
-- the name of the first differing field that was found ('filename' or an
-- attribute key).
-- Line numbers are never compared.  Only attributes present in the row being
-- checked are examined, and an attribute missing from the first row is not
-- counted as a difference.
--- @param rows MetalogRow[]
--- @param ignore_name boolean do not compare file names
--- @param ignore_tags boolean do not compare the 'tags' attribute
function metalogrows_all_equal(rows, ignore_name, ignore_tags)
	local reference = rows[1]
	for _, row in ipairs(rows) do
		if not ignore_name and row.filename ~= reference.filename then
			return false, 'filename'
		end
		for key, value in pairs(row.attrs) do
			local skipped = ignore_tags and key == 'tags'
			if not skipped then
				local expected = reference.attrs[key]
				if expected ~= nil and value ~= expected then
					return false, key
				end
			end
		end
	end
	return true
end
235
--- Derive a package name from the value of a `tags=` attribute.
-- The tag string is split at commas.  The segment starting with 'package='
-- supplies the base name (the last such segment wins), the segments
-- 'development', 'profile', 'debug' and 'docs' supply the trailing suffix
-- (again the last one wins), and every other segment is inserted in between,
-- in order of appearance.  The parts are joined with '-'.
-- Only segments that really start with 'package=' are treated as the package
-- name; anything else that merely contains that text is an ordinary segment.
-- example: 'package=clibs,debug' -> 'clibs-debug'
--- @param tagstr string
function pkgname_from_tag(tagstr)
	local pkgname, pkgend = '', ''
	local extras = {}
	for seg in tagstr:gmatch('[^,]+') do
		local name = seg:match('^package=(.*)$')
		if name ~= nil then
			pkgname = name
		elseif seg == 'development' or seg == 'profile'
			or seg == 'debug' or seg == 'docs' then
			pkgend = seg
		else
			extras[#extras+1] = seg
		end
	end
	if #extras > 0 then
		pkgname = pkgname..'-'..table.concat(extras, '-')
	end
	if pkgend ~= '' then
		pkgname = pkgname..'-'..pkgend
	end
	return pkgname
end
254
--- @class Analysis_session
-- Reads the METALOG file once and returns a table of report functions that
-- are bound to the parsed data:
--   warn, errs          tables exported for callers; nothing in here fills them
--   pkg_issetuid(pkg)   true if an entry of the package has the setuid bit
--   pkg_issetgid(pkg)   true if an entry of the package has the setgid bit
--   pkg_issetid(pkg)    true if either of the above holds
--   pkg_report_full()   multi-line description of every package
--   pkg_report_simple() one line per package, optionally filtered
--   dup_report()        warnings and errors about repeated paths
--   inode_report()      notes and errors about paths sharing an inode
-- The script exits with status 1 if the file cannot be opened.
--- @param metalog string path of the METALOG file
--- @param verbose boolean list skipped paths in inode_report instead of a count
--- @param w_notagdirs boolean turn on to also check directories
function Analysis_session(metalog, verbose, w_notagdirs)
	local files = {} -- map<string, MetalogRow[]>
	-- set is map<elem, bool>. if bool is true then elem exists
	local pkgs = {} -- map<string, set<string>>
	--- @public
	local swarn = {}
	--- @public
	local serrs = {}

	-- permission bits as they appear in the octal mode attribute
	local S_ISUID = 2048 -- 04000
	local S_ISGID = 1024 -- 02000

	-- comma separated line numbers of the given rows
	--- @param rows MetalogRow[]
	local function linenums(rows)
		return table.concat(
			table_map(rows, function(e) return e.linenum end), ',')
	end

	-- returns number of files in package and size of package
	-- nil is returned upon errors
	--- @param pkgname string
	local function pkg_size(pkgname)
		local filecount, sz = 0, 0
		for filename in pairs(pkgs[pkgname]) do
			local rows = files[filename]
			-- normally, there should be only one row per filename
			-- if these rows are equal, there should be warning, but it
			-- does not affect size counting. if not, it is an error
			if #rows > 1 and not metalogrows_all_equal(rows) then
				return nil
			end
			local row = rows[1]
			if row.attrs.type == 'file' then
				-- an entry without a usable size attribute adds nothing
				-- instead of raising an arithmetic error
				sz = sz + (tonumber(row.attrs.size) or 0)
			end
			filecount = filecount + 1
		end
		return filecount, sz
	end

	-- true if any row of any path of the package has one of the bits of
	-- `mode` set.  rows without a parsable octal mode are skipped.
	--- @param pkgname string
	--- @param mode number
	local function pkg_ismode(pkgname, mode)
		for filename in pairs(pkgs[pkgname]) do
			for _, row in ipairs(files[filename]) do
				local bits = row.attrs.mode and tonumber(row.attrs.mode, 8)
				if bits and bits & mode ~= 0 then
					return true
				end
			end
		end
		return false
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetuid(pkgname)
		return pkg_ismode(pkgname, S_ISUID)
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetgid(pkgname)
		return pkg_ismode(pkgname, S_ISGID)
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetid(pkgname)
		return pkg_issetuid(pkgname) or pkg_issetgid(pkgname)
	end

	-- sample return:
	-- { [*string]: { count=1, size=2, issetuid=true, issetgid=true } }
	-- count and size are nil when pkg_size() reported an error
	local function pkg_report_helper_table()
		local res = {}
		for pkgname in pairs(pkgs) do
			local count, size = pkg_size(pkgname)
			res[pkgname] = {
				count = count,
				size = size,
				issetuid = pkg_issetuid(pkgname),
				issetgid = pkg_issetgid(pkgname),
			}
		end
		return res
	end

	-- returns a string describing package scan report
	--- @public
	local function pkg_report_full()
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			sb[#sb+1] = 'Package '..pkgname..':'
			if v.issetuid then sb[#sb+1] = ' setuid' end
			if v.issetgid then sb[#sb+1] = ' setgid' end
			-- '?' stands for a value pkg_size() could not determine
			sb[#sb+1] = '\n  number of files: '..(v.count or '?')
				..'\n  total size: '..(v.size or '?')
			sb[#sb+1] = '\n'
		end
		return table.concat(sb, '')
	end

	--- @param have_count boolean
	--- @param have_size boolean
	--- @param filters function[]
	--- @public
	-- returns a string describing package size report.
	-- sample: "mypackage 2 2048"* if both booleans are true
	-- a package is listed only if every filter returns a true value for it
	local function pkg_report_simple(have_count, have_size, filters)
		filters = filters or {}
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			local pred = true
			-- doing a foldl to all the function results with (and)
			for _, f in pairs(filters) do pred = pred and f(pkgname) end
			if pred then
				sb[#sb+1] = pkgname
					..(have_count and (' '..(v.count or '?')) or '')
					..(have_size and (' '..(v.size or '?')) or '')
					..'\n'
			end
		end
		return table.concat(sb, '')
	end

	-- returns a string describing duplicate file warnings,
	-- returns a string describing duplicate file errors
	--- @public
	local function dup_report()
		local warn, errs = {}, {}
		for filename, rows in sortedPairs(files) do
			if #rows > 1 then
				if metalogrows_all_equal(rows) then
					-- repeated line, just a warning
					warn[#warn+1] = 'warning: '..filename
						..' repeated with same meta: line '
						..linenums(rows)
					warn[#warn+1] = '\n'
				else
					-- same filename (possibly different tags), different
					-- metadata, an error.  the differing field is taken
					-- from this tag-ignoring comparison so that it names
					-- the real conflict and never 'tags'
					local samemeta, offby =
						metalogrows_all_equal(rows, false, true)
					if not samemeta then
						errs[#errs+1] = 'error: '..filename
							..' exists in multiple locations and with different meta: line '
							..linenums(rows)
							..'. off by "'..offby..'"'
						errs[#errs+1] = '\n'
					end
				end
			end
		end
		return table.concat(warn, ''), table.concat(errs, '')
	end

	-- returns a string describing warnings of found hard links
	-- returns a string describing errors of found hard links
	--- @public
	local function inode_report()
		-- obtain inodes of filenames
		local attributes = require('lfs').attributes
		local inm = {} -- map<number, string[]>
		local unstatables = {} -- string[]
		for filename in pairs(files) do
			-- only the first row of a filename is considered,
			-- and links and folders are skipped
			if files[filename][1].attrs.type == 'file' then
				-- make ./xxx become /xxx so that we can stat
				local path = filename:sub(2)
				local fs = attributes(path)
				if fs == nil then
					unstatables[#unstatables+1] = path
				else
					local inode = fs.ino
					inm[inode] = inm[inode] or {}
					-- add back the dot prefix
					table.insert(inm[inode], '.'..path)
				end
			end
		end

		local warn, errs = {}, {}
		for _, filenames in pairs(inm) do
			if #filenames > 1 then
				-- only the first row of a filename is considered
				local rows = table_map(filenames, function(e)
					return files[e][1]
				end)
				local iseq, offby = metalogrows_all_equal(rows, true, true)
				if not iseq then
					errs[#errs+1] = 'error: '
						..'entries point to the same inode but have different meta: '
						..table.concat(filenames, ',')..' in line '
						..linenums(rows)
						..'. off by "'..offby..'"'
					errs[#errs+1] = '\n'
				end
			end
		end

		if #unstatables > 0 then
			if verbose then
				warn[#warn+1] = 'note: skipped checking inodes: '
					..table.concat(unstatables, ',')..'\n'
			else
				warn[#warn+1] = 'note: skipped checking inodes for '
					..#unstatables..' entries\n'
			end
		end

		return table.concat(warn, ''), table.concat(errs, '')
	end

	do
		local fp, errmsg, errcode = io.open(metalog, 'r')
		if fp == nil then
			io.stderr:write('cannot open '..metalog..': '..errmsg..': '..errcode..'\n')
			os.exit(1)
		end

		-- scan all lines and put file data into the dictionaries
		local firsttimes = {} -- set<string>
		local lineno = 0
		for line in fp:lines() do
			lineno = lineno + 1
			-- skip lines beginning with # and blank lines
			if not line:match('^%s*#') and not line:match('^%s*$') then
				local data = MetalogRow(line, lineno)
				local untagged_dir = data.attrs.tags == nil
					and data.attrs.type == 'dir'
				if not w_notagdirs and untagged_dir
					and not firsttimes[data.filename] then
					-- entries with dir and no tags... ignore for the
					-- first time
					firsttimes[data.filename] = true
				else
					files[data.filename] = files[data.filename] or {}
					table.insert(files[data.filename], data)

					if data.attrs.tags ~= nil then
						-- kept local so that it does not leak into the
						-- global environment
						local pkgname = pkgname_from_tag(data.attrs.tags)
						pkgs[pkgname] = pkgs[pkgname] or {}
						pkgs[pkgname][data.filename] = true
					end
				end
			end
		end

		fp:close()
	end

	return {
		warn = swarn,
		errs = serrs,
		pkg_issetuid = pkg_issetuid,
		pkg_issetgid = pkg_issetgid,
		pkg_issetid = pkg_issetid,
		pkg_report_full = pkg_report_full,
		pkg_report_simple = pkg_report_simple,
		dup_report = dup_report,
		inode_report = inode_report
	}
end
520
-- Script entry point: run main() on the global `arg` table, which the
-- standalone interpreter fills with the command-line arguments.
main(arg)
522