1 /*
2     SPDX-License-Identifier: GPL-2.0-or-later
3     SPDX-FileCopyrightText: 2021 Harald Sitter <sitter@kde.org>
4 */
5 
6 #include "linuxprocmapsparser.h"
7 
8 #include <QByteArray>
9 #include <QByteArrayList>
10 #include <QDebug>
11 #include <QFile>
12 #include <QRegularExpression>
13 
14 #include <errno.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/types.h>
18 #include <unistd.h>
19 
20 #include "drkonqi_debug.h"
21 
isLibraryPath(const QString & path)22 bool LinuxProc::isLibraryPath(const QString &path)
23 {
24     // This intentionally matches potential suffixes, i.e. "/usr/lib/foo.so.0" but also "foo.so (deleted)"
25     static QRegularExpression soExpression(QStringLiteral("(?<path>.+\\.(so|py)([^/]*))$"));
26     const auto soMatch = soExpression.match(path);
27     return soMatch.isValid() && soMatch.hasMatch() && !soMatch.captured(u"path").isEmpty();
28 }
29 
hasMapsDeletedFiles(const QString & exePathString,const QByteArray & maps,Check check)30 bool LinuxProc::hasMapsDeletedFiles(const QString &exePathString, const QByteArray &maps, Check check)
31 {
32     const QByteArray exePath = QFile::encodeName(exePathString);
33     const QByteArrayList lines = maps.split('\n');
34     for (const auto &line : lines) {
35         if (line.isEmpty()) {
36             continue;
37         }
38         // Walk string by tokens. This is by far the easiest way to parse the format as anything after
39         // the first 5 fields (minus the tokens) is the pathname. The pathname may be nothing, or contain more
40         // spaces in turn. Qt has no convenient API for this, use strtok.
41 
42         QByteArray mutableLine = line;
43         // address
44         strtok(mutableLine.data(), " ");
45         // perms
46         strtok(nullptr, " ");
47         // offset
48         strtok(nullptr, " ");
49         // dev
50         strtok(nullptr, " ");
51         // inode
52         const QByteArray inode(strtok(nullptr, " "));
53         // remainder is the pathname
54         const QByteArray pathname = QByteArray(strtok(nullptr, "\n")).simplified(); // simplify to make evaluation easier
55 
56         if (pathname.isEmpty() || pathname.at(0) != QLatin1Char('/')) {
57             // Could be pseudo entry like [heap] or anonymous region.
58             continue;
59         }
60 
61         if (pathname.startsWith(QByteArrayLiteral("/memfd"))) {
62             // Qml.so's JIT shows up under memfd. This is a false positive as it isn't a real path in the
63             // file system. Skip over it.
64             continue;
65         }
66 
67         const QByteArray deletedMarker = QByteArrayLiteral(" (deleted)");
68         // We filter only .so files to ensure that we don't trip over cache files or the like.
69         // NB: includes .so* and .py* since we also implicitly support snakes to
70         //   a degree
71         // As a result we need to explicitly look for the main executable.
72         if (pathname == exePath + deletedMarker) {
73             return true;
74         }
75 
76         if (pathname != exePath && !isLibraryPath(QFile::decodeName(pathname))) {
77             continue; // not exe and not a library.
78         }
79 
80         // Deleted marker always declares something missing. Even when we perform additional stat checks on it.
81         if (pathname.endsWith(deletedMarker)) {
82             return true;
83         }
84 
85         switch (check) {
86         case Check::DeletedMarker: {
87             // If we get here the file hasn't been marked deleted.
88             break;
89         }
90         case Check::Stat: {
91             struct stat info {
92             };
93             const int ret = stat(pathname.constData(), &info);
94             if (ret == -1) {
95                 qCWarning(DRKONQI_LOG) << "Couldn't stat file, assuming it was deleted" << pathname << strerror(errno);
96                 return true;
97                 break;
98             }
99 
100             if (info.st_ino != inode.toULongLong()) {
101                 qCWarning(DRKONQI_LOG) << "Found mismatching inode on" << pathname << info.st_ino << inode;
102                 return true;
103                 break;
104             }
105 
106             // It's very awkward but st_dev seems dodgy at least with btrfs. The dev_t the kernel has is not the one
107             // stat has and what's more the kernel has one that solid doesn't know about either. That may simply be
108             // because btrfs makes up fake dev_ts since multiple btrfs subvolumes may be on the same block device.
109             // Anyway, it's unfortunate but I guess we had best not look at the device.
110         } break;
111         }
112     }
113 
114     return false;
115 }
116