1 /* SPDX-FileCopyrightText: 2010-2018 Jesper Pedersen <blackie@blackie.dk> and
2    Robert Krawitz <rlk@alum.mit.edu>
3 
4    SPDX-License-Identifier: GPL-2.0-or-later
5 */
6 
7 #include "FastDir.h"
8 
9 #include <kpabase/Logging.h>
10 
11 #include <QFile>
12 #include <QLoggingCategory>
13 #include <QMap>
14 
15 extern "C" {
16 #include <dirent.h>
17 #include <stdio.h>
18 #include <sys/types.h>
19 
20 /*
21  * Ideally the order of entries returned by readdir() should be close
22  * to optimal; intuitively, it should reflect the order in which inodes
23  * are returned from getdents() or equivalent, which should be the order
24  * in which they're stored on the disk.  Experimentally, that isn't always
25  * true.  One test, involving 10839 files totaling 90 GB resulted in
26  * readdir() returning files in random order, where "find" returned them
27  * sorted (and the same version of find does *not* in general return files
28  * in alphabetical order).
29  *
30  * By repeated measurement, loading the files in the order returned by
31  * readdir took about 16:30, where loading them in alphanumeric sorted order
32  * took about 15:00.  Running a similar test outside of kpa (using the order
33  * returned by readdir() vs. sorted to cat the files through dd and measuring
34  * the time) yielded if anything an even greater discrepancy (17:35 vs. 14:10).
35  *
36  * This issue is filesystem dependent, but is known to affect the extN
37  * filesystems commonly used on Linux that use a hashed tree structure to
38  * store directories.  See e. g.
39  * http://home.ifi.uio.no/paalh/publications/files/ipccc09.pdf and its
40  * accompanying presentation
41  * http://www.linux-kongress.org/2009/slides/linux_disk_io_performance_havard_espeland.pdf
42  *
43  * We could do even better by sorting by block position, but that would
44  * greatly increase complexity.
45  */
46 #ifdef __linux__
47 #include <linux/magic.h>
48 #include <sys/vfs.h>
49 #define HAVE_STATFS
50 #define STATFS_FSTYPE_EXT2 EXT2_SUPER_MAGIC // Includes EXT3_SUPER_MAGIC, EXT4_SUPER_MAGIC
51 #else
52 #ifdef __FreeBSD__
53 #include <sys/disklabel.h>
54 #include <sys/mount.h>
55 #include <sys/param.h>
56 #define HAVE_STATFS
57 #define STATFS_FSTYPE_EXT2 FS_EXT2FS
58 #endif
59 // other platforms fall back to known-safe (but slower) implementation
60 #endif // __linux__
61 }
62 
63 typedef QMap<ino_t, QString> InodeMap;
64 
65 typedef QSet<QString> StringSet;
66 
FastDir(const QString & path)67 DB::FastDir::FastDir(const QString &path)
68     : m_path(path)
69 {
70     InodeMap tmpAnswer;
71     DIR *dir;
72     dirent *file;
73     QByteArray bPath(QFile::encodeName(path));
74     dir = opendir(bPath.constData());
75     if (!dir)
76         return;
77     const bool doSortByInode = sortByInode(bPath);
78     const bool doSortByName = sortByName(bPath);
79 
80 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
81     // ZaJ (2016-03-23): while porting to Qt5/KF5, this code-path is disabled on my system
82     //     I don't want to touch this right now since I can't verify correctness in any way.
83     // rlk 2018-05-20: readdir_r is deprecated as of glibc 2.24; see
84     //     http://man7.org/linux/man-pages/man3/readdir_r.3.html.
85     //     There are problems with MAXNAMLEN/NAME_MAX and friends, that
86     //     can differ from filesystem to filesystem.  It's also expected
87     //     that POSIX will (if it hasn't already) deprecate readdir_r
88     //     and require readdir to be thread safe.
89     union dirent_buf {
90         struct KDE_struct_dirent mt_file;
91         char b[sizeof(struct dirent) + MAXNAMLEN + 1];
92     } *u = new union dirent_buf;
93     while (readdir_r(dir, &(u->mt_file), &file) == 0 && file)
94 #else
95     // FIXME: use 64bit versions of readdir and dirent?
96     while ((file = readdir(dir)))
97 #endif // QT_THREAD_SUPPORT && _POSIX_THREAD_SAFE_FUNCTIONS
98     {
99         if (doSortByInode)
100             tmpAnswer.insert(file->d_ino, QFile::decodeName(file->d_name));
101         else
102             m_sortedList.append(QFile::decodeName(file->d_name));
103     }
104 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
105     delete u;
106 #endif
107     (void)closedir(dir);
108 
109     if (doSortByInode) {
110         for (InodeMap::iterator it = tmpAnswer.begin(); it != tmpAnswer.end(); ++it) {
111             m_sortedList << it.value();
112         }
113     } else if (doSortByName) {
114         m_sortedList.sort();
115     }
116 }
117 
118 // No currently known filesystems where sort by name is optimal
sortByName(const QByteArray &)119 constexpr bool DB::sortByName(const QByteArray &)
120 {
121     return false;
122 }
123 
sortByInode(const QByteArray & path)124 bool DB::sortByInode(const QByteArray &path)
125 {
126 #ifdef HAVE_STATFS
127     struct statfs buf;
128     if (statfs(path.constData(), &buf) == -1)
129         return -1;
130     // Add other filesystems as appropriate
131     switch (buf.f_type) {
132     case STATFS_FSTYPE_EXT2:
133         return true;
134     default:
135         return false;
136     }
137 #else // HAVE_STATFS
138     Q_UNUSED(path);
139     return false;
140 #endif // HAVE_STATFS
141 }
142 
entryList() const143 const QStringList DB::FastDir::entryList() const
144 {
145     return m_sortedList;
146 }
147 
sortFileList(const StringSet & files) const148 QStringList DB::FastDir::sortFileList(const StringSet &files) const
149 {
150     QStringList answer;
151     StringSet tmp(files);
152     for (const QString &fileName : m_sortedList) {
153         if (tmp.contains(fileName)) {
154             answer << fileName;
155             tmp.remove(fileName);
156         } else if (tmp.contains(m_path + fileName)) {
157             answer << m_path + fileName;
158             tmp.remove(m_path + fileName);
159         }
160     }
161     if (tmp.count() > 0) {
162         qCDebug(FastDirLog) << "Files left over after sorting on " << m_path;
163         for (const QString &fileName : tmp) {
164             qCDebug(FastDirLog) << fileName;
165             answer << fileName;
166         }
167     }
168     return answer;
169 }
170 
sortFileList(const QStringList & files) const171 QStringList DB::FastDir::sortFileList(const QStringList &files) const
172 {
173     StringSet tmp;
174     for (const QString &fileName : files) {
175         tmp << fileName;
176     }
177     return sortFileList(tmp);
178 }
179 
180 // vi:expandtab:tabstop=4 shiftwidth=4:
181