1 /* SPDX-FileCopyrightText: 2010-2018 Jesper Pedersen <blackie@blackie.dk> and
2 Robert Krawitz <rlk@alum.mit.edu>
3
4 SPDX-License-Identifier: GPL-2.0-or-later
5 */
6
7 #include "FastDir.h"
8
9 #include <kpabase/Logging.h>
10
11 #include <QFile>
12 #include <QLoggingCategory>
13 #include <QMap>
14
15 extern "C" {
16 #include <dirent.h>
17 #include <stdio.h>
18 #include <sys/types.h>
19
20 /*
21 * Ideally the order of entries returned by readdir() should be close
22 * to optimal; intuitively, it should reflect the order in which inodes
23 * are returned from getdents() or equivalent, which should be the order
24 * in which they're stored on the disk. Experimentally, that isn't always
25 * true. One test, involving 10839 files totaling 90 GB resulted in
26 * readdir() returning files in random order, where "find" returned them
27 * sorted (and the same version of find does *not* in general return files
28 * in alphabetical order).
29 *
30 * By repeated measurement, loading the files in the order returned by
31 * readdir took about 16:30, where loading them in alphanumeric sorted order
32 * took about 15:00. Running a similar test outside of kpa (using the order
33 * returned by readdir() vs. sorted to cat the files through dd and measuring
34 * the time) yielded if anything an even greater discrepancy (17:35 vs. 14:10).
35 *
36 * This issue is filesystem dependent, but is known to affect the extN
37 * filesystems commonly used on Linux that use a hashed tree structure to
38 * store directories. See e. g.
39 * http://home.ifi.uio.no/paalh/publications/files/ipccc09.pdf and its
40 * accompanying presentation
41 * http://www.linux-kongress.org/2009/slides/linux_disk_io_performance_havard_espeland.pdf
42 *
43 * We could do even better by sorting by block position, but that would
44 * greatly increase complexity.
45 */
46 #ifdef __linux__
47 #include <linux/magic.h>
48 #include <sys/vfs.h>
49 #define HAVE_STATFS
50 #define STATFS_FSTYPE_EXT2 EXT2_SUPER_MAGIC // Includes EXT3_SUPER_MAGIC, EXT4_SUPER_MAGIC
51 #else
52 #ifdef __FreeBSD__
53 #include <sys/disklabel.h>
54 #include <sys/mount.h>
55 #include <sys/param.h>
56 #define HAVE_STATFS
57 #define STATFS_FSTYPE_EXT2 FS_EXT2FS
58 #endif
59 // other platforms fall back to known-safe (but slower) implementation
60 #endif // __linux__
61 }
62
63 typedef QMap<ino_t, QString> InodeMap;
64
65 typedef QSet<QString> StringSet;
66
FastDir(const QString & path)67 DB::FastDir::FastDir(const QString &path)
68 : m_path(path)
69 {
70 InodeMap tmpAnswer;
71 DIR *dir;
72 dirent *file;
73 QByteArray bPath(QFile::encodeName(path));
74 dir = opendir(bPath.constData());
75 if (!dir)
76 return;
77 const bool doSortByInode = sortByInode(bPath);
78 const bool doSortByName = sortByName(bPath);
79
80 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
81 // ZaJ (2016-03-23): while porting to Qt5/KF5, this code-path is disabled on my system
82 // I don't want to touch this right now since I can't verify correctness in any way.
83 // rlk 2018-05-20: readdir_r is deprecated as of glibc 2.24; see
84 // http://man7.org/linux/man-pages/man3/readdir_r.3.html.
85 // There are problems with MAXNAMLEN/NAME_MAX and friends, that
86 // can differ from filesystem to filesystem. It's also expected
87 // that POSIX will (if it hasn't already) deprecate readdir_r
88 // and require readdir to be thread safe.
89 union dirent_buf {
90 struct KDE_struct_dirent mt_file;
91 char b[sizeof(struct dirent) + MAXNAMLEN + 1];
92 } *u = new union dirent_buf;
93 while (readdir_r(dir, &(u->mt_file), &file) == 0 && file)
94 #else
95 // FIXME: use 64bit versions of readdir and dirent?
96 while ((file = readdir(dir)))
97 #endif // QT_THREAD_SUPPORT && _POSIX_THREAD_SAFE_FUNCTIONS
98 {
99 if (doSortByInode)
100 tmpAnswer.insert(file->d_ino, QFile::decodeName(file->d_name));
101 else
102 m_sortedList.append(QFile::decodeName(file->d_name));
103 }
104 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
105 delete u;
106 #endif
107 (void)closedir(dir);
108
109 if (doSortByInode) {
110 for (InodeMap::iterator it = tmpAnswer.begin(); it != tmpAnswer.end(); ++it) {
111 m_sortedList << it.value();
112 }
113 } else if (doSortByName) {
114 m_sortedList.sort();
115 }
116 }
117
118 // No currently known filesystems where sort by name is optimal
sortByName(const QByteArray &)119 constexpr bool DB::sortByName(const QByteArray &)
120 {
121 return false;
122 }
123
sortByInode(const QByteArray & path)124 bool DB::sortByInode(const QByteArray &path)
125 {
126 #ifdef HAVE_STATFS
127 struct statfs buf;
128 if (statfs(path.constData(), &buf) == -1)
129 return -1;
130 // Add other filesystems as appropriate
131 switch (buf.f_type) {
132 case STATFS_FSTYPE_EXT2:
133 return true;
134 default:
135 return false;
136 }
137 #else // HAVE_STATFS
138 Q_UNUSED(path);
139 return false;
140 #endif // HAVE_STATFS
141 }
142
entryList() const143 const QStringList DB::FastDir::entryList() const
144 {
145 return m_sortedList;
146 }
147
sortFileList(const StringSet & files) const148 QStringList DB::FastDir::sortFileList(const StringSet &files) const
149 {
150 QStringList answer;
151 StringSet tmp(files);
152 for (const QString &fileName : m_sortedList) {
153 if (tmp.contains(fileName)) {
154 answer << fileName;
155 tmp.remove(fileName);
156 } else if (tmp.contains(m_path + fileName)) {
157 answer << m_path + fileName;
158 tmp.remove(m_path + fileName);
159 }
160 }
161 if (tmp.count() > 0) {
162 qCDebug(FastDirLog) << "Files left over after sorting on " << m_path;
163 for (const QString &fileName : tmp) {
164 qCDebug(FastDirLog) << fileName;
165 answer << fileName;
166 }
167 }
168 return answer;
169 }
170
sortFileList(const QStringList & files) const171 QStringList DB::FastDir::sortFileList(const QStringList &files) const
172 {
173 StringSet tmp;
174 for (const QString &fileName : files) {
175 tmp << fileName;
176 }
177 return sortFileList(tmp);
178 }
179
180 // vi:expandtab:tabstop=4 shiftwidth=4:
181