1 /*
2 * Copyright 2005-2021 Fabrice Colin
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19 #include "config.h"
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #ifdef HAVE_MMAP
26 #include <sys/mman.h>
27 #endif
28 #ifdef HAVE_SYS_XATTR_H
29 #include <sys/xattr.h>
30 #endif
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <iostream>
35 #include <set>
36
37 #include "Document.h"
38 #include "TimeConverter.h"
39 #include "Memory.h"
40
41 using std::clog;
42 using std::endl;
43 using std::string;
44 using std::set;
45
46 #ifdef HAVE_SYS_XATTR_H
/// Reads the value of an extended attribute of an open file.
/// @param fd descriptor of the open file.
/// @param attrName full name of the attribute, eg "user.mime_type".
/// @return a NUL-terminated buffer the caller releases with delete[],
/// or NULL if the attribute is missing, empty or can't be read.
static char *getXAttr(int fd, const string &attrName)
{
	// A first call with no buffer tells how large the value is
	ssize_t attrSize = fgetxattr(fd, attrName.c_str(), NULL, 0);
	if (attrSize <= 0)
	{
		return NULL;
	}

	// fgetxattr() doesn't terminate the value, so leave room for a NUL
	char *pAttr = new char[attrSize + 1];

	ssize_t readSize = fgetxattr(fd, attrName.c_str(), pAttr, attrSize);
	if (readSize > 0)
	{
		// Terminate at what was actually read, the value may have shrunk
		pAttr[readSize] = '\0';
		return pAttr;
	}

	// Don't leak the buffer when the value can't be read
	delete[] pAttr;

	return NULL;
}
64 #endif
65
Document()66 Document::Document() :
67 DocumentInfo(),
68 m_pData(NULL),
69 m_dataLength(0),
70 m_isMapped(false)
71 {
72 }
73
Document(const string & title,const string & location,const string & type,const string & language)74 Document::Document(const string &title, const string &location,
75 const string &type, const string &language) :
76 DocumentInfo(title, location, type, language),
77 m_pData(NULL),
78 m_dataLength(0),
79 m_isMapped(false)
80 {
81 }
82
Document(const DocumentInfo & info)83 Document::Document(const DocumentInfo &info) :
84 DocumentInfo(info),
85 m_pData(NULL),
86 m_dataLength(0),
87 m_isMapped(false)
88 {
89 }
90
/// Builds a copy of the given document.
Document::Document(const Document &other) :
	DocumentInfo(other), m_pData(NULL), m_dataLength(0), m_isMapped(false)
{
	// The data is duplicated, not shared: setData() copies it into
	// a buffer that belongs to this document
	setData(other.m_pData, other.m_dataLength);
}
100
~Document()101 Document::~Document()
102 {
103 resetData();
104 }
105
/// Assigns the given document's properties and a copy of its data.
/// If the other document has no data, this document ends up with none.
Document& Document::operator=(const Document& other)
{
	if (this != &other)
	{
		DocumentInfo::operator=(other);

		// Always release our own data first: setData() returns early,
		// without discarding anything, when there's no data to copy.
		// This also clears the mapped flag while the mapping is
		// released, so the two can't get out of sync
		resetData();

		// Copying does a deep copy
		setData(other.m_pData, other.m_dataLength);
	}

	return *this;
}
118
operator <(const Document & other) const119 bool Document::operator<(const Document& other) const
120 {
121 if (DocumentInfo::operator<(other) == false)
122 {
123 if (m_dataLength < other.m_dataLength)
124 {
125 return true;
126 }
127
128 return false;
129 }
130
131 return true;
132 }
133
134 /// Copies the given data in the document.
setData(const char * data,off_t length)135 bool Document::setData(const char *data, off_t length)
136 {
137 if ((data == NULL) ||
138 (length == 0))
139 {
140 return false;
141 }
142
143 // Discard existing data
144 resetData();
145
146 m_pData = Memory::allocateBuffer(length + 1);
147 if (m_pData != NULL)
148 {
149 memcpy(m_pData, data, length);
150 m_pData[length] = '\0';
151 m_dataLength = length;
152
153 return true;
154 }
155
156 return false;
157 }
158
159 /// Maps the given file.
setDataFromFile(const string & fileName)160 bool Document::setDataFromFile(const string &fileName)
161 {
162 struct stat fileStat;
163 int openFlags = O_RDONLY;
164 #ifdef O_CLOEXEC
165 openFlags = openFlags|O_CLOEXEC;
166 #endif
167
168 if (fileName.empty() == true)
169 {
170 return false;
171 }
172
173 // Make sure the file exists
174 if (stat(fileName.c_str(), &fileStat) != 0)
175 {
176 return false;
177 }
178
179 if ((!S_ISDIR(fileStat.st_mode)) &&
180 (!S_ISREG(fileStat.st_mode)))
181 {
182 return false;
183 }
184
185 if ((S_ISDIR(fileStat.st_mode)) ||
186 (fileStat.st_size == 0))
187 {
188 // The file is empty
189 resetData();
190 return true;
191 }
192
193 // Open the file in read-only mode
194 #ifdef O_NOATIME
195 int fd = open(fileName.c_str(), openFlags|O_NOATIME);
196 #else
197 int fd = open(fileName.c_str(), openFlags);
198 #endif
199 #ifdef O_NOATIME
200 if ((fd < 0) &&
201 (errno == EPERM))
202 {
203 // Try again
204 fd = open(fileName.c_str(), openFlags);
205 }
206 #endif
207 if (fd < 0)
208 {
209 clog << "Document::setDataFromFile: " << fileName << " couldn't be opened" << endl;
210 return false;
211 }
212 #ifndef O_CLOEXEC
213 int fdFlags = fcntl(fd, F_GETFD);
214 fcntl(fd, F_SETFD, fdFlags|FD_CLOEXEC);
215 #endif
216
217 // Discard existing data
218 resetData();
219
220 #ifdef HAVE_MMAP
221 // Don't try and map more than 2Gb !
222 if (fileStat.st_size > 2147483647)
223 {
224 #ifdef DEBUG
225 clog << "Document::setDataFromFile: reached large file cap" << endl;
226 #endif
227 fileStat.st_size = 2147483647;
228 }
229 // Request a mapping of the whole file
230 void *mapSpace = mmap(NULL, (size_t)fileStat.st_size, PROT_READ, MAP_SHARED, fd, 0);
231 if (mapSpace != MAP_FAILED)
232 {
233 m_pData = (char*)mapSpace;
234 m_dataLength = fileStat.st_size;
235 m_isMapped = true;
236 #ifdef HAVE_MADVISE
237 if (madvise(mapSpace, (size_t)fileStat.st_size, MADV_SEQUENTIAL) != 0)
238 {
239 #ifdef DEBUG
240 clog << "Document::setDataFromFile: ignored memory advice" << endl;
241 #endif
242 }
243 #endif
244 }
245 else clog << "Document::setDataFromFile: mapping failed" << endl;
246 #else
247 m_pData = Memory::allocateBuffer(fileStat.st_size + 1);
248 if (m_pData != NULL)
249 {
250 if (read(fd, (void*)m_pData, fileStat.st_size) == fileStat.st_size)
251 {
252 m_pData[fileStat.st_size] = '\0';
253 m_dataLength = fileStat.st_size;
254 }
255 else
256 {
257 Memory::freeBuffer(m_pData, fileStat.st_size + 1);
258 m_pData = NULL;
259 }
260 }
261 else clog << "Document::setDataFromFile: reading failed" << endl;
262 #endif
263
264 setTimestamp(TimeConverter::toTimestamp(fileStat.st_mtime));
265 setSize(fileStat.st_size);
266
267 #ifdef HAVE_SYS_XATTR_H
268 // Any extended attributes ?
269 ssize_t listSize = flistxattr(fd, NULL, 0);
270 if (listSize > 0)
271 {
272 char *pList = new char[listSize];
273
274 if (flistxattr(fd, pList, listSize) > 0)
275 {
276 set<string> labels;
277 string attrList(pList, listSize);
278 string::size_type startPos = 0, endPos = attrList.find('\0');
279
280 while (endPos != string::npos)
281 {
282 string attrName(attrList.substr(startPos, endPos - startPos));
283 char *pAttr = NULL;
284
285 // FIXME: support common attributes defined at
286 // http://www.freedesktop.org/wiki/CommonExtendedAttributes
287 if (attrName == "user.mime_type")
288 {
289 pAttr = getXAttr(fd, attrName);
290 if (pAttr != NULL)
291 {
292 // Set the MIME type
293 setType(pAttr);
294 }
295 }
296
297 if (pAttr != NULL)
298 {
299 #ifdef DEBUG
300 clog << "Document::setDataFromFile: xattr " << attrName << "=" << pAttr << endl;
301 #endif
302 delete[] pAttr;
303 }
304
305 // Next
306 startPos = endPos + 1;
307 if (startPos < listSize)
308 {
309 endPos = attrList.find('\0', startPos);
310 }
311 else
312 {
313 endPos = string::npos;
314 }
315 }
316
317 if (labels.empty() == false)
318 {
319 setLabels(labels);
320 }
321 }
322
323 delete[] pList;
324 }
325 #endif
326
327 // Close the file
328 if (close(fd) == -1)
329 {
330 #ifdef DEBUG
331 clog << "Document::setDataFromFile: close failed" << endl;
332 #endif
333 }
334
335 return m_isMapped;
336 }
337
338 /// Returns the document's data; NULL if document is empty.
getData(off_t & length) const339 const char *Document::getData(off_t &length) const
340 {
341 length = m_dataLength;
342 return m_pData;
343 }
344
345 /// Resets the document's data.
resetData(void)346 void Document::resetData(void)
347 {
348 if (m_pData != NULL)
349 {
350 if (m_isMapped == false)
351 {
352 // Free
353 Memory::freeBuffer(m_pData, m_dataLength + 1);
354 }
355 #ifdef HAVE_MMAP
356 else
357 {
358 #ifdef HAVE_MADVISE
359 if (madvise((void*)m_pData, (size_t)m_dataLength, MADV_DONTNEED) != 0)
360 {
361 #ifdef DEBUG
362 clog << "Document::resetData: ignored memory advice" << endl;
363 #endif
364 }
365 #endif
366 // Unmap
367 munmap((void*)m_pData, (size_t)m_dataLength);
368 }
369 #endif
370 }
371
372 m_pData = NULL;
373 m_dataLength = 0;
374 m_isMapped = false;
375 }
376
377 /// Checks whether the document is binary.
isBinary(void) const378 bool Document::isBinary(void) const
379 {
380 unsigned int maxLen = 100;
381
382 // Look at the first 100 bytes or so
383 if (m_dataLength < 100)
384 {
385 maxLen = m_dataLength;
386 }
387 for (unsigned int i = 0; i < maxLen; ++i)
388 {
389 if (isascii(m_pData[i]) == 0)
390 {
391 #ifdef DEBUG
392 clog << "Document::isBinary: " << m_pData[i] << endl;
393 #endif
394 return true;
395 }
396 }
397
398 return false;
399 }
400