1 /*
2  *  Copyright 2005-2021 Fabrice Colin
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  */
18 
19 #include "config.h"
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #ifdef HAVE_MMAP
26 #include <sys/mman.h>
27 #endif
28 #ifdef HAVE_SYS_XATTR_H
29 #include <sys/xattr.h>
30 #endif
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <iostream>
35 #include <set>
36 
37 #include "Document.h"
38 #include "TimeConverter.h"
39 #include "Memory.h"
40 
41 using std::clog;
42 using std::endl;
43 using std::string;
44 using std::set;
45 
46 #ifdef HAVE_SYS_XATTR_H
/// Reads the value of the extended attribute attrName on the open file fd.
///
/// Returns a NUL-terminated buffer allocated with new[], which the caller
/// must release with delete[], or NULL if the attribute is missing, empty
/// or couldn't be read.
static char *getXAttr(int fd, const std::string &attrName)
{
	// A first call without a buffer only reports the value's size
	ssize_t attrSize = fgetxattr(fd, attrName.c_str(), NULL, 0);
	if (attrSize <= 0)
	{
		return NULL;
	}

	// Attribute values are raw bytes without a terminator: reserve one more
	// byte so that the result can safely be used as a C string
	char *pAttr = new char[attrSize + 1];

	ssize_t readSize = fgetxattr(fd, attrName.c_str(), pAttr, attrSize);
	if (readSize <= 0)
	{
		// Don't leak the buffer if the value vanished or couldn't be read
		delete[] pAttr;
		return NULL;
	}

	// The value may have shrunk between the two calls: terminate at what
	// was actually read
	pAttr[readSize] = '\0';

	return pAttr;
}
64 #endif
65 
Document()66 Document::Document() :
67 	DocumentInfo(),
68 	m_pData(NULL),
69 	m_dataLength(0),
70 	m_isMapped(false)
71 {
72 }
73 
Document(const string & title,const string & location,const string & type,const string & language)74 Document::Document(const string &title, const string &location,
75 	const string &type, const string &language) :
76 	DocumentInfo(title, location, type, language),
77 	m_pData(NULL),
78 	m_dataLength(0),
79 	m_isMapped(false)
80 {
81 }
82 
Document(const DocumentInfo & info)83 Document::Document(const DocumentInfo &info) :
84 	DocumentInfo(info),
85 	m_pData(NULL),
86 	m_dataLength(0),
87 	m_isMapped(false)
88 {
89 }
90 
/// Copy constructor: duplicates other's information and takes a private
/// copy of its data.
Document::Document(const Document &other)
	: DocumentInfo(other), m_pData(NULL), m_dataLength(0), m_isMapped(false)
{
	// The new document starts out empty; setData() then copies the
	// source's bytes into a buffer of its own
	setData(other.m_pData, other.m_dataLength);
}
100 
~Document()101 Document::~Document()
102 {
103 	resetData();
104 }
105 
/// Assignment: copies other's information and takes a private copy of its data.
///
/// Whatever this document held before is released first, so assigning from
/// a document without data leaves this one without data too.
Document& Document::operator=(const Document& other)
{
	if (this != &other)
	{
		DocumentInfo::operator=(other);

		// Release the current buffer or mapping while m_isMapped still tells
		// which of the two it is. setData() returns early when the source is
		// empty, which would otherwise leave the previous data in place.
		resetData();

		// Copying does a deep copy
		setData(other.m_pData, other.m_dataLength);
	}

	return *this;
}
118 
operator <(const Document & other) const119 bool Document::operator<(const Document& other) const
120 {
121 	if (DocumentInfo::operator<(other) == false)
122 	{
123 		if (m_dataLength < other.m_dataLength)
124 		{
125 			return true;
126 		}
127 
128 		return false;
129 	}
130 
131 	return true;
132 }
133 
134 /// Copies the given data in the document.
setData(const char * data,off_t length)135 bool Document::setData(const char *data, off_t length)
136 {
137 	if ((data == NULL) ||
138 		(length == 0))
139 	{
140 		return false;
141 	}
142 
143 	// Discard existing data
144 	resetData();
145 
146 	m_pData = Memory::allocateBuffer(length + 1);
147 	if (m_pData != NULL)
148 	{
149 		memcpy(m_pData, data, length);
150 		m_pData[length] = '\0';
151 		m_dataLength = length;
152 
153 		return true;
154 	}
155 
156 	return false;
157 }
158 
159 /// Maps the given file.
setDataFromFile(const string & fileName)160 bool Document::setDataFromFile(const string &fileName)
161 {
162 	struct stat fileStat;
163 	int openFlags = O_RDONLY;
164 #ifdef O_CLOEXEC
165 	openFlags = openFlags|O_CLOEXEC;
166 #endif
167 
168 	if (fileName.empty() == true)
169 	{
170 		return false;
171 	}
172 
173 	// Make sure the file exists
174 	if (stat(fileName.c_str(), &fileStat) != 0)
175 	{
176 		return false;
177 	}
178 
179 	if ((!S_ISDIR(fileStat.st_mode)) &&
180 		(!S_ISREG(fileStat.st_mode)))
181 	{
182 		return false;
183 	}
184 
185 	if ((S_ISDIR(fileStat.st_mode)) ||
186 		(fileStat.st_size == 0))
187 	{
188 		// The file is empty
189 		resetData();
190 		return true;
191 	}
192 
193 	// Open the file in read-only mode
194 #ifdef O_NOATIME
195 	int fd = open(fileName.c_str(), openFlags|O_NOATIME);
196 #else
197 	int fd = open(fileName.c_str(), openFlags);
198 #endif
199 #ifdef O_NOATIME
200 	if ((fd < 0) &&
201 		(errno == EPERM))
202 	{
203 		// Try again
204 		fd = open(fileName.c_str(), openFlags);
205 	}
206 #endif
207 	if (fd < 0)
208 	{
209 		clog << "Document::setDataFromFile: " << fileName << " couldn't be opened" << endl;
210 		return false;
211 	}
212 #ifndef O_CLOEXEC
213 	int fdFlags = fcntl(fd, F_GETFD);
214 	fcntl(fd, F_SETFD, fdFlags|FD_CLOEXEC);
215 #endif
216 
217 	// Discard existing data
218 	resetData();
219 
220 #ifdef HAVE_MMAP
221 	// Don't try and map more than 2Gb !
222 	if (fileStat.st_size > 2147483647)
223 	{
224 #ifdef DEBUG
225 		clog << "Document::setDataFromFile: reached large file cap" << endl;
226 #endif
227 		fileStat.st_size = 2147483647;
228 	}
229 	// Request a mapping of the whole file
230 	void *mapSpace = mmap(NULL, (size_t)fileStat.st_size, PROT_READ, MAP_SHARED, fd, 0);
231 	if (mapSpace != MAP_FAILED)
232 	{
233 		m_pData = (char*)mapSpace;
234 		m_dataLength = fileStat.st_size;
235 		m_isMapped = true;
236 #ifdef HAVE_MADVISE
237 		if (madvise(mapSpace, (size_t)fileStat.st_size, MADV_SEQUENTIAL) != 0)
238 		{
239 #ifdef DEBUG
240 			clog << "Document::setDataFromFile: ignored memory advice" << endl;
241 #endif
242 		}
243 #endif
244 	}
245 	else clog << "Document::setDataFromFile: mapping failed" << endl;
246 #else
247 	m_pData = Memory::allocateBuffer(fileStat.st_size + 1);
248 	if (m_pData != NULL)
249 	{
250 		if (read(fd, (void*)m_pData, fileStat.st_size) == fileStat.st_size)
251 		{
252 			m_pData[fileStat.st_size] = '\0';
253 			m_dataLength = fileStat.st_size;
254 		}
255 		else
256 		{
257 			Memory::freeBuffer(m_pData, fileStat.st_size + 1);
258 			m_pData = NULL;
259 		}
260 	}
261 	else clog << "Document::setDataFromFile: reading failed" << endl;
262 #endif
263 
264 	setTimestamp(TimeConverter::toTimestamp(fileStat.st_mtime));
265 	setSize(fileStat.st_size);
266 
267 #ifdef HAVE_SYS_XATTR_H
268 	// Any extended attributes ?
269 	ssize_t listSize = flistxattr(fd, NULL, 0);
270 	if (listSize > 0)
271 	{
272 		char *pList = new char[listSize];
273 
274 		if (flistxattr(fd, pList, listSize) > 0)
275 		{
276 			set<string> labels;
277 			string attrList(pList, listSize);
278 			string::size_type startPos = 0, endPos = attrList.find('\0');
279 
280 			while (endPos != string::npos)
281 			{
282 				string attrName(attrList.substr(startPos, endPos - startPos));
283 				char *pAttr = NULL;
284 
285 				// FIXME: support common attributes defined at
286 				// http://www.freedesktop.org/wiki/CommonExtendedAttributes
287 				if (attrName == "user.mime_type")
288 				{
289 					pAttr = getXAttr(fd, attrName);
290 					if (pAttr != NULL)
291 					{
292 						// Set the MIME type
293 						setType(pAttr);
294 					}
295 				}
296 
297 				if (pAttr != NULL)
298 				{
299 #ifdef DEBUG
300 					clog << "Document::setDataFromFile: xattr " << attrName << "=" << pAttr << endl;
301 #endif
302 					delete[] pAttr;
303 				}
304 
305 				// Next
306 				startPos = endPos + 1;
307 				if (startPos < listSize)
308 				{
309 					endPos = attrList.find('\0', startPos);
310 				}
311 				else
312 				{
313 					endPos = string::npos;
314 				}
315 			}
316 
317 			if (labels.empty() == false)
318 			{
319 				setLabels(labels);
320 			}
321 		}
322 
323 		delete[] pList;
324 	}
325 #endif
326 
327 	// Close the file
328 	if (close(fd) == -1)
329 	{
330 #ifdef DEBUG
331 		clog << "Document::setDataFromFile: close failed" << endl;
332 #endif
333 	}
334 
335 	return m_isMapped;
336 }
337 
338 /// Returns the document's data; NULL if document is empty.
getData(off_t & length) const339 const char *Document::getData(off_t &length) const
340 {
341 	length = m_dataLength;
342 	return m_pData;
343 }
344 
345 /// Resets the document's data.
resetData(void)346 void Document::resetData(void)
347 {
348 	if (m_pData != NULL)
349 	{
350 		if (m_isMapped == false)
351 		{
352 			// Free
353 			Memory::freeBuffer(m_pData, m_dataLength + 1);
354 		}
355 #ifdef HAVE_MMAP
356 		else
357 		{
358 #ifdef HAVE_MADVISE
359 			if (madvise((void*)m_pData, (size_t)m_dataLength, MADV_DONTNEED) != 0)
360 			{
361 #ifdef DEBUG
362 				clog << "Document::resetData: ignored memory advice" << endl;
363 #endif
364 			}
365 #endif
366 			// Unmap
367 			munmap((void*)m_pData, (size_t)m_dataLength);
368 		}
369 #endif
370 	}
371 
372 	m_pData = NULL;
373 	m_dataLength = 0;
374 	m_isMapped = false;
375 }
376 
377 /// Checks whether the document is binary.
isBinary(void) const378 bool Document::isBinary(void) const
379 {
380 	unsigned int maxLen = 100;
381 
382 	// Look at the first 100 bytes or so
383 	if (m_dataLength < 100)
384 	{
385 		maxLen = m_dataLength;
386 	}
387 	for (unsigned int i = 0; i < maxLen; ++i)
388 	{
389 		if (isascii(m_pData[i]) == 0)
390 		{
391 #ifdef DEBUG
392 			clog << "Document::isBinary: " << m_pData[i] << endl;
393 #endif
394 			return true;
395 		}
396 	}
397 
398 	return false;
399 }
400