1 /*
2  * Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  *
18  */
19 
20 #include <zim/zim.h>
21 #include <zim/error.h>
22 #include "file_reader.h"
23 #include "file_compound.h"
24 #include "buffer.h"
25 #include <errno.h>
26 #include <string.h>
27 #include <cstring>
28 #include <fcntl.h>
29 #include <pthread.h>
30 #include <sstream>
31 #include <system_error>
32 #include <algorithm>
33 
34 
35 #ifndef _WIN32
36 #  include <sys/mman.h>
37 #  include <unistd.h>
38 #endif
39 
40 #if defined(_MSC_VER)
41 # include <io.h>
42 # include <BaseTsd.h>
43   typedef SSIZE_T ssize_t;
44 #endif
45 
46 namespace zim {
47 
FileReader(std::shared_ptr<const FileCompound> source)48 FileReader::FileReader(std::shared_ptr<const FileCompound> source)
49   : FileReader(source, offset_t(0), source->fsize()) {}
50 
FileReader(std::shared_ptr<const FileCompound> source,offset_t offset)51 FileReader::FileReader(std::shared_ptr<const FileCompound> source, offset_t offset)
52   : FileReader(source, offset, zsize_t(source->fsize().v-offset.v)) {}
53 
FileReader(std::shared_ptr<const FileCompound> source,offset_t offset,zsize_t size)54 FileReader::FileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size)
55   : source(source),
56     _offset(offset),
57     _size(size)
58 {
59   ASSERT(offset.v, <=, source->fsize().v);
60   ASSERT(offset.v+size.v, <=, source->fsize().v);
61 }
62 
read(offset_t offset) const63 char FileReader::read(offset_t offset) const {
64   ASSERT(offset.v, <, _size.v);
65   offset += _offset;
66   auto part_pair = source->locate(offset);
67   auto& fhandle = part_pair->second->fhandle();
68   offset_t local_offset = offset - part_pair->first.min;
69   ASSERT(local_offset, <=, part_pair->first.max);
70   char ret;
71   try {
72     fhandle.readAt(&ret, zsize_t(1), local_offset);
73   } catch (std::runtime_error& e) {
74     //Error while reading.
75     std::ostringstream s;
76     s << "Cannot read a char.\n";
77     s << " - File part is " <<  part_pair->second->filename() << "\n";
78     s << " - File part size is " << part_pair->second->size().v << "\n";
79     s << " - File part range is " << part_pair->first.min << "-" << part_pair->first.max << "\n";
80     s << " - Reading offset at " << offset.v << "\n";
81     s << " - local offset is " << local_offset.v << "\n";
82     s << " - error is " << strerror(errno) << "\n";
83     std::error_code ec(errno, std::generic_category());
84     throw std::system_error(ec, s.str());
85   };
86   return ret;
87 }
88 
89 
read(char * dest,offset_t offset,zsize_t size) const90 void FileReader::read(char* dest, offset_t offset, zsize_t size) const {
91   ASSERT(offset.v, <, _size.v);
92   ASSERT(offset.v+size.v, <=, _size.v);
93   if (! size ) {
94     return;
95   }
96   offset += _offset;
97   auto found_range = source->locate(offset, size);
98   for(auto current = found_range.first; current!=found_range.second; current++){
99     auto part = current->second;
100     Range partRange = current->first;
101     offset_t local_offset = offset-partRange.min;
102     ASSERT(size.v, >, 0U);
103     zsize_t size_to_get = zsize_t(std::min(size.v, part->size().v-local_offset.v));
104     try {
105       part->fhandle().readAt(dest, size_to_get, local_offset);
106     } catch (std::runtime_error& e) {
107       std::ostringstream s;
108       s << "Cannot read chars.\n";
109       s << " - File part is " <<  part->filename() << "\n";
110       s << " - File part size is " << part->size().v << "\n";
111       s << " - File part range is " << partRange.min << "-" << partRange.max << "\n";
112       s << " - size_to_get is " << size_to_get.v << "\n";
113       s << " - total size is " << size.v << "\n";
114       s << " - Reading offset at " << offset.v << "\n";
115       s << " - local offset is " << local_offset.v << "\n";
116       s << " - error is " << strerror(errno) << "\n";
117       std::error_code ec(errno, std::generic_category());
118       throw std::system_error(ec, s.str());
119     };
120     ASSERT(size_to_get, <=, size);
121     dest += size_to_get.v;
122     size -= size_to_get;
123     offset += size_to_get;
124   }
125   ASSERT(size.v, ==, 0U);
126 }
127 
128 #ifdef ENABLE_USE_MMAP
129 namespace
130 {
131 
// Internal signal meaning "this range cannot be served through mmap".
// Thrown by makeMmappedBuffer() and FileReader::get_buffer(), and caught in
// get_buffer() to fall back to a plain copying read.
// Inheritance is public so that a generic `catch (const std::exception&)`
// can also catch it (a `class` inherits privately by default).
class MMapException : public std::exception {};
133 
134 char*
mmapReadOnly(int fd,offset_type offset,size_type size)135 mmapReadOnly(int fd, offset_type offset, size_type size)
136 {
137 #if defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)
138   const auto MAP_FLAGS = MAP_PRIVATE;
139 #elif defined(__FreeBSD__)
140   const auto MAP_FLAGS = MAP_PRIVATE|MAP_PREFAULT_READ;
141 #else
142   const auto MAP_FLAGS = MAP_PRIVATE|MAP_POPULATE;
143 #endif
144 
145   const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset);
146   if (p == MAP_FAILED )
147   {
148     std::ostringstream s;
149     s << "Cannot mmap size " << size << " at off " << offset
150       << " : " << strerror(errno);
151     throw std::runtime_error(s.str());
152   }
153   return p;
154 }
155 
156 Buffer::DataPtr
makeMmappedBuffer(int fd,offset_t offset,zsize_t size)157 makeMmappedBuffer(int fd, offset_t offset, zsize_t size)
158 {
159   const offset_type pageAlignedOffset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1));
160   const size_t alignmentAdjustment = offset.v - pageAlignedOffset;
161   size += alignmentAdjustment;
162 
163 #if !MMAP_SUPPORT_64
164   if(pageAlignedOffset >= INT32_MAX) {
165     throw MMapException();
166   }
167 #endif
168   char* const mmappedAddress = mmapReadOnly(fd, pageAlignedOffset, size.v);
169   const auto munmapDeleter = [mmappedAddress, size](char* ) {
170                                munmap(mmappedAddress, size.v);
171                              };
172 
173   return Buffer::DataPtr(mmappedAddress+alignmentAdjustment, munmapDeleter);
174 }
175 
176 } // unnamed namespace
177 #endif // ENABLE_USE_MMAP
178 
get_buffer(offset_t offset,zsize_t size) const179 const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const {
180   ASSERT(size, <=, _size);
181 #ifdef ENABLE_USE_MMAP
182   try {
183     auto found_range = source->locate(_offset+offset, size);
184     auto first_part_containing_it = found_range.first;
185     if (++first_part_containing_it != found_range.second) {
186       throw MMapException();
187     }
188 
189     // The range is in only one part
190     auto range = found_range.first->first;
191     auto part = found_range.first->second;
192     auto local_offset = offset + _offset - range.min;
193     ASSERT(size, <=, part->size());
194     int fd = part->fhandle().getNativeHandle();
195     return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size);
196   } catch(MMapException& e)
197 #endif
198   {
199     // The range is several part, or we are on Windows.
200     // We will have to do some memory copies :/
201     // [TODO] Use Windows equivalent for mmap.
202     auto ret_buffer = Buffer::makeBuffer(size);
203     read(const_cast<char*>(ret_buffer.data()), offset, size);
204     return ret_buffer;
205   }
206 }
207 
can_read(offset_t offset,zsize_t size) const208 bool Reader::can_read(offset_t offset, zsize_t size) const
209 {
210     return (offset.v <= this->size().v && (offset.v+size.v) <= this->size().v);
211 }
212 
213 
sub_reader(offset_t offset,zsize_t size) const214 std::unique_ptr<const Reader> FileReader::sub_reader(offset_t offset, zsize_t size) const
215 {
216   ASSERT(size, <=, _size);
217   return std::unique_ptr<Reader>(new FileReader(source, _offset+offset, size));
218 }
219 
220 } // zim
221