1 /*
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *
5  *  Copyright (C) 2006-2007 Peng Wu
6  *
7  *  This program is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef MEMORY_CHUNK_H
22 #define MEMORY_CHUNK_H
23 
24 #include "config.h"
25 #include <assert.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <stdlib.h>
31 #ifdef HAVE_MMAP
32 #include <sys/mman.h>
33 #define LIBPINYIN_USE_MMAP
34 #endif
35 #include "stl_lite.h"
36 
37 namespace pinyin{
38 
/*  for unmanaged mode
 *  m_free_func == free, when the memory is allocated by malloc
 *  m_free_func == munmap, when the memory is allocated by mmap
 *  m_free_func == NULL,
 *  when the memory is a small portion of an allocated area
 *  m_free_func == other,
 *  malloc then free.
 */
47 
48 /**
49  * MemoryChunk:
50  *
51  * The utility to manage the memory chunks.
52  *
53  */
54 
55 class MemoryChunk{
56     typedef void (* free_func_t)(...);
57 private:
58     char * m_data_begin;
59     char * m_data_end; //one data pass the end.
60     char * m_allocated; //one data pass the end.
61     free_func_t m_free_func;
62 
63     const gint32 header = sizeof(guint32) * 2;
64 
65 private:
freemem()66     void freemem(){
67         if ((free_func_t)free == m_free_func)
68             free(m_data_begin);
69 #ifdef LIBPINYIN_USE_MMAP
70         else if ((free_func_t)munmap == m_free_func)
71             /* we hide the file header in mmap. */
72             munmap(m_data_begin - header, header + capacity());
73 #endif
74         else
75             assert(FALSE);
76     }
77 
78 
reset()79     void reset(){
80         if (m_free_func)
81             freemem();
82 
83         m_data_begin = NULL;
84         m_data_end = NULL;
85         m_allocated = NULL;
86         m_free_func = NULL;
87     }
88 
ensure_has_space(size_t new_size)89     void ensure_has_space(size_t new_size){
90         int delta_size = m_data_begin + new_size - m_data_end;
91         if ( delta_size <= 0 ) return;
92         ensure_has_more_space ( delta_size );
93     }
94 
95     /* enlarge function */
ensure_has_more_space(size_t extra_size)96     void ensure_has_more_space(size_t extra_size){
97         if ( 0 == extra_size ) return;
98         size_t newsize;
99         size_t cursize = size();
100         if ( m_free_func != (free_func_t)free ) {
101             /* copy on resize */
102             newsize = cursize + extra_size;
103             /* do the copy */
104             char * tmp = (char *) malloc(newsize);
105             assert(tmp);
106             memset(tmp, 0, newsize);
107             memmove(tmp, m_data_begin, cursize);
108             /* free the origin memory */
109             if (m_free_func)
110                 freemem();
111             /* change varibles */
112             m_data_begin = tmp;
113             m_data_end = m_data_begin + cursize;
114             m_allocated = m_data_begin + newsize;
115             m_free_func = (free_func_t)free;
116             return;
117         }
118         /* the memory area is managed by this memory chunk */
119         if ( extra_size <= (size_t) (m_allocated - m_data_end))
120             return;
121         newsize = std_lite::max( capacity()<<1, cursize + extra_size);
122         m_data_begin = (char *) realloc(m_data_begin, newsize);
123         assert(m_data_begin);
124         memset(m_data_begin + cursize, 0, newsize - cursize);
125         m_data_end = m_data_begin + cursize;
126         m_allocated = m_data_begin + newsize;
127         return;
128     }
129 
get_check_sum(const char * data,guint32 length)130     guint32 get_check_sum(const char * data, guint32 length){
131         guint32 checksum = 0x0;
132         guint32 aligns = length & ~0x3;
133 
134         /* checksum for aligned parts. */
135         guint32 index = 0;
136         for (; index < aligns; index += sizeof(guint32)) {
137             const char * p = data + index;
138 
139             /* use little endian here. */
140             guint32 item = *p | *(p + 1) << 8 |
141                 *(p + 2) << 16 | *(p + 3) << 24;
142 
143             checksum ^= item;
144         }
145 
146         /* checksum for remained parts. */
147         guint32 shift = 0;
148         for (; index < length; index++) {
149             const char * p = data + index;
150 
151             guint32 item = *p << shift;
152             shift += 8;
153 
154             checksum ^= item;
155         }
156 
157         return checksum;
158     }
159 
160 public:
161     /**
162      * MemoryChunk::MemoryChunk:
163      *
164      * The constructor of the MemoryChunk.
165      *
166      */
MemoryChunk()167     MemoryChunk(){
168         m_data_begin = NULL;
169         m_data_end = NULL;
170         m_allocated = NULL;
171         m_free_func = NULL;
172     }
173 
174     /**
175      * MemoryChunk::~MemoryChunk:
176      *
177      * The destructor of the MemoryChunk.
178      *
179      */
~MemoryChunk()180     ~MemoryChunk(){
181         reset();
182     }
183 
184     /**
185      * MemoryChunk::begin:
186      *
187      * Read access method, to get the begin of the MemoryChunk.
188      *
189      */
begin()190     void* begin() const{
191         return m_data_begin;
192     }
193 
194     /**
195      * MemoryChunk::end:
196      *
197      * Write access method, to get the end of the MemoryChunk.
198      *
199      */
end()200     void* end() const{
201         return m_data_end;
202     }
203 
204     /**
205      * MemoryChunk::size:
206      *
207      * Get the size of the content in the MemoryChunk.
208      *
209      */
size()210     size_t size() const{
211         return m_data_end - m_data_begin;
212     }
213 
214     /**
215      * MemoryChunk::set_size:
216      *
217      * Set the size of the content in the MemoryChunk.
218      *
219      */
set_size(size_t newsize)220     void set_size(size_t newsize){
221         ensure_has_space(newsize);
222         m_data_end = m_data_begin + newsize;
223     }
224 
225     /**
226      * MemoryChunk::capacity:
227      *
228      * Get the capacity of the MemoryChunk.
229      *
230      */
capacity()231     size_t capacity(){
232         return m_allocated - m_data_begin;
233     }
234 
235     /**
236      * MemoryChunk::set_chunk:
237      * @begin: the begin of the data
238      * @length: the length of the data
239      * @free_func: the function to free the data
240      *
241      * Transfer management of a memory chunk allocated by other part of the
242      * system to the memory chunk.
243      *
244      */
set_chunk(void * begin,size_t length,free_func_t free_func)245     void set_chunk(void* begin, size_t length, free_func_t free_func){
246         if (m_free_func)
247             freemem();
248 
249         m_data_begin = (char *) begin;
250         m_data_end = (char *) m_data_begin + length;
251         m_allocated = (char *) m_data_begin + length;
252         m_free_func = free_func;
253     }
254 
255     /**
256      * MemoryChunk::get_sub_chunk:
257      * @offset: the offset in this MemoryChunk.
258      * @length: the data length to be retrieved.
259      * @returns: the newly allocated MemoryChunk.
260      *
261      * Get a sub MemoryChunk from this MemoryChunk.
262      *
263      * Note: use set_chunk internally.
264      * the returned new chunk need to be deleted.
265      *
266      */
get_sub_chunk(size_t offset,size_t length)267     MemoryChunk * get_sub_chunk(size_t offset, size_t length){
268         MemoryChunk * retval = new MemoryChunk();
269         char * begin_pos = m_data_begin + offset;
270         retval->set_chunk(begin_pos, length, NULL);
271         return retval;
272     }
273 
274     /**
275      * MemoryChunk::set_content:
276      * @offset: the offset in this MemoryChunk.
277      * @data: the begin of the data to be copied.
278      * @len: the length of the data to be copied.
279      * @returns: whether the data is copied successfully.
280      *
281      * Data are written directly to the memory area in this MemoryChunk.
282      *
283      */
set_content(size_t offset,const void * data,size_t len)284     bool set_content(size_t offset, const void * data, size_t len){
285         size_t cursize = std_lite::max(size(), offset + len);
286         ensure_has_space(offset + len);
287         memmove(m_data_begin + offset, data, len);
288         m_data_end = m_data_begin + cursize;
289         return true;
290     }
291 
292     /**
293      * MemoryChunk::append_content:
294      * @data: the begin of the data to be copied.
295      * @len: the length of the data to be copied.
296      * @returns: whether the data is appended successfully.
297      *
298      * Data are appended at the end of the MemoryChunk.
299      *
300      */
append_content(const void * data,size_t len)301     bool append_content(const void * data, size_t len){
302         return set_content(size(), data, len);
303     }
304 
305     /**
306      * MemoryChunk::insert_content:
307      * @offset: the offset in this MemoryChunk, which starts from zero.
308      * @data: the begin of the data to be copied.
309      * @length: the length of the data to be copied.
310      * @returns: whether the data is inserted successfully.
311      *
312      * Data are written to the memory area,
313      * the original content are moved towards the rear.
314      *
315      */
insert_content(size_t offset,const void * data,size_t length)316     bool insert_content(size_t offset, const void * data, size_t length){
317         ensure_has_more_space(length);
318         size_t move_size = size() - offset;
319         memmove(m_data_begin + offset + length, m_data_begin + offset, move_size);
320         memmove(m_data_begin + offset, data, length);
321         m_data_end += length;
322         return true;
323     }
324 
325     /**
326      * MemoryChunk::remove_content:
327      * @offset: the offset in this MemoryChunk.
328      * @length: the length of the removed content.
329      * @returns: whether the content is removed successfully.
330      *
331      * Data are removed directly,
332      * the following content are moved towards the front.
333      *
334      */
remove_content(size_t offset,size_t length)335     bool remove_content(size_t offset, size_t length){
336         size_t move_size = size() - offset - length;
337         memmove(m_data_begin + offset, m_data_begin + offset + length, move_size);
338         m_data_end -= length;
339         return true;
340     }
341 
342     /**
343      * MemoryChunk::get_content:
344      * @offset: the offset in this MemoryChunk.
345      * @buffer: the buffer to retrieve the content.
346      * @length: the length of content to be retrieved.
347      * @returns: whether the content is retrieved.
348      *
349      * Get the content in this MemoryChunk.
350      *
351      */
get_content(size_t offset,void * buffer,size_t length)352     bool get_content(size_t offset, void * buffer, size_t length){
353         if ( size() < offset + length )
354             return false;
355         memcpy( buffer, m_data_begin + offset, length);
356         return true;
357     }
358 
359     /**
360      * MemoryChunk::compact_memory:
361      *
362      * Compact memory, reduce the size.
363      *
364      */
compact_memory()365     void compact_memory(){
366         if ( m_free_func != (free_func_t)free )
367             return;
368         size_t newsize = size();
369         m_data_begin = (char *) realloc(m_data_begin, newsize);
370         m_allocated = m_data_begin + newsize;
371     }
372 
373     /**
374      * MemoryChunk::load:
375      * @filename: load the MemoryChunk from the filename.
376      * @returns: whether the load is successful.
377      *
378      * Load the content from the filename.
379      *
380      */
load(const char * filename)381     bool load(const char * filename){
382         /* free old data */
383         reset();
384 
385         int fd = open(filename, O_RDONLY);
386         if (-1 == fd)
387             return false;
388 
389         off_t file_size = lseek(fd, 0, SEEK_END);
390         lseek(fd, 0, SEEK_SET);
391 
392         if (file_size < header) {
393             close(fd);
394             return false;
395         }
396 
397         guint32 length = 0;
398         ssize_t ret_len = read(fd, &length, sizeof(guint32));
399         assert(ret_len == sizeof(length));
400 
401         guint32 checksum = 0;
402         ret_len = read(fd, &checksum, sizeof(guint32));
403         assert(ret_len == sizeof(checksum));
404 
405         guint32 data_len = file_size - header;
406         if (data_len != length) {
407             close(fd);
408             return false;
409         }
410 
411         char * data = (char *) malloc(data_len);
412         if ( !data ){
413             close(fd);
414             return false;
415         }
416 
417         data_len = read(fd, data, data_len);
418         guint32 calc = get_check_sum(data, data_len);
419         if (checksum != calc) {
420             free(data);
421             close(fd);
422             return false;
423         }
424 
425         set_chunk(data, data_len, (free_func_t)free);
426 
427         close(fd);
428         return true;
429     }
430 
431 #ifdef LIBPINYIN_USE_MMAP
432     /**
433      * MemoryChunk::mmap:
434      * @filename: mmap the MemoryChunk from the filename.
435      * @returns: whether the mmap is successful.
436      *
437      * mmap the content from the filename.
438      *
439      */
mmap(const char * filename)440     bool mmap(const char * filename){
441         /* free old data */
442         reset();
443 
444         int fd = open(filename, O_RDONLY);
445         if (-1 == fd)
446             return false;
447 
448         off_t file_size = lseek(fd, 0, SEEK_END);
449         lseek(fd, 0, SEEK_SET);
450 
451         if (file_size < header) {
452             close(fd);
453             return false;
454         }
455 
456         guint32 length = 0;
457         ssize_t ret_len = read(fd, &length, sizeof(guint32));
458         assert(ret_len == sizeof(length));
459 
460         guint32 checksum = 0;
461         ret_len = read(fd, &checksum, sizeof(guint32));
462         assert(ret_len == sizeof(checksum));
463 
464         guint32 data_len = file_size - header;
465         if (data_len != length) {
466             close(fd);
467             return false;
468         }
469 
470         char * data = (char *)::mmap(NULL, file_size,
471                                      PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
472 
473         if (MAP_FAILED == data) {
474             close(fd);
475             return false;
476         }
477 
478         data = data + header;
479         guint32 calc = get_check_sum(data, data_len);
480         if (checksum != calc) {
481             munmap(data - header, file_size);
482             close(fd);
483             return false;
484         }
485 
486         set_chunk(data, data_len, (free_func_t)munmap);
487 
488         close(fd);
489         return true;
490     }
491 #endif
492 
493     /**
494      * MemoryChunk::save:
495      * @filename: save this MemoryChunk to the filename.
496      * @returns: whether the save is successful.
497      *
498      * Save the content to the filename.
499      *
500      */
save(const char * filename)501     bool save(const char * filename){
502         int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
503         if ( -1 == fd )
504             return false;
505 
506         guint32 length = size();
507         ssize_t ret_len = write(fd, &length, sizeof(guint32));
508         assert(ret_len == sizeof(length));
509 
510         guint32 checksum = get_check_sum(m_data_begin, size());
511         ret_len = write(fd, &checksum, sizeof(guint32));
512         assert(ret_len == sizeof(checksum));
513 
514         ret_len = write(fd, begin(), size());
515         if (ret_len != (ssize_t) size()){
516             close(fd);
517             return false;
518         }
519 
520         fsync(fd);
521         close(fd);
522         return true;
523     }
524 };
525 
526 };
527 
528 #endif
529