1 /* 2 * libpinyin 3 * Library to deal with pinyin. 4 * 5 * Copyright (C) 2006-2007 Peng Wu 6 * 7 * This program is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #ifndef MEMORY_CHUNK_H 22 #define MEMORY_CHUNK_H 23 24 #include "config.h" 25 #include <assert.h> 26 #include <sys/types.h> 27 #include <sys/stat.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <stdlib.h> 31 #ifdef HAVE_MMAP 32 #include <sys/mman.h> 33 #define LIBPINYIN_USE_MMAP 34 #endif 35 #include "stl_lite.h" 36 37 namespace pinyin{ 38 39 /* for unmanaged mode 40 * m_free_func == free, when memory is allocated by malloc 41 * m_free_func == munmap, when memory is allocated by mmap 42 * m_free_func == NULL, 43 * when memory is in small protion of allocated area 44 * m_free_func == other, 45 * malloc then free. 46 */ 47 48 /** 49 * MemoryChunk: 50 * 51 * The utility to manage the memory chunks. 52 * 53 */ 54 55 class MemoryChunk{ 56 typedef void (* free_func_t)(...); 57 private: 58 char * m_data_begin; 59 char * m_data_end; //one data pass the end. 60 char * m_allocated; //one data pass the end. 61 free_func_t m_free_func; 62 63 const gint32 header = sizeof(guint32) * 2; 64 65 private: freemem()66 void freemem(){ 67 if ((free_func_t)free == m_free_func) 68 free(m_data_begin); 69 #ifdef LIBPINYIN_USE_MMAP 70 else if ((free_func_t)munmap == m_free_func) 71 /* we hide the file header in mmap. */ 72 munmap(m_data_begin - header, header + capacity()); 73 #endif 74 else 75 assert(FALSE); 76 } 77 78 reset()79 void reset(){ 80 if (m_free_func) 81 freemem(); 82 83 m_data_begin = NULL; 84 m_data_end = NULL; 85 m_allocated = NULL; 86 m_free_func = NULL; 87 } 88 ensure_has_space(size_t new_size)89 void ensure_has_space(size_t new_size){ 90 int delta_size = m_data_begin + new_size - m_data_end; 91 if ( delta_size <= 0 ) return; 92 ensure_has_more_space ( delta_size ); 93 } 94 95 /* enlarge function */ ensure_has_more_space(size_t extra_size)96 void ensure_has_more_space(size_t extra_size){ 97 if ( 0 == extra_size ) return; 98 size_t newsize; 99 size_t cursize = size(); 100 if ( m_free_func != (free_func_t)free ) { 101 /* copy on resize */ 102 newsize = cursize + extra_size; 103 /* do the copy */ 104 char * tmp = (char *) malloc(newsize); 105 assert(tmp); 106 memset(tmp, 0, newsize); 107 memmove(tmp, m_data_begin, cursize); 108 /* free the origin memory */ 109 if (m_free_func) 110 freemem(); 111 /* change varibles */ 112 m_data_begin = tmp; 113 m_data_end = m_data_begin + cursize; 114 m_allocated = m_data_begin + newsize; 115 m_free_func = (free_func_t)free; 116 return; 117 } 118 /* the memory area is managed by this memory chunk */ 119 if ( extra_size <= (size_t) (m_allocated - m_data_end)) 120 return; 121 newsize = std_lite::max( capacity()<<1, cursize + extra_size); 122 m_data_begin = (char *) realloc(m_data_begin, newsize); 123 assert(m_data_begin); 124 memset(m_data_begin + cursize, 0, newsize - cursize); 125 m_data_end = m_data_begin + cursize; 126 m_allocated = m_data_begin + newsize; 127 return; 128 } 129 get_check_sum(const char * data,guint32 length)130 guint32 get_check_sum(const char * data, guint32 length){ 131 guint32 checksum = 0x0; 132 guint32 aligns = length & ~0x3; 133 134 /* checksum for aligned parts. */ 135 guint32 index = 0; 136 for (; index < aligns; index += sizeof(guint32)) { 137 const char * p = data + index; 138 139 /* use little endian here. */ 140 guint32 item = *p | *(p + 1) << 8 | 141 *(p + 2) << 16 | *(p + 3) << 24; 142 143 checksum ^= item; 144 } 145 146 /* checksum for remained parts. */ 147 guint32 shift = 0; 148 for (; index < length; index++) { 149 const char * p = data + index; 150 151 guint32 item = *p << shift; 152 shift += 8; 153 154 checksum ^= item; 155 } 156 157 return checksum; 158 } 159 160 public: 161 /** 162 * MemoryChunk::MemoryChunk: 163 * 164 * The constructor of the MemoryChunk. 165 * 166 */ MemoryChunk()167 MemoryChunk(){ 168 m_data_begin = NULL; 169 m_data_end = NULL; 170 m_allocated = NULL; 171 m_free_func = NULL; 172 } 173 174 /** 175 * MemoryChunk::~MemoryChunk: 176 * 177 * The destructor of the MemoryChunk. 178 * 179 */ ~MemoryChunk()180 ~MemoryChunk(){ 181 reset(); 182 } 183 184 /** 185 * MemoryChunk::begin: 186 * 187 * Read access method, to get the begin of the MemoryChunk. 188 * 189 */ begin()190 void* begin() const{ 191 return m_data_begin; 192 } 193 194 /** 195 * MemoryChunk::end: 196 * 197 * Write access method, to get the end of the MemoryChunk. 198 * 199 */ end()200 void* end() const{ 201 return m_data_end; 202 } 203 204 /** 205 * MemoryChunk::size: 206 * 207 * Get the size of the content in the MemoryChunk. 208 * 209 */ size()210 size_t size() const{ 211 return m_data_end - m_data_begin; 212 } 213 214 /** 215 * MemoryChunk::set_size: 216 * 217 * Set the size of the content in the MemoryChunk. 218 * 219 */ set_size(size_t newsize)220 void set_size(size_t newsize){ 221 ensure_has_space(newsize); 222 m_data_end = m_data_begin + newsize; 223 } 224 225 /** 226 * MemoryChunk::capacity: 227 * 228 * Get the capacity of the MemoryChunk. 229 * 230 */ capacity()231 size_t capacity(){ 232 return m_allocated - m_data_begin; 233 } 234 235 /** 236 * MemoryChunk::set_chunk: 237 * @begin: the begin of the data 238 * @length: the length of the data 239 * @free_func: the function to free the data 240 * 241 * Transfer management of a memory chunk allocated by other part of the 242 * system to the memory chunk. 243 * 244 */ set_chunk(void * begin,size_t length,free_func_t free_func)245 void set_chunk(void* begin, size_t length, free_func_t free_func){ 246 if (m_free_func) 247 freemem(); 248 249 m_data_begin = (char *) begin; 250 m_data_end = (char *) m_data_begin + length; 251 m_allocated = (char *) m_data_begin + length; 252 m_free_func = free_func; 253 } 254 255 /** 256 * MemoryChunk::get_sub_chunk: 257 * @offset: the offset in this MemoryChunk. 258 * @length: the data length to be retrieved. 259 * @returns: the newly allocated MemoryChunk. 260 * 261 * Get a sub MemoryChunk from this MemoryChunk. 262 * 263 * Note: use set_chunk internally. 264 * the returned new chunk need to be deleted. 265 * 266 */ get_sub_chunk(size_t offset,size_t length)267 MemoryChunk * get_sub_chunk(size_t offset, size_t length){ 268 MemoryChunk * retval = new MemoryChunk(); 269 char * begin_pos = m_data_begin + offset; 270 retval->set_chunk(begin_pos, length, NULL); 271 return retval; 272 } 273 274 /** 275 * MemoryChunk::set_content: 276 * @offset: the offset in this MemoryChunk. 277 * @data: the begin of the data to be copied. 278 * @len: the length of the data to be copied. 279 * @returns: whether the data is copied successfully. 280 * 281 * Data are written directly to the memory area in this MemoryChunk. 282 * 283 */ set_content(size_t offset,const void * data,size_t len)284 bool set_content(size_t offset, const void * data, size_t len){ 285 size_t cursize = std_lite::max(size(), offset + len); 286 ensure_has_space(offset + len); 287 memmove(m_data_begin + offset, data, len); 288 m_data_end = m_data_begin + cursize; 289 return true; 290 } 291 292 /** 293 * MemoryChunk::append_content: 294 * @data: the begin of the data to be copied. 295 * @len: the length of the data to be copied. 296 * @returns: whether the data is appended successfully. 297 * 298 * Data are appended at the end of the MemoryChunk. 299 * 300 */ append_content(const void * data,size_t len)301 bool append_content(const void * data, size_t len){ 302 return set_content(size(), data, len); 303 } 304 305 /** 306 * MemoryChunk::insert_content: 307 * @offset: the offset in this MemoryChunk, which starts from zero. 308 * @data: the begin of the data to be copied. 309 * @length: the length of the data to be copied. 310 * @returns: whether the data is inserted successfully. 311 * 312 * Data are written to the memory area, 313 * the original content are moved towards the rear. 314 * 315 */ insert_content(size_t offset,const void * data,size_t length)316 bool insert_content(size_t offset, const void * data, size_t length){ 317 ensure_has_more_space(length); 318 size_t move_size = size() - offset; 319 memmove(m_data_begin + offset + length, m_data_begin + offset, move_size); 320 memmove(m_data_begin + offset, data, length); 321 m_data_end += length; 322 return true; 323 } 324 325 /** 326 * MemoryChunk::remove_content: 327 * @offset: the offset in this MemoryChunk. 328 * @length: the length of the removed content. 329 * @returns: whether the content is removed successfully. 330 * 331 * Data are removed directly, 332 * the following content are moved towards the front. 333 * 334 */ remove_content(size_t offset,size_t length)335 bool remove_content(size_t offset, size_t length){ 336 size_t move_size = size() - offset - length; 337 memmove(m_data_begin + offset, m_data_begin + offset + length, move_size); 338 m_data_end -= length; 339 return true; 340 } 341 342 /** 343 * MemoryChunk::get_content: 344 * @offset: the offset in this MemoryChunk. 345 * @buffer: the buffer to retrieve the content. 346 * @length: the length of content to be retrieved. 347 * @returns: whether the content is retrieved. 348 * 349 * Get the content in this MemoryChunk. 350 * 351 */ get_content(size_t offset,void * buffer,size_t length)352 bool get_content(size_t offset, void * buffer, size_t length){ 353 if ( size() < offset + length ) 354 return false; 355 memcpy( buffer, m_data_begin + offset, length); 356 return true; 357 } 358 359 /** 360 * MemoryChunk::compact_memory: 361 * 362 * Compact memory, reduce the size. 363 * 364 */ compact_memory()365 void compact_memory(){ 366 if ( m_free_func != (free_func_t)free ) 367 return; 368 size_t newsize = size(); 369 m_data_begin = (char *) realloc(m_data_begin, newsize); 370 m_allocated = m_data_begin + newsize; 371 } 372 373 /** 374 * MemoryChunk::load: 375 * @filename: load the MemoryChunk from the filename. 376 * @returns: whether the load is successful. 377 * 378 * Load the content from the filename. 379 * 380 */ load(const char * filename)381 bool load(const char * filename){ 382 /* free old data */ 383 reset(); 384 385 int fd = open(filename, O_RDONLY); 386 if (-1 == fd) 387 return false; 388 389 off_t file_size = lseek(fd, 0, SEEK_END); 390 lseek(fd, 0, SEEK_SET); 391 392 if (file_size < header) { 393 close(fd); 394 return false; 395 } 396 397 guint32 length = 0; 398 ssize_t ret_len = read(fd, &length, sizeof(guint32)); 399 assert(ret_len == sizeof(length)); 400 401 guint32 checksum = 0; 402 ret_len = read(fd, &checksum, sizeof(guint32)); 403 assert(ret_len == sizeof(checksum)); 404 405 guint32 data_len = file_size - header; 406 if (data_len != length) { 407 close(fd); 408 return false; 409 } 410 411 char * data = (char *) malloc(data_len); 412 if ( !data ){ 413 close(fd); 414 return false; 415 } 416 417 data_len = read(fd, data, data_len); 418 guint32 calc = get_check_sum(data, data_len); 419 if (checksum != calc) { 420 free(data); 421 close(fd); 422 return false; 423 } 424 425 set_chunk(data, data_len, (free_func_t)free); 426 427 close(fd); 428 return true; 429 } 430 431 #ifdef LIBPINYIN_USE_MMAP 432 /** 433 * MemoryChunk::mmap: 434 * @filename: mmap the MemoryChunk from the filename. 435 * @returns: whether the mmap is successful. 436 * 437 * mmap the content from the filename. 438 * 439 */ mmap(const char * filename)440 bool mmap(const char * filename){ 441 /* free old data */ 442 reset(); 443 444 int fd = open(filename, O_RDONLY); 445 if (-1 == fd) 446 return false; 447 448 off_t file_size = lseek(fd, 0, SEEK_END); 449 lseek(fd, 0, SEEK_SET); 450 451 if (file_size < header) { 452 close(fd); 453 return false; 454 } 455 456 guint32 length = 0; 457 ssize_t ret_len = read(fd, &length, sizeof(guint32)); 458 assert(ret_len == sizeof(length)); 459 460 guint32 checksum = 0; 461 ret_len = read(fd, &checksum, sizeof(guint32)); 462 assert(ret_len == sizeof(checksum)); 463 464 guint32 data_len = file_size - header; 465 if (data_len != length) { 466 close(fd); 467 return false; 468 } 469 470 char * data = (char *)::mmap(NULL, file_size, 471 PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); 472 473 if (MAP_FAILED == data) { 474 close(fd); 475 return false; 476 } 477 478 data = data + header; 479 guint32 calc = get_check_sum(data, data_len); 480 if (checksum != calc) { 481 munmap(data - header, file_size); 482 close(fd); 483 return false; 484 } 485 486 set_chunk(data, data_len, (free_func_t)munmap); 487 488 close(fd); 489 return true; 490 } 491 #endif 492 493 /** 494 * MemoryChunk::save: 495 * @filename: save this MemoryChunk to the filename. 496 * @returns: whether the save is successful. 497 * 498 * Save the content to the filename. 499 * 500 */ save(const char * filename)501 bool save(const char * filename){ 502 int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644); 503 if ( -1 == fd ) 504 return false; 505 506 guint32 length = size(); 507 ssize_t ret_len = write(fd, &length, sizeof(guint32)); 508 assert(ret_len == sizeof(length)); 509 510 guint32 checksum = get_check_sum(m_data_begin, size()); 511 ret_len = write(fd, &checksum, sizeof(guint32)); 512 assert(ret_len == sizeof(checksum)); 513 514 ret_len = write(fd, begin(), size()); 515 if (ret_len != (ssize_t) size()){ 516 close(fd); 517 return false; 518 } 519 520 fsync(fd); 521 close(fd); 522 return true; 523 } 524 }; 525 526 }; 527 528 #endif 529