1 /******************************************************************************
2 *
 *  rawstr.cpp -	code for class 'RawStr' - a module that reads raw text
 *			entries addressed by string keys, using an index
 *			file (<path>.idx) and a data file (<path>.dat),
 *			and provides lookup and parsing functions based on
 *			class StrKey
7 *
8 * $Id: rawstr.cpp 3524 2017-11-07 03:08:49Z scribe $
9 *
10 * Copyright 1998-2013 CrossWire Bible Society (http://www.crosswire.org)
11 * CrossWire Bible Society
12 * P. O. Box 2528
13 * Tempe, AZ 85280-2528
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the
17 * Free Software Foundation version 2.
18 *
19 * This program is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 *
24 */
25
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <errno.h>
29
30 #include <stdlib.h>
31 #include <utilstr.h>
32 #include <rawstr.h>
33 #include <sysdata.h>
34 #include <swlog.h>
35 #include <filemgr.h>
36 #include <swbuf.h>
37 #include <stringmgr.h>
38
39 SWORD_NAMESPACE_START
40
41 /******************************************************************************
42 * RawStr Statics
43 */
44
45 int RawStr::instance = 0;
46 const char RawStr::nl = '\n';
47 const int RawStr::IDXENTRYSIZE = 6;
48
49
50
51 /******************************************************************************
52 * RawStr Constructor - Initializes data for instance of RawStr
53 *
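 * ENT:	ipath		- path stem of the module's files, without extension;
 *			  ".idx" and ".dat" are appended to it
 *			  (e.g. 'modules/lexdict/rawld/webster/webster')
 *	fileMode	- mode used to open both files; -1 selects read/write
 *	caseSensitive	- when false, keys are upper-cased before being compared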
57 */
58
RawStr(const char * ipath,int fileMode,bool caseSensitive)59 RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
60 {
61 SWBuf buf;
62
63 lastoff = -1;
64 path = 0;
65 stdstr(&path, ipath);
66
67 if (fileMode == -1) { // try read/write if possible
68 fileMode = FileMgr::RDWR;
69 }
70
71 buf.setFormatted("%s.idx", path);
72 idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
73
74 buf.setFormatted("%s.dat", path);
75 datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
76
77 if (!datfd || datfd->getFd() < 0) {
78 // couldn't find datafile but this might be fine if we're
79 // merely instantiating a remote InstallMgr SWMgr
80 SWLog::getSystemLog()->logDebug("Couldn't open file: %s. errno: %d", buf.c_str(), errno);
81 }
82
83 instance++;
84 }
85
86
87 /******************************************************************************
88 * RawStr Destructor - Cleans up instance of RawStr
89 */
90
~RawStr()91 RawStr::~RawStr()
92 {
93 if (path)
94 delete [] path;
95
96 --instance;
97
98 FileMgr::getSystemFileMgr()->close(idxfd);
99 FileMgr::getSystemFileMgr()->close(datfd);
100 }
101
102
103 /******************************************************************************
 * RawStr::getIDXBufDat	- Gets the index string stored at the given
 *				offset in the dat file
 *				NOTE: buf is allocated with malloc/realloc and
 *				must be released with free() by the calling
 *				function
 *
 * ENT:	ioffset	- offset in dat file to lookup
 *	buf	- address of pointer to allocate for storage of string
110 */
111
getIDXBufDat(long ioffset,char ** buf) const112 void RawStr::getIDXBufDat(long ioffset, char **buf) const
113 {
114 int size;
115 char ch;
116 if (datfd && datfd->getFd() >= 0) {
117 datfd->seek(ioffset, SEEK_SET);
118 for (size = 0; datfd->read(&ch, 1) == 1; size++) {
119 if ((ch == '\\') || (ch == 10) || (ch == 13))
120 break;
121 }
122 *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
123 if (size) {
124 datfd->seek(ioffset, SEEK_SET);
125 datfd->read(*buf, size);
126 }
127 (*buf)[size] = 0;
128 if (!caseSensitive) toupperstr_utf8(*buf, size*2);
129 }
130 else {
131 *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
132 **buf = 0;
133 }
134 }
135
136
137 /******************************************************************************
138 * RawStr::getidxbuf - Gets the index string at the given idx offset
139 * NOTE: buf is allocated and must be freed by
140 * calling function
141 *
142 * ENT: ioffset - offset in idx file to lookup
143 * buf - address of pointer to allocate for storage of string
144 */
145
getIDXBuf(long ioffset,char ** buf) const146 void RawStr::getIDXBuf(long ioffset, char **buf) const
147 {
148 __u32 offset;
149
150 if (idxfd && idxfd->getFd() >= 0) {
151 idxfd->seek(ioffset, SEEK_SET);
152 idxfd->read(&offset, 4);
153
154 offset = swordtoarch32(offset);
155
156 getIDXBufDat(offset, buf);
157 }
158 }
159
160
161 /******************************************************************************
 * RawStr::findOffset	- Finds the offset of the key string from the indexes
 *
 * ENT:	ikey	- key string to lookup
 *	start	- address to store the starting offset
 *	size	- address to store the size of the entry
 *	away	- number of entries before or after to jump
 *		  (default = 0)
 *	idxoff	- address to store the offset of the selected record in the
 *		  index file (may be 0 if not wanted)
 *
 * RET: error status -1 general error; -2 new file
171 */
172
findOffset(const char * ikey,__u32 * start,__u16 * size,long away,__u32 * idxoff) const173 signed char RawStr::findOffset(const char *ikey, __u32 *start, __u16 *size, long away, __u32 *idxoff) const
174 {
175 char *trybuf, *maxbuf, *key = 0, quitflag = 0;
176 signed char retval = -1;
177 long headoff, tailoff, tryoff = 0, maxoff = 0;
178 int diff = 0;
179 bool awayFromSubstrCheck = false;
180
181 if (idxfd->getFd() >=0) {
182 tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
183 retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
184 if (*ikey && retval != -2) {
185 headoff = 0;
186
187 stdstr(&key, ikey, 3);
188 if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*3));
189
190 int keylen = (int)strlen(key);
191 bool substr = false;
192
193 trybuf = maxbuf = 0;
194 getIDXBuf(maxoff, &maxbuf);
195
196 while (headoff < tailoff) {
197 tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
198 lastoff = -1;
199 getIDXBuf(tryoff, &trybuf);
200
201 if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
202 tryoff += (tryoff > (maxoff / 2))?-6:6;
203 retval = -1;
204 break;
205 }
206
207 diff = strcmp(key, trybuf);
208
209 if (!diff)
210 break;
211
212 if (!strncmp(trybuf, key, keylen)) substr = true;
213
214 if (diff < 0)
215 tailoff = (tryoff == headoff) ? headoff : tryoff;
216 else headoff = tryoff;
217
218 if (tailoff == headoff + 6) {
219 if (quitflag++)
220 headoff = tailoff;
221 }
222 }
223
224 // didn't find exact match
225 if (headoff >= tailoff) {
226 tryoff = headoff;
227 if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
228 awayFromSubstrCheck = true;
229 away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
230 }
231 }
232 if (trybuf)
233 free(trybuf);
234 delete [] key;
235 if (maxbuf)
236 free(maxbuf);
237 }
238 else tryoff = 0;
239
240 idxfd->seek(tryoff, SEEK_SET);
241
242 __u32 tmpStart;
243 __u16 tmpSize;
244 *start = *size = tmpStart = tmpSize = 0;
245 idxfd->read(&tmpStart, 4);
246 idxfd->read(&tmpSize, 2);
247 if (idxoff)
248 *idxoff = (__u32)tryoff;
249
250 *start = swordtoarch32(tmpStart);
251 *size = swordtoarch16(tmpSize);
252
253 while (away) {
254 unsigned long laststart = *start;
255 unsigned short lastsize = *size;
256 long lasttry = tryoff;
257 tryoff += (away > 0) ? 6 : -6;
258
259 bool bad = false;
260 if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
261 bad = true;
262 else if (idxfd->seek(tryoff, SEEK_SET) < 0)
263 bad = true;
264 if (bad) {
265 if(!awayFromSubstrCheck)
266 retval = -1;
267 *start = (__u32)laststart;
268 *size = lastsize;
269 tryoff = lasttry;
270 if (idxoff)
271 *idxoff = (__u32)tryoff;
272 break;
273 }
274 idxfd->read(&tmpStart, 4);
275 idxfd->read(&tmpSize, 2);
276 if (idxoff)
277 *idxoff = (__u32)tryoff;
278
279 *start = swordtoarch32(tmpStart);
280 *size = swordtoarch16(tmpSize);
281
282 if (((laststart != *start) || (lastsize != *size)) && (*size))
283 away += (away < 0) ? 1 : -1;
284 }
285
286 lastoff = tryoff;
287 }
288 else {
289 *start = 0;
290 *size = 0;
291 if (idxoff)
292 *idxoff = 0;
293 retval = -1;
294 }
295 return retval;
296 }
297
298
299 /******************************************************************************
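 * RawStr::readText	- gets text at a given offset
 *
 * ENT:
 *	istart	- starting offset where the text is located in the dat file
 *	isize	- address of the size of the text entry; it is updated while
 *		  reading (incremented, and replaced when a link is followed)
 *	idxbuf	- address of pointer that receives the entry's index string
 *		  (allocated with new [])
 *	buf	- buffer to store text
 *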
307 */
308
readText(__u32 istart,__u16 * isize,char ** idxbuf,SWBuf & buf) const309 void RawStr::readText(__u32 istart, __u16 *isize, char **idxbuf, SWBuf &buf) const
310 {
311 unsigned int ch;
312 char *idxbuflocal = 0;
313 getIDXBufDat(istart, &idxbuflocal);
314 __u32 start = istart;
315
316 do {
317 if (*idxbuf)
318 delete [] *idxbuf;
319
320 buf = "";
321 buf.setFillByte(0);
322 buf.setSize(++(*isize));
323
324 *idxbuf = new char [ (*isize) ];
325
326 datfd->seek(start, SEEK_SET);
327 datfd->read(buf.getRawData(), (int)((*isize) - 1));
328
329 for (ch = 0; buf[ch]; ch++) { // skip over index string
330 if (buf[ch] == 10) {
331 ch++;
332 break;
333 }
334 }
335 buf = SWBuf(buf.c_str()+ch);
336 // resolve link
337 if (!strncmp(buf.c_str(), "@LINK", 5)) {
338 for (ch = 0; buf[ch]; ch++) { // null before nl
339 if (buf[ch] == 10) {
340 buf[ch] = 0;
341 break;
342 }
343 }
344 findOffset(buf.c_str() + 6, &start, isize);
345 }
346 else break;
347 }
348 while (true); // while we're resolving links
349
350 if (idxbuflocal) {
351 int localsize = (int)strlen(idxbuflocal);
352 localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
353 strncpy(*idxbuf, idxbuflocal, localsize);
354 (*idxbuf)[localsize] = 0;
355 free(idxbuflocal);
356 }
357 }
358
359
360 /******************************************************************************
 * RawStr::doSetText	- Sets text for the entry with the given key
 *
 * ENT:	ikey	- key for this entry
 *	buf	- buffer to store
 *	len	- length of buffer (negative - buf is null terminated;
 *		  0 - delete the entry)
366 */
367
doSetText(const char * ikey,const char * buf,long len)368 void RawStr::doSetText(const char *ikey, const char *buf, long len)
369 {
370
371 __u32 start, outstart;
372 __u32 idxoff;
373 __u32 endoff;
374 __s32 shiftSize;
375 __u16 size;
376 __u16 outsize;
377 char *tmpbuf = 0;
378 char *key = 0;
379 char *dbKey = 0;
380 char *idxBytes = 0;
381 char *outbuf = 0;
382 char *ch = 0;
383
384 char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
385 stdstr(&key, ikey, 2);
386 if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*2));
387
388 len = (len < 0) ? strlen(buf) : len;
389
390 getIDXBufDat(start, &dbKey);
391
392 if (strcmp(key, dbKey) < 0) {
393 }
394 else if (strcmp(key, dbKey) > 0) {
395 if (errorStatus != (char)-2) // not a new file
396 idxoff += 6;
397 else idxoff = 0;
398 }
399 else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry
400 do {
401 tmpbuf = new char [ size + 2 ];
402 memset(tmpbuf, 0, size + 2);
403 datfd->seek(start, SEEK_SET);
404 datfd->read(tmpbuf, (int)(size - 1));
405
406 for (ch = tmpbuf; *ch; ch++) { // skip over index string
407 if (*ch == 10) {
408 ch++;
409 break;
410 }
411 }
412 memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
413
414 // resolve link
415 if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
416 for (ch = tmpbuf; *ch; ch++) { // null before nl
417 if (*ch == 10) {
418 *ch = 0;
419 break;
420 }
421 }
422 findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
423 }
424 else break;
425 }
426 while (true); // while we're resolving links
427 }
428
429 endoff = (__u32)idxfd->seek(0, SEEK_END);
430
431 shiftSize = endoff - idxoff;
432
433 if (shiftSize > 0) {
434 idxBytes = new char [ shiftSize ];
435 idxfd->seek(idxoff, SEEK_SET);
436 idxfd->read(idxBytes, shiftSize);
437 }
438
439 outbuf = new char [ len + strlen(key) + 5 ];
440 sprintf(outbuf, "%s%c%c", key, 13, 10);
441 size = strlen(outbuf);
442 memcpy(outbuf + size, buf, len);
443 size = outsize = size + (len);
444
445 start = outstart = (__u32)datfd->seek(0, SEEK_END);
446
447 outstart = archtosword32(start);
448 outsize = archtosword16(size);
449
450 idxfd->seek(idxoff, SEEK_SET);
451 if (len > 0) {
452 datfd->seek(start, SEEK_SET);
453 datfd->write(outbuf, (int)size);
454
455 // add a new line to make data file easier to read in an editor
456 datfd->write(&nl, 1);
457
458 idxfd->write(&outstart, 4);
459 idxfd->write(&outsize, 2);
460 if (idxBytes) {
461 idxfd->write(idxBytes, shiftSize);
462 delete [] idxBytes;
463 }
464 }
465 else { // delete entry
466 if (idxBytes) {
467 idxfd->write(idxBytes+6, shiftSize-6);
468 idxfd->seek(-1, SEEK_CUR); // last valid byte
469 FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
470 delete [] idxBytes;
471 }
472 }
473
474 delete [] key;
475 delete [] outbuf;
476 free(dbKey);
477 }
478
479
480 /******************************************************************************
 * RawStr::doLinkEntry	- links one entry to another
 *
 * ENT:	destkey	- key of the entry to link to
 *	srckey	- key of the entry that becomes the link
 *		  (its text is set to "@LINK <destkey>")
486 */
487
doLinkEntry(const char * destkey,const char * srckey)488 void RawStr::doLinkEntry(const char *destkey, const char *srckey) {
489 char *text = new char [ strlen(destkey) + 7 ];
490 sprintf(text, "@LINK %s", destkey);
491 doSetText(srckey, text);
492 delete [] text;
493 }
494
495 /******************************************************************************
 * RawStr::createModule	- Creates new module files
 *
 * ENT:	ipath	- path stem of the module files to create; one trailing
 *		  '/' or '\' is dropped, then ".dat" and ".idx" are appended
 * RET: error status
500 */
501
createModule(const char * ipath)502 signed char RawStr::createModule(const char *ipath)
503 {
504 char *path = 0;
505 char *buf = new char [ strlen (ipath) + 20 ];
506 FileDesc *fd, *fd2;
507
508 stdstr(&path, ipath);
509
510 if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
511 path[strlen(path)-1] = 0;
512
513 sprintf(buf, "%s.dat", path);
514 FileMgr::removeFile(buf);
515 fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
516 fd->getFd();
517 FileMgr::getSystemFileMgr()->close(fd);
518
519 sprintf(buf, "%s.idx", path);
520 FileMgr::removeFile(buf);
521 fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
522 fd2->getFd();
523 FileMgr::getSystemFileMgr()->close(fd2);
524
525 delete [] path;
526
527 return 0;
528 }
529
530 SWORD_NAMESPACE_END
531