1 /******************************************************************************
2  *
 *  rawstr.cpp -	code for class 'RawStr' - a module that reads raw text
 *			files using an index file (<path>.idx) and a data file
 *			(<path>.dat), and provides lookup and parsing functions
 *			based on class StrKey
7  *
8  * $Id: rawstr.cpp 3524 2017-11-07 03:08:49Z scribe $
9  *
10  * Copyright 1998-2013 CrossWire Bible Society (http://www.crosswire.org)
11  *	CrossWire Bible Society
12  *	P. O. Box 2528
13  *	Tempe, AZ  85280-2528
14  *
15  * This program is free software; you can redistribute it and/or modify it
16  * under the terms of the GNU General Public License as published by the
17  * Free Software Foundation version 2.
18  *
19  * This program is distributed in the hope that it will be useful, but
20  * WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * General Public License for more details.
23  *
24  */
25 
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 
30 #include <stdlib.h>
31 #include <utilstr.h>
32 #include <rawstr.h>
33 #include <sysdata.h>
34 #include <swlog.h>
35 #include <filemgr.h>
36 #include <swbuf.h>
37 #include <stringmgr.h>
38 
39 SWORD_NAMESPACE_START
40 
41 /******************************************************************************
42  * RawStr Statics
43  */
44 
45 int RawStr::instance = 0;
46 const char RawStr::nl = '\n';
47 const int RawStr::IDXENTRYSIZE = 6;
48 
49 
50 
51 /******************************************************************************
52  * RawStr Constructor - Initializes data for instance of RawStr
53  *
54  * ENT:	ipath - path of the directory where data and index files are located.
55  *		be sure to include the trailing separator (e.g. '/' or '\')
56  *		(e.g. 'modules/texts/rawtext/webster/')
57  */
58 
RawStr(const char * ipath,int fileMode,bool caseSensitive)59 RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
60 {
61 	SWBuf buf;
62 
63 	lastoff = -1;
64 	path = 0;
65 	stdstr(&path, ipath);
66 
67 	if (fileMode == -1) { // try read/write if possible
68 		fileMode = FileMgr::RDWR;
69 	}
70 
71 	buf.setFormatted("%s.idx", path);
72 	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
73 
74 	buf.setFormatted("%s.dat", path);
75 	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
76 
77 	if (!datfd || datfd->getFd() < 0) {
78 		// couldn't find datafile but this might be fine if we're
79 		// merely instantiating a remote InstallMgr SWMgr
80 		SWLog::getSystemLog()->logDebug("Couldn't open file: %s. errno: %d", buf.c_str(), errno);
81 	}
82 
83 	instance++;
84 }
85 
86 
87 /******************************************************************************
88  * RawStr Destructor - Cleans up instance of RawStr
89  */
90 
~RawStr()91 RawStr::~RawStr()
92 {
93 	if (path)
94 		delete [] path;
95 
96 	--instance;
97 
98 	FileMgr::getSystemFileMgr()->close(idxfd);
99 	FileMgr::getSystemFileMgr()->close(datfd);
100 }
101 
102 
103 /******************************************************************************
104  * RawStr::getidxbufdat	- Gets the index string at the given idx offset
105  *						NOTE: buf is allocated and must be freed by
106  *							calling function
107  *
108  * ENT:	ioffset	- offset in dat file to lookup
109  *		buf		- address of pointer to allocate for storage of string
110  */
111 
getIDXBufDat(long ioffset,char ** buf) const112 void RawStr::getIDXBufDat(long ioffset, char **buf) const
113 {
114 	int size;
115 	char ch;
116 	if (datfd && datfd->getFd() >= 0) {
117 		datfd->seek(ioffset, SEEK_SET);
118 		for (size = 0; datfd->read(&ch, 1) == 1; size++) {
119 			if ((ch == '\\') || (ch == 10) || (ch == 13))
120 				break;
121 		}
122 		*buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
123 		if (size) {
124 			datfd->seek(ioffset, SEEK_SET);
125 			datfd->read(*buf, size);
126 		}
127 		(*buf)[size] = 0;
128 		if (!caseSensitive) toupperstr_utf8(*buf, size*2);
129 	}
130 	else {
131 		*buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
132 		**buf = 0;
133 	}
134 }
135 
136 
137 /******************************************************************************
138  * RawStr::getidxbuf	- Gets the index string at the given idx offset
139  *						NOTE: buf is allocated and must be freed by
140  *							calling function
141  *
142  * ENT:	ioffset	- offset in idx file to lookup
143  *		buf		- address of pointer to allocate for storage of string
144  */
145 
getIDXBuf(long ioffset,char ** buf) const146 void RawStr::getIDXBuf(long ioffset, char **buf) const
147 {
148 	__u32 offset;
149 
150 	if (idxfd && idxfd->getFd() >= 0) {
151 		idxfd->seek(ioffset, SEEK_SET);
152 		idxfd->read(&offset, 4);
153 
154 		offset = swordtoarch32(offset);
155 
156 		getIDXBufDat(offset, buf);
157 	}
158 }
159 
160 
161 /******************************************************************************
162  * RawStr::findoffset	- Finds the offset of the key string from the indexes
163  *
164  * ENT:	key		- key string to lookup
165  *		start	- address to store the starting offset
166  *		size		- address to store the size of the entry
167  *		away		- number of entries before of after to jump
168  *					(default = 0)
169  *
170  * RET: error status -1 general error; -2 new file
171  */
172 
findOffset(const char * ikey,__u32 * start,__u16 * size,long away,__u32 * idxoff) const173 signed char RawStr::findOffset(const char *ikey, __u32 *start, __u16 *size, long away, __u32 *idxoff) const
174 {
175 	char *trybuf, *maxbuf, *key = 0, quitflag = 0;
176 	signed char retval = -1;
177 	long headoff, tailoff, tryoff = 0, maxoff = 0;
178 	int diff = 0;
179 	bool awayFromSubstrCheck = false;
180 
181 	if (idxfd->getFd() >=0) {
182 		tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
183 		retval = (tailoff >= 0) ? 0 : -2;	// if NOT new file
184 		if (*ikey && retval != -2) {
185 			headoff = 0;
186 
187 			stdstr(&key, ikey, 3);
188 			if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*3));
189 
190 			int keylen = (int)strlen(key);
191 			bool substr = false;
192 
193 			trybuf = maxbuf = 0;
194 			getIDXBuf(maxoff, &maxbuf);
195 
196 			while (headoff < tailoff) {
197 				tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
198 				lastoff = -1;
199 				getIDXBuf(tryoff, &trybuf);
200 
201 				if (!*trybuf && tryoff) {		// In case of extra entry at end of idx (not first entry)
202 					tryoff += (tryoff > (maxoff / 2))?-6:6;
203 					retval = -1;
204 					break;
205 				}
206 
207 				diff = strcmp(key, trybuf);
208 
209 				if (!diff)
210 					break;
211 
212 				if (!strncmp(trybuf, key, keylen)) substr = true;
213 
214 				if (diff < 0)
215 					tailoff = (tryoff == headoff) ? headoff : tryoff;
216 				else headoff = tryoff;
217 
218 				if (tailoff == headoff + 6) {
219 					if (quitflag++)
220 						headoff = tailoff;
221 				}
222 			}
223 
224 			// didn't find exact match
225 			if (headoff >= tailoff) {
226 				tryoff = headoff;
227 				if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
228 					awayFromSubstrCheck = true;
229 					away--;	// if our entry doesn't startwith our key, prefer the previous entry over the next
230 				}
231 			}
232 			if (trybuf)
233 				free(trybuf);
234 			delete [] key;
235                         if (maxbuf)
236                         	free(maxbuf);
237 		}
238 		else	tryoff = 0;
239 
240 		idxfd->seek(tryoff, SEEK_SET);
241 
242 		__u32 tmpStart;
243 		__u16 tmpSize;
244 		*start = *size = tmpStart = tmpSize = 0;
245 		idxfd->read(&tmpStart, 4);
246 		idxfd->read(&tmpSize, 2);
247 		if (idxoff)
248 			*idxoff = (__u32)tryoff;
249 
250 		*start = swordtoarch32(tmpStart);
251 		*size  = swordtoarch16(tmpSize);
252 
253 		while (away) {
254 			unsigned long laststart = *start;
255 			unsigned short lastsize = *size;
256 			long lasttry = tryoff;
257 			tryoff += (away > 0) ? 6 : -6;
258 
259 			bool bad = false;
260 			if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
261 				bad = true;
262 			else if (idxfd->seek(tryoff, SEEK_SET) < 0)
263 				bad = true;
264 			if (bad) {
265 				if(!awayFromSubstrCheck)
266 					retval = -1;
267 				*start = (__u32)laststart;
268 				*size = lastsize;
269 				tryoff = lasttry;
270 				if (idxoff)
271 					*idxoff = (__u32)tryoff;
272 				break;
273 			}
274 			idxfd->read(&tmpStart, 4);
275 			idxfd->read(&tmpSize, 2);
276 			if (idxoff)
277 				*idxoff = (__u32)tryoff;
278 
279 			*start = swordtoarch32(tmpStart);
280 			*size  = swordtoarch16(tmpSize);
281 
282 			if (((laststart != *start) || (lastsize != *size)) && (*size))
283 				away += (away < 0) ? 1 : -1;
284 		}
285 
286 		lastoff = tryoff;
287 	}
288 	else {
289 		*start = 0;
290 		*size  = 0;
291 		if (idxoff)
292 			*idxoff = 0;
293 		retval = -1;
294 	}
295 	return retval;
296 }
297 
298 
299 /******************************************************************************
300  * RawStr::readtext	- gets text at a given offset
301  *
302  * ENT:
303  *	start	- starting offset where the text is located in the file
304  *	size		- size of text entry
305  *	buf		- buffer to store text
306  *
307  */
308 
readText(__u32 istart,__u16 * isize,char ** idxbuf,SWBuf & buf) const309 void RawStr::readText(__u32 istart, __u16 *isize, char **idxbuf, SWBuf &buf) const
310 {
311 	unsigned int ch;
312 	char *idxbuflocal = 0;
313 	getIDXBufDat(istart, &idxbuflocal);
314 	__u32 start = istart;
315 
316 	do {
317 		if (*idxbuf)
318 			delete [] *idxbuf;
319 
320 		buf = "";
321 		buf.setFillByte(0);
322 		buf.setSize(++(*isize));
323 
324 		*idxbuf = new char [ (*isize) ];
325 
326 		datfd->seek(start, SEEK_SET);
327 		datfd->read(buf.getRawData(), (int)((*isize) - 1));
328 
329 		for (ch = 0; buf[ch]; ch++) {		// skip over index string
330 			if (buf[ch] == 10) {
331 				ch++;
332 				break;
333 			}
334 		}
335 		buf = SWBuf(buf.c_str()+ch);
336 		// resolve link
337 		if (!strncmp(buf.c_str(), "@LINK", 5)) {
338 			for (ch = 0; buf[ch]; ch++) {		// null before nl
339 				if (buf[ch] == 10) {
340 					buf[ch] = 0;
341 					break;
342 				}
343 			}
344 			findOffset(buf.c_str() + 6, &start, isize);
345 		}
346 		else break;
347 	}
348 	while (true);	// while we're resolving links
349 
350 	if (idxbuflocal) {
351 		int localsize = (int)strlen(idxbuflocal);
352 		localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
353 		strncpy(*idxbuf, idxbuflocal, localsize);
354 		(*idxbuf)[localsize] = 0;
355 		free(idxbuflocal);
356 	}
357 }
358 
359 
360 /******************************************************************************
361  * RawLD::settext	- Sets text for current offset
362  *
363  * ENT: key	- key for this entry
364  *	buf	- buffer to store
365  *      len     - length of buffer (0 - null terminated)
366  */
367 
doSetText(const char * ikey,const char * buf,long len)368 void RawStr::doSetText(const char *ikey, const char *buf, long len)
369 {
370 
371 	__u32 start, outstart;
372 	__u32 idxoff;
373 	__u32 endoff;
374 	__s32 shiftSize;
375 	__u16 size;
376 	__u16 outsize;
377 	char *tmpbuf = 0;
378 	char *key = 0;
379 	char *dbKey = 0;
380 	char *idxBytes = 0;
381 	char *outbuf = 0;
382 	char *ch = 0;
383 
384 	char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
385 	stdstr(&key, ikey, 2);
386 	if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*2));
387 
388 	len = (len < 0) ? strlen(buf) : len;
389 
390 	getIDXBufDat(start, &dbKey);
391 
392 	if (strcmp(key, dbKey) < 0) {
393 	}
394 	else if (strcmp(key, dbKey) > 0) {
395 		if (errorStatus != (char)-2)	// not a new file
396 			idxoff += 6;
397 		else idxoff = 0;
398 	}
399 	else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry
400 		do {
401 			tmpbuf = new char [ size + 2 ];
402 			memset(tmpbuf, 0, size + 2);
403 			datfd->seek(start, SEEK_SET);
404 			datfd->read(tmpbuf, (int)(size - 1));
405 
406 			for (ch = tmpbuf; *ch; ch++) {		// skip over index string
407 				if (*ch == 10) {
408 					ch++;
409 					break;
410 				}
411 			}
412 			memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
413 
414 			// resolve link
415 			if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
416 				for (ch = tmpbuf; *ch; ch++) {		// null before nl
417 					if (*ch == 10) {
418 						*ch = 0;
419 						break;
420 					}
421 				}
422 				findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
423 			}
424 			else break;
425 		}
426 		while (true);	// while we're resolving links
427 	}
428 
429 	endoff = (__u32)idxfd->seek(0, SEEK_END);
430 
431 	shiftSize = endoff - idxoff;
432 
433 	if (shiftSize > 0) {
434 		idxBytes = new char [ shiftSize ];
435 		idxfd->seek(idxoff, SEEK_SET);
436 		idxfd->read(idxBytes, shiftSize);
437 	}
438 
439 	outbuf = new char [ len + strlen(key) + 5 ];
440 	sprintf(outbuf, "%s%c%c", key, 13, 10);
441 	size = strlen(outbuf);
442 	memcpy(outbuf + size, buf, len);
443 	size = outsize = size + (len);
444 
445 	start = outstart = (__u32)datfd->seek(0, SEEK_END);
446 
447 	outstart = archtosword32(start);
448 	outsize  = archtosword16(size);
449 
450 	idxfd->seek(idxoff, SEEK_SET);
451 	if (len > 0) {
452 		datfd->seek(start, SEEK_SET);
453 		datfd->write(outbuf, (int)size);
454 
455 		// add a new line to make data file easier to read in an editor
456 		datfd->write(&nl, 1);
457 
458 		idxfd->write(&outstart, 4);
459 		idxfd->write(&outsize, 2);
460 		if (idxBytes) {
461 			idxfd->write(idxBytes, shiftSize);
462 			delete [] idxBytes;
463 		}
464 	}
465 	else {	// delete entry
466 		if (idxBytes) {
467 			idxfd->write(idxBytes+6, shiftSize-6);
468 			idxfd->seek(-1, SEEK_CUR);	// last valid byte
469 			FileMgr::getSystemFileMgr()->trunc(idxfd);	// truncate index
470 			delete [] idxBytes;
471 		}
472 	}
473 
474 	delete [] key;
475 	delete [] outbuf;
476 	free(dbKey);
477 }
478 
479 
480 /******************************************************************************
481  * RawLD::linkentry	- links one entry to another
482  *
483  * ENT: testmt	- testament to find (0 - Bible/module introduction)
484  *	destidxoff	- dest offset into .vss
485  *	srcidxoff		- source offset into .vss
486  */
487 
doLinkEntry(const char * destkey,const char * srckey)488 void RawStr::doLinkEntry(const char *destkey, const char *srckey) {
489 	char *text = new char [ strlen(destkey) + 7 ];
490 	sprintf(text, "@LINK %s", destkey);
491 	doSetText(srckey, text);
492 	delete [] text;
493 }
494 
495 /******************************************************************************
496  * RawLD::CreateModule	- Creates new module files
497  *
498  * ENT: path	- directory to store module files
499  * RET: error status
500  */
501 
createModule(const char * ipath)502 signed char RawStr::createModule(const char *ipath)
503 {
504 	char *path = 0;
505 	char *buf = new char [ strlen (ipath) + 20 ];
506 	FileDesc *fd, *fd2;
507 
508 	stdstr(&path, ipath);
509 
510 	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
511 		path[strlen(path)-1] = 0;
512 
513 	sprintf(buf, "%s.dat", path);
514 	FileMgr::removeFile(buf);
515 	fd = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
516 	fd->getFd();
517 	FileMgr::getSystemFileMgr()->close(fd);
518 
519 	sprintf(buf, "%s.idx", path);
520 	FileMgr::removeFile(buf);
521 	fd2 = FileMgr::getSystemFileMgr()->open(buf, FileMgr::CREAT|FileMgr::WRONLY, FileMgr::IREAD|FileMgr::IWRITE);
522 	fd2->getFd();
523 	FileMgr::getSystemFileMgr()->close(fd2);
524 
525 	delete [] path;
526 
527 	return 0;
528 }
529 
530 SWORD_NAMESPACE_END
531