1 /************************************************************************/
2 /*									*/
3 /*  Manage the string of text bytes in a paragraph.			*/
4 /*  (Try not to be too clever, this will be changed to unicode once.)	*/
5 /*									*/
6 /************************************************************************/
7 
8 #   include	"docBufConfig.h"
9 
10 #   include	<stdlib.h>
11 
12 #   include	<appDebugon.h>
13 
14 #   include	<uniShiftUtf8.h>
15 #   include	<uniUtf8.h>
16 #   include	<ucdGeneralCategory.h>
17 #   include	<textConverter.h>
18 
19 #   include	"docBuf.h"
20 #   include	"docParaString.h"
21 #   include	"docParaParticules.h"
22 
23 /************************************************************************/
24 /*									*/
25 /*  Replace part of the string of a paragraph with a different string	*/
26 /*									*/
27 /************************************************************************/
28 
/*
 *  Replace the bytes from stroffBegin up to stroffTail in the string
 *  buffer of the paragraph with the addedStrlen bytes at addedString.
 *
 *  pSizeShift:	Optional (may be null). Receives
 *		addedStrlen - stroffTail + stroffBegin, the change in
 *		length implied by the arguments. It is stored whether
 *		or not the replacement succeeded.
 *
 *  Returns whatever utilMemoryBufferReplaceBytes() returned. The
 *  caller in this file treats any non-zero value as a failure.
 */
int docParaStringReplace(int *pSizeShift,
                         BufferItem *paraNode,
                         int stroffBegin,
                         int stroffTail,
                         const char *addedString,
                         int addedStrlen)
{
    const int status = utilMemoryBufferReplaceBytes(
        &(paraNode->biParaStringBuffer),
        stroffBegin, stroffTail,
        (unsigned char *)addedString, addedStrlen);

    if (!pSizeShift) {
        return status;
    }

    *pSizeShift = addedStrlen - stroffTail + stroffBegin;

    return status;
}
49 
50 /************************************************************************/
51 
/*
 *  Find the string offset of the head of the next word.
 *
 *  Starting at stroff, first advance to the first character for which
 *  ucdIsZ() holds (presumably Unicode general category Z: separators),
 *  then advance over the run of such characters. Both scans stop at
 *  the end of the paragraph string.
 *
 *  Returns the resulting offset, or -1 if uniGetUtf8() fails to decode
 *  a character (it reports a step < 1).
 */
int docParaNextWord(const BufferItem *paraNode, int stroff)
{
    const char *cursor = (const char *)docParaString(paraNode, stroff);
    const int paraStrlen = docParaStrlen(paraNode);
    int inSeparators;

    /* Phase 0 runs over the word, phase 1 over the separators after it. */
    for (inSeparators = 0; inSeparators <= 1; inSeparators++) {
        while (stroff < paraStrlen) {
            unsigned short symbol;
            int step = uniGetUtf8(&symbol, cursor);

            if (step < 1) {
                LLDEB(stroff,step);
                return -1;
            }

            /* Stop as soon as the character is of the other class. */
            if ((ucdIsZ(symbol) != 0) != inSeparators) {
                break;
            }

            cursor += step;
            stroff += step;
        }
    }

    return stroff;
}
89 
/*
 *  Find the string offset of the head of the word before stroff.
 *
 *  Moving backward from stroff, first skip the characters for which
 *  ucdIsZ() holds (presumably separators), then skip the characters
 *  for which it does not. Both scans stop at offset 0.
 *
 *  To step back one character, the UTF-8 continuation bytes (10xxxxxx)
 *  before the current position are walked over, never going below
 *  offset 0, and the character found there is decoded.
 *
 *  Returns the resulting offset, or -1 if uniGetUtf8() reports a
 *  step < 1.
 */
int docParaPrevWord(const BufferItem *paraNode, int stroff)
{
    const char *cursor = (const char *)docParaString(paraNode, stroff);
    int inSeparators;

    /* Phase 1 runs over trailing separators, phase 0 over the word. */
    for (inSeparators = 1; inSeparators >= 0; inSeparators--) {
        while (stroff > 0) {
            unsigned short symbol;
            int step;
            int st = 1;
            const char *fr = cursor - 1;

            /* Back up to what should be the lead byte. */
            for (; stroff - st > 0 && (*fr & 0xc0) == 0x80; fr--) {
                st++;
            }

            step = uniGetUtf8(&symbol, fr);
            if (step < 1) {
                LLDEB(stroff-st,step);
                return -1;
            }

            /*
             * NOTE(review): a mismatch is only reported. The position
             * is still moved by step, not by st.
             */
            if (step != st) {
                LLDEB(step,st);
            }

            if ((ucdIsZ(symbol) != 0) != inSeparators) {
                break;
            }

            cursor -= step;
            stroff -= step;
        }
    }

    return stroff;
}
146 
docParaHeadOfWord(int * pStroff,const BufferItem * paraNode,int part)147 int docParaHeadOfWord(		int *				pStroff,
148 				const BufferItem *		paraNode,
149 				int				part )
150     {
151     const TextParticule *	tp= paraNode->biParaParticules+ part;
152     int				stroff= tp->tpStroff;
153 
154     while( part > 0						&&
155 	   tp[-1].tpKind == DOCkindSPAN				&&
156 	   docParaString( paraNode, tp->tpStroff- 1 )[0] != ' '	)
157 	{
158 	tp--; part--;
159 	stroff= tp->tpStroff;
160 	}
161 
162     *pStroff= stroff;
163     return part;
164     }
165 
docParaTailOfWord(int * pStroff,const BufferItem * paraNode,int part)166 int docParaTailOfWord(		int *				pStroff,
167 				const BufferItem *		paraNode,
168 				int				part )
169     {
170     const TextParticule *	tp= paraNode->biParaParticules+ part;
171     int				stroff= tp->tpStroff+ tp->tpStrlen;
172 
173     while( part < paraNode->biParaParticuleCount- 1	&&
174 	   tp[1].tpKind == DOCkindSPAN			&&
175 	   docParaString( paraNode, stroff- 1 )[0] != ' '	)
176 	{
177 	tp++; part++;
178 	stroff= tp->tpStroff+ tp->tpStrlen;
179 	}
180 
181     while( stroff > tp->tpStroff			&&
182 	   docParaString( paraNode, stroff- 1 )[0] == ' '	)
183 	{ stroff--; }
184 
185     *pStroff= stroff;
186     return part;
187     }
188 
189 /************************************************************************/
190 /*									*/
191 /*  Translate a stretch of text to upper case for capitals or smallcaps	*/
192 /*  drawing.								*/
193 /*									*/
194 /*  3)  Worst case alternating, starting with lower case. 1+ len	*/
195 /*	segments. But when it is just one lower case letter we need 3!	*/
196 /*									*/
197 /************************************************************************/
198 
docMakeCapsString(char ** pUpperString,int ** pSegments,int * pSegmentCount,const TextAttribute * ta,const char * printString,int len)199 int docMakeCapsString(		char **			pUpperString,
200 				int **			pSegments,
201 				int *			pSegmentCount,
202 				const TextAttribute *	ta,
203 				const char *		printString,
204 				int			len )
205     {
206     int				rval= 0;
207 
208     int				segmentCount= 0;
209     char *			upperString= (char *)0;
210     int *			segments= (int *)0;
211 
212     if  ( ta->taSmallCaps && ! ta->taCapitals )
213 	{
214 	if  ( uniShiftUtf8String( &upperString, &segments, &segmentCount,
215 					    SHIFT_UPPER, printString, len ) )
216 	    { LDEB(len); rval= -1; goto ready;	}
217 	}
218     else{
219 	if  ( uniShiftUtf8String( &upperString, (int **)0, (int *)0,
220 					    SHIFT_UPPER, printString, len ) )
221 	    { LDEB(len); rval= -1; goto ready;	}
222 	}
223 
224     *pUpperString= upperString; upperString= (char *)0; /* steal */
225     *pSegments= segments; segments= (int *)0; /* steal */
226     *pSegmentCount= segmentCount;
227 
228   ready:
229 
230     if  ( upperString )
231 	{ free( upperString );	}
232     if  ( segments )
233 	{ free( segments );	}
234 
235     return rval;
236     }
237 
238 /************************************************************************/
239 
docParaPastLastNonBlank(const BufferItem * paraNode,int from,int upto)240 int docParaPastLastNonBlank(		const BufferItem *	paraNode,
241 					int			from,
242 					int			upto )
243     {
244     while( upto > from					&&
245 	   paraNode->biParaString[upto-1] == ' '	)
246 	{ upto--;	}
247 
248     return upto;
249     }
250 
251 /************************************************************************/
252 /*									*/
253 /*  Delimit a single particule.						*/
254 /*									*/
255 /************************************************************************/
256 
/*
 *  Measure one particule at the head of the strLen bytes at 'from':
 *  a run of characters for which ucdIsZ() does not hold, followed by
 *  a run of characters for which it does (presumably separators).
 *
 *  On success, tpNew->tpKind is set to DOCkindSPAN and
 *  tpNew->tpStrlen to the number of bytes found. That number is also
 *  the return value. Returns -1, leaving tpNew alone, if uniGetUtf8()
 *  reports a step < 1.
 */
static int docDelimitParticule(TextParticule *tpNew,
                               const char *from,
                               int strLen)
{
    int len = 0;
    int inSeparators;

    /* Phase 0: visible text. Phase 1: the space that follows it. */
    for (inSeparators = 0; inSeparators <= 1; inSeparators++) {
        while (len < strLen) {
            unsigned short symbol;
            int step = uniGetUtf8(&symbol, from);

            if (step < 1) {
                LDEB(step);
                return -1;
            }

            if ((ucdIsZ(symbol) != 0) != inSeparators) {
                break;
            }

            from += step;
            len += step;
        }
    }

    tpNew->tpKind = DOCkindSPAN;
    tpNew->tpStrlen = len;

    return len;
}
296 
297 /************************************************************************/
298 /*									*/
299 /*  Redivide a piece of a paragraph in particules.			*/
300 /*									*/
301 /************************************************************************/
302 
/*
 *  Divide the strLen bytes at offset strOff of the paragraph string
 *  into span particules, as delimited by docDelimitParticule().
 *
 *  part:		Index of the first particule to write.
 *  partsFree:		Number of existing particules, starting at
 *			'part', that may be overwritten. Once these are
 *			used up, particules are inserted with
 *			docInsertTextParticule().
 *  textAttributeNumber: Stored in every particule written.
 *
 *  Returns the number of particules written, or -1 on failure.
 */
int docRedivideStringInParticules(BufferItem *paraNode,
                                  int strOff,
                                  int strLen,
                                  int part,
                                  int partsFree,
                                  int textAttributeNumber)
{
    const int strUpto = strOff + strLen;
    int partsDone = 0;

    TextParticule *tp = paraNode->biParaParticules + part;

    while (strOff < strUpto) {
        TextParticule tpNew;
        int len;

#ifdef DEB_PARTICULES
        const char *label = "?-?";
#endif

        tpNew.tpKind = DOCkindUNKNOWN;
        tpNew.tpStroff = strOff;
        tpNew.tpStrlen = 0;
        tpNew.tpTextAttrNr = textAttributeNumber;

        len = docDelimitParticule(&tpNew,
                                  (const char *)paraNode->biParaString + strOff,
                                  strUpto - strOff);
        if (len < 0) {
            LDEB(len);
            return -1;
        }

        if (partsDone >= partsFree) {
            /* Nothing left to reuse: insert. Continue from the result. */
            tp = docInsertTextParticule(paraNode, part,
                                        tpNew.tpStroff, tpNew.tpStrlen,
                                        tpNew.tpKind, tpNew.tpTextAttrNr);
            if (!tp) {
                XDEB(tp);
                return -1;
            }

#ifdef DEB_PARTICULES
            label = "NW+";
#endif
        } else {
            /* Overwrite a particule that the caller allows us to reuse. */
            tp->tpKind = tpNew.tpKind;
            tp->tpStroff = tpNew.tpStroff;
            tp->tpStrlen = tpNew.tpStrlen;
            tp->tpTextAttrNr = tpNew.tpTextAttrNr;

            tp->tpTwipsWide = 0;

#ifdef DEB_PARTICULES
            label = "NW.";
#endif
        }

#ifdef DEB_PARTICULES
        appDebug("%s %3d: [%4d..%4d] %s \"%.*s\" len= %d\n", label, part,
                 tp->tpStroff,
                 tp->tpStroff + tp->tpStrlen,
                 docKindStr(tp->tpKind),
                 (int)tp->tpStrlen,
                 docParaString(paraNode, tp->tpStroff),
                 tp->tpStrlen);
#endif

        strOff += len;
        partsDone++;
        part++;
        tp++;
    }

    return partsDone;
}
375 
376 /************************************************************************/
377 /*									*/
378 /*  Save paragraph contents for readers.				*/
379 /*									*/
380 /************************************************************************/
381 
docParaAppendBytes(void * vParaNode,int offset,const char * bytes,int count)382 static int docParaAppendBytes(	void *		vParaNode,
383 				int		offset,
384 				const char *	bytes,
385 				int		count )
386     {
387     BufferItem *	paraNode= (BufferItem *)vParaNode;
388     int			stroffShift= 0;
389 
390     if  ( docParaStringReplace( &stroffShift, paraNode, offset, offset,
391 							    bytes, count ) )
392 	{ LDEB(docParaStrlen(paraNode)); return -1;	}
393 
394     return stroffShift;
395     }
396 
/*
 *  Divide the text from stroff up to upto, at the tail of the
 *  paragraph string, into particules.
 *
 *  If the last particule of the paragraph is a DOCkindSPAN with the
 *  same text attribute number, that particule is redivided along with
 *  the new text: the division starts at its offset and it is offered
 *  for reuse. Otherwise new particules are added after the existing
 *  ones.
 *
 *  Returns 0 on success, -1 on failure.
 */
int docParaDivideAppendedText(BufferItem *paraNode,
                              int textAttributeNumber,
                              int stroff,
                              int upto)
{
    int part = paraNode->biParaParticuleCount;
    int partsFree = 0;

    if (part > 0) {
        const TextParticule *tp = paraNode->biParaParticules + part - 1;

        if (tp->tpKind == DOCkindSPAN &&
            tp->tpTextAttrNr == textAttributeNumber) {
            stroff = tp->tpStroff;
            part -= 1;
            partsFree = 1;
        }
    }

    if (docRedivideStringInParticules(paraNode, stroff, upto - stroff,
                                      part, partsFree,
                                      textAttributeNumber) < 0) {
        LLDEB(upto- stroff,paraNode->biParaParticuleCount);
        return -1;
    }

    return 0;
}
425 
426 /************************************************************************/
427 /*									*/
428 /*  Append the text pointed to by 'text' to the paragraph and		*/
429 /*  split it into text particules.					*/
430 /*									*/
431 /************************************************************************/
432 
/*
 *  Append 'len' bytes of text to the paragraph and divide the result
 *  into particules.
 *
 *  The text is passed through the text converter with
 *  textConverterConvertToUtf8(), starting at the current length of the
 *  paragraph string. The value it returns is used as the string
 *  offset up to where particules must be made.
 *
 *  ta:	Text attribute of the new text. Its number in the document is
 *	obtained with docTextAttributeNumber().
 *
 *  Returns 0 on success. Returns -1 if the attribute number cannot be
 *  determined, if the conversion fails or does not consume all 'len'
 *  bytes, or if the division into particules fails.
 */
int docParaAppendText(BufferDocument *bd,
                      BufferItem *paraNode,
                      const TextAttribute *ta,
                      struct TextConverter *tc,
                      const char *text,
                      int len)
{
    const int stroff = docParaStrlen(paraNode);
    const int textAttributeNumber = docTextAttributeNumber(bd, ta);
    int consumed = 0;
    int upto;

    if (textAttributeNumber < 0) {
        LDEB(textAttributeNumber);
        return -1;
    }

    upto = textConverterConvertToUtf8(tc, (void *)paraNode,
                                      &consumed, stroff, text, len);
    if (upto < 0) {
        LDEB(upto);
        return -1;
    }
    if (consumed != len) {
        LLDEB(consumed,len);
        return -1;
    }

    if (docParaDivideAppendedText(paraNode, textAttributeNumber,
                                  stroff, upto)) {
        LDEB(1);
        return -1;
    }

    return 0;
}
462 
docParaSetupTextConverter(struct TextConverter * tc)463 void docParaSetupTextConverter(	struct TextConverter *	tc )
464     {
465     textConverterSetProduce( tc, docParaAppendBytes );
466     }
467 
468 /************************************************************************/
469 /*									*/
/*  Fix a string offset. I.e. Return the highest string offset that is	*/
/*  <= stroff and that does not point inside a UTF-8 sequence.		*/
472 /*  Offsets of administrative particules are perfectly acceptable here.	*/
473 /*									*/
474 /************************************************************************/
475 
/*
 *  Move stroff back for as long as the byte at that offset is a UTF-8
 *  continuation byte (10xxxxxx). Never goes below offset 0.
 *
 *  Returns the resulting offset.
 */
int docParaFixStroff(const BufferItem *paraNode, int stroff)
{
    const unsigned char *cursor = docParaString(paraNode, stroff);

    for (; stroff > 0; stroff--, cursor--) {
        if ((*cursor & 0xc0) != 0x80) {
            break;
        }
    }

    return stroff;
}
486 
487 /************************************************************************/
488 /*									*/
489 /*  Return the next valid string offset in the paragraph.		*/
490 /*									*/
/*  Positions inside a UTF-8 sequence are invalid.			*/
492 /*  Offsets of administrative particules are perfectly acceptable here.	*/
493 /*									*/
494 /************************************************************************/
495 
/*
 *  Decode the character at stroff with uniGetUtf8() and return the
 *  offset just past it.
 *
 *  Returns -1 if uniGetUtf8() reports a step < 1.
 *  The offset is not compared to the length of the paragraph string.
 */
int docParaNextStroff(const BufferItem *paraNode, int stroff)
{
    unsigned short symbol;
    const int step = uniGetUtf8(&symbol,
                                (const char *)docParaString(paraNode, stroff));

    if (step >= 1) {
        return stroff + step;
    }

    LLDEB(stroff,step);
    return -1;
}
510 
511 /************************************************************************/
512 /*									*/
513 /*  Return the previous valid string offset in the paragraph.		*/
514 /*									*/
/*  Positions inside a UTF-8 sequence are invalid.			*/
516 /*  Offsets of administrative particules are perfectly acceptable here.	*/
517 /*									*/
518 /************************************************************************/
519 
/*
 *  Step back from stroff to the offset of the character before it:
 *  go back one byte, then continue over UTF-8 continuation bytes
 *  (10xxxxxx) until a byte that is not one.
 *
 *  Returns the offset found. Returns -1 if stroff <= 0, if offset 0
 *  is reached while still on a continuation byte, or if uniGetUtf8()
 *  reports a step < 1 for the character found.
 */
int docParaPrevStroff(const BufferItem *paraNode, int stroff)
{
    unsigned short symbol;
    const char *cursor;
    int step;

    if (stroff <= 0) {
        LDEB(stroff);
        return -1;
    }

    stroff--;
    cursor = (const char *)docParaString(paraNode, stroff);

    for (; (*cursor & 0xc0) == 0x80; stroff--, cursor--) {
        /* A continuation byte at offset 0 has no lead byte. */
        if (stroff <= 0) {
            LDEB(stroff);
            return -1;
        }
    }

    /* Only used to verify that a character can be decoded here. */
    step = uniGetUtf8(&symbol, cursor);
    if (step < 1) {
        LLDEB(stroff,step);
        return -1;
    }

    return stroff;
}
547 
548