1 /************************************************************************/
2 /* */
3 /* Manage the string of text bytes in a paragraph. */
4 /* (Try not to be too clever, this will be changed to unicode once.) */
5 /* */
6 /************************************************************************/
7
8 # include "docBufConfig.h"
9
10 # include <stdlib.h>
11
12 # include <appDebugon.h>
13
14 # include <uniShiftUtf8.h>
15 # include <uniUtf8.h>
16 # include <ucdGeneralCategory.h>
17 # include <textConverter.h>
18
19 # include "docBuf.h"
20 # include "docParaString.h"
21 # include "docParaParticules.h"
22
23 /************************************************************************/
24 /* */
25 /* Replace part of the string of a paragraph with a different string */
26 /* */
27 /************************************************************************/
28
/*
 *  Replace the stretch stroffBegin..stroffTail of the string buffer of
 *  a paragraph with addedStrlen bytes taken from addedString.
 *
 *  pSizeShift:	Optional. When it is not NULL, it receives the number
 *		of bytes added minus the number of bytes replaced. It
 *		is stored whether or not the replacement succeeded.
 *
 *  Returns the value returned by utilMemoryBufferReplaceBytes().
 */
int docParaStringReplace(	int *			pSizeShift,
				BufferItem *		paraNode,
				int			stroffBegin,
				int			stroffTail,
				const char *		addedString,
				int			addedStrlen )
{
    const int	replacedStrlen= stroffTail- stroffBegin;
    const int	res= utilMemoryBufferReplaceBytes(
				&(paraNode->biParaStringBuffer),
				stroffBegin, stroffTail,
				(unsigned char *)addedString, addedStrlen );

    if ( pSizeShift != (int *)0 )
    {
        *pSizeShift= addedStrlen- replacedStrlen;
    }

    return res;
}
49
50 /************************************************************************/
51
/*
 *  Find the string offset of the head of the next word: starting at
 *  stroff, first skip characters for which ucdIsZ() does not hold,
 *  then skip the characters for which it does hold.
 *
 *  Returns the resulting offset, which is at most the length of the
 *  paragraph string, or -1 if uniGetUtf8() fails to decode a character.
 */
int docParaNextWord(	const BufferItem *	paraNode,
			int			stroff )
{
    const char *	cursor= (char *)docParaString( paraNode, stroff );
    const int		paraStrlen= docParaStrlen( paraNode );
    int			skipSpace;

    /*  Pass 0: skip the remainder of the word.			*/
    /*  Pass 1: skip the separators that follow it.		*/
    for ( skipSpace= 0; skipSpace < 2; skipSpace++ )
    {
        while ( stroff < paraStrlen )
        {
            unsigned short	unicode;
            int			isSpace;
            int			step= uniGetUtf8( &unicode, cursor );

            if ( step < 1 )
            { LLDEB(stroff,step); return -1; }

            /*  Stop at the first character that does not	*/
            /*  belong to what this pass skips.			*/
            isSpace= ucdIsZ( unicode ) != 0;
            if ( isSpace != skipSpace )
            { break; }

            cursor += step;
            stroff += step;
        }
    }

    return stroff;
}
89
/*
 *  Helper for docParaPrevWord(): Decode the character that ends just
 *  before 'from', where 'from' points at offset stroff (> 0) in the
 *  paragraph string.
 *
 *  Walks back over UTF-8 continuation bytes (10xxxxxx) without passing
 *  the start of the string and decodes the character found there into
 *  *pUnicode. A mismatch between the decoded length and the distance
 *  walked is reported, but is not treated as an error.
 *
 *  Returns the number of bytes walked back (>= 1), or -1 if
 *  uniGetUtf8() fails.
 */
static int docParaPrevWordStepBack(	unsigned short *	pUnicode,
					const char *		from,
					int			stroff )
{
    int			st= 1;
    const char *	fr= from- 1;
    int			step;

    while ( stroff- st > 0 && ( *fr & 0xc0 ) == 0x80 )
    { st++; fr--; }

    step= uniGetUtf8( pUnicode, fr );
    if ( step < 1 )
    { LLDEB(stroff-st,step); return -1; }

    if ( step != st )
    { LLDEB(step,st); }

    return st;
}

/*
 *  Find the string offset of the head of the word before stroff: going
 *  backward from stroff, first skip the characters for which ucdIsZ()
 *  holds, then skip the characters for which it does not hold.
 *
 *  Returns the resulting offset (>= 0), or -1 if uniGetUtf8() fails to
 *  decode a character.
 *
 *  The position is moved back by the distance that was actually walked
 *  to the head of the character, not by the decoded length. For valid
 *  UTF-8 the two are equal. For malformed text, moving by the decoded
 *  length could end up before the head of the character, or even
 *  before the start of the string.
 */
int docParaPrevWord(	const BufferItem *	paraNode,
			int			stroff )
{
    const char *	from= (const char *)docParaString( paraNode, stroff );
    int			skipWord;

    /*  Pass 0: skip the separators before the position.	*/
    /*  Pass 1: skip the word before those separators.		*/
    for ( skipWord= 0; skipWord < 2; skipWord++ )
    {
        while ( stroff > 0 )
        {
            unsigned short	unicode;
            int			isSpace;
            int			st;

            st= docParaPrevWordStepBack( &unicode, from, stroff );
            if ( st < 0 )
            { return -1; }

            /*  Stop at the first character that does not	*/
            /*  belong to what this pass skips.			*/
            isSpace= ucdIsZ( unicode ) != 0;
            if ( isSpace == skipWord )
            { break; }

            from -= st;
            stroff -= st;
        }
    }

    return stroff;
}
146
docParaHeadOfWord(int * pStroff,const BufferItem * paraNode,int part)147 int docParaHeadOfWord( int * pStroff,
148 const BufferItem * paraNode,
149 int part )
150 {
151 const TextParticule * tp= paraNode->biParaParticules+ part;
152 int stroff= tp->tpStroff;
153
154 while( part > 0 &&
155 tp[-1].tpKind == DOCkindSPAN &&
156 docParaString( paraNode, tp->tpStroff- 1 )[0] != ' ' )
157 {
158 tp--; part--;
159 stroff= tp->tpStroff;
160 }
161
162 *pStroff= stroff;
163 return part;
164 }
165
docParaTailOfWord(int * pStroff,const BufferItem * paraNode,int part)166 int docParaTailOfWord( int * pStroff,
167 const BufferItem * paraNode,
168 int part )
169 {
170 const TextParticule * tp= paraNode->biParaParticules+ part;
171 int stroff= tp->tpStroff+ tp->tpStrlen;
172
173 while( part < paraNode->biParaParticuleCount- 1 &&
174 tp[1].tpKind == DOCkindSPAN &&
175 docParaString( paraNode, stroff- 1 )[0] != ' ' )
176 {
177 tp++; part++;
178 stroff= tp->tpStroff+ tp->tpStrlen;
179 }
180
181 while( stroff > tp->tpStroff &&
182 docParaString( paraNode, stroff- 1 )[0] == ' ' )
183 { stroff--; }
184
185 *pStroff= stroff;
186 return part;
187 }
188
189 /************************************************************************/
190 /* */
191 /* Translate a stretch of text to upper case for capitals or smallcaps */
192 /* drawing. */
193 /* */
194 /* 3) Worst case alternating, starting with lower case. 1+ len */
195 /* segments. But when it is just one lower case letter we need 3! */
196 /* */
197 /************************************************************************/
198
docMakeCapsString(char ** pUpperString,int ** pSegments,int * pSegmentCount,const TextAttribute * ta,const char * printString,int len)199 int docMakeCapsString( char ** pUpperString,
200 int ** pSegments,
201 int * pSegmentCount,
202 const TextAttribute * ta,
203 const char * printString,
204 int len )
205 {
206 int rval= 0;
207
208 int segmentCount= 0;
209 char * upperString= (char *)0;
210 int * segments= (int *)0;
211
212 if ( ta->taSmallCaps && ! ta->taCapitals )
213 {
214 if ( uniShiftUtf8String( &upperString, &segments, &segmentCount,
215 SHIFT_UPPER, printString, len ) )
216 { LDEB(len); rval= -1; goto ready; }
217 }
218 else{
219 if ( uniShiftUtf8String( &upperString, (int **)0, (int *)0,
220 SHIFT_UPPER, printString, len ) )
221 { LDEB(len); rval= -1; goto ready; }
222 }
223
224 *pUpperString= upperString; upperString= (char *)0; /* steal */
225 *pSegments= segments; segments= (int *)0; /* steal */
226 *pSegmentCount= segmentCount;
227
228 ready:
229
230 if ( upperString )
231 { free( upperString ); }
232 if ( segments )
233 { free( segments ); }
234
235 return rval;
236 }
237
238 /************************************************************************/
239
docParaPastLastNonBlank(const BufferItem * paraNode,int from,int upto)240 int docParaPastLastNonBlank( const BufferItem * paraNode,
241 int from,
242 int upto )
243 {
244 while( upto > from &&
245 paraNode->biParaString[upto-1] == ' ' )
246 { upto--; }
247
248 return upto;
249 }
250
251 /************************************************************************/
252 /* */
253 /* Delimit a single particule. */
254 /* */
255 /************************************************************************/
256
/*
 *  Determine the length of one particule at the head of 'from': a run
 *  of characters for which ucdIsZ() does not hold (visible text),
 *  followed by a run of characters for which it does hold (space).
 *  No more than strLen bytes are started on.
 *
 *  On success, tpNew->tpKind is set to DOCkindSPAN and tpNew->tpStrlen
 *  to the length found. That length is returned.
 *  If uniGetUtf8() fails, -1 is returned and tpNew is not touched.
 */
static int docDelimitParticule(	TextParticule *		tpNew,
				const char *		from,
				int			strLen )
{
    int		len= 0;
    int		inSpace;

    /*  Pass 0: Visible text. Pass 1: Space.			*/
    for ( inSpace= 0; inSpace < 2; inSpace++ )
    {
        while ( len < strLen )
        {
            unsigned short	unicode;
            int			isSpace;
            int			step= uniGetUtf8( &unicode, from );

            if ( step < 1 )
            { LDEB(step); return -1; }

            isSpace= ucdIsZ( unicode ) != 0;
            if ( isSpace != inSpace )
            { break; }

            from += step;
            len += step;
        }
    }

    tpNew->tpKind= DOCkindSPAN;
    tpNew->tpStrlen= len;

    return len;
}
296
297 /************************************************************************/
298 /* */
299 /* Redivide a piece of a paragraph in particules. */
300 /* */
301 /************************************************************************/
302
/*
 *  Divide strLen bytes of the paragraph string, starting at strOff,
 *  into particules with text attribute number textAttributeNumber.
 *
 *  The first partsFree particules, counted from particule number
 *  'part', are overwritten in place. Once those are used up,
 *  particules are inserted with docInsertTextParticule().
 *
 *  Returns the number of particules made, or -1 on failure.
 */
int docRedivideStringInParticules(	BufferItem *	paraNode,
					int		strOff,
					int		strLen,
					int		part,
					int		partsFree,
					int		textAttributeNumber )
{
    TextParticule *	tp= paraNode->biParaParticules+ part;
    int			partsDone= 0;
    int			bytesLeft= strLen;

    while ( bytesLeft > 0 )
    {
        TextParticule	tpNew;
        int		len;

# ifdef DEB_PARTICULES
        const char *	label= "?-?";
# endif

        tpNew.tpKind= DOCkindUNKNOWN;
        tpNew.tpStroff= strOff;
        tpNew.tpStrlen= 0;
        tpNew.tpTextAttrNr= textAttributeNumber;

        /*  Sets the kind and the length of tpNew.		*/
        len= docDelimitParticule( &tpNew,
			(const char *)paraNode->biParaString+ strOff,
			bytesLeft );
        if ( len < 0 )
        { LDEB(len); return -1; }

        if ( partsDone >= partsFree )
        {
            /*  No particules left to reuse: insert one.	*/
            /*  Continue from the pointer that is returned.	*/
            tp= docInsertTextParticule( paraNode, part,
				tpNew.tpStroff, tpNew.tpStrlen,
				tpNew.tpKind, tpNew.tpTextAttrNr );
            if ( ! tp )
            { XDEB(tp); return -1; }

# ifdef DEB_PARTICULES
            label= "NW+";
# endif
        }
        else
        {
            /*  Reuse an existing particule.			*/
            tp->tpKind= tpNew.tpKind;
            tp->tpStroff= tpNew.tpStroff;
            tp->tpStrlen= tpNew.tpStrlen;
            tp->tpTextAttrNr= tpNew.tpTextAttrNr;

            tp->tpTwipsWide= 0;

# ifdef DEB_PARTICULES
            label= "NW.";
# endif
        }

# ifdef DEB_PARTICULES
        appDebug( "%s %3d: [%4d..%4d] %s \"%.*s\" len= %d\n", label, part,
			tp->tpStroff,
			tp->tpStroff+ tp->tpStrlen,
			docKindStr( tp->tpKind ),
			(int)tp->tpStrlen,
			docParaString( paraNode, tp->tpStroff ),
			tp->tpStrlen );
# endif

        strOff += len;
        bytesLeft -= len;

        partsDone++;
        part++;
        tp++;
    }

    return partsDone;
}
375
376 /************************************************************************/
377 /* */
378 /* Save paragraph contents for readers. */
379 /* */
380 /************************************************************************/
381
docParaAppendBytes(void * vParaNode,int offset,const char * bytes,int count)382 static int docParaAppendBytes( void * vParaNode,
383 int offset,
384 const char * bytes,
385 int count )
386 {
387 BufferItem * paraNode= (BufferItem *)vParaNode;
388 int stroffShift= 0;
389
390 if ( docParaStringReplace( &stroffShift, paraNode, offset, offset,
391 bytes, count ) )
392 { LDEB(docParaStrlen(paraNode)); return -1; }
393
394 return stroffShift;
395 }
396
/*
 *  Divide the text stroff..upto at the end of the paragraph into
 *  particules with text attribute number textAttributeNumber.
 *
 *  If the last particule of the paragraph is a DOCkindSPAN particule
 *  with the same text attribute number, the division starts at the
 *  offset of that particule and the particule is reused.
 *
 *  Returns 0 on success, -1 if docRedivideStringInParticules() fails.
 */
int docParaDivideAppendedText(	BufferItem *	paraNode,
				int		textAttributeNumber,
				int		stroff,
				int		upto )
{
    const int	count= paraNode->biParaParticuleCount;
    int		part= count;
    int		partsFree= 0;

    if ( count > 0 )
    {
        const TextParticule *	last= paraNode->biParaParticules+ count- 1;

        if ( last->tpKind == DOCkindSPAN			&&
	     last->tpTextAttrNr == textAttributeNumber	)
        {
            /*  Redo the last particule along with the new text. */
            stroff= last->tpStroff;
            part= count- 1;
            partsFree= 1;
        }
    }

    if ( docRedivideStringInParticules( paraNode, stroff, upto- stroff,
				part, partsFree, textAttributeNumber ) < 0 )
    {
        LLDEB(upto- stroff,paraNode->biParaParticuleCount);
        return -1;
    }

    return 0;
}
425
426 /************************************************************************/
427 /* */
428 /* Append the text pointed to by 'text' to the paragraph and */
429 /* split it into text particules. */
430 /* */
431 /************************************************************************/
432
/*
 *  Append 'len' bytes of 'text' to the paragraph and divide the
 *  appended text into particules.
 *
 *  The text goes through textConverterConvertToUtf8() with the
 *  paragraph as its target and the current length of the paragraph
 *  string as the offset. Its result is used as the end offset of the
 *  text to divide.
 *
 *  Returns 0 on success. Returns -1 if no text attribute number can
 *  be obtained for 'ta', if the conversion fails or does not consume
 *  all 'len' bytes, or if dividing the text into particules fails.
 */
int docParaAppendText(	BufferDocument *	bd,
			BufferItem *		paraNode,
			const TextAttribute *	ta,
			struct TextConverter *	tc,
			const char *		text,
			int			len )
{
    const int	stroff= docParaStrlen( paraNode );
    int		consumed= 0;
    int		upto= 0;
    int		textAttributeNumber;

    textAttributeNumber= docTextAttributeNumber( bd, ta );
    if ( textAttributeNumber < 0 )
    {
        LDEB(textAttributeNumber);
        return -1;
    }

    upto= textConverterConvertToUtf8( tc, (void *)paraNode,
					&consumed, stroff, text, len );
    if ( upto < 0 )
    {
        LDEB(upto);
        return -1;
    }

    /*  All input must have been converted.			*/
    if ( consumed != len )
    {
        LLDEB(consumed,len);
        return -1;
    }

    if ( docParaDivideAppendedText( paraNode, textAttributeNumber,
							stroff, upto ) )
    {
        LDEB(1);
        return -1;
    }

    return 0;
}
462
docParaSetupTextConverter(struct TextConverter * tc)463 void docParaSetupTextConverter( struct TextConverter * tc )
464 {
465 textConverterSetProduce( tc, docParaAppendBytes );
466 }
467
468 /************************************************************************/
469 /* */
470 /* Fix a string offset. I.E. Return the highest string offset that is */
471 /* <= stroff and that does not point inside an UTF8 sequence. */
472 /* Offsets of administrative particules are perfectly acceptable here. */
473 /* */
474 /************************************************************************/
475
/*
 *  Move stroff back while it is > 0 and the byte at that offset is an
 *  UTF-8 continuation byte (bit pattern 10xxxxxx).
 *
 *  Returns the resulting offset.
 */
int docParaFixStroff(	const BufferItem *	paraNode,
			int			stroff )
{
    const unsigned char *	cursor= docParaString( paraNode, stroff );

    for ( ; stroff > 0; stroff--, cursor-- )
    {
        if ( ( *cursor & 0xc0 ) != 0x80 )
        { break; }
    }

    return stroff;
}
486
487 /************************************************************************/
488 /* */
489 /* Return the next valid string offset in the paragraph. */
490 /* */
491 /* Positions inside an UTF-8 sequence are invalid. */
492 /* Offsets of administrative particules are perfectly acceptable here. */
493 /* */
494 /************************************************************************/
495
/*
 *  Decode the character at stroff with uniGetUtf8() and return the
 *  offset just past it.
 *
 *  Returns -1 if uniGetUtf8() reports a length below 1.
 */
int docParaNextStroff(	const BufferItem *	paraNode,
			int			stroff )
{
    unsigned short	unicode;
    const int		step= uniGetUtf8( &unicode,
			    (const char *)docParaString( paraNode, stroff ) );

    if ( step < 1 )
    {
        LLDEB(stroff,step);
        return -1;
    }

    return stroff+ step;
}
510
511 /************************************************************************/
512 /* */
513 /* Return the previous valid string offset in the paragraph. */
514 /* */
515 /* Positions inside an UTF-8 sequence are invalid. */
516 /* Offsets of administrative particules are perfectly acceptable here. */
517 /* */
518 /************************************************************************/
519
/*
 *  Return the offset of the head of the character that precedes
 *  stroff: go back one byte, then go further back over UTF-8
 *  continuation bytes (bit pattern 10xxxxxx).
 *
 *  Returns -1 if stroff <= 0, if the start of the string is reached
 *  while still on a continuation byte, or if uniGetUtf8() cannot
 *  decode the character at the offset found.
 */
int docParaPrevStroff(	const BufferItem *	paraNode,
			int			stroff )
{
    const char *	cursor;
    unsigned short	unicode;
    int			step;

    if ( stroff <= 0 )
    {
        LDEB(stroff);
        return -1;
    }

    stroff--;
    cursor= (const char *)docParaString( paraNode, stroff );

    for (;;)
    {
        /*  Not a continuation byte: head of a character.	*/
        if ( ( *cursor & 0xc0 ) != 0x80 )
        { break; }

        if ( stroff <= 0 )
        {
            LDEB(stroff);
            return -1;
        }

        stroff--;
        cursor--;
    }

    /*  Only decode to check that there is a character here.	*/
    step= uniGetUtf8( &unicode, cursor );
    if ( step < 1 )
    {
        LLDEB(stroff,step);
        return -1;
    }

    return stroff;
}
547
548