1#Copyright ReportLab Europe Ltd. 2000-2017
2#see license.txt for license details
3#history https://hg.reportlab.com/hg-public/reportlab/log/tip/src/reportlab/platypus/paragraph.py
#public names exported by a star import of this module
__all__=(
        'Paragraph',
        'cleanBlockQuotedText',
        'ParaLines',
        'FragLine',
        )
#version string of this module
__version__='3.5.20'
#module docstring, assigned explicitly because the file starts with comments
__doc__='''The standard paragraph implementation'''
12from string import whitespace
13from operator import truth
14from unicodedata import category
15from reportlab.pdfbase.pdfmetrics import stringWidth, getFont, getAscentDescent
16from reportlab.platypus.paraparser import ParaParser, _PCT, _num as _parser_num, _re_us_value
17from reportlab.platypus.flowables import Flowable
18from reportlab.lib.colors import Color
19from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
20from reportlab.lib.geomutils import normalizeTRBL
21from reportlab.lib.textsplit import wordSplit, ALL_CANNOT_START
22from reportlab.lib.styles import ParagraphStyle
23from copy import deepcopy
24from reportlab.lib.abag import ABag
25from reportlab.rl_config import platypus_link_underline, decimalSymbol, _FUZZ,\
26        paraFontSizeHeightOffset, hyphenationMinWordLength
27from reportlab.lib.utils import _className, isBytes, unicodeT, bytesT, isStr
28from reportlab.lib.rl_accel import sameFrag
29from reportlab import xrange
30import re
31from types import MethodType
32try:
33    import pyphen
34except:
35    pyphen = None
36
#on UTF8/py33 branch, split and strip must be unicode-safe!
#thanks to Dirk Holtwick for helpful discussions/insight
#on this one
#_wsc holds every character this module treats as breakable white space.
#NO-BREAK SPACE is deliberately left out (commented out below) so that it is
#never used as a split/strip point.
_wsc = ''.join((
    u'\u0009',  # HORIZONTAL TABULATION
    u'\u000A',  # LINE FEED
    u'\u000B',  # VERTICAL TABULATION
    u'\u000C',  # FORM FEED
    u'\u000D',  # CARRIAGE RETURN
    u'\u001C',  # FILE SEPARATOR
    u'\u001D',  # GROUP SEPARATOR
    u'\u001E',  # RECORD SEPARATOR
    u'\u001F',  # UNIT SEPARATOR
    u'\u0020',  # SPACE
    u'\u0085',  # NEXT LINE
    #u'\u00A0', # NO-BREAK SPACE
    u'\u1680',  # OGHAM SPACE MARK
    u'\u2000',  # EN QUAD
    u'\u2001',  # EM QUAD
    u'\u2002',  # EN SPACE
    u'\u2003',  # EM SPACE
    u'\u2004',  # THREE-PER-EM SPACE
    u'\u2005',  # FOUR-PER-EM SPACE
    u'\u2006',  # SIX-PER-EM SPACE
    u'\u2007',  # FIGURE SPACE
    u'\u2008',  # PUNCTUATION SPACE
    u'\u2009',  # THIN SPACE
    u'\u200A',  # HAIR SPACE
    u'\u200B',  # ZERO WIDTH SPACE
    u'\u2028',  # LINE SEPARATOR
    u'\u2029',  # PARAGRAPH SEPARATOR
    u'\u202F',  # NARROW NO-BREAK SPACE
    u'\u205F',  # MEDIUM MATHEMATICAL SPACE
    u'\u3000',  # IDEOGRAPHIC SPACE
    ))
#split a string on runs of one or more _wsc characters
_wsc_re_split=re.compile('[%s]+'% re.escape(_wsc)).split
#find the run of _wsc characters (if any) that ends a string
_wsc_end_search=re.compile('[%s]+$'% re.escape(_wsc)).search
74
def _usConv(s, vMap, default=None):
    '''Convert a strike/underline distance specification to a number.

    s       the value to convert; anything that is not a string is returned as is
    vMap    mapping from the unit matched by _re_us_value to its multiplier
    default returned for a blank string, but only when it is truthy

    A string matching _re_us_value yields float(number)*vMap[unit]; any other
    non-blank string is handed to the parser's number converter with relative
    values disallowed.  A blank string without a usable default is returned
    stripped.
    '''
    if not isStr(s):
        return s
    s = s.strip()
    if not s:
        #blank: fall back to the default when one is usable
        return default if default else s
    matched = _re_us_value.match(s)
    if not matched:
        return _parser_num(s,allowRelative=False)
    return float(matched.group(1))*vMap[matched.group(2)]
88
def split(text, delim=None):
    '''Split text into a list of unicode words.

    bytes arguments are decoded as utf8 first.  When no delim is given and
    text contains a no-break space, the module level white space regex does
    the splitting (it does not treat the no-break space as a separator);
    in every other case the work is delegated to text.split(delim).
    '''
    if isBytes(text):
        text = text.decode('utf8')
    if delim is not None and isBytes(delim):
        delim = delim.decode('utf8')
    if delim is None and u'\xa0' in text:
        pieces = _wsc_re_split(text)
    else:
        pieces = text.split(delim)
    return list(pieces)
93
def strip(text):
    '''Return text (utf8 decoded if bytes) with _wsc characters removed from both ends.'''
    decoded = text.decode('utf8') if isBytes(text) else text
    return decoded.strip(_wsc)
97
def lstrip(text):
    '''Return text (utf8 decoded if bytes) with leading _wsc characters removed.'''
    decoded = text.decode('utf8') if isBytes(text) else text
    return decoded.lstrip(_wsc)
101
def rstrip(text):
    '''Return text (utf8 decoded if bytes) with trailing _wsc characters removed.'''
    decoded = text.decode('utf8') if isBytes(text) else text
    return decoded.rstrip(_wsc)
105
class ParaLines(ABag):
    """
    class ParaLines holds a Paragraph after it has been broken into lines.

        kind==0 Simple
        fontName, fontSize, textColor apply to the whole Paragraph
        lines   [(extraSpace1,words1),....,(extraSpaceN,wordsN)]

        kind==1 Complex
        lines   [FragLine1,...,FragLineN]
    """
116
class FragLine(ABag):
    """
    class FragLine contains a styled line (ie a line with more than one style)::

        extraSpace  unused space on the line, used only for justification
        wordCount   1+spaces in the line, used for justification purposes
        words       [ParaFrags] styled text lumps to be concatenated together
        fontSize    maximum fontSize seen on the line; not used at present,
                    but could be used for line spacing.
    """
127
def _lineClean(L):
    '''Strip L and collapse each run of white space inside it to a single space.'''
    words = [word for word in split(strip(L)) if truth(word)]
    return ' '.join(words)
130
def cleanBlockQuotedText(text,joiner=' '):
    """Internal utility that turns triple-quoted text from a document
    into (hopefully) the paragraph the author originally intended.

    Each line is cleaned with _lineClean, lines that end up empty are
    dropped and the survivors are glued together with joiner."""
    cleaned = [_lineClean(rawLine) for rawLine in split(text, '\n')]
    return joiner.join([line for line in cleaned if truth(line)])
137
def setXPos(tx,dx):
    '''Move the text object tx horizontally by dx, skipping moves of
    magnitude 1e-6 or less.'''
    tolerance = 1e-6
    significant = dx<-tolerance or dx>tolerance
    if significant:
        tx.setXPos(dx)
141
def _nbspCount(w):
    '''Return the number of no-break spaces in w; for bytes the utf8
    encoded form of the character is counted.'''
    nbsp = b'\xc2\xa0' if isBytes(w) else u'\xa0'
    return w.count(nbsp)
147
def _leftDrawParaLine( tx, offset, extraspace, words, last=0):
    '''Draw the single-style line made of words left aligned at offset.

    When extraspace is negative (at or below -1e-8), the text object is not
    preformatted and the line has at least one space, the deficit is spread
    over the spaces as (negative) word spacing; otherwise the text is output
    unchanged.  last is not used.  Returns offset.
    '''
    plain = extraspace>-1e-8 or getattr(tx,'preformatted',False)
    joined = ' '.join(words)
    setXPos(tx,offset)
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not plain:
        gaps = len(words)+_nbspCount(joined)-1
        if gaps>0:
            perGap = extraspace / float(gaps)
    if perGap is not None:
        tx.setWordSpace(perGap)
    tx._textOut(joined,1)
    if perGap is not None:
        tx.setWordSpace(0)
    setXPos(tx,-offset)
    return offset
163
def _centerDrawParaLine( tx, offset, extraspace, words, last=0):
    '''Draw the single-style line made of words centred.

    Normally the line starts at offset plus half of extraspace.  When
    extraspace is negative (at or below -1e-8), the text object is not
    preformatted and the line has at least one space, the line starts at
    offset and the deficit is spread over the spaces as word spacing.
    last is not used.  Returns the x shift that was applied.
    '''
    plain = extraspace>-1e-8 or getattr(tx,'preformatted',False)
    joined = ' '.join(words)
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not plain:
        gaps = len(words)+_nbspCount(joined)-1
        if gaps>0:
            perGap = extraspace / float(gaps)
    if perGap is None:
        shift = offset + 0.5 * extraspace
    else:
        shift = offset
        tx.setWordSpace(perGap)
    setXPos(tx,shift)
    tx._textOut(joined,1)
    if perGap is not None:
        tx.setWordSpace(0)
    setXPos(tx,-shift)
    return shift
182
def _rightDrawParaLine( tx, offset, extraspace, words, last=0):
    '''Draw the single-style line made of words right aligned.

    Normally the line starts at offset plus extraspace.  When extraspace is
    negative (at or below -1e-8), the text object is not preformatted and the
    line has at least one space, the line starts at offset and the deficit
    is spread over the spaces as word spacing.
    last is not used.  Returns the x shift that was applied.
    '''
    plain = extraspace>-1e-8 or getattr(tx,'preformatted',False)
    joined = ' '.join(words)
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not plain:
        gaps = len(words)+_nbspCount(joined)-1
        if gaps>0:
            perGap = extraspace / float(gaps)
    if perGap is None:
        shift = offset + extraspace
        setXPos(tx,shift)
        tx._textOut(' '.join(words),1)
    else:
        shift = offset
        tx.setWordSpace(perGap)
        setXPos(tx,shift)
        tx._textOut(joined,1)
        tx.setWordSpace(0)
    setXPos(tx,-shift)
    return shift
201
def _justifyDrawParaLine( tx, offset, extraspace, words, last=0):
    '''Draw the single-style line made of words justified, starting at offset.

    The line is simply left aligned when it is the last one, when extraspace
    is (almost) zero, when the text object is preformatted or when the line
    contains no space; otherwise extraspace is shared out equally between
    the spaces as word spacing.  Returns offset.
    '''
    setXPos(tx,offset)
    joined = ' '.join(words)
    plain = last or (-1e-8<extraspace<=1e-8) or getattr(tx,'preformatted',False)
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not plain:
        gaps = len(words)+_nbspCount(joined)-1
        if gaps>0:
            perGap = extraspace / float(gaps)
    if perGap is not None:
        tx.setWordSpace(perGap)
    tx._textOut(joined,1)
    if perGap is not None:
        tx.setWordSpace(0)
    setXPos(tx,-offset)
    return offset
218
def _justifyDrawParaLineRTL( tx, offset, extraspace, words, last=0):
    '''Justified drawing for right to left text: the last line is right
    aligned, every other line is justified.'''
    if last:
        drawer = _rightDrawParaLine
    else:
        drawer = _justifyDrawParaLine
    return drawer(tx, offset, extraspace, words, last)
221
def imgVRange(h,va,fontSize):
    '''Return (bottom, top) offsets relative to the baseline (0) for an
    inline image of height h.

    va is one of the named alignments ('baseline', 'top'/'text-top',
    'middle', 'bottom'/'text-bottom', 'super', 'sub'), an object with a
    normalizedValue method (resolved against fontSize) or a plain number
    that is used directly as the bottom offset.
    '''
    if va=='baseline':
        bottom = 0
    elif va=='text-top' or va=='top':
        bottom = fontSize-h
    elif va=='middle':
        bottom = fontSize - (1.2*fontSize+h)*0.5
    elif va=='text-bottom' or va=='bottom':
        bottom = fontSize - 1.2*fontSize
    elif va=='super':
        bottom = 0.5*fontSize
    elif va=='sub':
        bottom = -0.5*fontSize
    elif hasattr(va,'normalizedValue'):
        bottom = va.normalizedValue(fontSize)
    else:
        bottom = va
    top = bottom+h
    return bottom,top
241
def imgNormV(v,nv):
    '''Return v resolved against nv when v has a normalizedValue method,
    otherwise return v unchanged.'''
    return v.normalizedValue(nv) if hasattr(v,'normalizedValue') else v
247
def _getDotsInfo(style):
    '''Return (text, fontName, fontSize, textColor, backColor, dy) describing
    the end dots of style.

    style.endDots may be a plain string, in which case every other value is
    taken from style and dy is 0, or an object whose text, fontName,
    fontSize, textColor, backColor and dy attributes override the defaults
    ('.' for text, the style's own values for the rest and 0 for dy).
    '''
    dots = style.endDots
    if isStr(dots):
        return dots,style.fontName,style.fontSize,style.textColor,style.backColor,0
    return (
        getattr(dots,'text','.'),
        getattr(dots,'fontName',style.fontName),
        getattr(dots,'fontSize',style.fontSize),
        getattr(dots,'textColor',style.textColor),
        getattr(dots,'backColor',style.backColor),
        getattr(dots,'dy',0),
        )
265
_56=5./6    #share of the leading used as the minimum ascent when autoLeading=='max'
_16=1./6    #share of the leading used as the minimum descent when autoLeading=='max'
def _putFragLine(cur_x, tx, line, last, pKind):
    '''Draw one line of styled fragments into the text object tx.

    cur_x   x position at which the line starts; xs.leftIndent is added to it
    tx      the text object drawn into; tx.XtraState (xs below) carries the
            paragraph level state (cur_y, leading, us_lines, links, backColors...)
    line    the line to draw; line.words holds its fragments and, when
            autoLeading is 'min' or 'max', line.ascent/line.descent are used
    last    true when this is the last line (enables the style's endDots)
    pKind   the caller's alignment: 'left', 'center', 'right' or 'justify'

    Besides emitting text and images, the function records the x extents of
    underline/strike entries in xs.us_lines, of links in xs.links and of
    background colours in xs.backColors.  Nothing is returned.
    '''
    preformatted = tx.preformatted
    xs = tx.XtraState
    cur_y = xs.cur_y
    x0 = tx._x0     #remembered so that the x origin can be restored at the end
    autoLeading = xs.autoLeading
    leading = xs.leading
    cur_x += xs.leftIndent
    dal = autoLeading in ('min','max')  #do auto leading
    if dal:
        if autoLeading=='max':
            #never go below the proportions implied by the style leading
            ascent = max(_56*leading,line.ascent)
            descent = max(_16*leading,-line.descent)
        else:
            ascent = line.ascent
            descent = -line.descent
        leading = ascent+descent
    if tx._leading!=leading:
        tx.setLeading(leading)
    if dal:
        olb = tx._olb   #bottom of the previous line, None if there was none
        if olb is not None:
            xcy = olb-ascent    #baseline that puts this line right under the previous one
            if tx._oleading!=leading:
                cur_y += leading - tx._oleading
            if abs(xcy-cur_y)>1e-8:
                cur_y = xcy
                tx.setTextOrigin(x0,cur_y)
                xs.cur_y = cur_y
        tx._olb = cur_y - descent
        tx._oleading = leading
    ws = getattr(tx,'_wordSpace',0)
    nSpaces = 0     #spaces output so far; each one adds ws to the real x position
    words = line.words
    AL = []     #links active on the previous text fragment
    LL = []     #underline/strike lines active on the previous text fragment
    us_lines = xs.us_lines
    links = xs.links
    for i, f in enumerate(words):
        if hasattr(f,'cbDefn'):
            #callback style fragment: an image, an anchor or a canvas callback
            cbDefn = f.cbDefn
            kind = cbDefn.kind
            if kind=='img':
                #draw image cbDefn,cur_y,cur_x
                txfs = tx._fontsize
                if txfs is None:
                    txfs = xs.style.fontSize
                w = imgNormV(cbDefn.width,xs.paraWidth)
                h = imgNormV(cbDefn.height,txfs)
                iy0,iy1 = imgVRange(h,cbDefn.valign,txfs)
                cur_x_s = cur_x + nSpaces*ws
                tx._canvas.drawImage(cbDefn.image,cur_x_s,cur_y+iy0,w,h,mask='auto')
                cur_x += w
                cur_x_s += w
                setXPos(tx,cur_x_s-tx._x0)
            else:
                name = cbDefn.name
                if kind=='anchor':
                    tx._canvas.bookmarkHorizontal(name,cur_x,cur_y+leading)
                else:
                    func = getattr(tx._canvas,name,None)
                    if not func:
                        raise AttributeError("Missing %s callback attribute '%s'" % (kind,name))
                    #expose the drawing context to the callback for the duration of the call
                    tx._canvas._curr_tx_info=dict(tx=tx,cur_x=cur_x,cur_y=cur_y,leading=leading,xs=tx.XtraState)
                    try:
                        func(tx._canvas,kind,getattr(cbDefn,'label',None))
                    finally:
                        del tx._canvas._curr_tx_info
            if f is words[-1]:
                #the line ends with a non-text fragment; still output an (empty) text with newline
                if not tx._fontname:
                    tx.setFont(xs.style.fontName,xs.style.fontSize)
                tx._textOut('',1)
        else:
            cur_x_s = cur_x + nSpaces*ws    #x position including the word spacing used so far
            end_x = cur_x_s
            fontSize = f.fontSize
            textColor = f.textColor
            rise = f.rise
            if i > 0:
                #decorations that stop here end before the previous fragment's trailing white space
                end_x = cur_x_s - (0 if preformatted else _trailingSpaceLength(words[i-1].text, tx))
            if (tx._fontname,tx._fontsize)!=(f.fontName,fontSize):
                tx._setFont(f.fontName, fontSize)
            if xs.textColor!=textColor:
                xs.textColor = textColor
                tx.setFillColor(textColor)
            if xs.rise!=rise:
                xs.rise=rise
                tx.setRise(rise)
            text = f.text
            tx._textOut(text,f is words[-1])    # cheap textOut
            if LL != f.us_lines:
                S = set(LL)
                NS = set(f.us_lines)
                nL = NS - S #new lines
                eL = S - NS #ending lines
                for l in eL:
                    us_lines[l] = us_lines[l],end_x
                for l in nL:
                    us_lines[l] = (l,fontSize,textColor,cur_x_s),fontSize
                LL = f.us_lines
            if LL:
                #track the largest font size seen under each active line
                for l in LL:
                    l0, fsmax = us_lines[l]
                    if fontSize>fsmax:
                        us_lines[l] = l0, fontSize

            #vertical extent of this fragment, used for the link rectangles
            nlo = rise - 0.2*fontSize
            nhi = rise + fontSize
            if AL != f.link:
                S = set(AL)
                NS = set(f.link)
                nL = NS - S #new links
                eL = S - NS #ending links
                for l in eL:
                    links[l] = links[l],end_x
                for l in nL:
                    links[l] = (l,cur_x),nlo,nhi
                AL = f.link
            if AL:
                #grow the vertical extent of each active link to cover this fragment
                for l in AL:
                    l0, lo, hi = links[l]
                    if nlo<lo or nhi>hi:
                        links[l] = l0,min(nlo,lo),max(nhi,hi)

            bg = getattr(f,'backColor',None)
            if bg and not xs.backColor:
                #a background span starts here
                xs.backColor = bg
                xs.backColor_x = cur_x_s
            elif xs.backColor:
                if not bg:
                    #the open background span ends here
                    xs.backColors.append( (xs.backColor_x, end_x, xs.backColor) )
                    xs.backColor = None
                elif f.backColor!=xs.backColor or xs.textColor!=xs.backColor:
                    #close the open span and start a new one with this fragment's colour
                    xs.backColors.append( (xs.backColor_x, end_x, xs.backColor) )
                    xs.backColor = bg
                    xs.backColor_x = cur_x_s
            txtlen = tx._canvas.stringWidth(text, tx._fontname, tx._fontsize)
            cur_x += txtlen
            nSpaces += text.count(' ')+_nbspCount(text)

    cur_x_s = cur_x+(nSpaces-1)*ws
    if last and xs.style.endDots:
        if xs.style.wordWrap!='RTL':    #assume dots left --> right
            if pKind!='right':
                _do_dots_frag(cur_x,cur_x_s,line.maxWidth,xs,tx)
        elif pKind!='left':
            start = tx._x_offset
            _do_dots_frag(start, start, x0 - start, xs, tx, left=False)

    #whatever is still open at the end of the line is closed at the line's end position
    if LL:
        for l in LL:
            us_lines[l] = us_lines[l], cur_x_s

    if AL:
        for l in AL:
            links[l] = links[l], cur_x_s

    if xs.backColor:
        xs.backColors.append( (xs.backColor_x, cur_x_s, xs.backColor) )
    if tx._x0!=x0:
        #put the x origin back where it was on entry
        setXPos(tx,x0-tx._x0)
429
def _do_dots_frag(cur_x, cur_x_s, maxWidth, xs, tx, left=True):
    '''Fill the rest of a line with repetitions of the style's end dots.

    cur_x       x position at the end of the text ignoring word spacing
    cur_x_s     x position at the end of the text including word spacing
    maxWidth    width available for the line
    xs          the paragraph's extra state (supplies style, cur_y, textColor
                and the backColors list)
    tx          the text object to draw into
    left        when true maxWidth is shifted by tx._dotsOffsetX if set,
                otherwise by tx._x0, before the dots are placed

    Nothing is drawn when not even one copy of the dots text fits, or when
    the dots text has no width (which used to raise ZeroDivisionError).
    '''
    text,fontName,fontSize,textColor,backColor,dy = _getDotsInfo(xs.style)
    txtlen = tx._canvas.stringWidth(text, fontName, fontSize)
    #txtlen>0 guards the division below against zero width dots text
    if txtlen>0 and cur_x_s+txtlen<=maxWidth:
        if tx._fontname!=fontName or tx._fontsize!=fontSize:
            tx.setFont(fontName,fontSize)
        if left: maxWidth += getattr(tx,'_dotsOffsetX',tx._x0)
        tx.setTextOrigin(0,xs.cur_y+dy)
        setXPos(tx,cur_x_s-cur_x)
        n = int((maxWidth-cur_x_s)/txtlen)  #number of whole copies that fit
        setXPos(tx,maxWidth - txtlen*n)     #so that the dots end exactly at maxWidth
        if xs.textColor!=textColor:
            tx.setFillColor(textColor)
        if backColor: xs.backColors.append((cur_x,maxWidth,backColor))
        tx._textOut(n*text,1)
        if dy: tx.setTextOrigin(tx._x0,xs.cur_y-dy)
446
def _leftDrawParaLineX( tx, offset, line, last=0):
    '''Draw the multi-style line left aligned at offset.

    When line.extraSpace is negative (at or below -1e-8), the line is not
    preformatted and it contains at least one space, the deficit is spread
    over the spaces as word spacing while the fragments are drawn.
    '''
    tx._x_offset = offset
    setXPos(tx,offset)
    slack = line.extraSpace
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not (slack>-1e-8 or getattr(line,'preformatted',False)):
        gaps = line.wordCount+sum([_nbspCount(w.text) for w in line.words if not hasattr(w,'cbDefn')])-1
        if gaps>0:
            perGap = slack / float(gaps)
    if perGap is not None:
        tx.setWordSpace(perGap)
    _putFragLine(offset, tx, line, last, 'left')
    if perGap is not None:
        tx.setWordSpace(0)
    setXPos(tx,-offset)
462
def _centerDrawParaLineX( tx, offset, line, last=0):
    '''Draw the multi-style line centred.

    tx._dotsOffsetX is set for the duration of the call (it is read when end
    dots are placed) and always removed again.  Normally the line starts at
    offset plus half of line.extraSpace; when the extra space is negative,
    the line is not preformatted and it contains a space, the line is drawn
    with the deficit spread over the spaces as word spacing.
    '''
    tx._x_offset = offset
    tx._dotsOffsetX = offset + tx._x0
    try:
        slack = line.extraSpace
        perGap = None   #word spacing to apply, None means leave spacing alone
        if not (slack>-1e-8 or getattr(line,'preformatted',False)):
            gaps = line.wordCount+sum([_nbspCount(w.text) for w in line.words if not hasattr(w,'cbDefn')])-1
            if gaps>0:
                perGap = slack / float(gaps)
        if perGap is None:
            shift = offset+0.5*line.extraSpace
            setXPos(tx,shift)
            _putFragLine(shift, tx, line, last,'center')
        else:
            #NOTE(review): unlike the branch above no setXPos(tx,shift) is issued here
            shift = offset
            tx.setWordSpace(perGap)
            _putFragLine(shift, tx, line, last, 'center')
            tx.setWordSpace(0)
        setXPos(tx,-shift)
    finally:
        del tx._dotsOffsetX
484
def _rightDrawParaLineX( tx, offset, line, last=0):
    '''Draw the multi-style line right aligned.

    Normally the line starts at offset plus line.extraSpace; when the extra
    space is negative (at or below -1e-8), the line is not preformatted and
    it contains a space, the line is drawn with the deficit spread over the
    spaces as word spacing.
    '''
    tx._x_offset = offset
    slack = line.extraSpace
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not (slack>-1e-8 or getattr(line,'preformatted',False)):
        gaps = line.wordCount+sum([_nbspCount(w.text) for w in line.words if not hasattr(w,'cbDefn')])-1
        if gaps>0:
            perGap = slack / float(gaps)
    if perGap is None:
        shift = offset+line.extraSpace
        setXPos(tx,shift)
        _putFragLine(shift,tx, line, last, 'right')
    else:
        #NOTE(review): unlike the branch above no setXPos(tx,shift) is issued here
        shift = offset
        tx.setWordSpace(perGap)
        _putFragLine(shift, tx, line, last, 'right')
        tx.setWordSpace(0)
    setXPos(tx,-shift)
502
def _justifyDrawParaLineX( tx, offset, line, last=0):
    '''Draw the multi-style line justified, starting at offset.

    No spacing change is made for the last line, for a line whose extra
    space is (almost) zero, for a line ended by a line break or for a line
    without spaces; otherwise line.extraSpace is shared out equally between
    the spaces as word spacing.
    '''
    tx._x_offset = offset
    setXPos(tx,offset)
    slack = line.extraSpace
    perGap = None   #word spacing to apply, None means leave spacing alone
    if not (last or abs(slack)<=1e-8 or line.lineBreak):
        gaps = line.wordCount+sum([_nbspCount(w.text) for w in line.words if not hasattr(w,'cbDefn')])-1
        if gaps>0:
            perGap = slack / float(gaps)
    if perGap is not None:
        tx.setWordSpace(perGap)
    _putFragLine(offset, tx, line, last, 'justify')
    if perGap is not None:
        tx.setWordSpace(0)
    setXPos(tx,-offset)
518
def _justifyDrawParaLineXRTL( tx, offset, line, last=0):
    '''Justified drawing of a multi-style line for right to left text: the
    last line is right aligned, every other line is justified.'''
    if last:
        drawer = _rightDrawParaLineX
    else:
        drawer = _justifyDrawParaLineX
    return drawer( tx, offset, line, last)
521
def _trailingSpaceLength(text, tx):
    '''Return the width, in tx's current font, of the white space that ends
    text; 0 when text does not end in white space.'''
    tail = _wsc_end_search(text)
    if not tail:
        return 0
    return tx._canvas.stringWidth(tail.group(), tx._fontname, tx._fontsize)
525
class _HSFrag(list):
    '''Marker list type: a frag word that is followed by a space.'''
529
class _InjectedFrag(list):
    '''Marker list type: a frag injected while breaking lines, which must be
    removed again when the frags are reprocessed.'''
533
class _SplitFrag(list):
    '''Marker list type: a frag word that is one piece of a split word.'''
537
class _SplitFragH(_SplitFrag):
    '''Marker type: a split frag that is the head part of the split.'''
541
542
class _SplitFragHY(_SplitFragH):
    '''Marker type: a head split frag whose trailing '-' must be removed
    before the pieces are rejoined.'''
546
class _SplitFragHS(_SplitFrag,_HSFrag):
    '''Marker type: a split frag that is followed by a space.'''
550
class _SplitFragLL(_SplitFragHS):
    '''Marker type: a frag forced to end in '-' because of a paragraph split.'''
554
class _SHYIndexedStr(unicodeT):
    '''A unicode string that remembers where its soft hyphens were.

    Built from u alone, the string is u with every _shy removed and
    _shyIndices lists the offsets, in the resulting string, at which a _shy
    was removed.  Built with a non-empty X, u is taken as is and X becomes
    _shyIndices.
    '''
    def __new__(cls, u, X=None):
        if not X:
            parts = u.split(_shy)
            X = []
            pos = 0
            #every part except the last one ends where a soft hyphen used to be
            for part in parts[:-1]:
                pos += len(part)
                X.append(pos)
            u = u''.join(parts)
        self = unicodeT.__new__(cls,u)
        self._shyIndices = X
        return self
570
def _shyUnsplit(s,ss=None):
    '''Rejoin two parts of an original _SHYIndexedStr or str that was split.

    s   the head part; any trailing '-' characters are removed from it
    ss  the optional tail part, appended to the stripped head

    Returns a _SHYIndexedStr when either part carries soft hyphen indices
    (the tail's indices are shifted so that they refer to the joined
    string), otherwise a plain string.
    '''
    u = s.rstrip(u'-')
    if isinstance(s,_SHYIndexedStr):
        X = s._shyIndices[:]
        #the tail's indices are relative to the head's last break point; a head
        #without recorded break points falls back to its stripped length
        #(X[-1] used to raise IndexError for it)
        x = X[-1] if X else len(u)
        if ss:
            if hasattr(ss,'_shyIndices'):
                X.extend([_+x for _ in ss._shyIndices])
            u += ss
        return _SHYIndexedStr(u,X)
    elif ss:
        #plain head: the tail starts right after the stripped head.  This branch
        #used to reference X and x before they were assigned.
        x = len(u)
        u += ss
        if hasattr(ss,'_shyIndices'):
            return _SHYIndexedStr(u,[_+x for _ in ss._shyIndices])
    return u
588
class _SHYWord(list):
    '''a fragword containing soft hyphens; some of its strings are _SHYIndexedStr'''
    def shyphenate(self, newWidth, maxWidth):
        '''Try to split this word at one of its soft hyphens.

        newWidth    width that includes this whole word; the word's own width
                    (self[0]) is subtracted to get the width before the word
                    (presumably the line width with the word added - confirm
                    against the caller)
        maxWidth    the head, including its added '-', must not exceed this

        Returns [] when the word has zero width or no break point fits,
        otherwise the pair (_SplitFragSHY head, _SHYWordHS tail) for the last
        break point that fits.  As a side effect self._fsww is set to the
        width of the head up to the first soft hyphen including the '-', or
        to 0x7fffffff when no soft hyphen was reached.
        '''
        ww = self[0]
        self._fsww = 0x7fffffff
        if ww==0: return []
        possible = None     #description of the best break point found so far
        exceeded = False
        baseWidth = baseWidth0 = newWidth - ww  #width preceding this word
        fsww = None         #width up to and including the first possible hyphen
        for i,(f,t) in enumerate(self[1:]):
            sW = lambda s: stringWidth(s, f.fontName, f.fontSize)
            if isinstance(t,_SHYIndexedStr):
                # there's a shy in this bit
                shyLen = sW(u'-')
                bw = baseWidth + shyLen
                for j, x in enumerate(t._shyIndices):
                    left, right = t[:x], t[x:]
                    leftw = bw+sW(left)
                    if fsww is None: fsww = leftw
                    exceeded = leftw > maxWidth
                    if exceeded: break
                    #this break fits; a later one that also fits replaces it
                    possible = i, j, x, leftw, left, right, shyLen
                baseWidth += sW(t)
            else:
                baseWidth += sW(t)
                exceeded = baseWidth > maxWidth
            #only stop once at least one soft hyphen has been measured
            if exceeded and fsww is not None: break
        self._fsww = fsww-baseWidth0 if fsww is not None else 0x7fffffff
        if not possible: return []
        i, j, x, leftw, left, right, shyLen = possible
        i1 = i+1    #index in self of the subfrag (self[0] is the width)
        f, t = self[i1] #we're splitting this subfrag
        X = t._shyIndices
        lefts = _SHYIndexedStr(left+u'-',X[:j+1])
        L = self[:i1] + [(f,lefts)]
        L[0] = leftw - baseWidth0   #width of the head including the '-'
        #the tail keeps the remaining break points, shifted to be relative to right
        R = [ww-L[0]+shyLen]+([] if not right else [(f,_SHYIndexedStr(right,[_-x for _ in X[j+1:]]))]) + self[i1+1:]
        return _SplitFragSHY(L), _SHYWordHS(R)
628
class _SplitFragSHY(_SHYWord, _SplitFragHY):
    '''Marker type: the head of a soft hyphen split; the hyphen at its end
    has to be removed before rejoining.'''
631
class _SHYWordHS(_SHYWord,_SplitFragHS):
    '''Marker type: a fragword containing soft hyphens that is followed by a space.'''
635
def _processed_frags(frags):
    '''Return True when frags is already a list of frag words, i.e. when the
    first item of its first element is a number (the word width).

    Anything that cannot be indexed that way (empty, None, ...) gives False.
    '''
    try:
        return isinstance(frags[0][0],(float,int))
    except Exception:
        #best effort test; unlike a bare except this lets KeyboardInterrupt
        #and SystemExit through
        return False
641
#fragment kinds stored in a frag's _fkind attribute by _getFragWords
_FK_TEXT = 0    #frag with non-empty text
_FK_IMG = 1     #frag with a cbDefn that has a non-zero width
_FK_APPEND = 2  #frag with a cbDefn without width; attached to a neighbouring word
_FK_BREAK = 3   #frag with a lineBreak attribute
646
def _rejoinSplitFragWords(F):
    '''Merge the split frags in F (a non-empty list of _SplitFrags) back
    into a single frag word.

    The widths are summed, the '-' ending a _SplitFragHY piece is removed
    (its width is taken off as well) and texts of consecutive pieces that
    share the same style object are joined.  The result has the type that
    _reconstructSplitFrags selects for the last piece.
    '''
    word = [0]
    width = 0
    prevStyle = None
    for piece in F:
        width += piece[0]
        dropHyphen = isinstance(piece,_SplitFragHY)
        for pair in piece[1:]:
            style, text = pair
            if dropHyphen and pair is piece[-1]:
                width -= stringWidth(text[-1],style.fontName,style.fontSize) + 1e-8
                text = _shyUnsplit(text) #strip the '-'
            if prevStyle is not style:
                word.append((style,text))
                prevStyle = style
            else:
                word[-1] = (style, _shyUnsplit(word[-1][1],text))
    word[0] = width
    return _reconstructSplitFrags(piece)(word)
668
def _reconstructSplitFrags(f):
    '''Return the list type to use for a word rebuilt from split frags,
    given f, the last of those frags.

    Soft hyphen words stay soft hyphen words, the "followed by a space"
    property of f is kept and a _SplitFragLL stays a _SplitFragLL;
    everything else becomes a plain list.
    '''
    spaceFollows = isinstance(f,_HSFrag)
    if isinstance(f,_SHYWord):
        return _SHYWordHS if spaceFollows else _SHYWord
    if not spaceFollows:
        return list
    return _SplitFragLL if isinstance(f,_SplitFragLL) else _HSFrag
672
def _getFragWords(frags,maxWidth=None):
    ''' given a Parafrag list return a list of fragwords
        [[size, (f00,w00), ..., (f0n,w0n)],....,[size, (fm0,wm0), ..., (f0n,wmn)]]
        each pair f,w represents a style and some string
        each sublist represents a word

        frags may also be a list of fragwords produced earlier (detected by
        _processed_frags); in that case injected frags are dropped, percentage
        widths are re-normalized against maxWidth and the pieces of split
        words are rejoined.

        maxWidth is the width used to resolve percentage widths.

        If no word results but frags is not empty, a single zero width word
        using the first frag is returned.
    '''
    def _rescaleFrag(f):
        #re-resolve a percentage width when the available width has changed
        w = f[0]
        if isinstance(w,_PCT):
            if w._normalizer!=maxWidth:
                w._normalizer = maxWidth
                w = w.normalizedValue(maxWidth)
                f[0] = w
    R = []
    aR = R.append
    W = []
    if _processed_frags(frags):
        #W collects the consecutive pieces of one split word
        aW = W.append
        for f in frags:
            if isinstance(f,_InjectedFrag): continue
            _rescaleFrag(f)
            if isinstance(f,_SplitFrag):
                aW(f)
                if isinstance(f, _HSFrag):
                    #a piece followed by a space ends the split word
                    aR(_rejoinSplitFragWords(W))
                    del W[:]
            else:
                if W:
                    aR(_rejoinSplitFragWords(W))
                    del W[:]
                aR(f)
        if W:
            aR(_rejoinSplitFragWords(W))
    else:
        #W is the word being built and n its width so far
        hangingSpace = False
        n = 0
        hangingStrip = True     #strip leading white space of the next text frag
        shyIndices = False      #the word being built contains a soft hyphen
        for f in frags:
            text = f.text
            if text!='':
                f._fkind = _FK_TEXT
                if hangingStrip:
                    text = lstrip(text)
                    if not text: continue
                    hangingStrip = False
                S = split(text)
                if text[0] in whitespace or not S:
                    if W:
                        W.insert(0,n)   #end preceding word
                        aR(_SHYWord(W) if shyIndices else W)
                        whs = hangingSpace
                        W = []
                        shyIndices = False
                        hangingSpace = False
                        n = 0
                    else:
                        whs = R and isinstance(R[-1],_HSFrag)
                    if not whs:
                        S.insert(0,'')
                    elif not S:
                        continue

                #every word but the last one is complete and followed by a space
                for w in S[:-1]:
                    if _shy in w:
                        w = _SHYIndexedStr(w)
                        shyIndices = True
                    W.append((f,w))
                    n += stringWidth(w, f.fontName, f.fontSize)
                    W.insert(0,n)
                    aR(_SHYWordHS(W) if shyIndices or isinstance(W,_SHYWord) else _HSFrag(W))
                    W = []
                    shyIndices = False
                    n = 0

                #the last word may be continued by the next frag
                hangingSpace = False
                w = S[-1]
                if _shy in w:
                    w = _SHYIndexedStr(w)
                    shyIndices = True
                W.append((f,w))
                n += stringWidth(w, f.fontName, f.fontSize)
                if text and text[-1] in whitespace:
                    W.insert(0,n)
                    aR(_SHYWord(W) if shyIndices or isinstance(W,_SHYWord) else _HSFrag(W))
                    W = []
                    shyIndices = False
                    n = 0
            elif hasattr(f,'cbDefn'):
                cb = f.cbDefn
                w = getattr(cb,'width',0)
                if w:
                    #something with a width (eg an image) is a word of its own
                    if hasattr(w,'normalizedValue'):
                        w._normalizer = maxWidth
                        w = w.normalizedValue(maxWidth)
                    if W:
                        W.insert(0,n)
                        aR(_HSFrag(W) if hangingSpace else W)
                        W = []
                        shyIndices = False
                        hangingSpace = False
                        n = 0
                    f._fkind = _FK_IMG
                    aR([w,(f,'')])
                    hangingStrip = False
                else:
                    #zero width callback: attach it to the previous or the current word
                    f._fkind = _FK_APPEND
                    if not W and R and isinstance(R[-1],_HSFrag):
                        R[-1].append((f,''))
                    else:
                        W.append((f,''))
            elif hasattr(f, 'lineBreak'):
                #pass the frag through.  The line breaker will scan for it.
                if W:
                    W.insert(0,n)
                    aR(W)
                    W = []
                    n = 0
                    shyIndices = False
                    hangingSpace = False
                f._fkind = _FK_BREAK
                aR([0,(f,'')])
                hangingStrip = True

        if W:
            W.insert(0,n)
            aR(_SHYWord(W) if shyIndices or isinstance(W,_SHYWord) else W)
    if not R:
        if frags:
            f = frags[0]
            f._fkind = _FK_TEXT
            R = [[0,(f,u'')]]

    return R
862
def _fragWordIter(w):
    '''Iterate over the pieces of the frag word w.

    The first item of w is skipped; the remaining items are (frag, text)
    pairs.  (frag, width, text) triples are yielded as follows
      - a frag with a cbDefn attribute is yielded whole, its width being
        cbDefn.width (0 when that attribute is missing);
      - non-empty text is yielded one character at a time, each with its
        own stringWidth in the frag's font;
      - empty text is yielded once with width 0.
    '''
    for frag, text in w[1:]:
        if hasattr(frag,'cbDefn'):
            yield frag, getattr(frag.cbDefn,'width',0), text
            continue
        if not text:
            yield frag, 0, text
            continue
        if isBytes(text):
            text = text.decode('utf8')  #only encoding allowed
        for ch in text:
            yield frag, stringWidth(ch,frag.fontName, frag.fontSize), ch
874
def _splitFragWord(w,maxWidth,maxWidths,lineno):
    '''given a frag word, w, as returned by getFragWords
    split it into frag words that fit in lines of length
    maxWidth
    maxWidths[lineno+1]
    .....
    maxWidths[lineno+n]

    The pieces are measured character by character using _fragWordIter;
    once lineno passes the end of maxWidths its last entry is reused.

    return the new word list; every piece closed because of overflow is a
    _SplitFrag and the final piece is a _SplitFragHS if w is an _HSFrag
    (hanging space), otherwise a _SplitFragH.
    '''
    R = []                          #the finished pieces
    maxlineno = len(maxWidths)-1
    W = []                          #(frag,text) pairs of the piece being built
    lineWidth = 0                   #width used so far on the current line
    fragText = u''                  #text collected for the current frag
    wordWidth = 0                   #width of the piece being built
    f = w[1][0]                     #frag of the first (frag,text) pair
    for g,cw,c in _fragWordIter(w):
        newLineWidth = lineWidth+cw
        tooLong = newLineWidth>maxWidth
        if g is not f or tooLong:
            #the frag changed or the line is full: close off the text
            #collected so far using a clone of its frag
            f = f.clone()
            if hasattr(f,'text'):
                f.text = fragText
            W.append((f,fragText))
            if tooLong:
                #emit the piece and carry on with the next line's width
                W = _SplitFrag([wordWidth]+W)
                R.append(W)
                lineno += 1
                maxWidth = maxWidths[min(maxlineno,lineno)]
                W = []
                newLineWidth = cw   #this character starts the new line
                wordWidth = 0
            fragText = u''
            f = g
        wordWidth += cw
        fragText += c
        lineWidth = newLineWidth
    #whatever is left is the final piece; note its frag is not cloned
    W.append((f,fragText))
    W = (_SplitFragHS if isinstance(w,_HSFrag) else _SplitFragH)([wordWidth]+W)

    R.append(W)
    return R
920
921
#derived from Django validator
#https://github.com/django/django/blob/master/django/core/validators.py
#uri_pat is compiled with re.I and ends in \Z so it has to cover the whole string
#   scheme://[user[:password]@]host[:port][/...|?...|#...]
#host is a dotted quad, a bracketed [...] literal, a dotted host name or localhost
#group 1 is the scheme including its '://' (the scheme name may be empty)
#group 2 is the host name / localhost alternative when that is what matched
uri_pat = re.compile(u'(^(?:[a-z0-9\\.\\-\\+]*)://)(?:\\S+(?::\\S*)?@)?(?:(?:25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(?:\\.(?:25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}|\\[[0-9a-f:\\.]+\\]|([a-z\xa1-\uffff0-9](?:[a-z\xa1-\uffff0-9-]{0,61}[a-z\xa1-\uffff0-9])?(?:\\.(?!-)[a-z\xa1-\uffff0-9-]{1,63}(?<!-))*\\.(?!-)(?:[a-z\xa1-\uffff-]{2,63}|xn--[a-z0-9]{1,59})(?<!-)\\.?|localhost))(?::\\d{2,5})?(?:[/?#][^\\s]*)?\\Z', re.I)
925
def _slash_parts(uri,scheme,slash):
    '''Generate (head, tail) pairs for uri, one per slash found in it.

    Trailing slash characters are first removed from uri and kept to one
    side.  The search for slash starts at index 2; for every hit the head is
    scheme plus uri up to and including that slash, the tail is the rest of
    uri followed by the trailing slashes that were removed.  Heads are
    produced shortest first.
    '''
    body = uri
    tail = u''
    while body.endswith(slash):
        body = body[:-1]
        tail += slash

    pos = body.find(slash,2)
    while pos>=0:
        pos += 1
        yield scheme+body[:pos],body[pos:]+tail
        pos = body.find(slash,pos)
938
def _uri_split_pairs(uri):
    '''Return the candidate (head, tail) splits of uri or None.

    None is returned when uri (utf8 bytes are decoded first) is not matched
    by uri_pat.  Otherwise the list holds the pairs generated by
    _slash_parts with the longest head first; when both the scheme and the
    remainder are non-empty the (scheme, remainder) pair comes last.
    '''
    if isBytes(uri):
        uri = uri.decode('utf8')
    matched = uri_pat.match(uri)
    if not matched:
        return None
    scheme = matched.group(1)
    rest = uri[len(scheme):]

    if scheme or u'/' in rest:
        slash = u'/'
    else:
        slash = u'\\'   #might be a microsoft pattern
    pairs = list(_slash_parts(rest,scheme,slash))
    if scheme and rest:
        pairs.insert(0,(scheme,rest))
    pairs.reverse()
    return pairs
951
#valid letters determined by inspection of
#    https://en.wikipedia.org/wiki/List_of_Unicode_characters#Latin_script
#_hy_letters is the inside of a regex character class (ranges and single characters)
_hy_letters=u'A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u024f\u1e80-\u1e85\u1e00-\u1eff\u0410-\u044f\u1e02\u1e03\u1e0a\u1e0b\u1e1e\u1e1f\u1e40\u1e41\u1e56\u1e57\u1e60\u1e61\u1e6a\u1e6b\u1e9b\u1ef2\u1ef3'
#explicit hyphens
_shy = u'\xad'          #the soft hyphen
_hy_shy = u'-\xad'      #hyphen-minus and soft hyphen

#one or more opening quote/bracket characters at the start of a word
_hy_pfx_pat = re.compile(u'^[\'"([{\xbf\u2018\u201a\u201c\u201e]+')
#one or more closing quote/bracket/punctuation characters at the end of a word
_hy_sfx_pat = re.compile(u'[]\'")}?!.,;:\u2019\u201b\u201d\u201f]+$')
#the whole string consists of _hy_letters only
_hy_letters_pat=re.compile(u''.join((u"^[",_hy_letters,u"]+$")))
#the whole string consists of _hy_letters, '-' and soft hyphens only
_hy_shy_letters_pat=re.compile(u''.join((u"^[",_hy_shy,_hy_letters,"]+$")))
#a single '-' or soft hyphen; it is a capturing group so split() keeps the separators
_hy_shy_pat = re.compile(u''.join((u"([",_hy_shy,u"])")))
964
def _hyGenPair(hyphenator, s, ww, newWidth, maxWidth, fontName, fontSize, uriWasteReduce, embeddedHyphenation, hymwl):
    '''Look for one place to break the word s so that its head still fits.

    hyphenator  when truthy it is called as hyphenator(s) and the result is
                iterated for (head, tail) pairs
    s           the word; utf8 bytes are decoded
    ww          the width of the whole word
    newWidth    newWidth-ww is used as the width that comes before the word
    maxWidth    the width the head (plus any join character) has to fit in
    fontName, fontSize  handed to stringWidth to measure candidate heads
    uriWasteReduce      when truthy a uri no wider than maxWidth is still
                split if the width before it is <= (1-uriWasteReduce)*maxWidth
    embeddedHyphenation when truthy the word may be broken at a '-' or soft
                hyphen it already contains
    hymwl       words shorter than this, after the leading/trailing
                punctuation has been removed, are not split

    Returns None when no split was found otherwise the tuple
    (joinChar, joinCharWidth, headWidth, ww-headWidth, head, tail).
    '''
    if isBytes(s): s = s.decode('utf8') #only encoding allowed
    #peel off leading quotes/brackets; they are put back onto the head
    m = _hy_pfx_pat.match(s)
    if m:
        pfx = m.group(0)
        s = s[len(pfx):]
    else:
        pfx = u''
    #peel off the suffix; for a _SplitWordLL ending in '-' only that '-' is removed
    if isinstance(s,_SplitWordLL) and s[-1]=='-':
        sfx = u'-'
        s = s[:-1]
    else:
        m = _hy_sfx_pat.search(s)
        if m:
            sfx = m.group(0)
            s = s[:-len(sfx)]
        else:
            sfx = u''
    if len(s) < hymwl: return

    w0 = newWidth - ww  #width in front of the word
    R = _uri_split_pairs(s)
    if R is not None:
        #a uri match was seen
        if ww>maxWidth or (uriWasteReduce and w0 <= (1-uriWasteReduce)*maxWidth):
            #we matched a uri and it makes sense to split
            for h, t in R:
                h = pfx+h
                t = t + sfx
                hw = stringWidth(h,fontName,fontSize)
                tw = w0 + hw
                if tw<=maxWidth:
                    return u'',0,hw,ww-hw,h,t
        #a uri is never passed on to the hyphenator
        return

    H = _hy_shy_pat.split(s)    #pieces alternating with the '-'/soft hyphen separators
    if hyphenator and  (_hy_letters_pat.match(s) or (_hy_shy_letters_pat.match(s) and u'' not in H)):
        hylen = stringWidth(u'-',fontName,fontSize)
        for h,t in hyphenator(s):
            h = pfx + h
            if not _hy_shy_pat.match(h[-1]):
                #the head does not already end in a hyphen so one is added
                jc = u'-'
                jclen = hylen
            else:
                jc = u''
                jclen = 0
            t = t + sfx
            hw = stringWidth(h,fontName,fontSize)
            tw = hw+w0 + jclen
            if tw<=maxWidth:
                return jc,jclen,hw,ww-hw,h,t

    #even though the above tries for words with '-' it may be that no split ended with '-'
    #so this may succeed where the above does not
    n = len(H)
    if n>=3 and embeddedHyphenation and u'' not in H and _hy_shy_letters_pat.match(s):
        #every even i puts the break just after a separator; longest head first
        for i in reversed(xrange(2,n,2)):
            h = pfx + ''.join(H[:i])
            t = ''.join(H[i:]) + sfx
            hw = stringWidth(h,fontName,fontSize)
            tw = hw+w0
            if tw<=maxWidth:
                return u'',0,hw,ww-hw,h,t
1028
def _fragWordSplitRep(FW):
    '''Assemble the unicode word held by the frag word FW.

    None is returned as soon as a frag has a non-zero rise or carries a
    cbDefn with a non-zero width.  Otherwise the result is (uword, X) where
    X is a tuple with one (i, c) entry for each character of uword; i is the
    index in FW of the frag the character came from and c is the offset in
    uword at which that frag's text starts.  Frags with empty text add
    nothing.
    '''
    texts = []
    index = []
    offset = 0
    for i, (frag, text) in enumerate(FW[1:], 1):
        if frag.rise!=0:
            return None
        if hasattr(frag,'cbDefn') and getattr(frag.cbDefn,'width',0):
            return None
        if not text:
            continue
        if isBytes(text):
            text = text.decode('utf8')
        texts.append(text)
        index.extend([(i,offset)]*len(text))
        offset += len(text)
    return u''.join(texts),tuple(index)
1050
def _rebuildFragWord(F):
    '''Return a new frag word made from the (frag, text) pairs in the list F.

    The result is F with the summed stringWidth of all the texts, each
    measured in its own frag's font, placed in front.
    '''
    total = 0
    for frag, text in F:
        total += stringWidth(text,frag.fontName,frag.fontSize)
    return [total]+F
1054
def _hyGenFragsPair(hyphenator, FW, newWidth, maxWidth, uriWasteReduce, embeddedHyphenation, hymwl):
    '''Look for one place to break the multi frag word FW so that its head fits.

    The text of FW is assembled with _fragWordSplitRep and then handled the
    same way as _hyGenPair handles a plain word: a uri split is tried first,
    then the hyphenator, then any '-' or soft hyphens already in the word.
    FW[0] is used as the width of the word and newWidth-FW[0] as the width
    in front of it.

    Returns None when no split was found (or _fragWordSplitRep gave
    nothing) otherwise (joinChar, head, tail) where head and tail are frag
    words made by _rebuildFragWord.
    '''
    X = _fragWordSplitRep(FW)
    if not X: return
    s, X = X    #s is the word; X[pos] is (index in FW, start offset) for character pos
    if isBytes(s): s = s.decode('utf8') #only encoding allowed
    m = _hy_pfx_pat.match(s)
    if m:
        pfx = m.group(0)
        s = s[len(pfx):]
    else:
        pfx = u''
    if isinstance(FW,_SplitFragLL) and FW[-1][1][-1]=='-':
        sfx = u'-'
        s = s[:-1]
    else:
        m = _hy_sfx_pat.search(s)
        if m:
            sfx = m.group(0)
            s = s[:-len(sfx)]
        else:
            sfx = u''
    if len(s) < hymwl: return
    ww = FW[0]
    w0 = newWidth - ww  #width in front of the word

    #try for a uri
    R = _uri_split_pairs(s)
    if R is not None:
        #a uri match was seen
        if ww>maxWidth or (uriWasteReduce and w0 <= (1-uriWasteReduce)*maxWidth):
            #we matched a uri and it makes sense to split
            for h, t in R:
                h = pfx+h
                pos = len(h)    #offset in the whole word of the first tail character
                #FW[fx] is split
                fx, cc = X[pos]
                FL = FW[1:fx]
                #NOTE(review): sfx is rebound here to the text of the frag being
                #split; the punctuation suffix found above is not used after this
                ffx, sfx = FW[fx]
                sfxl = sfx[:pos-cc]
                if sfxl: FL.append((ffx,sfxl))
                sfxr = sfx[pos-cc:]
                FR = FW[fx+1:]
                if sfxr: FR.insert(0,(ffx,sfxr))
                h = _rebuildFragWord(FL)
                if w0+h[0]<=maxWidth:
                    return u'',h,_rebuildFragWord(FR)
        #a uri is never passed on to the hyphenator
        return

    H = _hy_shy_pat.split(s)    #pieces alternating with the '-'/soft hyphen separators
    if hyphenator and (_hy_letters_pat.match(s) or (_hy_shy_letters_pat.match(s) and u'' not in H)):
        #not too diffcult for now
        for h,t in hyphenator(s):
            h = pfx+h
            pos = len(h)
            #FW[fx] is split
            fx, cc = X[pos]
            FL = FW[1:fx]
            ffx, sfx = FW[fx]
            sfxl = sfx[:pos-cc]
            if not _hy_shy_pat.match(h[-1]):
                #the head does not already end in a hyphen so one is added
                jc = u'-'
            else:
                jc = u''
            if sfxl or jc:
                FL.append((ffx,sfxl+jc))
            sfxr = sfx[pos-cc:]
            FR = FW[fx+1:]
            if sfxr: FR.insert(0,(ffx,sfxr))
            h = _rebuildFragWord(FL)
            if w0+h[0]<=maxWidth:
                return jc,h,_rebuildFragWord(FR)

    #even though the above tries for words with '-' it may be that no split ended with '-'
    #so this may succeed where the above does not
    n = len(H)
    if n>=3 and embeddedHyphenation and u'' not in H and _hy_shy_letters_pat.match(s):
        #every even i puts the break just after a separator; longest head first
        for i in reversed(xrange(2,n,2)):
            pos = len(pfx + u''.join(H[:i]))
            fx, cc = X[pos]
            #FW[fx] is split
            FL = FW[1:fx]
            ffx, sfx = FW[fx]
            sfxl = sfx[:pos-cc]
            if sfxl: FL.append((ffx,sfxl))
            sfxr = sfx[pos-cc:]
            FR = FW[fx+1:]
            if sfxr: FR.insert(0,(ffx,sfxr))
            h = _rebuildFragWord(FL)
            if w0+h[0]<=maxWidth:
                return u'',h,_rebuildFragWord(FR)
1145
def _hyphenateFragWord(hyphenator,FW,newWidth,maxWidth,uriWasteReduce,embeddedHyphenation,
                        hymwl=hyphenationMinWordLength):
    '''Split the frag word FW into a [head, tail] pair of frag words.

    Returns [] when the width FW[0] is zero and None when no split could be
    found.  A frag word with a single (frag, text) pair is handled by
    _hyGenPair, anything else by _hyGenFragsPair.  The head is a
    _SplitFragHY when a join character was added otherwise a _SplitFragH;
    the tail is a _SplitFragHS when FW is an _HSFrag otherwise a _SplitFrag.
    '''
    ww = FW[0]
    if ww==0:
        return []
    if len(FW)==2:
        frag, text = FW[1]
        if isinstance(FW,_SplitFragLL):
            text = _SplitWordLL(text)
        found = _hyGenPair(hyphenator, text, ww, newWidth, maxWidth, frag.fontName, frag.fontSize,
                        uriWasteReduce, embeddedHyphenation, hymwl)
        if not found:
            return None
        jc, hylen, hw, tw, h, t = found
        head = [hw+hylen,(frag,h+jc)]
        tail = [tw,(frag,t)]
    else:
        found = _hyGenFragsPair(hyphenator, FW, newWidth, maxWidth,
                        uriWasteReduce, embeddedHyphenation, hymwl)
        if not found:
            return None
        jc, head, tail = found

    headClass = _SplitFragHY if jc else _SplitFragH
    tailClass = _SplitFragHS if isinstance(FW,_HSFrag) else _SplitFrag
    return [headClass(head),tailClass(tail)]
1165
class _SplitWord(unicodeT):
    '''a piece of a word that has been split; _splitWord uses it for every piece but the last'''
    pass

class _SplitWordEnd(_SplitWord):
    '''the final piece of a split word'''
    pass

class _SplitWordH(_SplitWord):
    '''head part of a split word; used by _hyphenateWord when no hyphen was added'''
    pass

class _SplitWordHY(_SplitWordH):
    '''head part of a hyphenation word pair'''
    pass

class _SplitWordLL(unicodeT):
    '''a word that's forced to end with - because of paragraph split'''
    pass
1182
class _SHYStr(unicodeT):
    '''A simple soft hyphenated word.

    The text is stored with its soft hyphens removed; the offsets in the
    stored text at which they stood are kept in the list __sp__.
    '''
    def __new__(cls,s):
        pieces = s.split(_shy)
        if len(pieces)<=1:
            #there were no soft hyphens
            self = unicodeT.__new__(cls, s)
            self.__sp__ = []
            return self
        self = unicodeT.__new__(cls, u''.join(pieces))
        offsets = []
        end = 0
        for piece in pieces[:-1]:
            end += len(piece)
            offsets.append(end)
        self.__sp__ = offsets
        return self

    def __shysplit__(self, fontName, fontSize, baseWidth, limWidth, encoding='utf8'):
        '''
        baseWidth = currentWidth + spaceWidth + hyphenWidth
        limWidth = maxWidth + spaceShrink

        Returns [head, tail] for the longest head, ending at a soft hyphen
        position, with baseWidth + its width <= limWidth.  The head has '-'
        appended; both are _SHYStr instances that keep their remaining
        soft hyphen positions.  Returns None when nothing fits.
        self._fsww is set to the width of the text up to the first soft
        hyphen position if the search gets that far, else to 0x7fffffff.
        '''
        self._fsww = 0x7fffffff
        points = self.__sp__
        #go backwards so that the longest head that fits is the one returned
        for i in range(len(points)-1,-1,-1):
            head = self[:points[i]]
            headWidth = stringWidth(head, fontName, fontSize, encoding)
            if i==0:
                self._fsww = headWidth
            if baseWidth + headWidth <= limWidth:
                #we found a suitable split in a soft-hyphenated word
                bounds = points[i:] + [len(self)]
                tails = [self[a:b] for a, b in zip(bounds, bounds[1:])]
                first = _SHYStr(head+u'-')
                first.__sp__ = points[:i]
                return [first,_SHYStr(_shy.join(tails))]
1219
class _SHYSplitHY(_SHYStr,_SplitWordHY):
    '''a soft hyphenated string that is also a _SplitWordHY'''
    pass

class _SHYSplit(_SHYStr,_SplitWord):
    '''a soft hyphenated string that is also a _SplitWord'''
    pass
1225
def _hyphenateWord(hyphenator,fontName,fontSize,w,ww,newWidth,maxWidth, uriWasteReduce,embeddedHyphenation,
                    hymwl=hyphenationMinWordLength):
    '''Split the simple word w into a [head, tail] pair.

    Returns [] when the word width ww is zero and None when _hyGenPair finds
    no split.  The head, with any join character appended, is a _SplitWordHY
    when a join character was added otherwise a _SplitWordH; the tail is a
    _SplitWordEnd.
    '''
    if ww==0:
        return []
    found = _hyGenPair(hyphenator, w, ww, newWidth, maxWidth, fontName, fontSize,
                    uriWasteReduce, embeddedHyphenation, hymwl)
    if not found:
        return None
    joinChar, hylen, hw, tw, head, tail = found
    headClass = _SplitWordHY if joinChar else _SplitWordH
    return [headClass(head+joinChar),_SplitWordEnd(tail)]
1233
def _splitWord(w, lineWidth, maxWidths, lineno, fontName, fontSize, encoding='utf8'):
    '''
    split w character by character into pieces that fit in lines of length
    maxWidths[lineno] (lineWidth of which is already in use)
    maxWidths[lineno+1]
    .....
    maxWidths[lineno+n]

    once lineno passes the end of maxWidths its last entry is reused.
    The list of pieces is returned; every piece is a _SplitWord apart from
    the last which is a _SplitWordEnd.
    '''
    #TODO fix this to use binary search for the split points
    lastLine = len(maxWidths)-1
    limit = maxWidths[min(lastLine,lineno)]
    if isBytes(w):
        w = w.decode(encoding)
    pieces = []
    chars = []
    for ch in w:
        chWidth = stringWidth(ch,fontName,fontSize,encoding)
        used = lineWidth+chWidth
        if used>limit:
            #the line is full; close the piece and start the next line with ch
            pieces.append(_SplitWord(u''.join(chars)))
            chars = []
            lineno += 1
            limit = maxWidths[min(lastLine,lineno)]
            used = chWidth
        chars.append(ch)
        lineWidth = used
    pieces.append(_SplitWordEnd(u''.join(chars)))
    return pieces
1265
def _rejoinSplitWords(R):
    '''Rejoin the pieces of one split word into a single word.

    R is a non-empty list of pure _SplitWord or _SHYStr

    - when R[0] is a _SHYStr the remaining pieces are folded onto it one at
      a time with _shyUnsplit and the result of the last call is returned;
    - when R[0] is a _SplitWordHY the trailing character is removed from
      every _SplitWordHY piece before the pieces are concatenated; the
      plain string is returned if the last piece is a _SplitWordEnd
      otherwise a _SplitWordHY with '-' put back on the end;
    - otherwise the pieces are just concatenated.
    '''
    first = R[0]
    if isinstance(first,_SHYStr):
        r = first
        #start at the second piece; R[0] is already the seed and must not be
        #joined to itself
        for piece in R[1:]:
            r = _shyUnsplit(r,piece)
        return r
    elif isinstance(first,_SplitWordHY):
        s = u''.join((_[:-1] if isinstance(_,_SplitWordHY) else _ for _ in R))
        return s if isinstance(R[-1], _SplitWordEnd) else _SplitWordHY(s+u'-')
    else:
        return ''.join(R)
1279
def _yieldBLParaWords(blPara,start,stop):
    '''Yield the words of blPara.lines[start:stop], rejoining split words.

    The words of a line are taken from item 1 of the line.  _SplitWord
    pieces are collected and yielded as one word (see _rejoinSplitWords)
    when a _SplitWordEnd is seen, when a word that is not a _SplitWord
    follows, or when the lines run out.  All other words are yielded as
    they are.
    '''
    pending = []
    for line in blPara.lines[start:stop]:
        for word in line[1]:
            if not isinstance(word,_SplitWord):
                if pending:
                    yield _rejoinSplitWords(pending)
                    del pending[:]
                yield word
                continue
            pending.append(word)
            if isinstance(word,_SplitWordEnd):
                yield _rejoinSplitWords(pending)
                del pending[:]
    if pending:
        yield _rejoinSplitWords(pending)
1298
def _split_blParaSimple(blPara,start,stop):
    '''Return a one element list holding a frag for lines start:stop of blPara.

    The frag is a clone of blPara with any lines, kind and text attributes
    removed and words set to the words from _yieldBLParaWords.  If the last
    word is a _SplitWordHY its class is changed to _SHYSplit when it is also
    a _SHYStr, otherwise to _SplitWordLL.
    '''
    frag = blPara.clone()
    for name in ('lines', 'kind', 'text'):
        if hasattr(frag,name):
            delattr(frag,name)
    words = list(_yieldBLParaWords(blPara,start,stop))
    frag.words = words
    last = words[-1]
    if isinstance(last,_SplitWordHY):
        if isinstance(last,_SHYStr):
            last.__class__ = _SHYSplit
        else:
            last.__class__ = _SplitWordLL
    return [frag]
1307
def _split_blParaHard(blPara,start,stop):
    '''Return the frags found in the words of blPara.lines[start:stop].

    After each line except the last of the slice the text of the most
    recent frag is made to end in a space; frags that have a cbDefn with no
    (or zero) width are stepped over when looking for that frag.  The frags
    are modified in place.
    '''
    frags = []
    selected = blPara.lines[start:stop]
    for line in selected:
        frags.extend(line.words)
        if line is selected[-1]:
            continue
        for frag in reversed(frags):
            if hasattr(frag,'cbDefn') and not getattr(frag.cbDefn,'width',0):
                continue
            if not frag.text:
                frag.text = ' '
            elif frag.text[-1]!=' ':
                frag.text += ' '
            break
    return frags
1322
def _drawBullet(canvas, offset, cur_y, bulletText, style, rtl):
    '''Draw the bullet and return the, possibly increased, offset.

    bulletText is either a simple string or a list of frags.  The width of
    the bullet is only needed when rtl is truthy or style.bulletAnchor is
    not 'start'; with a 'numeric' anchor the text is measured up to the
    decimalSymbol plus half the width of that symbol and with a 'middle'
    anchor the width is halved.  When rtl is truthy rtl[0] is used as the
    available width and offset is returned unchanged.
    '''
    anchor = style.bulletAnchor
    bulletWidth = 0
    if rtl or anchor!='start':
        numeric = anchor=='numeric'
        if isStr(bulletText):
            text = bulletText
            hasDecimal = numeric and decimalSymbol in text
            if hasDecimal:
                text = text[:text.index(decimalSymbol)]
            bulletWidth = stringWidth(text, style.bulletFontName, style.bulletFontSize)
            if hasDecimal:
                bulletWidth += 0.5 * stringWidth(decimalSymbol, style.bulletFontName, style.bulletFontSize)
        else:
            #it's a list of fragments
            for frag in bulletText:
                text = frag.text
                hasDecimal = numeric and decimalSymbol in text
                if hasDecimal:
                    text = text[:text.index(decimalSymbol)]
                    bulletWidth += 0.5 * stringWidth(decimalSymbol, frag.fontName, frag.fontSize)
                bulletWidth += stringWidth(text, frag.fontName, frag.fontSize)
                if hasDecimal:
                    #nothing after the decimal symbol counts
                    break
    if anchor=='middle':
        bulletWidth *= 0.5

    cur_y += getattr(style,"bulletOffsetY",0)
    if rtl:
        x = rtl[0]+style.rightIndent-(style.bulletIndent+bulletWidth)
    else:
        x = style.bulletIndent-bulletWidth
    tx2 = canvas.beginText(x, cur_y)
    tx2.setFont(style.bulletFontName, style.bulletFontSize)
    tx2.setFillColor(getattr(style,'bulletColor',style.textColor))
    if isStr(bulletText):
        tx2.textOut(bulletText)
    else:
        for frag in bulletText:
            tx2.setFont(frag.fontName, frag.fontSize)
            tx2.setFillColor(frag.textColor)
            tx2.textOut(frag.text)
    canvas.drawText(tx2)

    if rtl:
        return offset
    #AR making definition lists a bit less ugly
    #bulletEnd = tx2.getX()
    bulletEnd = tx2.getX() + style.bulletFontSize * 0.6
    return max(offset,bulletEnd - style.leftIndent)
1373
def _handleBulletWidth(bulletText,style,maxWidths):
    '''Reduce maxWidths[0] when the bullet is too long for the first line indent.

    bulletText is a string or a list of frags; nothing happens when it is
    empty.  The bullet length is style.bulletIndent plus the width of the
    text plus 0.6*style.bulletFontSize.  If that is more than the indent of
    the first line (rightIndent+firstLineIndent when style.wordWrap is
    'RTL', leftIndent+firstLineIndent otherwise) the excess is taken off
    maxWidths[0] in place.
    '''
    if not bulletText:
        return
    if isStr(bulletText):
        bulletWidth = stringWidth( bulletText, style.bulletFontName, style.bulletFontSize)
    else:
        #it's a list of fragments
        bulletWidth = sum(stringWidth(f.text, f.fontName, f.fontSize) for f in bulletText)
    bulletLen = style.bulletIndent + bulletWidth + 0.6 * style.bulletFontSize
    sideIndent = style.rightIndent if style.wordWrap=='RTL' else style.leftIndent
    indent = sideIndent+style.firstLineIndent
    if bulletLen > indent:
        #..then it overruns, and we have less space available on line 1
        maxWidths[0] -= (bulletLen - indent)
1393
def splitLines0(frags,widths):
    '''
    given a list of ParaFrags we return a list of ParaLines

    each ParaLine has
    1)  ExtraSpace
    2)  blankCount
    3)  [textDefns....]
    each text definition is a (ParaFrag, start, limit) triplet

    NOTE(review): the code below does not do what is described above; it has
    no return statement and nothing is ever added to lines.  It looks like
    an unfinished implementation, see the other NOTE(review) comments.
    '''
    #initialise the algorithm
    lines   = []
    lineNum = 0
    maxW    = widths[lineNum]
    i       = -1
    l       = len(frags)
    lim     = start = 0
    while 1:
        #find a non whitespace character
        while i<l:
            #text is first assigned below; start==lim==0 on entry so it is
            #not looked at before then
            while start<lim and text[start]==' ': start += 1
            if start==lim:
                #the current frag is used up so move on to the next
                i += 1
                if i==l: break
                start = 0
                f = frags[i]
                text = f.text
                lim = len(text)
            else:
                break   # we found one

        if start==lim: break    #if we didn't find one we are done

        #start of a line
        g       = (None,None,None)
        line    = []
        cLen    = 0
        nSpaces = 0
        while cLen<maxW:
            j = text.find(' ',start)
            #NOTE(review): the next line is a comparison whose result is thrown
            #away, not an assignment, so j is still -1 in the slice that follows
            if j<0: j==lim
            w = stringWidth(text[start:j],f.fontName,f.fontSize)
            cLen += w
            if cLen>maxW and line!=[]:
                cLen = cLen-w
                #this is the end of the line
                #NOTE(review): g is a tuple here so g.text raises AttributeError
                while g.text[lim]==' ':
                    lim = lim - 1
                    nSpaces = nSpaces-1
                break
            if j<0: j = lim
            #NOTE(review): g is a tuple so the item assignment raises TypeError
            if g[0] is f: g[2] = j  #extend
            else:
                g = (f,start,j)
                line.append(g)
            #NOTE(review): start is not advanced anywhere in this loop - confirm
            #how it was meant to make progress
            if j==lim:
                i += 1
1451
def _do_line(tx, x1, y1, x2, y2, nlw, nsc):
    '''Draw the line (x1,y1) to (x2,y2) on the canvas of the text object tx.

    The canvas line width is set to nlw and its stroke colour to nsc first,
    but only when they differ from the values the canvas currently holds.
    '''
    canvas = tx._canvas
    if nlw!=canvas._lineWidth:
        canvas.setLineWidth(nlw)
    if nsc!=canvas._strokeColorObj:
        canvas.setStrokeColor(nsc)
    canvas.line(x1, y1, x2, y2)
1461
def _do_under_line(i, x1, ws, tx, us_lines):
    '''Draw the underline/strike lines in us_lines for line i of the text object tx.

    The lines run from x1 to x1 plus the width of the words of line i joined
    with single spaces (measured in tx._fontname at the frag font size) plus
    ws.  Every entry of us_lines is an 8-tuple (n,kind,colour,width,offset,
    rise,count,gap); width, gap and offset are converted with _usConv, an
    empty offset means '-0.125*L' for an underline and '0.25*L' otherwise,
    and a false colour means the frag's text colour.  count lines are drawn,
    moving down by gap+width each time for an underline and up otherwise.
    '''
    state = tx.XtraState
    baseY = state.cur_y - i*state.style.leading
    frag = state.f
    fontSize = frag.fontSize
    fragColor = frag.textColor
    values = {'L': fontSize, 'F': fontSize, 'f': fontSize}
    lineText = ' '.join(state.lines[i][1])
    xEnd = x1 + tx._canvas.stringWidth(lineText, tx._fontname, fontSize) + ws
    for n,kind,color,width,offset,rise,count,gap in us_lines:
        isUnderline = kind=='underline'
        lineWidth = _usConv(width,values,default=tx._defaultLineWidth)
        lineGap = _usConv(gap,values,default=1)
        step = lineGap+lineWidth
        if not isUnderline:
            step = -step
        if offset=='':
            offset = '-0.125*L' if isUnderline else '0.25*L'
        y = baseY + rise + _usConv(offset,values)
        if not color:
            color = fragColor
        while count>0:
            tx._do_line(x1, y, xEnd, y, lineWidth, color)
            y -= step
            count -= 1
1484
#a scheme name: a letter followed by one or more letters, digits, '+' or '-'
_scheme_re = re.compile('^[a-zA-Z][-+a-zA-Z0-9]+$')
def _doLink(tx,link,rect):
    '''Add the link annotation for link over rect to the canvas of tx.

    Nothing is done for an empty link.
    '#name' and 'document:name' become linkRect calls to the destination
    name.  Everything else becomes a linkURL call; its kind is 'GoToR', with
    the 'pdf:' prefix removed from the link, when the scheme is pdf and
    'URI' otherwise.  The scheme is the lower cased text before the first
    ':' of the link.
    '''
    if not link:
        return
    canvas = tx._canvas
    if link.startswith('#'):
        canvas.linkRect("", link[1:], rect, relative=1)
        return
    parts = link.split(':',1)
    scheme = parts[0].lower() if len(parts)==2 else ''
    if scheme=='document':
        canvas.linkRect("", parts[1], rect, relative=1)
        return
    kind = 'URI'
    if _scheme_re.match(scheme) and scheme=='pdf':
        kind = 'GoToR'
        link = parts[1]
    canvas.linkURL(link, rect, relative=1, kind=kind)
1501
def _do_link_line(i, t_off, ws, tx):
    '''Add every link in tx.XtraState.link over the whole of line i.

    The rectangle starts at t_off and is as wide as the words of line i
    joined with single spaces, measured in the current font of tx; it is
    one leading high and starts an eighth of the frag font size below the
    line position.  ws is not used.
    '''
    state = tx.XtraState
    leading = state.style.leading
    y = state.cur_y - i*leading - state.f.fontSize/8.0 # 8.0 factor copied from para.py
    lineText = ' '.join(state.lines[i][1])
    textlen = tx._canvas.stringWidth(lineText, tx._fontname, tx._fontsize)
    rect = (t_off, y, t_off+textlen, y+leading)
    for n, link in state.link:
        _doLink(tx, link, rect)
1510
def _do_post_text(tx):
    '''Finish off a line of the text object tx and move down to the next one.

    Using the state in tx.XtraState this draws the pending background
    colour rectangles, adds the pending links and draws the pending
    underline/strike lines, emptying each collection afterwards, then takes
    the leading off cur_y.  The leading is style.leading unless autoLeading
    is 'max' (the larger of that and 1.2*fontSize) or 'min' (1.2*fontSize).
    '''
    state = tx.XtraState
    baseY = state.cur_y
    fontSize = state.f.fontSize
    leading = state.style.leading
    auto = state.autoLeading
    if auto=='max':
        leading = max(leading,1.2*fontSize)
    elif auto=='min':
        leading = 1.2*fontSize

    backColors = state.backColors
    if backColors:
        bottom = baseY + fontSize - leading
        canvas = tx._canvas
        for left,right,color in backColors:
            canvas.setFillColor(color)
            canvas.rect(left,bottom,right-left,leading,stroke=0,fill=1)
        state.backColors = []
        state.backColor = None

    for entry in sorted(state.links.values()):
        (((n,link),left),lo,hi),right = entry
        _doLink(tx, link, (left, baseY+lo, right, baseY+hi))
    state.links = {}

    if state.us_lines:
        values = dict(L=fontSize)
        for entry in sorted(state.us_lines.values()):
            (((n,kind,color,width,offset,rise,count,gap),fs,textColor,left),fsmax),right = entry
            isUnderline = kind=='underline'
            values['f'] = fs
            values['F'] = fsmax
            lineWidth = _usConv(width,values,default=tx._defaultLineWidth)
            lineGap = _usConv(gap,values,default=1)
            step = lineGap+lineWidth
            if not isUnderline:
                step = -step
            if offset=='':
                offset = '-0.125*L' if isUnderline else '0.25*L'
            y = baseY + rise + _usConv(offset,values)
            if not color:
                color = textColor
            while count>0:
                tx._do_line(left, y, right, y, lineWidth, color)
                y -= step
                count -= 1
        state.us_lines = {}

    state.cur_y -= leading
1559
def textTransformFrags(frags,style):
    '''Apply style.textTransform to the text of the frags in place.

    The value is compared without regard to case: 'lowercase', 'uppercase'
    and 'capitalize' change the text, 'none' or any false value leave it
    alone and anything else raises ValueError.  When there are several frags
    'capitalize' only title cases from the start of a word, so a word that
    carries on from the previous frag keeps the case of its first letter.
    '''
    mode = style.textTransform
    if not mode:
        return
    mode = mode.lower()
    if mode=='none':
        return
    methods = {'lowercase': 'lower', 'uppercase': 'upper', 'capitalize': 'title'}
    if mode not in methods:
        raise ValueError('ParaStyle.textTransform value %r is invalid' % style.textTransform)
    transform = getattr(unicodeT,methods[mode])

    if len(frags)==1:
        #single fragment the easy case
        only = frags[0]
        only.text = transform(only.text)
        return

    if mode!='capitalize':
        for frag in frags:
            text = frag.text
            if text:
                frag.text = transform(text)
        return

    atWordStart = True  #did the previous frag end with a space
    for frag in frags:
        text = frag.text
        if not text:
            continue
        if atWordStart or text.startswith(u' '):
            text = transform(text)
        else:
            #the frag starts inside a word; only transform from the first space on
            cut = text.find(u' ')
            if cut>=0:
                text = text[:cut]+transform(text[cut:])
        atWordStart = text.endswith(u' ')
        frag.text = text
1596
class cjkU(unicodeT):
    '''A str that remembers the frag it belongs to and its width.

    The width is cbDefn.width (0 if missing) when the frag has a cbDefn
    attribute, otherwise the stringWidth of the value in the frag's font.
    The encoding argument is accepted but not used.
    '''
    def __new__(cls,value,frag,encoding):
        self = unicodeT.__new__(cls,value)
        self._frag = frag
        if hasattr(frag,'cbDefn'):
            self._width = getattr(frag.cbDefn,'width',0)
        else:
            self._width = stringWidth(value,frag.fontName,frag.fontSize)
        return self

    @property
    def frag(self):
        return self._frag

    @property
    def width(self):
        return self._width
1610
def makeCJKParaLine(U,maxWidth,widthUsed,extraSpace,lineBreak,calcBounds):
    '''Build the FragLine for the run of cjkU characters U.

    Neighbouring characters whose frags are the same according to sameFrag
    are merged; each run becomes a clone of its frag whose text is the
    joined characters.  The line records the largest font size and ascent
    and the smallest descent that were seen.  When calcBounds is true the
    vertical range of a frag having a cbDefn with a non-zero width comes
    from imgVRange rather than from the font.
    '''
    def closeRun(frag, chars):
        #a clone of frag that holds the text of the run
        piece = frag.clone()
        piece.text = u''.join(chars)
        return piece

    words = []
    run = []
    current = FragLine()    #an empty frag to compare the first character with
    maxSize = maxAscent = minDescent = 0
    for u in U:
        frag = u.frag
        fontSize = frag.fontSize
        cbDefn = getattr(frag,'cbDefn',None) if calcBounds else None
        if getattr(cbDefn,'width',0):
            descent, ascent = imgVRange(imgNormV(cbDefn.height,fontSize),cbDefn.valign,fontSize)
        else:
            ascent, descent = getAscentDescent(frag.fontName,fontSize)
        maxSize = max(maxSize,fontSize)
        maxAscent = max(maxAscent,ascent)
        minDescent = min(minDescent,descent)
        if not sameFrag(current,frag):
            words.append(closeRun(current,run))
            run = []
            current = frag
        run.append(u)
    if run:
        words.append(closeRun(current,run))
    #words[0] came from the empty starting frag and is left out
    return FragLine(kind=1,extraSpace=extraSpace,wordCount=1,words=words[1:],
                    fontSize=maxSize,ascent=maxAscent,descent=minDescent,
                    maxWidth=maxWidth,currentWidth=widthUsed,lineBreak=lineBreak)
1642
def cjkFragSplit(frags, maxWidths, calcBounds, encoding='utf8'):
    '''This attempts to be wordSplit for frags using the dumb algorithm.

    frags       the fragments to break; each needs a text attribute (bytes
                text is decoded using encoding)
    maxWidths   available line widths; maxWidths[0] is used for the first
                line and the last entry is reused once the list runs out
    calcBounds  handed on unchanged to makeCJKParaLine
    encoding    used to decode bytes text and handed on to cjkU

    Returns ParaLines(kind=1) whose lines are built by makeCJKParaLine.
    '''
    U = []  #get a list of single glyphs with their widths etc etc
    for f in frags:
        text = f.text
        if isBytes(text):
            text = text.decode(encoding)
        if text:
            U.extend([cjkU(t,f,encoding) for t in text])
        else:
            #a frag without text still contributes one (empty) entry
            U.append(cjkU(text,f,encoding))
    lines = []
    i = widthUsed = lineStartPos = 0
    maxWidth = maxWidths[0]
    nU = len(U)
    while i<nU:
        u = U[i]
        i += 1
        w = u.width
        if hasattr(w,'normalizedValue'):
            #widths that can normalize themselves are resolved against the current line width
            w._normalizer = maxWidth
            w = w.normalizedValue(maxWidth)
        widthUsed += w
        lineBreak = hasattr(u.frag,'lineBreak')
        endLine = (widthUsed>maxWidth + _FUZZ and widthUsed>0) or lineBreak
        if endLine:
            extraSpace = maxWidth - widthUsed
            if not lineBreak:
                #U may contain empty entries (see above) and ord('') raises
                #TypeError, so only non-empty glyphs are examined here.
                if u and ord(u)<0x3000:
                    # we appear to be inside a non-Asian script section.
                    # (this is a very crude test but quick to compute).
                    # This is likely to be quite rare so the speed of the
                    # code below is hopefully not a big issue.  The main
                    # situation requiring this is that a document title
                    # with an english product name in it got cut.


                    # we count back and look for
                    #  - a space-like character
                    #  - reversion to Kanji (which would be a good split point)
                    #  - in the worst case, roughly half way back along the line
                    limitCheck = (lineStartPos+i)>>1        #(arbitrary taste issue)
                    for j in xrange(i-1,limitCheck,-1):
                        uj = U[j]
                        #the parentheses matter: without them an empty uj
                        #falls through to ord(uj) and raises TypeError
                        if uj and (category(uj)=='Zs' or ord(uj)>=0x3000):
                            k = j+1
                            if k<i:
                                #split after U[j]: U[k] becomes the character
                                #being pushed back and everything after it is
                                #handed back to the next line
                                j = k+1
                                extraSpace += sum(U[ii].width for ii in xrange(j,i))
                                w = U[k].width
                                u = U[k]
                                i = j
                                break

                #Normally the overflowing character is pushed back to the next line.
                #The most important of the Japanese typography rules is the exception:
                #if this character cannot start a line it is kept on this line so it hangs
                #in the right margin. We won't do two or more though - that's unlikely and
                #would result in growing ugliness.
                #bug fix contributed by Alexander Vasilenko <alexs.vasilenko@gmail.com>
                if u not in ALL_CANNOT_START and i>lineStartPos+1:
                    #push the character back and give its width back as extra space;
                    #the i>lineStartPos+1 condition ensures progress
                    i -= 1
                    extraSpace += w
            lines.append(makeCJKParaLine(U[lineStartPos:i],maxWidth,widthUsed,extraSpace,lineBreak,calcBounds))
            try:
                maxWidth = maxWidths[len(lines)]
            except IndexError:
                maxWidth = maxWidths[-1]  # use the last one

            lineStartPos = i
            widthUsed = 0

    #any characters left?
    if widthUsed > 0:
        lines.append(makeCJKParaLine(U[lineStartPos:],maxWidth,widthUsed,maxWidth-widthUsed,False,calcBounds))

    return ParaLines(kind=1,lines=lines)
1723
1724def _setTXLineProps(tx, canvas, style):
1725    tx._defaultLineWidth = canvas._lineWidth
1726    tx._underlineColor = getattr(style,'underlineColor','')
1727    tx._underlineWidth = getattr(style,'underlineWidth','')
1728    tx._underlineOffset = getattr(style,'underlineOffset','') or '-0.125f'
1729    tx._strikeColor = getattr(style,'strikeColor','')
1730    tx._strikeWidth = getattr(style,'strikeWidth','')
1731    tx._strikeOffset = getattr(style,'strikeOffset','') or '0.25f'
1732
1733class Paragraph(Flowable):
1734    """ Paragraph(text, style, bulletText=None, caseSensitive=1)
1735        text a string of stuff to go into the paragraph.
1736        style is a style definition as in reportlab.lib.styles.
        bulletText is an optional bullet definition.
1738        caseSensitive set this to 0 if you want the markup tags and their attributes to be case-insensitive.
1739
1740        This class is a flowable that can format a block of text
1741        into a paragraph with a given style.
1742
1743        The paragraph Text can contain XML-like markup including the tags:
1744        <b> ... </b> - bold
1745        < u [color="red"] [width="pts"] [offset="pts"]> < /u > - underline
1746            width and offset can be empty meaning use existing canvas line width
1747            or with an f/F suffix regarded as a fraction of the font size
1748        < strike > < /strike > - strike through has the same parameters as underline
1749        <i> ... </i> - italics
1750        <u> ... </u> - underline
1751        <strike> ... </strike> - strike through
1752        <super> ... </super> - superscript
1753        <sub> ... </sub> - subscript
1754        <font name=fontfamily/fontname color=colorname size=float>
1755        <span name=fontfamily/fontname color=colorname backcolor=colorname size=float style=stylename>
1756        <onDraw name=callable label="a label"/>
1757        <index [name="callablecanvasattribute"] label="a label"/>
1758        <link>link text</link>
1759            attributes of links
1760                size/fontSize/uwidth/uoffset=num
1761                name/face/fontName=name
1762                fg/textColor/color/ucolor=color
1763                backcolor/backColor/bgcolor=color
1764                dest/destination/target/href/link=target
1765                underline=bool turn on underline
1766        <a>anchor text</a>
1767            attributes of anchors
1768                size/fontSize/uwidth/uoffset=num
1769                fontName=name
1770                fg/textColor/color/ucolor=color
1771                backcolor/backColor/bgcolor=color
1772                href=href
1773                underline="yes|no"
1774        <a name="anchorpoint"/>
1775        <unichar name="unicode character name"/>
1776        <unichar value="unicode code point"/>
1777        <img src="path" width="1in" height="1in" valign="bottom"/>
1778                width="w%" --> fontSize*w/100   idea from Roberto Alsina
1779                height="h%" --> linewidth*h/100 <ralsina@netmanagers.com.ar>
1780
1781        The whole may be surrounded by <para> </para> tags
1782
1783        The <b> and <i> tags will work for the built-in fonts (Helvetica
1784        /Times / Courier).  For other fonts you need to register a family
1785        of 4 fonts using reportlab.pdfbase.pdfmetrics.registerFont; then
1786        use the addMapping function to tell the library that these 4 fonts
1787        form a family e.g.
1788        from reportlab.lib.fonts import addMapping
1789        addMapping('Vera', 0, 0, 'Vera')    #normal
1790        addMapping('Vera', 0, 1, 'Vera-Italic')    #italic
1791        addMapping('Vera', 1, 0, 'Vera-Bold')    #bold
1792        addMapping('Vera', 1, 1, 'Vera-BoldItalic')    #italic and bold
1793
1794        It will also be able to handle any MathML specified Greek characters.
1795    """
1796    def __init__(self, text, style=None, bulletText = None, frags=None, caseSensitive=1, encoding='utf8'):
1797        if style is None:
1798            style = ParagraphStyle(name='paragraphImplicitDefaultStyle')
1799        self.caseSensitive = caseSensitive
1800        self.encoding = encoding
1801        self._setup(text, style, bulletText or getattr(style,'bulletText',None), frags, cleanBlockQuotedText)
1802
1803
1804    def __repr__(self):
1805        n = self.__class__.__name__
1806        L = [n+"("]
1807        keys = list(self.__dict__.keys())
1808        for k in keys:
1809            L.append('%s: %s' % (repr(k).replace("\n", " ").replace("  "," "),repr(getattr(self, k)).replace("\n", " ").replace("  "," ")))
1810        L.append(") #"+n)
1811        return '\n'.join(L)
1812
1813    def _setup(self, text, style, bulletText, frags, cleaner):
1814
1815        #This used to be a global parser to save overhead.
1816        #In the interests of thread safety it is being instantiated per paragraph.
1817        #On the next release, we'll replace with a cElementTree parser
1818        if frags is None:
1819            text = cleaner(text)
1820            _parser = ParaParser()
1821            _parser.caseSensitive = self.caseSensitive
1822            style, frags, bulletTextFrags = _parser.parse(text,style)
1823            if frags is None:
1824                raise ValueError("xml parser error (%s) in paragraph beginning\n'%s'"\
1825                    % (_parser.errors[0],text[:min(30,len(text))]))
1826            textTransformFrags(frags,style)
1827            if bulletTextFrags: bulletText = bulletTextFrags
1828
1829        #AR hack
1830        self.text = text
1831        self.frags = frags  #either the parse fragments or frag word list
1832        self.style = style
1833        self.bulletText = bulletText
1834        self.debug = 0  #turn this on to see a pretty one with all the margins etc.
1835
1836    def wrap(self, availWidth, availHeight):
1837        if availWidth<_FUZZ:
1838            #we cannot fit here
1839            return 0, 0x7fffffff
1840        # work out widths array for breaking
1841        self.width = availWidth
1842        style = self.style
1843        leftIndent = style.leftIndent
1844        first_line_width = availWidth - (leftIndent+style.firstLineIndent) - style.rightIndent
1845        later_widths = availWidth - leftIndent - style.rightIndent
1846        self._wrapWidths = [first_line_width, later_widths]
1847        if style.wordWrap == 'CJK':
1848            #use Asian text wrap algorithm to break characters
1849            blPara = self.breakLinesCJK(self._wrapWidths)
1850        else:
1851            blPara = self.breakLines(self._wrapWidths)
1852        self.blPara = blPara
1853        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
1854        leading = style.leading
1855        if blPara.kind==1:
1856            if autoLeading not in ('','off'):
1857                height = 0
1858                if autoLeading=='max':
1859                    for l in blPara.lines:
1860                        height += max(l.ascent-l.descent,leading)
1861                elif autoLeading=='min':
1862                    for l in blPara.lines:
1863                        height += l.ascent - l.descent
1864                else:
1865                    raise ValueError('invalid autoLeading value %r' % autoLeading)
1866            else:
1867                height = len(blPara.lines) * leading
1868        else:
1869            if autoLeading=='max':
1870                leading = max(leading,blPara.ascent-blPara.descent)
1871            elif autoLeading=='min':
1872                leading = blPara.ascent-blPara.descent
1873            height = len(blPara.lines) * leading
1874        self.height = height
1875        return self.width, height
1876
1877    def minWidth(self):
1878        'Attempt to determine a minimum sensible width'
1879        frags = self.frags
1880        nFrags= len(frags)
1881        if not nFrags: return 0
1882        if nFrags==1 and not _processed_frags(frags):
1883            f = frags[0]
1884            fS = f.fontSize
1885            fN = f.fontName
1886            return max(stringWidth(w,fN,fS) for w in (split(f.text, ' ') if hasattr(f,'text') else f.words))
1887        else:
1888            return max(w[0] for w in _getFragWords(frags))
1889
1890    def _split_blParaProcessed(self,blPara,start,stop):
1891        if not stop: return []
1892        lines = blPara.lines
1893        sFW = lines[start].sFW
1894        sFWN = lines[stop].sFW if stop!=len(lines) else len(self.frags)
1895        F = self.frags[sFW:sFWN]
1896        while F and isinstance(F[-1],_InjectedFrag): del F[-1]
1897        if isinstance(F[-1],_SplitFragHY):
1898            F[-1].__class__ = _SHYWordHS if isinstance(F[-1],_SHYWord) else _SplitFragLL
1899        return F
1900
1901    def _get_split_blParaFunc(self):
1902        return (_split_blParaSimple if self.blPara.kind==0
1903                    else (_split_blParaHard if not _processed_frags(self.frags)
1904                        else self._split_blParaProcessed))
1905
1906    def split(self,availWidth, availHeight):
1907        if len(self.frags)<=0 or availWidth<_FUZZ or availHeight<_FUZZ: return []
1908
1909        #the split information is all inside self.blPara
1910        if not hasattr(self,'blPara'):
1911            self.wrap(availWidth,availHeight)
1912        blPara = self.blPara
1913        style = self.style
1914        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
1915        leading = style.leading
1916        lines = blPara.lines
1917        if blPara.kind==1 and autoLeading not in ('','off'):
1918            s = height = 0
1919            if autoLeading=='max':
1920                for i,l in enumerate(blPara.lines):
1921                    h = max(l.ascent-l.descent,leading)
1922                    n = height+h
1923                    if n>availHeight+1e-8:
1924                        break
1925                    height = n
1926                    s = i+1
1927            elif autoLeading=='min':
1928                for i,l in enumerate(blPara.lines):
1929                    n = height+l.ascent-l.descent
1930                    if n>availHeight+1e-8:
1931                        break
1932                    height = n
1933                    s = i+1
1934            else:
1935                raise ValueError('invalid autoLeading value %r' % autoLeading)
1936        else:
1937            l = leading
1938            if autoLeading=='max':
1939                l = max(leading,1.2*style.fontSize)
1940            elif autoLeading=='min':
1941                l = 1.2*style.fontSize
1942            s = int(availHeight/float(l))
1943            height = s*l
1944
1945        allowOrphans = getattr(self,'allowOrphans',getattr(style,'allowOrphans',0))
1946        if (not allowOrphans and s<=1) or s==0: #orphan or not enough room
1947            del self.blPara
1948            return []
1949        n = len(lines)
1950        allowWidows = getattr(self,'allowWidows',getattr(style,'allowWidows',1))
1951        if n<=s:
1952            return [self]
1953        if not allowWidows:
1954            if n==s+1: #widow?
1955                if (allowOrphans and n==3) or n>3:
1956                    s -= 1  #give the widow some company
1957                else:
1958                    del self.blPara #no room for adjustment; force the whole para onwards
1959                    return []
1960        func = self._get_split_blParaFunc()
1961
1962        if style.endDots:
1963            style1 = deepcopy(style)
1964            style1.endDots = None
1965        else:
1966            style1 = style
1967        P1=self.__class__(None,style1,bulletText=self.bulletText,frags=func(blPara,0,s))
1968        #this is a major hack
1969        P1.blPara = ParaLines(kind=1,lines=blPara.lines[0:s],aH=availHeight,aW=availWidth)
1970        #do not justify text if linebreak was inserted after the text
1971        #bug reported and fix contributed by Niharika Singh <nsingh@shoobx.com>
1972        P1._JustifyLast = not (isinstance(blPara.lines[s-1],FragLine)
1973                                and hasattr(blPara.lines[s-1], 'lineBreak')
1974                                and blPara.lines[s-1].lineBreak)
1975        P1._splitpara = 1
1976        P1.height = height
1977        P1.width = availWidth
1978        if style.firstLineIndent != 0:
1979            style = deepcopy(style)
1980            style.firstLineIndent = 0
1981        P2=self.__class__(None,style,bulletText=None,frags=func(blPara,s,n))
1982        #propagate attributes that might be on self; suggestion from Dirk Holtwick
1983        for a in ('autoLeading',    #possible attributes that might be directly on self.
1984                ):
1985            if hasattr(self,a):
1986                setattr(P1,a,getattr(self,a))
1987                setattr(P2,a,getattr(self,a))
1988
1989        return [P1,P2]
1990
    def draw(self):
        '''Draw the paragraph by calling self.drawPara with self.debug.'''
        #The work is done by another method for historical reasons.  Keeping
        #it out of draw also makes it easier to switch to alternate drawing
        #routines.
        self.drawPara(self.debug)
1996
1997    def breakLines(self, width):
1998        """
1999        Returns a broken line structure. There are two cases
2000
2001        A) For the simple case of a single formatting input fragment the output is
2002            A fragment specifier with
2003                - kind = 0
2004                - fontName, fontSize, leading, textColor
2005                - lines=  A list of lines
2006
2007                        Each line has two items.
2008
2009                        1. unused width in points
2010                        2. word list
2011
2012        B) When there is more than one input formatting fragment the output is
2013            A fragment specifier with
2014               - kind = 1
2015               - lines=  A list of fragments each having fields
2016                            - extraspace (needed for justified)
2017                            - fontSize
2018                            - words=word list
2019                                each word is itself a fragment with
2020                                various settings
2021            in addition frags becomes a frag word list
2022
2023        This structure can be used to easily draw paragraphs with the various alignments.
2024        You can supply either a single width or a list of widths; the latter will have its
2025        last item repeated until necessary. A 2-element list is useful when there is a
2026        different first line indent; a longer list could be created to facilitate custom wraps
2027        around irregular objects."""
2028
2029        self._width_max = 0
2030        if not isinstance(width,(tuple,list)): maxWidths = [width]
2031        else: maxWidths = width
2032        lines = []
2033        self.height = lineno = 0
2034        maxlineno = len(maxWidths)-1
2035        style = self.style
2036        hyphenator = getattr(style,'hyphenationLang','')
2037        if hyphenator:
2038            if isStr(hyphenator):
2039                hyphenator = hyphenator.strip()
2040                if hyphenator and pyphen:
2041                    hyphenator = pyphen.Pyphen(lang=hyphenator).iterate
2042                else:
2043                    hyphenator = None
2044            elif not callable(hyphenator):
2045                raise ValueError('hyphenator should be a language spec or a callable unicode -->  pairs not %r' % hyphenator)
2046        else:
2047            hyphenator = None
2048        uriWasteReduce = style.uriWasteReduce
2049        embeddedHyphenation = style.embeddedHyphenation
2050        hyphenation2 = embeddedHyphenation>1
2051        spaceShrinkage = style.spaceShrinkage
2052        splitLongWords = style.splitLongWords
2053        attemptHyphenation = hyphenator or uriWasteReduce or embeddedHyphenation
2054        if attemptHyphenation:
2055            hymwl = getattr(style,'hyphenationMinWordLength',hyphenationMinWordLength)
2056        self._splitLongWordCount = self._hyphenations = 0
2057
2058        #for bullets, work out width and ensure we wrap the right amount onto line one
2059        _handleBulletWidth(self.bulletText,style,maxWidths)
2060
2061        maxWidth = maxWidths[0]
2062
2063        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
2064        calcBounds = autoLeading not in ('','off')
2065        frags = self.frags
2066        nFrags= len(frags)
2067        if (nFrags==1
2068                and not (style.endDots or hasattr(frags[0],'cbDefn') or hasattr(frags[0],'backColor')
2069                            or _processed_frags(frags))):
2070            f = frags[0]
2071            fontSize = f.fontSize
2072            fontName = f.fontName
2073            ascent, descent = getAscentDescent(fontName,fontSize)
2074            if hasattr(f,'text'):
2075                text = strip(f.text)
2076                if not text:
2077                    return f.clone(kind=0, lines=[],ascent=ascent,descent=descent,fontSize=fontSize)
2078                else:
2079                    words = split(text)
2080            else:
2081                words = f.words[:]
2082                for w in words:
2083                    if strip(w): break
2084                else:
2085                    return f.clone(kind=0, lines=[],ascent=ascent,descent=descent,fontSize=fontSize)
2086            spaceWidth = stringWidth(' ', fontName, fontSize, self.encoding)
2087            dSpaceShrink = spaceShrinkage*spaceWidth
2088            spaceShrink = 0
2089            cLine = []
2090            currentWidth = -spaceWidth   # hack to get around extra space for word 1
2091            hyw = stringWidth('-', fontName, fontSize, self.encoding)
2092            forcedSplit = 0
2093            while words:
2094                word = words.pop(0)
2095                if not word and isinstance(word,_SplitWord):
2096                    forcedSplit = 1
2097                elif _shy in word:
2098                    word = _SHYStr(word)    #allow for soft hyphenation
2099                #this underscores my feeling that Unicode throughout would be easier!
2100                wordWidth = stringWidth(word, fontName, fontSize, self.encoding)
2101                newWidth = currentWidth + spaceWidth + wordWidth
2102                if newWidth>maxWidth+spaceShrink and not (isinstance(word,_SplitWordH) or forcedSplit):
2103                    if isinstance(word,_SHYStr):
2104                        hsw = word.__shysplit__(
2105                                fontName, fontSize,
2106                                currentWidth + spaceWidth + hyw - 1e-8,
2107                                maxWidth+spaceShrink,
2108                                encoding = self.encoding,
2109                                )
2110                        if hsw:
2111                            words[0:0] = hsw
2112                            self._hyphenations += 1
2113                            forcedSplit = 1
2114                            continue
2115                        elif len(cLine):
2116                            nMW = maxWidths[min(maxlineno,lineno)]
2117                            if hyphenation2 or (word._fsww+hyw+1e-8)<=nMW:
2118                                hsw = word.__shysplit__(
2119                                    fontName, fontSize,
2120                                    0 + hyw - 1e-8,
2121                                    nMW,
2122                                    encoding = self.encoding,
2123                                    )
2124                                if hsw:
2125                                    words[0:0] = [word]
2126                                    forcedSplit = 1
2127                                    word = None
2128                                    newWidth = currentWidth
2129                    elif attemptHyphenation:
2130                        hyOk = not getattr(f,'nobr',False)
2131                        hsw = _hyphenateWord(hyphenator if hyOk else None,
2132                                fontName, fontSize, word, wordWidth, newWidth, maxWidth+spaceShrink,
2133                                    uriWasteReduce if hyOk else False,
2134                                    embeddedHyphenation and hyOk, hymwl)
2135                        if hsw:
2136                            words[0:0] = hsw
2137                            self._hyphenations += 1
2138                            forcedSplit = 1
2139                            continue
2140                        elif hyphenation2 and len(cLine):
2141                            hsw = _hyphenateWord(hyphenator if hyOk else None,
2142                                fontName, fontSize, word, wordWidth, wordWidth, maxWidth,
2143                                    uriWasteReduce if hyOk else False,
2144                                    embeddedHyphenation and hyOk, hymwl)
2145                            if hsw:
2146                                words[0:0] = [word]
2147                                forcedSplit = 1
2148                                newWidth = currentWidth
2149                                word = None
2150                    if splitLongWords and not (isinstance(word,_SplitWord) or forcedSplit):
2151                        nmw = min(lineno,maxlineno)
2152                        if wordWidth>max(maxWidths[nmw:nmw+1]):
2153                            #a long word
2154                            words[0:0] = _splitWord(word,currentWidth+spaceWidth,maxWidths,lineno,fontName,fontSize,self.encoding)
2155                            self._splitLongWordCount += 1
2156                            forcedSplit = 1
2157                            continue
2158                if newWidth <= (maxWidth+spaceShrink) or not len(cLine) or forcedSplit:
2159                    # fit one more on this line
2160                    if word: cLine.append(word)
2161                    if forcedSplit:
2162                        forcedSplit = 0
2163                        if newWidth > self._width_max: self._width_max = newWidth
2164                        lines.append((maxWidth - newWidth, cLine))
2165                        cLine = []
2166                        currentWidth = -spaceWidth
2167                        spaceShrink = 0
2168                        lineno += 1
2169                        maxWidth = maxWidths[min(maxlineno,lineno)]
2170                    else:
2171                        currentWidth = newWidth
2172                        spaceShrink += dSpaceShrink
2173                else:
2174                    if currentWidth > self._width_max: self._width_max = currentWidth
2175                    #end of line
2176                    lines.append((maxWidth - currentWidth, cLine))
2177                    cLine = [word]
2178                    spaceShrink = 0
2179                    currentWidth = wordWidth
2180                    lineno += 1
2181                    maxWidth = maxWidths[min(maxlineno,lineno)]
2182
2183            #deal with any leftovers on the final line
2184            if cLine!=[]:
2185                if currentWidth>self._width_max: self._width_max = currentWidth
2186                lines.append((maxWidth - currentWidth, cLine))
2187
2188            return f.clone(kind=0, lines=lines,ascent=ascent,descent=descent,fontSize=fontSize)
2189        elif nFrags<=0:
2190            return ParaLines(kind=0, fontSize=style.fontSize, fontName=style.fontName,
2191                            textColor=style.textColor, ascent=style.fontSize,descent=-0.2*style.fontSize,
2192                            lines=[])
2193        else:
2194            njlbv = not style.justifyBreaks
2195            words = []
2196            FW = []
2197            aFW = FW.append
2198            _words = _getFragWords(frags,maxWidth)
2199            sFW = 0
2200            while _words:
2201                w = _words.pop(0)
2202                aFW(w)
2203                f = w[-1][0]
2204                fontName = f.fontName
2205                fontSize = f.fontSize
2206
2207                if not words:
2208                    n = dSpaceShrink = spaceShrink = spaceWidth = currentWidth = 0
2209                    maxSize = fontSize
2210                    maxAscent, minDescent = getAscentDescent(fontName,fontSize)
2211
2212                wordWidth = w[0]
2213                f = w[1][0]
2214                if wordWidth>0:
2215                    newWidth = currentWidth + spaceWidth + wordWidth
2216                else:
2217                    newWidth = currentWidth
2218
2219                #test to see if this frag is a line break. If it is we will only act on it
2220                #if the current width is non-negative or the previous thing was a deliberate lineBreak
2221                lineBreak = f._fkind==_FK_BREAK
2222                if not lineBreak and newWidth>(maxWidth+spaceShrink) and not isinstance(w,_SplitFragH) and not hasattr(f,'cbDefn'):
2223                    if isinstance(w,_SHYWord):
2224                        hsw = w.shyphenate(newWidth, maxWidth+spaceShrink)
2225                        if hsw:
2226                            _words[0:0] = hsw
2227                            _words.insert(1,_InjectedFrag([0,(f.clone(_fkind=_FK_BREAK,text=''),'')]))
2228                            FW.pop(-1)  #remove this as we are doing this one again
2229                            self._hyphenations += 1
2230                            continue
2231                        elif len(FW)>1: #only if we are not the first word on the line
2232                            nMW = maxWidths[min(maxlineno,lineno)]  #next maxWidth or current one
2233                            if hyphenation2 or w._fsww+1e-8<=nMW:
2234                                hsw = w.shyphenate(wordWidth, nMW)
2235                                if hsw:
2236                                    _words[0:0] = [_InjectedFrag([0,(f.clone(_fkind=_FK_BREAK,text=''),'')]),w]
2237                                    FW.pop(-1)  #remove this as we are doing this one again
2238                                    continue
2239                        #else: try to split an overlong word
2240                    elif attemptHyphenation:
2241                        hyOk = not getattr(f,'nobr',False)
2242                        hsw = _hyphenateFragWord(hyphenator if hyOk else None,
2243                                    w,newWidth,maxWidth+spaceShrink,
2244                                    uriWasteReduce if hyOk else False,
2245                                    embeddedHyphenation and hyOk, hymwl)
2246                        if hsw:
2247                            _words[0:0] = hsw
2248                            _words.insert(1,_InjectedFrag([0,(f.clone(_fkind=_FK_BREAK,text=''),'')]))
2249                            FW.pop(-1)  #remove this as we are doing this one again
2250                            self._hyphenations += 1
2251                            continue
2252                        elif hyphenation2 and len(FW)>1:
2253                            hsw = _hyphenateFragWord(hyphenator if hyOk else None,
2254                                        w,wordWidth,maxWidth,
2255                                        uriWasteReduce if hyOk else False,
2256                                        embeddedHyphenation and hyOk, hymwl)
2257                            if hsw:
2258                                _words[0:0] = [_InjectedFrag([0,(f.clone(_fkind=_FK_BREAK,text=''),'')]),w]
2259                                FW.pop(-1)  #remove this as we are doing this one again
2260                                continue
2261                        #else: try to split an overlong word
2262                    if splitLongWords and not isinstance(w,_SplitFrag):
2263                        nmw = min(lineno,maxlineno)
2264                        if wordWidth>max(maxWidths[nmw:nmw+1]):
2265                            #a long word
2266                            _words[0:0] = _splitFragWord(w,maxWidth-spaceWidth-currentWidth,maxWidths,lineno)
2267                            _words.insert(1,_InjectedFrag([0,(f.clone(_fkind=_FK_BREAK,text=''),'')]))
2268                            FW.pop(-1)  #remove this as we are doing this one again
2269                            self._splitLongWordCount += 1
2270                            continue
2271                endLine = (newWidth>(maxWidth+spaceShrink) and n>0) or lineBreak
2272                if not endLine:
2273                    if lineBreak: continue      #throw it away
2274                    nText = w[1][1]
2275                    if nText: n += 1
2276                    fontSize = f.fontSize
2277                    if calcBounds:
2278                        if f._fkind==_FK_IMG:
2279                            descent,ascent = imgVRange(imgNormV(f.cbDefn.height,fontSize),f.cbDefn.valign,fontSize)
2280                        else:
2281                            ascent, descent = getAscentDescent(f.fontName,fontSize)
2282                    else:
2283                        ascent, descent = getAscentDescent(f.fontName,fontSize)
2284                    maxSize = max(maxSize,fontSize)
2285                    maxAscent = max(maxAscent,ascent)
2286                    minDescent = min(minDescent,descent)
2287                    if not words:
2288                        g = f.clone()
2289                        words = [g]
2290                        g.text = nText
2291                    elif not sameFrag(g,f):
2292                        if spaceWidth:
2293                            i = len(words)-1
2294                            while i>=0:
2295                                wi = words[i]
2296                                i -= 1
2297                                if wi._fkind==_FK_TEXT:
2298                                    if not wi.text.endswith(' '):
2299                                        wi.text += ' '
2300                                        spaceShrink += dSpaceShrink
2301                                    break
2302                        g = f.clone()
2303                        words.append(g)
2304                        g.text = nText
2305                    elif spaceWidth:
2306                        if not g.text.endswith(' '):
2307                            g.text += ' ' + nText
2308                            spaceShrink += dSpaceShrink
2309                        else:
2310                            g.text += nText
2311                    else:
2312                        g.text += nText
2313
2314                    spaceWidth = stringWidth(' ',fontName,fontSize) if isinstance(w,_HSFrag) else 0 #of the space following this word
2315                    dSpaceShrink = spaceWidth*spaceShrinkage
2316
2317                    ni = 0
2318                    for i in w[2:]:
2319                        g = i[0].clone()
2320                        g.text=i[1]
2321                        if g.text: ni = 1
2322                        words.append(g)
2323                        fontSize = g.fontSize
2324                        if calcBounds:
2325                            if g._fkind==_FK_IMG:
2326                                descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
2327                            else:
2328                                ascent, descent = getAscentDescent(g.fontName,fontSize)
2329                        else:
2330                            ascent, descent = getAscentDescent(g.fontName,fontSize)
2331                        maxSize = max(maxSize,fontSize)
2332                        maxAscent = max(maxAscent,ascent)
2333                        minDescent = min(minDescent,descent)
2334                    if not nText and ni:
2335                        #one bit at least of the word was real
2336                        n+=1
2337
2338                    currentWidth = newWidth
2339                else:  #either it won't fit, or it's a lineBreak tag
2340                    if lineBreak:
2341                        g = f.clone()
2342                        #del g.lineBreak
2343                        words.append(g)
2344
2345                    if currentWidth>self._width_max: self._width_max = currentWidth
2346                    #end of line
2347                    lines.append(FragLine(extraSpace=maxWidth-currentWidth, wordCount=n,
2348                                        lineBreak=lineBreak and njlbv, words=words, fontSize=maxSize, ascent=maxAscent, descent=minDescent, maxWidth=maxWidth,
2349                                        sFW=sFW))
2350                    sFW = len(FW)-1
2351
2352                    #start new line
2353                    lineno += 1
2354                    maxWidth = maxWidths[min(maxlineno,lineno)]
2355
2356                    if lineBreak:
2357                        words = []
2358                        continue
2359
2360                    spaceWidth = stringWidth(' ',fontName,fontSize) if isinstance(w,_HSFrag) else 0 #of the space following this word
2361                    dSpaceShrink = spaceWidth*spaceShrinkage
2362                    currentWidth = wordWidth
2363                    n = 1
2364                    spaceShrink = 0
2365                    g = f.clone()
2366                    maxSize = g.fontSize
2367                    if calcBounds:
2368                        if g._fkind==_FK_IMG:
2369                            descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
2370                        else:
2371                            maxAscent, minDescent = getAscentDescent(g.fontName,maxSize)
2372                    else:
2373                        maxAscent, minDescent = getAscentDescent(g.fontName,maxSize)
2374                    words = [g]
2375                    g.text = w[1][1]
2376
2377                    for i in w[2:]:
2378                        g = i[0].clone()
2379                        g.text=i[1]
2380                        words.append(g)
2381                        fontSize = g.fontSize
2382                        if calcBounds:
2383                            if g._fkind==_FK_IMG:
2384                                descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
2385                            else:
2386                                ascent, descent = getAscentDescent(g.fontName,fontSize)
2387                        else:
2388                            ascent, descent = getAscentDescent(g.fontName,fontSize)
2389                        maxSize = max(maxSize,fontSize)
2390                        maxAscent = max(maxAscent,ascent)
2391                        minDescent = min(minDescent,descent)
2392
2393            #deal with any leftovers on the final line
2394            if words:
2395                if currentWidth>self._width_max: self._width_max = currentWidth
2396                lines.append(ParaLines(extraSpace=(maxWidth - currentWidth),wordCount=n,lineBreak=False,
2397                                    words=words, fontSize=maxSize,ascent=maxAscent,descent=minDescent,maxWidth=maxWidth,sFW=sFW))
2398            self.frags = FW
2399            return ParaLines(kind=1, lines=lines)
2400
2401    def breakLinesCJK(self, maxWidths):
2402        """Initially, the dumbest possible wrapping algorithm.
2403        Cannot handle font variations."""
2404
2405        if not isinstance(maxWidths,(list,tuple)): maxWidths = [maxWidths]
2406        style = self.style
2407        self.height = 0
2408
2409        #for bullets, work out width and ensure we wrap the right amount onto line one
2410        _handleBulletWidth(self.bulletText, style, maxWidths)
2411        frags = self.frags
2412        nFrags = len(frags)
2413        if nFrags==1 and not hasattr(frags[0],'cbDefn') and not style.endDots:
2414            f = frags[0]
2415            if hasattr(self,'blPara') and getattr(self,'_splitpara',0):
2416                return f.clone(kind=0, lines=self.blPara.lines)
2417            #single frag case
2418            lines = []
2419            lineno = 0
2420            if hasattr(f,'text'):
2421                text = f.text
2422            else:
2423                text = ''.join(getattr(f,'words',[]))
2424
2425            from reportlab.lib.textsplit import wordSplit
2426            lines = wordSplit(text, maxWidths, f.fontName, f.fontSize)
2427            #the paragraph drawing routine assumes multiple frags per line, so we need an
2428            #extra list like this
2429            #  [space, [text]]
2430            #
2431            wrappedLines = [(sp, [line]) for (sp, line) in lines]
2432            return f.clone(kind=0, lines=wrappedLines, ascent=f.fontSize, descent=-0.2*f.fontSize)
2433        elif nFrags<=0:
2434            return ParaLines(kind=0, fontSize=style.fontSize, fontName=style.fontName,
2435                            textColor=style.textColor, lines=[],ascent=style.fontSize,descent=-0.2*style.fontSize)
2436
2437        #general case nFrags>1 or special
2438        if hasattr(self,'blPara') and getattr(self,'_splitpara',0):
2439            return self.blPara
2440        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
2441        calcBounds = autoLeading not in ('','off')
2442        return cjkFragSplit(frags, maxWidths, calcBounds)
2443
2444    def beginText(self, x, y):
2445        return self.canv.beginText(x, y)
2446
2447    def drawPara(self,debug=0):
2448        """Draws a paragraph according to the given style.
2449        Returns the final y position at the bottom. Not safe for
2450        paragraphs without spaces e.g. Japanese; wrapping
2451        algorithm will go infinite."""
2452
2453        #stash the key facts locally for speed
2454        canvas = self.canv
2455        style = self.style
2456        blPara = self.blPara
2457        lines = blPara.lines
2458        leading = style.leading
2459        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
2460
2461        #work out the origin for line 1
2462        leftIndent = style.leftIndent
2463        cur_x = leftIndent
2464
2465        if debug:
2466            bw = 0.5
2467            bc = Color(1,1,0)
2468            bg = Color(0.9,0.9,0.9)
2469        else:
2470            bw = getattr(style,'borderWidth',None)
2471            bc = getattr(style,'borderColor',None)
2472            bg = style.backColor
2473
2474        #if has a background or border, draw it
2475        if bg or (bc and bw):
2476            canvas.saveState()
2477            op = canvas.rect
2478            kwds = dict(fill=0,stroke=0)
2479            if bc and bw:
2480                canvas.setStrokeColor(bc)
2481                canvas.setLineWidth(bw)
2482                kwds['stroke'] = 1
2483                br = getattr(style,'borderRadius',0)
2484                if br and not debug:
2485                    op = canvas.roundRect
2486                    kwds['radius'] = br
2487            if bg:
2488                canvas.setFillColor(bg)
2489                kwds['fill'] = 1
2490            bp = getattr(style,'borderPadding',0)
2491            tbp, rbp, bbp, lbp = normalizeTRBL(bp)
2492            op(leftIndent - lbp,
2493                        -bbp,
2494                        self.width - (leftIndent+style.rightIndent) + lbp+rbp,
2495                        self.height + tbp+bbp,
2496                        **kwds)
2497            canvas.restoreState()
2498
2499        nLines = len(lines)
2500        bulletText = self.bulletText
2501        if nLines > 0:
2502            _offsets = getattr(self,'_offsets',[0])
2503            _offsets += (nLines-len(_offsets))*[_offsets[-1]]
2504            canvas.saveState()
2505            #canvas.addLiteral('%% %s.drawPara' % _className(self))
2506            alignment = style.alignment
2507            offset = style.firstLineIndent+_offsets[0]
2508            lim = nLines-1
2509            noJustifyLast = not getattr(self,'_JustifyLast',False)
2510            jllwc = style.justifyLastLine
2511            isRTL = style.wordWrap=='RTL'
2512            bRTL = isRTL and self._wrapWidths or False
2513
2514            if blPara.kind==0:
2515                if alignment == TA_LEFT:
2516                    dpl = _leftDrawParaLine
2517                elif alignment == TA_CENTER:
2518                    dpl = _centerDrawParaLine
2519                elif alignment == TA_RIGHT:
2520                    dpl = _rightDrawParaLine
2521                elif alignment == TA_JUSTIFY:
2522                    dpl = _justifyDrawParaLineRTL if isRTL else _justifyDrawParaLine
2523                f = blPara
2524                if paraFontSizeHeightOffset:
2525                    cur_y = self.height - f.fontSize
2526                else:
2527                    cur_y = self.height - getattr(f,'ascent',f.fontSize)
2528                if bulletText:
2529                    offset = _drawBullet(canvas,offset,cur_y,bulletText,style,rtl=bRTL)
2530
2531                #set up the font etc.
2532                canvas.setFillColor(f.textColor)
2533
2534                tx = self.beginText(cur_x, cur_y)
2535                tx.preformatted = 'preformatted' in self.__class__.__name__.lower()
2536                if autoLeading=='max':
2537                    leading = max(leading,blPara.ascent-blPara.descent)
2538                elif autoLeading=='min':
2539                    leading = blPara.ascent-blPara.descent
2540
2541                # set the paragraph direction
2542                tx.direction = self.style.wordWrap
2543
2544                #now the font for the rest of the paragraph
2545                tx.setFont(f.fontName, f.fontSize, leading)
2546                ws = lines[0][0]
2547                words = lines[0][1]
2548                lastLine = noJustifyLast and nLines==1
2549                if lastLine and jllwc and len(words)>jllwc:
2550                    lastLine=False
2551                t_off = dpl( tx, offset, ws, words, lastLine)
2552                if f.us_lines or f.link:# or style.endDots:
2553                    tx._do_line = MethodType(_do_line,tx)
2554                    tx.xs = xs = tx.XtraState = ABag()
2555                    _setTXLineProps(tx, canvas, style)
2556                    xs.cur_y = cur_y
2557                    xs.f = f
2558                    xs.style = style
2559                    xs.lines = lines
2560                    xs.link=f.link
2561                    xs.textColor = f.textColor
2562                    xs.backColors = []
2563                    dx = t_off+leftIndent
2564                    if alignment!=TA_JUSTIFY or lastLine: ws = 0
2565                    if f.us_lines:
2566                        _do_under_line(0, dx, ws, tx, f.us_lines)
2567                    if f.link: _do_link_line(0, dx, ws, tx)
2568                    #if lastLine and style.endDots and dpl!=_rightDrawParaLine: _do_dots(0, dx, ws, xs, tx, dpl)
2569
2570                    #now the middle of the paragraph, aligned with the left margin which is our origin.
2571                    for i in xrange(1, nLines):
2572                        ws = lines[i][0]
2573                        words = lines[i][1]
2574                        lastLine = noJustifyLast and i==lim
2575                        if lastLine and jllwc and len(words)>jllwc:
2576                            lastLine=False
2577                        t_off = dpl( tx, _offsets[i], ws, words, lastLine)
2578                        dx = t_off+leftIndent
2579                        if alignment!=TA_JUSTIFY or lastLine: ws = 0
2580                        if f.us_lines:
2581                            _do_under_line(i, t_off, ws, tx, f.us_lines)
2582                        if f.link: _do_link_line(i, dx, ws, tx)
2583                        #if lastLine and style.endDots and dpl!=_rightDrawParaLine: _do_dots(i, dx, ws, xs, tx, dpl)
2584                else:
2585                    for i in xrange(1, nLines):
2586                        words = lines[i][1]
2587                        lastLine = noJustifyLast and i==lim
2588                        if lastLine and jllwc and len(words)>jllwc:
2589                            lastLine=False
2590                        dpl( tx, _offsets[i], lines[i][0], words, lastLine)
2591            else:
2592                if isRTL:
2593                    for line in lines:
2594                        line.words = line.words[::-1]
2595                f = lines[0]
2596                if paraFontSizeHeightOffset:
2597                    cur_y = self.height - f.fontSize
2598                else:
2599                    cur_y = self.height - getattr(f,'ascent',f.fontSize)
2600                # default?
2601                dpl = _leftDrawParaLineX
2602                if bulletText:
2603                    oo = offset
2604                    offset = _drawBullet(canvas,offset,cur_y,bulletText,style, rtl=bRTL)
2605                if alignment == TA_LEFT:
2606                    dpl = _leftDrawParaLineX
2607                elif alignment == TA_CENTER:
2608                    dpl = _centerDrawParaLineX
2609                elif alignment == TA_RIGHT:
2610                    dpl = _rightDrawParaLineX
2611                elif alignment == TA_JUSTIFY:
2612                    dpl = _justifyDrawParaLineXRTL if isRTL else _justifyDrawParaLineX
2613                else:
2614                    raise ValueError("bad align %s" % repr(alignment))
2615
2616                #set up the font etc.
2617                tx = self.beginText(cur_x, cur_y)
2618                tx.preformatted = 'preformatted' in self.__class__.__name__.lower()
2619                _setTXLineProps(tx, canvas, style)
2620                tx._do_line = MethodType(_do_line,tx)
2621                # set the paragraph direction
2622                tx.direction = self.style.wordWrap
2623
2624                xs = tx.XtraState=ABag()
2625                xs.textColor=None
2626                xs.backColor=None
2627                xs.rise=0
2628                xs.backColors=[]
2629                xs.us_lines = {}
2630                xs.links = {}
2631                xs.link={}
2632                xs.leading = style.leading
2633                xs.leftIndent = leftIndent
2634                tx._leading = None
2635                tx._olb = None
2636                xs.cur_y = cur_y
2637                xs.f = f
2638                xs.style = style
2639                xs.autoLeading = autoLeading
2640                xs.paraWidth = self.width
2641
2642                tx._fontname,tx._fontsize = None, None
2643                line = lines[0]
2644                lastLine = noJustifyLast and nLines==1
2645                if lastLine and jllwc and line.wordCount>jllwc:
2646                    lastLine=False
2647                dpl( tx, offset, line, lastLine)
2648                _do_post_text(tx)
2649
2650                #now the middle of the paragraph, aligned with the left margin which is our origin.
2651                for i in xrange(1, nLines):
2652                    line = lines[i]
2653                    lastLine = noJustifyLast and i==lim
2654                    if lastLine and jllwc and line.wordCount>jllwc:
2655                        lastLine=False
2656                    dpl( tx, _offsets[i], line, lastLine)
2657                    _do_post_text(tx)
2658
2659            canvas.drawText(tx)
2660            canvas.restoreState()
2661
2662    def getPlainText(self,identify=None):
2663        """Convenience function for templates which want access
2664        to the raw text, without XML tags. """
2665        frags = getattr(self,'frags',None)
2666        if frags:
2667            plains = []
2668            plains_append = plains.append
2669            if _processed_frags(frags):
2670                for word in frags:
2671                    for style,text in word[1:]:
2672                        plains_append(text)
2673                    if isinstance(word,_HSFrag):
2674                        plains_append(' ')
2675            else:
2676                for frag in frags:
2677                    if hasattr(frag, 'text'):
2678                        plains_append(frag.text)
2679            return ''.join(plains)
2680        elif identify:
2681            text = getattr(self,'text',None)
2682            if text is None: text = repr(self)
2683            return text
2684        else:
2685            return ''
2686
2687    def getActualLineWidths0(self):
2688        """Convenience function; tells you how wide each line
2689        actually is.  For justified styles, this will be
2690        the same as the wrap width; for others it might be
2691        useful for seeing if paragraphs will fit in spaces."""
2692        assert hasattr(self, 'width'), "Cannot call this method before wrap()"
2693        if self.blPara.kind:
2694            func = lambda frag, w=self.width: w - frag.extraSpace
2695        else:
2696            func = lambda frag, w=self.width: w - frag[0]
2697        return list(map(func,self.blPara.lines))
2698
2699    @staticmethod
2700    def dumpFrags(frags,indent=4,full=False):
2701        R = ['[']
2702        aR = R.append
2703        for i,f in enumerate(frags):
2704            if full:
2705                aR('    [%r,' % f[0])
2706                for fx in f[1:]:
2707                    aR('        (%s,)' % repr(fx[0]))
2708                    aR('        %r),' % fx[1])
2709                    aR('    ], #%d %s' % (i,f.__class__.__name__))
2710                aR('    ]')
2711            else:
2712                aR('[%r, %s], #%d %s' % (f[0],', '.join(('(%s,%r)' % (fx[0].__class__.__name__,fx[1]) for fx in f[1:])),i,f.__class__.__name__))
2713        i = indent*' '
2714        return i + ('\n'+i).join(R)
2715
2716if __name__=='__main__':    #NORUNTESTS
2717    def dumpParagraphLines(P):
2718        print('dumpParagraphLines(<Paragraph @ %d>)' % id(P))
2719        lines = P.blPara.lines
2720        outw = sys.stdout.write
2721        for l,line in enumerate(lines):
2722            line = lines[l]
2723            if hasattr(line,'words'):
2724                words = line.words
2725            else:
2726                words = line[1]
2727            nwords = len(words)
2728            outw('line%d: %d(%s)\n  ' % (l,nwords,str(getattr(line,'wordCount','Unknown'))))
2729            for w in xrange(nwords):
2730                outw(" %d:'%s'"%(w,getattr(words[w],'text',words[w])))
2731            print()
2732
2733    def fragDump(w):
2734        R= ["'%s'" % w[1]]
2735        for a in ('fontName', 'fontSize', 'textColor', 'rise', 'underline', 'strike', 'link', 'cbDefn','lineBreak'):
2736            if hasattr(w[0],a):
2737                R.append('%s=%r' % (a,getattr(w[0],a)))
2738        return ', '.join(R)
2739
2740    def dumpParagraphFrags(P):
2741        print('dumpParagraphFrags(<Paragraph @ %d>) minWidth() = %.2f' % (id(P), P.minWidth()))
2742        frags = P.frags
2743        n =len(frags)
2744        for l in xrange(n):
2745            print("frag%d: '%s' %s" % (l, frags[l].text,' '.join(['%s=%s' % (k,getattr(frags[l],k)) for k in frags[l].__dict__ if k!=text])))
2746
2747        outw = sys.stdout.write
2748        l = 0
2749        cum = 0
2750        for W in _getFragWords(frags,360):
2751            cum += W[0]
2752            outw("fragword%d: cum=%3d size=%d" % (l, cum, W[0]))
2753            for w in W[1:]:
2754                outw(' (%s)' % fragDump(w))
2755            print()
2756            l += 1
2757
2758    def dumpProcessedFrags(P,label='processed_frags'):
2759        if isinstance(P2.frags[0],list):
2760            _F = {}
2761            _S = [].append
2762            def _showWord(w):
2763                t = [].append
2764                for _ in w[1:]:
2765                    fid = id(_[0])
2766                    if fid not in _F:
2767                        _F[fid] = (len(_F),_[0])
2768                    t('(__frag_%s__, %r)' % (_F[fid][0],_[1]))
2769                return '\x20\x20%s([%s, %s]),' % (w.__class__.__name__, w[0], ', '.join(t.__self__))
2770            for _ in P2.frags:
2771                _S(_showWord(_))
2772            print('from reportlab.platypus.paragraph import _HSFrag, _SplitFragHS, _SplitFragHY, _SplitFrag, _getFragWords\nfrom reportlab.platypus.paraparser import ParaFrag\nfrom reportlab.lib.colors import Color')
2773            print('\n'.join(('__frag_%s__ = %r' % _ for _ in sorted(_F.values()))))
2774            print('%s=[\n%s\x20\x20]' % (processed_frags,'\n'.join(_S.__self__)))
2775            print('print(_getFragWords(processed_frags))')
2776
2777
2778    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
2779    from reportlab.lib.units import cm
2780    import sys
2781    TESTS = sys.argv[1:]
2782    if TESTS==[]: TESTS=['4']
2783    def flagged(i,TESTS=TESTS):
2784        return 'all' in TESTS or '*' in TESTS or str(i) in TESTS
2785
2786    styleSheet = getSampleStyleSheet()
2787    B = styleSheet['BodyText']
2788    style = ParagraphStyle("discussiontext", parent=B)
2789    style.fontName= 'Helvetica'
2790    if flagged(1):
2791        text='''The <font name=courier color=green>CMYK</font> or subtractive method follows the way a printer
2792mixes three pigments (cyan, magenta, and yellow) to form colors.
2793Because mixing chemicals is more difficult than combining light there
2794is a fourth parameter for darkness.  For example a chemical
2795combination of the <font name=courier color=green>CMY</font> pigments generally never makes a perfect
2796black -- instead producing a muddy color -- so, to get black printers
2797don't use the <font name=courier color=green>CMY</font> pigments but use a direct black ink.  Because
2798<font name=courier color=green>CMYK</font> maps more directly to the way printer hardware works it may
2799be the case that &amp;| &amp; | colors specified in <font name=courier color=green>CMYK</font> will provide better fidelity
2800and better control when printed.
2801'''
2802        P=Paragraph(text,style)
2803        dumpParagraphFrags(P)
2804        aW, aH = 456.0, 42.8
2805        w,h = P.wrap(aW, aH)
2806        dumpParagraphLines(P)
2807        S = P.split(aW,aH)
2808        for s in S:
2809            s.wrap(aW,aH)
2810            dumpParagraphLines(s)
2811            aH = 500
2812
2813    if flagged(2):
2814        P=Paragraph("""Price<super><font color="red">*</font></super>""", styleSheet['Normal'])
2815        dumpParagraphFrags(P)
2816        w,h = P.wrap(24, 200)
2817        dumpParagraphLines(P)
2818
2819    if flagged(3):
2820        text = """Dieses Kapitel bietet eine schnelle <b><font color=red>Programme :: starten</font></b>
2821<onDraw name=myIndex label="Programme :: starten">
2822<b><font color=red>Eingabeaufforderung :: (&gt;&gt;&gt;)</font></b>
2823<onDraw name=myIndex label="Eingabeaufforderung :: (&gt;&gt;&gt;)">
2824<b><font color=red>&gt;&gt;&gt; (Eingabeaufforderung)</font></b>
2825<onDraw name=myIndex label="&gt;&gt;&gt; (Eingabeaufforderung)">
2826Einf&#xfc;hrung in Python <b><font color=red>Python :: Einf&#xfc;hrung</font></b>
2827<onDraw name=myIndex label="Python :: Einf&#xfc;hrung">.
2828Das Ziel ist, die grundlegenden Eigenschaften von Python darzustellen, ohne
2829sich zu sehr in speziellen Regeln oder Details zu verstricken. Dazu behandelt
2830dieses Kapitel kurz die wesentlichen Konzepte wie Variablen, Ausdr&#xfc;cke,
2831Kontrollfluss, Funktionen sowie Ein- und Ausgabe. Es erhebt nicht den Anspruch,
2832umfassend zu sein."""
2833        P=Paragraph(text, styleSheet['Code'])
2834        dumpParagraphFrags(P)
2835        w,h = P.wrap(6*72, 9.7*72)
2836        dumpParagraphLines(P)
2837
2838    if flagged(4):
2839        text='''Die eingebaute Funktion <font name=Courier>range(i, j [, stride])</font><onDraw name=myIndex label="eingebaute Funktionen::range()"><onDraw name=myIndex label="range() (Funktion)"><onDraw name=myIndex label="Funktionen::range()"> erzeugt eine Liste von Ganzzahlen und f&#xfc;llt sie mit Werten <font name=Courier>k</font>, f&#xfc;r die gilt: <font name=Courier>i &lt;= k &lt; j</font>. Man kann auch eine optionale Schrittweite angeben. Die eingebaute Funktion <font name=Courier>xrange()</font><onDraw name=myIndex label="eingebaute Funktionen::xrange()"><onDraw name=myIndex label="xrange() (Funktion)"><onDraw name=myIndex label="Funktionen::xrange()"> erf&#xfc;llt einen &#xe4;hnlichen Zweck, gibt aber eine unver&#xe4;nderliche Sequenz vom Typ <font name=Courier>XRangeType</font><onDraw name=myIndex label="XRangeType"> zur&#xfc;ck. Anstatt alle Werte in der Liste abzuspeichern, berechnet diese Liste ihre Werte, wann immer sie angefordert werden. Das ist sehr viel speicherschonender, wenn mit sehr langen Listen von Ganzzahlen gearbeitet wird. <font name=Courier>XRangeType</font> kennt eine einzige Methode, <font name=Courier>s.tolist()</font><onDraw name=myIndex label="XRangeType::tolist() (Methode)"><onDraw name=myIndex label="s.tolist() (Methode)"><onDraw name=myIndex label="Methoden::s.tolist()">, die seine Werte in eine Liste umwandelt.'''
2840        aW = 420
2841        aH = 64.4
2842        P=Paragraph(text, B)
2843        dumpParagraphFrags(P)
2844        w,h = P.wrap(aW,aH)
2845        print('After initial wrap',w,h)
2846        dumpParagraphLines(P)
2847        S = P.split(aW,aH)
2848        dumpParagraphFrags(S[0])
2849        w0,h0 = S[0].wrap(aW,aH)
2850        print('After split wrap',w0,h0)
2851        dumpParagraphLines(S[0])
2852
2853    if flagged(5):
2854        text = '<para> %s <![CDATA[</font></b>& %s < >]]></para>' % (chr(163),chr(163))
2855        P=Paragraph(text, styleSheet['Code'])
2856        dumpParagraphFrags(P)
2857        w,h = P.wrap(6*72, 9.7*72)
2858        dumpParagraphLines(P)
2859
2860    if flagged(6):
2861        for text in ['''Here comes <FONT FACE="Helvetica" SIZE="14pt">Helvetica 14</FONT> with <STRONG>strong</STRONG> <EM>emphasis</EM>.''',
2862                     '''Here comes <font face="Helvetica" size="14pt">Helvetica 14</font> with <Strong>strong</Strong> <em>emphasis</em>.''',
2863                     '''Here comes <font face="Courier" size="3cm">Courier 3cm</font> and normal again.''',
2864                     ]:
2865            P=Paragraph(text, styleSheet['Normal'], caseSensitive=0)
2866            dumpParagraphFrags(P)
2867            w,h = P.wrap(6*72, 9.7*72)
2868            dumpParagraphLines(P)
2869
2870    if flagged(7):
2871        text = """<para align="CENTER" fontSize="24" leading="30"><b>Generated by:</b>Dilbert</para>"""
2872        P=Paragraph(text, styleSheet['Code'])
2873        dumpParagraphFrags(P)
2874        w,h = P.wrap(6*72, 9.7*72)
2875        dumpParagraphLines(P)
2876
2877    if flagged(8):
2878        text ="""- bullet 0<br/>- bullet 1<br/>- bullet 2<br/>- bullet 3<br/>- bullet 4<br/>- bullet 5"""
2879        P=Paragraph(text, styleSheet['Normal'])
2880        dumpParagraphFrags(P)
2881        w,h = P.wrap(6*72, 9.7*72)
2882        dumpParagraphLines(P)
2883        S = P.split(6*72,h/2.0)
2884        print(len(S))
2885        dumpParagraphFrags(S[0])
2886        dumpParagraphLines(S[0])
2887        S[1].wrap(6*72, 9.7*72)
2888        dumpParagraphFrags(S[1])
2889        dumpParagraphLines(S[1])
2890
2891
2892    if flagged(9):
2893        text="""Furthermore, the fundamental error of
2894regarding <img src="../docs/images/testimg.gif" width="3" height="7"/> functional notions as
2895categorial delimits a general
2896convention regarding the forms of the<br/>
2897grammar. I suggested that these results
2898would follow from the assumption that"""
2899        P=Paragraph(text,ParagraphStyle('aaa',parent=styleSheet['Normal'],align=TA_JUSTIFY))
2900        dumpParagraphFrags(P)
2901        w,h = P.wrap(6*cm-12, 9.7*72)
2902        dumpParagraphLines(P)
2903
2904    if flagged(10):
2905        text="""a b c\xc2\xa0d e f"""
2906        P=Paragraph(text,ParagraphStyle('aaa',parent=styleSheet['Normal'],align=TA_JUSTIFY))
2907        dumpParagraphFrags(P)
2908        w,h = P.wrap(6*cm-12, 9.7*72)
2909        dumpParagraphLines(P)
2910
2911    if flagged(11):
2912        text="""This page tests out a number of attributes of the <b>paraStyle</b><onDraw name="_indexAdd" label="paraStyle"/> tag.
2913This paragraph is in a style we have called "style1". It should be a normal <onDraw name="_indexAdd" label="normal"/> paragraph, set in Courier 12 pt.
2914It should be a normal<onDraw name="_indexAdd" label="normal"/> paragraph, set in Courier (not bold).
2915It should be a normal<onDraw name="_indexAdd" label="normal"/> paragraph, set in Courier 12 pt."""
2916        P=Paragraph(text,style=ParagraphStyle('style1',fontName="Courier",fontSize=10))
2917        dumpParagraphFrags(P)
2918        w,h = P.wrap(6.27*72-12,10000)
2919        dumpParagraphLines(P)
2920