1#!/usr/bin/env python3
2
3#******************************************************************************
4# nodeformat.py, provides a class to handle node format objects
5#
6# TreeLine, an information storage program
7# Copyright (C) 2019, Douglas W. Bell
8#
9# This is free software; you can redistribute it and/or modify it under the
10# terms of the GNU General Public License, either Version 2 or any later
11# version.  This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY.  See the included LICENSE file for details.
13#******************************************************************************
14
15import re
16import collections
17import os.path
18import sys
19import copy
20import operator
21import datetime
22import xml.sax.saxutils
23if not sys.platform.startswith('win'):
24    import pwd
25import fieldformat
26import conditional
27
28
# name of the field NodeFormat adds when addDefaultField is set;
# _() is neither defined nor imported in this file -- presumably a
# translation function installed as a builtin elsewhere (TODO confirm)
defaultFieldName = _('Name')
# fallback used when format data has no 'outputsep' entry
_defaultOutputSeparator = ', '
# splits a format line while keeping {*name*} field references as separate
# segments; the name may be preceded by zero or more '*' or one of ? ! & #
_fieldSplitRe = re.compile(r'({\*(?:\**|\?|!|&|#)[\w_\-.]+\*})')
# matches one field reference: group 1 is the modifier, group 2 is the name
_fieldPartRe = re.compile(r'{\*(\**|\?|!|&|#)([\w_\-.]+)\*}')
# matches text that ends with a <br> or <hr> tag (all lower or all upper
# case), capturing that tag in group 1
_endTagRe = re.compile(r'.*(<br[ /]*?>|<BR[ /]*?>|<hr[ /]*?>|<HR[ /]*?>)$')
# captures the trailing digits of a name that starts with non-digits
_levelFieldRe = re.compile(r'[^0-9]+([0-9]+)$')
35
36class NodeFormat:
37    """Class to handle node format info
38
39    Stores node field lists and line formatting.
40    Provides methods to return formatted data.
41    """
42    def __init__(self, name, parentFormats, formatData=None,
43                 addDefaultField=False):
44        """Initialize a tree format.
45
46        Arguments:
47            name -- the type name string
48            parentFormats -- a ref to TreeFormats class for outside field refs
49            formatData -- the JSON dict for this format
50            addDefaultField -- if true, adds a default initial field
51        """
52        self.name = name
53        self.parentFormats = parentFormats
54        self.savedConditionText = {}
55        self.conditional = None
56        self.childTypeLimit = set()
57        self.readFormat(formatData)
58        self.siblingPrefix = ''
59        self.siblingSuffix = ''
60        self.derivedTypes = []
61        self.origOutputLines = [] # lines without bullet or table modifications
62        self.sortFields = []   # temporary storage while sorting
63        if addDefaultField:
64            self.addFieldIfNew(defaultFieldName)
65            self.titleLine = ['{{*{0}*}}'.format(defaultFieldName)]
66            self.outputLines = [['{{*{0}*}}'.format(defaultFieldName)]]
67        self.updateLineParsing()
68        if self.useBullets:
69            self.addBullets()
70        if self.useTables:
71            self.addTables()
72
73    def readFormat(self, formatData=None):
74        """Read JSON format data into this format.
75
76        Arguments:
77            formatData -- JSON dict for this format (None for default settings)
78        """
79        self.fieldDict = collections.OrderedDict()
80        if formatData:
81            for fieldData in formatData.get('fields', []):
82                fieldName = fieldData['fieldname']
83                self.addField(fieldName, fieldData)
84        else:
85            formatData = {}
86        self.titleLine = [formatData.get('titleline', '')]
87        self.outputLines = [[line] for line in
88                            formatData.get('outputlines', [])]
89        self.spaceBetween = formatData.get('spacebetween', True)
90        self.formatHtml = formatData.get('formathtml', False)
91        self.useBullets = formatData.get('bullets', False)
92        self.useTables = formatData.get('tables', False)
93        self.childType = formatData.get('childtype', '')
94        self.genericType = formatData.get('generic', '')
95        if 'condition' in formatData:
96            self.conditional = conditional.Conditional(formatData['condition'])
97        if 'childTypeLimit' in formatData:
98            self.childTypeLimit = set(formatData['childTypeLimit'])
99        self.iconName = formatData.get('icon', '')
100        self.outputSeparator = formatData.get('outputsep',
101                                              _defaultOutputSeparator)
102        for key in formatData.keys():
103            if key.startswith('cond-'):
104                self.savedConditionText[key[5:]] = formatData[key]
105
106    def storeFormat(self):
107        """Return JSON format data for this format.
108        """
109        formatData = {}
110        formatData['formatname'] = self.name
111        formatData['fields'] = [field.formatData() for field in self.fields()]
112        formatData['titleline'] = self.getTitleLine()
113        formatData['outputlines'] = self.getOutputLines()
114        if not self.spaceBetween:
115            formatData['spacebetween'] = False
116        if self.formatHtml:
117            formatData['formathtml'] = True
118        if self.useBullets:
119            formatData['bullets'] = True
120        if self.useTables:
121            formatData['tables'] = True
122        if self.childType:
123            formatData['childtype'] = self.childType
124        if self.genericType:
125            formatData['generic'] = self.genericType
126        if self.conditional:
127            formatData['condition'] = self.conditional.conditionStr()
128        if self.childTypeLimit:
129            formatData['childTypeLimit'] = sorted(list(self.childTypeLimit))
130        if self.iconName:
131            formatData['icon'] = self.iconName
132        if self.outputSeparator != _defaultOutputSeparator:
133            formatData['outputsep'] = self.outputSeparator
134        for key, text in self.savedConditionText.items():
135            formatData['cond-' + key] = text
136        return formatData
137
138    def copySettings(self, sourceFormat):
139        """Copy all settings from another format to this one.
140
141        Arguments:
142            sourceFormat -- the format to copy
143        """
144        self.name = sourceFormat.name
145        self.readFormat(sourceFormat.storeFormat())
146        self.siblingPrefix = sourceFormat.siblingPrefix
147        self.siblingSuffix = sourceFormat.siblingSuffix
148        self.outputLines = sourceFormat.getOutputLines(False)
149        self.origOutputLines = sourceFormat.getOutputLines()
150        self.updateLineParsing()
151
152    def fields(self):
153        """Return list of all fields.
154        """
155        return self.fieldDict.values()
156
157    def fieldNames(self):
158        """Return list of names of all fields.
159        """
160        return list(self.fieldDict.keys())
161
162    def formatTitle(self, node, spotRef=None):
163        """Return a string with formatted title data.
164
165        Arguments:
166            node -- the node used to get data for fields
167            spotRef -- optional, used for ancestor field refs
168        """
169        line = ''.join([part.outputText(node, True, True, self.formatHtml)
170                        if hasattr(part, 'outputText') else part
171                        for part in self.titleLine])
172        return line.strip()
173
174    def formatOutput(self, node, plainText=False, keepBlanks=False,
175                     spotRef=None):
176        """Return a list of formatted text output lines.
177
178        Arguments:
179            node -- the node used to get data for fields
180            plainText -- if True, remove HTML markup from fields and formats
181            keepBlanks -- if True, keep lines with empty fields
182            spotRef -- optional, used for ancestor field refs
183        """
184        result = []
185        for lineData in self.outputLines:
186            line = ''
187            numEmptyFields = 0
188            numFullFields = 0
189            for part in lineData:
190                if hasattr(part, 'outputText'):
191                    text = part.outputText(node, False, plainText,
192                                           self.formatHtml)
193                    if text:
194                        numFullFields += 1
195                    else:
196                        numEmptyFields += 1
197                    line += text
198                else:
199                    if not self.formatHtml and not plainText:
200                        part = xml.sax.saxutils.escape(part)
201                    elif self.formatHtml and plainText:
202                        part = fieldformat.removeMarkup(part)
203                    line += part
204            if keepBlanks or numFullFields or not numEmptyFields:
205                result.append(line)
206            elif self.formatHtml and not plainText and result:
207                # add ending HTML tag from skipped line back to previous line
208                endTagMatch = _endTagRe.match(line)
209                if endTagMatch:
210                    result[-1] += endTagMatch.group(1)
211        return result
212
213    def addField(self, name, fieldData=None):
214        """Add a field type with its format to the field list.
215
216        Arguments:
217            name -- the field name string
218            fieldData -- the dict that defines this field's format
219        """
220        if not fieldData:
221            fieldData = {}
222        typeName = '{}Field'.format(fieldData.get('fieldtype', 'Text'))
223        fieldClass = getattr(fieldformat, typeName, fieldformat.TextField)
224        field = fieldClass(name, fieldData)
225        self.fieldDict[name] = field
226
227    def addFieldIfNew(self, name, fieldData=None):
228        """Add a field type to the field list if not already there.
229
230        Arguments:
231            name -- the field name string
232            fieldData -- the dict that defines this field's format
233        """
234        if name not in self.fieldDict:
235            self.addField(name, fieldData)
236
237    def addFieldList(self, nameList, addFirstTitle=False, addToOutput=False):
238        """Add text fields with names given in list.
239
240        Also add to title and output lines if addOutput is True.
241        Arguments:
242            nameList -- the list of names to add
243            addFirstTitle -- if True, use first field for title output format
244            addToOutput -- replace output lines with all fields if True
245        """
246        for name in nameList:
247            self.addFieldIfNew(name)
248        if addFirstTitle:
249            self.changeTitleLine('{{*{0}*}}'.format(nameList[0]))
250        if addToOutput:
251            self.changeOutputLines(['{{*{0}*}}'.format(name) for name in
252                                    nameList])
253
254    def reorderFields(self, fieldNameList):
255        """Change the order of fieldDict to match the given list.
256
257        Arguments:
258            fieldNameList -- a list of existing field names in a desired order
259        """
260        newFieldDict = collections.OrderedDict()
261        for fieldName in fieldNameList:
262            newFieldDict[fieldName] = self.fieldDict[fieldName]
263        self.fieldDict = newFieldDict
264
265    def removeField(self, field):
266        """Remove all occurances of field from title and output lines.
267
268        Arguments:
269            field -- the field to be removed
270        """
271        while field in self.titleLine:
272            self.titleLine.remove(field)
273        for lineData in self.outputLines:
274            while field in lineData:
275                lineData.remove(field)
276        self.outputLines = [line for line in self.outputLines if line]
277        # if len(self.lineList) == 0:
278            # self.lineList.append([''])
279
280    def setInitDefaultData(self, data, overwrite=False):
281        """Add initial default data from fields into supplied data dict.
282
283        Arguments:
284            data -- the data dict to modify
285            overwrite -- if true, replace previous data entries
286        """
287        for field in self.fields():
288            text = field.getInitDefault()
289            if text and (overwrite or not data.get(field.name, '')):
290                data[field.name] = text
291
292    def updateLineParsing(self):
293        """Update the fields parsed in the output lines.
294
295        Converts lines back to whole lines with embedded field names,
296        then parse back to individual fields and text.
297        """
298        self.titleLine = self.parseLine(self.getTitleLine())
299        self.outputLines = [self.parseLine(line) for line in
300                            self.getOutputLines(False)]
301        if self.origOutputLines:
302            self.origOutputLines = [self.parseLine(line) for line in
303                                    self.getOutputLines(True)]
304
305    def parseLine(self, text):
306        """Parse text format line, return list of field types and text.
307
308        Splits the line into field and text segments.
309        Arguments:
310            text -- the raw format text line to be parsed
311        """
312        text = ' '.join(text.split())
313        segments = (part for part in _fieldSplitRe.split(text) if part)
314        return [self.parseField(part) for part in segments]
315
316    def parseField(self, text):
317        """Parse text field, return field type or plain text if not a field.
318
319        Arguments:
320            text -- the raw format text (could be a field)
321        """
322        fieldMatch = _fieldPartRe.match(text)
323        if fieldMatch:
324            modifier = fieldMatch.group(1)
325            fieldName = fieldMatch.group(2)
326            try:
327                if not modifier:
328                    return self.fieldDict[fieldName]
329                elif modifier == '*' * len(modifier):
330                    return fieldformat.AncestorLevelField(fieldName,
331                                                          len(modifier))
332                elif modifier == '?':
333                    return fieldformat.AnyAncestorField(fieldName)
334                elif modifier == '&':
335                    return fieldformat.ChildListField(fieldName)
336                elif modifier == '#':
337                    match = _levelFieldRe.match(fieldName)
338                    if match and match.group(1) != '0':
339                        level = int(match.group(1))
340                        return fieldformat.DescendantCountField(fieldName,
341                                                                level)
342                elif modifier == '!':
343                    return (self.parentFormats.fileInfoFormat.
344                            fieldDict[fieldName])
345            except KeyError:
346                pass
347        return text
348
349    def getTitleLine(self):
350        """Return text of title format with field names embedded.
351        """
352        return ''.join([part.sepName() if hasattr(part, 'sepName') else part
353                        for part in self.titleLine])
354
355    def getOutputLines(self, useOriginal=True):
356        """Return text list of output format lines with field names embedded.
357
358        Arguments:
359            useOriginal -- use original line list, wothout bullet or table mods
360        """
361        lines = self.outputLines
362        if useOriginal and self.origOutputLines:
363            lines = self.origOutputLines
364        lines = [''.join([part.sepName() if hasattr(part, 'sepName') else part
365                          for part in line])
366                 for line in lines]
367        return lines if lines else ['']
368
369    def changeTitleLine(self, text):
370        """Replace the title format line.
371
372        Arguments:
373            text -- the new title format line
374        """
375        self.titleLine = self.parseLine(text)
376        if not self.titleLine:
377            self.titleLine = ['']
378
379    def changeOutputLines(self, lines, keepBlanks=False):
380        """Replace the output format lines with given list.
381
382        Arguments:
383            lines -- a list of replacement format lines
384            keepBlanks -- if False, ignore blank lines
385        """
386        self.outputLines = []
387        for line in lines:
388            newLine = self.parseLine(line)
389            if keepBlanks or newLine:
390                self.outputLines.append(newLine)
391        if self.useBullets:
392            self.origOutputLines = self.outputLines[:]
393            self.addBullets()
394        if self.useTables:
395            self.origOutputLines = self.outputLines[:]
396            self.addTables()
397
398    def addOutputLine(self, line):
399        """Add an output format line after existing lines.
400
401        Arguments:
402            line -- the text line to add
403        """
404        newLine = self.parseLine(line)
405        if newLine:
406            self.outputLines.append(newLine)
407
408    def extractTitleData(self, titleString, data):
409        """Modifies the data dictionary based on a title string.
410
411        Match the title format to the string, return True if successful.
412        Arguments:
413            title -- the string with the new title
414            data -- the data dictionary to be modified
415        """
416        fields = []
417        pattern = ''
418        extraText = ''
419        for seg in self.titleLine:
420            if hasattr(seg, 'name'):  # a field segment
421                fields.append(seg)
422                pattern += '(.*)'
423            else:                     # a text separator
424                pattern += re.escape(seg)
425                extraText += seg
426        match = re.match(pattern, titleString)
427        try:
428            if match:
429                for num, field in enumerate(fields):
430                    text = match.group(num + 1)
431                    data[field.name] = field.storedTextFromTitle(text)
432            elif not extraText.strip():
433                # assign to 1st field if sep is only spaces
434                text = fields[0].storedTextFromTitle(titleString)
435                data[fields[0].name] = text
436                for field in fields[1:]:
437                    data[field.name] = ''
438            else:
439                return False
440        except ValueError:
441            return False
442        return True
443
444    def updateDerivedTypes(self):
445        """Update derived types after changes to this generic type.
446        """
447        for derivedType in self.derivedTypes:
448            derivedType.updateFromGeneric(self)
449
450    def updateFromGeneric(self, genericType=None, formatsRef=None):
451        """Update fields and field types to match a generic type.
452
453        Does nothing if self is not a derived type.
454        Must provide either the genericType or a formatsRef.
455        Arguments:
456            genericType -- the type to update from
457            formatsRef -- the tree formats dict to update from
458        """
459        if not self.genericType:
460            return
461        if not genericType:
462            genericType = formatsRef[self.genericType]
463        newFields = collections.OrderedDict()
464        for field in genericType.fieldDict.values():
465            fieldMatch = self.fieldDict.get(field.name, None)
466            if fieldMatch and field.typeName == fieldMatch.typeName:
467                newFields[field.name] = fieldMatch
468            else:
469                newFields[field.name] = copy.deepcopy(field)
470        self.fieldDict = newFields
471        self.updateLineParsing()
472
473    def addBullets(self):
474        """Add bullet HTML tags to sibling prefix, suffix and output lines.
475        """
476        self.siblingPrefix = '<ul>'
477        self.siblingSuffix = '</ul>'
478        lines = self.getOutputLines()
479        if lines != ['']:
480            lines[0] = '<li>' + lines[0]
481            lines[-1] += '</li>'
482        self.origOutputLines = self.outputLines[:]
483        self.outputLines = lines
484        self.updateLineParsing()
485
486    def addTables(self):
487        """Add table HTML tags to sibling prefix, suffix and output lines.
488        """
489        lines = [line for line in self.getOutputLines() if line]
490        newLines = []
491        headings = []
492        for line in lines:
493            head = ''
494            firstPart = self.parseLine(line)[0]
495            if hasattr(firstPart, 'split') and ':' in firstPart:
496                head, line = line.split(':', 1)
497            newLines.append(line.strip())
498            headings.append(head.strip())
499        self.siblingPrefix = '<table border="1" cellpadding="3">'
500        if [head for head in headings if head]:
501            self.siblingPrefix += '<tr>'
502            for head in headings:
503                self.siblingPrefix = ('{0}<th>{1}</th>'.
504                                      format(self.siblingPrefix, head))
505            self.siblingPrefix += '</tr>'
506        self.siblingSuffix = '</table>'
507        newLines = ['<td>{0}</td>'.format(line) for line in newLines]
508        newLines[0] = '<tr>' + newLines[0]
509        newLines[-1] += '</tr>'
510        self.origOutputLines = self.outputLines[:]
511        self.outputLines = newLines
512        self.updateLineParsing()
513
514    def clearBulletsAndTables(self):
515        """Remove any HTML tags for bullets and tables.
516        """
517        self.siblingPrefix = ''
518        self.siblingSuffix = ''
519        if self.origOutputLines:
520            self.outputLines = self.origOutputLines
521            self.updateLineParsing()
522        self.origOutputLines = []
523
524    def numberingFieldList(self):
525        """Return a list of numbering field names.
526        """
527        return [field.name for field in self.fieldDict.values() if
528                field.typeName == 'Numbering']
529
530    def loadSortFields(self):
531        """Add sort fields to temporarily stored list.
532
533        Only used for efficiency while sorting.
534        """
535        self.sortFields = [field for field in self.fields() if
536                           field.sortKeyNum > 0]
537        self.sortFields.sort(key = operator.attrgetter('sortKeyNum'))
538        if not self.sortFields:
539            self.sortFields = [list(self.fields())[0]]
540
541
class FileInfoFormat(NodeFormat):
    """Node format class to store and update special file info fields.

    Fields used in print header/footer and in outputs of other node types.
    """
    typeName = 'INT_TL_FILE_DATA_FORM'
    fileFieldName = 'File_Name'
    pathFieldName = 'File_Path'
    sizeFieldName = 'File_Size'
    dateFieldName = 'File_Mod_Date'
    timeFieldName = 'File_Mod_Time'
    ownerFieldName = 'File_Owner'
    pageNumFieldName = 'Page_Number'
    numPagesFieldName = 'Number_of_Pages'
    def __init__(self, parentFormats):
        """Create a file info format with its fixed set of fields.

        Arguments:
            parentFormats -- a ref to TreeFormats class, passed to NodeFormat
        """
        super().__init__(FileInfoFormat.typeName, parentFormats)
        self.fieldFormatModified = False
        self.addField(FileInfoFormat.fileFieldName)
        self.addField(FileInfoFormat.pathFieldName)
        self.addField(FileInfoFormat.sizeFieldName, {'fieldtype': 'Number'})
        self.addField(FileInfoFormat.dateFieldName, {'fieldtype': 'Date'})
        self.addField(FileInfoFormat.timeFieldName, {'fieldtype': 'Time'})
        # the owner lookup uses pwd, which is not imported on Windows
        if not sys.platform.startswith('win'):
            self.addField(FileInfoFormat.ownerFieldName)
        # page info only for print header:
        self.addField(FileInfoFormat.pageNumFieldName)
        self.fieldDict[FileInfoFormat.pageNumFieldName].showInDialog = False
        self.addField(FileInfoFormat.numPagesFieldName)
        self.fieldDict[FileInfoFormat.numPagesFieldName].showInDialog = False
        for field in self.fields():
            field.useFileInfo = True

    def updateFileInfo(self, fileObj, fileInfoNode):
        """Update data of file info node.

        The node data is replaced with an empty dict if the file status
        can not be read.  All stored values are strings.
        Arguments:
            fileObj -- the TreeLine file path object
            fileInfoNode -- the node to update
        """
        try:
            status = fileObj.stat()
        except (AttributeError, OSError):
            fileInfoNode.data = {}
            return
        data = fileInfoNode.data
        data[FileInfoFormat.fileFieldName] = fileObj.name
        # store the directory as text, like the other entries, instead of
        # as a path object
        data[FileInfoFormat.pathFieldName] = str(fileObj.parent)
        data[FileInfoFormat.sizeFieldName] = str(status.st_size)
        modDateTime = datetime.datetime.fromtimestamp(status.st_mtime)
        modDate = modDateTime.date().strftime(fieldformat.DateField.isoFormat)
        modTime = modDateTime.time().strftime(fieldformat.TimeField.isoFormat)
        data[FileInfoFormat.dateFieldName] = modDate
        data[FileInfoFormat.timeFieldName] = modTime
        if not sys.platform.startswith('win'):
            try:
                owner = pwd.getpwuid(status.st_uid)[0]
            except KeyError:
                # no password database entry, fall back to the numeric id
                owner = repr(status.st_uid)
            data[FileInfoFormat.ownerFieldName] = owner

    def duplicateFileInfo(self, altFileFormat):
        """Copy field format settings from alternate file format.

        Sets fieldFormatModified if any format, prefix or suffix is copied.
        Arguments:
            altFileFormat -- the file info format to copy from
        """
        for field in self.fields():
            altField = altFileFormat.fieldDict.get(field.name)
            if altField:
                if field.format != altField.format:
                    field.setFormat(altField.format)
                    self.fieldFormatModified = True
                if altField.prefix:
                    field.prefix = altField.prefix
                    self.fieldFormatModified = True
                if altField.suffix:
                    field.suffix = altField.suffix
                    self.fieldFormatModified = True
621
622
class DescendantCountFormat(NodeFormat):
    """Placeholder format for child count fields.

    Should not show up in main format type list.
    """
    countFieldName = 'Level'
    def __init__(self):
        """Create the format with count fields for levels 1 through 3.
        """
        super().__init__('CountFormat', None)
        for level in range(1, 4):
            name = '{0}{1}'.format(DescendantCountFormat.countFieldName,
                                   level)
            self.fieldDict[name] = fieldformat.DescendantCountField(name,
                                                                    level)
636