1"""
2Utility widgets/helper for reading csv like files.
3
4Contents
5--------
6
7* CSVOptionsWidget
8  Edit options for interpreting a csv file
9
10* CSVImportWidget
11  Read and preview part of the file
12
13* TablePreviewModel
  A QAbstractTableModel feeding data from a csv.reader like rows iterator
15  implementing lazy iterative loading (`QAbstractItemModel.fetchMore`)
16
17"""
18# TODO: Consider a wizard-like interface:
19#   * 1. Select encoding, delimiter, ... (preview is all text)
20#   * 2. Define column types (preview is parsed and rendered type appropriate)
21
22import sys
23import io
24import enum
25import codecs
26import csv
27import traceback
28import itertools
29
30from functools import singledispatch
31from collections import defaultdict
32from types import MappingProxyType
33
34import typing
35from typing import (
36    List, Tuple, Dict, Iterator, Optional, Any, Union, Callable, Mapping
37)
38
39from AnyQt.QtCore import (
40    Qt, QSize, QPoint, QRect, QRectF, QRegularExpression, QAbstractTableModel,
41    QModelIndex, QItemSelectionModel, QTextBoundaryFinder, QTimer, QEvent
42)
43from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot
44from AnyQt.QtGui import (
45    QRegularExpressionValidator, QColor, QBrush, QPalette, QHelpEvent,
46    QStandardItemModel, QStandardItem, QIcon, QIconEngine, QPainter, QPixmap,
47    QFont
48)
49from AnyQt.QtWidgets import (
50    QWidget, QComboBox, QFormLayout, QHBoxLayout, QVBoxLayout, QLineEdit,
51    QHeaderView, QFrame, QTableView, QMenu, QLabel, QAction, QActionGroup,
52    QStyleOptionFrame, QStyle, QStyledItemDelegate, QStyleOptionViewItem,
53    QApplication, QAbstractItemView, QToolTip, QStyleOption
54)
55
56from Orange.widgets.utils import encodings
57from Orange.widgets.utils.overlay import OverlayWidget
58
59
60__all__ = ["ColumnType", "RowSpec", "CSVOptionsWidget", "CSVImportWidget"]
61
62if typing.TYPE_CHECKING:
63    # pylint: disable=invalid-name
64    _A = typing.TypeVar("_A")
65    _B = typing.TypeVar("_B")
66
67
class StampIconEngine(QIconEngine):
    """
    An icon engine painting a single character 'stamp'.

    The icon is a rounded rectangle filled with the color of `brush`, with
    `char` drawn centered over it in a bold white font.
    """
    def __init__(self, char, brush):
        # type: (str, Union[QBrush, QColor]) -> None
        super().__init__()
        self.__char = char
        self.__brush = QBrush(brush)

    def paint(self, painter, rect, mode, state):
        # type: (QPainter, QRect, QIcon.Mode, QIcon.State) -> None
        """Draw the icon pixmap into `rect` using `painter`."""
        target = rect.size()
        if target.isNull():
            return  # pragma: no cover
        # Not every paint device provides `devicePixelRatioF`; use 1.0 then.
        try:
            ratio = painter.device().devicePixelRatioF()
        except AttributeError:
            ratio = 1.0
        if ratio != 1.0:
            # request a pixmap scaled by the device pixel ratio
            target = target * ratio
        painter.drawPixmap(rect, self.pixmap(target, mode, state))

    def pixmap(self, size, mode, state):
        # type: (QSize, QIcon.Mode, QIcon.State) -> QPixmap
        """Render and return the icon as a pixmap of the requested `size`."""
        result = QPixmap(size)
        result.fill(Qt.transparent)

        # All geometry is derived from the requested width only.
        side = size.width()
        margin = 1 + side // 16
        text_margin = side // 20
        inner = side - 2 * margin
        frame = QRectF(margin, margin, inner, inner)
        fill = self.__brush.color()

        p = QPainter(result)
        p.setRenderHints(
            QPainter.Antialiasing | QPainter.TextAntialiasing |
            QPainter.SmoothPixmapTransform
        )
        p.setPen(fill)
        p.setBrush(fill)
        p.drawRoundedRect(frame, 30.0, 30.0, Qt.RelativeSize)

        # The character is drawn in white on top of the filled shape.
        p.setPen(Qt.white)
        font = p.font()  # type: QFont
        font.setPixelSize(inner - 2 * text_margin)
        font.setBold(True)
        p.setFont(font)
        p.drawText(frame, Qt.AlignCenter, self.__char)
        p.end()

        # Let the application style post-process the pixmap for `mode`.
        style = QApplication.style()
        if style is None:
            return result
        option = QStyleOption()
        option.palette = QApplication.palette()
        return style.generatedIconPixmap(mode, result, option)
122
123
class Dialect(csv.Dialect):
    """
    A `csv.Dialect` whose format parameters are given to the constructor.

    The line terminator is always ``"\\r\\n"``. Instances support
    `__reduce__` and have a `repr` listing the constructor arguments.
    """
    def __init__(self, delimiter, quotechar, escapechar, doublequote,
                 skipinitialspace, quoting=csv.QUOTE_MINIMAL):
        self.lineterminator = "\r\n"
        self.delimiter, self.quotechar, self.escapechar = (
            delimiter, quotechar, escapechar
        )
        self.doublequote, self.skipinitialspace = (
            doublequote, skipinitialspace
        )
        self.quoting = quoting
        # The base initializer is run last, after all attributes are set.
        super().__init__()

    def __repr__(self):
        ctor_args = self.__reduce__()[1]
        rendered = ", ".join(repr(arg) for arg in ctor_args)
        return "Dialect(" + rendered + ")"

    def __reduce__(self):
        # (callable, args) pair that recreates an equivalent instance
        ctor_args = (
            self.delimiter, self.quotechar, self.escapechar,
            self.doublequote, self.skipinitialspace, self.quoting,
        )
        return type(self), ctor_args
145
146
class ColumnType(enum.Enum):
    """
    Possible column types.

    Every member's value is the member's own name as a string.
    """
    # Skip column (the column is not loaded)
    Skip = "Skip"
    # Autodetect column type (determined from the column contents)
    Auto = "Auto"
    # Numeric (real) column
    Numeric = "Numeric"
    # Categorical column
    Categorical = "Categorical"
    # Text column
    Text = "Text"
    # Date time column
    Time = "Time"
163
164
class LineEdit(QLineEdit):
    """
    A QLineEdit that can size itself for a minimum number of characters.

    The `minimumContentsLength` property plays the same role as
    `QComboBox.minimumContentsLength`: it is only used for size hinting.
    """
    # These constants are taken from Qt's sources for QLineEdit
    _verticalMargin = 1
    _horizontalMargin = 2

    def __init__(self, *args, minimumContentsLength=0, **kwargs):
        # Stored before the base initializer runs (same order as always).
        self.__minimumContentsLength = minimumContentsLength
        super().__init__(*args, **kwargs)

    def setMinimumContentsLength(self, characters):
        # type: (int) -> None
        """
        Set how many characters should fit into the line edit.

        The value only affects the size hint; the geometry is invalidated
        when the value actually changes.
        """
        if characters == self.__minimumContentsLength:
            return
        self.__minimumContentsLength = characters
        self.updateGeometry()

    def minimumContentsLength(self):
        # type: () -> int
        """
        Return how many characters should (at least) fit into the line
        edit.
        """
        return self.__minimumContentsLength

    def sizeHint(self):
        # type: () -> QSize
        """Reimplemented."""
        # Follows QLineEdit's own computation; the only difference is that
        # the character count comes from minimumContentsLength rather than
        # a fixed constant (17 is used when no positive length is set).
        self.ensurePolished()
        metrics = self.fontMetrics()
        text_m = self.textMargins()
        contents_m = self.contentsMargins()

        height = (max(metrics.height(), 14) + 2 * self._verticalMargin +
                  text_m.top() + text_m.bottom() +
                  contents_m.top() + contents_m.bottom())

        length = self.__minimumContentsLength
        count = length if length > 0 else 17

        width = (metrics.horizontalAdvance("X") * count +
                 2 * self._horizontalMargin +
                 text_m.left() + text_m.right() +
                 contents_m.left() + contents_m.right())

        option = QStyleOptionFrame()
        self.initStyleOption(option)
        return self.style().sizeFromContents(
            QStyle.CT_LineEdit, option, QSize(width, height), self
        )

    def minimumSizeHint(self):
        # type: () -> QSize
        """Reimplemented."""
        if self.__minimumContentsLength <= 0:
            # NOTE(review): falls back to the base class `sizeHint`, not
            # `minimumSizeHint` - confirm this is intended.
            return super(LineEdit, self).sizeHint()
        return self.sizeHint()
234
235
class TextEditCombo(QComboBox):
    """
    A combo box with `text`/`setText` accessors for the current item.
    """
    def text(self):
        # type: () -> str
        """
        Return the text of the current item.
        """
        current = self.currentIndex()
        return self.itemText(current)

    def setText(self, text):
        # type: (str) -> None
        """
        Make `text` the current item.

        An item whose `Qt.EditRole` data matches `text` exactly is selected
        if present; otherwise `text` is appended as a new item and selected.
        """
        index = self.findData(text, Qt.EditRole, Qt.MatchExactly)
        if index == -1:
            self.addItem(text)
            index = self.count() - 1
        self.setCurrentIndex(index)
255
256
257class CSVOptionsWidget(QWidget):
258    """
259    A widget presenting common CSV options.
260    """
261    DelimiterTab, DelimiterComma, DelimiterSemicolon, DelimiterSpace = range(4)
262    DelimiterOther = DelimiterSpace + 2  # note DelimiterSpace + 1 is reserved
263
264    PresetDelimiters = [
265        ("Tab", "\t"),
266        ("Comma", ","),
267        ("Semicolon", ";"),
268        ("Space", " "),
269    ]
270
271    #: Signal emitted when the format (dialect) changes
272    optionsChanged = Signal()
273    #: Signal emitted when the format (dialect) is edited by the user
274    optionsEdited = Signal()
275
    def __init__(self, *args, **kwargs):
        """
        Build the options form (encoding, cell delimiter, quote character).

        Parameters
        ----------
        selectedEncoding : str, keyword only
            Initially selected encoding (default "utf-8"). It is removed
            from `kwargs` before the remaining arguments are passed on to
            the base class initializer.
        """
        # Index 0 corresponds to `DelimiterTab`.
        self._delimiter_idx = 0
        self._delimiter = ","
        self._delimiter_custom = "|"
        self._quotechar = "\""
        self._encoding = kwargs.pop("selectedEncoding", "utf-8")

        super().__init__(*args, **kwargs)

        # Dialect options form
        form = QFormLayout()
        self.encoding_cb = QComboBox(
            objectName="encoding-combo-box",
            toolTip="Select file text encoding",
        )
        # The combo box is populated before its `activated` signal is
        # connected.
        self.__set_visible_codecs(encodings.list_selected_encodings())
        self.encoding_cb.activated.connect(self.__on_encoding_activated)

        self.delimiter_cb = QComboBox(
            objectName="delimiter-combo-box",
            toolTip="Select cell delimiter character."
        )
        self.delimiter_cb.addItems(
            [name for name, _ in CSVOptionsWidget.PresetDelimiters]
        )
        # The separator takes the index right after the presets, which is
        # why `DelimiterOther` is `DelimiterSpace + 2`.
        self.delimiter_cb.insertSeparator(self.delimiter_cb.count())
        self.delimiter_cb.addItem("Other")

        self.delimiter_cb.setCurrentIndex(self._delimiter_idx)
        self.delimiter_cb.activated.connect(self.__on_delimiter_idx_activated)

        # The same validator instance is shared by the custom delimiter
        # edit and the quote character combo box.
        validator = QRegularExpressionValidator(QRegularExpression("."))
        # The custom delimiter edit is only enabled while 'Other' is the
        # selected delimiter.
        self.delimiteredit = LineEdit(
            self._delimiter_custom,
            enabled=self._delimiter_idx == CSVOptionsWidget.DelimiterOther,
            minimumContentsLength=2,
            objectName="custom-delimiter-edit"
        )
        self.delimiteredit.setValidator(validator)
        self.delimiteredit.editingFinished.connect(self.__on_delimiter_edited)

        # Delimiter combo box and custom delimiter edit share one form row.
        delimlayout = QHBoxLayout()
        delimlayout.setContentsMargins(0, 0, 0, 0)
        delimlayout.addWidget(self.delimiter_cb)
        delimlayout.addWidget(self.delimiteredit)
        self.quoteedit = TextEditCombo(
            editable=True, minimumContentsLength=1,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            objectName="quote-edit-combo-box"
        )
        self.quoteedit.addItems(["\"", "'"])
        self.quoteedit.setValidator(validator)
        self.quoteedit.setText(self._quotechar)
        self.quoteedit.activated.connect(self.__on_quotechar_edited)

        # NOTE(review): `quotelayout` is never added to the form; the
        # "Quote character" row below uses `self.quoteedit` directly.
        quotelayout = QHBoxLayout()
        quotelayout.setContentsMargins(0, 0, 0, 0)
        quotelayout.addWidget(self.quoteedit)

        form.addRow("Encoding", self.encoding_cb)
        form.addRow(QFrame(self, frameShape=QFrame.HLine))
        form.addRow("Cell delimiter", delimlayout)
        form.addRow("Quote character", self.quoteedit)
        self.setLayout(form)
340
341    def dialect(self):
342        # type: () -> csv.Dialect
343        """
344        Return the current state as a `csv.Dialect` instance.
345        """
346        if self._delimiter_idx == CSVOptionsWidget.DelimiterOther:
347            delimiter = self._delimiter_custom
348        elif 0 <= self._delimiter_idx < len(CSVOptionsWidget.PresetDelimiters):
349            _, delimiter = CSVOptionsWidget.PresetDelimiters[self._delimiter_idx]
350        else:
351            assert False
352
353        quotechar = self.quoteedit.text() or None
354        skipinitialspace = True
355        escapechar = None
356        quoting = csv.QUOTE_MINIMAL if quotechar is not None else csv.QUOTE_NONE
357        return Dialect(delimiter, quotechar, escapechar,
358                       doublequote=True, skipinitialspace=skipinitialspace,
359                       quoting=quoting)
360
361    def setDialect(self, dialect):
362        # type: (csv.Dialect) -> None
363        """
364        Set the current state to match dialect instance.
365        """
366        changed = False
367        delimiter = dialect.delimiter
368        presets = [d for _, d in CSVOptionsWidget.PresetDelimiters]
369        try:
370            index = presets.index(delimiter)
371        except ValueError:
372            index = CSVOptionsWidget.DelimiterOther
373            if self._delimiter_custom != delimiter:
374                self._delimiter_custom = delimiter
375                changed = True
376
377        if self._delimiter_idx != index:
378            self._delimiter_idx = index
379            self.delimiter_cb.setCurrentIndex(index)
380            self.delimiteredit.setText(delimiter)
381            changed = True
382        if self._quotechar != dialect.quotechar:
383            self._quotechar = dialect.quotechar
384            self.quoteedit.setText(dialect.quotechar or '')
385            changed = True
386
387        if changed:
388            self.optionsChanged.emit()
389
390    def setSelectedEncoding(self, encoding):
391        # type: (str) -> None
392        """
393        Set the current selected encoding.
394
395        Parameters
396        ----------
397        encoding : str
398            Encoding name such that `codecs.lookup` finds it.
399        """
400        co = codecs.lookup(encoding)
401        cb = self.encoding_cb
402        index = cb.findData(co.name, Qt.UserRole)
403
404        if index == -1:
405            # insert the encoding before the separator.
406            sepidx = cb.findData("separator", Qt.AccessibleDescriptionRole)
407            if sepidx == -1:
408                sepidx = cb.count()
409            cb.insertItem(sepidx, encodings.display_name(co.name),
410                          userData=co.name)
411            index = sepidx
412            assert cb.itemData(index, Qt.UserRole) == co.name
413            changed = True
414            self._encoding = encoding
415        else:
416            changed = index != self.encoding_cb.currentIndex()
417            self._encoding = encoding
418
419        self.encoding_cb.setCurrentIndex(index)
420
421        if changed:
422            self.optionsChanged.emit()
423
424    def encoding(self):
425        # type: () -> str
426        """
427        Return the current selected encoding.
428        """
429        index = self.encoding_cb.currentIndex()
430        if index >= 0:
431            data = self.encoding_cb.itemData(index, Qt.UserRole)
432            if isinstance(data, str):
433                return data
434        return "latin-1"
435
436    def __on_encoding_activated(self, idx):
437        current = self._encoding
438        data = self.encoding_cb.itemData(idx, Qt.UserRole)
439        if data is ...:
440            # restore the previous item
441            idx = self.encoding_cb.findData(current, Qt.UserRole)
442            self.encoding_cb.setCurrentIndex(idx)
443            self.__show_encodings_widget()
444        elif isinstance(data, str):
445            assert codecs.lookup(data)
446            self._encoding = data
447            self.optionsEdited.emit()
448            self.optionsChanged.emit()
449
450    def __show_encodings_widget(self):
451        """
452        Show the encodings widget for selection
453        """
454        # If tool window is already shown just raise it
455        w = self.findChild(
456            encodings.SelectEncodingsWidget,
457            "-encoding-selection-tool-window"
458        )
459        if w is not None and w.isVisible():  # pragma: no coverage
460            w.raise_()
461            w.activateWindow()
462            return
463
464        w = encodings.SelectEncodingsWidget(
465            self, Qt.Tool,
466            windowTitle="Customize Encodings List",
467            objectName="-encoding-selection-tool-window"
468        )
469        w.setAttribute(Qt.WA_DeleteOnClose)
470        model = w.model()
471        model.dataChanged.connect(
472            lambda: self.__set_visible_codecs(w.selectedEncodings())
473        )
474        w.show()
475
476    def __set_visible_codecs(self, codecs):
477        # type: (List[str]) -> None
478        # Set the list of current visible/selectable codecs in the encoding_cb
479        if not codecs:
480            # never clear all items from the drop down
481            codecs = ["ascii"]
482        cb = self.encoding_cb
483        current = self._encoding
484        cb.clear()
485
486        for c in codecs:
487            cb.addItem(encodings.display_name(c), userData=c)
488        cb.insertSeparator(cb.count())
489        cb.addItem("Customize Encodings List...", userData=...)
490        idx = cb.findData(current, Qt.UserRole)
491        if idx != -1:
492            cb.setCurrentIndex(idx)
493        elif codecs:
494            cb.setCurrentIndex(0)
495            self._encoding = codecs[0]
496            self.__on_encoding_activated(0)
497        else:
498            cb.setCurrentIndex(-1)
499            self._encoding = ""
500
501    def __on_delimiter_idx_activated(self, index):
502        presets = CSVOptionsWidget.PresetDelimiters
503        if 0 <= index < CSVOptionsWidget.DelimiterOther:
504            self.delimiteredit.setText(presets[index][1])
505            self.delimiteredit.setEnabled(False)
506        else:
507            self.delimiteredit.setText(self._delimiter_custom)
508            self.delimiteredit.setEnabled(True)
509
510        if self._delimiter_idx != index:
511            self._delimiter_idx = index
512            self.optionsChanged.emit()
513            self.optionsEdited.emit()
514
515    def __on_delimiter_edited(self):
516        delimiter = self.delimiteredit.text()
517        if self._delimiter_custom != delimiter:
518            self._delimiter_custom = delimiter
519            self.optionsChanged.emit()
520            self.optionsEdited.emit()
521
522    def __on_quotechar_edited(self):
523        quotechar = self.quoteedit.text()
524        if self._quotechar != quotechar:
525            self._quotechar = quotechar
526            self.optionsChanged.emit()
527            self.optionsEdited.emit()
528
529
class Item(QStandardItem):
    """
    A QStandardItem subclass storing its data in a plain python dict.

    Note
    ----
    Unlike QStandardItem, this class does not store `Qt.DisplayRole` and
    `Qt.EditRole` as the same value; each one is only used as a fallback
    for the other when it is not set. Also, accessing or setting data via
    `model.itemData` and `model.setItemData` will not work.
    """
    def __init__(self, data=MappingProxyType({})):
        # type: (Mapping[Qt.ItemDataRole, Any]) -> None
        super().__init__()
        # private copy; the mapping passed in is never modified
        self.__data = dict(data)

    def __notifyChanged(self, roles):
        # Emit the owning model's `dataChanged` for this item (if the item
        # is in a model).
        model = self.model()
        if model is None:
            return
        index = model.indexFromItem(self)
        model.dataChanged.emit(index, index, roles)

    def clone(self):
        # type: () -> Item
        """Reimplemented from QStandardItem"""
        return Item(self.__data)

    def setData(self, value, role=Qt.UserRole+1):
        # type: (Any, Qt.ItemDataRole) -> None
        """Reimplemented from QStandardItem"""
        self.__data[role] = value
        self.__notifyChanged([role])

    def data(self, role=Qt.UserRole+1):
        # type: (Qt.ItemDataRole) -> Any
        """Reimplemented from QStandardItem"""
        store = self.__data
        if role not in store:
            # Display and edit roles stand in for each other when unset.
            if role == Qt.EditRole:
                role = Qt.DisplayRole
            elif role == Qt.DisplayRole:
                role = Qt.EditRole
        return store.get(role, None)

    def setItemData(self, data):
        # type: (Dict[Qt.ItemDataRole, Any]) -> bool
        """Update the stored data with all role/value pairs in `data`."""
        changed_roles = list(data)
        self.__data.update(data)
        self.__notifyChanged(changed_roles)
        return True

    def itemData(self):
        """Return a copy of the stored role -> value mapping."""
        return dict(self.__data)
580
581
582class CSVImportWidget(QWidget):
583    """
584    CSV import widget with a live table preview
585    """
586    #: Signal emitted on any format option change.
587    optionsChanged = Signal()
588    #: Signal emitted when a user changes format options.
589    optionsEdited = Signal()
590    #: Signal emitted when a user changes type affiliation for a column
591    columnTypesChanged = Signal()
592    #: Signal emitted when the preview content parsing ends with an error.
593    #: Note: this does not include errors in cell content interpretation.
594    previewReadErrorOccurred = Signal(str)
595    #: Signal emitted when the preview model is reset. This is either because
    #: of `setPreviewContents` or an options change.
597    previewModelReset = Signal()
598
    def __init__(self, *args, **kwargs):
        """
        Build the widget: the CSV options form (extended with number
        separators and column type rows), the table preview and an error
        overlay placed over the preview's viewport.

        All arguments are passed on to the base class initializer.
        """
        super().__init__(*args, **kwargs)

        self.__previewmodel = None  # type: Optional[TablePreviewModel]
        self.__textwrapper = None  # type: Optional[io.TextIOWrapper]
        self.__sample = None
        self.__buffer = None

        layout = QVBoxLayout()
        layout.setContentsMargins(0, 0, 0, 0)

        # The options widget's signals are re-emitted as this widget's own.
        self.optionswidget = CSVOptionsWidget()
        self.optionswidget.optionsChanged.connect(self.optionsChanged)
        self.optionswidget.optionsEdited.connect(self.optionsEdited)

        self.dataview = TablePreview(
            selectionBehavior=QTableView.SelectColumns,
            tabKeyNavigation=False,
        )
        # Custom context menus for the view and both of its headers.
        self.dataview.setContextMenuPolicy(Qt.CustomContextMenu)
        self.dataview.customContextMenuRequested.connect(
            self.__dataview_context_menu
        )
        header = self.dataview.horizontalHeader()  # type: QHeaderView
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(
            self.__hheader_context_menu
        )

        # NOTE: from here on `header` refers to the vertical header.
        header = self.dataview.verticalHeader()
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(
            self.__vheader_context_menu
        )
        # Derive the default row height from the style's size for a view
        # item displaying the text "X".
        style = self.style()
        opt = self.dataview.viewOptions()
        opt.text = "X"
        opt.features |= QStyleOptionViewItem.HasDisplay
        csize = style.sizeFromContents(
            QStyle.CT_ItemViewItem, opt, QSize(18, 18), self.dataview
        )
        header.ensurePolished()
        header.setDefaultSectionSize(max(csize.height(),
                                         header.minimumSectionSize()))
        layout.addWidget(self.optionswidget)
        # Further rows are appended directly to the options widget's form.
        form = self.optionswidget.layout()
        assert isinstance(form, QFormLayout)
        number_sep_layout = QHBoxLayout()
        self.grouping_sep_edit_cb = TextEditCombo(
            editable=True, objectName="grouping-separator-combo-box",
            toolTip="Thousands group separator",
            minimumContentsLength=1,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon
        )
        # Display and edit role values differ for some entries (e.g. "None"
        # is displayed for the empty separator).
        items = [
            {Qt.DisplayRole: "None", Qt.EditRole: "",
             Qt.ToolTipRole: "No separator"},
            {Qt.DisplayRole: ".", Qt.EditRole: "."},
            {Qt.DisplayRole: ",", Qt.EditRole: ","},
            {Qt.DisplayRole: "Space", Qt.EditRole: " "},
            {Qt.DisplayRole: "'", Qt.EditRole: "'"},
        ]
        m = QStandardItemModel(self)
        m.invisibleRootItem().appendRows([Item(data) for data in items])
        self.grouping_sep_edit_cb.setModel(m)
        # TODO: Treat all space (THIN SPACE, NO-BREAK SPACE, ...) the same?
        # For instance Apple's Numbers exports csv with \N{NO-BREAK SPACE}.
        # Maybe just use unicodedata.normalize('NFKC', ...) as a converter?
        # For now only allow a limited set
        self.grouping_sep_edit_cb.setValidator(
            QRegularExpressionValidator(QRegularExpression(r"(\.|,| |')?"), self)
        )
        self.grouping_sep_edit_cb.activated[str].connect(
            self.__group_sep_activated)

        self.decimal_sep_edit_cb = TextEditCombo(
            editable=True, objectName="decimal-separator-combo-box",
            toolTip="Decimal separator",
            minimumContentsLength=1,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon
        )
        self.decimal_sep_edit_cb.setValidator(
            QRegularExpressionValidator(QRegularExpression(r"(\.|,)"), self))
        self.decimal_sep_edit_cb.addItems([".", ","])
        self.decimal_sep_edit_cb.activated[str].connect(
            self.__decimal_sep_activated)

        number_sep_layout.addWidget(QLabel("Grouping:"))
        number_sep_layout.addWidget(self.grouping_sep_edit_cb)
        number_sep_layout.addWidget(QLabel("Decimal:"))
        number_sep_layout.addWidget(self.decimal_sep_edit_cb)
        number_sep_layout.addStretch(10)
        form.addRow("Number separators:", number_sep_layout)
        # The column type editor starts disabled and without a selection.
        self.column_type_edit_cb = QComboBox(
            enabled=False, objectName="column-type-edit-combo-box"
        )
        self.column_type_edit_cb.activated.connect(
            self.__on_column_type_edit_activated
        )
        # One entry per selectable column type; the `ColumnType` is stored
        # under `Qt.UserRole`.
        types = [
            {Qt.DisplayRole: "Auto",
             Qt.ToolTipRole: "The type will be determined automatically based "
                             "on column contents.",
             Qt.UserRole: ColumnType.Auto},
            {Qt.DisplayRole: "Numeric", Qt.UserRole: ColumnType.Numeric},
            {Qt.DisplayRole: "Categorical",
             Qt.UserRole: ColumnType.Categorical},
            {Qt.DisplayRole: "Text", Qt.UserRole: ColumnType.Text},
            {Qt.DisplayRole: "Datetime", Qt.UserRole: ColumnType.Time},
            {Qt.AccessibleDescriptionRole: "separator"},
            {Qt.DisplayRole: "Ignore",
             Qt.UserRole: ColumnType.Skip,
             Qt.ToolTipRole: "The column will not be loaded"}
        ]
        typemodel = QStandardItemModel(self)
        for itemdata in types:
            item = Item(itemdata)
            if itemdata.get(Qt.AccessibleDescriptionRole) == "separator":
                # the separator entry gets no item flags
                item.setFlags(Qt.NoItemFlags)
            typemodel.appendRow(item)

        self.column_type_edit_cb.setModel(typemodel)
        self.column_type_edit_cb.setCurrentIndex(-1)

        form.addRow(QFrame(frameShape=QFrame.HLine))
        form.addRow("Column type", self.column_type_edit_cb)
        layout.addWidget(self.dataview)
        # Overlay error message widget in the bottom left corner of the data
        # view
        self.__overlay = overlay = OverlayWidget(
            parent=self.dataview.viewport(),
            alignment=Qt.AlignBottom | Qt.AlignLeft,
            objectName="-error-overlay",
            visible=False,
        )
        overlay.setLayout(QVBoxLayout(margin=0))
        self.__error_label = label = QLabel(objectName="-error-text-label")
        overlay.layout().addWidget(label)
        overlay.setWidget(self.dataview.viewport())

        self.setLayout(layout)

        # Single shot timer that triggers `__resetPreview`; it is
        # (re)started on every `optionsChanged` of the options widget.
        self.__timer = QTimer(self, singleShot=True)
        self.__timer.timeout.connect(self.__resetPreview)
        self.optionswidget.optionsChanged.connect(self.__timer.start)
744
    def setDialect(self, dialect):
        # type: (csv.Dialect) -> None
        """
        Set the current state to match dialect instance.

        The call is forwarded to the embedded options widget.
        """
        self.optionswidget.setDialect(dialect)
751
    def dialect(self):
        # type: () -> csv.Dialect
        """
        Return the current dialect (as reported by the options widget).
        """
        return self.optionswidget.dialect()
758
    def setEncoding(self, encoding):
        # type: (str) -> None
        """
        Set the current text encoding.

        The call is forwarded to the options widget's
        `setSelectedEncoding`.
        """
        self.optionswidget.setSelectedEncoding(encoding)
763
    def encoding(self):
        # type: () -> str
        """Return the current text encoding (from the options widget)."""
        return self.optionswidget.encoding()
768
769    def setNumbersFormat(self, groupsep, decimalsep):
770        changed = False
771
772        if groupsep != self.grouping_sep_edit_cb.text():
773            self.grouping_sep_edit_cb.setText(groupsep)
774            changed = True
775
776        if decimalsep != self.grouping_sep_edit_cb.text():
777            self.decimal_sep_edit_cb.setText(decimalsep)
778            changed = True
779
780        if changed:
781            self.__update_numbers_format()
782            self.optionsChanged.emit()
783
784    def numbersFormat(self):
785        group = self.grouping_sep_edit_cb.text()
786        decimal = self.decimal_sep_edit_cb.text()
787        return {"group": group, "decimal": decimal}
788
789    def __decimal_sep_activated(self, sep):
790        group_sep = self.grouping_sep_edit_cb.text()
791        preferred_replace = {".": ",", ",": "."}
792        if sep == group_sep and sep in preferred_replace:
793            self.grouping_sep_edit_cb.setText(preferred_replace[sep])
794        elif sep == group_sep:
795            cb = self.grouping_sep_edit_cb
796            cb.setCurrentIndex((cb.currentIndex() + 1) % cb.count())
797
798        self.__update_numbers_format()
799        self.optionsEdited.emit()
800        self.optionsChanged.emit()
801
802    def __group_sep_activated(self, sep):
803        decimal_sep = self.decimal_sep_edit_cb.text()
804        preferred_replace = {".": ",", ",": "."}
805        if sep == decimal_sep and sep in preferred_replace:
806            self.decimal_sep_edit_cb.setText(preferred_replace[sep])
807        elif sep == decimal_sep:
808            cb = self.decimal_sep_edit_cb
809            cb.setCurrentIndex((cb.currentIndex() + 1) % cb.count())
810        self.__update_numbers_format()
811
812        self.optionsEdited.emit()
813        self.optionsChanged.emit()
814
815    def __update_numbers_format(self):
816        groupsep = self.grouping_sep_edit_cb.text()
817        decimalsep = self.decimal_sep_edit_cb.text()
818
819        model = self.__previewmodel
820        if model is None:
821            return
822        parser = number_parser(groupsep, decimalsep)
823
824        # update the delegates
825        view = self.dataview
826        for i in range(model.columnCount()):
827            coltype = model.headerData(
828                i, Qt.Horizontal, TablePreviewModel.ColumnTypeRole)
829            if coltype == ColumnType.Numeric:
830                delegate = ColumnValidateItemDelegate(view, converter=parser)
831                view.setItemDelegateForColumn(i, delegate)
832
833    def columnTypes(self):
834        # type: () -> Dict[int, ColumnType]
835        """
836        Return the current column type annotations.
837
838        Returns
839        -------
840        mapping : Dict[int, Optional[ColumnType]]
841            Mapping from column indices column types.
842        """
843        # types = dict.fromkeys(range(model.rowCount()), ColumnType.Auto)
844        types = {}
845        types.update(self.__columnTypes())
846        return types
847
848    def setColumnTypes(self, types):
849        # type: (Dict[int, Optional[ColumnType]]) -> None
850        """
851        Set column type annotations.
852
853        Parameters
854        ----------
855        types : Dict[int, Optional[ColumnType]]
856            Mapping from column indices to column types, `None` indicates
857            default (unspecified type, will be inferred)
858        """
859        # This depends on encoding/dialect. Force preview model update.
860        if self.__timer.isActive():
861            self.__resetPreview()
862        self.__setColumnTypes(types)
863
864    def setStateForRow(self, row, state):
865        # type: (int, TablePreview.RowSpec) -> None
866        """
867        Set the state for row.
868        """
869        if self.__timer.isActive():
870            self.__resetPreview()
871
872        model = self.__previewmodel
873        if model is None:
874            return
875        rowcount = model.rowCount()
876        while row > rowcount - 1 and model.canFetchMore():
877            model.fetchMore()
878            if model.rowCount() == rowcount:
879                break
880            rowcount = model.rowCount()
881
882        model.setHeaderData(
883            row, Qt.Vertical, state, TablePreviewModel.RowStateRole)
884        self.dataview.setRowHints({row: state})
885
886    def stateForRow(self, row):
887        # type: (int) -> Optional[TablePreview.RowSpec]
888        """
889        Return the state for row.
890        """
891        model = self.__previewmodel
892        if model is not None:
893            return model.headerData(
894                row, Qt.Vertical, TablePreviewModel.RowStateRole)
895        else:
896            return None
897
898    def rowStates(self):
899        # type: () -> (Dict[int, RowSpec])
900        """
901        Return states for all rows with non None state
902        """
903        return self.__rowStates()
904
905    def setRowStates(self, rowstate):
906        # type: (Dict[int, RowSpec]) -> None
907        """
908        Set the state for rows.
909
910        Note
911        ----
912        States for all rows not passed in rowstate is reset to `None`.
913        """
914        if self.__timer.isActive():
915            self.__resetPreview()
916        model = self.__previewmodel
917        if model is None:
918            return
919
920        currstate = self.rowStates()
921        newstate = dict.fromkeys(currstate.keys(), None)
922        newstate.update(rowstate)
923        for row, state in newstate.items():
924            self.setStateForRow(row, state)
925
926    def __rowStates(self):
927        model = self.__previewmodel
928        items = (
929            (row, model.headerData(row, Qt.Vertical,
930                                   TablePreviewModel.RowStateRole))
931            for row in range(model.rowCount())
932        )
933        return {row: state for row, state in items if state is not None}
934
935    def setSampleContents(self, stream):
936        # type: (io.BinaryIO) -> None
937        """
938        Set a binary file-like stream for displaying sample content.
939
940        The stream will be read as needed when the data view is scrolled.
941
942        Note
943        ----
944        If the stream is not seekable, its contents will be cached in memory.
945        If and existing stream is already set it is NOT closed. The caller
946        is responsible for managing its lifetime.
947        """
948        self.__sample = stream
949        self.__buffer = io.BytesIO()
950        self.__resetPreview()
951
    def __resetPreview(self):
        """
        Discard the current preview model and build a new one from the
        sample stream.

        Stops the update timer, remembers the column types and row states of
        the old model (if any), drops the old model and text wrapper, clears
        the error display and emits `previewModelReset`. If a sample stream
        is set, a new `TablePreviewModel` is created over a `csv.reader`
        using the current encoding and dialect, one batch of rows is
        fetched, the model is installed on the data view and the remembered
        column types / row states are applied again.
        """
        # Reset the preview model and view
        self.__timer.stop()
        colstate = {}
        rowstate = {}
        if self.__previewmodel is not None:
            # store the column/row specs
            colstate = self.__columnTypes()
            rowstate = self.__rowStates()
            self.__previewmodel.errorOccurred.disconnect(self.__set_error)
            self.__previewmodel.deleteLater()
            self.__previewmodel = None

        if self.__textwrapper is not None:
            # Separate the wrapper from the underlying binary stream before
            # dropping it -- presumably so that discarding the wrapper does
            # not close the sample stream (its lifetime belongs to the
            # caller).
            self.__textwrapper.detach()
            self.__textwrapper = None

        # Clear any previous error before announcing the reset.
        self.__set_error("")
        self.previewModelReset.emit()

        if self.__sample is None:
            return

        self.__previewmodel = TablePreviewModel(self)
        self.__previewmodel.errorOccurred.connect(self.__set_error)
        try:
            seekable = self.__sample.seekable()
        except AttributeError:
            # Stream objects without a `seekable` method are treated as
            # not seekable.
            seekable = False

        if seekable:
            # Might be better to always use buffer? (compressed streams are
            # seekable but slower)
            base = self.__sample
            base.seek(0)
        else:
            # Replay what was already read from the in-memory cache, then
            # continue reading (and caching) from the sample stream.
            self.__buffer.seek(0)
            base = CachedBytesIOWrapper(self.__sample, self.__buffer)

        wrapper = io.TextIOWrapper(
            base, encoding=self.encoding(),
            # use surrogate escape to validate/detect encoding errors in
            # delegates
            errors="surrogateescape"
        )
        rows = csv.reader(
            wrapper, dialect=self.dialect()
        )

        self.__textwrapper = wrapper
        self.__previewmodel.setPreviewStream(rows)
        if self.__previewmodel.canFetchMore():
            # TODO: Fetch until the same number of rows as at method entry?
            self.__previewmodel.fetchMore()

        self.dataview.setModel(self.__previewmodel)
        self.dataview.selectionModel().selectionChanged.connect(
            self.__update_column_type_edit, Qt.UniqueConnection
        )
        # NOTE(review): `colstate` comes from `__columnTypes`, which omits
        # unset/Auto columns, so this only restores the types when every
        # column of the new model had an explicit type -- confirm that this
        # is the intended condition.
        if self.__previewmodel.columnCount() == len(colstate):
            self.__setColumnTypes(colstate)
        for row, state in rowstate.items():
            self.__previewmodel.setHeaderData(row, Qt.Vertical, state,
                                              TablePreviewModel.RowStateRole)
        self.dataview.setRowHints(rowstate)
1017
1018    @Slot()
1019    def __update_column_type_edit(self):
1020        # Update the 'Column type' edit control based on current column
1021        # selection
1022        smodel = self.dataview.selectionModel()
1023        model = self.dataview.model()
1024        cb = self.column_type_edit_cb
1025        columns = smodel.selectedColumns(0)
1026        types = {model.headerData(c.column(), Qt.Horizontal,
1027                                  TablePreviewModel.ColumnTypeRole)
1028                 for c in columns}
1029
1030        types = {ColumnType.Auto if t is None else t for t in types}
1031        if len(types) == 0:
1032            # no selection, disabled
1033            cb.setCurrentIndex(-1)
1034            cb.setEnabled(False)
1035        elif len(types) == 1:
1036            idx = cb.findData(types.pop(), Qt.UserRole)
1037            cb.setCurrentIndex(idx)
1038            cb.setEnabled(True)
1039        else:
1040            cb.setCurrentIndex(-1)
1041            cb.setEnabled(True)
1042
1043    def __on_column_type_edit_activated(self, idx):
1044        # Column type set via the combo box.
1045        coltype = self.column_type_edit_cb.itemData(idx, Qt.UserRole)
1046        smodel = self.dataview.selectionModel()
1047        columns = smodel.selectedColumns(0)
1048        columns = [c.column() for c in columns]
1049        self.__setColumnType(columns, coltype)
1050
1051    def __dataview_context_menu(self, pos):
1052        pos = self.dataview.viewport().mapToGlobal(pos)
1053        cols = self.dataview.selectionModel().selectedColumns(0)
1054        cols = [idx.column() for idx in cols]
1055        self.__run_type_columns_menu(pos, cols)
1056
1057    def __hheader_context_menu(self, pos):
1058        pos = self.dataview.horizontalHeader().mapToGlobal(pos)
1059        cols = self.dataview.selectionModel().selectedColumns(0)
1060        cols = [idx.column() for idx in cols]
1061        self.__run_type_columns_menu(pos, cols)
1062
1063    def __vheader_context_menu(self, pos):
1064        header = self.dataview.verticalHeader()  # type: QHeaderView
1065        index = header.logicalIndexAt(pos)
1066        pos = header.mapToGlobal(pos)
1067        model = header.model()  # type: QAbstractTableModel
1068
1069        RowStateRole = TablePreviewModel.RowStateRole
1070        state = model.headerData(index, Qt.Vertical, RowStateRole)
1071        m = QMenu(header)
1072        skip_action = m.addAction("Skip")
1073        skip_action.setCheckable(True)
1074        skip_action.setChecked(state == TablePreview.Skipped)
1075        m.addSection("")
1076        mark_header = m.addAction("Header")
1077        mark_header.setCheckable(True)
1078        mark_header.setChecked(state == TablePreview.Header)
1079
1080        def update_row_state(action):
1081            # type: (QAction) -> None
1082            state = None
1083            if action is mark_header:
1084                state = TablePreview.Header if action.isChecked() else None
1085            elif action is skip_action:
1086                state = TablePreview.Skipped if action.isChecked() else None
1087            model.setHeaderData(index, Qt.Vertical, state, RowStateRole)
1088            self.dataview.setRowHints({index: state})
1089
1090        m.triggered.connect(update_row_state)
1091        m.popup(pos)
1092
1093    def __run_type_columns_menu(self, pos, columns):
1094        # type: (QPoint, List[int]) -> None
1095        # Open a QMenu at pos for setting column types for column indices list
1096        # `columns`
1097        model = self.__previewmodel
1098        if model is None:
1099            return
1100        menu = QMenu(self)
1101        menu.setAttribute(Qt.WA_DeleteOnClose)
1102        coltypes = {model.headerData(
1103                        i, Qt.Horizontal, TablePreviewModel.ColumnTypeRole)
1104                    for i in columns}
1105        coltypes = {ColumnType.Auto if t is None else t for t in coltypes}
1106        if len(coltypes) == 1:
1107            current = coltypes.pop()
1108        else:
1109            current = None
1110        cb = self.column_type_edit_cb
1111        g = QActionGroup(menu)
1112        current_action = None
1113        # 'Copy' the column types model into a menu
1114        for i in range(cb.count()):
1115            if cb.itemData(i, Qt.AccessibleDescriptionRole) == "separator":
1116                menu.addSeparator()
1117                continue
1118
1119            ac = menu.addAction(cb.itemIcon(i), cb.itemText(i))
1120            ac.setData(cb.itemData(i, Qt.UserRole))
1121            ac.setCheckable(True)
1122            if ac.data() == current:
1123                ac.setChecked(True)
1124                current_action = ac
1125            g.addAction(ac)
1126
1127        def update_types(action):
1128            newtype = action.data()
1129            self.__setColumnType(columns, newtype)
1130
1131        menu.triggered.connect(update_types)
1132        menu.triggered.connect(self.__update_column_type_edit)
1133        menu.popup(pos, current_action)
1134
1135    def __setColumnType(self, columns, coltype):
1136        # type: (List[int], ColumnType) -> None
1137        view = self.dataview
1138        model = view.model()  # type: QAbstractTableModel
1139        numbersformat = self.numbersFormat()
1140        numberconverter = number_parser(
1141            numbersformat["group"], numbersformat["decimal"])
1142        if coltype == ColumnType.Numeric:
1143            delegate = ColumnValidateItemDelegate(self.dataview,
1144                                                  converter=numberconverter)
1145        elif coltype == ColumnType.Text:
1146            delegate = ColumnValidateItemDelegate(self.dataview,
1147                                                  converter=str.strip)
1148        elif coltype == ColumnType.Time:
1149            delegate = ColumnValidateItemDelegate(self.dataview,
1150                                                  converter=parse_datetime)
1151        elif coltype == ColumnType.Skip:
1152            delegate = SkipItemDelegate(self.dataview)
1153        else:
1154            delegate = None
1155
1156        changed = False
1157        for i in columns:
1158            current = model.headerData(
1159                i, Qt.Horizontal, TablePreviewModel.ColumnTypeRole)
1160            changed = changed or current != coltype
1161            model.setHeaderData(
1162                i, Qt.Horizontal, coltype, TablePreviewModel.ColumnTypeRole
1163            )
1164            model.setHeaderData(
1165                i, Qt.Horizontal, icon_for_column_type(coltype),
1166                Qt.DecorationRole
1167            )
1168            self.dataview.setItemDelegateForColumn(i, delegate)
1169
1170        if changed:
1171            self.__update_column_type_edit()
1172            self.columnTypesChanged.emit()
1173
1174    def __setColumnTypes(self, coltypes):
1175        # type: (Dict[int, ColumnType]) -> None
1176        def mapping_invert(mapping):
1177            # type: (Dict[_A, _B]) -> Dict[_B, List[_A]]
1178            m = defaultdict(list)
1179            for key, val in mapping.items():
1180                m[val].append(key)
1181            return m
1182        model = self.__previewmodel
1183        if model is None:
1184            return
1185        coltypes_ = dict.fromkeys(range(model.columnCount()), ColumnType.Auto)
1186        coltypes_.update(coltypes)
1187        for coltype, cols in mapping_invert(coltypes_).items():
1188            self.__setColumnType(cols, coltype)
1189
1190    def __columnTypes(self):
1191        # type: () -> Dict[int, ColumnType]
1192        model = self.__previewmodel
1193        if model is None:
1194            return {}
1195        res = {
1196            i: model.headerData(i, Qt.Horizontal,
1197                                TablePreviewModel.ColumnTypeRole)
1198            for i in range(model.columnCount())
1199        }
1200        return {i: val for i, val in res.items()
1201                if val is not None and val != ColumnType.Auto}
1202
1203    def columnTypeRanges(self):
1204        # type: () -> List[Tuple[range, ColumnType]]
1205        """
1206        Return the column type specs as column ranges.
1207
1208        Returns
1209        -------
1210        coltypes : List[Tuple[range, ColumnType]]
1211            A list of `(range, coltype)` tuples where `range` are ranges
1212            with step 1 and coltype a ColumnType. The ranges are sorted
1213            in ascending order.
1214
1215        Note
1216        ----
1217        Unlike `columnTypes` this method does not omit ColumnTypes.Auto
1218        entries.
1219        """
1220        model = self.__previewmodel
1221        if model is None:
1222            return []
1223        res = dict.fromkeys(range(model.columnCount()), ColumnType.Auto)
1224        res.update(self.__columnTypes())
1225        types = sorted(res.items())
1226        res = []
1227
1228        # Group by increasing indices (with step 1) and coltype
1229        def groupkey(item, __incseq=iter(itertools.count())):
1230            index, val = item
1231            return index - next(__incseq), val
1232
1233        for (_, key), items in itertools.groupby(types, key=groupkey):
1234            items = list(items)
1235            start = items[0][0]
1236            last = items[-1][0]
1237            res.append((range(start, last + 1), key))
1238        return res
1239
1240    def setColumnTypeRanges(self, ranges):
1241        # type: (List[Tuple[range, ColumnType]]) -> None
1242        """
1243        Set column type specs for ranges.
1244
1245        Parameters
1246        ----------
1247        ranges : List[Tuple[range, ColumnType]]
1248            For every `(range, coltype)` tuple set the corresponding coltype.
1249        """
1250        self.setColumnTypes({i: coltype for r, coltype in ranges for i in r})
1251
1252    def __set_error(self, errorstr):
1253        # type: (str) -> None
1254        if not errorstr:
1255            self.__overlay.hide()
1256            self.__error_label.setText("")
1257        else:
1258            self.__overlay.show()
1259            self.__error_label.setText(errorstr)
1260            self.previewReadErrorOccurred.emit(errorstr)
1261
1262
class CachedBytesIOWrapper(io.BufferedIOBase):
    """
    Reader that records everything it reads from `base` into `cache`.

    Reads are first served from whatever `cache` still holds past its
    current position; once that is used up, data is read from `base` and
    appended to `cache`. Rewinding `cache` and wrapping the same pair again
    therefore replays the stream from the start, which gives restartable
    reads for streams that are not seekable.

    `base` needs to implement a `read` method; `cache` must be readable,
    writable and seekable.
    """
    def __init__(self, base, cache):
        # type: (io.BinaryIO, io.BytesIO) -> None
        super().__init__()
        self.__base = base
        self.__cache = cache

    def detach(self):
        """Forget the base stream and return it."""
        detached, self.__base = self.__base, None
        return detached

    def read(self, size=-1):
        # type: (Optional[int]) -> bytes
        """
        Read up to `size` bytes (everything when `size` is None or
        negative), cached bytes first, then bytes from the base stream.
        """
        base, cache = self.__base, self.__cache
        if size is None or size < 0:
            cached = cache.read()
            fresh = base.read()
            cache.write(fresh)
            return cached + fresh

        cached = b""
        if cache.tell() < len(cache.getbuffer()):
            cached = cache.read(size)
            if len(cached) >= size:
                # Fully served from the cache.
                return cached
            # The cache ran dry; the remainder comes from the base stream.
            assert len(cache.getbuffer()) == cache.tell()

        fresh = base.read(size - len(cached))
        cache.write(fresh)
        assert len(cache.getbuffer()) == cache.tell()
        return cached + fresh if cached else fresh

    def read1(self, size=-1):
        # Does not exactly conform to spec, but necessary for io.TextIOWrapper
        return self.read(size)

    def readable(self):
        return True

    def writable(self):
        return False
1319
1320
class RowSpec(enum.IntEnum):
    """Annotation for a single preview row: header row or skipped row."""
    #: Header row
    Header = 1
    #: Row is skipped
    Skipped = 2
1327
1328
class TablePreview(QTableView):
    """
    Table view for the preview, using `PreviewItemDelegate` by default
    and per-row delegates to render rows marked as header or skipped.
    """
    RowSpec = RowSpec
    Header, Skipped = RowSpec

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setItemDelegate(PreviewItemDelegate(self))

    def rowsInserted(self, parent, start, end):
        # type: (QModelIndex, int, int) -> None
        """
        Reimplemented to re-apply the current selection with row/column
        extent after rows are inserted, so that it covers the new rows.
        """
        super().rowsInserted(parent, start, end)
        behavior = self.selectionBehavior()
        if not behavior & (QTableView.SelectColumns | QTableView.SelectRows):
            return
        # extend the selection to the new rows
        flags = QItemSelectionModel.Select
        if behavior & QTableView.SelectRows:
            flags |= QItemSelectionModel.Rows
        if behavior & QTableView.SelectColumns:
            flags |= QItemSelectionModel.Columns
        selection_model = self.selectionModel()
        selection_model.select(selection_model.selection(), flags)

    def setRowHints(self, hints):
        # type: (Dict[int, TablePreview.RowSpec]) -> None
        """
        Install row delegates according to `hints` (row index -> state).

        Any delegate previously set for a row is scheduled for deletion.
        `Header` rows get a `HeaderItemDelegate`, `Skipped` rows a
        `SkipItemDelegate`; any other hint removes the row delegate.
        """
        for row, hint in hints.items():
            previous = self.itemDelegateForRow(row)
            if previous is not None:
                previous.deleteLater()

            if hint == TablePreview.Header:
                replacement = HeaderItemDelegate(self)
            elif hint == TablePreview.Skipped:
                replacement = SkipItemDelegate(self)
            else:
                replacement = None
            self.setItemDelegateForRow(row, replacement)

    def sizeHint(self):
        """
        Reimplemented to hint at least 8 default-width columns by 20
        default-height rows.
        """
        hint = super().sizeHint()  # type: QSize
        minimum = QSize(
            8 * self.horizontalHeader().defaultSectionSize(),
            20 * self.verticalHeader().defaultSectionSize(),
        )
        return hint.expandedTo(minimum)
1373
1374
def is_surrogate_escaped(text: str) -> bool:
    """
    Return True if `text` contains a character in the range U+DC80 to
    U+DCFF, i.e. the code points the "surrogateescape" error handler uses
    for bytes it could not decode.
    """
    lowest, highest = "\udc80", "\udcff"
    for char in text:
        if lowest <= char <= highest:
            return True
    return False
1378
1379
class PreviewItemDelegate(QStyledItemDelegate):
    """
    Default delegate for preview cells.

    Shortens very long texts, right aligns cells of Numeric and Time
    columns and draws cells that fail `validate` in red.
    """
    def initStyleOption(self, option, index):
        # type: (QStyleOptionViewItem, QModelIndex) -> None
        super().initStyleOption(option, index)
        text = option.text
        if len(text) > 500:
            # Shorten long text (long text layout takes too long)
            finder = QTextBoundaryFinder(QTextBoundaryFinder.Grapheme, text)
            finder.setPosition(500)
            cut = finder.toNextBoundary()
            if cut != -1:
                option.text = text[:cut] + "..."

        coltype = index.model().headerData(
            index.column(), Qt.Horizontal, TablePreviewModel.ColumnTypeRole)
        if coltype in (ColumnType.Numeric, ColumnType.Time):
            option.displayAlignment = Qt.AlignRight | Qt.AlignVCenter

        if self.validate(option.text):
            return
        # Invalid content: paint the text red, selected or not.
        for role in (QPalette.Text, QPalette.HighlightedText):
            option.palette.setBrush(
                QPalette.All, role, QBrush(Qt.red, Qt.SolidPattern)
            )

    def validate(self, value: str) -> bool:  # pylint: disable=no-self-use
        """Return True if `value` has no surrogate escaped characters."""
        escaped = is_surrogate_escaped(value)
        return not escaped

    def helpEvent(self, event, view, option, index):
        # type: (QHelpEvent, QAbstractItemView, QStyleOptionViewItem, QModelIndex) -> bool
        """
        Reimplemented to show the cell's display text as the tool tip when
        the item has no `Qt.ToolTipRole` data of its own.
        """
        if event.type() == QEvent.ToolTip \
                and index.data(Qt.ToolTipRole) is None:
            text = self.displayText(
                index.data(Qt.DisplayRole), option.locale)
            QToolTip.showText(event.globalPos(), text, view)
            return True
        return super().helpEvent(event, view, option, index)
1419
1420
class HeaderItemDelegate(PreviewItemDelegate):
    """
    Paint the items with an alternate color scheme

    Items get a faint background derived from the palette's window text
    color and centered text. With the `AutoDecorate` feature set, items
    without an icon are decorated with the icon of their column's type.
    """
    #: No optional features enabled.
    NoFeatures = 0
    #: Decorate icon-less items with their column type icon.
    AutoDecorate = 1

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__features = HeaderItemDelegate.NoFeatures

    def features(self):
        """Return the enabled feature flags."""
        return self.__features

    def initStyleOption(self, option, index):
        # type: (QStyleOptionViewItem, QModelIndex) -> None
        super().initStyleOption(option, index)
        palette = option.palette
        # `QPalette.Foreground` is the obsolete name of the `WindowText`
        # role; use the current name so this keeps working where the old
        # alias is no longer provided.
        shadow = palette.color(QPalette.WindowText)  # type: QColor
        if shadow.isValid():
            # Mostly transparent text color as a subtle background tint.
            shadow.setAlphaF(0.1)
            option.backgroundBrush = QBrush(shadow, Qt.SolidPattern)
        option.displayAlignment = Qt.AlignCenter
        model = index.model()
        if option.icon.isNull() and \
                self.__features & HeaderItemDelegate.AutoDecorate:
            ctype = model.headerData(index.column(), Qt.Horizontal,
                                     TablePreviewModel.ColumnTypeRole)
            option.icon = icon_for_column_type(ctype)
        if not option.icon.isNull():
            option.features |= QStyleOptionViewItem.HasDecoration
1452
1453
def icon_for_column_type(coltype):
    # type: (ColumnType) -> QIcon
    """
    Return the stamp icon for `coltype`: a letter on a colored background
    for Numeric, Categorical, Text and Time; a null icon for anything else.
    """
    stamps = (
        (ColumnType.Numeric, "N", "red"),
        (ColumnType.Categorical, "C", "green"),
        (ColumnType.Text, "S", "black"),
        (ColumnType.Time, "T", "deepskyblue"),
    )
    for kind, letter, color in stamps:
        if coltype == kind:
            return QIcon(StampIconEngine(letter, QColor(color)))
    return QIcon()
1467
1468
class SkipItemDelegate(PreviewItemDelegate):
    """Paint items over a red diagonal cross hatch background."""
    def initStyleOption(self, option, index):
        # type: (QStyleOptionViewItem, QModelIndex) -> None
        super().initStyleOption(option, index)
        palette = option.palette  # type: QPalette
        base = palette.color(QPalette.Base)
        hatch = QColor(Qt.red)
        on_light_base = base.isValid() and base.value() > 127
        if on_light_base:
            # blend on 'light' base, not on dark (low contrast)
            hatch.setAlphaF(0.2)
        option.backgroundBrush = QBrush(hatch, Qt.DiagCrossPattern)
1480
1481
class ColumnValidateItemDelegate(PreviewItemDelegate):
    """
    Delegate that validates cell text by running it through `converter`.

    `converter` is called with the text and signals invalid input by
    raising ValueError; it defaults to `float`.
    """
    def __init__(self, *args, converter=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.converter = converter if converter else float

    def validate(self, value):
        """
        Return True if `value` is one of the recognized missing value
        markers, or if the converter accepts it and the base class
        validation passes as well.
        """
        missing_markers = {"NA", "Na", "na", "n/a", "N/A", "?", "", "."}
        if value in missing_markers:
            return True
        try:
            self.converter(value)
        except ValueError:
            return False
        return super().validate(value)
1496
1497
def number_parser(groupsep, decimalsep):
    # type: (str, str) -> Callable[[str], float]
    """
    Return a function parsing a string to float using the given separators.

    Parameters
    ----------
    groupsep : str
        Thousands grouping separator; it is removed before parsing. An
        empty string means no grouping separator.
    decimalsep : str
        Decimal separator; it is replaced by "." before parsing.

    Returns
    -------
    parser : Callable[[str], float]
        Raises ValueError (from `float`) for text it cannot parse.

    Note
    ----
    Plain `float` is returned when no transformation is well defined or
    needed: `decimalsep` is empty, both separators are equal, or there is no
    grouping separator and the decimal separator is ".".
    """
    if decimalsep == "" or decimalsep == groupsep:
        # Nothing sensible to translate. In particular replacing an empty
        # decimal separator would insert "." between all characters.
        return float
    if groupsep == "":
        if decimalsep == ".":
            return float
        return lambda value: float(value.replace(decimalsep, "."))
    if len(groupsep) == 1 and len(decimalsep) == 1:
        table = {ord(groupsep): None, ord(decimalsep): ord(".")}
        return lambda value: float(value.translate(table))
    # Multi-character separators cannot go through `str.translate`. Remove
    # the grouping separator first so a "." produced for the decimal
    # separator can never be mistaken for (part of) a grouping separator.
    return lambda value: float(
        value.replace(groupsep, "").replace(decimalsep, "."))
1509
1510
1511class TablePreviewModel(QAbstractTableModel):
1512    """
1513    Lazy populated table preview model.
1514
1515    The model reads rows on demand from an 'rows' iterable when requested
1516    (via fetchMore).
1517    Additionally the client can set column/row header data.
1518    """
1519    ColumnTypeRole = Qt.UserRole + 11
1520    RowStateRole = Qt.UserRole + 12
1521
1522    #: Signal emitted when an error occurs while iterating over the preview
1523    #: stream.
1524    errorOccurred = Signal(str)
1525
1526    def __init__(self, *args, **kwargs):
1527        super().__init__(*args, **kwargs)
1528        self.__rowCount = self.__colCount = 0
1529        self.__rows = []
1530        self.__canFetchMore = False
1531        self.__error = None
1532        self.__iter = None
1533        # extra header data for use by setHeaderData
1534        self.__headerData = {
1535            Qt.Horizontal: defaultdict(dict),
1536            Qt.Vertical: defaultdict(dict),
1537        }
1538
1539    def setPreviewStream(self, stream):
1540        # type: (Iterator[List[str]]) -> None
1541        """
1542        Set an iterator over the rows.
1543
1544        The iterator will be advanced on demand by `fetchMore`, while storing
1545        the returned values. Previous stream and its cached data is discarded.
1546        """
1547        self.beginResetModel()
1548        self.__iter = stream
1549        self.__rows = []
1550        self.__rowCount = self.__colCount = 0
1551        self.__canFetchMore = True
1552        self.__error = None
1553        self.endResetModel()
1554
1555    def canFetchMore(self, parent=QModelIndex()):
1556        """Reimplemented."""
1557        if not parent.isValid():
1558            return self.__canFetchMore
1559        else:
1560            return False
1561
1562    def fetchMore(self, parent=QModelIndex()):
1563        """Reimplemented."""
1564        if not parent.isValid():
1565            error = self.__error
1566            if self.__rowCount == 0:
1567                newrows = self.__tryFetchRows(20)
1568            else:
1569                newrows = self.__tryFetchRows(5)
1570
1571            if newrows:
1572                extent = len(newrows), max(len(row) for row in newrows)
1573                rows, cols = self.__rowCount, self.__colCount
1574
1575                self.beginInsertRows(QModelIndex(), rows, rows + extent[0] - 1)
1576                self.__rows.extend(newrows)
1577                self.__rowCount += extent[0]
1578                self.endInsertRows()
1579
1580                if cols < extent[1]:
1581                    newColCount = max(cols, extent[1])
1582                    self.beginInsertColumns(QModelIndex(), cols, newColCount - 1)
1583                    self.__colCount = newColCount
1584                    self.endInsertColumns()
1585
1586            # Emit error after inserting the final rows
1587            if self.__error is not None and self.__error != error:
1588                self.errorOccurred.emit(self.__error)
1589
1590    def __tryFetchRows(self, n=10):
1591        # type: (int) -> List[List[str]]
1592        """
1593        Fetch and return a maximum of `n` rows from the source preview stream.
1594        """
1595        rows = []
1596        for _ in range(n):
1597            try:
1598                row = next(self.__iter)
1599            except StopIteration:
1600                self.__canFetchMore = False
1601                break
1602            except Exception as err:  # pylint: disable=broad-except
1603                print("".join(traceback.format_exception(*sys.exc_info())),
1604                      file=sys.stderr)
1605                self.__error = format_exception(err)
1606                self.__canFetchMore = False
1607                break
1608            else:
1609                rows.append(row)
1610        return rows
1611
1612    def rowCount(self, parent=QModelIndex()):
1613        # type: (QModelIndex) -> int
1614        """Reimplemented."""
1615        return 0 if parent.isValid() else self.__rowCount
1616
1617    def columnCount(self, parent=QModelIndex()):
1618        # type: (QModelIndex) -> int
1619        """Reimplemented."""
1620        return 0 if parent.isValid() else self.__colCount
1621
1622    def data(self, index, role=Qt.DisplayRole):
1623        # type: (QModelIndex, int) -> Any
1624        """Reimplemented."""
1625        if not index.isValid():
1626            return None
1627        row, col = index.row(), index.column()
1628        assert self.__rowCount == len(self.__rows)
1629        if not 0 <= row < self.__rowCount:
1630            return None
1631        row = self.__rows[row]
1632
1633        if not 0 <= col < len(row):
1634            return None
1635
1636        value = row[col]
1637        if role == Qt.DisplayRole:
1638            return value
1639        elif role == TablePreviewModel.ColumnTypeRole:
1640            return self.__headerData[Qt.Horizontal][index.column()].get(role)
1641        else:
1642            return None
1643
1644    def headerData(self, section, orientation, role=Qt.DisplayRole):
1645        # type: (int, Qt.Orientation, int) -> Any
1646        """Reimplemented."""
1647        if role == Qt.DisplayRole:
1648            return section + 1
1649        else:
1650            return self.__headerData[orientation][section].get(role)
1651
1652    def setHeaderData(self, section, orientation, value, role=Qt.EditRole):
1653        # type: (int, Qt.Orientation, Any, Qt.ItemDataRole) -> bool
1654        """Reimplemented."""
1655        current = self.__headerData[orientation][section].get(role, None)
1656        if current != value:
1657            if value is None:
1658                del self.__headerData[orientation][section][role]
1659            else:
1660                self.__headerData[orientation][section][role] = value
1661            self.headerDataChanged.emit(orientation, section, section)
1662        return True
1663
1664    def updateHeaderData(self, orientation, values):
1665        # type: (Qt.Orientation, Dict[int, Dict[Qt.ItemDataRole, Any]]) -> None
1666        """
1667        Update/set multiple header sections/roles at once.
1668
1669        Parameters
1670        ----------
1671        orientation : Qt.Orientation
1672        values : Dict[int, Dict[Qt.ItemDataRole, Any]]
1673            A mapping of section indices to mapping of role to values.
1674            e.g. `{1: {Qt.DisplayRole: "A"}}` sets the display text to "A"
1675        """
1676        data = self.__headerData[orientation]
1677        if orientation == Qt.Horizontal:
1678            length = self.__colCount
1679        else:
1680            length = self.__rowCount
1681        sections = []
1682        for section, itemdata in values.items():
1683            if 0 <= section < length:
1684                data[section].update(itemdata)
1685                sections.append(section)
1686        if not sections:
1687            return
1688
1689        first = min(sections)
1690        last = max(sections)
1691
1692        self.headerDataChanged.emit(orientation, first, last)
1693
1694    def flags(self, index):
1695        # type: (QModelIndex) -> Qt.ItemFlags
1696        """Reimplemented."""
1697        # pylint: disable=unused-argument,no-self-use
1698        return Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsEnabled)
1699
1700    def errorString(self):
1701        # type: () -> Optional[str]
1702        """
1703        Return the error string or None if no error occurred.
1704        """
1705        return self.__error
1706
1707
@singledispatch
def format_exception(err):
    """
    Return a short, single-exception description of `err`.

    This is the generic fallback of a `singledispatch` function; it gives
    the "ExceptionType: message" text (as `traceback` formats it) without
    the trailing whitespace/newline. Specialized formatters can be
    registered per exception type.
    """
    lines = traceback.format_exception_only(type(err), err)
    text = "".join(lines)
    return text.rstrip()
1711
1712
@format_exception.register(csv.Error)
def format_exception_csv(err):
    """
    Format a `csv.Error` as "CSV parsing error: <message>".
    """
    return f"CSV parsing error: {err!s}"
1716
1717
# Cached `pandas.to_datetime`; bound on the first `parse_datetime` call so
# that pandas is not imported when this module is loaded.
_to_datetime = None


def parse_datetime(text):
    """
    Parse `text` with `pandas.to_datetime` and return its result.

    pandas is imported lazily on the first call and the converter is then
    cached in the module level `_to_datetime`.
    """
    global _to_datetime
    if _to_datetime is None:
        import pandas
        _to_datetime = pandas.to_datetime
    return _to_datetime(text)
1726
1727
# Sample CSV contents used by `main` when no file path is given on the
# command line. The last record has one more (empty) field than the other
# rows and the data does not end with a newline.
TEST_DATA = (
    b" ,A,B,C,D\n"
    b"1,a,1,1,\n"
    b"2,b,2,2,\n"
    b"3,c,3,3,\n"
    b"4,d,4,4,,"
)
1735
1736
def main(argv=None):  # pragma: no cover
    """
    Show a `CSVImportWidget` previewing a file.

    The file is the first argument remaining after `QApplication` has
    processed `argv`; when there is none the built-in `TEST_DATA` sample
    is previewed instead. The source stream is closed when the event loop
    finishes (or on error).
    """
    app = QApplication(argv or [])
    args = app.arguments()
    widget = CSVImportWidget()
    widget.show()
    widget.raise_()

    if len(args) > 1:
        stream = open(args[1], "rb")
    else:
        stream = io.BytesIO(TEST_DATA)
    # Both the file object and BytesIO close themselves on leaving `with`.
    with stream:
        widget.setSampleContents(stream)
        app.exec()
1754
1755
if __name__ == "__main__":  # pragma: no cover
    # Set the csv module's maximum field size to 4 MiB before starting
    # the preview.
    csv.field_size_limit(4 << 20)
    main(sys.argv)
1759