1 /*
2     historyimport.cpp
3 
4     Copyright (c) 2010 by Timo Schluessler
5 
6     Kopete    (c) 2010 by the Kopete developers  <kopete-devel@kde.org>
7 
8     *************************************************************************
9     *                                                                       *
10     * This program is free software; you can redistribute it and/or modify  *
11     * it under the terms of the GNU General Public License as published by  *
12     * the Free Software Foundation; either version 2 of the License, or     *
13     * (at your option) any later version.                                   *
14     *                                                                       *
15     *************************************************************************
16 */
17 
18 #include "historyimport.h"
19 
20 #include <QStack>
21 #include <QDir>
22 #include <QTextEdit>
23 #include <QTreeView>
24 #include <QPushButton>
25 #include <QCheckBox>
26 #include <QGridLayout>
27 #include <QStandardItemModel>
28 #include <QProgressDialog>
29 #include <QMessageBox>
30 #include <QFileDialog>
31 #include <QApplication>
32 #include <QXmlStreamReader>
33 
34 #include <kdebug.h>
35 #include <klocale.h>
36 
37 #include <kopetecontactlist.h>
38 #include <kopetemetacontact.h>
39 #include <kopeteprotocol.h>
40 #include <kopeteaccount.h>
41 #include <kopetecontact.h>
42 #include <kopetemessage.h>
43 
44 #include "historylogger.h"
45 
HistoryImport(QWidget * parent)46 HistoryImport::HistoryImport(QWidget *parent)
47     : KDialog(parent)
48 {
49     // set dialog settings
50     setButtons(KDialog::Ok | KDialog::Details | KDialog::Cancel);
51     setWindowTitle(KDialog::makeStandardCaption(i18n("Import History")));
52     setButtonText(KDialog::Ok, i18n("Import Listed Logs"));
53 
54     // create widgets
55     QWidget *w = new QWidget(this);
56     QGridLayout *l = new QGridLayout(w);
57 
58     display = new QTextEdit(w);
59     display->setReadOnly(true);
60     treeView = new QTreeView(w);
61 
62     QPushButton *fromPidgin = new QPushButton(i18n("Get History From &Pidgin..."), w);
63 
64     l->addWidget(treeView, 0, 0, 1, 3);
65     l->addWidget(display, 0, 4, 1, 10);
66     l->addWidget(fromPidgin, 1, 0);
67 
68     setMainWidget(w);
69 
70     // create details widget
71     QWidget *details = new QWidget(w);
72     QVBoxLayout *dL = new QVBoxLayout(details);
73 
74     QTextEdit *detailsEdit = new QTextEdit(details);
75     detailsEdit->setReadOnly(true);
76     selectByHand = new QCheckBox(i18n("Select log directory by hand"), details);
77 
78     dL->addWidget(selectByHand);
79     dL->addWidget(detailsEdit);
80 
81     setDetailsWidget(details);
82     detailsCursor = QTextCursor(detailsEdit->document());
83 
84     // create model for treeView
85     QStandardItemModel *model = new QStandardItemModel(treeView);
86     treeView->setModel(model);
87     model->setHorizontalHeaderLabels(QStringList(i18n("Parsed History")));
88 
89     // connect everything
90     connect(treeView, SIGNAL(clicked(QModelIndex)), this, SLOT(itemClicked(QModelIndex)));
91     connect(fromPidgin, SIGNAL(clicked()), this, SLOT(importPidgin()));
92     connect(this, SIGNAL(okClicked()), this, SLOT(save()));
93 
94     // define variables
95     amount = 0;
96     cancel = false;
97     pidginImported = false;
98 
99     timeFormats << "(MM/dd/yyyy hh:mm:ss)" << "(MM/dd/yyyy hh:mm:ss AP)" << "(MM/dd/yy hh:mm:ss)" << "(MM/dd/yy hh:mm:ss AP)" << "(dd.MM.yyyy hh:mm:ss)" << "(dd.MM.yyyy hh:mm:ss AP)"
100                 << "(dd.MM.yy hh:mm:ss)" << "(dd.MM.yyyy hh:mm:ss AP)" << "(dd/MM/yyyy hh:mm:ss)" << "(dd/MM/yyyy hh:mm:ss AP)" << "(dd/MM/yy hh:mm:ss)" << "(dd/MM/yy hh:mm:ss AP)";
101 
102     show();
103 }
104 
~HistoryImport(void)105 HistoryImport::~HistoryImport(void)
106 {
107 }
108 
save(void)109 void HistoryImport::save(void)
110 {
111     QProgressDialog progress(i18n("Saving logs to disk ..."), i18n("Abort Saving"), 0, amount, this);
112     progress.setWindowTitle(i18n("Saving"));
113 
114     Log log;
115 
116     foreach (log, logs) {
117         HistoryLogger logger(log.other, this);
118         Message message;
119         foreach (message, log.messages) {
120             Kopete::Message kMessage;
121             if (message.incoming) {
122                 kMessage = Kopete::Message(log.other, log.me);
123                 kMessage.setDirection(Kopete::Message::Inbound);
124             } else {
125                 kMessage = Kopete::Message(log.me, log.other);
126                 kMessage.setDirection(Kopete::Message::Outbound);
127             }
128             kMessage.setPlainBody(message.text);
129             kMessage.setTimestamp(message.timestamp);
130             logger.appendMessage(kMessage, log.other);
131 
132             progress.setValue(progress.value()+1);
133             qApp->processEvents();
134             if (progress.wasCanceled()) {
135                 cancel = true;
136                 break;
137             }
138         }
139         if (cancel) {
140             break;
141         }
142     }
143 }
144 
displayLog(struct Log * log)145 void HistoryImport::displayLog(struct Log *log)
146 {
147     Message message;
148 
149     QList<QStandardItem *> items;
150     QStringList strings;
151 
152     items << static_cast<QStandardItemModel *>(treeView->model())->invisibleRootItem();
153     items << NULL << NULL << NULL;
154     strings << "" << "" << "";
155 
156     foreach (message, log->messages) {
157         amount++; // for QProgressDialog in save()
158 
159         strings[0] = log->other->protocol()->pluginId() + " (" + log->other->account()->accountId() + ')';
160         strings[1] = log->other->displayName();
161         strings[2] = message.timestamp.toString("yyyy-MM-dd");
162 
163         bool update = false;
164         int i;
165         for (i = 1; i < 4; i++) {
166             if (update || !items.at(i) || items.at(i)->data(Qt::DisplayRole) != strings.at(i-1)) {
167                 items[i] = findItem(strings.at(i-1), items.at(i-1));
168                 update = true;
169             } //else
170               //kDebug(14310) << "using cached item";
171         }
172 
173         if (!items.at(3)->data(Qt::UserRole).isValid()) {
174             items[3]->setData((int)logs.indexOf(*log), Qt::UserRole);
175         }
176     }
177 }
178 
findItem(const QString & text,QStandardItem * parent)179 QStandardItem *HistoryImport::findItem(const QString &text, QStandardItem *parent)
180 {
181     int i;
182     bool found = false;
183     QStandardItem *child = 0L;
184 
185     for (i = 0; i < parent->rowCount(); i++) {
186         child = parent->child(i, 0);
187         if (child->data(Qt::DisplayRole) == text) {
188             found = true;
189             break;
190         }
191     }
192     if (!found) {
193         child = new QStandardItem(text);
194         parent->appendRow(child);
195     }
196 
197     return child;
198 }
199 
itemClicked(const QModelIndex & index)200 void HistoryImport::itemClicked(const QModelIndex &index)
201 {
202     QVariant id = index.data(Qt::UserRole);
203 
204     if (id.canConvert<int>()) {
205         Log log = logs.at(id.toInt());
206         display->document()->clear();
207         QTextCursor cursor(display->document());
208 
209         Message message;
210         QDate date = QDate::fromString(index.data(Qt::DisplayRole).toString(), "yyyy-MM-dd");
211         foreach (message, log.messages) {
212             if (date != message.timestamp.date()) {
213                 continue;
214             }
215             cursor.insertText(message.timestamp.toString("hh:mm:ss "));
216             if (message.incoming) {
217                 cursor.insertText(log.other->displayName().append(": "));
218             } else {
219                 cursor.insertText(log.me->displayName().append(": "));
220             }
221             cursor.insertText(message.text);
222             cursor.insertBlock();
223         }
224     }
225 }
226 
countLogs(QDir dir,int depth)227 int HistoryImport::countLogs(QDir dir, int depth)
228 {
229     int res = 0;
230     QStack<int> pos;
231     QStringList files;
232     pos.push(0);
233 
234     depth++;
235 
236     forever {
237         files = dir.entryList(QDir::Dirs | QDir::NoDotAndDotDot);
238 
239         if (pos.size() == depth) {
240             res += dir.entryList(QDir::Files).size();
241         }
242         if (files.isEmpty() || files.size() <= pos.top() || pos.size() == depth) {
243             dir.cdUp();
244             pos.pop();
245             if (pos.isEmpty()) {
246                 break;
247             }
248             pos.top()++;
249         } else if (pos.size() != depth) {
250             dir.cd(files.at(pos.top()));
251             pos.push(0);
252         }
253     }
254 
255     return res;
256 }
257 
importPidgin()258 void HistoryImport::importPidgin()
259 {
260     if (pidginImported) {
261         if (QMessageBox::question(this,
262                                   i18n("Are You Sure?"),
263                                   i18n("You already imported logs from pidgin. If you do it twice, each message is imported twice.\nAre you sure you want to continue?"),
264                                   QMessageBox::Yes | QMessageBox::No,
265                                   QMessageBox::No) != QMessageBox::Yes) {
266             return;
267         }
268     }
269     pidginImported = true;
270 
271     QDir logDir = QDir::homePath();
272     if (selectByHand->isChecked() || !logDir.cd(".purple/logs")) {
273         logDir = QFileDialog::getExistingDirectory(mainWidget(), i18n("Select Log Directory"), QDir::homePath());
274     }
275 
276     int total = countLogs(logDir, 3);
277     QProgressDialog progress(i18n("Parsing history from pidgin ..."), i18n("Abort parsing"), 0, total, mainWidget());
278     progress.setWindowTitle(i18n("Parsing history"));
279     progress.show();
280     cancel = false;
281 
282     // protocolMap maps pidgin account-names to kopete protocol names (as in Kopete::Contact::protocol()->pluginId())
283     QHash<QString, QString> protocolMap;
284     protocolMap.insert("msn", "WlmProtocol");
285     protocolMap.insert("icq", "ICQProtocol");
286     protocolMap.insert("aim", "AIMProtocol");
287     protocolMap.insert("jabber", "JabberProtocol");
288     protocolMap.insert("yahoo", "YahooProtocol");
289     protocolMap.insert("qq", "QQProtocol");
290     protocolMap.insert("irc", "IRCProtocol");
291     protocolMap.insert("gadu-gadu", "GaduProtocol");
292     protocolMap.insert("bonjour", "BonjourProtocol");
293     protocolMap.insert("meanwhile", "MeanwhileProtocol");
294 
295     QString protocolFolder;
296     foreach (protocolFolder, logDir.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
297         logDir.cd(protocolFolder);
298 
299         QString accountFolder;
300         foreach (accountFolder, logDir.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
301             logDir.cd(accountFolder);
302 
303             // check if we can map the protocol
304             if (!protocolMap.contains(protocolFolder)) {
305                 detailsCursor.insertText(i18n("WARNING: There is no equivalent for protocol %1 in kopete.\n", protocolFolder));
306                 logDir.cdUp();
307                 continue;
308             }
309             const QString &protocol = protocolMap.value(protocolFolder);
310 
311             // TODO use findContact?
312             Kopete::ContactList *cList = Kopete::ContactList::self();
313             QList<Kopete::Contact *> meList = cList->myself()->contacts();
314             Kopete::Contact *me;
315             bool found = false;
316             foreach (me, meList) {
317                 if (me->protocol()->pluginId() == protocol && me->account()->accountId().contains(accountFolder, Qt::CaseInsensitive)) {
318                     found = true;
319                     break;
320                 }
321             }
322             if (!found) {
323                 detailsCursor.insertText(i18n("WARNING: Cannot find matching account for %1 (%2).\n", accountFolder, protocolFolder));
324                 logDir.cdUp();
325                 continue;
326             }
327 
328             QString chatPartner;
329             foreach (chatPartner, logDir.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
330                 logDir.cd(chatPartner);
331 
332                 Kopete::Contact *other = cList->findContact(me->protocol()->pluginId(), me->account()->accountId(), chatPartner);
333                 struct Log log;
334                 if (!other) {
335                     detailsCursor.insertText(i18n("WARNING: Cannot find %1 (%2) in your contact list. Found logs will not be imported.\n", chatPartner, protocolFolder));
336                     logDir.cdUp();
337                     continue;
338                 } else {
339                     log.me = me;
340                     log.other = other;
341                 }
342 
343                 QString logFile;
344                 QStringList filter;
345                 filter << "*.html" << "*.txt";
346                 foreach (logFile, logDir.entryList(filter, QDir::Files)) {
347                     QFile file(logDir.filePath(logFile));
348                     if (!file.open(QIODevice::ReadOnly)) {
349                         detailsCursor.insertText(i18n("WARNING: Cannot open file %1. Skipping.\n", logDir.filePath(logFile)));
350                         continue;
351                     }
352 
353                     if (logFile.endsWith(".html")) {
354                         parsePidginXml(file, &log, QDate::fromString(logFile.left(10), "yyyy-MM-dd"));
355                     } else if (logFile.endsWith(".txt")) {
356                         parsePidginTxt(file, &log, QDate::fromString(logFile.left(10), "yyyy-MM-dd"));
357                     }
358 
359                     file.close();
360 
361                     progress.setValue(progress.value()+1);
362                     qApp->processEvents();
363                     if (cancel || progress.wasCanceled()) {
364                         cancel = true;
365                         break;
366                     }
367                 }
368 
369                 logs.append(log);
370                 displayLog(&log);
371 
372                 if (cancel) {
373                     break;
374                 }
375                 logDir.cdUp();
376             }
377             if (cancel) {
378                 break;
379             }
380             logDir.cdUp();
381         }
382         if (cancel) {
383             break;
384         }
385         logDir.cdUp();
386     }
387 }
388 
extractTime(const QString & string,QDate ref)389 QDateTime HistoryImport::extractTime(const QString &string, QDate ref)
390 {
391     QDateTime dateTime;
392     QTime time;
393 
394     // try some formats used by pidgin
395     if ((time = QTime::fromString(string, "(hh:mm:ss)")).isValid()) {
396     } else if ((time = QTime::fromString(string, "(hh:mm:ss AP)")).isValid()) {
397     } else {
398         QString format;
399         foreach (format, timeFormats) {
400             if ((dateTime = QDateTime::fromString(string, format)).isValid()) {
401                 break;
402             }
403         }
404     }
405 
406     // check if the century in dateTime is equal to that of our date reference
407     if (dateTime.isValid()) {
408         int diff = ref.year() - dateTime.date().year();
409         dateTime = dateTime.addYears(diff - (diff % 100));
410     }
411 
412     // if string contains only a time we use ref as date
413     if (time.isValid()) {
414         dateTime = QDateTime(ref, time);
415     }
416 
417     // inform the user about the date problems
418     if (!dateTime.isValid()) {
419         detailsCursor.insertText(i18n("WARNING: Cannot parse date \"%1\". You may want to edit the file containing this date manually. (Example recognized date strings: \"%2\".)\n", string,
420                                       dateTime.toString("yyyy-MM-dd hh:mm:ss")));
421     }
422 
423     return dateTime;
424 }
425 
parsePidginTxt(QFile & file,struct Log * log,QDate date)426 void HistoryImport::parsePidginTxt(QFile &file, struct Log *log, QDate date)
427 {
428     QString line;
429     QString nick;
430     struct Message message;
431 
432     // this is to collect unknown nicknames (the list stores the index in log->messages of the messages that used the nickname)
433     // the bool says if that nickname is incoming (only used when the list is empty)
434     QHash<QString, QPair<bool, QList<int> > > nicknames;
435 
436     QTextStream str(&file);
437     // utf-8 seems to be default for pidgins-txt logs
438     str.setCodec("UTF-8");
439 
440     while (!str.atEnd()) {
441         line = str.readLine();
442 
443         if (line[0] == '(') {
444             if (!message.text.isEmpty()) {
445                 /*// message.text contains an unwished newline at the end
446                 if (message.text.endsWith('\n'))
447                     message.text.chop(1); */
448                 log->messages.append(message);
449                 message.text.clear();
450             }
451 
452             int endTime = line.indexOf(')')+1;
453             message.timestamp = extractTime(line.left(endTime), date);
454 
455             int nickEnd = QRegExp("\\s").indexIn(line, endTime + 1);
456             // TODO what if a nickname consists of two words? is this possible?
457             // the following while can't be used because in status logs there is no : after the nickname :(
458             //while (line[nickEnd-1] != ':')
459             //	nickEnd = QRegExp("\\").indexIn(line, nickEnd);
460             if (line[nickEnd -1] != ':') { // this line is a status message
461                 continue;
462             }
463 
464             nick = line.mid(endTime+1, nickEnd - endTime - 2); // -2 to delete the colon
465 
466             // detect if the message is in- or outbound
467             if (nick == log->me->displayName()) {
468                 message.incoming = false;
469             } else if (nick == log->other->displayName()) {
470                 message.incoming = true;
471             } else if (knownNicks.contains(nick)) {
472                 message.incoming = knownNicks.value(nick);
473             } else {
474                 // store this nick for later decision
475                 nicknames[nick].second.append(log->messages.size());
476             }
477             nicknames[nick].first = message.incoming;
478 
479             if (cancel) {
480                 return;
481             }
482 
483             message.text = line.mid(nickEnd + 1);
484         } else if (line[0] == ' ') {
485             // an already started message is continued in this line
486             int start = QRegExp("\\S").indexIn(line);
487             message.text.append('\n' + line.mid(start));
488         }
489     }
490     if (!message.text.isEmpty()) {
491         log->messages.append(message);
492     }
493 
494     // check if we can guess which nickname belongs to us
495     QHash<QString, QPair<bool, QList<int> > >::iterator itr;
496     QHash<QString, QPair<bool, QList<int> > >::iterator itr2;
497     for (itr = nicknames.begin(); itr != nicknames.end(); ++itr) {
498         if (itr->second.isEmpty()) { // no work for this one
499             continue;
500         }
501         bool haveAnother = false, lastIncoming = false;
502         // check against all other nicknames
503         for (itr2 = nicknames.begin(); itr2 != nicknames.end(); ++itr2) {
504             if (itr2 == itr) { // skip ourselves
505                 continue;
506             }
507 
508             // if there is another unknown nickname, we have no chance to guess which is our
509             if (!itr2->second.isEmpty()) {
510                 break;
511             }
512             if (!haveAnother) {
513                 lastIncoming = itr2->first;
514                 haveAnother = true;
515             } else {
516                 // when there are more than one known nicknames, but with different incoming-values, we also can't guess which is ours
517                 if (lastIncoming != itr2->first) {
518                     break;
519                 }
520             }
521         }
522         // we now can guess the incoming value of itr, namely !lastIncoming
523         if (haveAnother && itr2 == nicknames.end()) {
524             // inform the user
525             if (lastIncoming) {
526                 detailsCursor.insertText(i18n("INFORMATION: Guessed %1 to be one of your nicks.\n", itr.key()));
527             } else {
528                 detailsCursor.insertText(i18n("INFORMATION: Guessed %1 to be one of your buddys nicks.\n", itr.key()));
529             }
530 
531             knownNicks.insert(itr.key(), !lastIncoming);
532             int i;
533             for (i = 0; i < itr->second.size(); i++) {
534                 log->messages[itr->second.at(i)].incoming = !lastIncoming;
535             }
536             itr->second.clear(); // we are finished with these indexes
537         }
538     }
539 
540     // iterate once again over the nicknames to detect which nicks are still not known. simply ask the user!
541     for (itr = nicknames.begin(); itr != nicknames.end(); ++itr) {
542         if (itr->second.isEmpty()) { // no word for this one
543             continue;
544         }
545 
546         bool incoming;
547         int r = QMessageBox::question(NULL,
548                                       i18n("Cannot map Nickname to Account"),
549                                       i18n("Did you ever use \"%1\" as nickname in your history?", itr.key()),
550                                       QMessageBox::Yes | QMessageBox::No | QMessageBox::Abort);
551 
552         if (r == QMessageBox::Yes) {
553             knownNicks.insert(itr.key(), false);
554             incoming = true;
555         } else if (r == QMessageBox::No) {
556             knownNicks.insert(itr.key(), true);
557             incoming = false;
558         } else {
559             cancel = true;
560             return;
561         }
562 
563         // set the queried incoming value to our already stored Messages
564         int i;
565         for (i = 0; i < itr->second.size(); i++) {
566             log->messages[itr->second.at(i)].incoming = incoming;
567         }
568     }
569 }
570 
parsePidginXml(QFile & file,struct Log * log,QDate date)571 void HistoryImport::parsePidginXml(QFile &file, struct Log *log, QDate date)
572 {
573     bool inMessage = false, textComes = false;
574     int lineNumber = -1;
575     struct Message msg;
576 
577     // unfortunately pidgin doesn't write <... /> for the <meta> tag
578     QByteArray data = file.readAll();
579     if (data.contains("<meta")) {
580         int metaEnd = data.indexOf(">", data.indexOf("<meta"));
581         if (data.at(metaEnd-1) != '/') {
582             data.insert(metaEnd, '/');
583         }
584     }
585 
586     QXmlStreamReader reader(data);
587 
588     while (!reader.atEnd()) {
589         reader.readNext();
590 
591         // pidgin writes one chat-message per line. so if we come to the next line, we can finish and save the current message
592         if (inMessage && reader.lineNumber() != lineNumber) {
593             if (!msg.text.isEmpty()) {
594                 msg.text = msg.text.trimmed(); // trimm especially unwished newlines and spaces
595                 log->messages.append(msg); // save messge for later import via HistoryLogger (see HistoryImport::save())
596             }
597             textComes = false;
598             inMessage = false;
599         }
600         // when there is only the color attribute for the font-tag, this must be the beginning of a new message
601         if (!inMessage && reader.isStartElement() && reader.name() == "font" && reader.attributes().size() == 1 && reader.attributes().first().name() == "color") {
602             if (reader.attributes().value("color") == "#A82F2F") {
603                 msg.incoming = true;
604             } else {
605                 msg.incoming = false;
606             }
607 
608             while (reader.readNext() != QXmlStreamReader::Characters) { }  // skip tags
609             msg.timestamp = extractTime(reader.text().toString(), date);
610             msg.text.clear();
611             lineNumber = reader.lineNumber();
612             inMessage = true;
613         } else if (inMessage && !textComes && reader.isStartElement() && reader.name() == "b") {
614             reader.readNext(); // this is the nickname, which is followed by the messageText
615             textComes = true;
616         } else if (textComes && reader.isCharacters()) {
617             msg.text += reader.text().toString(); // append text
618         } else if (textComes && reader.isStartElement() && reader.name() == "br") {
619             msg.text += '\n'; // append newline
620         }
621     }
622 
623     if (reader.hasError()) {
624         // we ignore error 4: premature end of document
625         if (reader.error() != 4) {
626             int i, pos = 0;
627             for (i = 1; i < reader.lineNumber(); i++) {
628                 pos = data.indexOf('\n', pos) + 1;
629             }
630             detailsCursor.insertText(i18n("WARNING: XML parser error in %1 at line %2, character %3: %4",
631                                           file.fileName(), reader.lineNumber(), reader.columnNumber(), reader.errorString()));
632             detailsCursor.insertBlock();
633             detailsCursor.insertText(i18n("\t%1", QString(data.mid(pos, data.indexOf('\n', pos) - pos))));
634             detailsCursor.insertBlock();
635         }
636     } else if (inMessage) { // an unsaved message is still pending (this doesn't happen at least for my pidgin-logs - handle it anyway)
637         msg.text = msg.text.trimmed(); // trimm especially unwished newlines and spaces
638         log->messages.append(msg); // save messge for later import via HistoryLogger (see HistoryImport::save())
639     }
640 }
641