1 /* Copyright (C) 2005 J.F.Dockes
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the
14 * Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17 #ifndef TEST_IDFILE
18 #include "autoconfig.h"
19
20 #include <stdlib.h>
21 #include <ctype.h>
22 #include <cstring>
23
24 #include <fstream>
25 #include <sstream>
26
27 #include "idfile.h"
28 #include "log.h"
29
30 using namespace std;
31
// Workaround for spurious valgrind multithreading warnings about the
// initialization of treat_mbox_as_rfc822: the flag is set exactly once,
// by the constructor of a file-scope object, during static initialization.
// The flag itself is undocumented; it is 1 when RECOLL_TREAT_MBOX_AS_RFC822
// exists in the environment and -1 when it does not.
static int treat_mbox_as_rfc822;
struct InitTMAR {
    InitTMAR() {
        if (getenv("RECOLL_TREAT_MBOX_AS_RFC822")) {
            treat_mbox_as_rfc822 = 1;
        } else {
            treat_mbox_as_rfc822 = -1;
        }
    }
};
// The only instance: constructing it performs the initialization above.
static InitTMAR initTM;
43
44 /**
45 * This code is currently ONLY used to identify mbox and mail message files
46 * which are badly handled by standard mime type identifiers
47 * There is a very old (circa 1990) mbox format using blocks of ^A (0x01) chars
48 * to separate messages, that we don't recognize currently
49 */
50
// Header names that incoming lines are compared to. Each entry includes
// the colon and the space which follows it.
static const char *mailhs[] = {
    "From: ",
    "Received: ",
    "Message-Id: ",
    "To: ",
    "Date: ",
    "Subject: ",
    "Status: ",
    "In-Reply-To: "
};
// Length of each mailhs entry, in the same order. Must be kept in sync
// with the table above.
static const int mailhsl[] = {
    6,   // "From: "
    10,  // "Received: "
    12,  // "Message-Id: "
    4,   // "To: "
    6,   // "Date: "
    9,   // "Subject: "
    8,   // "Status: "
    13   // "In-Reply-To: "
};
// Number of entries in mailhs
static const int nmh = sizeof(mailhs) / sizeof(mailhs[0]);

// Match count to reach before the data is reported as mail
const int wantnhead = 3;
59
60 // fn is for message printing
idFileInternal(istream & input,const char * fn)61 static string idFileInternal(istream& input, const char *fn)
62 {
63 bool line1HasFrom = false;
64 bool gotnonempty = false;
65 int lookslikemail = 0;
66
67 // emacs VM sometimes inserts very long lines with continuations or
68 // not (for folder information). This forces us to look at many
69 // lines and long ones
70 int lnum = 1;
71 for (int loop = 1; loop < 200; loop++, lnum++) {
72
73 #define LL 2*1024
74 char cline[LL+1];
75 cline[LL] = 0;
76 input.getline(cline, LL-1);
77 if (input.fail()) {
78 if (input.bad()) {
79 LOGERR("idfile: error while reading [" << (fn) << "]\n" );
80 return string();
81 }
82 // Must be eof ?
83 break;
84 }
85
86 // gcount includes the \n
87 std::streamsize ll = input.gcount() - 1;
88 if (ll > 0)
89 gotnonempty = true;
90
91 LOGDEB2("idfile: lnum " << (lnum) << " ll " << ((unsigned int)ll) << ": [" << (cline) << "]\n" );
92
93 // Check for a few things that can't be found in a mail file,
94 // (optimization to get a quick negative)
95
96 // Empty lines
97 if (ll <= 0) {
98 // Accept a few empty lines at the beginning of the file,
99 // otherwise this is the end of headers
100 if (gotnonempty || lnum > 10) {
101 LOGDEB2("Got empty line\n" );
102 break;
103 } else {
104 // Don't increment the line counter for initial empty lines.
105 lnum--;
106 continue;
107 }
108 }
109
110 // emacs vm can insert VERY long header lines.
111 if (ll > LL - 20) {
112 LOGDEB2("idFile: Line too long\n" );
113 return string();
114 }
115
116 // Check for mbox 'From ' line
117 if (lnum == 1 && !strncmp("From ", cline, 5)) {
118 if (treat_mbox_as_rfc822 == -1) {
119 line1HasFrom = true;
120 LOGDEB2("idfile: line 1 has From_\n" );
121 }
122 continue;
123 }
124
125 // Except for a possible first line with 'From ', lines must
126 // begin with whitespace or have a colon
127 // (hope no one comes up with a longer header name !
128 // Take care to convert to unsigned char because ms ctype does
129 // like negative values
130 if (!isspace((unsigned char)cline[0])) {
131 char *cp = strchr(cline, ':');
132 if (cp == 0 || (cp - cline) > 70) {
133 LOGDEB2("idfile: can't be mail header line: [" << (cline) << "]\n" );
134 break;
135 }
136 }
137
138 // Compare to known headers
139 for (int i = 0; i < nmh; i++) {
140 if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
141 //fprintf(stderr, "Got [%s]\n", mailhs[i]);
142 lookslikemail++;
143 break;
144 }
145 }
146 if (lookslikemail >= wantnhead)
147 break;
148 }
149 if (line1HasFrom)
150 lookslikemail++;
151
152 if (lookslikemail >= wantnhead)
153 return line1HasFrom ? string("text/x-mail") : string("message/rfc822");
154
155 return string();
156 }
157
idFile(const char * fn)158 string idFile(const char *fn)
159 {
160 ifstream input;
161 input.open(fn, ios::in);
162 if (!input.is_open()) {
163 LOGERR("idFile: could not open [" << (fn) << "]\n" );
164 return string();
165 }
166 return idFileInternal(input, fn);
167 }
168
/**
 * Identify data which is already in memory.
 *
 * @param data the bytes to examine.
 * @return the value computed by idFileInternal() when reading data through
 *    an input-only string stream. The file name used for messages is empty.
 */
string idFileMem(const string& data)
{
    istringstream memin(data);
    return idFileInternal(memin, "");
}
174
175 #else
176
177 #include <stdio.h>
178 #include <stdlib.h>
179
180 #include <string>
181 #include <iostream>
182
183 #include <fcntl.h>
184
185 using namespace std;
186
187 #include "log.h"
188
189 #include "idfile.h"
190
main(int argc,char ** argv)191 int main(int argc, char **argv)
192 {
193 if (argc < 2) {
194 cerr << "Usage: idfile filename" << endl;
195 exit(1);
196 }
197 DebugLog::getdbl()->setloglevel(DEBDEB1);
198 DebugLog::setfilename("stderr");
199 for (int i = 1; i < argc; i++) {
200 string mime = idFile(argv[i]);
201 cout << argv[i] << " : " << mime << endl;
202 }
203 exit(0);
204 }
205
206 #endif
207
208