1 /* Copyright (C) 2005 J.F.Dockes
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the
14  *   Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16  */
17 #ifndef TEST_IDFILE
18 #include "autoconfig.h"
19 
20 #include <stdlib.h>
21 #include <ctype.h>
22 #include <cstring>
23 
24 #include <fstream>
25 #include <sstream>
26 
27 #include "idfile.h"
28 #include "log.h"
29 
30 using namespace std;
31 
// Bogus code: treat_mbox_as_rfc822 is set from a static object's
// constructor only to avoid bogus valgrind multithreading warnings about
// its initialization. I can't even remember what the variable is for
// (it's not documented or ever set).
// Set once, before main() runs, by the constructor of initTM below:
// 1 if RECOLL_TREAT_MBOX_AS_RFC822 exists in the environment, else -1.
static int treat_mbox_as_rfc822;

// Helper whose only job is to give treat_mbox_as_rfc822 its value during
// static initialization.
struct InitTMAR {
    InitTMAR() {
        if (getenv("RECOLL_TREAT_MBOX_AS_RFC822")) {
            treat_mbox_as_rfc822 = 1;
        } else {
            treat_mbox_as_rfc822 = -1;
        }
    }
};
static InitTMAR initTM;
43 
/**
 * This code is currently ONLY used to identify mbox and mail message files,
 * which are badly handled by standard MIME type identifiers.
 * There is a very old (circa 1990) mbox format which uses blocks of
 * ^A (0x01) chars to separate messages; we do not currently recognize it.
 */
50 
// Mail header names that input lines are compared to. Each entry
// includes the colon and the space which follows it.
static const char *mailhs[] = {
    "From: ",
    "Received: ",
    "Message-Id: ",
    "To: ",
    "Date: ",
    "Subject: ",
    "Status: ",
    "In-Reply-To: "
};
// Number of characters to compare for each entry of mailhs (same order).
static const int mailhsl[] = {
    6,   // From:
    10,  // Received:
    12,  // Message-Id:
    4,   // To:
    6,   // Date:
    9,   // Subject:
    8,   // Status:
    13   // In-Reply-To:
};
// Number of entries in mailhs.
static const int nmh = sizeof(mailhs) / sizeof(mailhs[0]);

// Number of matches needed before the data is reported as mail.
const int wantnhead = 3;
59 
60 // fn is for message printing
idFileInternal(istream & input,const char * fn)61 static string idFileInternal(istream& input, const char *fn)
62 {
63     bool line1HasFrom = false;
64     bool gotnonempty = false;
65     int lookslikemail = 0;
66 
67     // emacs VM sometimes inserts very long lines with continuations or
68     // not (for folder information). This forces us to look at many
69     // lines and long ones
70     int lnum = 1;
71     for (int loop = 1; loop < 200; loop++, lnum++) {
72 
73 #define LL 2*1024
74     char cline[LL+1];
75     cline[LL] = 0;
76     input.getline(cline, LL-1);
77     if (input.fail()) {
78         if (input.bad()) {
79         LOGERR("idfile: error while reading ["  << (fn) << "]\n" );
80         return string();
81         }
82         // Must be eof ?
83         break;
84     }
85 
86     // gcount includes the \n
87     std::streamsize ll = input.gcount() - 1;
88     if (ll > 0)
89         gotnonempty = true;
90 
91     LOGDEB2("idfile: lnum "  << (lnum) << " ll "  << ((unsigned int)ll) << ": ["  << (cline) << "]\n" );
92 
93     // Check for a few things that can't be found in a mail file,
94     // (optimization to get a quick negative)
95 
96     // Empty lines
97     if (ll <= 0) {
98         // Accept a few empty lines at the beginning of the file,
99         // otherwise this is the end of headers
100         if (gotnonempty || lnum > 10) {
101         LOGDEB2("Got empty line\n" );
102         break;
103         } else {
104         // Don't increment the line counter for initial empty lines.
105         lnum--;
106         continue;
107         }
108     }
109 
110     // emacs vm can insert VERY long header lines.
111     if (ll > LL - 20) {
112         LOGDEB2("idFile: Line too long\n" );
113         return string();
114     }
115 
116     // Check for mbox 'From ' line
117     if (lnum == 1 && !strncmp("From ", cline, 5)) {
118         if (treat_mbox_as_rfc822 == -1) {
119         line1HasFrom = true;
120         LOGDEB2("idfile: line 1 has From_\n" );
121         }
122         continue;
123     }
124 
125     // Except for a possible first line with 'From ', lines must
126     // begin with whitespace or have a colon
127     // (hope no one comes up with a longer header name !
128     // Take care to convert to unsigned char because ms ctype does
129     // like negative values
130     if (!isspace((unsigned char)cline[0])) {
131         char *cp = strchr(cline, ':');
132         if (cp == 0 || (cp - cline) > 70) {
133         LOGDEB2("idfile: can't be mail header line: ["  << (cline) << "]\n" );
134         break;
135         }
136     }
137 
138     // Compare to known headers
139     for (int i = 0; i < nmh; i++) {
140         if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
141         //fprintf(stderr, "Got [%s]\n", mailhs[i]);
142         lookslikemail++;
143         break;
144         }
145     }
146     if (lookslikemail >= wantnhead)
147         break;
148     }
149     if (line1HasFrom)
150     lookslikemail++;
151 
152     if (lookslikemail >= wantnhead)
153     return line1HasFrom ? string("text/x-mail") : string("message/rfc822");
154 
155     return string();
156 }
157 
idFile(const char * fn)158 string idFile(const char *fn)
159 {
160     ifstream input;
161     input.open(fn, ios::in);
162     if (!input.is_open()) {
163     LOGERR("idFile: could not open ["  << (fn) << "]\n" );
164     return string();
165     }
166     return idFileInternal(input, fn);
167 }
168 
idFileMem(const string & data)169 string idFileMem(const string& data)
170 {
171     stringstream s(data, stringstream::in);
172     return idFileInternal(s, "");
173 }
174 
175 #else
176 
177 #include <stdio.h>
178 #include <stdlib.h>
179 
180 #include <string>
181 #include <iostream>
182 
183 #include <fcntl.h>
184 
185 using namespace std;
186 
187 #include "log.h"
188 
189 #include "idfile.h"
190 
main(int argc,char ** argv)191 int main(int argc, char **argv)
192 {
193     if (argc < 2) {
194     cerr << "Usage: idfile filename" << endl;
195     exit(1);
196     }
197     DebugLog::getdbl()->setloglevel(DEBDEB1);
198     DebugLog::setfilename("stderr");
199     for (int i = 1; i < argc; i++) {
200     string mime = idFile(argv[i]);
201     cout << argv[i] << " : " << mime << endl;
202     }
203     exit(0);
204 }
205 
206 #endif
207 
208