1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
2
3 /* AbiWord
4 * Copyright (C) 2005 Dom Lachowicz <cinamod@hotmail.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * 02110-1301 USA.
20 */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26
27 #include "ut_string.h"
28 #include "ut_types.h"
29 #include "ie_imp_Text.h"
30
31 #include <gsf/gsf-input-stdio.h>
32 #include <gsf/gsf-output-stdio.h>
33
34 #include "xap_Module.h"
35
36 #ifdef ABI_PLUGIN_BUILTIN
37 #define abi_plugin_register abipgn_pdf_register
38 #define abi_plugin_unregister abipgn_pdf_unregister
39 #define abi_plugin_supports_version abipgn_pdf_supports_version
40 // dll exports break static linking
41 #define ABI_BUILTIN_FAR_CALL extern "C"
42 #else
43 #define ABI_BUILTIN_FAR_CALL ABI_FAR_CALL
44 ABI_PLUGIN_DECLARE("PDF")
45 #endif
46
/*
 * External programs that can convert a PDF into a format that can be
 * imported.  Each entry pairs the program's name with the file suffix used
 * to pick the importer for the program's output.  Entries are tried in the
 * order listed here.
 */
static const struct
{
	const char *conversion_program;
	const char *extension;
} pdf_conversion_programs[] = {
	{ "pdftoabw",  ".abw" },
	{ "pdftotext", ".txt" }
};
55
temp_name(UT_String & out_filename)56 static UT_Error temp_name (UT_String& out_filename)
57 {
58 char *temporary_file = NULL;
59 GError *err = NULL;
60 gint tmp_fp = g_file_open_tmp ("XXXXXX", &temporary_file, &err);
61
62 if (err)
63 {
64 g_warning ("%s", err->message);
65 g_error_free (err); err = NULL;
66 return UT_ERROR;
67 }
68
69 out_filename = temporary_file;
70 g_free (temporary_file);
71 close(tmp_fp);
72 return UT_OK;
73 }
74
75 class IE_Imp_PDF : public IE_Imp
76 {
77 public:
78
IE_Imp_PDF(PD_Document * pDocument)79 IE_Imp_PDF (PD_Document * pDocument)
80 : IE_Imp(pDocument)
81 {
82 }
83
~IE_Imp_PDF()84 virtual ~IE_Imp_PDF ()
85 {
86 }
87
_runConversion(const UT_String & pdf_on_disk,const UT_String & output_on_disk,size_t which)88 UT_Error _runConversion(const UT_String& pdf_on_disk, const UT_String& output_on_disk, size_t which)
89 {
90 UT_Error rval = UT_ERROR;
91
92 const char * pdftoabw_argv[4];
93
94 int argc = 0;
95 pdftoabw_argv[argc++] = pdf_conversion_programs[which].conversion_program;
96 pdftoabw_argv[argc++] = pdf_on_disk.c_str ();
97 pdftoabw_argv[argc++] = output_on_disk.c_str ();
98 pdftoabw_argv[argc++] = NULL;
99
100 // run conversion
101 if (g_spawn_sync (NULL,
102 (gchar **)pdftoabw_argv,
103 NULL,
104 (GSpawnFlags)(G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL | G_SPAWN_STDERR_TO_DEV_NULL),
105 NULL,
106 NULL,
107 NULL,
108 NULL,
109 NULL,
110 NULL))
111 {
112 char * uri = UT_go_filename_to_uri (output_on_disk.c_str ());
113 if (uri)
114 {
115 // import the document
116 rval = IE_Imp::loadFile (getDoc (), uri, IE_Imp::fileTypeForSuffix (pdf_conversion_programs[which].extension));
117 g_free (uri);
118 }
119 }
120
121 return rval;
122 }
123
_loadFile(GsfInput * input)124 virtual UT_Error _loadFile(GsfInput * input)
125 {
126 UT_Error rval = UT_ERROR;
127
128 UT_String pdf_on_disk, abw_on_disk;
129
130 // create temporary file names
131 rval = temp_name (pdf_on_disk);
132 if (rval != UT_OK) return rval;
133
134 rval = temp_name (abw_on_disk);
135 if (rval != UT_OK) return rval;
136
137 GsfOutput * output = gsf_output_stdio_new (pdf_on_disk.c_str (), NULL);
138 if (output)
139 {
140 // copy input to disk
141 gboolean copy_res = gsf_input_copy (input, output);
142
143 gsf_output_close (output);
144 g_object_unref (G_OBJECT (output));
145
146 if (copy_res)
147 {
148 for (size_t i = 0; i < G_N_ELEMENTS(pdf_conversion_programs); i++)
149 {
150 if ((rval = _runConversion(pdf_on_disk, abw_on_disk, i)) == UT_OK)
151 break;
152 }
153 }
154 }
155
156 // remove temporary files
157 remove(pdf_on_disk.c_str ());
158 remove(abw_on_disk.c_str ());
159
160 return rval;
161 }
162
163 };
164
165 /*****************************************************************/
166 /*****************************************************************/
167
168 // supported suffixes
169 static IE_SuffixConfidence IE_Imp_PDF_Sniffer__SuffixConfidence[] = {
170 { "pdf", UT_CONFIDENCE_PERFECT },
171 { "", UT_CONFIDENCE_ZILCH }
172 };
173
174 // supported mimetypes
175 static IE_MimeConfidence IE_Imp_PDF_Sniffer__MimeConfidence[] = {
176 { IE_MIME_MATCH_FULL, "application/pdf", UT_CONFIDENCE_PERFECT },
177 { IE_MIME_MATCH_BOGUS, "", UT_CONFIDENCE_ZILCH }
178 };
179
180 class IE_Imp_PDF_Sniffer : public IE_ImpSniffer
181 {
182 public:
183
IE_Imp_PDF_Sniffer()184 IE_Imp_PDF_Sniffer()
185 : IE_ImpSniffer("application/pdf", false)
186 {
187 }
188
~IE_Imp_PDF_Sniffer()189 virtual ~IE_Imp_PDF_Sniffer()
190 {
191 }
192
getSuffixConfidence()193 const IE_SuffixConfidence * getSuffixConfidence ()
194 {
195 return IE_Imp_PDF_Sniffer__SuffixConfidence;
196 }
197
getMimeConfidence()198 const IE_MimeConfidence * getMimeConfidence ()
199 {
200 return IE_Imp_PDF_Sniffer__MimeConfidence;
201 }
202
recognizeContents(const char * szBuf,UT_uint32)203 virtual UT_Confidence_t recognizeContents (const char * szBuf,
204 UT_uint32 /*iNumbytes*/)
205 {
206 if (!strncmp (szBuf, "%PDF-", 5))
207 return UT_CONFIDENCE_PERFECT;
208 return UT_CONFIDENCE_ZILCH;
209 }
210
getDlgLabels(const char ** pszDesc,const char ** pszSuffixList,IEFileType * ft)211 virtual bool getDlgLabels (const char ** pszDesc,
212 const char ** pszSuffixList,
213 IEFileType * ft)
214 {
215 *pszDesc = "PDF (.pdf)";
216 *pszSuffixList = "*.pdf";
217 *ft = getFileType();
218 return true;
219 }
220
constructImporter(PD_Document * pDocument,IE_Imp ** ppie)221 virtual UT_Error constructImporter (PD_Document * pDocument,
222 IE_Imp ** ppie)
223 {
224 *ppie = new IE_Imp_PDF(pDocument);
225 return UT_OK;
226 }
227 };
228
229 /*****************************************************************/
230 /* General plugin stuff */
231 /*****************************************************************/
232
233 // we use a reference-counted sniffer
234 static IE_Imp_PDF_Sniffer * m_impSniffer = 0;
235
236 ABI_BUILTIN_FAR_CALL
abi_plugin_register(XAP_ModuleInfo * mi)237 int abi_plugin_register (XAP_ModuleInfo * mi)
238 {
239 for (size_t i = 0; i < G_N_ELEMENTS(pdf_conversion_programs); i++)
240 {
241 gchar * prog_path;
242
243 prog_path = g_find_program_in_path (pdf_conversion_programs[i].conversion_program);
244 if (prog_path)
245 {
246 // don't register the plugin if it can't find pdftoabw
247 g_free (prog_path);
248
249 if (!m_impSniffer)
250 {
251 m_impSniffer = new IE_Imp_PDF_Sniffer ();
252 }
253
254 mi->name = "PDF Import Filter";
255 mi->desc = "Import Adobe PDF Documents";
256 mi->version = ABI_VERSION_STRING;
257 mi->author = "Dom Lachowicz <cinamod@hotmail.com>";
258 mi->usage = "No Usage";
259
260 IE_Imp::registerImporter (m_impSniffer);
261 return 1;
262 }
263 }
264
265 return 0;
266 }
267
268 ABI_BUILTIN_FAR_CALL
abi_plugin_unregister(XAP_ModuleInfo * mi)269 int abi_plugin_unregister (XAP_ModuleInfo * mi)
270 {
271 mi->name = 0;
272 mi->desc = 0;
273 mi->version = 0;
274 mi->author = 0;
275 mi->usage = 0;
276
277 if (m_impSniffer)
278 {
279 IE_Imp::unregisterImporter (m_impSniffer);
280 delete m_impSniffer;
281 m_impSniffer = 0;
282 }
283
284 return 1;
285 }
286
287 ABI_BUILTIN_FAR_CALL
abi_plugin_supports_version(UT_uint32,UT_uint32,UT_uint32)288 int abi_plugin_supports_version (UT_uint32 /*major*/, UT_uint32 /*minor*/,
289 UT_uint32 /*release*/)
290 {
291 return 1;
292 }
293