1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
2 
3 /* AbiWord
4  * Copyright (C) 2005 Dom Lachowicz <cinamod@hotmail.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301 USA.
20  */
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "ut_string.h"
28 #include "ut_types.h"
29 #include "ie_imp_Text.h"
30 
31 #include <gsf/gsf-input-stdio.h>
32 #include <gsf/gsf-output-stdio.h>
33 
34 #include "xap_Module.h"
35 
36 #ifdef ABI_PLUGIN_BUILTIN
37 #define abi_plugin_register abipgn_pdf_register
38 #define abi_plugin_unregister abipgn_pdf_unregister
39 #define abi_plugin_supports_version abipgn_pdf_supports_version
40 // dll exports break static linking
41 #define ABI_BUILTIN_FAR_CALL extern "C"
42 #else
43 #define ABI_BUILTIN_FAR_CALL ABI_FAR_CALL
44 ABI_PLUGIN_DECLARE("PDF")
45 #endif
46 
// External command-line converters, listed in the order in which they are
// tried. Each entry pairs the program name with the suffix of the file
// format it produces; that suffix selects the importer used to load the
// converted file.
static const struct
{
	const char *conversion_program;	// executable name, looked up on the PATH
	const char *extension;			// suffix of the format the program emits
} pdf_conversion_programs[] = {
	{ "pdftoabw",  ".abw" },
	{ "pdftotext", ".txt" },
};
55 
temp_name(UT_String & out_filename)56 static UT_Error temp_name (UT_String& out_filename)
57 {
58 	char *temporary_file = NULL;
59 	GError *err = NULL;
60 	gint tmp_fp = g_file_open_tmp ("XXXXXX", &temporary_file, &err);
61 
62 	if (err)
63 		{
64 			g_warning ("%s", err->message);
65 			g_error_free (err); err = NULL;
66 			return UT_ERROR;
67 		}
68 
69 	out_filename = temporary_file;
70 	g_free (temporary_file);
71 	close(tmp_fp);
72 	return UT_OK;
73 }
74 
75 class IE_Imp_PDF : public IE_Imp
76 {
77 public:
78 
IE_Imp_PDF(PD_Document * pDocument)79   IE_Imp_PDF (PD_Document * pDocument)
80     : IE_Imp(pDocument)
81   {
82   }
83 
~IE_Imp_PDF()84   virtual ~IE_Imp_PDF ()
85   {
86   }
87 
_runConversion(const UT_String & pdf_on_disk,const UT_String & output_on_disk,size_t which)88 	UT_Error _runConversion(const UT_String& pdf_on_disk, const UT_String& output_on_disk, size_t which)
89 	{
90 		UT_Error rval = UT_ERROR;
91 
92 		const char * pdftoabw_argv[4];
93 
94 		int argc = 0;
95 		pdftoabw_argv[argc++] = pdf_conversion_programs[which].conversion_program;
96 		pdftoabw_argv[argc++] = pdf_on_disk.c_str ();
97 		pdftoabw_argv[argc++] = output_on_disk.c_str ();
98 		pdftoabw_argv[argc++] = NULL;
99 
100 		// run conversion
101 		if (g_spawn_sync (NULL,
102 						  (gchar **)pdftoabw_argv,
103 						  NULL,
104 						  (GSpawnFlags)(G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL | G_SPAWN_STDERR_TO_DEV_NULL),
105 						  NULL,
106 						  NULL,
107 						  NULL,
108 						  NULL,
109 						  NULL,
110 						  NULL))
111 			{
112 				char * uri = UT_go_filename_to_uri (output_on_disk.c_str ());
113 				if (uri)
114 					{
115 						// import the document
116 						rval = IE_Imp::loadFile (getDoc (), uri, IE_Imp::fileTypeForSuffix (pdf_conversion_programs[which].extension));
117 						g_free (uri);
118 					}
119 			}
120 
121 		return rval;
122 	}
123 
_loadFile(GsfInput * input)124   virtual UT_Error _loadFile(GsfInput * input)
125   {
126     UT_Error rval = UT_ERROR;
127 
128 	UT_String pdf_on_disk, abw_on_disk;
129 
130 	// create temporary file names
131 	rval = temp_name (pdf_on_disk);
132 	if (rval != UT_OK) return rval;
133 
134 	rval = temp_name (abw_on_disk);
135 	if (rval != UT_OK) return rval;
136 
137 	GsfOutput * output = gsf_output_stdio_new (pdf_on_disk.c_str (), NULL);
138 	if (output)
139 		{
140 			// copy input to disk
141 			gboolean copy_res = gsf_input_copy (input, output);
142 
143 			gsf_output_close (output);
144 			g_object_unref (G_OBJECT (output));
145 
146 			if (copy_res)
147 				{
148 					for (size_t i = 0; i < G_N_ELEMENTS(pdf_conversion_programs); i++)
149 						{
150 							if ((rval = _runConversion(pdf_on_disk, abw_on_disk, i)) == UT_OK)
151 								break;
152 						}
153 				}
154 		}
155 
156 	// remove temporary files
157 	remove(pdf_on_disk.c_str ());
158 	remove(abw_on_disk.c_str ());
159 
160     return rval;
161   }
162 
163 };
164 
165 /*****************************************************************/
166 /*****************************************************************/
167 
168 // supported suffixes
169 static IE_SuffixConfidence IE_Imp_PDF_Sniffer__SuffixConfidence[] = {
170 	{ "pdf", 	UT_CONFIDENCE_PERFECT 	},
171 	{ "", 	UT_CONFIDENCE_ZILCH 	}
172 };
173 
174 // supported mimetypes
175 static IE_MimeConfidence IE_Imp_PDF_Sniffer__MimeConfidence[] = {
176 	{ IE_MIME_MATCH_FULL, 	"application/pdf", 	UT_CONFIDENCE_PERFECT 	},
177 	{ IE_MIME_MATCH_BOGUS, 	"", 				UT_CONFIDENCE_ZILCH 	}
178 };
179 
180 class IE_Imp_PDF_Sniffer : public IE_ImpSniffer
181 {
182 public:
183 
IE_Imp_PDF_Sniffer()184   IE_Imp_PDF_Sniffer()
185     : IE_ImpSniffer("application/pdf", false)
186   {
187   }
188 
~IE_Imp_PDF_Sniffer()189   virtual ~IE_Imp_PDF_Sniffer()
190   {
191   }
192 
getSuffixConfidence()193   const IE_SuffixConfidence * getSuffixConfidence ()
194   {
195 	return IE_Imp_PDF_Sniffer__SuffixConfidence;
196   }
197 
getMimeConfidence()198   const IE_MimeConfidence * getMimeConfidence ()
199   {
200 	return IE_Imp_PDF_Sniffer__MimeConfidence;
201   }
202 
recognizeContents(const char * szBuf,UT_uint32)203   virtual UT_Confidence_t recognizeContents (const char * szBuf,
204 					     UT_uint32 /*iNumbytes*/)
205   {
206     if (!strncmp (szBuf, "%PDF-", 5))
207       return UT_CONFIDENCE_PERFECT;
208     return UT_CONFIDENCE_ZILCH;
209   }
210 
getDlgLabels(const char ** pszDesc,const char ** pszSuffixList,IEFileType * ft)211   virtual bool getDlgLabels (const char ** pszDesc,
212 							 const char ** pszSuffixList,
213 							 IEFileType * ft)
214   {
215     *pszDesc = "PDF (.pdf)";
216     *pszSuffixList = "*.pdf";
217     *ft = getFileType();
218     return true;
219   }
220 
constructImporter(PD_Document * pDocument,IE_Imp ** ppie)221   virtual UT_Error constructImporter (PD_Document * pDocument,
222 				      IE_Imp ** ppie)
223   {
224     *ppie = new IE_Imp_PDF(pDocument);
225     return UT_OK;
226   }
227 };
228 
229 /*****************************************************************/
230 /* General plugin stuff                                          */
231 /*****************************************************************/
232 
233 // we use a reference-counted sniffer
234 static IE_Imp_PDF_Sniffer * m_impSniffer = 0;
235 
236 ABI_BUILTIN_FAR_CALL
abi_plugin_register(XAP_ModuleInfo * mi)237 int abi_plugin_register (XAP_ModuleInfo * mi)
238 {
239 	for (size_t i = 0; i < G_N_ELEMENTS(pdf_conversion_programs); i++)
240 		{
241 			gchar * prog_path;
242 
243 			prog_path = g_find_program_in_path (pdf_conversion_programs[i].conversion_program);
244 			if (prog_path)
245 				{
246 					// don't register the plugin if it can't find pdftoabw
247 					g_free (prog_path);
248 
249 					if (!m_impSniffer)
250 						{
251 							m_impSniffer = new IE_Imp_PDF_Sniffer ();
252 						}
253 
254 					mi->name    = "PDF Import Filter";
255 					mi->desc    = "Import Adobe PDF Documents";
256 					mi->version = ABI_VERSION_STRING;
257 					mi->author  = "Dom Lachowicz <cinamod@hotmail.com>";
258 					mi->usage   = "No Usage";
259 
260 					IE_Imp::registerImporter (m_impSniffer);
261 					return 1;
262 				}
263 		}
264 
265 	return 0;
266 }
267 
268 ABI_BUILTIN_FAR_CALL
abi_plugin_unregister(XAP_ModuleInfo * mi)269 int abi_plugin_unregister (XAP_ModuleInfo * mi)
270 {
271   mi->name = 0;
272   mi->desc = 0;
273   mi->version = 0;
274   mi->author = 0;
275   mi->usage = 0;
276 
277   if (m_impSniffer)
278 	  {
279 		  IE_Imp::unregisterImporter (m_impSniffer);
280 		  delete m_impSniffer;
281 		  m_impSniffer = 0;
282 	  }
283 
284   return 1;
285 }
286 
287 ABI_BUILTIN_FAR_CALL
abi_plugin_supports_version(UT_uint32,UT_uint32,UT_uint32)288 int abi_plugin_supports_version (UT_uint32 /*major*/, UT_uint32 /*minor*/,
289 								 UT_uint32 /*release*/)
290 {
291   return 1;
292 }
293