1<?xml version="1.0" encoding="utf-8"?>
2<!--
3
4This file allows you to use external programs to extract text from
5more structured file formats. For example, you could use pdftotext to
6extract data from PDF files.
7The format is very close to that used by Beagle, with a few modifications.
8
9There are some limitations to this system: the external programs must
10take a filename on the command line and must output text to standard
11out. You cannot extract any metadata using this system. Using our
12PDF example, you could extract all of the text, but you couldn't
13extract the author of the document as a special field. For that, you
14will have to write a more traditional filter.
15
16Now, an example entry:
17
18<filter>
19  <mimetype>application/msword</mimetype>
20  <charset>utf-8</charset>
21  <command>antiword</command>
22  <arguments>-mUTF-8.txt %s</arguments>
23  <output>text/plain</output>
24</filter>
25
26mimetype - The mime type handled by this filter. This item is required.
27
charset - The charset of the output generated by this filter. This item
is optional, and defaults to whatever the locale charset is.
30
31command - The filename of the command to run. Do not put any command
32line arguments in this. This item is required.
33
34arguments - Any arguments to pass into the given command. The special
35token "%s" means the filename to be passed in. This item is required.
36
output - The mime type of the output generated by the command. For
the output to be handled correctly, this should be one of the basic
types text/plain, text/html, text/xml or application/xml. The special
value SCAN will cause the output to be scanned for its mime type.
This item is optional, and defaults to text/plain.
42
43-->
44
45<external-filters>
<!--
  Filters for the application/vnd.sun.xml.* types.
  Every entry is identical apart from its mime type: unzip is run with
  "-p" on the document and asked for the archive member content.xml, and
  the result is declared as application/xml. No charset is declared for
  these entries.
-->
<filter>
  <mimetype>application/vnd.sun.xml.writer</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.writer.template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.calc</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.calc.template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.draw</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.draw.template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.impress</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.impress.template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.writer.global</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.sun.xml.math</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<!--
  Filters for the application/vnd.oasis.opendocument.* types.
  Every entry is identical apart from its mime type: unzip is run with
  "-p" on the document and asked for the archive member content.xml, and
  the result is declared as application/xml. No charset is declared for
  these entries.
-->
<filter>
  <mimetype>application/vnd.oasis.opendocument.chart</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.database</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.formula</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.graphics</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.graphics-template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.presentation</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.presentation-template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.spreadsheet</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.spreadsheet-template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.text</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.text-master</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.text-template</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<filter>
  <mimetype>application/vnd.oasis.opendocument.text-web</mimetype>
  <command>unzip</command>
  <arguments>-p %s content.xml</arguments>
  <output>application/xml</output>
</filter>
<!--
  Filters for the application/vnd.openxmlformats-officedocument.* types.
  As with the other unzip based entries, unzip is run with "-p" and the
  result is declared as application/xml; only the requested archive
  member differs per format:
    wordprocessingml.document     word/document.xml
    presentationml.presentation   ppt/slides/slide*.xml
    spreadsheetml.sheet           xl/sharedStrings.xml
-->
<filter>
  <mimetype>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mimetype>
  <command>unzip</command>
  <arguments>-p %s word/document.xml</arguments>
  <output>application/xml</output>
</filter>
<!--
  NOTE(review): the slide*.xml pattern can match more than one archive
  member. Presumably unzip then writes the matches one after another,
  which would not be a single well-formed XML document. Confirm that the
  consumer of application/xml output copes with this.
-->
<filter>
  <mimetype>application/vnd.openxmlformats-officedocument.presentationml.presentation</mimetype>
  <command>unzip</command>
  <arguments>-p %s ppt/slides/slide*.xml</arguments>
  <output>application/xml</output>
</filter>
<!-- Only xl/sharedStrings.xml is requested; no other member of the archive is read. -->
<filter>
  <mimetype>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mimetype>
  <command>unzip</command>
  <arguments>-p %s xl/sharedStrings.xml</arguments>
  <output>application/xml</output>
</filter>
<!--
  PDF documents: converted by pdftotext. The arguments request UTF-8
  ("-enc UTF-8"), which matches the declared charset, and the output is
  declared as text/plain. The file name is followed by "-" as the last
  argument.
-->
<filter>
  <mimetype>application/pdf</mimetype>
  <charset>utf-8</charset>
  <command>pdftotext</command>
  <arguments>-enc UTF-8 -raw %s -</arguments>
  <output>text/plain</output>
</filter>
<!--
  RTF documents, registered under both application/rtf and text/rtf.
  The two entries are otherwise identical: unrtf is run with
  "-n -t html" and the output is declared as text/html. No charset is
  declared, so the locale charset applies.
-->
<filter>
  <mimetype>application/rtf</mimetype>
  <command>unrtf</command>
  <arguments>-n -t html %s</arguments>
  <output>text/html</output>
</filter>
<filter>
  <mimetype>text/rtf</mimetype>
  <command>unrtf</command>
  <arguments>-n -t html %s</arguments>
  <output>text/html</output>
</filter>
<!--
  reStructuredText documents: converted by rst2txt, with the output
  declared as UTF-8 encoded text/plain.
-->
<filter>
  <mimetype>text/x-rst</mimetype>
  <charset>utf-8</charset>
  <command>rst2txt</command>
  <arguments>-r 5 %s</arguments>
  <output>text/plain</output>
</filter>
<!--
  Word documents, registered under both application/msword and
  application/vnd.ms-word. The two entries are otherwise identical:
  antiword is run with the UTF-8.txt mapping ("-mUTF-8.txt"), matching
  the declared utf-8 charset. There is no output element, so the
  text/plain default applies.
-->
<filter>
  <mimetype>application/msword</mimetype>
  <charset>utf-8</charset>
  <command>antiword</command>
  <arguments>-mUTF-8.txt %s</arguments>
</filter>
<filter>
  <mimetype>application/vnd.ms-word</mimetype>
  <charset>utf-8</charset>
  <command>antiword</command>
  <arguments>-mUTF-8.txt %s</arguments>
</filter>
<!--
  Excel and PowerPoint documents: converted by xls2csv and catppt
  respectively. Both are passed "-dutf-8", matching the declared utf-8
  charset. There is no output element, so the text/plain default
  applies.
-->
<filter>
  <mimetype>application/vnd.ms-excel</mimetype>
  <charset>utf-8</charset>
  <command>xls2csv</command>
  <arguments>-q0 -dutf-8 %s</arguments>
</filter>
<filter>
  <mimetype>application/vnd.ms-powerpoint</mimetype>
  <charset>utf-8</charset>
  <command>catppt</command>
  <arguments>-dutf-8 %s</arguments>
</filter>
<!--
  DVI and DjVu documents: converted by catdvi and djvutxt respectively.
  Neither entry declares a charset or an output type, so the locale
  charset and the text/plain default apply.

  NOTE(review): "-e2" presumably selects a fixed output encoding in
  catdvi. If so, a matching charset element should be declared here
  instead of relying on the locale. Confirm against the catdvi
  documentation.
-->
<filter>
  <mimetype>application/x-dvi</mimetype>
  <command>catdvi</command>
  <arguments>-e2 -s %s</arguments>
</filter>
<filter>
  <mimetype>image/vnd.djvu</mimetype>
  <command>djvutxt</command>
  <arguments>%s</arguments>
</filter>
<!--
  RPM packages: lists the files contained in the package ("rpm -qlp").

  The command is a shell snippet rather than a plain program name: it
  first asks file(1) for the mime type of the file and only reaches the
  trailing "rpm" (to which the arguments are appended) when the file
  really is an RPM package.

  The check is written as a POSIX "case" statement. A "==" comparison
  inside "[ ]" is a bash extension and fails under a strict /bin/sh.
  The pattern also accepts a "; charset=..." suffix, which newer file(1)
  releases append to the output of "-i".

  No charset or output type is declared, so the locale charset and the
  text/plain default apply.
-->
<filter>
  <mimetype>application/x-rpm</mimetype>
  <command><![CDATA[OUT=`file -bi %s` && case "$OUT" in application/x-rpm|"application/x-rpm;"*) true ;; *) false ;; esac && rpm]]></command>
  <arguments>-qlp %s</arguments>
</filter>
<!--
  Compressed files: each entry decompresses the file with "-c" using the
  matching tool (uncompress, gunzip, bunzip2). The output type is SCAN,
  so the decompressed data is scanned to determine its own mime type
  rather than being assumed to be text.
-->
<filter>
  <mimetype>application/x-compress</mimetype>
  <command>uncompress</command>
  <arguments>-c %s</arguments>
  <output>SCAN</output>
</filter>
<filter>
  <mimetype>application/x-gzip</mimetype>
  <command>gunzip</command>
  <arguments>-c %s</arguments>
  <output>SCAN</output>
</filter>
<filter>
  <mimetype>application/x-bzip</mimetype>
  <command>bunzip2</command>
  <arguments>-c %s</arguments>
  <output>SCAN</output>
</filter>
285</external-filters>
286