1#!/usr/bin/perl
2
3# COPYRIGHT AND LICENSE
4#        Copyright (C) 2005-2018 H.Merijn Brand
5#
6#        This script is free software; you can redistribute it and/or modify it
7#        under the same terms as Perl and/or Claws Mail itself. (GPL)
8
9use 5.14.1;
10use warnings;
11
12our $VERSION = "1.01 - 2018-10-08";
13our $CMD = $0 =~ s{.*/}{}r;
14
# Show the command line synopsis and leave the program.
#   usage ($exit_status [, $message])
# With a true exit status all output goes to STDERR, otherwise to STDOUT.
# The optional message is printed on its own line after the synopsis.
# This sub never returns: it exits with $exit_status.
sub usage {
    my ($err, $str) = (@_, "");
    my $out = $err ? \*STDERR : \*STDOUT;
    say {$out}
	"usage: $CMD [--html] [--type=<type>] file\n",
	"       --html    Generate HTML (if supported)\n",
	"       --type=X  X as mimetype (msword => doc)\n",
	"  $CMD --list will show all implemented conversions";
    if ($str) {
	say {$out} $str;
	}
    exit $err;
    } # usage
25
26use Getopt::Long qw(:config bundling nopermute);
# Option state
#   $opt_v  verbosity, 0 unless -v/--verbose is given (a bare -v means 1)
#   $opt_h  name of the conversion table, "text" unless --html is given
#   $opt_t  value of --type, undefined when not passed
#   $opt_l  true when --list is given
my ($opt_v, $opt_h) = (0, "text");
my ($opt_t, $opt_l);

unless (GetOptions (
	"l|list!"		=> \$opt_l,
	"h|html"		=> sub { $opt_h = "html" },
	"t|type|mimetype=s"	=> \$opt_t,
	"v|verbose:1"		=> \$opt_v,

	"V|version"		=> sub { say "$CMD [$VERSION]"; exit 0; },
	"help|?"		=> sub { usage (0); },
	)) {
    usage (1);
    }

# In verbose mode, echo how we were called (options are already consumed)
if ($opt_v) {
    say "$0 @ARGV";
    }
40
# Conversion table: $fh{<output format>}{<extension or mimetype subtype>}
#  - an array ref lists the candidate viewers in order of preference. An
#    entry is either a command line or a code ref
#  - a plain string is an alias: the name of another key in the same table
# %f in a command is replaced with the file name. If there is no %f, the
# file name is passed as the last argument.
# Every key may occur only once per table: a repeated key silently replaces
# the earlier entry.
my %fh = (
    text => {
	bin	=> [ "strings"		], # fallback for binary files

	txt	=> [ "cat"		], # Plain text

	html	=> [ "htm2txt",
		     "html2text"	], # HTML

	msword	=> "doc",
	doc	=> [ "catdoc -x -dutf-8",
		     "wvText",
		     "antiword -w 72"	], # M$ Word
	"vnd.ms-excel" => "xls",
	"ms-excel"     => "xls",
	docx	=> [ "unoconv -f text --stdout"	], # MS Word
	xlsx	=> "xls",
	xls	=> [ "xlscat -L",
		     "catdoc -x -dutf-8",
		     "wvText"		], # M$ Excel
#	ppt	=> [ "ppthtml"		], # M$ PowerPoint
#			ppthtml "$1" | html2text
	csv	=> "xls",		   # Comma Separated Values

	ics	=> [ "ics2txt"		], # ICS calendar request

	# rtf used to be listed twice; the second entry hid the unrtf fallback
	rtf	=> [ "rtf2text",
		     "unrtf -t text"	], # RTF
	pdf	=> [ "pdftotext %f -"	], # Adobe PDF

	ods	=> "xls",		   # OpenOffice spreadsheet
	sxc	=> "xls",		   # OpenOffice spreadsheet
	odt	=> [ "oo2pod %f | pod2text",
		     "ooo2txt"		], # OpenOffice writer

	pl	=> [ "perltidy -st -se",
		     "cat"		], # Perl
	pm	=> "pl",

	jsn	=> [ "json_pp"		], # JSON
	json	=> "jsn",

	xml	=> [ "xml_pp"		], # XML

	( map { $_ => "txt" } qw(
	    patch diff
	    c h ic ec cc
	    sh sed awk
	    plain
	    yml yaml
	    )),

	bz2	=> [ "bzip2 -d < %f | strings" ],

	zip	=> [ "unzip -l %f"	], # ZIP

	# NOTE(review): no sub named "test" is visible in this file - confirm
	# it is defined elsewhere, otherwise calling this entry will die
	test	=> [ \&test		], # Internal

	# The uncompressed entry used to be keyed "tgz" too, so it was
	# replaced by the gzip entry and plain tar files had no viewer
	tar	=> [ "tar tvf"		], # Tar     uncompressed
	tgz	=> [ "tar tzvf"		], # Tar GZ    compressed
	tbz	=> [ "tar tjvf"		], # Tar BZip2 compressed
	txz	=> [ "tar tJvf"		], # Tar XZ    compressed

	rar	=> [ "unrar l"		], # RAR
	},

    html => {
	rtf	=> [ "rtf2html"		],
	},
    );
115
# --list: print every key of the text conversion table together with its
# candidate commands (one line per command), then exit
if ($opt_l) {
    my %tc = %{$fh{text}};
    foreach my $ext (sort keys %tc) {
	my $exe = $tc{$ext};
	# A plain string is an alias for another key in the same table
	ref $exe or $exe = $tc{$exe};
	# Skip an alias that does not resolve to a list of commands
	ref $exe eq "ARRAY" or next;
	# Code refs would stringify to a meaningless CODE(0x...) address
	printf "  .%-12s %s\n", $ext, ref $_ ? "(internal)" : $_ for @$exe;
	}
    exit 0;
    }
125
# The file to show. Test for defined/length instead of truth, so a file
# that is named "0" is not reported as missing
my $file = shift;
defined $file && length $file
		 or usage (1, "File argument is missing");
-f $file         or usage (1, "File argument is not a plain file");
-r $file         or usage (1, "File argument is not a readable file");
-s $file         or usage (1, "File argument is an empty file");

# The type defaults to the (lowercased) file name extension
my $ext = $file =~ m/\.(\w+)$/ ? lc $1 : "";

# An explicit --type that is known in the text table overrules the
# extension. Both "msword" and a full mimetype like "application/msword"
# are accepted: only the part after the last "/" is looked up
if (defined $opt_t) {
    (my $type = lc $opt_t) =~ s{^.*/}{};
    exists $fh{text}{$type} and $ext = $type;
    }

# Unknown type: ask file(1) what it thinks the content is
unless (exists $fh{text}{$ext}) {
    my $ftype = "";
    # List-form pipe open: no shell is involved, so spaces or shell
    # meta-characters in the file name cannot break or abuse the command
    if (open my $ph, "-|", "file", "--brief", "--", $file) {
	local $/;
	$ftype = <$ph> // "";
	close $ph;
	}
    elsif ($opt_v) {
	warn "Cannot run file: $!\n";
	}
    $ext =
	$ftype =~ m/^pdf doc/i					? "pdf" :
	$ftype =~ m/^ascii( english)? text/i			? "txt" :
	$ftype =~ m/^(utf-8 unicode|iso-\d+)( english)? text/i	? "txt" :
	$ftype =~ m/^xml doc/i					? "xml" :
	$ftype =~ m/^\w+ compress/i				? "bin" :
								  "bin" ;
    # \w+ archive
    # \w+ image
    # ...
    }
$ext ||= "txt";

# Fall back to the text table if the requested table has no entry for this
# type, and to plain text if even the text table has none
exists $fh{$opt_h}{$ext} or $opt_h = "text";
exists $fh{$opt_h}{$ext} or $ext   = "txt";
my          $ref = $fh{$opt_h}{$ext};
# A plain string is an alias for another key in the same table
ref $ref or $ref = $fh{$opt_h}{$ref};

$opt_v and warn "[ @$ref ] $file\n";
153
# Find a command in $PATH.
#   which ($command_line)
# Only the first word of $command_line is used, arguments are ignored.
# Returns the full path of the first match that is an executable plain
# file, or 0 if there is none. Directories (which also pass -x) and empty
# $PATH elements are skipped. An unset $PATH yields 0.
sub which {
    (my $cmd = shift) =~ s/\s.*//; # Only the command. Discard arguments here
    length $cmd or return 0;
    foreach my $path (grep { length } split m/:+/, $ENV{PATH} // "") {
	my $exe = "$path/$cmd";
	-f $exe && -x _ and return $exe;
	}
    return 0;
    } # which
161
# Pick the first usable viewer for this type. If none of the candidates is
# installed, fall back to "cat -ve"
my $cmd = "cat -ve";
foreach my $c (@$ref) {
    if (ref $c) {
	# Internal viewer: call it and be done. Skip entries that refer to
	# a sub that is not defined instead of dying on the call
	defined &$c or next;
	$c->($file);
	exit;
	}

    which ($c) or next;
    $cmd = $c;
    last;
    }

# A leading "-" would make the viewer take the file name for an option
my $arg = $file =~ m/^-/ ? "./$file" : $file;

my @cmd;
if ($cmd =~ m/[|<>]/) {
    # Pipelines and redirections need a shell. The file name is handed
    # over as positional parameter $1 and never becomes part of the shell
    # code itself, so it needs no quoting and cannot inject commands
    (my $sh = $cmd) =~ s/%f\b/"\$1"/g or $sh .= ' "$1"';
    @cmd = ("/bin/sh", "-c", $sh, "sh", $arg);
    }
else {
    # Simple command: run it directly, without a shell
    @cmd = split m/ +/ => $cmd;
    my $seen = 0;
    s/%f\b/$arg/ and $seen++ for @cmd;
    $seen or push @cmd, $arg;
    }
$opt_v and say "@cmd";
exec @cmd or die "Cannot execute $cmd[0]: $!\n";
179