#!./perl
use 5.008001;
BEGIN { pop @INC if $INC[-1] eq '.' }
use strict;
use warnings;
use Encode;
use Getopt::Std;
use Carp;
use Encode::Guess;
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Switches accepted on the command line: -h, -u, -S and -s <list>.
my %opt;
getopts( "huSs:", \%opt );

# Extra encodings to try, filled from -s; empty means "defaults only".
my @suspect_list;
list_valid_suspects() and exit if $opt{S};

# The -s value is documented as a list separated by either ':' or ','.
# A character class is required here: the pattern /:,/ would only split on
# the literal two-character sequence ":,".  Empty fields (e.g. from "a,,b")
# are dropped so they are never handed to guess_encoding() as a name.
@suspect_list = grep { length $_ } split /[:,]/, $opt{s} if $opt{s};
HELP_MESSAGE() if $opt{h};
HELP_MESSAGE() unless @ARGV;
do_guess($_) for @ARGV;

# read_file($filename)
#
# Returns the whole content of $filename as raw octets (no decoding
# layer is applied).  Croaks with "$filename:$!" if the file cannot be
# opened.
sub read_file {
    my $filename = shift;
    local $/;    # undefined record separator: read the file in one go
    open my $fh, '<:raw', $filename or croak "$filename:$!";
    my $content = <$fh>;
    close $fh;
    return $content;
}

# do_guess($filename)
#
# Guesses the encoding of $filename using the default suspects plus
# @suspect_list and prints one "<filename>\t<encoding>" line.  When the
# guess fails, "unknown" is printed instead, or nothing at all if -u was
# given.  Always returns 1.
sub do_guess {
    my $filename = shift;
    my $data     = read_file($filename);
    my $enc      = guess_encoding( $data, @suspect_list );

    # guess_encoding() yields an encoding object on success; anything
    # that is not a reference is treated as a failed guess.
    my $guessed = ref($enc);

    return 1 if !$guessed && $opt{u};

    my $label = "unknown";
    if ($guessed) {
        $label = $enc->mime_name();

        # Not every encoding has a registered MIME name; fall back to
        # the canonical Encode name rather than printing undef.
        $label = $enc->name() unless defined $label;
    }
    print "$filename\t$label\n";
    return 1;
}

# list_valid_suspects()
#
# Prints every encoding name reported by Encode->encodings(":all"), one
# per line.  Returns 1 so the caller can chain "and exit".
sub list_valid_suspects {
    print join( "\n", Encode->encodings(":all") );
    print "\n";
    return 1;
}

# HELP_MESSAGE()
#
# Replaces the current process with "pod2usage" run on this script, so
# it does not return on success.  Dies if the exec itself fails.
sub HELP_MESSAGE {
    exec 'pod2usage', $0 or die "pod2usage: $!"
}
__END__
=head1 NAME

encguess - guess character encodings of files

=head1 VERSION

$Id: encguess,v 0.4 2023/11/10 01:10:50 dankogai Exp $

=head1 SYNOPSIS

  encguess [switches] filename...

=head2 SWITCHES

=over 2

=item -h

show this message and exit.

=item -s

specify a list of "suspect encoding types" to test,
separated by either C<:> or C<,>

=item -S

output a list of all acceptable encoding types that can be used with
the -s param

=item -u

suppress display of unidentified types

=back

=head2 EXAMPLES:

=over 2

=item *

Guess encoding of a file named C<test.txt>, using only the default
suspect types.

   encguess test.txt

=item *

Guess the encoding type of a file named C<test.txt>, using the suspect
types C<euc-jp,shiftjis,7bit-jis>.

   encguess -s euc-jp,shiftjis,7bit-jis test.txt
   encguess -s euc-jp:shiftjis:7bit-jis test.txt

=item *

Guess the encoding type of several files, do not display results for
unidentified files.

   encguess -us euc-jp,shiftjis,7bit-jis test*.txt

=back

=head1 DESCRIPTION

The encoding identification is done by checking one encoding type at a
time until all but the right type are eliminated. The set of encoding
types to try is defined by the -s parameter and defaults to ascii,
utf8 and UTF-16/32 with BOM. This can be overridden by passing one or
more encoding types via the -s parameter. If you need to pass in
multiple suspect encoding types, separate the values with either C<:>
or C<,>, as shown in the examples above.

=head1 SEE ALSO

L<Encode::Guess>, L<Encode::Detect>

=head1 LICENSE AND COPYRIGHT

Copyright 2015 Michael LaGrasta and Dan Kogai.

This program is free software; you can redistribute it and/or modify it
under the terms of the Artistic License (2.0). You may obtain a
copy of the full license at:

L<http://www.perlfoundation.org/artistic_license_2_0>

=cut