#!./perl
use strict;
use Encode;
use Getopt::Std;
my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt);
$Opt{p} ||= $Opt{P};
$Opt{e} ||= 'utf8';
$Opt{f} ||= $Opt{e};
$Opt{t} ||= $Opt{e};
$Opt{h} and help();
my ($linebuf, $outbuf);
my $CPL = $Opt{p} ? 64 : 8;
my $linenum;
my $linesperheading = $Opt{H};
my $nchars;
our $PrevChunk;
$Opt{h} and help();
$Opt{p} and do_perl($Opt{s});
do_dump($Opt{s});
exit;
#
sub do_perl{
my $string = shift;
$Opt{P} and print "#!$^X -w\nprint\n";
unless ($string){
while(<>){
use utf8;
$linebuf .= Encode::decode($Opt{f}, $_);
while($linebuf){
my $chr = render_p(substr($linebuf, 0, 1, ''));
length($outbuf) + length($chr) > $CPL and print_P();
$outbuf .= $chr;
}
}
$outbuf and print print_P(";");
}else{
while($string){
my $chr = render_p(substr($string, 0, 1, ''));
length($outbuf) + length($chr) > $CPL and print_P();
$outbuf .= $chr;
}
}
$outbuf and print print_P(";");
exit;
}
sub render_p{
my ($chr, $format) = @_;
our %S2pstr;
$S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
$chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable;
my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ?
q(\x%x) : q(\x{%x});
return sprintf $fmt, ord($chr);
}
sub print_P{
my $end = shift;
$outbuf or return;
print '"', encode($Opt{t}, $outbuf), '"';
my $tail = $Opt{P} ? $end ? "$end" : "," : '';
print $tail, "\n";
$outbuf = '';
}
sub do_dump{
my $string = shift;
!$Opt{p} and exists $Opt{H} and print_H();
unless ($string){
while(<>){
use utf8;
$linebuf .= Encode::decode($Opt{f}, $_);
while (length($linebuf) > $CPL){
my $chunk = substr($linebuf, 0, $CPL, '');
print_C($chunk, $linenum++);
$Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
}
}
$linebuf and print_C($linebuf);
}else{
while ($string){
my $chunk = substr($string, 0, $CPL, '');
print_C($chunk, $linenum++);
$Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
}
}
exit;
}
sub print_S{
print "--------+------------------------------------------------";
if ($Opt{C}){
print "-+-----------------";
}
print "\n";
}
sub print_H{
print " Offset 0 1 2 3 4 5 6 7";
if ($Opt{C}){
print " | 0 1 2 3 4 5 6 7";
}
print "\n";
print_S;
}
sub print_C{
my ($chunk, $linenum) = @_;
if (!$Opt{v} and $chunk eq $PrevChunk){
printf "%08x *\n", $linenum*8; return;
}
$PrevChunk = $chunk;
my $end = length($chunk) - 1;
my (@ord, @chr);
for my $i (0..$end){
use utf8;
my $chr = substr($chunk,$i,1);
my $ord = ord($chr);
my $fmt = $ord <= 0xffff ? " %04x" : " %05x";
push @ord, (sprintf $fmt, $ord);
$Opt{C} and push @chr, render_c($chr);
}
if (++$end < 7){
for my $i ($end..7){
push @ord, (" " x 6);
}
}
my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
$Opt{C} and $line .= sprintf " | %s", join('', @chr);
print encode($Opt{t}, $line), "\n";
}
sub render_c{
my ($chr, $format) = @_;
our (%S2str, $IsFullWidth);
$chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " ";
$chr =~ $IsFullWidth and return $chr; # as is
return " " . $chr;
}
sub help{
my $message = shift;
use File::Basename;
my $name = basename($0);
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
Usage:
$name -[options...] [files...]
$name -[options...] -s "string"
$name -h
-h prints this message.
Inherited from hexdump;
-C Canonical unidump mode
-v prints the duplicate line as is. Without this option,
single "*" will be printed instead.
For unidump only
-p prints in perl literals that you can copy and paste directly
to your perl script.
-P prints in perl executable format!
-u prints a bunch of "Uxxxx,". Handy when you want to pass your
characters in mailing lists.
IO Options:
-e io_encoding same as "-f io_encoding -t io_encoding"
-f from_encoding convert the source stream from this encoding
-t to_encoding print to STDOUT in this encoding
-s string "string" will be converted instead of STDIN.
-H nline prints separater for each nlines of output.
0 means only the table headding be printed.
EOT
exit;
}
BEGIN{
our %S2pstr= (
"\\" => '\\\\',
"\0" => '\0',
"\t" => '\t',
"\n" => '\n',
"\r" => '\r',
"\v" => '\v',
"\a" => '\a',
"\e" => '\e',
"\"" => qq(\\\"),
"\'" => qq(\\\'),
'$' => '\$',
"@" => '\@',
"%" => '\%',
);
our %S2str = (
qq(\x00) => q(\0), # NULL
qq(\x01) => q(^A), # START OF HEADING
qq(\x02) => q(^B), # START OF TEXT
qq(\x03) => q(^C), # END OF TEXT
qq(\x04) => q(^D), # END OF TRANSMISSION
qq(\x05) => q(^E), # ENQUIRY
qq(\x06) => q(^F), # ACKNOWLEDGE
qq(\x07) => q(\a), # BELL
qq(\x08) => q(^H), # BACKSPACE
qq(\x09) => q(\t), # HORIZONTAL TABULATION
qq(\x0A) => q(\n), # LINE FEED
qq(\x0B) => q(\v), # VERTICAL TABULATION
qq(\x0C) => q(^L), # FORM FEED
qq(\x0D) => q(\r), # CARRIAGE RETURN
qq(\x0E) => q(^N), # SHIFT OUT
qq(\x0F) => q(^O), # SHIFT IN
qq(\x10) => q(^P), # DATA LINK ESCAPE
qq(\x11) => q(^Q), # DEVICE CONTROL ONE
qq(\x12) => q(^R), # DEVICE CONTROL TWO
qq(\x13) => q(^S), # DEVICE CONTROL THREE
qq(\x14) => q(^T), # DEVICE CONTROL FOUR
qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE
qq(\x16) => q(^V), # SYNCHRONOUS IDLE
qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK
qq(\x18) => q(^X), # CANCEL
qq(\x19) => q(^Y), # END OF MEDIUM
qq(\x1A) => q(^Z), # SUBSTITUTE
qq(\x1B) => q(\e), # ESCAPE (\c[)
qq(\x1C) => "^\\", # FILE SEPARATOR
qq(\x1D) => "^\]", # GROUP SEPARATOR
qq(\x1E) => q(^^), # RECORD SEPARATOR
qq(\x1F) => q(^_), # UNIT SEPARATOR
);
#
# Generated out of lib/unicore/EastAsianWidth.txt
# will it work ?
#
our $IsFullWidth =
qr/^[
\x{1100}-\x{1159}
\x{115F}-\x{115F}
\x{2329}-\x{232A}
\x{2E80}-\x{2E99}
\x{2E9B}-\x{2EF3}
\x{2F00}-\x{2FD5}
\x{2FF0}-\x{2FFB}
\x{3000}-\x{303E}
\x{3041}-\x{3096}
\x{3099}-\x{30FF}
\x{3105}-\x{312C}
\x{3131}-\x{318E}
\x{3190}-\x{31B7}
\x{31F0}-\x{321C}
\x{3220}-\x{3243}
\x{3251}-\x{327B}
\x{327F}-\x{32CB}
\x{32D0}-\x{32FE}
\x{3300}-\x{3376}
\x{337B}-\x{33DD}
\x{3400}-\x{4DB5}
\x{4E00}-\x{9FA5}
\x{33E0}-\x{33FE}
\x{A000}-\x{A48C}
\x{AC00}-\x{D7A3}
\x{A490}-\x{A4C6}
\x{F900}-\x{FA2D}
\x{FA30}-\x{FA6A}
\x{FE30}-\x{FE46}
\x{FE49}-\x{FE52}
\x{FE54}-\x{FE66}
\x{FE68}-\x{FE6B}
\x{FF01}-\x{FF60}
\x{FFE0}-\x{FFE6}
\x{20000}-\x{2A6D6}
]$/xo;
}
__END__
|