使用utf8
將 file input 及 standard output 的 encoding 設為 big5,這樣讀入的每個中文字會被視為一個 character,輸出 wide character 時也不會出現 "Wide character in print" 的警告。split(//, ...) 使用空的 pattern,會把字串依每個 character 分割。
view plain
PERL:
# Read a Big5-encoded grammar file and echo it to STDOUT (also as Big5).
# Every line that contains "<...>" preceded by at least one character is
# printed, followed by an extra line that lists each character found
# between the angle brackets, prefixed with '#'. All other lines are
# echoed unchanged.
#
# Usage: perl SCRIPT GRAMMAR_FILE
use strict;
use warnings;
use utf8;

my $grm = shift;
defined $grm or die "Usage: $0 GRAMMAR_FILE\n";

# Decode the input as Big5 so that every Chinese character becomes a single
# Perl character instead of two raw bytes.
open(my $in, '<:encoding(big5)', $grm) or die "Cannot open '$grm': $!";

# Encode the output as Big5; without this layer, printing decoded text
# triggers "Wide character in print" warnings.
binmode(STDOUT, ':encoding(big5)');

while (my $line = <$in>) {
    if ($line =~ /.<(.*)>/) {
        print $line;

        # An empty pattern makes split() return one element per character.
        my @tokens = split(//, $1);

        print " ";
        foreach my $token (@tokens) {
            print "#$token ";
        }
        print "\n";
    }
    else {
        print $line;
    }
}

close $in;
Tags: utf8