#============================================================
# MyCharConvEUCJP
#============================================================
package MyCharConvEUCJP;
use MyCharConv;
@ISA = qw(MyCharConv);
use strict;
use Encode;
use Jcode;
#use Text::Kakasi;
#===============================================
# Script-wide (file-scoped) variables
#===============================================
# Encoding name Jcode detects for a Japanese sample literal embedded in this
# source file; used throughout as the encoding of the literals in this file.
my $ScriptCodeCharCode = Jcode::getcode("·î²Ð¿åÌÚ¶âÅÚÆü¡¡");

# 0: run the external kakasi executable; true: use the Text::Kakasi module.
my $UseKakasiModule = 0;

# Path of the kakasi executable and the encoding used when talking to it.
# Windows (any $^O starting with "MSWin") gets its own pair.
my ($KakasiPath, $KakasiCharCode) = ($^O =~ /^MSWin/)
    ? ("c:\\kakasi\\bin\\kakasi.exe", "sjis")
    : ("/usr/local/bin/kakasi", "euc");
# Romaji syllable => hiragana lookup table used by Roma2Kana.
#
# Values are literals in this file's own encoding (EUC-JP); they are
# re-encoded after the table is built.  Every value that contains the kana
# "ki" is written with hex escapes because its second byte is 0xAD, which
# many editors and viewers treat as an invisible soft hyphen and silently
# drop.
#
# Corrections relative to the earlier table:
#   * 'pi' / 'pu' no longer reuse the values of 'bi' / 'bu'
#   * 'kyu' no longer reuses the value of 'kya'
#   * 'myu' no longer reuses the value of 'myi'
#   * 'tyo' now equals 'cho' (small "yo" instead of small "o")
#   * the duplicated 'chi' and 'shi' keys are listed only once
my %ConvTbl = (
    # plain vowels
    'a' => '¤¢',
    'i' => '¤¤',
    'u' => '¤¦',
    'e' => '¤¨',
    'o' => '¤ª',
    # k-row
    'ka' => '¤«',
    'ki' => "\xA4\xAD",
    'ku' => '¤¯',
    'ke' => '¤±',
    'ko' => '¤³',
    # s-row
    'sa' => '¤µ',
    'shi' => '¤·',
    'si' => '¤·',
    'su' => '¤¹',
    'se' => '¤»',
    'so' => '¤½',
    # t-row
    'ta' => '¤¿',
    'chi' => '¤Á',
    'ti' => '¤Á',
    'tu' => '¤Ä',
    'te' => '¤Æ',
    'to' => '¤È',
    # n-row
    'na' => '¤Ê',
    'ni' => '¤Ë',
    'nu' => '¤Ì',
    'ne' => '¤Í',
    'no' => '¤Î',
    # h-row
    'ha' => '¤Ï',
    'hi' => '¤Ò',
    'hu' => '¤Õ',
    'he' => '¤Ø',
    'ho' => '¤Û',
    # m-row
    'ma' => '¤Þ',
    'mi' => '¤ß',
    'mu' => '¤à',
    'me' => '¤á',
    'mo' => '¤â',
    # y-row
    'ya' => '¤ä',
    'yi' => '¤¤',
    'yu' => '¤æ',
    'ye' => '¤¨',
    'yo' => '¤è',
    # r-row
    'ra' => '¤é',
    'ri' => '¤ê',
    'ru' => '¤ë',
    're' => '¤ì',
    'ro' => '¤í',
    # w-row
    'wa' => '¤ï',
    'wi' => '¤¦¤£',
    'wu' => '¤¦',
    'we' => '¤¦¤§',
    'wo' => '¤ò',
    # syllabic n
    'n' => '¤ó',
    'm' => '¤ó',
    # d-row
    'da' => '¤À',
    'di' => '¤Â',
    'du' => '¤Å',
    'de' => '¤Ç',
    'do' => '¤É',
    # b-row
    'ba' => '¤Ð',
    'bi' => '¤Ó',
    'bu' => '¤Ö',
    'be' => '¤Ù',
    'bo' => '¤Ü',
    # c-row
    'ca' => "\xA4\xAD\xA4\xE3",
    'ci' => '¤Á',
    'cu' => "\xA4\xAD\xA4\xE5",
    'ce' => '¤»',
    'co' => '¤³',
    # f-row
    'fa' => '¤Õ¤¡',
    'fi' => '¤Õ¤£',
    'fu' => '¤Õ',
    'fe' => '¤Õ¤§',
    'fo' => '¤Õ¤©',
    # g-row
    'ga' => '¤¬',
    'gi' => '¤®',
    'gu' => '¤°',
    'ge' => '¤²',
    'go' => '¤´',
    # j-row
    'ja' => '¤¸¤ã',
    'ji' => '¤¸',
    'ju' => '¤¸¤å',
    'je' => '¤¸¤§',
    'jo' => '¤¸¤ç',
    # l-row (same values as the r-row)
    'la' => '¤é',
    'li' => '¤ê',
    'lu' => '¤ë',
    'le' => '¤ì',
    'lo' => '¤í',
    # p-row
    'pa' => '¤Ñ',
    'pi' => '¤Ô',
    'pu' => '¤×',
    'pe' => '¤Ú',
    'po' => '¤Ý',
    # q-row
    'qa' => '¤¯¤¡',
    'qi' => '¤¯¤£',
    'qu' => '¤¯',
    'qe' => '¤¯¤§',
    'qo' => '¤¯¤©',
    # v-row
    'va' => '¥ô¤¡',
    'vi' => '¥ô¤£',
    'vu' => '¥ô',
    've' => '¥ô¤§',
    'vo' => '¥ô¤©',
    # x-row
    'xa' => '¤·¤ã',
    'xi' => '¤·¤£',
    'xu' => '¤¹¤¥',
    'xe' => '¤·¤§',
    'xo' => '¤¹¤©',
    # z-row
    'za' => '¤¶',
    'zi' => '¤¸',
    'zu' => '¤º',
    'ze' => '¤¼',
    'zo' => '¤¾',
    # ch- ('chi' is listed with the t-row)
    'cha' => '¤Á¤ã',
    'chu' => '¤Á¤å',
    'che' => '¤Á¤§',
    'cho' => '¤Á¤ç',
    # ts-
    'tsa' => '¤Ä¤¡',
    'tsi' => '¤Á',
    'tsu' => '¤Ä',
    'tse' => '¤Ä¤§',
    'tso' => '¤Ä¤©',
    # th-
    'tha' => '¤·¤ã',
    'thi' => '¤·',
    'thu' => '¤Á¤å',
    'the' => '¤»',
    'tho' => '¤½',
    # ky-
    'kya' => "\xA4\xAD\xA4\xE3",
    'kyi' => "\xA4\xAD\xA4\xA3",
    'kyu' => "\xA4\xAD\xA4\xE5",
    'kye' => "\xA4\xAD\xA4\xA7",
    'kyo' => "\xA4\xAD\xA4\xE7",
    # sy-
    'sya' => '¤·¤ã',
    'syi' => '¤·¤£',
    'syu' => '¤·¤å',
    'sye' => '¤·¤§',
    'syo' => '¤·¤ç',
    # sh- ('shi' is listed with the s-row)
    'sha' => '¤·¤ã',
    'shu' => '¤·¤å',
    'she' => '¤·¤§',
    'sho' => '¤·¤ç',
    # ty-
    'tya' => '¤Á¤ã',
    'tyi' => '¤Á¤£',
    'tyu' => '¤Á¤å',
    'tye' => '¤Á¤§',
    'tyo' => '¤Á¤ç',
    # ny-
    'nya' => '¤Ë¤ã',
    'nyi' => '¤Ë¤£',
    'nyu' => '¤Ë¤å',
    'nye' => '¤Ë¤§',
    'nyo' => '¤Ë¤ç',
    # hy-
    'hya' => '¤Ò¤ã',
    'hyi' => '¤Ò¤£',
    'hyu' => '¤Ò¤å',
    'hye' => '¤Ò¤§',
    'hyo' => '¤Ò¤ç',
    # my-
    'mya' => '¤ß¤ã',
    'myi' => '¤ß¤£',
    'myu' => '¤ß¤å',
    'mye' => '¤ß¤§',
    'myo' => '¤ß¤ç',
    # ry-
    'rya' => '¤ê¤ã',
    'ryi' => '¤ê¤£',
    'ryu' => '¤ê¤å',
    'rye' => '¤ê¤§',
    'ryo' => '¤ê¤ç',
    # wy-
    'wya' => '¤ï¤¡',
    'wyi' => '¤ï¤£',
    'wyu' => '¤ï¤¥',
    'wye' => '¤ï¤§',
    'wyo' => '¤ï¤©',
    # special cases
    'nji' => '¤ó¤¸',
    'nta' => '¤ó¤¿',
);
# Re-encode every table value in place from the script's own encoding to
# UTF-8 (the elements returned by values() alias the hash slots).
for my $kana (values %ConvTbl) {
    Jcode::convert(\$kana, "utf8", $ScriptCodeCharCode);
}

# Kana constants for Roma2Kana, re-encoded to UTF-8 the same way.
# Of these, only $nn is referenced by live code in this file.
my ($ltu, $oh, $nn) = ("¤Ã", "¤ª¤ª", "¤ó");
for my $constant (\$ltu, \$oh, \$nn) {
    Jcode::convert($constant, "utf8", $ScriptCodeCharCode);
}
#===============================================
# Constructor / destructor
#===============================================
# Constructor.
#
# Usage:   my $conv = MyCharConvEUCJP->new;
# Returns: a new object holding a Jcode instance (pJcode) and the Encode
#          encoding objects for utf8, eucjp and sjis (pUTF8, pEUCjp, pSJIS).
#
# Any further arguments are accepted and ignored.
sub new
{
    my ($module) = @_;

    # Two-argument bless so that subclasses (and $obj->new) get an object of
    # their own class instead of always this package.
    my $class = ref($module) || $module;
    my $this = bless {}, $class;

    # Arrow call instead of "new Jcode": the indirect-object form is
    # ambiguous inside a package that defines its own sub named "new".
    $this->{pJcode} = Jcode->new;
    $this->{pUTF8} = find_encoding('utf8');
    $this->{pEUCjp} = find_encoding('eucjp');
    $this->{pSJIS} = find_encoding('sjis');

    return $this;
}
# Destructor: calls the object's Close() method (not defined in this file;
# presumably inherited from MyCharConv) when the object is destroyed.
sub DESTROY
{
    my ($self) = @_;
    $self->Close();
}
# Initialization hook.  Only unpacks its invocant and does no other work;
# there is no explicit return statement.
sub Initialize
{
    my ($self) = @_;
}
#===============================================
# Accessor functions
#===============================================
#===============================================
# General member functions
#===============================================
# Convert half-width ASCII letters, digits, space and punctuation to their
# full-width forms, and optionally half-width kana to full-width kana.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
#   $ConvertKana    when true (default 1) Jcode's h2z() is applied as well
# Returns: the converted text, converted back to $sourcecharcode.
#
# The text is brought into the script's own encoding first so that it
# matches the literals below, and converted back at the end.
sub Hankaku2Zenkaku
{
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $ConvertKana = 1 if(!defined $ConvertKana);

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # Letters, digits and space.
    my $jtext = Jcode->new($s)->tr('a-zA-Z0-9 ', '£á-£ú£Á-£Ú£°-£¹¡¡');

    # Punctuation, part 1.  The source list holds exactly one character per
    # target character; a literal backslash in front of the double quote
    # used to shift every following pair by one.
    $jtext = $jtext->tr('@!"#$%&\'', '¡÷¡ª¡É¡ô¡ð¡ó¡õ¡Ç');

    # Punctuation, part 2.  '-' is kept at the very end of the list so that
    # it cannot be read as an "X-Y" range like the ones used in the first
    # call above.
    $jtext = $jtext->tr('()=^\\|{}:;+*?_/<>-', '()=¡°¡ï¡Ã¡Ð¡Ñ¡§¡¨+¡ö¡©¡²¡¿¡ã¡ä¡Ý');

    # Stringify the Jcode object, optionally widening half-width kana first.
    if($ConvertKana) {
        $s = $jtext->h2z() . '';
    }
    else {
        $s = $jtext . '';
    }

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
# Convert full-width letters, digits, space and punctuation to their
# half-width ASCII forms, and optionally full-width kana to half-width kana.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
#   $ConvertKana    when true (default 1) Jcode's z2h() is applied as well
# Returns: the converted text, converted back to $sourcecharcode.
#
# The text is brought into the script's own encoding first so that it
# matches the literals below, and converted back at the end.
sub Zenkaku2Hankaku
{
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $ConvertKana = 1 if(!defined $ConvertKana);

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # Letters, digits and space.
    my $jtext = Jcode->new($s)->tr('£á-£ú£Á-£Ú£°-£¹¡¡', 'a-zA-Z0-9 ');

    # Punctuation, part 1.  The target list holds exactly one character per
    # source character; a literal backslash in front of the double quote
    # used to shift every following pair by one.
    $jtext = $jtext->tr('¡÷¡ª¡É¡ô¡ð¡ó¡õ¡Ç', '@!"#$%&\'');

    # Punctuation, part 2.  '-' is kept at the very end of the target list
    # so that it cannot be read as an "X-Y" range like the ones used in the
    # first call above.
    $jtext = $jtext->tr('()=¡°¡ï¡Ã¡Ð¡Ñ¡§¡¨+¡ö¡©¡²¡¿¡ã¡ä¡Ý', '()=^\\|{}:;+*?_/<>-');

    # Stringify the Jcode object, optionally narrowing full-width kana first.
    if($ConvertKana) {
        $s = $jtext->z2h() . '';
    }
    else {
        $s = $jtext . '';
    }

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
# Apply Jcode's z2h() (full-width to half-width kana) to $s.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding to return the result in; obtained from
#                   $this->getcode($s) when undefined
# Returns: the converted text after $this->convert(\$s, $sourcecharcode).
sub Zenkaku2HankakuKana
{
    my ($this, $s, $sourcecharcode) = @_;
    if(!defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($s);
    }

    # Jcode is handed the raw text here (no explicit input encoding), and
    # the resulting object is stringified by the concatenation.
    my $narrowed = Jcode->new($s)->z2h();
    $s = $narrowed . '';

    # Two-argument convert: no source encoding is passed at this call site.
    $this->convert(\$s, $sourcecharcode);
    return $s;
}
# Apply Jcode's h2z() (half-width to full-width kana) to $s.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
# Returns: the converted text, converted back to $sourcecharcode.
sub HankakuKana2Zenkaku
{
    my ($this, $s, $sourcecharcode) = @_;
    if(!defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($s);
    }

    # Work in the script's own encoding, then restore the caller's.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);
    my $widened = Jcode->new($s)->h2z();
    $s = $widened . '';
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);

    return $s;
}
# Map the hiragana range given in the tr() call below onto the katakana
# range of the same length.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding to return the result in; obtained from
#                   $this->getcode($s) when undefined
# Returns: the converted text, converted from the script's encoding to
#          $sourcecharcode.
sub Hiragana2Katakana
{
    my ($this, $s, $sourcecharcode) = @_;
    if(!defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($s);
    }

    # Jcode is handed the raw text (no explicit input encoding); the
    # concatenation stringifies the resulting object.
    my $katakana = Jcode->new($s)->tr('¤¡-¤ó', '¥¡-¥ó');
    $s = $katakana . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
# Convert katakana (full-width, and half-width via HankakuKana2Zenkaku) to
# hiragana.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
# Returns: the converted text, converted back to $sourcecharcode.
sub Katakana2Hiragana
{
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);

    # Work in the script's own encoding so the text matches the literals.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # $s is now in the script's encoding, so that is the encoding the helper
    # must be told about; passing $sourcecharcode here made the helper
    # convert already-converted text a second time.
    $s = $this->HankakuKana2Zenkaku($s, $ScriptCodeCharCode);

    # The range starts at the first (small) kana of each block, matching
    # Hiragana2Katakana, so the small katakana "a" is converted too.
    $s = Jcode->new($s)->tr('¥¡-¥ó', '¤¡-¤ó') . '';

    # Name the source encoding explicitly, as the sibling converters do.
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
#my $ZenkakuKigou = "[¡ª-¡Á¡¡]";
#my $ZenkakuSpace = "([!-~])¡¡([!-~])";
#my $ZenkakuAlphabet = "[£Á-£Ú£á-£ú£°-£¹¡¡¡Ý¡¾¡¤¡¢¡£¡¥]";
#Jcode::convert(\$ZenkakuKigou, 'utf8', $ScriptCodeCharCode);
#Jcode::convert(\$ZenkakuSpace, 'utf8', $ScriptCodeCharCode);
#Jcode::convert(\$ZenkakuAlphabet, 'utf8', $ScriptCodeCharCode);
# Shorthand for Zenkaku2Hankaku with its default kana handling.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
# Returns: the result of Zenkaku2Hankaku, forced to a plain string.
sub Zen2Han
{
    my ($this, $s, $sourcecharcode) = @_;
    unless(defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($s);
    }
    my $narrow = $this->Zenkaku2Hankaku($s, $sourcecharcode);
    return $narrow . '';
}
# Convert romaji text to hiragana using %ConvTbl.
#
# Parameters:
#   $s              text to convert
#   $charcode       encoding of the returned text (default 'sjis')
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
# Returns: the text with every recognised romaji syllable replaced by kana,
#          in $charcode.  Syllables missing from %ConvTbl are kept as typed.
#
# NOTE: doubled consonants (small "tsu") and "oh" get no special handling;
# the code for both was already disabled in this routine.
sub Roma2Kana {
    my ($this, $s, $charcode, $sourcecharcode) = @_;
    $charcode = 'sjis' if(!defined $charcode);
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);

    # Narrow full-width input, go to UTF-8, and only then lowercase.
    # Lowercasing before the conversion is unsafe: in Shift_JIS the second
    # byte of a double-byte character can be an ASCII capital letter, while
    # in UTF-8 every byte of a multi-byte character is >= 0x80.
    $s = $this->Zenkaku2Hankaku($s, $sourcecharcode);
    $this->convert(\$s, "utf8", $sourcecharcode);
    $s = lc $s;

    # Vowel + "h" + consonant: double the vowel and drop the "h".
    $s =~ s/([aiueo])h([bcdfghjklmnpqrstvwxyz])/$1$1$2/g;
    # "n"/"m" before anything that is neither a vowel nor "n": syllabic n.
    $s =~ s/[nm]([^aiueon])/$nn$1/g;
    # "nn": syllabic n.
    $s =~ s/nn/$nn/g;
    # "n-" before a vowel: syllabic n followed by that vowel.
    $s =~ s/n-([aiueo])/$nn$1/g;

    my $ret = '';
    while(1) {
        # Split into: leading non-romaji text, the next syllable (optional
        # consonants plus a vowel or "n"), and the remainder.
        my ($others, $hit, $rest) = ($s =~ /^(.*?)([bcdfghjklmnpqrstvwxyz]*[aiueon])(.*)$/sg);

        if(!defined $hit) {
            # No syllable left: emit what remains and finish.
            $this->convert(\$s, $charcode, "utf8");
            return $ret . $s;
        }

        $hit = $ConvTbl{$hit} if(defined $ConvTbl{$hit});
        $this->convert(\$others, $charcode, "utf8");
        $this->convert(\$hit, $charcode, "utf8");
        $ret .= $others . $hit;
        $s = $rest;
    }
}
# Convert text to hiragana.
#
# Text that matches the kana-only pattern below is handed to
# Katakana2Hiragana; anything else is sent through Kakasi with the
# to-hiragana option set.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode overwritten with Jcode::getcode($s) before use
#                   NOTE(review): the caller's value is always discarded --
#                   confirm that this is intended
#   $targetcharcode passed through to the helper that is called
# Returns: whatever the chosen helper returns.
sub Conv2Hiragana
{
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    $sourcecharcode = Jcode::getcode($s);

    # Test the text in the script's own encoding so it matches the literal
    # pattern, then restore the original encoding before dispatching.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);
    my $kana_only = $this->match($s, "^[¤¡-¤ó¥¡-¥óާ-ŽÝ]*\$");
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);

    if($kana_only) {
        return $this->Katakana2Hiragana($s, $sourcecharcode, $targetcharcode);
    }
    return $this->Kakasi($s, $sourcecharcode, $targetcharcode, qw/-aH -jH -KH -JH -EH -kH/);
}
# Convert text to romaji by running it through Kakasi with the to-ASCII
# option set.
#
# Parameters:
#   $s              text to convert
#   $sourcecharcode passed through to Kakasi
#   $targetcharcode passed through to Kakasi
# Returns: whatever Kakasi returns.
sub Conv2Roma
{
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    my @to_romaji = qw/-Ha -ja -Ka -Ja -Ea -ka/;
    return $this->Kakasi($s, $sourcecharcode, $targetcharcode, @to_romaji);
}
# Run text through kakasi and return its output.
#
# Parameters:
#   $s              text to convert; an empty or undefined value is returned
#                   unchanged
#   $sourcecharcode encoding of $s; obtained from $this->getcode($s) when
#                   undefined
#   $targetcharcode encoding of the result (defaults to $sourcecharcode)
#   @options        kakasi command-line options (e.g. -JH -KH)
# Returns: kakasi's output with trailing line breaks removed, in
#          $targetcharcode.  When the external program cannot be run a
#          warning is issued and the output is treated as empty.
#
# The text is written to kakasi's standard input through IPC::Open2 using
# the list form of the command, so no shell is involved: no escaping is
# needed, whitespace is preserved, and shell metacharacters in the text
# cannot be executed.
# NOTE: all input is written before any output is read, so this is meant
# for short strings; very large inputs could block on a full pipe.
sub Kakasi {
    my ($this, $s, $sourcecharcode, $targetcharcode, @options) = @_;
    return $s if(!defined $s || $s eq '');

    # Detect the encoding from the text itself (not from the undefined
    # encoding name).
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $targetcharcode = $sourcecharcode if(!defined $targetcharcode);

    $this->convert(\$s, $KakasiCharCode, $sourcecharcode);

    if($UseKakasiModule) {
        # The module is not loaded at the top of the file; load it on demand.
        require Text::Kakasi;
        my $kakasi = Text::Kakasi->new(@options);
        $s = $kakasi->get($s);
    }
    else {
        require IPC::Open2;
        my $output;
        my $ok = eval {
            # A child that exits early must not kill us with SIGPIPE.
            local $SIG{PIPE} = 'IGNORE';
            my ($from_kakasi, $to_kakasi);
            my $pid = IPC::Open2::open2($from_kakasi, $to_kakasi, $KakasiPath, @options);
            binmode($to_kakasi);
            binmode($from_kakasi);
            print {$to_kakasi} $s, "\n";
            close($to_kakasi);
            {
                local $/;    # slurp the whole output
                $output = <$from_kakasi>;
            }
            close($from_kakasi);
            waitpid($pid, 0);    # reap the child
            1;
        };
        if(!$ok) {
            warn "Kakasi: could not run $KakasiPath: $@";
            $output = '';
        }
        $s = defined $output ? $output : '';
        $s =~ s/[\r\n]+$//s;
    }

    $this->convert(\$s, $targetcharcode, $KakasiCharCode);
    return $s;
}
1;