#============================================================
# MyCharConvEUCJP
#
# Japanese text helpers (full-width/half-width folding,
# hiragana/katakana folding, romaji -> kana, and kanji -> kana/romaji
# through the external "kakasi" program).
#
# NOTE: this source file is encoded in EUC-JP. Every Japanese string
# literal below is raw EUC-JP bytes, so do NOT add "use utf8" and do NOT
# re-save the file in another encoding.
#
# convert(), getcode() and match() are inherited from MyCharConv.  From
# the call sites here, convert() is used as
# convert(\$text, $to_code, $from_code) -- presumably mirroring
# Jcode::convert(); confirm against MyCharConv.
#============================================================
package MyCharConvEUCJP;

use strict;
use warnings;

use MyCharConv;
our @ISA = qw(MyCharConv);

use Encode;
use IPC::Open2;
use Jcode;
#use Text::Kakasi;

#===============================================
# Script-wide (file-scoped) variables
#===============================================

# Encoding this source file is stored in, detected from a literal.
my $ScriptCodeCharCode = Jcode::getcode("·î²Ð¿åÌÚ¶âÅÚÆü¡¡");

# When true, Kakasi() uses the Text::Kakasi module instead of spawning
# the kakasi executable.
my $UseKakasiModule = 0;
my $KakasiPath      = "/usr/local/bin/kakasi";
my $KakasiCharCode  = "euc";
if ($^O =~ /^MSWin/) {
    $KakasiPath     = "c:\\kakasi\\bin\\kakasi.exe";
    $KakasiCharCode = "sjis";
}

# Character lists shared by Hankaku2Zenkaku() and Zenkaku2Hankaku().
# Each half-width list is positionally paired with the full-width list
# of the same suffix.
#  * The lists are single-quoted, so only \\ and \' are escapes; a
#    backslash before a double quote would be a literal extra character
#    and shift the pairing.
#  * The hyphen is placed LAST in the "B" lists: anywhere else tr///
#    reads "x-y" as a character range.
my $HankakuAlnum   = 'a-zA-Z0-9 ';
my $ZenkakuAlnum   = '£á-£ú£Á-£Ú£°-£¹¡¡';
my $HankakuSymbolA = '@!"#$%&\'';
my $ZenkakuSymbolA = '¡÷¡ª¡É¡ô¡ð¡ó¡õ¡Ç';
my $HankakuSymbolB = '()=^\\|{}:;+*?_/<>-';
my $ZenkakuSymbolB = '()=¡°¡ï¡Ã¡Ð¡Ñ¡§¡¨+¡ö¡©¡²¡¿¡ã¡ä¡Ý';

# Romaji syllable -> hiragana.  Values are written in the script
# encoding and converted to UTF-8 right after the table, because
# Roma2Kana() does its matching in UTF-8.
# Corrected entries: pi, pu (were the "bi"/"bu" kana), kyu (was the
# "kya" value), myu (was the "myi" value), tyo (small "o" instead of
# small "yo").
my %ConvTbl = (
    'a'   => '¤¢',   'i'   => '¤¤',   'u'   => '¤¦',   'e'   => '¤¨',   'o'   => '¤ª',
    'ka'  => '¤«',   'ki'  => '¤­',   'ku'  => '¤¯',   'ke'  => '¤±',   'ko'  => '¤³',
    'sa'  => '¤µ',   'shi' => '¤·',   'si'  => '¤·',   'su'  => '¤¹',   'se'  => '¤»',   'so' => '¤½',
    'ta'  => '¤¿',   'chi' => '¤Á',   'ti'  => '¤Á',   'tu'  => '¤Ä',   'te'  => '¤Æ',   'to' => '¤È',
    'na'  => '¤Ê',   'ni'  => '¤Ë',   'nu'  => '¤Ì',   'ne'  => '¤Í',   'no'  => '¤Î',
    'ha'  => '¤Ï',   'hi'  => '¤Ò',   'hu'  => '¤Õ',   'he'  => '¤Ø',   'ho'  => '¤Û',
    'ma'  => '¤Þ',   'mi'  => '¤ß',   'mu'  => '¤à',   'me'  => '¤á',   'mo'  => '¤â',
    'ya'  => '¤ä',   'yi'  => '¤¤',   'yu'  => '¤æ',   'ye'  => '¤¨',   'yo'  => '¤è',
    'ra'  => '¤é',   'ri'  => '¤ê',   'ru'  => '¤ë',   're'  => '¤ì',   'ro'  => '¤í',
    'wa'  => '¤ï',   'wi'  => '¤¦¤£', 'wu'  => '¤¦',   'we'  => '¤¦¤§', 'wo'  => '¤ò',
    'n'   => '¤ó',   'm'   => '¤ó',
    'da'  => '¤À',   'di'  => '¤Â',   'du'  => '¤Å',   'de'  => '¤Ç',   'do'  => '¤É',
    'ba'  => '¤Ð',   'bi'  => '¤Ó',   'bu'  => '¤Ö',   'be'  => '¤Ù',   'bo'  => '¤Ü',
    'ca'  => '¤­¤ã', 'ci'  => '¤Á',   'cu'  => '¤­¤å', 'ce'  => '¤»',   'co'  => '¤³',
    'fa'  => '¤Õ¤¡', 'fi'  => '¤Õ¤£', 'fu'  => '¤Õ',   'fe'  => '¤Õ¤§', 'fo'  => '¤Õ¤©',
    'ga'  => '¤¬',   'gi'  => '¤®',   'gu'  => '¤°',   'ge'  => '¤²',   'go'  => '¤´',
    'ja'  => '¤¸¤ã', 'ji'  => '¤¸',   'ju'  => '¤¸¤å', 'je'  => '¤¸¤§', 'jo'  => '¤¸¤ç',
    'la'  => '¤é',   'li'  => '¤ê',   'lu'  => '¤ë',   'le'  => '¤ì',   'lo'  => '¤í',
    'pa'  => '¤Ñ',   'pi'  => '¤Ô',   'pu'  => '¤×',   'pe'  => '¤Ú',   'po'  => '¤Ý',
    'qa'  => '¤¯¤¡', 'qi'  => '¤¯¤£', 'qu'  => '¤¯',   'qe'  => '¤¯¤§', 'qo'  => '¤¯¤©',
    'va'  => '¥ô¤¡', 'vi'  => '¥ô¤£', 'vu'  => '¥ô',   've'  => '¥ô¤§', 'vo'  => '¥ô¤©',
    'xa'  => '¤·¤ã', 'xi'  => '¤·¤£', 'xu'  => '¤¹¤¥', 'xe'  => '¤·¤§', 'xo'  => '¤¹¤©',
    'za'  => '¤¶',   'zi'  => '¤¸',   'zu'  => '¤º',   'ze'  => '¤¼',   'zo'  => '¤¾',
    'cha' => '¤Á¤ã',                  'chu' => '¤Á¤å', 'che' => '¤Á¤§', 'cho' => '¤Á¤ç',
    'tsa' => '¤Ä¤¡', 'tsi' => '¤Á',   'tsu' => '¤Ä',   'tse' => '¤Ä¤§', 'tso' => '¤Ä¤©',
    'tha' => '¤·¤ã', 'thi' => '¤·',   'thu' => '¤Á¤å', 'the' => '¤»',   'tho' => '¤½',
    'kya' => '¤­¤ã', 'kyi' => '¤­¤£', 'kyu' => '¤­¤å', 'kye' => '¤­¤§', 'kyo' => '¤­¤ç',
    'sya' => '¤·¤ã', 'syi' => '¤·¤£', 'syu' => '¤·¤å', 'sye' => '¤·¤§', 'syo' => '¤·¤ç',
    'sha' => '¤·¤ã',                  'shu' => '¤·¤å', 'she' => '¤·¤§', 'sho' => '¤·¤ç',
    'tya' => '¤Á¤ã', 'tyi' => '¤Á¤£', 'tyu' => '¤Á¤å', 'tye' => '¤Á¤§', 'tyo' => '¤Á¤ç',
    'nya' => '¤Ë¤ã', 'nyi' => '¤Ë¤£', 'nyu' => '¤Ë¤å', 'nye' => '¤Ë¤§', 'nyo' => '¤Ë¤ç',
    'hya' => '¤Ò¤ã', 'hyi' => '¤Ò¤£', 'hyu' => '¤Ò¤å', 'hye' => '¤Ò¤§', 'hyo' => '¤Ò¤ç',
    'mya' => '¤ß¤ã', 'myi' => '¤ß¤£', 'myu' => '¤ß¤å', 'mye' => '¤ß¤§', 'myo' => '¤ß¤ç',
    'rya' => '¤ê¤ã', 'ryi' => '¤ê¤£', 'ryu' => '¤ê¤å', 'rye' => '¤ê¤§', 'ryo' => '¤ê¤ç',
    'wya' => '¤ï¤¡', 'wyi' => '¤ï¤£', 'wyu' => '¤ï¤¥', 'wye' => '¤ï¤§', 'wyo' => '¤ï¤©',
    'nji' => '¤ó¤¸', 'nta' => '¤ó¤¿',
);

foreach my $key (keys %ConvTbl) {
    Jcode::convert(\$ConvTbl{$key}, "utf8", $ScriptCodeCharCode);
}

# UTF-8 copies of a few kana used while parsing romaji.  $ltu (small
# "tsu") and $oh (long "o") are only needed by the doubled-consonant /
# "oh" handling, which is currently disabled in Roma2Kana().
my $ltu = "¤Ã";
Jcode::convert(\$ltu, "utf8", $ScriptCodeCharCode);
my $oh = "¤ª¤ª";
Jcode::convert(\$oh, "utf8", $ScriptCodeCharCode);
my $nn = "¤ó";
Jcode::convert(\$nn, "utf8", $ScriptCodeCharCode);

#===============================================
# Constructor / destructor
#===============================================

# new($buff)
#   Creates a converter holding a Jcode object and Encode objects for
#   UTF-8, EUC-JP and Shift_JIS.  $buff is accepted for interface
#   compatibility but is not used.
sub new {
    my ($module, $buff) = @_;

    # Two-argument bless so that subclasses get objects of their own
    # class; fall back to this package when called as a plain function.
    my $class = ref($module) || $module || __PACKAGE__;
    my $this  = bless {}, $class;

    $this->{pJcode} = Jcode->new;
    $this->{pUTF8}  = find_encoding('utf8');
    $this->{pEUCjp} = find_encoding('eucjp');
    $this->{pSJIS}  = find_encoding('sjis');
    return $this;
}

# Releases the object through Close().  Close() is not defined in this
# file (presumably inherited from MyCharConv), so it is only called
# when it actually exists.
sub DESTROY {
    my $this = shift;
    $this->Close() if $this->can('Close');
}

# Placeholder; performs no initialization.
sub Initialize {
    my ($this) = @_;
}

#===============================================
# Accessors
#===============================================

#===============================================
# General member functions
#===============================================

# Hankaku2Zenkaku($s, $sourcecharcode, $ConvertKana)
#   Half-width -> full-width: ASCII letters, digits, space and the
#   punctuation in the shared lists above.  When $ConvertKana is true
#   (the default) half-width kana are widened as well (Jcode h2z).
#   $sourcecharcode defaults to the detected encoding of $s; the result
#   is returned in that same encoding.
sub Hankaku2Zenkaku {
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $ConvertKana    = 1                  if !defined $ConvertKana;

    # Work in the script encoding so the literals above line up.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    my $j = Jcode->new($s);
    $j = $j->tr($HankakuAlnum,   $ZenkakuAlnum);
    $j = $j->tr($HankakuSymbolA, $ZenkakuSymbolA);
    $j = $j->tr($HankakuSymbolB, $ZenkakuSymbolB);
    $j = $j->h2z() if $ConvertKana;
    $s = $j . '';    # stringify the Jcode object

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Zenkaku2Hankaku($s, $sourcecharcode, $ConvertKana)
#   Inverse of Hankaku2Zenkaku(): full-width -> half-width for the same
#   character lists; when $ConvertKana is true (the default) Jcode z2h
#   is applied as well.  Returns text in $sourcecharcode.
sub Zenkaku2Hankaku {
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $ConvertKana    = 1                  if !defined $ConvertKana;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    my $j = Jcode->new($s);
    $j = $j->tr($ZenkakuAlnum,   $HankakuAlnum);
    $j = $j->tr($ZenkakuSymbolA, $HankakuSymbolA);
    $j = $j->tr($ZenkakuSymbolB, $HankakuSymbolB);
    $j = $j->z2h() if $ConvertKana;
    $s = $j . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Zenkaku2HankakuKana($s, $sourcecharcode)
#   Applies Jcode z2h to $s (encoding auto-detected by Jcode) and
#   converts the result to $sourcecharcode.
sub Zenkaku2HankakuKana {
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = Jcode->new($s)->z2h() . '';
    $this->convert(\$s, $sourcecharcode);
    return $s;
}

# HankakuKana2Zenkaku($s, $sourcecharcode)
#   Applies Jcode h2z (half-width kana -> full-width) and returns the
#   text in $sourcecharcode.
sub HankakuKana2Zenkaku {
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);
    $s = Jcode->new($s)->h2z() . '';
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Hiragana2Katakana($s, $sourcecharcode)
#   Maps the hiragana block onto the katakana block and returns the
#   text in $sourcecharcode.
sub Hiragana2Katakana {
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = Jcode->new($s)->tr('¤¡-¤ó', '¥¡-¥ó') . '';
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Katakana2Hiragana($s, $sourcecharcode)
#   Widens half-width kana, then maps the katakana block onto the
#   hiragana block (same range as Hiragana2Katakana(), small kana
#   included).  Returns text in $sourcecharcode.
sub Katakana2Hiragana {
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # $s is now in the script encoding, so that is the encoding that
    # must be announced to HankakuKana2Zenkaku(); announcing the
    # original source encoding would convert the text a second time.
    $s = $this->HankakuKana2Zenkaku($s, $ScriptCodeCharCode);
    $s = Jcode->new($s)->tr('¥¡-¥ó', '¤¡-¤ó') . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Zen2Han($s, $sourcecharcode)
#   Convenience alias for Zenkaku2Hankaku() with kana conversion on.
sub Zen2Han {
    my ($this, $s, $sourcecharcode) = @_;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = $this->Zenkaku2Hankaku($s, $sourcecharcode) . '';
    return $s;
}

# Roma2Kana($s, $charcode, $sourcecharcode)
#   Converts romaji in $s to hiragana using %ConvTbl.  Text that does
#   not form a known syllable is passed through unchanged.
#     $charcode        encoding of the returned string (default 'sjis')
#     $sourcecharcode  encoding of $s (default: detected)
#   NOTE(review): doubled consonants (small "tsu") and "oh" long vowels
#   are not handled; that code was already disabled.  Also, "n"/"m"
#   followed by "y" becomes the syllabic n + ya/yu/yo, so the nya/mya
#   rows of the table are effectively unreachable -- confirm intent.
sub Roma2Kana {
    my ($this, $s, $charcode, $sourcecharcode) = @_;
    $charcode       = 'sjis'             if !defined $charcode;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = $this->Zenkaku2Hankaku($s, $sourcecharcode);
    $this->convert(\$s, "utf8", $sourcecharcode);

    # Lower-case only after the conversion to UTF-8: in Shift_JIS the
    # second byte of a double-byte character can fall in A-Z, and
    # lower-casing it would corrupt the character.
    $s =~ tr/A-Z/a-z/;

    $s =~ s/([aiueo])h([bcdfghjklmnpqrstvwxyz])/$1$1$2/g;    # "ohta" -> "oota"
    $s =~ s/[nm]([^aiueon])/$nn$1/g;                         # n/m before a consonant
    $s =~ s/nn/$nn/g;
    $s =~ s/n-([aiueo])/$nn$1/g;                             # "n-a": explicit syllabic n

    my $ret = '';
    while (1) {
        # $others: text before the next syllable candidate,
        # $hit:    consonant run + vowel (or a lone "n"),
        # $rest:   everything after it.
        my ($others, $hit, $rest) =
            ($s =~ /^(.*?)([bcdfghjklmnpqrstvwxyz]*[aiueon])(.*)$/s);

        if (!defined $hit) {
            # No syllable left: emit the remainder as is.
            $this->convert(\$s, $charcode, "utf8");
            $ret .= $s;
            return $ret;
        }

        $hit = $ConvTbl{$hit} if defined $ConvTbl{$hit};
        $this->convert(\$others, $charcode, "utf8");
        $this->convert(\$hit,    $charcode, "utf8");
        $ret .= $others . $hit;
        $s = $rest;
    }
}

# Conv2Hiragana($s, $sourcecharcode, $targetcharcode)
#   Converts $s to hiragana.  Text made up only of kana is folded
#   directly with Katakana2Hiragana(); anything else goes through
#   kakasi.  $sourcecharcode defaults to the detected encoding and
#   $targetcharcode to $sourcecharcode.
sub Conv2Hiragana {
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    $sourcecharcode = Jcode::getcode($s) if !defined $sourcecharcode;

    # Test a copy in the script encoding; $s itself stays untouched so
    # no round-trip conversion is needed afterwards.
    my $probe = $s;
    $this->convert(\$probe, $ScriptCodeCharCode, $sourcecharcode);

    if ($this->match($probe, "^[¤¡-¤ó¥¡-¥óާ-ŽÝ]*\$")) {
        my $hira = $this->Katakana2Hiragana($s, $sourcecharcode);

        # Katakana2Hiragana() answers in the source encoding; honour
        # the requested target encoding the same way Kakasi() does.
        if (   defined $targetcharcode
            && defined $sourcecharcode
            && $targetcharcode ne $sourcecharcode)
        {
            $this->convert(\$hira, $targetcharcode, $sourcecharcode);
        }
        return $hira;
    }

    return $this->Kakasi($s, $sourcecharcode, $targetcharcode,
        qw/-aH -jH -KH -JH -EH -kH/);
}

# Conv2Roma($s, $sourcecharcode, $targetcharcode)
#   Converts $s to romaji through kakasi.
sub Conv2Roma {
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    return $this->Kakasi($s, $sourcecharcode, $targetcharcode,
        qw/-Ha -ja -Ka -Ja -Ea -ka/);
}

# Kakasi($s, $sourcecharcode, $targetcharcode, @options)
#   Runs $s through kakasi with the given command-line options.
#     $sourcecharcode  encoding of $s (default: detected from $s)
#     $targetcharcode  encoding of the result (default: $sourcecharcode)
#   Returns the converted text with trailing line breaks removed, or an
#   empty string when kakasi could not be run.
sub Kakasi {
    my ($this, $s, $sourcecharcode, $targetcharcode, @options) = @_;
    return $s if !defined $s || $s eq '';

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $targetcharcode = $sourcecharcode    if !defined $targetcharcode;

    $this->convert(\$s, $KakasiCharCode, $sourcecharcode);

    if ($UseKakasiModule) {
        require Text::Kakasi;
        my $kakasi = Text::Kakasi->new(@options);
        $s = $kakasi->get($s);
    }
    else {
        $s = _run_kakasi($s, @options);
    }

    $this->convert(\$s, $targetcharcode, $KakasiCharCode);
    return $s;
}

# Feeds $text to the kakasi executable on its standard input and
# returns what it prints.  The program is started without a shell, so
# no character of $text can be interpreted as shell syntax.
sub _run_kakasi {
    my ($text, @options) = @_;

    my $output = eval {
        # A kakasi that exits early must not kill us with SIGPIPE.
        local $SIG{PIPE} = 'IGNORE';

        my ($from_kakasi, $to_kakasi);
        my $pid = open2($from_kakasi, $to_kakasi, $KakasiPath, @options);
        binmode $to_kakasi;
        binmode $from_kakasi;

        # All input is written before any output is read; that is fine
        # for the short strings this class handles, but very large
        # inputs could fill both pipes.
        print {$to_kakasi} $text, "\n";
        close $to_kakasi;

        my $result = do { local $/; <$from_kakasi> };
        close $from_kakasi;
        waitpid $pid, 0;
        $result;
    };

    if (!defined $output) {
        warn "MyCharConvEUCJP: could not run $KakasiPath: $@" if $@;
        return '';
    }

    $output =~ s/[\r\n]+$//s;
    return $output;
}

1;