#============================================================
# MyCharConv
#
# Object wrapper around Jcode (and the external "kakasi" command)
# for Japanese text handling: character-set conversion, half-width /
# full-width conversion, hiragana / katakana conversion and
# romaji <-> kana conversion.
#============================================================
package MyCharConv;

use Common;
#@ISA = qw(Common);

use strict;
use utf8;
use Encode;
use IPC::Open2;
use Jcode;
#use Text::Kakasi;

#===============================================
# Script-global variables
#===============================================

# Character code Jcode detects for the string literals of this file.
my $ScriptCodeCharCode = Jcode::getcode("月火水木金土日 ");

# When true, Kakasi() uses the Text::Kakasi module instead of running
# the external kakasi executable.
my $UseKakasiModule = 0;

# Location of the kakasi executable and the character code it is fed.
my $KakasiPath     = "/usr/local/bin/kakasi";
my $KakasiCharCode = "euc";
if ($^O =~ /^MSWin/) {
    $KakasiPath     = "c:\\kakasi\\bin\\kakasi.exe";
    $KakasiCharCode = "sjis";
}

# Romaji syllable => kana table used by Roma2Kana().
my %ConvTbl = (
    'a'   => 'あ',   'i'   => 'い',   'u'   => 'う',   'e'   => 'え',   'o'   => 'お',
    'ka'  => 'か',   'ki'  => 'き',   'ku'  => 'く',   'ke'  => 'け',   'ko'  => 'こ',
    'sa'  => 'さ',   'shi' => 'し',   'si'  => 'し',   'su'  => 'す',   'se'  => 'せ',   'so' => 'そ',
    'ta'  => 'た',   'chi' => 'ち',   'ti'  => 'ち',   'tu'  => 'つ',   'te'  => 'て',   'to' => 'と',
    'na'  => 'な',   'ni'  => 'に',   'nu'  => 'ぬ',   'ne'  => 'ね',   'no'  => 'の',
    'ha'  => 'は',   'hi'  => 'ひ',   'hu'  => 'ふ',   'he'  => 'へ',   'ho'  => 'ほ',
    'ma'  => 'ま',   'mi'  => 'み',   'mu'  => 'む',   'me'  => 'め',   'mo'  => 'も',
    'ya'  => 'や',   'yi'  => 'い',   'yu'  => 'ゆ',   'ye'  => 'え',   'yo'  => 'よ',
    'ra'  => 'ら',   'ri'  => 'り',   'ru'  => 'る',   're'  => 'れ',   'ro'  => 'ろ',
    'wa'  => 'わ',   'wi'  => 'うぃ', 'wu'  => 'う',   'we'  => 'うぇ', 'wo'  => 'を',
    'n'   => 'ん',   'm'   => 'ん',
    'da'  => 'だ',   'di'  => 'ぢ',   'du'  => 'づ',   'de'  => 'で',   'do'  => 'ど',
    'ba'  => 'ば',   'bi'  => 'び',   'bu'  => 'ぶ',   'be'  => 'べ',   'bo'  => 'ぼ',
    'ca'  => 'きゃ', 'ci'  => 'ち',   'cu'  => 'きゅ', 'ce'  => 'せ',   'co'  => 'こ',
    'fa'  => 'ふぁ', 'fi'  => 'ふぃ', 'fu'  => 'ふ',   'fe'  => 'ふぇ', 'fo'  => 'ふぉ',
    'ga'  => 'が',   'gi'  => 'ぎ',   'gu'  => 'ぐ',   'ge'  => 'げ',   'go'  => 'ご',
    'ja'  => 'じゃ', 'ji'  => 'じ',   'ju'  => 'じゅ', 'je'  => 'じぇ', 'jo'  => 'じょ',
    'la'  => 'ら',   'li'  => 'り',   'lu'  => 'る',   'le'  => 'れ',   'lo'  => 'ろ',
    # 'pi' and 'pu' used to map to the voiced kana 'び' / 'ぶ'.
    'pa'  => 'ぱ',   'pi'  => 'ぴ',   'pu'  => 'ぷ',   'pe'  => 'ぺ',   'po'  => 'ぽ',
    'qa'  => 'くぁ', 'qi'  => 'くぃ', 'qu'  => 'く',   'qe'  => 'くぇ', 'qo'  => 'くぉ',
    'va'  => 'ヴぁ', 'vi'  => 'ヴぃ', 'vu'  => 'ヴ',   've'  => 'ヴぇ', 'vo'  => 'ヴぉ',
    'xa'  => 'しゃ', 'xi'  => 'しぃ', 'xu'  => 'すぅ', 'xe'  => 'しぇ', 'xo'  => 'すぉ',
    'za'  => 'ざ',   'zi'  => 'じ',   'zu'  => 'ず',   'ze'  => 'ぜ',   'zo'  => 'ぞ',
    'cha' => 'ちゃ', 'chu' => 'ちゅ', 'che' => 'ちぇ', 'cho' => 'ちょ',
    'tsa' => 'つぁ', 'tsi' => 'ち',   'tsu' => 'つ',   'tse' => 'つぇ', 'tso' => 'つぉ',
    'tha' => 'しゃ', 'thi' => 'し',   'thu' => 'ちゅ', 'the' => 'せ',   'tho' => 'そ',
    # 'kyu' used to map to 'きゃ'.
    'kya' => 'きゃ', 'kyi' => 'きぃ', 'kyu' => 'きゅ', 'kye' => 'きぇ', 'kyo' => 'きょ',
    'sya' => 'しゃ', 'syi' => 'しぃ', 'syu' => 'しゅ', 'sye' => 'しぇ', 'syo' => 'しょ',
    'sha' => 'しゃ', 'shu' => 'しゅ', 'she' => 'しぇ', 'sho' => 'しょ',
    # 'tyo' used to map to 'ちぉ'.
    'tya' => 'ちゃ', 'tyi' => 'ちぃ', 'tyu' => 'ちゅ', 'tye' => 'ちぇ', 'tyo' => 'ちょ',
    'nya' => 'にゃ', 'nyi' => 'にぃ', 'nyu' => 'にゅ', 'nye' => 'にぇ', 'nyo' => 'にょ',
    'hya' => 'ひゃ', 'hyi' => 'ひぃ', 'hyu' => 'ひゅ', 'hye' => 'ひぇ', 'hyo' => 'ひょ',
    # 'myu' used to map to 'みぃ'.
    'mya' => 'みゃ', 'myi' => 'みぃ', 'myu' => 'みゅ', 'mye' => 'みぇ', 'myo' => 'みょ',
    'rya' => 'りゃ', 'ryi' => 'りぃ', 'ryu' => 'りゅ', 'rye' => 'りぇ', 'ryo' => 'りょ',
    'wya' => 'わぁ', 'wyi' => 'わぃ', 'wyu' => 'わぅ', 'wye' => 'わぇ', 'wyo' => 'わぉ',
    'nji' => 'んじ', 'nta' => 'んた',
);

# Convert every kana literal to "utf8", treating it as being in the
# detected script character code (Jcode::convert works in place).
foreach my $key (keys %ConvTbl) {
    Jcode::convert(\$ConvTbl{$key}, "utf8", $ScriptCodeCharCode);
}

# $ltu and $oh are only referenced by the disabled code in Roma2Kana().
my $ltu = "っ";
my $oh  = "おお";
my $nn  = "ん";
foreach my $pLiteral (\$ltu, \$oh, \$nn) {
    Jcode::convert($pLiteral, "utf8", $ScriptCodeCharCode);
}

#===============================================
# Constructor / destructor
#===============================================

# Creates a new converter object.
# $buff is accepted for compatibility but is not used.
sub new {
    my ($module, $buff) = @_;

    # Two-argument bless so that subclasses get objects of their own
    # class; falls back to this package when called as a plain function.
    my $class = ref($module) || $module || __PACKAGE__;
    my $this  = bless {}, $class;

    $this->{pJcode} = Jcode->new;
    $this->{pUTF8}  = find_encoding('utf8');
    $this->{pEUCjp} = find_encoding('eucjp');
    $this->{pSJIS}  = find_encoding('sjis');

    return $this;
}

# Calls Close() on destruction when such a method is available.
# This package does not define Close() itself, so the call is guarded
# to avoid raising a "can't locate object method" error during cleanup.
sub DESTROY {
    my $this = shift;

    local ($@, $!, $?);
    $this->Close() if $this->can('Close');
}

# Placeholder; performs no initialisation.
sub Initialize {
    my ($this) = @_;
}

#===============================================
# Accessors
#===============================================

# Returns the current conversion mode (undef until one has been set).
sub ConversionMode {
    my ($this) = @_;

    return $this->{ConversionMode};
}

# Stores $mode as the conversion mode and returns it.
# The mode 'none' makes convert() leave strings untouched.
sub SetConversionMode {
    my ($this, $mode) = @_;

    return $this->{ConversionMode} = $mode;
}

#===============================================
# General member functions
#===============================================

# Returns what Jcode::getcode() reports for $s.
sub getcode {
    my ($this, $s) = @_;

    return Jcode::getcode($s);
}

# The following wrappers build a Jcode object from $s and return the
# result of the Jcode method of the same name.
sub sjis {
    my ($this, $s) = @_;

    return Jcode->new($s)->sjis();
}

sub jis {
    my ($this, $s) = @_;

    return Jcode->new($s)->jis();
}

sub euc {
    my ($this, $s) = @_;

    return Jcode->new($s)->euc();
}

sub ucs2 {
    my ($this, $s) = @_;

    return Jcode->new($s)->ucs2();
}

sub utf8 {
    my ($this, $s) = @_;

    return Jcode->new($s)->utf8();
}

sub iso_2022_jp {
    my ($this, $s) = @_;

    return Jcode->new($s)->iso_2022_jp();
}

# Converts half-width (ASCII) letters, digits, space and symbols in $s
# to their full-width forms; half-width kana is converted as well unless
# $ConvertKana is passed as a false value (default: 1).
# $sourcecharcode is detected from $s when not given.
#
# NOTE(review): in the copy of the file that was reviewed both sides of
# every tr() list were identical, so nothing was converted; the
# full-width lists were presumably lost to Unicode normalisation and are
# restored here - please confirm against the repository copy.
sub Hankaku2Zenkaku {
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $ConvertKana    = 1                  if !defined $ConvertKana;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # The double quote is written without a backslash: inside single
    # quotes '\"' is two characters and shifted the mapping by one.
    # The hyphen is placed last so that tr treats it literally instead
    # of reading '=-^' as a range.
    my $jcode = Jcode->new($s)
        ->tr('a-zA-Z0-9 ', 'ａ-ｚＡ-Ｚ０-９　')
        ->tr('@!"#$%&\'', '＠！”＃＄％＆’')
        ->tr('()=^\\|{}:;+*?_/<>-', '（）＝＾￥｜｛｝：；＋＊？＿／＜＞－');
    $jcode = $jcode->h2z() if $ConvertKana;
    $s = $jcode . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Inverse of Hankaku2Zenkaku(): converts full-width letters, digits,
# space and symbols in $s to ASCII; full-width kana is converted to
# half-width kana unless $ConvertKana is false (default: 1).
# $sourcecharcode is detected from $s when not given.
#
# NOTE(review): as in Hankaku2Zenkaku(), the full-width lists are
# restored on the assumption that they were lost to normalisation.
sub Zenkaku2Hankaku {
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $ConvertKana    = 1                  if !defined $ConvertKana;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # See Hankaku2Zenkaku() for why '"' is unescaped and '-' is last.
    my $jcode = Jcode->new($s)
        ->tr('ａ-ｚＡ-Ｚ０-９　', 'a-zA-Z0-9 ')
        ->tr('＠！”＃＄％＆’', '@!"#$%&\'')
        ->tr('（）＝＾￥｜｛｝：；＋＊？＿／＜＞－', '()=^\\|{}:;+*?_/<>-');
    $jcode = $jcode->z2h() if $ConvertKana;
    $s = $jcode . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Applies Jcode's z2h() to $s and converts the result to $sourcecharcode
# (detected from $s when not given).
sub Zenkaku2HankakuKana {
    my ($this, $s, $sourcecharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = Jcode->new($s)->z2h() . '';
    # No source code is passed here; how Utils::convert treats that is
    # not visible in this file.
    $this->convert(\$s, $sourcecharcode);
    return $s;
}

# Applies Jcode's h2z() to $s, converting to the script character code
# before and back to $sourcecharcode afterwards.
sub HankakuKana2Zenkaku {
    my ($this, $s, $sourcecharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);
    $s = Jcode->new($s)->h2z() . '';
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Replaces hiragana (ぁ-ん) in $s with the corresponding katakana.
sub Hiragana2Katakana {
    my ($this, $s, $sourcecharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = Jcode->new($s)->tr('ぁ-ん', 'ァ-ン') . '';
    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}

# Replaces katakana in $s (half-width kana is widened first) with the
# corresponding hiragana.
sub Katakana2Hiragana {
    my ($this, $s, $sourcecharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);
    $s = $this->HankakuKana2Zenkaku($s, $sourcecharcode);
    # The range starts at small 'ァ' (it used to start at 'ア'), so the
    # mapping is the exact inverse of Hiragana2Katakana().
    $s = Jcode->new($s)->tr('ァ-ン', 'ぁ-ん') . '';
    $this->convert(\$s, $sourcecharcode);
    return $s;
}

# MIME header helpers; each returns the result of the Jcode method named
# in its body.
sub MIMEEncode {
    my ($this, $s) = @_;

    return Jcode->new($s)->mime_encode();
}

sub MIMEEncodeByUTF8 {
    my ($this, $s) = @_;

    return Jcode->new($s)->MIME_Header();
}

sub MIMEDecode {
    my ($this, $s) = @_;

    return Jcode->new($s)->mime_decode();
}

# Wrappers around Jcode's tr / s / m methods applied to a Jcode object
# built from $s.
sub tr {
    my ($this, $s, $from, $to, $opt) = @_;

    return Jcode->new($s)->tr($from, $to, $opt);
}

sub s {
    my ($this, $s, $pattern, $replace, $opt) = @_;

    return Jcode->new($s)->s($pattern, $replace, $opt);
}

sub match {
    my ($this, $s, $pattern, $opt) = @_;

    return Jcode->new($s)->m($pattern, $opt);
}

# Converts the string referenced by $pStr to $targetcharcode by
# delegating to Utils::convert (defined outside this file).
# When the conversion mode is 'none' the string is returned unchanged.
sub convert {
    my ($this, $pStr, $targetcharcode, $sourcecharcode) = @_;

    # The mode is undefined until SetConversionMode() has been called.
    my $mode = $this->{ConversionMode};
    return $$pStr if defined $mode && $mode eq 'none';

    return Utils::convert($pStr, $targetcharcode, $sourcecharcode);
}

# Shorthand for Zenkaku2Hankaku() that always returns a plain string.
sub Zen2Han {
    my ($this, $s, $sourcecharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    return $this->Zenkaku2Hankaku($s, $sourcecharcode) . '';
}

# Converts romaji in $s to kana using %ConvTbl.
#   $charcode       - character code of the returned string (default 'sjis')
#   $sourcecharcode - character code of $s (detected when not given)
# Syllables that are not in the table are left as they are.
sub Roma2Kana {
    my ($this, $s, $charcode, $sourcecharcode) = @_;

    $charcode       = 'sjis'             if !defined $charcode;
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    $s = lc $this->Zenkaku2Hankaku($s, $sourcecharcode);
    $this->convert(\$s, "utf8", $sourcecharcode);

    # "Xh" + consonant is read as a long vowel: double the vowel.
    $s =~ s/([aiueo])h([bcdfghjklmnpqrstvwxyz])/$1$1$2/g;
    # n/m that is not followed by a vowel (or n), "nn" and "n-" + vowel
    # all become the syllabic n.
    $s =~ s/[nm]([^aiueon])/$nn$1/g;
    $s =~ s/nn/$nn/g;
    $s =~ s/n-([aiueo])/$nn$1/g;

    my $ret = '';
    while (1) {
        # $others: text before the next syllable, $hit: the syllable
        # (consonants followed by a vowel or n), $rest: what follows.
        my ($others, $hit, $rest) =
            ($s =~ /^(.*?)([bcdfghjklmnpqrstvwxyz]*[aiueon])(.*)$/s);

        if (!defined $hit) {
            # No syllable left: append the remainder and finish.
            $this->convert(\$s, $charcode, "utf8");
            return $ret . $s;
        }

        # Handling of doubled consonants ($ltu) and of "oh" ($oh) is
        # disabled, so $pre always stays empty:
        #   if doubled leading consonant: $pre = $ltu; $hit = $2 . $3;
        #   if $hit starts with "oh":     $pre .= $oh; $hit = $2;
        my $pre = '';

        $hit = $ConvTbl{$hit} if defined $ConvTbl{$hit};

        $this->convert(\$others, $charcode, "utf8");
        $this->convert(\$hit,    $charcode, "utf8");
        $this->convert(\$pre,    $charcode, "utf8");

        $ret .= $others . $pre . $hit;
        $s = $rest;
    }
}

# Converts $s to hiragana.  Strings that consist only of kana are
# handled by Katakana2Hiragana(); everything else is passed to kakasi.
#   $sourcecharcode - character code of $s (detected when not given;
#                     it used to be overwritten unconditionally)
#   $targetcharcode - character code of the result (default: source)
sub Conv2Hiragana {
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;

    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;

    # Test a copy so that $s itself is not converted back and forth.
    my $probe = $s;
    $this->convert(\$probe, $ScriptCodeCharCode, $sourcecharcode);

    # NOTE(review): the class used to list the full-width katakana range
    # twice; the second one is assumed to have been the half-width
    # katakana range, which Katakana2Hiragana() widens before converting.
    if ($this->match($probe, "^[ぁ-んァ-ンｦ-ﾟ]*\$")) {
        my $kana = $this->Katakana2Hiragana($s, $sourcecharcode);

        # Honour $targetcharcode on this path as the kakasi path does.
        if (   defined $targetcharcode
            && defined $sourcecharcode
            && $targetcharcode ne $sourcecharcode)
        {
            $this->convert(\$kana, $targetcharcode, $sourcecharcode);
        }
        return $kana;
    }

    return $this->Kakasi($s, $sourcecharcode, $targetcharcode,
        qw/-aH -jH -KH -JH -EH -kH/);
}

# Converts $s to romaji with kakasi.
sub Conv2Roma {
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;

    return $this->Kakasi($s, $sourcecharcode, $targetcharcode,
        qw/-Ha -ja -Ka -Ja -Ea -ka/);
}

# Runs the kakasi executable with @options, writes $text plus a newline
# to its standard input and returns everything it prints.
# The program is started without a shell, so $text needs no quoting.
# Returns '' (after a warning) when kakasi cannot be run.
#
# All input is written before any output is read, which is adequate for
# the short strings handled here; very large inputs could block.
sub _RunKakasi {
    my ($text, @options) = @_;

    my $output = '';
    my $ok = eval {
        # A kakasi that exits early must not kill us with SIGPIPE.
        local $SIG{PIPE} = 'IGNORE';

        my ($from_kakasi, $to_kakasi);
        my $pid = open2($from_kakasi, $to_kakasi, $KakasiPath, @options);
        binmode $to_kakasi;
        binmode $from_kakasi;

        print {$to_kakasi} $text, "\n";
        close $to_kakasi;

        my $data = do { local $/; <$from_kakasi> };
        $output = $data if defined $data;
        close $from_kakasi;

        waitpid $pid, 0;
        1;
    };
    warn "MyCharConv: cannot run '$KakasiPath': $@" if !$ok;

    return $output;
}

# Passes $s through kakasi with the given command-line @options.
#   $sourcecharcode - character code of $s (detected from $s when not
#                     given)
#   $targetcharcode - character code of the result (default: source)
# An empty $s is returned as is.
sub Kakasi {
    my ($this, $s, $sourcecharcode, $targetcharcode, @options) = @_;

    return $s if !defined $s || $s eq '';

    # Detect from the text; this used to inspect the (undefined)
    # $sourcecharcode itself.
    $sourcecharcode = $this->getcode($s) if !defined $sourcecharcode;
    $targetcharcode = $sourcecharcode    if !defined $targetcharcode;

    $this->convert(\$s, $KakasiCharCode, $sourcecharcode);

    if ($UseKakasiModule) {
        # The module is loaded only when this path is enabled.
        require Text::Kakasi;
        my $kakasi = Text::Kakasi->new(@options);
        $s = $kakasi->get($s);
    }
    else {
        # The text used to be interpolated into "echo $s | kakasi ..."
        # with incomplete escaping, which allowed shell command
        # injection; it is now sent over a pipe instead.
        $s = _RunKakasi($s, @options);
        $s =~ s/[\r\n]+$//s;
    }

    $this->convert(\$s, $targetcharcode, $KakasiCharCode);
    return $s;
}

1;