#============================================================
# MyCharConv
#============================================================
package MyCharConv;
use Common;
#@ISA = qw(Common);
use strict;
use utf8;
use Encode;
use Jcode;
#use Text::Kakasi;
#===============================================
# Script-wide global variables
#===============================================
# Encoding name Jcode detects for a Japanese literal written in this file;
# used below as the encoding of the script's own string literals.
my $ScriptCodeCharCode = Jcode::getcode("月火水木金土日 ");

# 0: run the external kakasi binary; non-zero: use the Text::Kakasi module.
my $UseKakasiModule = 0;

# Location of the kakasi binary and the encoding it is fed, chosen per platform.
my ($KakasiPath, $KakasiCharCode) = ($^O =~ /^MSWin/)
    ? ("c:\\kakasi\\bin\\kakasi.exe", "sjis")
    : ("/usr/local/bin/kakasi", "euc");
# Romaji syllable => hiragana lookup table used by Roma2Kana.
# Keys are lower-case ASCII romaji; values are kana literals from this file.
# Fixed relative to the previous table:
#   pi  was 'び' (now 'ぴ'),   pu  was 'ぶ' (now 'ぷ'),
#   kyu was 'きゃ' (now 'きゅ'), myu was 'みぃ' (now 'みゅ'),
#   tyo was 'ちぉ' (now 'ちょ'),
#   and the duplicated 'chi' / 'shi' keys are listed only once.
my %ConvTbl = (
    # vowels
    'a' => 'あ', 'i' => 'い', 'u' => 'う', 'e' => 'え', 'o' => 'お',
    # basic rows
    'ka' => 'か', 'ki' => 'き', 'ku' => 'く', 'ke' => 'け', 'ko' => 'こ',
    'sa' => 'さ', 'shi' => 'し', 'si' => 'し', 'su' => 'す', 'se' => 'せ', 'so' => 'そ',
    'ta' => 'た', 'chi' => 'ち', 'ti' => 'ち', 'tu' => 'つ', 'te' => 'て', 'to' => 'と',
    'na' => 'な', 'ni' => 'に', 'nu' => 'ぬ', 'ne' => 'ね', 'no' => 'の',
    'ha' => 'は', 'hi' => 'ひ', 'hu' => 'ふ', 'he' => 'へ', 'ho' => 'ほ',
    'ma' => 'ま', 'mi' => 'み', 'mu' => 'む', 'me' => 'め', 'mo' => 'も',
    'ya' => 'や', 'yi' => 'い', 'yu' => 'ゆ', 'ye' => 'え', 'yo' => 'よ',
    'ra' => 'ら', 'ri' => 'り', 'ru' => 'る', 're' => 'れ', 'ro' => 'ろ',
    'wa' => 'わ', 'wi' => 'うぃ', 'wu' => 'う', 'we' => 'うぇ', 'wo' => 'を',
    # syllabic n (a lone 'm' is treated the same way)
    'n' => 'ん', 'm' => 'ん',
    # voiced / other single-consonant rows
    'da' => 'だ', 'di' => 'ぢ', 'du' => 'づ', 'de' => 'で', 'do' => 'ど',
    'ba' => 'ば', 'bi' => 'び', 'bu' => 'ぶ', 'be' => 'べ', 'bo' => 'ぼ',
    'ca' => 'きゃ', 'ci' => 'ち', 'cu' => 'きゅ', 'ce' => 'せ', 'co' => 'こ',
    'fa' => 'ふぁ', 'fi' => 'ふぃ', 'fu' => 'ふ', 'fe' => 'ふぇ', 'fo' => 'ふぉ',
    'ga' => 'が', 'gi' => 'ぎ', 'gu' => 'ぐ', 'ge' => 'げ', 'go' => 'ご',
    'ja' => 'じゃ', 'ji' => 'じ', 'ju' => 'じゅ', 'je' => 'じぇ', 'jo' => 'じょ',
    'la' => 'ら', 'li' => 'り', 'lu' => 'る', 'le' => 'れ', 'lo' => 'ろ',
    'pa' => 'ぱ', 'pi' => 'ぴ', 'pu' => 'ぷ', 'pe' => 'ぺ', 'po' => 'ぽ',
    'qa' => 'くぁ', 'qi' => 'くぃ', 'qu' => 'く', 'qe' => 'くぇ', 'qo' => 'くぉ',
    'va' => 'ヴぁ', 'vi' => 'ヴぃ', 'vu' => 'ヴ', 've' => 'ヴぇ', 'vo' => 'ヴぉ',
    'xa' => 'しゃ', 'xi' => 'しぃ', 'xu' => 'すぅ', 'xe' => 'しぇ', 'xo' => 'すぉ',
    'za' => 'ざ', 'zi' => 'じ', 'zu' => 'ず', 'ze' => 'ぜ', 'zo' => 'ぞ',
    # two-consonant clusters ('chi' and 'shi' are defined in the basic rows)
    'cha' => 'ちゃ', 'chu' => 'ちゅ', 'che' => 'ちぇ', 'cho' => 'ちょ',
    'tsa' => 'つぁ', 'tsi' => 'ち', 'tsu' => 'つ', 'tse' => 'つぇ', 'tso' => 'つぉ',
    'tha' => 'しゃ', 'thi' => 'し', 'thu' => 'ちゅ', 'the' => 'せ', 'tho' => 'そ',
    'kya' => 'きゃ', 'kyi' => 'きぃ', 'kyu' => 'きゅ', 'kye' => 'きぇ', 'kyo' => 'きょ',
    'sya' => 'しゃ', 'syi' => 'しぃ', 'syu' => 'しゅ', 'sye' => 'しぇ', 'syo' => 'しょ',
    'sha' => 'しゃ', 'shu' => 'しゅ', 'she' => 'しぇ', 'sho' => 'しょ',
    'tya' => 'ちゃ', 'tyi' => 'ちぃ', 'tyu' => 'ちゅ', 'tye' => 'ちぇ', 'tyo' => 'ちょ',
    'nya' => 'にゃ', 'nyi' => 'にぃ', 'nyu' => 'にゅ', 'nye' => 'にぇ', 'nyo' => 'にょ',
    'hya' => 'ひゃ', 'hyi' => 'ひぃ', 'hyu' => 'ひゅ', 'hye' => 'ひぇ', 'hyo' => 'ひょ',
    'mya' => 'みゃ', 'myi' => 'みぃ', 'myu' => 'みゅ', 'mye' => 'みぇ', 'myo' => 'みょ',
    'rya' => 'りゃ', 'ryi' => 'りぃ', 'ryu' => 'りゅ', 'rye' => 'りぇ', 'ryo' => 'りょ',
    'wya' => 'わぁ', 'wyi' => 'わぃ', 'wyu' => 'わぅ', 'wye' => 'わぇ', 'wyo' => 'わぉ',
    # special cases: syllabic n followed by a consonant row
    'nji' => 'んじ', 'nta' => 'んた',
);
# Re-encode every kana value of the table in place, from the script's own
# encoding to UTF-8 (foreach aliases the hash values, so \$kana points at them).
for my $kana (values %ConvTbl) {
    Jcode::convert(\$kana, "utf8", $ScriptCodeCharCode);
}

# Stand-alone kana literals, re-encoded the same way as the table values.
my ($ltu, $oh, $nn) = ("っ", "おお", "ん");
for my $literal ($ltu, $oh, $nn) {
    Jcode::convert(\$literal, "utf8", $ScriptCodeCharCode);
}
#===============================================
# Constructor / destructor
#===============================================
# Constructor.
#   $module - class name (or an existing object) the constructor is invoked on
#   $buff   - accepted for compatibility; not used
# Returns a new object holding a Jcode instance and the Encode objects for
# utf8, eucjp and sjis.
sub new
{
    my ($module, $buff) = @_;

    # Two-argument bless so that subclasses get objects of their own class;
    # fall back to this package when called as a plain function.
    my $class = ref($module) || $module || __PACKAGE__;
    my $this = bless {}, $class;

    $this->{pJcode} = Jcode->new;
    $this->{pUTF8} = find_encoding('utf8');
    $this->{pEUCjp} = find_encoding('eucjp');
    $this->{pSJIS} = find_encoding('sjis');
    return $this;
}
# Destructor: gives the object a chance to release resources through Close().
# This package does not define Close itself, so the call is made only when
# the object's class (for example a subclass) actually provides one; calling
# it unconditionally raised an error during every object clean-up.
sub DESTROY
{
    my $this = shift;
    $this->Close() if $this->can('Close');
    return;
}
# Placeholder initializer: unpacks the invocant and does nothing else.
sub Initialize
{
my ($this) = @_;
}
#===============================================
# Accessor functions
#===============================================
# Getter: returns the object's ConversionMode field (undef if never set).
sub ConversionMode
{
    my $this = shift;
    return $this->{ConversionMode};
}
# Setter: stores $mode in the ConversionMode field and returns the stored value.
sub SetConversionMode
{
    my $this = shift;
    my ($mode) = @_;
    $this->{ConversionMode} = $mode;
    return $this->{ConversionMode};
}
#===============================================
# General member functions
#===============================================
# Returns whatever Jcode::getcode reports for $s (called in the caller's context).
sub getcode
{
    my $this = shift;
    my ($text) = @_;
    return Jcode::getcode($text);
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its sjis() form.
sub sjis
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->sjis();
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its jis() form.
sub jis
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->jis();
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its euc() form.
sub euc
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->euc();
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its ucs2() form.
sub ucs2
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->ucs2();
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its utf8() form.
sub utf8
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->utf8();
}
# Wraps $s in a Jcode object (no input encoding is passed) and returns its
# iso_2022_jp() form.
sub iso_2022_jp
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->iso_2022_jp();
}
# Converts half-width (ASCII) letters, digits, space and selected symbols in
# $s to their full-width forms, and optionally half-width kana as well.
#   $s              - text to convert
#   $sourcecharcode - encoding of $s; detected with getcode() when undefined
#   $ConvertKana    - when true (the default) Jcode's h2z() is applied too
# Returns the converted text, handed back through convert() into
# $sourcecharcode.
#
# Fixes relative to the previous version:
#   * a broken commented-out debug print left a stray code line behind;
#   * the source and target character sets were identical, so nothing was
#     actually converted;
#   * '\"' inside single quotes is a backslash plus a quote, which made the
#     source set one character longer than the target set and shifted every
#     mapping after '!';
#   * '=-^' was read by tr as a range; the hyphen is now listed last so that
#     it is a literal character on both sides.
sub Hankaku2Zenkaku
{
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $ConvertKana = 1 if(!defined $ConvertKana);

    # Work in the script's own encoding so the literals below match.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # The code relies on Jcode's tr() returning the Jcode object (as before).
    my $j = Jcode->new($s);
    $j = $j->tr('a-zA-Z0-9 ', 'a-zA-Z0-9 ');
    $j = $j->tr('@!"#$%&\'', '@!”#$%&’');
    # NOTE(review): the backslash target is the yen sign exactly as it appeared
    # in the original table; confirm whether the full-width yen sign is wanted.
    $j = $j->tr('()=^\\|{}:;+*?_/<>-', '()=^¥|{}:;+*?_/<>-');
    $j = $j->h2z() if($ConvertKana);

    # Concatenating with '' stringifies the Jcode object.
    $s = $j . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
# Converts full-width letters, digits, space and selected symbols in $s to
# their half-width (ASCII) forms, and optionally full-width kana as well.
#   $s              - text to convert
#   $sourcecharcode - encoding of $s; detected with getcode() when undefined
#   $ConvertKana    - when true (the default) Jcode's z2h() is applied too
# Returns the converted text, handed back through convert() into
# $sourcecharcode.
#
# Fixes relative to the previous version:
#   * the source and target character sets were identical, so full-width
#     letters, digits and most symbols were never converted;
#   * '\"' inside single quotes is a backslash plus a quote, which made the
#     target set one character longer than the source set and shifted every
#     mapping after '!';
#   * '=-^' was read by tr as a range; the hyphen is now listed last so that
#     it is a literal character on both sides.
sub Zenkaku2Hankaku
{
    my ($this, $s, $sourcecharcode, $ConvertKana) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $ConvertKana = 1 if(!defined $ConvertKana);

    # Work in the script's own encoding so the literals below match.
    $this->convert(\$s, $ScriptCodeCharCode, $sourcecharcode);

    # The code relies on Jcode's tr() returning the Jcode object (as before).
    my $j = Jcode->new($s);
    $j = $j->tr('a-zA-Z0-9 ', 'a-zA-Z0-9 ');
    $j = $j->tr('@!”#$%&’', '@!"#$%&\'');
    # NOTE(review): the yen sign is kept exactly as it appeared in the original
    # table; confirm whether the full-width yen sign should be matched instead.
    $j = $j->tr('()=^¥|{}:;+*?_/<>-', '()=^\\|{}:;+*?_/<>-');
    $j = $j->z2h() if($ConvertKana);

    # Concatenating with '' stringifies the Jcode object.
    $s = $j . '';

    $this->convert(\$s, $sourcecharcode, $ScriptCodeCharCode);
    return $s;
}
# Applies Jcode's z2h() to $s and passes the stringified result to convert()
# with $sourcecharcode (detected via getcode() when undefined) as the target;
# no source encoding is given to convert() here.
sub Zenkaku2HankakuKana
{
    my ($this, $text, $sourcecharcode) = @_;
    unless (defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($text);
    }
    my $jcode = Jcode->new($text);
    $text = $jcode->z2h() . '';
    $this->convert(\$text, $sourcecharcode);
    return $text;
}
# Converts $s to the script's encoding, applies Jcode's h2z(), and converts
# the stringified result back to $sourcecharcode (detected via getcode() when
# undefined).
sub HankakuKana2Zenkaku
{
    my ($this, $text, $sourcecharcode) = @_;
    unless (defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($text);
    }
    $this->convert(\$text, $ScriptCodeCharCode, $sourcecharcode);
    my $jcode = Jcode->new($text);
    $text = $jcode->h2z() . '';
    $this->convert(\$text, $sourcecharcode, $ScriptCodeCharCode);
    return $text;
}
# Maps the hiragana range to the katakana range with Jcode's tr(), then
# converts the stringified result from the script's encoding to
# $sourcecharcode (detected via getcode() when undefined).
sub Hiragana2Katakana
{
    my ($this, $text, $sourcecharcode) = @_;
    unless (defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($text);
    }
    my $jcode = Jcode->new($text);
    $text = $jcode->tr('ぁ-ん', 'ァ-ン') . '';
    $this->convert(\$text, $sourcecharcode, $ScriptCodeCharCode);
    return $text;
}
# Maps the katakana range to the hiragana range.
# Steps: convert $s to the script's encoding, widen half-width kana through
# HankakuKana2Zenkaku(), apply Jcode's tr(), then pass the stringified result
# to convert() with $sourcecharcode as the target.
# NOTE(review): $s has already been converted to the script's encoding when it
# is handed to HankakuKana2Zenkaku() labelled as $sourcecharcode - confirm this
# is intended when the two encodings differ.
sub Katakana2Hiragana
{
    my ($this, $text, $sourcecharcode) = @_;
    unless (defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($text);
    }
    $this->convert(\$text, $ScriptCodeCharCode, $sourcecharcode);
    my $widened = $this->HankakuKana2Zenkaku($text, $sourcecharcode);
    my $hiragana = Jcode->new($widened)->tr('ア-ン', 'あ-ん') . '';
    $this->convert(\$hiragana, $sourcecharcode);
    return $hiragana;
}
# Returns the result of Jcode's mime_encode() for $s.
sub MIMEEncode
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->mime_encode();
}
# Returns the result of Jcode's MIME_Header() for $s.
sub MIMEEncodeByUTF8
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->MIME_Header();
}
# Returns the result of Jcode's mime_decode() for $s.
sub MIMEDecode
{
    my (undef, $text) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->mime_decode();
}
# Thin wrapper: builds a Jcode object from $s and returns whatever its
# tr($from, $to, $opt) call returns.
sub tr
{
    my (undef, $text, $from, $to, $opt) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->tr($from, $to, $opt);
}
# Thin wrapper: builds a Jcode object from $s and returns whatever its
# s($pattern, $replace, $opt) call returns.
sub s
{
    my (undef, $text, $pattern, $replace, $opt) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->s($pattern, $replace, $opt);
}
# Thin wrapper: builds a Jcode object from $s and returns whatever its
# m($pattern, $opt) call returns, in the caller's context.
sub match
{
    my (undef, $text, $pattern, $opt) = @_;
    my $jcode = Jcode->new($text);
    return $jcode->m($pattern, $opt);
}
# Re-encodes the string referenced by $pStr via Utils::convert, unless the
# object's ConversionMode is 'none', in which case the string is returned
# untouched.
#   $pStr           - reference to the string
#   $targetcharcode - encoding to convert to
#   $sourcecharcode - encoding to convert from (may be undefined)
# Returns the referenced string in 'none' mode, otherwise whatever
# Utils::convert returns.
sub convert
{
    my ($this, $pStr, $targetcharcode, $sourcecharcode) = @_;
    if ($this->{ConversionMode} eq 'none') {
        return $$pStr;
    }
    return Utils::convert($pStr, $targetcharcode, $sourcecharcode);
}
#my $ZenkakuKigou = "[!-~ ]";
#my $ZenkakuSpace = "([!-~]) ([!-~])";
#my $ZenkakuAlphabet = "[A-Za-z0-9 -‐,、。.]";
#Jcode::convert(\$ZenkakuKigou, 'utf8', $ScriptCodeCharCode);
#Jcode::convert(\$ZenkakuSpace, 'utf8', $ScriptCodeCharCode);
#Jcode::convert(\$ZenkakuAlphabet, 'utf8', $ScriptCodeCharCode);
# Full-width to half-width conversion; delegates to Zenkaku2Hankaku() with
# $sourcecharcode (detected via getcode() when undefined) and returns the
# stringified result.
# An earlier tr///-based implementation that used to sit here as commented-out
# code has been dropped.
sub Zen2Han
{
    my ($this, $text, $sourcecharcode) = @_;
    unless (defined $sourcecharcode) {
        $sourcecharcode = $this->getcode($text);
    }
    return $this->Zenkaku2Hankaku($text, $sourcecharcode) . '';
}
# Converts romaji in $s to kana using %ConvTbl.
#   $s              - input text
#   $charcode       - encoding of the returned text (defaults to 'sjis')
#   $sourcecharcode - encoding of $s; detected with getcode() when undefined
# Returns the converted text; pieces that match no table entry are passed
# through (re-encoded to $charcode).
sub Roma2Kana {
my ($this, $s, $charcode, $sourcecharcode) = @_;
$charcode = 'sjis' if(!defined $charcode);
$sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
# Normalise to half-width and lower case, then work on the text as utf8.
$s = lc $this->Zenkaku2Hankaku($s, $sourcecharcode);
$this->convert(\$s, "utf8", $sourcecharcode);
# vowel + 'h' + consonant: the 'h' is replaced by a second copy of the vowel.
$s =~ s/([aiueo])h([bcdfghjklmnpqrstvwxyz])/$1$1$2/g;
# 'n' or 'm' followed by anything other than a/i/u/e/o/n becomes $nn.
# NOTE(review): 'y' is not excluded here, so inputs such as "nya" or "mya"
# are rewritten before the table lookup and the nya/mya rows of %ConvTbl look
# unreachable - confirm whether that is intended.
$s =~ s/[nm]([^aiueon])/$nn$1/g;
# "nn" and "n-" + vowel are explicit spellings of $nn.
$s =~ s/nn/$nn/g;
$s =~ s/n-([aiueo])/$nn$1/g;
# These outer variables are shadowed by the 'my' inside the loop and never used.
my ($others, $hit, $rest);
my $ret = '';
while(1) {
#print "s=[$s]\n";
# Split into: leading text, one candidate syllable (optional consonants plus a
# vowel or 'n'), and the remainder.
my ($others, $hit, $rest) = ($s =~ /^(.*?)([bcdfghjklmnpqrstvwxyz]*[aiueon])(.*)$/sg);
if(!defined $hit) {
# No further candidate: append what is left and finish.
$this->convert(\$s, $charcode, "utf8");
$ret .= $s;
return $ret;
}
my $pre = '';
# Doubled-consonant handling is disabled (body commented out); $pre stays ''.
if($hit =~ /^([bcdfghjklmnpqrstvwxyz])([bcdfghjklmnpqrstvwxyz])(.*)$/) {
if($1 eq $2) {
# $pre = $ltu;
# $hit = $2 . $3;
}
}
# "oh" handling is disabled as well (body commented out).
if($hit =~ /^(oh)(.*)$/) {
# $pre .= $oh;
# $hit = $2;
}
# Replace the candidate with its table entry when one exists.
$hit = $ConvTbl{$hit} if(defined $ConvTbl{$hit});
$this->convert(\$others, $charcode, "utf8");
$this->convert(\$hit, $charcode, "utf8");
$this->convert(\$pre, $charcode, "utf8");
$ret .= $others . $pre .$hit;
$s = $rest;
}
}
# Converts $s to hiragana.
#   $s              - input text
#   $sourcecharcode - encoding of $s; detected with getcode() when undefined
#   $targetcharcode - passed through to Katakana2Hiragana() / Kakasi()
# Text consisting only of kana is handled by Katakana2Hiragana(); anything
# else is sent through Kakasi().
#
# Changes relative to the previous version:
#   * a caller-supplied $sourcecharcode is honoured instead of always being
#     overwritten by auto-detection (consistent with the sibling methods);
#   * the kana-only check runs on a copy, so $s itself is no longer converted
#     to the script encoding and back before being processed.
sub Conv2Hiragana
{
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);

    # The character-class literal is in the script's encoding, so test a copy
    # of the text converted to that encoding.
    my $probe = $s;
    $this->convert(\$probe, $ScriptCodeCharCode, $sourcecharcode);

    if($this->match($probe, "^[ぁ-んァ-ンァ-ン]*\$")) {
        return $this->Katakana2Hiragana($s, $sourcecharcode, $targetcharcode);
    }
    return $this->Kakasi($s, $sourcecharcode, $targetcharcode, qw/-aH -jH -KH -JH -EH -kH/);
}
# Runs $s through Kakasi() with a fixed set of kakasi options and returns the
# result; the encodings are passed through unchanged.
sub Conv2Roma
{
    my ($this, $s, $sourcecharcode, $targetcharcode) = @_;
    my @kakasi_flags = qw/-Ha -ja -Ka -Ja -Ea -ka/;
    return $this->Kakasi($s, $sourcecharcode, $targetcharcode, @kakasi_flags);
}
# Runs $s through kakasi with the given options.
#   $s              - input text
#   $sourcecharcode - encoding of $s; detected with getcode() when undefined
#   $targetcharcode - encoding of the result; defaults to $sourcecharcode
#   @options        - kakasi command-line options (e.g. -Ha -ja ...)
# Returns the converted text, or $s unchanged when it is empty. If the
# external program cannot be run, a warning is issued and '' is returned.
#
# Fixes relative to the previous version:
#   * encoding detection was applied to the undefined $sourcecharcode instead
#     of to $s;
#   * broken commented-out debug prints left stray code lines behind;
#   * the text was interpolated into a shell command line with incomplete
#     escaping (the first character and consecutive metacharacters were
#     missed). The text is now written to kakasi's standard input and the
#     program is started without a shell, so no escaping is needed.
sub Kakasi {
    my ($this, $s, $sourcecharcode, $targetcharcode, @options) = @_;
    return $s if($s eq '');
    $sourcecharcode = $this->getcode($s) if(!defined $sourcecharcode);
    $targetcharcode = $sourcecharcode if(!defined $targetcharcode);

    $this->convert(\$s, $KakasiCharCode, $sourcecharcode);

    if($UseKakasiModule) {
        # Loaded on demand; the file-level 'use Text::Kakasi' is commented out.
        require Text::Kakasi;
        my $kakasi = Text::Kakasi->new(@options);
        $s = $kakasi->get($s);
    }
    else {
        require IPC::Open2;
        my $output = eval {
            # A kakasi that exits early must not kill us with SIGPIPE.
            local $SIG{PIPE} = 'IGNORE';
            my ($from_kakasi, $to_kakasi);
            my $pid = IPC::Open2::open2($from_kakasi, $to_kakasi, $KakasiPath, @options);
            # The whole input is written before any output is read, which is
            # fine for short strings; very large inputs could block on the pipe.
            print {$to_kakasi} $s, "\n";
            close $to_kakasi;
            my $text = do { local $/; <$from_kakasi> };
            close $from_kakasi;
            waitpid($pid, 0);
            $text;
        };
        warn "kakasi could not be run: $@" if($@);
        $s = defined $output ? $output : '';
        $s =~ s/[\r\n]+$//s;
    }

    $this->convert(\$s, $targetcharcode, $KakasiCharCode);
    return $s;
}
1;