#============================================================
# TokyuWeb
#
# NextTrain subclass that turns timetable HTML pages into a timetable
# (.tbl) file: each page is fetched or read, reduced to plain text, scanned
# for train-type notes, and then written out hour by hour.
#
# NOTE(review): this file reached review with its line breaks collapsed and
# with HTML-tag-like text stripped out of several regex literals.  Every
# pattern marked "reconstructed" below is a best-effort restoration and must
# be compared against a pristine copy of the file before merging.
#
# NOTE(review): patterns such as /^(..)/ and /ペ..ジ/ look byte-oriented,
# and Get() reads local files as "sjis" -- presumably this source is stored
# in a double-byte encoding.  Do not add "use utf8" without confirming.
#============================================================
package TokyuWeb;

use strict;

use NextTrain::NextTrain;
use LWP::Simple;

our @ISA = qw(NextTrain);

#============================================================
# Constructor / destructor
#============================================================
BEGIN {
}

# Creates a scraper bound to the given application object.
#   $module - class name (or an existing object) the constructor is invoked on
#   $App    - application object, handed to SetApplication()
# Returns the new object with the built-in note table initialised.
sub new {
    my ($module, $App) = @_;

    # Two-argument bless so that a subclass calling ->new gets an object of
    # its own class; falls back to this package, which is what the former
    # one-argument bless always produced.
    my $class = ref($module) || $module || __PACKAGE__;
    my $this  = bless {}, $class;

    $this->SetApplication($App);
    $this->InitializeNotes();
    return $this;
}

# Nothing to release; the parent destructor call is intentionally disabled.
sub DESTROY {
    my $this = shift;
    # $this->SUPER::DESTROY(@_);
}

#===============================================
# Note-table initialisation
#===============================================

# Resets the note bookkeeping:
#   Mark            - next free mark letter, starting at 'A'
#   pNoteMarkArray  - per-input-file hashes (note symbol => mark); slot 0 is
#                     preloaded with the three built-in symbols
#   pNoteHash       - note symbol => description
sub InitializeNotes {
    my ($this) = @_;

    $this->{Mark}           = 'A';
    $this->{pNoteMarkArray} = [];
    $this->{pNoteHash}      = {};

    my %NoteMark0;
    $NoteMark0{"急"} = "e";
    $this->{pNoteHash}{"急"} = "急行";
    $NoteMark0{"準"} = "s";
    $this->{pNoteHash}{"準"} = "準急";
    $NoteMark0{"−"} = "p";
    $this->{pNoteHash}{"−"} = "通過";

    $this->{pNoteMarkArray}[0] = \%NoteMark0;
}

#===============================================
# General member functions
#===============================================

# Runs ReadNotes() on every input, passing the input's index along.
#   $pInputFiles - array ref of URLs / file names
sub ReadAllNotes {
    my ($this, $pInputFiles) = @_;

    for (my $i = 0 ; $i < @$pInputFiles ; $i++) {
        my $InFile = $pInputFiles->[$i];
        $this->ReadNotes($i, $InFile);
    }
}

# Extracts the note legend from one page and registers each note symbol.
#   $idx    - index of the input; selects the per-file mark hash
#   $InFile - URL or file name of the page
# A symbol without a mark receives the next free letter (A..Z, then a..),
# skipping e, s and p, which InitializeNotes() reserves.
sub ReadNotes {
    my ($this, $idx, $InFile) = @_;

    my $pMark             = $this->pMark();
    my $pNoteMarkOriginal = $this->pNoteMarkOriginal();
    my $pNoteMark         = $this->pNoteMark($idx);
    my $pNoteHash         = $this->pNoteHash();

    my $Content = $this->Get($InFile);
    my ($PageTitle, $ModifiedDate);
    ($Content, $PageTitle, $ModifiedDate) = $this->SimplifyHTML($Content);

    # The legend is the text between the last "トップ" and the final </TABLE>.
    my ($line) = ($Content =~ /トップ.*[\r\n\s]*?(\S.*?)[\r\n\s]*?<\/TABLE>[\r\n\s]*?$/si);

    while ($line ne '') {
        # Peel off the first physical line; $line keeps the remainder.
        my ($entry, $rest) = ($line =~ /^([^\r\n]*)[\r\n]+(.*)$/s);
        $line = $rest;

        my ($note, $d, $comment) = ($entry =~ /^\s*(\S+?)( |\s)+(.*)\s*$/);
        if ($entry =~ /^※/) {
            # NOTE(review): this parses $line (the text AFTER the current
            # entry), not the entry itself.  That may be a typo for the
            # current line; left unchanged pending confirmation.
            ($note, $comment) = ($line =~ /^(..)\s*(.*)$/);
        }
        next if (!defined $comment);
        next if ($note eq '無印');
        $comment =~ s/( |\s)//g;

        # A description that is already known reuses the mark recorded for
        # this symbol.
        # NOTE(review): the lookup uses $note, not the matching key $k --
        # confirm that is intended.
        foreach my $k (keys %$pNoteHash) {
            if ($pNoteHash->{$k} eq $comment) {
                my $key = $pNoteMarkOriginal->{$note};
                $pNoteMark->{$note} = $key;
                $this->print("note0[$InFile]: $note [$key] $comment\n");
            }
        }

        if (!defined $pNoteMark->{$note}) {
            $this->print("note1[$InFile]: $note [$$pMark] $comment\n");
            $pNoteMark->{$note}         = $$pMark;
            $pNoteMarkOriginal->{$note} = $$pMark;
            $pNoteHash->{$note}         = $comment;

            # Advance to the next mark letter.
            if ($$pMark eq 'Z') {
                $$pMark = 'a';
            }
            else {
                $$pMark++;
            }
            # e, s and p are reserved for the built-in notes.
            if ($$pMark =~ /[esp]/) {
                $$pMark++;
            }
        }
    }

    return;
}

# Writes the note section ("mark:description" lines) to the table file.
#   $out - output object providing print()
sub WriteNotes {
    my ($this, $out) = @_;

    my @pNoteMark = $this->NoteMarkArray();
    # Unused here; the call is retained because the accessor is defined
    # outside this file.
    my %NoteMark = $this->pNoteMarkOriginal();    #NoteMark(0);
    my %NoteHash = $this->NoteHash();

    $out->print(";備考データ\n");

    # To sort by mark, build a hash with the keys and values of
    # $pNoteMark[0] swapped.
    my %Reverse = Utils::RevertHash($pNoteMark[0]);
    foreach my $k (sort keys %Reverse) {
        my $n = $Reverse{$k};
        my $s = $NoteHash{$n};
        my $m = $pNoteMark[0]->{$n};
        $out->print("$m:$s\n");
    }
    $out->print("\n");
}

# Returns the content behind $URL: fetched with LWP::Simple::get() for
# ftp/http(s) URLs, otherwise read from a local file as "sjis" via JFile.
sub Get {
    my ($this, $URL) = @_;

    if ($URL =~ /^(ftp|http)s?:\/\//i) {
        return get($URL);
    }
    return JFile->new()->ReadFile($URL, "sjis");
}

# Reduces a timetable page to line-oriented text.
#   $Content - raw HTML
# Returns ($Content, $PageTitle, $ModifiedDate):
#   $Content      - simplified text (TABLE tags are kept as delimiters)
#   $PageTitle    - first alt="..." text containing 方面, or 'undefined'
#   $ModifiedDate - first ">YYYY年..." text run, undef when absent
sub SimplifyHTML {
    my ($this, $Content) = @_;

    # Which page is this?  (An older extraction based on a <SPAN> block was
    # already disabled in favour of scanning the alt attributes.)
    my $PageTitle;
    my $rest = $Content;
    while (1) {
        ($PageTitle, $rest) = ($rest =~ /alt="(.*?)"(.*)$/si);

        # Was "if(undef $PageTitle)", which undefines the variable and is
        # always false, so a page without a matching alt text looped forever.
        if (!defined $PageTitle) {
            $PageTitle = 'undefined';
            last;
        }
        last if ($PageTitle =~ /方面/);
    }
    $PageTitle =~ s/時刻表\//;
    # NOTE(review): reconstructed.  The patterns arrived as bare "(" and ")",
    # which do not compile; presumably they were full-width parentheses.
    # Both widths are accepted here.
    $PageTitle =~ s/(?:（|\()/ /;
    $PageTitle =~ s/(?:）|\))//;

    # NOTE(review): reconstructed.  This statement and the one after it
    # arrived fused together with the text between them stripped; the
    # DOCTYPE tag name is a guess.
    my ($ModifiedDate) = ($Content =~ />(\d{4}年[^<]*?)</si);

    $Content =~ s/<!DOCTYPE[^>]+?>//si;
    $Content =~ s/<(\/?HTML|\/?BODY)[^>]*?>//gsi;

    # Line-break tags.
    # NOTE(review): reconstructed; the BR and P tag names were stripped.
    $Content =~ s/<BR[^>]*?>/\n/gsi;
    $Content =~ s/<P[^>]*?>/\n/gsi;
    $Content =~ s/<\/P[^>]*?>/\n/gsi;

    # Comment tags.
    # NOTE(review): reconstructed; the "<!" prefix was stripped.
    $Content =~ s/<![^>]+?>//gsi;

    # Other tags.
    $Content =~ s/<(\/?DIV|\/?HR|\/?A|\/?B|\/?I|\/?UL|\/?LI|\/?UL|\/?H\d|\/?IMG)[^>]*?>//gsi;
    $Content =~ s/<(\/?SPAN|\/?STRONG|\/?FONT)[^>]*?>//gsi;

    # Drop table internals; TABLE itself stays because the callers use it
    # as a delimiter.
    $Content =~ s/<(\/?TBODY|\/?TR|\/?TD)[^>]*?>//gsi;
    # NOTE(review): reconstructed; both the tag name and the replacement
    # were stripped.  Normalising to a bare <TABLE> is a guess that is
    # consistent with the patterns in MakeTimeTable().
    $Content =~ s/<TABLE[^>]*>/<TABLE>/gsi;

    # NOTE(review): the next two substitutions arrived as identical
    # space-for-space replacements.  The first is reconstructed as the
    # &nbsp; entity; the second is kept as received (presumably it replaced
    # a full-width space -- confirm).
    $Content =~ s/&nbsp;/ /gsi;
    $Content =~ s/ / /gsi;

    # Whitespace clean-up.
    $Content =~ s/(\n)\s+(\S)/$1$2/gsi;
    $Content =~ s/\s+\n/\n/gsi;
    $Content =~ s/\n\n\n+/\n/gsi;

    # Pull a line that starts with a digit up onto the previous line.
    # NOTE(review): kept as received; the alternation may originally have
    # listed full-width digits.
    $Content =~ s/\n(0|1|2|3|4|5|6|7|8|9)/ $1/gsi;

    # Trim the page header and footer.
    $Content =~ s/^.*?各駅情報[\s\n\r]*//si;
    $Content =~ s/\n\s*ペ..ジの先頭へ.*?$//si;

    return ($Content, $PageTitle, $ModifiedDate);
}

# Splits one timetable line into (note, first minute, remaining minutes).
#   $line - whitespace-separated fields; the first may be a note, a minute,
#           or a note glued to a minute, optionally prefixed with "[n]"
# Also prints a debug trace to STDOUT.
sub SplitMinites {
    my ($this, $line) = @_;

    $line = NextTrain::Zen2Han($line);
    my ($note, $min, @mins) = Utils::Split("\\s+", $line);

    # Set a leading "[n]" prefix aside so its digits are not mistaken for a
    # minute.
    my $notehead = '';
    if ($note =~ /^\[[0-9]+\]/) {
        my ($head, $tail) = ($note =~ /^(\[[0-9]+?\])(.*)$/);
        $notehead = $head;
        $note     = $tail;
    }

    # A digit inside the first field means note and minute were glued
    # together; the field after it is then just another minute.
    if ($note =~ /[0-9]/) {
        my ($symbol, $digits) = ($note =~ /^(.*?)([0-9]+)/);
        @mins = ($min, @mins);
        $note = $symbol;
        $min  = $digits;
    }
    $note = "$notehead$note";

    print "note: $note min=$min mins={", join(':', @mins), "}\n";
    return ($note, $min, @mins);
}

# Builds the timetable file from a set of pages.
#   $pInputFiles - array ref of URLs / file names
#   $pDayKey     - array ref of day-key lines, parallel to $pInputFiles
#   $OutputFile  - output file name; '' derives "<title of first page>.tbl"
# Side effects: dumps each simplified page to "a<N>.txt" in the current
# directory and prints progress to STDOUT.
sub MakeTimeTable {
    my ($this, $pInputFiles, $pDayKey, $OutputFile) = @_;

    my (@PageTitle, $ModifiedDate);
    my $Content;

    # First pass: collect the titles and dump the simplified text.
    for (my $i = 0 ; $i < @$pInputFiles ; $i++) {
        $Content = $this->Get($pInputFiles->[$i]);
        ($Content, $PageTitle[$i], $ModifiedDate) = $this->SimplifyHTML($Content);
        print "PagetTitle: $PageTitle[$i]\n";
        print "ModifiedDate: $ModifiedDate\n";

        # Three-argument open on a lexical handle (was a two-argument open
        # on the bareword handle OUT).
        my $outfile = "a$i.txt";
        open(my $dump, '>', $outfile) or die "$!: $outfile\n";
        print {$dump} $Content;
        close($dump);
    }

    if ($OutputFile eq '') {
        $OutputFile = "$PageTitle[0].tbl";
    }

    $this->ReadAllNotes($pInputFiles);

    my $out = $this->OpenTableFile($OutputFile, "w");
    return if (!defined $out);

    $out->print(";$PageTitle[0] \n");
    $out->print(";$ModifiedDate \n");
    $out->print("\n");

    $this->WriteNotes($out);

    # Unused below; the calls are retained because the accessors are defined
    # outside this file.
    my @pNoteMark = $this->NoteMarkArray();
    my %NoteHash  = $this->NoteHash();

    # Second pass: one section per input page.
    for (my $i = 0 ; $i < @$pInputFiles ; $i++) {
        my $DayKey   = $pDayKey->[$i];
        my %NoteMark = $this->NoteMark($i);    # unused; see the note above

        $Content = $this->Get($pInputFiles->[$i]);
        ($Content, $PageTitle[$i], $ModifiedDate) = $this->SimplifyHTML($Content);

        $out->print("$DayKey\n") if ($DayKey ne '');
        $out->print("#$PageTitle[$i]\n");

        my $entry;
        my $time;
        while ($Content ne '') {
            # One hour block: from an opening table tag up to two
            # consecutive </TABLE> tags.
            # NOTE(review): reconstructed; the opening tag was stripped from
            # the pattern and <TABLE> is a guess.
            my ($text, $rest) = ($Content =~ /^.*?<TABLE>(.*?)<\/TABLE>[\r\n]*<\/TABLE>(.*)$/si);

            # The first line of the block is the hour.
            ($time, $text) = ($text =~ /^[\r\n\s]*([^\r\n]*)[\r\n\s]+(.*)$/s);
            last if (!defined $text);

            $this->print("$time: ");
            $out->printf("%02d:", $time);

            # Skip the nested table's opening tag.
            # NOTE(review): reconstructed; the tag was stripped here too.
            $text =~ s/<TABLE>//i;

            my $prevnote = '';
            while ($text ne '') {
                ($entry, $text) = ($text =~ /^([^\r\n]*)[\r\n\s]+(.*)$/s);
                my ($note, $min, @mins) = $this->SplitMinites($entry);

                my $noteprev = $note;
                $note = $this->ConverteMark($noteprev);
                $this->print(" ${min}[$noteprev-$note]");

                # A line holding only a note applies to the next minute.
                if (!defined $min) {
                    $prevnote = $note;
                    next;
                }

                $out->printf(" %s%02d", "$prevnote$note", $min);
                printf("\nout1: %s%02d\n", "$prevnote$note", $min);

                # Index renamed: the original reused $i and shadowed the
                # page index of the enclosing loop.
                for (my $j = 0 ; $j < @mins ; $j++) {
                    print "mins[$j]=$mins[$j]\n";
                    next if ($mins[$j] !~ /\d/);
                    $out->printf(" %02d", $mins[$j]);
                    printf("\nout2: %02d\n", $mins[$j]);
                }
                $prevnote = '';
            }

            $out->print("\n");
            $this->print("\n");
            $Content = $rest;
        }
        $out->print("\n");
    }

    $this->CloseTableFile();
}