43 lines
922 B
Perl
43 lines
922 B
Perl
|
#!/usr/bin/perl
#
# Scrapes the German Wikipedia page "Liste_der_Dateiendungen" and its
# sub-pages, printing one "<first cell> <second cell>" line per table row.
#
# Flow:
#   1. Fetch the index page and collect the text of every link found inside
#      a table cell; each text is used as a sub-page name.
#   2. For every sub-page, collect the text of all cells of the tables with
#      class "prettytable" as one flat list.
#   3. Walk that flat list in steps of three cells and print the first two
#      cells of each row that passes the filters below.
#
# NOTE(review): the cells are presumably "extension, description, <third
# column>" -- this is inferred from the page name and the regexes, confirm
# against the live page layout.

use strict;
use warnings;

use Web::Scraper;
use URI;
use HTML::Entities;
use Encode;
use URI::Escape;
use LWP::UserAgent;

my $wikiurl = "http://de.wikipedia.org/wiki/Liste_der_Dateiendungen";

# Index page: text of every link located directly inside a table cell.
my $index_scraper = scraper {
    process '//div[@id="bodyContent"]/table/tr/td/a', 'chars[]' => 'TEXT';
};

# Sub-pages: text of every cell of the "prettytable" tables, flattened.
# Built once here because it does not depend on the page being fetched.
my $table_scraper = scraper {
    process '//div[@id="bodyContent"]/table[@class="prettytable"]/tr/td', 'table[]' => 'TEXT';
};

# Returns the cell at $index, or '' when the index lies outside the list
# (or the cell is undefined), so callers never match against undef.
sub cell_text {
    my ($cells, $index) = @_;

    my $text = $cells->[$index];
    return defined $text ? $text : '';
}

my $index = $index_scraper->scrape(URI->new($wikiurl));

# Fall back to an empty list when the index page yielded no links.
my $list = $index->{'chars'} || [];

binmode(STDOUT, ":utf8");

foreach my $page (@$list) {
    my $res = $table_scraper->scrape(URI->new("$wikiurl/$page"));

    # Fall back to an empty list when the page has no matching table cells.
    my $table = $res->{'table'} || [];

    # C-style loop on purpose: the index is adjusted inside the body.
    for (my $i = 0; $i <= $#{$table}; $i += 3) {
        my $first  = cell_text($table, $i);
        my $second = cell_text($table, $i + 1);
        my $third  = cell_text($table, $i + 2);

        # Skip rows whose first cell contains two or more dots, and rows
        # whose second cell is empty or a single character.
        if ($first !~ /\..*(\..*)+/ && $second !~ /^.?$/) {
            print "$first $second\n";
        }

        # If the third cell starts with a dot, step back one cell so that
        # this cell becomes the first cell of the next iteration.
        # NOTE(review): presumably this handles two-cell rows -- confirm.
        if ($third =~ /^\./) {
            $i--;
        }
    }
}