One example of analysing webpages with Web::Scraper
parent c4d2aba012
commit c7898d8656
1 changed file with 32 additions and 0 deletions
download_webscraper.pl (new file, 32 additions)
@@ -0,0 +1,32 @@
#!/usr/bin/perl
use strict;
use warnings;

use Web::Scraper;
use URI;
use LWP::UserAgent;

# Page listing the exam documents to download.
my $wikiurl = "http://www.bildung-lsa.de/unterricht/zentrale_leistungserhebungen__schriftliche_pruefungen__zentrale_klassenarbeiten__vergleichsarbeiten____/schriftliche_abiturpruefung.html";

# Fetch the page once; the final URI (after any redirects)
# serves as the base for resolving the scraped links.
my $ua  = LWP::UserAgent->new;
my $res = $ua->get($wikiurl);
die "GET $wikiurl failed: ", $res->status_line, "\n" unless $res->is_success;
my $url = $res->request->uri;

binmode(STDOUT, ":utf8");

# Collect the href attribute of every link with class "subjectlink".
my $scrap = scraper {
    process '//a[@class="subjectlink"]', 'href[]' => '@href';
};

my $data = $scrap->scrape($res->decoded_content, $url);

# Download each linked document; the list form of system()
# avoids shell quoting problems in the URLs.
for my $href (@{ $data->{href} || [] }) {
    system('wget', '-q', "$href");
}
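Usage, as a minimal sketch: assuming the CPAN modules Web::Scraper, URI, and LWP::UserAgent are installed (for example via cpanm) and wget is on the PATH, the script downloads every "subjectlink" document into the current directory:

    cpanm Web::Scraper URI LWP::UserAgent
    perl download_webscraper.pl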
|