author     lookshe <lookshe@fumuga.com>    2012-03-07 03:32:55 +0100
committer  lookshe <lookshe@fumuga.com>    2012-03-07 03:32:55 +0100
commit     5455232d68904d820ca332e38efac74516e8b5d3 (patch)
tree       345fe7c47dba39e35be09e28f7b0db263c6895c3
parent     4811461b59768ea38612a78c07f4afeafe64cfa3 (diff)
fixed wiki scripts
they now work with WWW::Wikipedia
-rw-r--r--  wiki.tcl  |   4
-rw-r--r--  wiki2.pl  | 141
2 files changed, 64 insertions(+), 81 deletions(-)
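
For orientation before the per-file diffs: the commit drops the Web::Scraper/LWP scraping of the Wikipedia search page and fetches entries through the WWW::Wikipedia module instead. Below is a minimal sketch of that lookup path, assuming the module's documented new/search/text interface and the same argument order as wiki2.pl ($ARGV[0] = search term, $ARGV[1] = language code); it is illustrative, not the committed script.

#!/usr/bin/perl
# Minimal sketch of the new lookup path (not the committed wiki2.pl).
use strict;
use warnings;
use WWW::Wikipedia;

binmode(STDOUT, ":utf8");

my ($term, $lang) = @ARGV;
$lang ||= "de";    # default language, as in the script

my $wiki  = WWW::Wikipedia->new( language => $lang );
my $entry = $wiki->search($term);

if (defined $entry) {
    print $entry->text(), "\n";    # raw wikitext; the real script cleans it up first
} else {
    print "No matches with $term\n";
}
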
diff --git a/wiki.tcl b/wiki.tcl
index deb98da..d3609b4 100644
--- a/wiki.tcl
+++ b/wiki.tcl
@@ -23,7 +23,7 @@ if {$arg == ""} {
return 0
}
- set output [split "[exec perl /home/eggdrop/eggdrop/scripts/wiki.pl \"$arg\" de]" "\n"]
+ set output [split "[exec perl /home/eggdrop/eggdrop/scripts/wiki2.pl \"$arg\" de]" "\n"]
foreach out $output {
putserv "PRIVMSG $chan :$out";
}
@@ -41,7 +41,7 @@ if {$arg == ""} {
return 0
}
- set output [split "[exec perl /home/eggdrop/eggdrop/scripts/wiki.pl $arg en]" "\n"]
+ set output [split "[exec perl /home/eggdrop/eggdrop/scripts/wiki2.pl $arg en]" "\n"]
foreach out $output {
putserv "PRIVMSG $chan :$out";
}
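
The Tcl side only swaps the helper path from wiki.pl to wiki2.pl: each binding still execs the Perl script, splits its stdout on newlines, and relays every line to the channel as a PRIVMSG. Each printed line therefore has to fit a single IRC message, which is presumably why wiki2.pl caps output at 448 characters and then trims back to the last full sentence. A small standalone sketch of that length guard, using the same limit as the diff (fit_for_irc is an illustrative name, not part of the script):

use strict;
use warnings;

# Keep one printed line short enough for a single IRC message.
sub fit_for_irc {
    my ($line) = @_;
    if (length($line) > 448) {
        $line = substr($line, 0, 448);                 # hard cut, like the script's .{448} regex
        $line =~ s/^(.*[.!?]) [^.!?]*$/$1 (...)/s;     # drop the trailing sentence fragment
    }
    return $line;
}

my $summary = "Example sentence. " x 40;               # deliberately over-long input
print fit_for_irc($summary), "\n";                     # one stdout line = one PRIVMSG
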
diff --git a/wiki2.pl b/wiki2.pl
index e42f65a..87c7c8c 100644
--- a/wiki2.pl
+++ b/wiki2.pl
@@ -2,99 +2,82 @@
#use strict;
#use warnings;
-use Web::Scraper;
-use URI;
+use WWW::Wikipedia;
use HTML::Entities;
-use Encode;
-use URI::Escape;
-use LWP::UserAgent;
+use HTML::StripTags qw(strip_tags);
-my $scrap;
+binmode(STDOUT, ":utf8");
my $lang = $ARGV[1];
if (!$lang) {
$lang = "de";
}
-my $wikiurl = "http://$lang.wikipedia.org/wiki/Special:Search?search=$ARGV[0]&go=Go";
-
-my $ua = new LWP::UserAgent;
-my $req = HTTP::Request->new('GET', $wikiurl);
-my $res = $ua->request($req);
-my $url = $res->request->uri;
-my $origurl = $url;
-$url =~ s/.*\/wiki\///;
-
-binmode(STDOUT, ":utf8");
-
-if ($url !~ m/Special:Search/) {
-#artikel
- $scrap = scraper {
- process '//div[@id="bodyContent"]/p', 'text[]' => 'TEXT';
- process '//img', 'img[]' => '@src';
- process '//div[@id="bodyContent"]/ul/li', 'list[]' => 'TEXT';
- process '//table/tr/td', 'table[]' => 'TEXT';
- };
- $url = URI->new($wikiurl);
+my $wiki = WWW::Wikipedia->new( language => $lang);
- my $res = $scrap->scrape($url);
- my $text = $res->{'text'};
- my $img = $res->{'img'};
- my $list = $res->{'list'};
- my $table = $res->{'table'};
+my $result = $wiki->search( $ARGV[0] );
+if (defined $result) {
+ my @lines = split('\n', $result->text());
+ my @newlines;
+ my $newline = "";
my $isDis = 0;
-
- if ($$table[1] !~ m/$ARGV[0]/i && $#$table == 1) {
- foreach (@$img) {
-#print "$_\n";
- if ($_ =~ m/Disambig/) {
- $isDis = 1;
+ my $ln = 0;
+ foreach my $line (@lines) {
+ $line =~ s/<!--.*-->//g;
+ $line=~ s/^\s*//;
+ $line=~ s/\s*$//;
+ if ($line && $line =~ m/^\* / && $ln < 3) {
+ push(@newlines, $newline);
+ push(@newlines, $line);
+ $newline = "";
+ $isDis = 1;
+ } elsif ($line) {
+ $newline = "$newline$line ";
+ $ln++;
+ } else {
+ push(@newlines, $newline);
+ $newline = "";
+ }
+ }
+ push(@newlines, $newline);
+ $ln = 0;
+ foreach my $line (@newlines) {
+ $line=~ s/{{.*}}//g;
+ $line=~ s/^\s*//;
+ $line=~ s/\s*$//;
+ if ($line !~ m/^\s*$/) {
+ if ($isDis) {
+ if ($line =~ m/^\* /) {
+ print "$line\n";
+ $ln++;
+ last if $ln == 3;
+ }
+ } else {
+ $line = decode_entities($line);
+ #$line =~ s/\([^\(\)]*\)||\[[^\[\]]*\]//g;
+ $line =~ s/\[\[([^\]]*)\]\]/$1/g;
+ $line =~ s/\'([^\']*)\'/$1/g;
+ $line =~ s/\[\s*\]//g;
+ $line =~ s/\(\s*\)//g;
+ $line =~ s/\[\s*\]//g;
+ $line =~ s/\(\s*\)//g;
+ #$line = strip_tags($line);
+ $line =~ s/<ref>[^<]*<\/ref>//g;
+ $line =~ s/\s+/ /g;
+ $line =~ s/\s([,.\?!])/$1/g;
+ if ($line =~ m/.{448}.*/) {
+ $line =~ s/^(.{448}).*$/$1/;
+ #$line =~ s/^(.*[\.!\?])[^\.!\?]*$/$1 (...)/;
+ $line =~ s/^(.*[\.!\?]) [^\.!\?]*$/$1 (...)/;
+ }
+ print "$line\n";
last;
}
}
}
- if (!$isDis) {
- $text = decode_entities($$text[0]);
- $text =~ s/\([^\(\)]*\)||\[[^\[\]]*\]//g;
- $text =~ s/\([^\(\)]*\)||\[[^\[\]]*\]//g;
- $text =~ s/\([^\(\)]*\)||\[[^\[\]]*\]//g;
- $text =~ s/\([^\(\)]*\)||\[[^\[\]]*\]//g;
- $text =~ s/\s+/ /g;
- $text =~ s/\s([,.\?!])/$1/g;
-
- if ($text =~ m/.{448}.*/) {
- $text =~ s/^(.{448}).*$/$1/;
- $text =~ s/^(.*[\.!\?])[^\.!\?]*$/$1 (...)/;
- }
-
- print $text, "\n";
- } else {
- for ($count = 0; $count < 3 && $count <= $#$list; $count++) {
- print "$$list[$count]\n";
- }
- print "For more see $origurl\n";
+ if ($isDis) {
+ print "For more see http://$lang.wikipedia.org/wiki/$ARGV[0]\n";
}
-
} else {
-#kein artikel
-
- $scrap = scraper {
- process '//div[@class="searchresult"]', 'text[]' => 'TEXT';
- process '//ul[@class="mw-search-results"]/li/a', 'href[]' => '@href';
- };
- $url = URI->new($wikiurl);
-
- my $res = $scrap->scrape($url);
- if (keys(%$res)) {
- my $text = $res->{'text'};
- my $href = $res->{'href'};
- my $result = "";
-
- for ($count = 0; $count < 5 && $count <= $#$text; $count++) {
- $result = ($result?"$result || ":"").$$href[$count], "\n";
- }
- print "$result\n";
- } else {
- print "No matches with $ARGV[0]\n";
- }
+ print "No matches with $ARGV[0]\n";
}
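
Most of the new wiki2.pl is cleanup of the raw wikitext that WWW::Wikipedia returns: HTML comments, {{...}} templates, [[...]] link brackets, ''...'' emphasis quotes, <ref>...</ref> footnotes and HTML entities are stripped before the first paragraph is printed. A condensed, standalone sketch of those substitutions (same idea as the diff, folded into one subroutine; cleanup_wikitext is an illustrative name):

use strict;
use warnings;
use HTML::Entities;

# Condensed sketch of the per-line markup cleanup in wiki2.pl.
sub cleanup_wikitext {
    my ($line) = @_;
    $line =~ s/<!--.*?-->//g;               # HTML comments
    $line =~ s/\{\{.*?\}\}//g;              # {{template}} markup
    $line = decode_entities($line);         # &amp; and friends
    $line =~ s/\[\[([^\]]*)\]\]/$1/g;       # unwrap [[wiki links]]
    $line =~ s/'([^']*)'/$1/g;              # strip ''emphasis'' quotes
    $line =~ s/<ref>[^<]*<\/ref>//g;        # inline references
    $line =~ s/\[\s*\]//g;                  # leftover empty brackets
    $line =~ s/\(\s*\)//g;                  # leftover empty parentheses
    $line =~ s/\s+/ /g;                     # collapse whitespace
    $line =~ s/\s([,.?!])/$1/g;             # no space before punctuation
    return $line;
}

print cleanup_wikitext(
    "'''Perl''' ist eine [[Programmiersprache]] <!-- Kommentar --> <ref>Quelle</ref> &amp; mehr."
), "\n";

Disambiguation pages take a separate path in the script: lines starting with "* " are collected, at most three are printed, and a "For more see ..." pointer to the article URL is appended.
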