blob: d6b66bbbf49246e598acef32d72b820b407b2de2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#!/usr/bin/perl
# Look up a term on Wikipedia and print a short plain-text answer.
#
# Usage: <script> QUERY [LANG]
#   QUERY  title to search for (required)
#   LANG   Wikipedia language edition to query; defaults to "de"
use strict;
use warnings;
use WWW::Wikipedia;
use HTML::Entities;
use HTML::StripTags qw(strip_tags);
binmode(STDOUT, ":utf8");

my $query = $ARGV[0];
my $lang  = $ARGV[1];

# An empty query cannot produce a meaningful lookup; fail early instead of
# sending an empty search to the server.
if (!defined $query || $query eq '') {
    die "Usage: $0 QUERY [LANG]\n";
}

$lang = "de" if !$lang;

# The language code ends up inside a host name, so only accept things that
# look like a language code (e.g. "de", "simple", "zh-min-nan").
if ($lang !~ m/\A[A-Za-z]+(?:-[A-Za-z]+)*\z/) {
    die "Invalid language code: $lang\n";
}

my $max_lookups = 5;    # upper bound on searches (redirect hops + case retries)
my $lookups     = 0;
my $tried_upper = 0;    # true once the upper-case variant has been queued
my $result;

# One client is enough; it does not depend on anything that changes per lookup.
my $wiki = WWW::Wikipedia->new( language => $lang );

while ($lookups < $max_lookups) {
    $lookups++;
    $result = $wiki->search("$query");

    if (defined $result) {
        my $text = $result->text_basic();
        $text = '' if !defined $text;
        my @text_lines = split /\n/, $text;

        # An article that consists of a single line starting with "#" is
        # treated as a redirect stub ("#KEYWORD target"): search again for
        # the target instead of printing the stub.
        if (@text_lines == 1 && $text_lines[0] =~ m/^#/) {
            my $target = $text_lines[0];
            $target =~ s/^#\w*\s*//;

            # NOTE(review): it is not visible here whether text_basic()
            # keeps the [[...]] link wrapper around the target. Unwrap it if
            # present (dropping any "|label" or "#section" part); this is a
            # no-op when the wrapper is already gone.
            $target =~ s/^\[\[\s*([^\]|#]+).*$/$1/;
            $target =~ s/\s+$//;

            # Nothing usable to follow: keep the entry we have.
            last if $target eq '';
            $query = $target;
        }
        else {
            # Real article found. (The original used "break", which is not
            # a Perl loop-control statement and therefore never left the
            # loop.)
            last;
        }
    }
    elsif (!$tried_upper) {
        # Not found: retry once in upper case ...
        $query       = uc $query;
        $tried_upper = 1;
    }
    else {
        # ... then in lower case. If lower-casing changes nothing, that
        # exact string has just failed, so another request is pointless.
        my $lower = lc $query;
        last if $lower eq $query;
        $query = $lower;
    }
}
# Group raw article lines into chunks.
#
# Takes the article text as a list of lines. Consecutive non-blank lines are
# joined (space separated) into one paragraph chunk; a blank line ends the
# current paragraph unless it lies inside an HTML comment. A line starting
# with "*" that occurs before four text lines have been seen becomes a chunk
# of its own and marks the article as list-style.
#
# Returns (\@chunks, $is_list).
sub _group_paragraphs {
    my @lines = @_;

    my @chunks;
    my $current    = '';
    my $is_list    = 0;
    my $text_lines = 0;    # non-list text lines consumed so far
    my $in_comment = 0;

    for my $line (@lines) {
        # Non-greedy so that text between two comments on one line survives.
        $line =~ s/<!--.*?-->//g;
        $line =~ s/^\]\]//;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # Complete comments are gone by now, so whichever marker occurs last
        # on the line decides whether a multi-line comment is open.
        my $opened = rindex $line, '<!--';
        my $closed = rindex $line, '-->';
        if ($opened > $closed) {
            $in_comment = 1;
        }
        elsif ($closed > $opened) {
            $in_comment = 0;
        }

        if ($line =~ m/^\*/ && $text_lines < 4) {
            push @chunks, $current, $line;
            $current = '';
            $is_list = 1;
        }
        elsif (length $line) {
            # length(), not truthiness: a line consisting of "0" is text.
            $current .= "$line ";
            $text_lines++;
        }
        elsif (!$in_comment) {
            push @chunks, $current;
            $current = '';
        }
        # Blank lines inside a comment are ignored so that the whole comment
        # stays in one chunk and can be stripped later in one piece.
    }
    push @chunks, $current;

    return (\@chunks, $is_list);
}

# Strip wiki/HTML markup from one chunk.
#
# Returns the cleaned text, or '' when the chunk holds nothing printable
# (empty, or an Infobox template that is not closed within the chunk).
sub _clean_markup {
    my ($text) = @_;

    $text =~ s/<!--.*?-->//g;
    $text =~ s/^[^}{]*\}\}//;            # tail of a template opened earlier
    $text =~ s/^\{\{Infobox.*\}\}//;     # Infobox closed within this chunk
    $text =~ s/^[^\[]*\]\]//;            # tail of a link opened earlier
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return '' if $text eq '' || $text =~ m/^\{\{Infobox/;

    $text = decode_entities($text);
    $text =~ s/\[\[([^|\]]*\|)?([^\]]*)\]\]/$2/g;    # [[target|label]] -> label
    $text =~ s/\'([^\']*)\'/$1/g;                    # drop paired single quotes

    # Remove brackets/parentheses left empty by the steps above, including
    # ones that only become empty once an inner pair is gone.
    1 while $text =~ s/\[\s*\]|\(\s*\)//g;

    # Self-closing refs first, so the paired pattern cannot mistake one for
    # an opening tag.
    $text =~ s/<ref[^>]*\/>//g;
    $text =~ s/<ref[^>]*>[^<]*<\/ref>//g;

    $text =~ s/\s+/ /g;
    $text =~ s/\s([,.\?!])/$1/g;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    return $text;
}

# Cut $text down to at most $max_chars characters. When something was cut,
# fall back to the last sentence end (".", "!" or "?" followed by a space)
# inside the kept part, if there is one, and append " (...)".
sub _shorten {
    my ($text, $max_chars) = @_;

    return $text if length($text) <= $max_chars;

    $text = substr $text, 0, $max_chars;
    $text =~ s/^(.*[.!?]) [^.!?]*$/$1 (...)/;
    return $text;
}

# Longest summary that is printed.
# NOTE(review): 448 looks sized for a chat/IRC message payload -- confirm.
my $max_summary_chars = 448;

# Number of "*" entries printed for a list-style article before pointing
# the reader to the full page.
my $max_list_items = 3;

my $printed = 0;
if (defined $result) {
    my $article = $result->text_basic();
    $article = '' if !defined $article;

    my ($chunks, $is_list) = _group_paragraphs(split /\n/, $article);

    my $items_printed = 0;
    my $has_more      = 0;    # set when list entries had to be left out

    CHUNK:
    for my $chunk (@{$chunks}) {
        my $clean = _clean_markup($chunk);
        next CHUNK if $clean eq '';

        if ($is_list) {
            # List-style article: print only the "*" entries.
            next CHUNK if $clean !~ m/^\*/;
            if ($items_printed == $max_list_items) {
                $has_more = 1;
                last CHUNK;
            }
            print "$clean\n";
            $printed = 1;
            $items_printed++;
        }
        else {
            # Regular article: the first printable paragraph is the summary.
            my $summary = _shorten($clean, $max_summary_chars);
            print "$summary\n";
            $printed = 1;
            last CHUNK;
        }
    }

    if ($is_list && $has_more) {
        # Use underscores so the printed link contains no spaces.
        (my $title = $query) =~ tr/ /_/;
        print "For more see http://$lang.wikipedia.org/wiki/$title\n";
    }
}
else {
    print "No matches with $query\n";
    $printed = 1;
}
print "No information found in article for $query\n" if !$printed;
|