summaryrefslogtreecommitdiffstats
path: root/wiki2.pl
blob: d6b66bbbf49246e598acef32d72b820b407b2de2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/perl
#
# wiki2.pl - look up an article on Wikipedia and print a short excerpt.
#
# Usage: wiki2.pl <query> [language]
#   query     title to search for (required)
#   language  Wikipedia language code; "de" is used when omitted

use strict;
use warnings;
use WWW::Wikipedia;
use HTML::Entities;
use HTML::StripTags qw(strip_tags);

binmode(STDOUT, ":utf8");

my $query = $ARGV[0];
my $lang = $ARGV[1];
if (!defined $query || $query eq '') {
   die "Usage: $0 <query> [language]\n";
}
if (!$lang) {
   $lang = "de";
}

# Upper bound on the number of searches (redirect hops plus case retries).
my $max_attempts = 5;
my $attempts = 0;
# Set once the upper-cased spelling of the query has been scheduled.
my $tried_upper = 0;
my $result;

# The client only depends on the language, so build it once instead of on
# every pass through the loop.
my $wiki = WWW::Wikipedia->new( language => $lang);

while ($attempts < $max_attempts) {
   $attempts++;
   $result = $wiki->search($query);
   if (defined $result) {
      my $text = $result->text_basic();
      $text = '' unless defined $text;
      my @tmplines = split(/\n/, $text);
      if (@tmplines == 1 && $tmplines[0] =~ m/^#/) {
         # A one-line body starting with "#" is treated as a redirect
         # marker: drop the leading "#WORD " and search for what follows.
         $query = $tmplines[0];
         $query =~ s/^#\w*\s(.*)$/$1/;
      } else {
         # Real article text: stop searching.  (Perl's loop exit is
         # "last"; a bare "break" is a no-op outside given/when.)
         last;
      }
   } else {
      # Nothing found: retry with the query upper-cased, then lower-cased.
      if (!$tried_upper) {
         $query = uc $query;
         $tried_upper = 1;
      } else {
         my $lower = lc $query;
         # The lower-case spelling was just searched without success;
         # repeating the identical request would not change anything.
         last if $lower eq $query;
         $query = $lower;
      }
   }
}
# Becomes true as soon as a line has been written to STDOUT, so that the
# fallback message at the very end is only printed when nothing else was.
my $printed = 0;
if (defined $result) {
   # ---- Pass 1: join the raw lines into paragraphs ----------------------
   # Consecutive non-empty lines are concatenated (separated by a space);
   # an empty line ends the current paragraph.  Bullet lines ("*...") seen
   # before four text lines have been collected are kept as paragraphs of
   # their own and switch the output to list mode (presumably this is how
   # disambiguation pages are recognised -- confirm).
   my $text = $result->text_basic();
   $text = '' unless defined $text;
   my @raw_lines = split(/\n/, $text);
   my @paragraphs;
   my $paragraph = "";
   my $is_list_page = 0;
   my $text_lines = 0;
   my $in_comment = 0;
   foreach my $raw (@raw_lines) {
      # Non-greedy, so text between two comments on one line survives.
      $raw =~ s/<!--.*?-->//g;
      $raw =~ s/^\]\]//;
      $raw =~ s/^\s+//;
      $raw =~ s/\s+$//;
      # Track multi-line comments whose delimiters stand on their own
      # lines.  Their content is still appended to the paragraph below;
      # the flag only stops blank lines inside the comment from splitting
      # it, so pass 2 can strip the whole "<!-- ... -->" in one go.
      $in_comment = 1 if $raw eq '<!--';
      $in_comment = 0 if $raw eq '-->';
      if ($raw =~ m/^\*/ && $text_lines < 4) {
         push(@paragraphs, $paragraph, $raw);
         $paragraph = "";
         $is_list_page = 1;
      } elsif ($raw ne '') {
         $paragraph .= "$raw ";
         $text_lines++;
      } elsif (!$in_comment) {
         push(@paragraphs, $paragraph);
         $paragraph = "";
      }
   }
   push(@paragraphs, $paragraph);

   # ---- Pass 2: strip markup and print -----------------------------------
   # List mode prints at most $max_items bullet lines; otherwise only the
   # first non-empty paragraph is printed, cut to $max_len characters.
   my $max_items = 3;
   my $max_len = 448;
   my $items_shown = 0;
   my $more_items = 0;
   foreach my $para (@paragraphs) {
      $para =~ s/<!--.*?-->//g;
      # Remove leftovers of constructs that were opened on earlier lines:
      # a dangling "...}}", a complete leading Infobox template, and a
      # dangling "...]]".  Literal braces are escaped because newer perls
      # deprecate or reject an unescaped "{" in some pattern positions.
      $para =~ s/^[^}{]*\}\}//;
      $para =~ s/^\{\{Infobox.*\}\}//;
      $para =~ s/^[^\[]*\]\]//;
      $para =~ s/^\s+//;
      $para =~ s/\s+$//;
      next if $para !~ m/\S/;
      next if $para =~ m/^\{\{Infobox/;

      $para = decode_entities($para);
      # [[target|label]] -> label, [[target]] -> target
      $para =~ s/\[\[(?:[^|\]]*\|)?([^\]]*)\]\]/$1/g;
      # Drop pairs of single quotes around text.
      $para =~ s/\'([^\']*)\'/$1/g;
      # Remove empty [] and (); the second round catches pairs that only
      # became empty after the first one, e.g. "[( )]".
      for my $round (1 .. 2) {
         $para =~ s/\[\s*\]//g;
         $para =~ s/\(\s*\)//g;
      }
      $para =~ s{<ref[^>]*>[^<]*</ref>}{}g;
      $para =~ s{<ref[^>]*/>}{}g;
      $para =~ s/\s+/ /g;
      # No blank before punctuation.
      $para =~ s/\s([,.\?!])/$1/g;

      if ($is_list_page) {
         next unless $para =~ m/^\*/;
         if ($items_shown == $max_items) {
            # There is at least one more entry than we are going to show.
            $more_items = 1;
            last;
         }
         print "$para\n";
         $printed = 1;
         $items_shown++;
      } else {
         if (length($para) > $max_len) {
            $para = substr($para, 0, $max_len);
            # Cut back to the last complete sentence, if there is one.
            $para =~ s/^(.*[.!?]) [^.!?]*$/$1 (...)/;
         }
         print "$para\n";
         $printed = 1;
         last;
      }
   }
   if ($is_list_page && $more_items) {
      # Wikipedia page URLs use underscores where titles have spaces.
      (my $title = $query) =~ tr/ /_/;
      print "For more see http://$lang.wikipedia.org/wiki/$title\n";
   }
} else {
   print "No matches with $query\n";
   $printed = 1;
}
print "No information found in article for $query\n" if !$printed;