summaryrefslogtreecommitdiffstats
path: root/wiki2.pl
blob: 87c7c8c20555e39ab2962362f76a45ae6e492cca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/perl

#use strict;
#use warnings;
use WWW::Wikipedia;
use HTML::Entities;
use HTML::StripTags qw(strip_tags);

use strict;
use warnings;
use Encode qw(decode);

# Command-line Wikipedia lookup.
#
# Arguments: <search term> [language code]   (language defaults to "de")
#
# Prints either the first paragraph of the matching article with the wiki
# markup removed, or, for pages that open with a bullet list, the first few
# bullet entries followed by a link to the full page.  Prints a
# "No matches" line when the search returns nothing.

# Longest summary, in characters, kept before the text is cut back.  The
# " (...)" marker is appended after the cut, so the printed line can be a
# few characters longer than this.
use constant MAX_SUMMARY_LENGTH => 448;

# Number of bullet entries printed for a disambiguation-style page.
use constant MAX_DISAMBIGUATION_ENTRIES => 3;

# A "* " bullet only switches the page to disambiguation mode when it shows
# up before this many lines of ordinary text have been seen.
use constant MAX_LEADING_TEXT_LINES => 3;

binmode(STDOUT, ":utf8");

my ($query, $lang) = @ARGV;
if (!defined $query || $query eq '') {
    die "Usage: $0 <search term> [language code]\n";
}

$lang = "de" if !$lang;

# The language code becomes part of a host name, so refuse anything that is
# not a plain subdomain label.
if ($lang !~ /\A[A-Za-z][A-Za-z0-9-]*\z/) {
    die "Invalid language code: $lang\n";
}

# STDOUT encodes to UTF-8, so the raw argument bytes must be turned into
# characters before they are printed; the search itself still receives the
# argument exactly as it was given.
my $display_query = decode_argument($query);

my $wiki   = WWW::Wikipedia->new( language => $lang );
my $result = $wiki->search($query);

if (defined $result) {
    my $text = $result->text();
    $text = '' if !defined $text;

    my ($is_disambiguation, @paragraphs) = collect_paragraphs($text);
    my $entries_printed = 0;

    PARAGRAPH:
    for my $paragraph (@paragraphs) {
        $paragraph = strip_templates($paragraph);
        $paragraph =~ s/\A\s+//;
        $paragraph =~ s/\s+\z//;
        next PARAGRAPH if $paragraph eq '';

        if ($is_disambiguation) {
            # Only the bullet entries are of interest; they are printed as
            # they are, without further markup cleanup.
            next PARAGRAPH if $paragraph !~ /\A\* /;
            print "$paragraph\n";
            $entries_printed++;
            last PARAGRAPH if $entries_printed == MAX_DISAMBIGUATION_ENTRIES;
        }
        else {
            # The first paragraph that still has content after cleanup is
            # the summary.
            my $summary = clean_summary($paragraph);
            next PARAGRAPH if $summary eq '';
            print "$summary\n";
            last PARAGRAPH;
        }
    }

    if ($is_disambiguation) {
        # Spaces are written as underscores in the page URL.
        (my $page = $display_query) =~ tr/ /_/;
        print "For more see http://$lang.wikipedia.org/wiki/$page\n";
    }
}
else {
    print "No matches with $display_query\n";
}

# Decodes a command-line argument from UTF-8 into characters.  Falls back to
# the argument unchanged when it is not valid UTF-8 (for example because it
# was already decoded or uses another encoding).
sub decode_argument {
    my ($bytes) = @_;

    my $chars = eval {
        decode('UTF-8', $bytes, Encode::FB_CROAK() | Encode::LEAVE_SRC());
    };
    return defined $chars ? $chars : $bytes;
}

# Splits article text into paragraphs.
#
# HTML comments are removed first (they may span several lines).  Each line
# is trimmed; consecutive non-blank lines are joined with a space into one
# paragraph and a blank line ends the paragraph.  A "* " bullet seen before
# MAX_LEADING_TEXT_LINES text lines becomes an entry of its own and marks
# the page as a disambiguation page; later bullets are treated as ordinary
# text.
#
# Returns a list: the disambiguation flag followed by the paragraphs (which
# may include empty strings).
sub collect_paragraphs {
    my ($text) = @_;

    # Non-greedy so that text between two comments survives.
    $text =~ s/<!--.*?-->//gs;

    my @paragraphs;
    my $current           = '';
    my $is_disambiguation = 0;
    my $text_lines        = 0;

    my @lines = split /\n/, $text;
    for my $line (@lines) {
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;

        if ($line =~ /\A\* / && $text_lines < MAX_LEADING_TEXT_LINES) {
            push @paragraphs, $current, $line;
            $current           = '';
            $is_disambiguation = 1;
        }
        elsif (length $line) {
            $current .= "$line ";
            $text_lines++;
        }
        else {
            push @paragraphs, $current;
            $current = '';
        }
    }
    push @paragraphs, $current;

    return ($is_disambiguation, @paragraphs);
}

# Removes {{...}} templates from a paragraph and returns the result.
# Innermost templates are removed first and the pass is repeated, so nested
# templates disappear completely while text between two separate templates
# is kept.
sub strip_templates {
    my ($text) = @_;

    1 while $text =~ s/\{\{(?:(?!\{\{|\}\}).)*\}\}//g;

    return $text;
}

# Turns one paragraph of wiki markup into a plain, length-limited summary
# line and returns it.
sub clean_summary {
    my ($text) = @_;

    $text = decode_entities($text);

    # Footnotes go first so that brackets which only wrapped a footnote are
    # caught by the empty-bracket cleanup below.
    $text =~ s{<ref\b[^>]*/>}{}gi;
    $text =~ s{<ref\b[^>]*>.*?</ref>}{}gi;

    # [[target]] becomes "target", [[target|label]] becomes "label".
    $text =~ s/\[\[(?:[^\]]*\|)?([^\]|]*)\]\]/$1/g;

    # Bold/italic markup is a run of two or more apostrophes; a single
    # apostrophe is ordinary punctuation and stays.
    $text =~ s/'{2,}//g;

    # Drop brackets left empty by the removals above, including nested ones.
    1 while $text =~ s/\[\s*\]|\(\s*\)//g;

    $text =~ s/\s+/ /g;
    $text =~ s/\A //;
    $text =~ s/ \z//;
    $text =~ s/\s([,.?!])/$1/g;

    return truncate_summary($text);
}

# Shortens text that is longer than MAX_SUMMARY_LENGTH characters and marks
# the cut with " (...)".  The cut is moved back to the last complete
# sentence when there is one, otherwise to the last word boundary.  Shorter
# text is returned unchanged.
sub truncate_summary {
    my ($text) = @_;

    return $text if length($text) <= MAX_SUMMARY_LENGTH;

    $text = substr($text, 0, MAX_SUMMARY_LENGTH);
    if ($text !~ s/\A(.*[.!?]) [^.!?]*\z/$1 (...)/) {
        $text =~ s/\s+\S*\z//;
        $text .= ' (...)';
    }

    return $text;
}