#!/usr/bin/perl
# file.pl

#use strict;
#use warnings;
use Web::Scraper;
use URI;
use HTML::Entities;
use Encode;
use URI::Escape;
use LWP::UserAgent;


use strict;
use warnings;

# Look up the description of a file extension.
#
# Usage: <script> <extension> <database-file> [skip-flag]
#
#   extension      extension to look up, with or without the leading dot
#   database-file  local text database; each line is "<.ext> <description>",
#                  separated by the first space on the line
#   skip-flag      any value containing "x" or "X" skips the local database
#
# The local database is consulted first.  When it yields nothing (or is
# skipped) the extension is searched for on www.file-extensions.org.
# Every hit is printed to STDOUT; if nothing is found anywhere,
# ".<ext> not in database" is printed instead.

my ($type, $file, $skipFile) = @ARGV;

if (!defined $type || $type eq '') {
   die "usage: $0 <extension> <database-file> [skip-flag]\n";
}

binmode(STDOUT, ':utf8');

# The web query uses the argument exactly as typed; the lookups use the
# extension normalised to its bare ("pl") and dotted (".pl") forms.
my $query = $type;
(my $bare = $type) =~ s/^\.//;
my $dotted = ".$bare";

my $skip_local = defined $skipFile && $skipFile =~ /X/i;

my $found = 0;
$found = lookup_local($dotted, $file) unless $skip_local;
$found = lookup_online($bare, $query) unless $found;

print ".$bare not in database\n" unless $found;

# Print every entry of the local database whose extension equals $dotted.
#
#   $dotted  extension including its leading dot
#   $path    path of the database file
#
# Returns the number of entries printed.  Dies when no path was given or
# the file cannot be opened.
sub lookup_local {
   my ($dotted, $path) = @_;

   die "no database file given\n" unless defined $path;
   open(my $in, '<', $path) or die "cannot open $path: $!\n";

   my $hits = 0;
   while (my $line = <$in>) {
      chomp $line;
      my ($ext, $des) = split / /, $line, 2;

      # Lines without a description are not entries.  Compare with eq so
      # that dots and other regex metacharacters in the file match literally.
      next unless defined $des && $ext eq $dotted;

      print "$ext is \"$des\"\n";
      $hits++;
   }
   close $in;

   return $hits;
}

# Search www.file-extensions.org for an extension and print what it reports.
#
#   $bare   extension without its leading dot (used for matching and output)
#   $query  search string sent to the site, URI-escaped here
#
# Returns 1 if at least one description was printed, 0 otherwise.  Dies with
# the underlying error if the page cannot be scraped.
sub lookup_online {
   my ($bare, $query) = @_;

   my $scrapp = scraper {
      process '//table/tr/td', 'chars[]' => 'TEXT';
      process '//div//p', 'results[]' => 'TEXT';
      process '//div[@id="heading"]/h2', 'text[]' => 'TEXT';
   };
   my $url = URI->new(
      'http://www.file-extensions.org/search/?searchstring=' . uri_escape($query));

   my $page = eval { $scrapp->scrape($url) }
      or die "lookup of .$bare failed: " . ($@ || "no data returned\n");

   # A key is absent when its selector matched nothing on the page.
   my @cells    = @{ $page->{chars}   || [] };
   my @notes    = @{ $page->{results} || [] };
   my @headings = @{ $page->{text}    || [] };

   my $found = 0;
   if (grep { defined && /Database contains .* records./i } @notes) {
      # Result-list page.  The first three table cells are skipped
      # (presumably header cells -- TODO confirm against the page layout).
      for my $i (3 .. $#cells) {
         my $cell = $cells[$i];
         next unless defined $cell;

         # Stop at the first cell that holds the extension followed by more
         # text (presumably where rows for longer extensions begin -- TODO
         # confirm).
         last if $cell =~ /^.\Q$bare\E.+/i;

         # Cells that merely repeat the extension itself carry no description.
         next if $cell =~ /^.\Q$bare\E$/i;

         print ".$bare is $cell\n";
         $found = 1;
      }
   } elsif (defined $headings[0] && length $headings[0]) {
      # Otherwise the page heading is taken as the description.
      print ".$bare is $headings[0]\n";
      $found = 1;
   }

   return $found;
}