blob: 6bc1faae997a41c94b0fcee1ff82c9fd3dc6428f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
#!/usr/bin/perl
#use strict;
#use warnings;
use Web::Scraper;
use URI;
use HTML::Entities;
use Encode;
use URI::Escape;
use LWP::UserAgent;
# Command-line arguments:
#   $ARGV[0]  extension to look up, with or without a leading dot
#   $ARGV[1]  path of the local extension list that is searched first
#   $ARGV[2]  optional flag; a value containing "x" (any case) skips that list
my $type = $ARGV[0];
my $file = $ARGV[1];
my $skipFile = $ARGV[2];

# Without an extension there is nothing to look up, so fail early with a
# hint instead of continuing with an empty search term.
# NOTE(review): an earlier guard here tested an unset $length against a
# pattern that always matches, so it could never fire. If a limit on the
# extension length is wanted, add it here - TODO confirm the intent.
if (!defined $type || $type eq '') {
    print STDERR "usage: $0 <extension> <list-file> [x]\n";
    exit 1;
}

binmode(STDOUT, ":utf8");

# Normalise to the dotted form (".ext"); the local list lookup compares
# against this form.
if ($type !~ /^\./) {
    $type = ".$type";
}
# Search the local extension list. Each line is expected to look like
# "<extension> <description>": everything up to the first space is the
# extension (compared with the dotted $type), the rest is the description.
# $found stays 0 when nothing matched, so later code can fall back to
# another source.
my $found = 0;

# A third argument containing "x" (any case) disables the local lookup.
if ((defined $skipFile ? $skipFile : '') !~ /X/i) {
    # Three-argument open with an undefined path would quietly open an
    # anonymous temporary file, so reject a missing file name explicitly.
    die "No extension list file given\n"
        unless defined $file && length $file;

    # Lexical handle and explicit read mode: the file name can no longer
    # be interpreted as an open mode or a pipe.
    open(my $list_fh, '<', $file) or die "Cannot open '$file': $!\n";

    while (my $line = <$list_fh>) {
        chomp $line;

        # Split at the first space only; the description may contain spaces.
        my ($ext, $des) = split / /, $line, 2;
        next unless defined $ext && defined $des;

        # Plain string comparison: entries such as ".c++" must not be
        # treated as regular expressions.
        if ($type eq $ext) {
            print "$ext is \"$des\"\n";
            $found = 1;
        }
    }

    close $list_fh;
}
# Drop the leading dot again; the messages and patterns below add it back
# themselves.
$type =~ s/^\.//;

# Fall back to the file-extensions.org search when the local list gave no
# answer.
if ($found == 0) {
    #my $wikiurl = "http://filext.com/file-extension/$ARGV[0]";
    #my $scrapp = scraper {
    # process '//table/tr/td', 'chars[]' => 'TEXT';
    #};

    # Escape the search term so characters such as "&", "#" or spaces
    # cannot alter the query string.
    my $wikiurl = "http://www.file-extensions.org/search/?searchstring="
        . uri_escape(defined $ARGV[0] ? $ARGV[0] : '');

    my $scrapp = scraper {
        process '//table/tr/td', 'chars[]' => 'TEXT';
        process '//div//p', 'results[]' => 'TEXT';
        process '//div[@id="heading"]/h2', 'text[]' => 'TEXT';
    };

    my $blubb = $scrapp->scrape(URI->new($wikiurl));

    # A key is absent when its selector matched nothing; use empty lists
    # so the loops below simply do not run.
    my $list = $blubb->{'chars'}   || [];
    my $res  = $blubb->{'results'} || [];
    my $text = $blubb->{'text'}    || [];

    # The page reports "Database contains ... records" when the search
    # produced a result table rather than a single entry page
    # (presumably - verify against the live page layout).
    my $morethanone = grep { /Database contains .* records./i } @$res;

    if ($morethanone) {
        # Start at the fourth cell; the first three are skipped
        # (presumably table headers - TODO confirm).
        for my $i (3 .. $#$list) {
            my $cell = $list->[$i];

            # Stop at the first cell naming a longer extension that merely
            # starts with ours. \Q...\E keeps $type literal.
            last if $cell =~ /^.\Q$type\E.+/i;

            # Cells that repeat the extension itself are not descriptions.
            next if $cell =~ /^.\Q$type\E$/i;

            print ".$type is $cell\n";
            $found = 1;
        }
    }
    elsif (defined $text->[0] && length $text->[0]) {
        # Single-result page: the heading carries the description.
        print ".$type is $$text[0]\n";
        $found = 1;
    }

    #for ($i = 0; $i <= $#$list; $i++) {
    # if ($$list[$i] =~ /^Extension: $type$/i) {
    # print ".$type is $$list[$i+4]\n";
    # $found = 1;
    # }
    #}

    if ($found == 0) {
        print ".$type not in database\n";
    }
}
|