summary refs log tree commit diff stats
path: root/file.pl
diff options
context:
space:
mode:
author Stefan Ritter <xeno@thehappy.de> 2011-08-16 10:51:41 +0200
committer Stefan Ritter <xeno@thehappy.de> 2011-08-16 10:51:41 +0200
commit ba46c304bdffdce0b6019a47f6759ce6abbeef90 (patch)
tree 40a5b204637cb65c5bb91bd4f26a248ea498e019 /file.pl
Initial commit
Diffstat (limited to 'file.pl')
-rw-r--r-- file.pl 91
1 files changed, 91 insertions, 0 deletions
diff --git a/file.pl b/file.pl
new file mode 100644
index 0000000..6bc1faa
--- /dev/null
+++ b/file.pl
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+
+#use strict;
+#use warnings;
+use Web::Scraper;
+use URI;
+use HTML::Entities;
+use Encode;
+use URI::Escape;
+use LWP::UserAgent;
+
+
+# Command-line arguments:
+#   $ARGV[0]  file extension to look up, with or without a leading dot
+#   $ARGV[1]  path of the local extension list ("<ext> <description>" lines)
+#   $ARGV[2]  optional flag; a value containing "x" or "X" skips the local list
+# Missing arguments are treated as empty strings.
+my $type = defined $ARGV[0] ? $ARGV[0] : '';
+my $file = $ARGV[1];
+my $skipFile = defined $ARGV[2] ? $ARGV[2] : '';
+
+# Silently refuse over-long extensions. The previous test compared an
+# undeclared $length against the unanchored pattern /.{0,6}/, which matches
+# every string, so the guard could never fire. Check the real length of the
+# argument instead.
+# NOTE(review): the limit is applied to the argument as typed, so a leading
+# dot counts towards the six characters - confirm that this is intended.
+if (length($type) > 6) {
+    exit 0;
+}
+
+# Descriptions may contain non-ASCII characters; emit them as UTF-8.
+binmode(STDOUT, ":utf8");
+
+# Normalise to the dotted form (".txt") that the local list comparison uses.
+$type = ".$type" unless $type =~ /^\./;
+
+# Becomes 1 as soon as a description has been printed.
+my $found = 0;
+
+# Local lookup: unless the third argument contains "x"/"X", scan $file for
+# entries whose extension equals $type and print each matching description.
+# Sets $found to 1 when at least one entry matched. Dies if the list cannot
+# be opened.
+if ($skipFile !~ /X/i) {
+    # Three-argument open with a lexical handle, so no character in $file
+    # can be interpreted as part of the open mode.
+    open(my $list_fh, '<', $file)
+        or die "Cannot open extension list '$file': $!\n";
+    while (my $line = <$list_fh>) {
+        chomp $line;
+        # A line is "<extension> <description>", split at the first space.
+        my ($ext, $des) = split / /, $line, 2;
+        # Lines without a description never matched before; keep skipping
+        # them.
+        next unless defined $ext && defined $des;
+        # Plain string comparison: the extension read from the file is data,
+        # not a pattern, so entries such as ".c++" no longer break the regex
+        # engine and "." no longer acts as a wildcard.
+        if ($type eq $ext) {
+            print "$ext is \"$des\"\n";
+            $found = 1;
+        }
+    }
+    close $list_fh;
+}
+# From here on $type is used without its leading dot.
+$type =~ s/^\.//;
+
+# Web lookup: only when the local list produced nothing, query
+# file-extensions.org and print what the result page says about the
+# extension. Prints ".<ext> not in database" when nothing usable is found.
+# An earlier variant scraped http://filext.com/file-extension/<ext> instead,
+# looking for a table cell "Extension: <ext>" and printing the cell four
+# positions after it.
+if ($found == 0) {
+    # The search term comes straight from the command line, so escape it
+    # before it becomes part of the query string.
+    my $wikiurl = "http://www.file-extensions.org/search/?searchstring="
+        . uri_escape($ARGV[0]);
+    my $scrapp = scraper {
+        process '//table/tr/td', 'chars[]' => 'TEXT';
+        process '//div//p', 'results[]' => 'TEXT';
+        process '//div[@id="heading"]/h2', 'text[]' => 'TEXT';
+    };
+    my $page = $scrapp->scrape(URI->new($wikiurl));
+
+    # Any of the three lists may be absent when the page has no such
+    # elements; fall back to empty lists so the loops below stay safe.
+    my $cells      = $page->{'chars'}   || [];
+    my $paragraphs = $page->{'results'} || [];
+    my $headings   = $page->{'text'}    || [];
+
+    # A "Database contains ... records" paragraph is taken to mean that the
+    # page is a result listing rather than a single-extension page.
+    my $is_listing = grep { /Database contains .* records./i } @$paragraphs;
+
+    if ($is_listing) {
+        # Walk the table cells starting at index 3.
+        # NOTE(review): the offset and the "any character" in front of the
+        # extension are kept from the original - presumably they skip the
+        # table header and match the leading dot; confirm against the page.
+        # \Q...\E keeps regex metacharacters in the extension literal.
+        for my $i (3 .. $#$cells) {
+            my $cell = $cells->[$i];
+            # A cell naming a longer extension with the same prefix ends
+            # the entries for this extension.
+            last if $cell =~ /^.\Q$type\E.+/i;
+            # Every cell that is not the extension label itself is printed
+            # as a description.
+            if ($cell !~ /^.\Q$type\E$/i) {
+                print ".$type is $cell\n";
+                $found = 1;
+            }
+        }
+    } else {
+        # Single-result page: the first heading carries the description.
+        # Without a heading there is nothing to report, so fall through to
+        # the "not in database" message instead of printing an empty one.
+        my $heading = $headings->[0];
+        if (defined $heading && $heading =~ /\S/) {
+            print ".$type is $heading\n";
+            $found = 1;
+        }
+    }
+
+    if ($found == 0) {
+        print ".$type not in database\n";
+    }
+}
+
+