diff options
author | lookshe <github@lookshe.org> | 2014-09-21 01:56:45 +0200 |
---|---|---|
committer | lookshe <github@lookshe.org> | 2014-09-21 01:56:45 +0200 |
commit | f158e764859189e1d9c7ce4c2c928539696da5e4 (patch) | |
tree | 7b09d083baa4b1fa99384696eb2a67b16991b4ac | |
parent | 3ffde91c79f247a205efd3d8b7c85c28a74a6e94 (diff) |
fix for unwanted sign before name and added support for international country codes
-rw-r--r-- | tld.pl | 17 |
1 files changed, 12 insertions, 5 deletions
@@ -8,6 +8,7 @@ use HTML::Entities;
 use Encode;
 use URI::Escape;
 use LWP::UserAgent;
+use utf8;
 
 my $type = $ARGV[0];
 if ($type !~ /^\./)
@@ -15,7 +16,6 @@ if ($type !~ /^\./)
     $type = ".$type";
 }
 
-binmode(STDOUT, ":utf8");
 my $scrap = scraper
 {
     process '//table/tr/td', 'table[]' => 'TEXT';
@@ -24,14 +24,17 @@ my $url = URI->new("http://en.wikipedia.org/wiki/List_of_Internet_top-level_doma
 my $res = $scrap->scrape($url);
 my $table = $res->{'table'};
 my $found = 0;
-for ($i = 0; $i < $#$table; $i++)
+for ($i = 0; $i < $#$table && found != 1; $i++)
 {
     if ($$table[$i] =~ /^\.[^ ]*$/ && $$table[$i+1] !~ /^No$/ && $$table[$i+1] !~ /^Yes$/ && $$table[$i+1] !~ /^Partial\[/ && $$table[$i+1] !~ /^$/)
     {
         #print "$$table[$i] is $$table[$i+1]\n";
         if ($$table[$i] =~ /^$type$/)
         {
-            print "$type is $$table[$i+1]\n";
+            ($result = $$table[$i+1]) =~ s/^ //;
+            print "$type is ";
+            binmode(STDOUT, ":utf8");
+            print "$result\n";
             $found = 1;
             break;
         }
@@ -40,10 +43,14 @@ for ($i = 0; $i < $#$table; $i++)
     }
     if ($$table[$i] =~ /^xn--/)
     {
+        $tabletype_enc = encode("utf8", $$table[$i+1]);
         #print "$$table[$i+1] is $$table[$i+2]\n";
-        if ($$table[$i+1] =~ /^$type$/)
+        if ($tabletype_enc =~ /$type/)
         {
-            print "$type is $$table[$i+2]\n";
+            ($result = $$table[$i+2]) =~ s/^ //;
+            print "$type is ";
+            binmode(STDOUT, ":utf8");
+            print "$result\n";
             $found = 1;
             break;
         }