From f158e764859189e1d9c7ce4c2c928539696da5e4 Mon Sep 17 00:00:00 2001 From: lookshe Date: Sun, 21 Sep 2014 01:56:45 +0200 Subject: fix for unwanted sign before name and added support international country code --- tld.pl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tld.pl b/tld.pl index ce21b6f..5fdbe68 100644 --- a/tld.pl +++ b/tld.pl @@ -8,6 +8,7 @@ use HTML::Entities; use Encode; use URI::Escape; use LWP::UserAgent; +use utf8; my $type = $ARGV[0]; if ($type !~ /^\./) @@ -15,7 +16,6 @@ if ($type !~ /^\./) $type = ".$type"; } -binmode(STDOUT, ":utf8"); my $scrap = scraper { process '//table/tr/td', 'table[]' => 'TEXT'; @@ -24,14 +24,17 @@ my $url = URI->new("http://en.wikipedia.org/wiki/List_of_Internet_top-level_doma my $res = $scrap->scrape($url); my $table = $res->{'table'}; my $found = 0; -for ($i = 0; $i < $#$table; $i++) +for ($i = 0; $i < $#$table && $found != 1; $i++) { if ($$table[$i] =~ /^\.[^ ]*$/ && $$table[$i+1] !~ /^No$/ && $$table[$i+1] !~ /^Yes$/ && $$table[$i+1] !~ /^Partial\[/ && $$table[$i+1] !~ /^$/) { #print "$$table[$i] is $$table[$i+1]\n"; if ($$table[$i] =~ /^$type$/) { - print "$type is $$table[$i+1]\n"; + ($result = $$table[$i+1]) =~ s/^ //; + print "$type is "; + binmode(STDOUT, ":utf8"); + print "$result\n"; $found = 1; break; } @@ -40,10 +43,14 @@ for ($i = 0; $i < $#$table; $i++) } if ($$table[$i] =~ /^xn--/) { + $tabletype_enc = encode("utf8", $$table[$i+1]); #print "$$table[$i+1] is $$table[$i+2]\n"; - if ($$table[$i+1] =~ /^$type$/) + if ($tabletype_enc =~ /$type/) { - print "$type is $$table[$i+2]\n"; + ($result = $$table[$i+2]) =~ s/^ //; + print "$type is "; + binmode(STDOUT, ":utf8"); + print "$result\n"; $found = 1; break; } -- cgit v1.2.3