diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c1557b..fcb9243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +0.1.8: + +* handle invalid UTF-8 in percent-encoded paths; + 0.1.7: * percent-encode square brackets in :url_encoded part of the result, because of HTTParty; diff --git a/lib/linkhum/url.rb b/lib/linkhum/url.rb index fd11819..40971ca 100644 --- a/lib/linkhum/url.rb +++ b/lib/linkhum/url.rb @@ -30,6 +30,11 @@ def self.parse(url) end au_path.force_encoding(Encoding::ASCII_8BIT) url_encoded[:path] = encode_component(au_path) + decoded_path = human_readable[:path].dup + if !decoded_path.force_encoding(Encoding::UTF_8).valid_encoding? + human_readable[:path] = au.path + end + human_readable[:query] = unencode_component(au.query, false) if au.query decoded_query = human_readable[:query].dup @@ -37,7 +42,6 @@ def self.parse(url) human_readable[:query] = au.query end end - if au.query # see above au_query = au.query.dup diff --git a/linkhum-url.gemspec b/linkhum-url.gemspec index 10c811c..61a3dcc 100644 --- a/linkhum-url.gemspec +++ b/linkhum-url.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'linkhum-url' - s.version = '0.1.7' - s.date = '2017-10-17' + s.version = '0.1.8' + s.date = '2018-08-22' s.summary = "Linkhum-URL creates both URL-encoded and readable versions of URLs" s.description = "Input URL could be either human-readable, or URL-encoded. Two URLs are returned as result: human-readable and URL-encoded." s.authors = ["Alexey Makhotkin"] diff --git a/spec/linkhum-url.spec b/spec/linkhum-url.spec index 7f5be5e..309945b 100644 --- a/spec/linkhum-url.spec +++ b/spec/linkhum-url.spec @@ -103,12 +103,19 @@ describe Linkhum::URL do end end - it "handles non-UTF8 percent-encoded URLs" do + it "handles non-UTF8 percent-encoded query" do lu = Linkhum::URL.parse("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF") expect(lu[:human_readable]).to eql("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF") expect(lu[:url_encoded]).to eql("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF") end + it "handles non-UTF8 percent-encoded URL" do + url = "http://galaktikinsan.com/2017/11/20/korkunc-plan-insanlik-nufusunun-%90-ni-ni-yok-etmek/" + lu = Linkhum::URL.parse(url) + expect(lu[:url_encoded]).to eql(url) + expect(lu[:human_readable]).to eql(url) + end + it "handles Punycode/percent-encoded Devanagari" do lu = Linkhum::URL.parse("http://xn--p1b6ci4b4b3a.xn--11b5bs3a9aj6g/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A5%8D%E0%A4%AF_%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0") expect(lu[:human_readable]).to eql("http://उदाहरण.परीक्षा/मुख्य_पृष्ठ")