Skip to content

Commit

Permalink
Release 0.1.8: handle invalid UTF-8 in percent-encoded paths
Browse files (browse the repository at this point in the history)
  • Loading branch information
squadette committed Aug 21, 2018
1 parent 69f8ff7 commit 19801c8
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.1.8:

* handle invalid UTF-8 in percent-encoded paths;

0.1.7:

* percent-encode square brackets in :url_encoded part of the result, because of HTTParty;
Expand Down
6 changes: 5 additions & 1 deletion lib/linkhum/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ def self.parse(url)
end
au_path.force_encoding(Encoding::ASCII_8BIT)
url_encoded[:path] = encode_component(au_path)
decoded_path = human_readable[:path].dup
if !decoded_path.force_encoding(Encoding::UTF_8).valid_encoding?
human_readable[:path] = au.path
end

human_readable[:query] = unencode_component(au.query, false)
if au.query
decoded_query = human_readable[:query].dup
if !decoded_query.force_encoding(Encoding::UTF_8).valid_encoding?
human_readable[:query] = au.query
end
end

if au.query
# see above
au_query = au.query.dup
Expand Down
4 changes: 2 additions & 2 deletions linkhum-url.gemspec
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|
s.name = 'linkhum-url'
s.version = '0.1.7'
s.date = '2017-10-17'
s.version = '0.1.8'
s.date = '2018-08-22'
s.summary = "Linkhum-URL creates both URL-encoded and readable versions of URLs"
s.description = "Input URL could be either human-readable, or URL-encoded. Two URLs are returned as result: human-readable and URL-encoded."
s.authors = ["Alexey Makhotkin"]
Expand Down
9 changes: 8 additions & 1 deletion spec/linkhum-url.spec
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,19 @@ describe Linkhum::URL do
end
end

it "handles non-UTF8 percent-encoded URLs" do
it "handles non-UTF8 percent-encoded query" do
lu = Linkhum::URL.parse("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF")
expect(lu[:human_readable]).to eql("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF")
expect(lu[:url_encoded]).to eql("http://www.alib.ru/find3.php4?tfind=%EB%EE%F6%E8%FF")
end

it "handles non-UTF8 percent-encoded URL" do
url = "http://galaktikinsan.com/2017/11/20/korkunc-plan-insanlik-nufusunun-%90-ni-ni-yok-etmek/"
lu = Linkhum::URL.parse(url)
expect(lu[:url_encoded]).to eql(url)
expect(lu[:human_readable]).to eql(url)
end

it "handles Punycode/percent-encoded Devanagari" do
lu = Linkhum::URL.parse("http://xn--p1b6ci4b4b3a.xn--11b5bs3a9aj6g/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A5%8D%E0%A4%AF_%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0")
expect(lu[:human_readable]).to eql("http://उदाहरण.परीक्षा/मुख्य_पृष्ठ")
Expand Down

0 comments on commit 19801c8

Please sign in to comment.