diff --git a/lib/rack/utf8_sanitizer.rb b/lib/rack/utf8_sanitizer.rb index eea3f12..5e50424 100644 --- a/lib/rack/utf8_sanitizer.rb +++ b/lib/rack/utf8_sanitizer.rb @@ -2,6 +2,7 @@ require 'uri' require 'stringio' +require 'rack/request' module Rack class UTF8Sanitizer @@ -126,6 +127,10 @@ def sanitize_rack_input(env) end end return unless @sanitizable_content_types.any? {|type| content_type == type } + + charset = Rack::Request.new(env).content_charset + return if charset && charset.downcase != 'utf-8' + uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type} if env['rack.input'] diff --git a/test/test_utf8_sanitizer.rb b/test/test_utf8_sanitizer.rb index f0240c2..cb055c8 100644 --- a/test/test_utf8_sanitizer.rb +++ b/test/test_utf8_sanitizer.rb @@ -252,6 +252,18 @@ def read end end + it "sanitizes the rack body if the charset is present and utf-8" do + input = "name=#{CGI.escape("まつもと")}" + @rack_input = StringIO.new input + + env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=utf-8") + sanitize_form_data(env) do |sanitized_input| + sanitized_input.encoding.should == Encoding::UTF_8 + sanitized_input.should.be.valid_encoding + sanitized_input.should == input + end + end + it "strip UTF-8 BOM from StringIO rack.input" do input = %(\xef\xbb\xbf{"Hello": "World"}) @rack_input = StringIO.new input @@ -327,6 +339,18 @@ def read end end + it "does not sanitize the rack body if the charset is present and not utf-8" do + input = "name=".encode("Shift_JIS") + CGI.escape("まつもと".encode("Shift_JIS", "UTF-8")) + @rack_input = StringIO.new input + + env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=Shift_JIS") + sanitize_form_data(env) do |sanitized_input| + sanitized_input.encoding.should == Encoding::SHIFT_JIS + sanitized_input.should.be.valid_encoding + sanitized_input.should == input + end + end + it "adjusts content-length when replacing input" do input = "foo=bla&quux=bar\xED" @rack_input = StringIO.new input