diff --git a/lib/rack/utf8_sanitizer.rb b/lib/rack/utf8_sanitizer.rb index eea3f12..cc5a3d1 100644 --- a/lib/rack/utf8_sanitizer.rb +++ b/lib/rack/utf8_sanitizer.rb @@ -2,6 +2,7 @@ require 'uri' require 'stringio' +require 'rack/request' module Rack class UTF8Sanitizer @@ -115,17 +116,13 @@ def build_strategy(options) end def sanitize_rack_input(env) - # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42 - # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset - # Ignoring charset in content type. - if content_type = env['CONTENT_TYPE'] - content_type = content_type.split(/[;,]/, 2).first - if content_type - content_type.strip! - content_type.downcase! - end - end + request = Rack::Request.new(env) + content_type = request.media_type return unless @sanitizable_content_types.any? {|type| content_type == type } + + charset = request.content_charset + return if charset && charset.downcase != 'utf-8' + uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type} if env['rack.input'] diff --git a/test/test_utf8_sanitizer.rb b/test/test_utf8_sanitizer.rb index f0240c2..cb055c8 100644 --- a/test/test_utf8_sanitizer.rb +++ b/test/test_utf8_sanitizer.rb @@ -252,6 +252,18 @@ def read end end + it "sanitizes the rack body if the charset is present and utf-8" do + input = "name=#{CGI.escape("まつもと")}" + @rack_input = StringIO.new input + + env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=utf-8") + sanitize_form_data(env) do |sanitized_input| + sanitized_input.encoding.should == Encoding::UTF_8 + sanitized_input.should.be.valid_encoding + sanitized_input.should == input + end + end + it "strip UTF-8 BOM from StringIO rack.input" do input = %(\xef\xbb\xbf{"Hello": "World"}) @rack_input = StringIO.new input @@ -327,6 +339,18 @@ def read end end + it "does not sanitize the rack body if the charset is present and not utf-8" do + input = "name=".encode("Shift_JIS") + CGI.escape("まつもと".encode("Shift_JIS", "UTF-8")) + @rack_input = StringIO.new input + + env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=Shift_JIS") + sanitize_form_data(env) do |sanitized_input| + sanitized_input.encoding.should == Encoding::SHIFT_JIS + sanitized_input.should.be.valid_encoding + sanitized_input.should == input + end + end + it "adjusts content-length when replacing input" do input = "foo=bla&quux=bar\xED" @rack_input = StringIO.new input