From ee50bd2a817b6e612692f86d9d7e85acc3a89f9c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:06:20 -0400 Subject: [PATCH] =?UTF-8?q?DEBUG-2334=20duplicate=20mutable=20values=20whe?= =?UTF-8?q?n=20serializing=20for=20dynamic=20inst=E2=80=A6=20(#4009)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * DEBUG-2334 duplicate mutable values when serializing for dynamic instrumentation When serializing parameter values at method entry, the resulting snapshot should not change if the parameter is mutated during method execution. This currently only applies to string values as other values used as serializer output are not mutable. This commit duplicates the string values which achieves immutability with no additional API complexity but at the cost of duplicating all strings, even if they are not entry parameters and thus can be stored without duplication. * only duplicate strings if they are not frozen and not truncated * replace case with if --------- Co-authored-by: Oleg Pudeyev --- lib/datadog/di/serializer.rb | 24 +++++++++- spec/datadog/di/serializer_spec.rb | 74 ++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/lib/datadog/di/serializer.rb b/lib/datadog/di/serializer.rb index 7958302dc86..2fefa0d91eb 100644 --- a/lib/datadog/di/serializer.rb +++ b/lib/datadog/di/serializer.rb @@ -26,6 +26,16 @@ module DI # All serialization methods take the names of the variables being # serialized in order to be able to redact values. # + # The result of serialization should not reference parameter values when + # the values are mutable (currently, this only applies to string values). + # Serializer will duplicate such mutable values, so that if method + # arguments are captured at entry and then modified during method execution, + # the serialized values from entry are correctly preserved. + # Alternatively, we could pass a parameter to the serialization methods + # which would control whether values are duplicated. This may be more + # efficient but there would be additional overhead from passing this + # parameter all the time and the API would get more complex. + # # @api private class Serializer def initialize(settings, redactor) @@ -92,12 +102,24 @@ def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.ma when Integer, Float, TrueClass, FalseClass serialized.update(value: value.to_s) when String, Symbol - value = value.to_s + need_dup = false + value = if String === value + # This is the only place where we duplicate the value, currently. + # All other values are immutable primitives (e.g. numbers). + # However, do not duplicate if the string is frozen, or if + # it is later truncated. + need_dup = !value.frozen? + value + else + value.to_s + end max = settings.dynamic_instrumentation.max_capture_string_length if value.length > max serialized.update(truncated: true, size: value.length) value = value[0...max] + need_dup = false end + value = value.dup if need_dup serialized.update(value: value) when Array if depth < 0 diff --git a/spec/datadog/di/serializer_spec.rb b/spec/datadog/di/serializer_spec.rb index a19d02b79e7..2e870d9cad3 100644 --- a/spec/datadog/di/serializer_spec.rb +++ b/spec/datadog/di/serializer_spec.rb @@ -320,5 +320,79 @@ def self.define_cases(cases) end end end + + context 'when positional arg is mutated' do + let(:args) do + ['hello', 'world'] + end + + let(:kwargs) { {} } + + it 'preserves original value' do + serialized + + args.first.gsub!('hello', 'bye') + + expect(serialized).to eq( + arg1: {type: 'String', value: 'hello'}, + arg2: {type: 'String', value: 'world'}, + ) + end + end + + context 'when keyword arg is mutated' do + let(:args) do + [] + end + + let(:kwargs) do + {foo: 'bar'} + end + + it 'preserves original value' do + serialized + + kwargs[:foo].gsub!('bar', 'bye') + + expect(serialized).to eq( + foo: {type: 'String', value: 'bar'}, + ) + end + end + + context 'when positional arg is frozen' do + let(:frozen_string) { 'hello'.freeze } + + let(:args) do + [frozen_string, 'world'] + end + + let(:kwargs) { {} } + + it 'serializes without duplication' do + expect(serialized).to eq( + arg1: {type: 'String', value: 'hello'}, + arg2: {type: 'String', value: 'world'}, + ) + + expect(serialized[:arg1][:value]).to be frozen_string + end + end + + context 'when keyword arg is frozen' do + let(:frozen_string) { 'hello'.freeze } + + let(:args) { [] } + + let(:kwargs) { {foo: frozen_string} } + + it 'serializes without duplication' do + expect(serialized).to eq( + foo: {type: 'String', value: 'hello'}, + ) + + expect(serialized[:foo][:value]).to be frozen_string + end + end end end