Merge pull request #27 from mjdemedeiros/histogram

Abstract DP Histogram
leanprover · Jul 8, 2024 · eea6025 · eea6025
2 parents dd9be00 + 0dca9bb
commit eea6025
Show file tree

Hide file tree

Showing 28 changed files with 929 additions and 245 deletions.
diff --git a/SampCert.lean b/SampCert.lean
@@ -5,10 +5,40 @@ Authors: Jean-Baptiste Tristan
 -/
 
 import SampCert.DifferentialPrivacy.Queries.BoundedMean.Basic
+import SampCert.DifferentialPrivacy.Queries.Histogram.Basic
 import SampCert.DifferentialPrivacy.ZeroConcentrated.System
 import SampCert.DifferentialPrivacy.Pure.System
+import SampCert.DifferentialPrivacy.Queries.HistogramMean.Properties
 
 open SLang
 
 def combineConcentrated := @privNoisedBoundedMean_DP gaussian_zCDPSystem
 def combinePure := @privNoisedBoundedMean_DP PureDPSystem
+
+/-
+##  Example: execute the histogram mean on a list
+-/
+section histogramMeanExample
+
+def numBins : ℕ+ := 64
+
+/-
+Bin the infinite type ℕ with clipping
+-/
+def bin (n : ℕ) : Fin numBins :=
+  { val := min (Nat.log 2 n) (PNat.natPred numBins),
+    isLt := by
+      apply min_lt_iff.mpr
+      right
+      exact Nat.lt_of_sub_eq_succ rfl
+  }
+
+/-
+Return an upper bound on the bin value, clipped to 2^(1 + numBins)
+-/
+def unbin (n : Fin numBins) : ℕ+ := 2 ^ (1 + n.val)
+
+noncomputable def combineMeanHistogram : Mechanism ℕ (Option ℚ) :=
+  privMeanHistogram PureDPSystem numBins { bin } unbin 1 20 2 1 20
+
+end histogramMeanExample
diff --git a/SampCert/DifferentialPrivacy/Abstract.lean b/SampCert/DifferentialPrivacy/Abstract.lean
@@ -87,14 +87,24 @@ def privPostProcess (nq : Mechanism T U) (pp : U → V) (l : List T) : PMF V :=
   let A ← nq l
   return pp A
 
+/--
+Constant mechanism
+-/
+def privConst (u : U) : Mechanism T U := fun _ => PMF.pure u
+
+
 /--
 Abstract definition of a differentially private systemm.
 -/
 class DPSystem (T : Type) where
   /--
   Differential privacy proposition, with one real paramater (ε-DP, ε-zCDP, etc)
   -/
-  prop : Mechanism T Z → ℝ → Prop
+  prop : Mechanism T Z → NNReal → Prop
+  /--
+  DP is monotonic
+  -/
+  prop_mono {m : Mechanism T Z} {ε₁ ε₂: NNReal} (Hε : ε₁ ≤ ε₂) (H : prop m ε₁) : prop m ε₂
   /--
   A noise mechanism (eg. Laplace, Discrete Gaussian, etc)
   Paramaterized by a query, sensitivity, and a (rational) security paramater.
@@ -107,18 +117,32 @@ class DPSystem (T : Type) where
   /--
   Privacy composes by addition.
   -/
-  compose_prop : {U V : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → [MeasurableSpace V] → [Countable V] → [DiscreteMeasurableSpace V] → [Inhabited V] → ∀ m₁ : Mechanism T U, ∀ m₂ : Mechanism T V, ∀ ε₁ ε₂ ε₃ ε₄ : ℕ+,
-    prop m₁ (ε₁ / ε₂) → prop m₂ (ε₃ / ε₄) → prop (privCompose m₁ m₂) ((ε₁ / ε₂) + (ε₃ / ε₄))
+  compose_prop : {U V : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → [MeasurableSpace V] → [Countable V] → [DiscreteMeasurableSpace V] → [Inhabited V] →
+    ∀ m₁ : Mechanism T U, ∀ m₂ : Mechanism T V, ∀ ε₁ ε₂ : NNReal,
+    prop m₁ ε₁ → prop m₂ ε₂ → prop (privCompose m₁ m₂) (ε₁ + ε₂)
   /--
   Privacy adaptively composes by addition.
   -/
-  adaptive_compose_prop : {U V : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → [MeasurableSpace V] → [Countable V] → [DiscreteMeasurableSpace V] → [Inhabited V] → ∀ m₁ : Mechanism T U, ∀ m₂ : U -> Mechanism T V, ∀ ε₁ ε₂ ε₃ ε₄ : ℕ+,
-    prop m₁ (ε₁ / ε₂) → (∀ u, prop (m₂ u) (ε₃ / ε₄)) -> prop (privComposeAdaptive m₁ m₂) ((ε₁ / ε₂) + (ε₃ / ε₄))
+  adaptive_compose_prop : {U V : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → [MeasurableSpace V] → [Countable V] → [DiscreteMeasurableSpace V] → [Inhabited V] → ∀ m₁ : Mechanism T U, ∀ m₂ : U -> Mechanism T V,
+    ∀ ε₁ ε₂ : NNReal,
+    prop m₁ ε₁ → (∀ u, prop (m₂ u) ε₂) -> prop (privComposeAdaptive m₁ m₂) (ε₁ + ε₂)
   /--
   Privacy is invariant under post-processing.
   -/
-  postprocess_prop : {U : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → { pp : U → V } → ∀ m : Mechanism T U, ∀ ε₁ ε₂ : ℕ+,
-   prop m (ε₁ / ε₂) → prop (privPostProcess m pp) (ε₁ / ε₂)
+  postprocess_prop : {U : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → { pp : U → V } →
+    ∀ m : Mechanism T U, ∀ ε : NNReal,
+   prop m ε → prop (privPostProcess m pp) ε
+  /--
+  Constant query is 0-DP
+  -/
+  const_prop : {U : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] -> (u : U) -> prop (privConst u) (0 : NNReal)
+
+
+lemma DPSystem_prop_ext [dps : DPSystem T] {ε₁ ε₂ : NNReal} (m : Mechanism T U) (Hε : ε₁ = ε₂) (H : dps.prop m ε₁) :
+    dps.prop m ε₂ := by
+  rw [<- Hε]
+  assumption
+
 
 @[simp]
 lemma bind_bind_indep (p : Mechanism T U) (q : Mechanism T V) (h : U → V → PMF A) :

diff --git a/SampCert/DifferentialPrivacy/Neighbours.lean b/SampCert/DifferentialPrivacy/Neighbours.lean
@@ -16,7 +16,7 @@ variable {T : Type}
 /--
 Lists which differ by the inclusion or modification of an element.
 
-This is SampCert's private property.
+This is SampCert's private property. 
 -/
 inductive Neighbour (l₁ l₂ : List T) : Prop where
   | Addition : l₁ = a ++ b → l₂ = a ++ [n] ++ b → Neighbour l₁ l₂

diff --git a/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean b/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean
@@ -18,11 +18,9 @@ variable [Hu : Nonempty U]
 namespace SLang
 
 -- Better proof for Pure DP adaptive composition
-theorem PureDP_ComposeAdaptive' (nq1 : List T → PMF U) (nq2 : U -> List T → PMF V) (ε₁ ε₂ ε₃ ε₄ : ℕ+) (h1 : PureDP nq1 ((ε₁ : ℝ) / ε₂))  (h2 : ∀ u : U, PureDP (nq2 u) ((ε₃ : ℝ) / ε₄)) :
-  PureDP (privComposeAdaptive nq1 nq2) (((ε₁ : ℝ) / ε₂) + ((ε₃ : ℝ) / ε₄)) := by
+theorem PureDP_ComposeAdaptive' (nq1 : List T → PMF U) (nq2 : U -> List T → PMF V) (ε₁ ε₂ : NNReal) (h1 : PureDP nq1 ε₁)  (h2 : ∀ u : U, PureDP (nq2 u) ε₂) :
+  PureDP (privComposeAdaptive nq1 nq2) (ε₁ + ε₂) := by
   simp [PureDP] at *
-  have h1a := h1
-  -- rcases h1 with ⟨h1a, h1nz⟩
   rw [event_eq_singleton] at *
   simp [DP_singleton] at *
   intros l₁ l₂ Hl₁l₂ u v
@@ -71,13 +69,12 @@ theorem PureDP_ComposeAdaptive' (nq1 : List T → PMF U) (nq2 : U -> List T →
       have h2a'_pre := h2 u
       rw [event_eq_singleton] at h2a'_pre
       simp [DP_singleton] at h2a'_pre
-      exact (mul_le_mul' (h1a l₁ l₂ Hl₁l₂ u) (h2a'_pre l₁ l₂ Hl₁l₂ v))
+      exact (mul_le_mul' (h1 l₁ l₂ Hl₁l₂ u) (h2a'_pre l₁ l₂ Hl₁l₂ v))
     · left
       rename_i hnz
       intro HK
       simp_all
     · right
       intro HK
       simp_all
-
 end SLang
diff --git a/SampCert/DifferentialPrivacy/Pure/Composition.lean b/SampCert/DifferentialPrivacy/Pure/Composition.lean
@@ -26,16 +26,14 @@ namespace SLang
 /--
 Pure DP privacy bound for ``privCompose``.
 -/
-theorem privCompose_DP_bound {nq1 : Mechanism T U} {nq2 : Mechanism T V} {ε₁ ε₂ ε₃ ε₄ : ℕ+} (h1 : PureDP nq1 ((ε₁ : ℝ) / ε₂))  (h2 : PureDP nq2 ((ε₃ : ℝ) / ε₄)) :
-  DP (privCompose nq1 nq2) (((ε₁ : ℝ) / ε₂) + ((ε₃ : ℝ) / ε₄)) := by
+theorem privCompose_DP_bound {nq1 : Mechanism T U} {nq2 : Mechanism T V} {ε₁ ε₂ : NNReal} (h1 : PureDP nq1 ε₁)  (h2 : PureDP nq2 ε₂) :
+  DP (privCompose nq1 nq2) (ε₁ + ε₂) := by
   simp [PureDP] at *
-  have h1a := h1
-  have h2a := h2
   rw [event_eq_singleton] at *
   simp [DP_singleton] at *
   intros l₁ l₂ neighbours x y
-  replace h1a := h1a l₁ l₂ neighbours x
-  replace h2a := h2a l₁ l₂ neighbours y
+  replace h1a := h1 l₁ l₂ neighbours x
+  replace h2a := h2 l₁ l₂ neighbours y
   simp [privCompose]
   conv =>
     left
@@ -85,7 +83,7 @@ theorem privCompose_DP_bound {nq1 : Mechanism T U} {nq2 : Mechanism T V} {ε₁
         have B := mul_le_mul' h1a h2a
         apply le_trans B
         rw [Real.exp_add]
-        rw [ENNReal.ofReal_mul (Real.exp_nonneg (↑↑ε₁ / ↑↑ε₂))]
+        rw [ENNReal.ofReal_mul (Real.exp_nonneg ε₁)]
     . aesop
     . aesop
   . aesop
@@ -95,8 +93,8 @@ theorem privCompose_DP_bound {nq1 : Mechanism T U} {nq2 : Mechanism T V} {ε₁
 /--
 Pure DP satisfies pure differential privacy.
 -/
-theorem privCompose_DP (nq1 : Mechanism T U) (nq2 : Mechanism T V) (ε₁ ε₂ ε₃ ε₄ : ℕ+) (h : PureDP nq1 ((ε₁ : ℝ) / ε₂))  (h' : PureDP nq2 ((ε₃ : ℝ) / ε₄)) :
-  PureDP (privCompose nq1 nq2) (((ε₁ : ℝ) / ε₂) + ((ε₃ : ℝ) / ε₄)) := by
+theorem privCompose_DP (nq1 : Mechanism T U) (nq2 : Mechanism T V) (ε₁ ε₂ : NNReal) (h : PureDP nq1 ε₁)  (h' : PureDP nq2 ε₂) :
+  PureDP (privCompose nq1 nq2) (ε₁ + ε₂) := by
   simp [PureDP] at *
   apply privCompose_DP_bound h h'
 

diff --git a/SampCert/DifferentialPrivacy/Pure/Const.lean b/SampCert/DifferentialPrivacy/Pure/Const.lean
@@ -0,0 +1,40 @@
+/-
+Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Jean-Baptiste Tristan
+-/
+import SampCert.DifferentialPrivacy.Abstract
+import SampCert.DifferentialPrivacy.Pure.DP
+
+/-!
+# zCDP Constant function
+
+This file proves a DP bound on the constant query
+-/
+
+noncomputable section
+
+open Classical Nat Int Real ENNReal MeasureTheory Measure
+
+namespace SLang
+
+variable { T U : Type }
+
+/--
+Constant query satisfies zCDP Renyi divergence bound.
+-/
+theorem privConst_DP_Bound {u : U} : DP (privConst u : Mechanism T U) 0 := by
+  rw [event_eq_singleton]
+  rw [DP_singleton]
+  intros
+  simp [privConst]
+  split <;> simp
+
+/--
+``privComposeAdaptive`` satisfies zCDP
+-/
+theorem PureDP_privConst : ∀ (u : U),  PureDP (privConst u : Mechanism T U) (0 : NNReal) := by
+  simp [PureDP] at *
+  apply privConst_DP_Bound
+
+end SLang
diff --git a/SampCert/DifferentialPrivacy/Pure/DP.lean b/SampCert/DifferentialPrivacy/Pure/DP.lean
@@ -20,7 +20,7 @@ def DP (m : Mechanism T U) (ε : ℝ) : Prop :=
   ∀ l₁ l₂ : List T, Neighbour l₁ l₂ → ∀ S : Set U,
   (∑' x : U, if x ∈ S then m l₁ x else 0) / (∑' x : U, if x ∈ S then m l₂ x else 0) ≤ ENNReal.ofReal (Real.exp ε)
 
-def PureDP (m : Mechanism T U) (ε : ℝ) : Prop :=
+def PureDP (m : Mechanism T U) (ε : NNReal) : Prop :=
   DP m ε
 
 def DP_singleton (m : Mechanism T U) (ε : ℝ) : Prop :=
@@ -75,4 +75,16 @@ theorem event_eq_singleton (m : Mechanism T U) (ε : ℝ) :
   . apply event_to_singleton
   . apply singleton_to_event
 
+lemma PureDP_mono {m : Mechanism T U} {ε₁ ε₂ : NNReal} (H : ε₁ ≤ ε₂) (Hε : PureDP m ε₁) : PureDP m ε₂ := by
+  rw [PureDP] at *
+  apply (event_eq_singleton _ _).mpr
+  apply (event_eq_singleton _ _).mp at Hε
+  simp [DP_singleton] at *
+  intro l₁ l₂ N x
+  apply (@le_trans _ _ _ (ENNReal.ofReal (Real.exp ↑ε₁)) _ (Hε l₁ l₂ N x))
+  apply ENNReal.coe_mono
+  refine (Real.toNNReal_le_toNNReal_iff ?a.hp).mpr ?a.a
+  · exact Real.exp_nonneg ↑ε₂
+  · exact Real.exp_le_exp.mpr H
+
 end SLang
diff --git a/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean b/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean
@@ -119,7 +119,7 @@ theorem privNoisedQueryPure_DP_bound (query : List T → ℤ) (Δ ε₁ ε₂ :
 Laplace noising mechanism ``privNoisedQueryPure`` produces a pure ``ε₁/ε₂``-DP mechanism from a Δ-sensitive query.
 -/
 theorem privNoisedQueryPure_DP (query : List T → ℤ) (Δ ε₁ ε₂ : ℕ+) (bounded_sensitivity : sensitivity query Δ) :
-  PureDP (privNoisedQueryPure query Δ ε₁ ε₂) ((ε₁ : ℝ) / ε₂) := by
+  PureDP (privNoisedQueryPure query Δ ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by
   simp [PureDP]
   apply privNoisedQueryPure_DP_bound
   apply bounded_sensitivity

diff --git a/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean b/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean
@@ -20,14 +20,13 @@ open Classical Set
 
 namespace SLang
 
-lemma privPostProcess_DP_bound {nq : Mechanism T U} {ε₁ ε₂ : ℕ+} (h : PureDP nq ((ε₁ : ℝ) / ε₂)) (f : U → V) :
-  DP (privPostProcess nq f) (((ε₁ : ℝ) / ε₂)) := by
+lemma privPostProcess_DP_bound {nq : Mechanism T U} {ε : NNReal} (h : PureDP nq ε) (f : U → V) :
+  DP (privPostProcess nq f) ε := by
   simp [PureDP] at *
-  have ha := h
   rw [event_eq_singleton] at *
   simp [DP_singleton] at *
   intros l₁ l₂ neighbours x
-  replace ha := ha l₁ l₂ neighbours
+  replace h := h l₁ l₂ neighbours
   simp [privPostProcess]
   apply ENNReal.div_le_of_le_mul
   rw [← ENNReal.tsum_mul_left]
@@ -36,18 +35,18 @@ lemma privPostProcess_DP_bound {nq : Mechanism T U} {ε₁ ε₂ : ℕ+} (h : Pu
   split
   . rename_i h
     subst h
-    refine (ENNReal.div_le_iff_le_mul ?inl.hb0 ?inl.hbt).mp (ha i)
+    refine (ENNReal.div_le_iff_le_mul ?inl.hb0 ?inl.hbt).mp (h i)
     . aesop
     . right
       simp
-      exact Real.exp_pos ((ε₁: ℝ) / ε₂)
+      exact Real.exp_pos ε
   . simp
 
 /--
 ``privPostProcess`` satisfies pure DP, for any surjective postprocessing function.
 -/
-theorem PureDP_PostProcess {f : U → V} (nq : Mechanism T U) (ε₁ ε₂ : ℕ+) (h : PureDP nq ((ε₁ : ℝ) / ε₂)) :
-  PureDP (privPostProcess nq f) (((ε₁ : ℝ) / ε₂)) := by
+theorem PureDP_PostProcess {f : U → V} (nq : Mechanism T U) (ε : NNReal) (h : PureDP nq ε) :
+  PureDP (privPostProcess nq f) ε := by
   simp [PureDP] at *
   apply privPostProcess_DP_bound h
 

diff --git a/SampCert/DifferentialPrivacy/Pure/System.lean b/SampCert/DifferentialPrivacy/Pure/System.lean
@@ -9,6 +9,7 @@ import SampCert.DifferentialPrivacy.Pure.Mechanism.Basic
 import SampCert.DifferentialPrivacy.Pure.Composition
 import SampCert.DifferentialPrivacy.Pure.AdaptiveComposition
 import SampCert.DifferentialPrivacy.Pure.Postprocessing
+import SampCert.DifferentialPrivacy.Pure.Const
 
 /-!
 # Pure DP system
@@ -23,10 +24,12 @@ Pure ε-DP with noise drawn from the discrete Laplace distribution.
 -/
 noncomputable instance PureDPSystem : DPSystem T where
   prop := PureDP
+  prop_mono := PureDP_mono
   noise := privNoisedQueryPure
   noise_prop := privNoisedQueryPure_DP
   compose_prop := privCompose_DP
   adaptive_compose_prop := PureDP_ComposeAdaptive'
   postprocess_prop := PureDP_PostProcess
+  const_prop := PureDP_privConst
 
 end SLang
diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean b/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean
@@ -23,15 +23,15 @@ namespace SLang
 variable [dps : DPSystem ℕ]
 
 lemma budget_split (ε₁ ε₂ : ℕ+) :
-  (ε₁ : ℝ) / (ε₂ : ℝ) = (ε₁ : ℝ) / ((2 * ε₂) : ℕ+) + (ε₁ : ℝ) / ((2 * ε₂) : ℕ+) := by
+  (ε₁ : NNReal) / (ε₂ : NNReal) = (ε₁ : NNReal) / ((2 * ε₂) : ℕ+) + (ε₁ : NNReal) / ((2 * ε₂) : ℕ+) := by
   field_simp
   ring_nf
 
 /--
 DP bound for noised mean.
 -/
 theorem privNoisedBoundedMean_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) :
-  dps.prop (privNoisedBoundedMean U ε₁ ε₂) ((ε₁ : ℝ) / ε₂) := by
+  dps.prop (privNoisedBoundedMean U ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by
   unfold privNoisedBoundedMean
   rw [bind_bind_indep]
   apply dps.postprocess_prop

diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean b/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean
@@ -85,7 +85,7 @@ The noised bounded sum satisfies the DP property of the DP system.
 -/
 @[simp]
 theorem privNoisedBoundedSum_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) :
-  dps.prop (privNoisedBoundedSum U ε₁ ε₂) ((ε₁ : ℝ) / ε₂) := by
+  dps.prop (privNoisedBoundedSum U ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by
   apply dps.noise_prop
   apply exactBoundedSum_sensitivity
 

diff --git a/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean b/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean
@@ -45,7 +45,7 @@ The noised counting query satisfies DP property
 -/
 @[simp]
 theorem privNoisedCount_DP (ε₁ ε₂ : ℕ+) :
-  dps.prop (privNoisedCount ε₁ ε₂) ((ε₁ : ℝ) / ε₂) := by
+  dps.prop (privNoisedCount ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by
   apply dps.noise_prop
   apply exactCount_1_sensitive
 

diff --git a/SampCert/DifferentialPrivacy/Queries/Histogram/Basic.lean b/SampCert/DifferentialPrivacy/Queries/Histogram/Basic.lean
@@ -0,0 +1,8 @@
+/-
+Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Jean-Baptiste Tristan
+-/
+
+import SampCert.DifferentialPrivacy.Queries.Histogram.Code
+import SampCert.DifferentialPrivacy.Queries.Histogram.Properties