Merge pull request #48 from SoftVarE-Group/attribute-twise

Enable attributed t-wise sampling
SoftVarE-Group · Jan 27, 2025 · ed56a74 · ed56a74
2 parents 6e2b3a4 + e66851b
commit ed56a74
Show file tree

Hide file tree

Showing 22 changed files with 2,074 additions and 102 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/ddnnife/Cargo.toml b/ddnnife/Cargo.toml
@@ -16,6 +16,7 @@ uniffi = ["dep:uniffi"]
 crate-type = ["lib", "cdylib"]
 
 [dependencies]
+bimap = "0.6"
 bitvec = "1.0"
 csv = { workspace = true }
 file_diff = "1.0.0"
@@ -28,6 +29,7 @@ petgraph = "0.7"
 rand = "0.8"
 rand_distr = "0.4"
 rand_pcg = "0.3"
+regex = "1.11"
 streaming-iterator = "0.1"
 tempfile = "3.15"
 uniffi = { workspace = true, optional = true }

diff --git a/ddnnife/src/ddnnf.rs b/ddnnife/src/ddnnf.rs
@@ -1,6 +1,7 @@
 pub mod anomalies;
 pub mod clause_cache;
 pub mod counting;
+pub mod extended_ddnnf;
 pub mod heuristics;
 pub mod multiple_queries;
 pub mod node;

diff --git a/ddnnife/src/ddnnf/anomalies/sat.rs b/ddnnife/src/ddnnf/anomalies/sat.rs
@@ -17,6 +17,11 @@ impl Ddnnf {
         self.sat_propagate(features, &mut vec![false; self.nodes.len()], None)
     }
 
+    #[inline]
+    pub fn sat_immutable(&self, features: &[i32]) -> bool {
+        self.sat_propagate(features, &mut vec![false; self.nodes.len()], None)
+    }
+
     /// Does the exact same as 'sat' with the difference of choosing the marking Vec by ourself.
     /// That allows reusing that vector and therefore enabeling an efficient method to do decision propogation.
     /// If wanted, one can supply an marking Vec<bool>, that can be reused in following method calls to propagate satisfiability.

diff --git a/ddnnife/src/ddnnf/anomalies/t_wise_sampling.rs b/ddnnife/src/ddnnf/anomalies/t_wise_sampling.rs
@@ -7,14 +7,25 @@ mod sat_wrapper;
 mod t_iterator;
 mod t_wise_sampler;
 
+use crate::ddnnf::extended_ddnnf::objective_function::FloatOrd;
+use crate::ddnnf::extended_ddnnf::ExtendedDdnnf;
 use crate::Ddnnf;
 pub use config::Config;
+use covering_strategies::cover_with_caching_sorted;
+use itertools::Itertools;
 pub use sample::Sample;
+use sample_merger::attribute_similarity_merger::AttributeSimilarityMerger;
+use sample_merger::attribute_zipping_merger::AttributeZippingMerger;
 use sample_merger::similarity_merger::SimilarityMerger;
 use sample_merger::zipping_merger::ZippingMerger;
 pub use sampling_result::SamplingResult;
 use sat_wrapper::SatWrapper;
+use std::cmp::min;
+use streaming_iterator::StreamingIterator;
+use t_iterator::TInteractionIter;
 use t_wise_sampler::TWiseSampler;
+use t_wise_sampler::{complete_partial_configs_optimal, trim_and_resample};
+use SamplingResult::ResultWithSample;
 
 #[cfg_attr(feature = "uniffi", uniffi::export)]
 impl Ddnnf {
@@ -34,62 +45,184 @@ impl Ddnnf {
     }
 }
 
+impl ExtendedDdnnf {
+    pub fn sample_t_wise(&self, t: usize) -> SamplingResult {
+        let sat_solver = SatWrapper::new(&self.ddnnf);
+        let and_merger = AttributeZippingMerger {
+            t,
+            sat_solver: &sat_solver,
+            ext_ddnnf: self,
+        };
+        let or_merger = AttributeSimilarityMerger { t, ext_ddnnf: self };
+
+        let mut sampler = TWiseSampler::new(&self.ddnnf, and_merger, or_merger);
+
+        for node_id in 0..sampler.ddnnf.nodes.len() {
+            let partial_sample = sampler.partial_sample(node_id);
+            sampler.partial_samples.insert(node_id, partial_sample);
+        }
+
+        let root_id = sampler.ddnnf.nodes.len() - 1;
+
+        let sampling_result = sampler
+            .partial_samples
+            .remove(&root_id)
+            .expect("Root sample does not exist!");
+
+        if let ResultWithSample(mut sample) = sampling_result {
+            debug_assert!(
+                sample
+                    .complete_configs
+                    .iter()
+                    .all(|config| !config.get_literals().contains(&0)),
+                "Complete Configs must not contain undecided variables."
+            );
+
+            sample = trim_and_resample(
+                root_id,
+                sample,
+                t,
+                self.ddnnf.number_of_variables as usize,
+                &sat_solver,
+            );
+
+            complete_partial_configs_optimal(&mut sample, self);
+
+            ResultWithSample(sample)
+        } else {
+            sampling_result
+        }
+    }
+
+    pub fn sample_t_wise_yasa(&self, t: usize) -> Sample {
+        let sat_solver = SatWrapper::new(&self.ddnnf);
+        let vars = (1..=self.ddnnf.number_of_variables).collect_vec();
+        let literals = (-(self.ddnnf.number_of_variables as i32)
+            ..=self.ddnnf.number_of_variables as i32)
+            .filter(|&literal| literal != 0)
+            .collect_vec();
+        let root_id = self.ddnnf.nodes.len() - 1;
+        let mut sample = Sample::new(vars.into_iter().collect());
+
+        let mut interactions = Vec::new();
+        TInteractionIter::new(&literals[..], min(literals.len(), t))
+            .for_each(|interaction| interactions.push(interaction.to_vec()));
+
+        interactions
+            .iter()
+            .sorted_by_cached_key(|interaction| {
+                FloatOrd::from(self.get_objective_fn_val_of_literals(&interaction[..]))
+            })
+            .rev()
+            .for_each(|interaction| {
+                cover_with_caching_sorted(
+                    &mut sample,
+                    interaction,
+                    &sat_solver,
+                    root_id,
+                    self.ddnnf.number_of_variables as usize,
+                    self,
+                );
+            });
+
+        sample = trim_and_resample(
+            root_id,
+            sample,
+            t,
+            self.ddnnf.number_of_variables as usize,
+            &sat_solver,
+        );
+        complete_partial_configs_optimal(&mut sample, self);
+
+        sample.literals = literals;
+
+        sample
+    }
+}
+
 #[cfg(test)]
 mod test {
     use itertools::Itertools;
+    use std::collections::HashSet;
 
+    use crate::ddnnf::anomalies::t_wise_sampling::t_iterator::TInteractionIter;
+    use crate::ddnnf::anomalies::t_wise_sampling::Sample;
+    use crate::ddnnf::extended_ddnnf::optimal_configs::test::build_sandwich_ext_ddnnf_with_objective_function_values;
     use crate::{parser::build_ddnnf, Ddnnf};
+    use streaming_iterator::StreamingIterator;
 
-    #[test]
-    fn t_wise_sampling_validity() {
-        let mut vp9: Ddnnf = build_ddnnf("tests/data/VP9_d4.nnf", Some(42));
-        let mut auto1: Ddnnf = build_ddnnf("tests/data/auto1_d4.nnf", Some(2513));
+    fn check_validity_of_sample(sample: &Sample, ddnnf: &Ddnnf, t: usize) {
+        let sample_literals: HashSet<i32> = sample.get_literals().iter().copied().collect();
+        sample.iter().for_each(|config| {
+            assert!(config
+                .get_decided_literals()
+                .all(|literal| sample_literals.contains(&literal)));
+        });
 
-        check_validity_samplingresult(&mut vp9, 1);
-        check_validity_samplingresult(&mut vp9, 2);
-        check_validity_samplingresult(&mut vp9, 3);
-        check_validity_samplingresult(&mut vp9, 4);
-        check_validity_samplingresult(&mut auto1, 1);
-
-        fn check_validity_samplingresult(ddnnf: &mut Ddnnf, t: usize) {
-            let t_wise_samples = ddnnf.sample_t_wise(t);
-            let configs = t_wise_samples
-                .optional()
-                .unwrap()
-                .iter()
-                .map(|config| config.get_literals())
-                .collect_vec();
-
-            for config in configs.iter() {
+        sample
+            .iter()
+            .map(|config| config.get_decided_literals().collect_vec())
+            .for_each(|literals| {
                 // every config must be complete and satisfiable
                 assert_eq!(
                     ddnnf.number_of_variables as usize,
-                    config.len(),
+                    literals.len(),
                     "config is not complete"
                 );
-                assert!(ddnnf.sat(config));
-            }
-
-            let mut possible_features = (-(ddnnf.number_of_variables as i32)
-                ..=ddnnf.number_of_variables as i32)
-                .collect_vec();
-            possible_features.remove(ddnnf.number_of_variables as usize); // remove the 0
-            for combi in possible_features.into_iter().combinations(t) {
-                // checks if the pair can be found in at least one of the samples
-                let combi_exists = |combi: &[i32]| -> bool {
-                    configs.iter().any(|config| {
-                        combi
-                            .iter()
-                            .all(|&f| config[f.unsigned_abs() as usize - 1] == f)
-                    })
-                };
+                assert!(ddnnf.sat_immutable(&literals[..]));
+            });
+
+        let all_literals = (-(ddnnf.number_of_variables as i32)..=ddnnf.number_of_variables as i32)
+            .filter(|&literal| literal != 0)
+            .collect_vec();
 
+        TInteractionIter::new(&all_literals[..], t)
+            .filter(|interaction| ddnnf.sat_immutable(interaction))
+            .for_each(|interaction| {
                 assert!(
-                    combi_exists(&combi) || !ddnnf.sat(&combi),
-                    "combination: {:?} can neither be convered with samples nor is it unsat",
-                    combi
+                    sample.covers(interaction),
+                    "Valid interaction {:?} is not covered.",
+                    interaction
                 )
-            }
+            });
+    }
+
+    #[test]
+    fn ddnnf_t_wise_sampling_validity_small_model() {
+        let vp9: Ddnnf = build_ddnnf("tests/data/VP9_d4.nnf", Some(42));
+
+        for t in 1..=4 {
+            check_validity_of_sample(vp9.sample_t_wise(t).get_sample().unwrap(), &vp9, t);
+        }
+    }
+
+    #[test]
+    fn ddnnf_t_wise_sampling_validity_big_model() {
+        let mut auto1: Ddnnf = build_ddnnf("tests/data/auto1_d4.nnf", Some(2513));
+        let t = 1;
+
+        check_validity_of_sample(auto1.sample_t_wise(t).get_sample().unwrap(), &mut auto1, t);
+    }
+
+    #[test]
+    fn ext_ddnnf_t_wise_sampling_validity() {
+        let ext_ddnnf = build_sandwich_ext_ddnnf_with_objective_function_values();
+
+        for t in 1..=4 {
+            check_validity_of_sample(
+                ext_ddnnf.sample_t_wise(t).get_sample().unwrap(),
+                &ext_ddnnf.ddnnf,
+                t,
+            );
+        }
+    }
+
+    #[test]
+    fn ext_ddnnf_t_wise_sampling_yasa_validity() {
+        let ext_ddnnf = build_sandwich_ext_ddnnf_with_objective_function_values();
+
+        for t in 1..=4 {
+            check_validity_of_sample(&ext_ddnnf.sample_t_wise_yasa(t), &ext_ddnnf.ddnnf, t);
         }
     }
 }
diff --git a/ddnnife/src/ddnnf/anomalies/t_wise_sampling/config.rs b/ddnnife/src/ddnnf/anomalies/t_wise_sampling/config.rs
@@ -1,6 +1,7 @@
 use super::SatWrapper;
 use crate::util::format_vec;
 use std::fmt::Display;
+use std::hash::{Hash, Hasher};
 
 /// Represents a (partial) configuration
 #[cfg_attr(feature = "uniffi", derive(uniffi::Record))]
@@ -10,6 +11,14 @@ pub struct Config {
     pub literals: Vec<i32>,
     pub sat_state: Option<Vec<bool>>,
     sat_state_complete: bool,
+    /// The number of decided literals
+    pub n_decided_literals: usize,
+}
+
+impl Hash for Config {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.literals.hash(state)
+    }
 }
 
 impl PartialEq for Config {
@@ -41,6 +50,7 @@ impl Config {
             literals: vec![0; number_of_variables],
             sat_state: None,
             sat_state_complete: false,
+            n_decided_literals: 0,
         };
         config.extend(literals.iter().copied());
         config
@@ -60,7 +70,7 @@ impl Config {
                 marker does not propagate upward to the AND. So the AND remains unmarked which
                 is wrong and may cause wrong results when SAT solving.
                  */
-                if left.get_decided_literals().count() >= right.get_decided_literals().count() {
+                if left.n_decided_literals >= right.n_decided_literals {
                     Some(left_state)
                 } else {
                     Some(right_state)
@@ -74,6 +84,7 @@ impl Config {
             literals: vec![0; number_of_variables],
             sat_state,
             sat_state_complete: false, // always false because we can not combine the states
+            n_decided_literals: 0,
         };
         config.extend(left.get_decided_literals());
         config.extend(right.get_decided_literals());
@@ -109,6 +120,12 @@ impl Config {
         self.sat_state_complete
     }
 
+    /// Returns the number of decided literals
+    pub fn get_n_decided_literals(&self) -> usize {
+        debug_assert!(self.n_decided_literals == self.get_decided_literals().count());
+        self.n_decided_literals
+    }
+
     /// Uses the given [SatWrapper] to update the cached sat solver state in this config.
     /// This does nothing if the cache is up to date.
     pub fn update_sat_state(&mut self, sat_solver: &SatWrapper, root: usize) {
@@ -165,6 +182,14 @@ impl Config {
         debug_assert!(literal != 0);
         self.sat_state_complete = false;
         let index = literal.unsigned_abs() as usize - 1;
+        if self.literals[index] == 0 {
+            self.n_decided_literals += 1;
+        }
         self.literals[index] = literal;
     }
+
+    /// Checks if the config is complete (all literals are decided)
+    pub fn is_complete(&self) -> bool {
+        self.n_decided_literals == self.literals.len()
+    }
 }