From 0a4fc3e6ad707a466af611ec919b3db08720356c Mon Sep 17 00:00:00 2001
From: noajshu <shutty@google.com>
Date: Fri, 15 Mar 2024 23:13:23 +0000
Subject: [PATCH] remove old method, add better docstrings, add more tests and
 fix shortest error case for probability 0 errors

---
 doc/python_api_reference_vDev.md   |  26 +++-----
 doc/stim.pyi                       |  26 +++-----
 glue/python/src/stim/__init__.pyi  |  26 +++-----
 src/stim/circuit/circuit.pybind.cc |  92 +++----------------------
 src/stim/search/sat/wcnf.cc        |   4 +-
 src/stim/search/sat/wcnf.test.cc   | 104 ++++++++++++++++++-----------
 6 files changed, 106 insertions(+), 172 deletions(-)

diff --git a/doc/python_api_reference_vDev.md b/doc/python_api_reference_vDev.md
index 998249b58..4028e1ae2 100644
--- a/doc/python_api_reference_vDev.md
+++ b/doc/python_api_reference_vDev.md
@@ -2145,13 +2145,13 @@ def likeliest_error_sat_problem(
     quantization: int = 100,
     format: str = 'WDIMACS',
 ) -> str:
-    """Makes a maxSAT problem of the circuit's distance, that other tools can solve.
+    """Makes a maxSAT problem of the circuit's most likely undetectable logical
+    error, that other tools can solve.
 
     The output is a string describing the maxSAT problem in WDIMACS format
     (see https://maxhs.org/docs/wdimacs.html). The optimal solution to the
-    problem is the fault distance of the circuit (the minimum number of error
-    mechanisms that combine to flip any logical observable while producing no
-    detection events).
+    problem is the highest likelihood set of error mechanisms that combine to
+    flip any logical observable while producing no detection events).
 
     There are many tools that can solve maxSAT problems in WDIMACS format.
     For example, you can download one of the entries in the 2023 maxSAT
@@ -2165,12 +2165,12 @@ def likeliest_error_sat_problem(
     Args:
         format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
         described here: http://www.maxhs.org/docs/wdimacs.html
-        weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-            converted to log-odds and scaled/rounded to be positive integers at
-            most this large. Setting this argument to a larger number results in
-            the solution to the problem being the most likely logical error
-            weighted by probability of its parts, instead of just the one with
-            the fewest parts.
+        quantization: Defaults to 10. Error probabilities are converted to log-odds
+            and scaled/rounded to be positive integers at most this large. Setting
+            this argument to a larger number results in more accurate quantization
+            such that the returned error set should have a likelihood closer to the
+            true most likely solution. This comes at the cost of making some maxSAT
+            solvers slower.
 
     Returns:
         A string corresponding to the contents of a WCNF file in the requested
@@ -2541,12 +2541,6 @@ def shortest_error_sat_problem(
     Args:
         format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
         described here: http://www.maxhs.org/docs/wdimacs.html
-        weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-            converted to log-odds and scaled/rounded to be positive integers at
-            most this large. Setting this argument to a larger number results in
-            the solution to the problem being the most likely logical error
-            weighted by probability of its parts, instead of just the one with
-            the fewest parts.
 
     Returns:
         A string corresponding to the contents of a WCNF file in the requested
diff --git a/doc/stim.pyi b/doc/stim.pyi
index 0cf853ca4..c012ae642 100644
--- a/doc/stim.pyi
+++ b/doc/stim.pyi
@@ -1531,13 +1531,13 @@ class Circuit:
         quantization: int = 100,
         format: str = 'WDIMACS',
     ) -> str:
-        """Makes a maxSAT problem of the circuit's distance, that other tools can solve.
+        """Makes a maxSAT problem of the circuit's most likely undetectable logical
+        error, that other tools can solve.
 
         The output is a string describing the maxSAT problem in WDIMACS format
         (see https://maxhs.org/docs/wdimacs.html). The optimal solution to the
-        problem is the fault distance of the circuit (the minimum number of error
-        mechanisms that combine to flip any logical observable while producing no
-        detection events).
+        problem is the highest likelihood set of error mechanisms that combine to
+        flip any logical observable while producing no detection events).
 
         There are many tools that can solve maxSAT problems in WDIMACS format.
         For example, you can download one of the entries in the 2023 maxSAT
@@ -1551,12 +1551,12 @@ class Circuit:
         Args:
             format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
             described here: http://www.maxhs.org/docs/wdimacs.html
-            weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                converted to log-odds and scaled/rounded to be positive integers at
-                most this large. Setting this argument to a larger number results in
-                the solution to the problem being the most likely logical error
-                weighted by probability of its parts, instead of just the one with
-                the fewest parts.
+            quantization: Defaults to 10. Error probabilities are converted to log-odds
+                and scaled/rounded to be positive integers at most this large. Setting
+                this argument to a larger number results in more accurate quantization
+                such that the returned error set should have a likelihood closer to the
+                true most likely solution. This comes at the cost of making some maxSAT
+                solvers slower.
 
         Returns:
             A string corresponding to the contents of a WCNF file in the requested
@@ -1864,12 +1864,6 @@ class Circuit:
         Args:
             format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
             described here: http://www.maxhs.org/docs/wdimacs.html
-            weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                converted to log-odds and scaled/rounded to be positive integers at
-                most this large. Setting this argument to a larger number results in
-                the solution to the problem being the most likely logical error
-                weighted by probability of its parts, instead of just the one with
-                the fewest parts.
 
         Returns:
             A string corresponding to the contents of a WCNF file in the requested
diff --git a/glue/python/src/stim/__init__.pyi b/glue/python/src/stim/__init__.pyi
index 0cf853ca4..c012ae642 100644
--- a/glue/python/src/stim/__init__.pyi
+++ b/glue/python/src/stim/__init__.pyi
@@ -1531,13 +1531,13 @@ class Circuit:
         quantization: int = 100,
         format: str = 'WDIMACS',
     ) -> str:
-        """Makes a maxSAT problem of the circuit's distance, that other tools can solve.
+        """Makes a maxSAT problem of the circuit's most likely undetectable logical
+        error, that other tools can solve.
 
         The output is a string describing the maxSAT problem in WDIMACS format
         (see https://maxhs.org/docs/wdimacs.html). The optimal solution to the
-        problem is the fault distance of the circuit (the minimum number of error
-        mechanisms that combine to flip any logical observable while producing no
-        detection events).
+        problem is the highest likelihood set of error mechanisms that combine to
+        flip any logical observable while producing no detection events).
 
         There are many tools that can solve maxSAT problems in WDIMACS format.
         For example, you can download one of the entries in the 2023 maxSAT
@@ -1551,12 +1551,12 @@ class Circuit:
         Args:
             format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
             described here: http://www.maxhs.org/docs/wdimacs.html
-            weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                converted to log-odds and scaled/rounded to be positive integers at
-                most this large. Setting this argument to a larger number results in
-                the solution to the problem being the most likely logical error
-                weighted by probability of its parts, instead of just the one with
-                the fewest parts.
+            quantization: Defaults to 10. Error probabilities are converted to log-odds
+                and scaled/rounded to be positive integers at most this large. Setting
+                this argument to a larger number results in more accurate quantization
+                such that the returned error set should have a likelihood closer to the
+                true most likely solution. This comes at the cost of making some maxSAT
+                solvers slower.
 
         Returns:
             A string corresponding to the contents of a WCNF file in the requested
@@ -1864,12 +1864,6 @@ class Circuit:
         Args:
             format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
             described here: http://www.maxhs.org/docs/wdimacs.html
-            weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                converted to log-odds and scaled/rounded to be positive integers at
-                most this large. Setting this argument to a larger number results in
-                the solution to the problem being the most likely logical error
-                weighted by probability of its parts, instead of just the one with
-                the fewest parts.
 
         Returns:
             A string corresponding to the contents of a WCNF file in the requested
diff --git a/src/stim/circuit/circuit.pybind.cc b/src/stim/circuit/circuit.pybind.cc
index 9b12038d6..5d3e0de32 100644
--- a/src/stim/circuit/circuit.pybind.cc
+++ b/src/stim/circuit/circuit.pybind.cc
@@ -2003,12 +2003,6 @@ void stim_pybind::pybind_circuit_methods(pybind11::module &, pybind11::class_<Ci
             Args:
                 format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
                 described here: http://www.maxhs.org/docs/wdimacs.html
-                weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                    converted to log-odds and scaled/rounded to be positive integers at
-                    most this large. Setting this argument to a larger number results in
-                    the solution to the problem being the most likely logical error
-                    weighted by probability of its parts, instead of just the one with
-                    the fewest parts.
 
             Returns:
                 A string corresponding to the contents of a WCNF file in the requested
@@ -2051,13 +2045,13 @@ void stim_pybind::pybind_circuit_methods(pybind11::module &, pybind11::class_<Ci
         pybind11::arg("quantization") = 100,
         pybind11::arg("format") = "WDIMACS",
         clean_doc_string(R"DOC(
-            Makes a maxSAT problem of the circuit's distance, that other tools can solve.
+            Makes a maxSAT problem of the circuit's most likely undetectable logical
+            error, that other tools can solve.
 
             The output is a string describing the maxSAT problem in WDIMACS format
             (see https://maxhs.org/docs/wdimacs.html). The optimal solution to the
-            problem is the fault distance of the circuit (the minimum number of error
-            mechanisms that combine to flip any logical observable while producing no
-            detection events).
+            problem is the highest likelihood set of error mechanisms that combine to
+            flip any logical observable while producing no detection events).
 
             There are many tools that can solve maxSAT problems in WDIMACS format.
             For example, you can download one of the entries in the 2023 maxSAT
@@ -2071,12 +2065,12 @@ void stim_pybind::pybind_circuit_methods(pybind11::module &, pybind11::class_<Ci
             Args:
                 format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
                 described here: http://www.maxhs.org/docs/wdimacs.html
-                weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-                    converted to log-odds and scaled/rounded to be positive integers at
-                    most this large. Setting this argument to a larger number results in
-                    the solution to the problem being the most likely logical error
-                    weighted by probability of its parts, instead of just the one with
-                    the fewest parts.
+                quantization: Defaults to 10. Error probabilities are converted to log-odds
+                    and scaled/rounded to be positive integers at most this large. Setting
+                    this argument to a larger number results in more accurate quantization
+                    such that the returned error set should have a likelihood closer to the
+                    true most likely solution. This comes at the cost of making some maxSAT
+                    solvers slower.
 
             Returns:
                 A string corresponding to the contents of a WCNF file in the requested
@@ -2112,72 +2106,6 @@ void stim_pybind::pybind_circuit_methods(pybind11::module &, pybind11::class_<Ci
         )DOC")
             .data());
 
-    // c.def(
-    //     "shortest_error_problem_as_wcnf_file",
-    //     &py_shortest_error_problem_as_wcnf_file,
-    //     pybind11::kw_only(),
-    //     pybind11::arg("weighted") = false,
-    //     pybind11::arg("weight_scale_factor") = 0,
-    //     clean_doc_string(R"DOC(
-    //         Makes a maxSAT problem of the circuit's distance, that other tools can solve.
-
-    //         The output is a string describing the maxSAT problem in WDIMACS format
-    //         (see https://maxhs.org/docs/wdimacs.html). The optimal solution to the
-    //         problem is the fault distance of the circuit (the minimum number of error
-    //         mechanisms that combine to flip any logical observable while producing no
-    //         detection events).
-
-    //         There are many tools that can solve maxSAT problems in WDIMACS format.
-    //         For example, you can download one of the entries in the 2023 maxSAT
-    //         competition (see https://maxsat-evaluations.github.io/2023), for example
-    //         CASHWMaxSAT-CorePlus.zip and run it on your problem by running these
-    //         BASH terminal commands:
-
-    //             unzip CASHWMaxSAT-CorePlus.zip
-    //             ./CASHWMaxSAT-CorePlus/bin/cashwmaxsatcoreplus -bm -m your_problem.wcnf
-
-    //         Args:
-    //             format: Defaults to "WDIMACS", corresponding to WDIMACS format which is
-    //             described here: http://www.maxhs.org/docs/wdimacs.html
-    //             weight_bins: Defaults to 1 (unweighted errors). Error probabilities are
-    //                 converted to log-odds and scaled/rounded to be positive integers at
-    //                 most this large. Setting this argument to a larger number results in
-    //                 the solution to the problem being the most likely logical error
-    //                 weighted by probability of its parts, instead of just the one with
-    //                 the fewest parts.
-
-    //         Returns:
-    //             A string corresponding to the contents of a WCNF file in the requested
-    //             format.
-
-    //         Examples:
-    //             >>> import stim
-    //             >>> circuit = stim.Circuit("""
-    //             ...   X_ERROR(0.1) 0
-    //             ...   M 0
-    //             ...   OBSERVABLE_INCLUDE(0) rec[-1]
-    //             ...   X_ERROR(0.4) 0
-    //             ...   M 0
-    //             ...   DETECTOR rec[-1] rec[-2]
-    //             ... """)
-    //             >>> print(circuit.shortest_error_problem_as_wcnf_file(), end='')
-    //             p wcnf 2 4 5
-    //             1 -1 0
-    //             1 -2 0
-    //             5 -1 0
-    //             5 2 0
-    //             >>> print(circuit.shortest_error_problem_as_wcnf_file(
-    //             ...   weighted=True,
-    //             ...   weight_scale_factor=1000
-    //             ... ), end='')
-    //             p wcnf 2 4 4001
-    //             185 -1 0
-    //             1000 -2 0
-    //             4001 -1 0
-    //             4001 2 0
-    //     )DOC")
-    //         .data());
-
     c.def(
         "explain_detector_error_model_errors",
         [](const Circuit &self,
diff --git a/src/stim/search/sat/wcnf.cc b/src/stim/search/sat/wcnf.cc
index f900eaba1..21c2ea2e9 100644
--- a/src/stim/search/sat/wcnf.cc
+++ b/src/stim/search/sat/wcnf.cc
@@ -196,7 +196,7 @@ std::string sat_problem_as_wcnf_string(const DetectorErrorModel& model, bool wei
 
     size_t error_index = 0;
     model.iter_flatten_error_instructions([&](const DemInstruction& e) {
-        if (e.arg_data[0] != 0) {
+        if (!weighted or e.arg_data[0] != 0) {
             BoolRef err_x = errors_activated[error_index];
             // Add parity contribution to the detectors and observables
             for (const auto& t : e.target_data) {
@@ -232,7 +232,7 @@ std::string sat_problem_as_wcnf_string(const DetectorErrorModel& model, bool wei
                 // For unweighted search the error should be soft clause should be that
                 // the error is inactive.
                 clause.add_var(~err_x);
-                clause.weight = -std::log(p / (1 - p));
+                clause.weight = 1.0;
                 instance.add_clause(clause);
             }
         }
diff --git a/src/stim/search/sat/wcnf.test.cc b/src/stim/search/sat/wcnf.test.cc
index fca3607fa..d58db3c34 100644
--- a/src/stim/search/sat/wcnf.test.cc
+++ b/src/stim/search/sat/wcnf.test.cc
@@ -10,46 +10,70 @@ TEST(shortest_error_sat_problem, no_error) {
 }
 
 TEST(shortest_error_sat_problem, single_detector_single_observable) {
-    std::string wcnf = stim::shortest_error_sat_problem(DetectorErrorModel(R"DEM(
-      error(0.1) D0 L0
-      error(0.1) D0
-    )DEM"));
-    // x_0 -- error 0 occurred
-    // x_1 -- error 1 occurred
-    // x_2 -- XOR of x_0 and x_1
-    // There should be 2 soft clauses:
-    // soft clause NOT(x_0) with weight 1
-    // soft clause NOT(x_0) with weight 1
-    // There should be 4 hard clauses to forbid the strings such that x_2 != XOR(x_0, x_1):
-    // hard clause x != (0, 0, 1)
-    // hard clause x != (0, 1, 0)
-    // hard clause x != (1, 0, 0)
-    // hard clause x != (1, 1, 1)
-    // Plus 1 hard clause to ensure detector is not flipped
-    // hard clause -x_2
-    // Plus 1 hard clause to ensure an observable is flipped:
-    // hard clause x_0
-    // This gives a total of 8 clauses
-    // The top value should be at least 1 + 1 + 1 = 3. In our implementation ends up being 9.
-    std::stringstream expected;
-    // WDIMACS header format: p wcnf nbvar nbclauses top
-    expected << "p wcnf 3 8 9\n";
-    // Soft clause
-    expected << "1 -1 0\n";
-    // Hard clauses
-    expected << "9 1 2 -3 0\n";
-    expected << "9 1 -2 3 0\n";
-    expected << "9 -1 2 3 0\n";
-    expected << "9 -1 -2 -3 0\n";
-    // Soft clause
-    expected << "1 -2 0\n";
-    // Hard clause for the detector not to be flipped
-    expected << "9 -3 0\n";
-    // Hard clause for the observable flipped
-    expected << "9 1 0\n";
-    ASSERT_EQ(wcnf, expected.str());
-    // The optimal value of this wcnf file should be 2, but we don't have
-    // a maxSAT solver to be able to test it here.
+    // To test that it ignores weights entirely, we try several combinations of weights.
+    for (std::string dem_str : {
+             R"DEM(
+            error(0.1) D0 L0
+            error(0.1) D0
+        )DEM",
+             R"DEM(
+            error(1.0) D0 L0
+            error(0) D0
+        )DEM",
+             R"DEM(
+            error(0.5) D0 L0
+            error(0.999) D0
+        )DEM",
+             R"DEM(
+            error(0.001) D0 L0
+            error(0.999) D0
+        )DEM",
+             R"DEM(
+            error(0) D0 L0
+            error(0) D0
+        )DEM",
+             R"DEM(
+            error(0.5) D0 L0
+            error(0.5) D0
+        )DEM"}) {
+        std::string wcnf = stim::shortest_error_sat_problem(DetectorErrorModel(dem_str.c_str()));
+        // x_0 -- error 0 occurred
+        // x_1 -- error 1 occurred
+        // x_2 -- XOR of x_0 and x_1
+        // There should be 2 soft clauses:
+        // soft clause NOT(x_0) with weight 1
+        // soft clause NOT(x_0) with weight 1
+        // There should be 4 hard clauses to forbid the strings such that x_2 != XOR(x_0, x_1):
+        // hard clause x != (0, 0, 1)
+        // hard clause x != (0, 1, 0)
+        // hard clause x != (1, 0, 0)
+        // hard clause x != (1, 1, 1)
+        // Plus 1 hard clause to ensure detector is not flipped
+        // hard clause -x_2
+        // Plus 1 hard clause to ensure an observable is flipped:
+        // hard clause x_0
+        // This gives a total of 8 clauses
+        // The top value should be at least 1 + 1 + 1 = 3. In our implementation ends up being 9.
+        std::stringstream expected;
+        // WDIMACS header format: p wcnf nbvar nbclauses top
+        expected << "p wcnf 3 8 9\n";
+        // Soft clause
+        expected << "1 -1 0\n";
+        // Hard clauses
+        expected << "9 1 2 -3 0\n";
+        expected << "9 1 -2 3 0\n";
+        expected << "9 -1 2 3 0\n";
+        expected << "9 -1 -2 -3 0\n";
+        // Soft clause
+        expected << "1 -2 0\n";
+        // Hard clause for the detector not to be flipped
+        expected << "9 -3 0\n";
+        // Hard clause for the observable flipped
+        expected << "9 1 0\n";
+        ASSERT_EQ(wcnf, expected.str());
+        // The optimal value of this wcnf file should be 2, but we don't have
+        // a maxSAT solver to be able to test it here.
+    }
 }
 
 TEST(likeliest_error_sat_problem, no_error) {