Remove the use of floating-point for palette packing

This is primarily a correctness change, *not* a performance one. The expected performance impact is minimal anyway. The goal is to eliminate the use of platform-inconsistent floating-point operations for this load-bearing task.
ISSOtm · Nov 29, 2024 · ca1ccee · ca1ccee
1 parent a27f704
commit ca1ccee
Showing 1 changed file with 36 additions and 21 deletions.
diff --git a/src/gfx/pal_packing.cpp b/src/gfx/pal_packing.cpp
@@ -7,6 +7,7 @@
 #include <inttypes.h>
 #include <optional>
 #include <queue>
+#include <stdint.h>
 #include <type_traits>
 #include <unordered_set>
 
@@ -199,21 +200,35 @@ class AssignedProtos {
 		return colors.size() <= options.maxOpaqueColors();
 	}
 
+	// The formula we want to compute[1] is:
+	//     sum((1 / <how many of our proto-pals contain `color`>)  for `color` in `protoPal`)
+	// But, this yields a non-integer result. Using floating-point numbers would lead
+	// to imprecision, and even per-platform differences. Not good.
+	// We avoid this by multiplying the formula such that the division always produces an integer;
+	// the LCM of all values the denominator can take is the smallest suitable factor.
+	//
+	// [1]: The paper and the associated code disagree on the formula: the code adds 1 to the
+	// denominator, whereas the paper does not; its lack causes a division by 0 if the color is not
+	// found in any proto-pal, and adding it does not seem to invalidate the paper's reasoning,
+	// so it seems safe to assume the paper is wrong.
+	static constexpr uint32_t scaleFactor = 12'252'240; // LCM(1..=17)
+
 	/*
 	 * Computes the "relative size" of a proto-palette on this palette
 	 */
-	double relSizeOf(ProtoPalette const &protoPal) const {
+	uint32_t relSizeOf(ProtoPalette const &protoPal) const {
 		// NOTE: this function must not call `uniqueColors`, or one of its callers will break!
-		double relSize = 0.;
+
+		uint32_t relSize = 0;
 		for (uint16_t color : protoPal) {
 			auto n = std::count_if(RANGE(*this), [this, &color](ProtoPalAttrs const &attrs) {
 				ProtoPalette const &pal = (*_protoPals)[attrs.protoPalIndex];
 				return std::find(RANGE(pal), color) != pal.end();
 			});
-			// NOTE: The paper and the associated code disagree on this: the code has
-			// this `1 +`, whereas the paper does not; its lack causes a division by 0
-			// if the symbol is not found anywhere, so I'm assuming the paper is wrong.
-			relSize += 1. / (1 + n);
+			assume(
+			    scaleFactor % (1 + n) == 0
+			); // The scale factor should ensure integer divisions only.
+			relSize += scaleFactor / (1 + n);
 		}
 		return relSize;
 	}
@@ -383,18 +398,18 @@ std::tuple<DefaultInitVec<size_t>, size_t>
 		size_t bestPalIndex = assignments.size();
 		// We're looking for a palette where the proto-palette's relative size is less than
 		// its actual size; so only overwrite the "not found" index on meeting that criterion
-		double bestRelSize = protoPal.size();
+		uint32_t bestRelSize = protoPal.size() * AssignedProtos::scaleFactor;
 
 		for (size_t i = 0; i < assignments.size(); ++i) {
 			// Skip the page if this one is banned from it
 			if (attrs.isBannedFrom(i)) {
 				continue;
 			}
 
-			double relSize = assignments[i].relSizeOf(protoPal);
+			uint32_t relSize = assignments[i].relSizeOf(protoPal);
 			options.verbosePrint(
 			    Options::VERB_TRACE,
-			    "  Relative size to palette %zu (of %zu): %.20f (size = %zu)\n",
+			    "  Relative size to palette %zu (of %zu): %" PRIu32 " (size = %zu)\n",
 			    i,
 			    assignments.size(),
 			    relSize,
@@ -444,16 +459,16 @@ std::tuple<DefaultInitVec<size_t>, size_t>
 					    ProtoPalette const &rhsProtoPal = protoPalettes[rhs.protoPalIndex];
 					    size_t lhsSize = lhsProtoPal.size();
 					    size_t rhsSize = rhsProtoPal.size();
-					    double lhsRelSize = bestPal.relSizeOf(lhsProtoPal);
-					    double rhsRelSize = bestPal.relSizeOf(rhsProtoPal);
+					    uint32_t lhsRelSize = bestPal.relSizeOf(lhsProtoPal);
+					    uint32_t rhsRelSize = bestPal.relSizeOf(rhsProtoPal);
 
 					    // This comparison is algebraically equivalent to
 					    // `lhsSize / lhsRelSize < rhsSize / rhsRelSize`,
 					    // but without potential precision loss from floating-point division.
 					    options.verbosePrint(
 					        Options::VERB_TRACE,
-					        "  Proto-palettes %zu <=> %zu: Efficiency: %zu / %.20f <=> %zu / "
-					        "%.20f\n",
+					        "  Proto-palettes %zu <=> %zu: Efficiency: %zu / %" PRIu32 " <=> %zu / "
+					        "%" PRIu32 "\n",
 					        lhs.protoPalIndex,
 					        rhs.protoPalIndex,
 					        lhsSize,
@@ -471,23 +486,23 @@ std::tuple<DefaultInitVec<size_t>, size_t>
 				ProtoPalette const &maxProtoPal = protoPalettes[maxEfficiencyIter->protoPalIndex];
 				size_t minSize = minProtoPal.size();
 				size_t maxSize = maxProtoPal.size();
-				double minRelSize = bestPal.relSizeOf(minProtoPal);
-				double maxRelSize = bestPal.relSizeOf(maxProtoPal);
-				// This comparison is algebraically equivalent to
-				// `maxSize / maxRelSize - minSize / minRelSize < .001`,
-				// but without potential precision loss from floating-point division.
-				// TODO: yikes for float comparison! I *think* this threshold is OK?
+				uint32_t minRelSize = bestPal.relSizeOf(minProtoPal);
+				uint32_t maxRelSize = bestPal.relSizeOf(maxProtoPal);
 				options.verbosePrint(
 				    Options::VERB_TRACE,
-				    "  Proto-palettes %zu <= %zu: Efficiency: %zu / %.20f <= %zu / %.20f\n",
+				    "  Proto-palettes %zu <= %zu: Efficiency: %zu / %" PRIu32 " <= %zu / %" PRIu32
+				    "\n",
 				    minEfficiencyIter->protoPalIndex,
 				    maxEfficiencyIter->protoPalIndex,
 				    minSize,
 				    minRelSize,
 				    maxSize,
 				    maxRelSize
 				);
-				if (maxSize * minRelSize - minSize * maxRelSize < minRelSize * maxRelSize * .001) {
+				// This comparison is algebraically equivalent to
+				// `maxSize / maxRelSize == minSize / minRelSize`,
+				// but without potential precision loss from floating-point division.
+				if (maxSize * minRelSize == minSize * maxRelSize) {
 					options.verbosePrint(Options::VERB_TRACE, "  All efficiencies are identical\n");
 					break;
 				}