From 29a9fcfecfe27f78c969a263f43150f7cc4f660e Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 9 Nov 2024 16:13:18 -0500 Subject: [PATCH] Delta lookback (#248) --- docs/benchmark_results/mbp_m3_max.csv | 6 +- dtype_dispatch/src/lib.rs | 41 +- images/real_world_compression_ratio.svg | 34 +- images/real_world_compression_speed.svg | 74 +-- images/real_world_decompression_speed.svg | 67 ++- pco/assets/v0_4_0_lookback_delta.pco | Bin 0 -> 120 bytes pco/src/bit_reader.rs | 5 + pco/src/bit_writer.rs | 9 + pco/src/chunk_config.rs | 5 + pco/src/compression_intermediates.rs | 31 +- pco/src/compression_table.rs | 2 +- pco/src/constants.rs | 8 +- pco/src/data_types/dynamic.rs | 9 +- pco/src/data_types/floats.rs | 11 +- pco/src/data_types/mod.rs | 42 +- pco/src/data_types/signeds.rs | 11 +- pco/src/data_types/unsigneds.rs | 16 +- pco/src/delta.rs | 375 +++++++++++- pco/src/describers.rs | 175 ++++-- pco/src/float_mult_utils.rs | 21 +- pco/src/float_quant_utils.rs | 58 +- pco/src/int_mult_utils.rs | 29 +- pco/src/latent_batch_dissector.rs | 8 +- pco/src/latent_chunk_compressor.rs | 85 +-- ...pressor.rs => latent_page_decompressor.rs} | 103 +++- pco/src/lib.rs | 12 +- pco/src/macros.rs | 10 +- pco/src/metadata/bin.rs | 1 - pco/src/metadata/chunk.rs | 249 ++++---- pco/src/metadata/chunk_latent_var.rs | 48 +- pco/src/metadata/delta_encoding.rs | 240 +++++++- pco/src/metadata/dyn_latents.rs | 7 + pco/src/metadata/format_version.rs | 4 + pco/src/metadata/mod.rs | 5 +- pco/src/metadata/mode.rs | 151 ++++- pco/src/metadata/page.rs | 48 +- pco/src/metadata/page_latent_var.rs | 27 +- pco/src/metadata/per_latent_var.rs | 149 +++++ pco/src/split_latents.rs | 7 + pco/src/standalone/compressor.rs | 10 +- pco/src/tests/compatibility.rs | 237 ++++---- pco/src/tests/recovery.rs | 26 +- pco/src/tests/stability.rs | 24 +- pco/src/wrapped/chunk_compressor.rs | 537 +++++++++++------- pco/src/wrapped/file_compressor.rs | 2 +- pco/src/wrapped/file_decompressor.rs | 12 +- pco/src/wrapped/guarantee.rs | 27 +- pco/src/wrapped/page_decompressor.rs | 314 +++++----- pco_cli/src/dtypes.rs | 4 + pco_cli/src/input/mod.rs | 6 + pco_cli/src/inspect/handler.rs | 53 +- pco_python/README.md | 2 +- pco_python/src/config.rs | 6 + pco_python/src/wrapped/compressor.rs | 44 +- pco_python/test/test_standalone.py | 38 +- 55 files changed, 2391 insertions(+), 1134 deletions(-) create mode 100644 pco/assets/v0_4_0_lookback_delta.pco rename pco/src/{latent_batch_decompressor.rs => latent_page_decompressor.rs} (74%) create mode 100644 pco/src/metadata/per_latent_var.rs create mode 100644 pco/src/split_latents.rs diff --git a/docs/benchmark_results/mbp_m3_max.csv b/docs/benchmark_results/mbp_m3_max.csv index f586941b..b7ac8f22 100644 --- a/docs/benchmark_results/mbp_m3_max.csv +++ b/docs/benchmark_results/mbp_m3_max.csv @@ -1,16 +1,16 @@ input,codec,compress_dt,decompress_dt,compressed_size air_quality,blosc:cname=zstd:level=3,0.104805835,0.028497249,8429280 air_quality,parquet:compression=zstd1,0.2296,0.02594,11156819 -air_quality,pco,0.10299417,0.021796916,4283105 +air_quality,pco,0.11510511,0.022027887,4283153 air_quality,spdp,0.09585233,0.10599508,22560633 air_quality,tpfor,0.029559456,0.002875309,19114630 r_place,blosc:cname=zstd:level=3,11.372147,3.3396413,976973046 r_place,parquet:compression=zstd1,15.389868,1.9258637,961718183 -r_place,pco,10.476611,1.5533803,661664577 +r_place,pco,11.368066,1.5747843,661665164 r_place,spdp,11.464923,12.225844,3306514546 r_place,tpfor,2.5569496,0.5739353,2023272462 
taxi,blosc:cname=zstd:level=3,6.3025703,1.8824589,841110760 taxi,parquet:compression=zstd1,5.600355,0.9174722,464867099 -taxi,pco,5.3043575,0.8830483,333004373 +taxi,pco,5.6141233,0.8759089,333004631 taxi,spdp,4.5603795,5.0574136,1636214754 taxi,tpfor,1.0123023,0.24857067,1452549995 \ No newline at end of file diff --git a/dtype_dispatch/src/lib.rs b/dtype_dispatch/src/lib.rs index 77a185e1..a403ec41 100644 --- a/dtype_dispatch/src/lib.rs +++ b/dtype_dispatch/src/lib.rs @@ -1,4 +1,5 @@ #![doc = include_str!("../README.md")] +#![allow(unreachable_patterns)] /// Produces two macros: an enum definer and an enum matcher. /// @@ -15,17 +16,38 @@ macro_rules! build_dtype_macros { ) => { $(#[$definer_attrs])* macro_rules! $definer { + (#[$enum_attrs: meta] $vis: vis $name: ident) => { + #[$enum_attrs] + #[non_exhaustive] + $vis enum $name { + $($variant,)+ + } + + impl $name { + #[inline] + pub fn new() -> Option { + let type_id = std::any::TypeId::of::(); + $( + if type_id == std::any::TypeId::of::<$t>() { + return Some($name::$variant); + } + )+ + None + } + } + }; (#[$enum_attrs: meta] #[repr($desc_t: ty)] $vis: vis $name: ident = $desc_val: ident) => { #[$enum_attrs] #[repr($desc_t)] + #[non_exhaustive] $vis enum $name { $($variant = <$t>::$desc_val,)+ } impl $name { #[inline] - pub fn new() -> Option { - let type_id = std::any::TypeId::of::(); + pub fn new() -> Option { + let type_id = std::any::TypeId::of::(); $( if type_id == std::any::TypeId::of::<$t>() { return Some($name::$variant); @@ -50,6 +72,7 @@ macro_rules! build_dtype_macros { } #[$enum_attrs] + #[non_exhaustive] $vis enum $name { $($variant($container<$t>),)+ } @@ -106,26 +129,26 @@ macro_rules! build_dtype_macros { None } - pub fn downcast(self) -> Option<$container> { + pub fn downcast(self) -> Option<$container> { match self { $( - Self::$variant(inner) => inner.downcast::(), + Self::$variant(inner) => inner.downcast::(), )+ } } - pub fn downcast_ref(&self) -> Option<&$container> { + pub fn downcast_ref(&self) -> Option<&$container> { match self { $( - Self::$variant(inner) => inner.downcast_ref::(), + Self::$variant(inner) => inner.downcast_ref::(), )+ } } - pub fn downcast_mut(&mut self) -> Option<&mut $container> { + pub fn downcast_mut(&mut self) -> Option<&mut $container> { match self { $( - Self::$variant(inner) => inner.downcast_mut::(), + Self::$variant(inner) => inner.downcast_mut::(), )+ } } @@ -141,6 +164,7 @@ macro_rules! build_dtype_macros { type $generic = $t; $block })+ + _ => unreachable!() } }; ($value: expr, $enum_: ident<$generic: ident>($inner: ident) => $block: block) => { @@ -149,6 +173,7 @@ macro_rules! 
build_dtype_macros { type $generic = $t; $block })+ + _ => unreachable!() } }; } diff --git a/images/real_world_compression_ratio.svg b/images/real_world_compression_ratio.svg index 286e00b8..df793534 100644 --- a/images/real_world_compression_ratio.svg +++ b/images/real_world_compression_ratio.svg @@ -8,9 +8,9 @@ - - - + + + @@ -132,7 +132,7 @@ - + @@ -185,11 +185,11 @@ - - - + + + - + @@ -204,7 +204,7 @@ - + @@ -229,11 +229,11 @@ - - - + + + - + @@ -245,7 +245,7 @@ - + @@ -273,13 +273,13 @@ - + - + - + diff --git a/images/real_world_compression_speed.svg b/images/real_world_compression_speed.svg index faab9f86..ac2caf9e 100644 --- a/images/real_world_compression_speed.svg +++ b/images/real_world_compression_speed.svg @@ -8,9 +8,9 @@ - - - + + + @@ -59,17 +59,17 @@ - + - - + + - - + + - + @@ -79,7 +79,10 @@ - + + + + @@ -87,26 +90,28 @@ - - - + + + - + - - - - - + + + - + - + + + + + @@ -114,24 +119,21 @@ - - - + + + - - - - - - - + + + + - + - + @@ -139,13 +141,13 @@ - + - + - + diff --git a/images/real_world_decompression_speed.svg b/images/real_world_decompression_speed.svg index 858dd879..d8c30c3a 100644 --- a/images/real_world_decompression_speed.svg +++ b/images/real_world_decompression_speed.svg @@ -8,9 +8,9 @@ - - - + + + @@ -61,30 +61,30 @@ - + - - + + - - + + - + - - + - + + @@ -95,27 +95,26 @@ - - - + + + - - - - - + - - + + - + + + + - + @@ -127,23 +126,23 @@ - - - + + + - - + + - + - + @@ -152,13 +151,13 @@ - + - + - + diff --git a/pco/assets/v0_4_0_lookback_delta.pco b/pco/assets/v0_4_0_lookback_delta.pco new file mode 100644 index 0000000000000000000000000000000000000000..de749ba1f4894cded5d05261f8f02c6c96423267 GIT binary patch literal 120 zcmXR&&R1mO{Kd@poS8v^a}EQ800RSq!w*p)$sohP5VE}Lcg0^IyMf_=!rJ=_*g>)r zZ|kx!Fch$X)N~xb|DK(pq3Muu!{*J#`s>%~e$d|M+ZvDpRKehIev-?))@3V|4J&_2 S*xxK)v{sr4DEyC+fdK$5@+IB? literal 0 HcmV?d00001 diff --git a/pco/src/bit_reader.rs b/pco/src/bit_reader.rs index ca8eb5b1..1e2756e4 100644 --- a/pco/src/bit_reader.rs +++ b/pco/src/bit_reader.rs @@ -149,6 +149,7 @@ impl<'a> BitReader<'a> { self.consume(n); res } + pub unsafe fn read_usize(&mut self, n: Bitlen) -> usize { self.read_uint(n) } @@ -157,6 +158,10 @@ impl<'a> BitReader<'a> { self.read_uint(n) } + pub unsafe fn read_bool(&mut self) -> bool { + self.read_uint::(1) > 0 + } + // checks in bounds and returns bit idx #[inline] fn bit_idx_safe(&self) -> PcoResult { diff --git a/pco/src/bit_writer.rs b/pco/src/bit_writer.rs index 9fdee2a6..6e0df0eb 100644 --- a/pco/src/bit_writer.rs +++ b/pco/src/bit_writer.rs @@ -129,6 +129,10 @@ impl BitWriter { self.write_uint(x, n) } + pub unsafe fn write_bool(&mut self, b: bool) { + self.write_uint(b as u32, 1) + } + pub fn finish_byte(&mut self) { self.stale_byte_idx += self.bits_past_byte.div_ceil(8) as usize; self.bits_past_byte = 0; @@ -153,6 +157,11 @@ impl BitWriter { pub fn into_inner(self) -> W { self.dst } + + #[cfg(test)] + pub fn bit_idx(&self) -> usize { + self.stale_byte_idx * 8 + self.bits_past_byte as usize + } } #[cfg(test)] diff --git a/pco/src/chunk_config.rs b/pco/src/chunk_config.rs index 8c755f15..0865715a 100644 --- a/pco/src/chunk_config.rs +++ b/pco/src/chunk_config.rs @@ -63,6 +63,11 @@ pub enum DeltaSpec { /// deltas-of-deltas, etc. /// It is legal to use 0th order, but it is identical to `None`. TryConsecutive(usize), + /// Tries delta encoding according to an extra latent variable of "lookback". 
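  /// A minimal sketch of opting in (assuming `ChunkConfig` exposes its
  /// `delta_spec` field publicly):
  /// `ChunkConfig { delta_spec: DeltaSpec::TryLookback, ..Default::default() }`.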
+ /// + /// This can improve compression ratio when there are nontrivial patterns in + /// your numbers, but reduces compression speed substantially. + TryLookback, } // TODO consider adding a "lossiness" spec that allows dropping secondary latent diff --git a/pco/src/compression_intermediates.rs b/pco/src/compression_intermediates.rs index 89acb558..fde097f4 100644 --- a/pco/src/compression_intermediates.rs +++ b/pco/src/compression_intermediates.rs @@ -1,30 +1,45 @@ use crate::ans::{AnsState, Symbol}; use crate::constants::{Bitlen, Weight, ANS_INTERLEAVING}; use crate::data_types::{Latent, Number}; -use crate::metadata::Mode; +use crate::delta::DeltaState; +use crate::metadata::per_latent_var::{LatentVarKey, PerLatentVar}; +use crate::metadata::{DynLatents, Mode}; +use crate::split_latents::SplitLatents; +use std::ops::Range; + +#[derive(Clone, Debug)] +pub struct PageInfoVar { + pub delta_state: DeltaState, + pub range: Range, +} #[derive(Clone, Debug)] pub struct PageInfo { pub page_n: usize, - pub start_idx: usize, - pub end_idx_per_var: Vec, + pub per_latent_var: PerLatentVar, +} + +impl PageInfo { + pub fn range_for_latent_var(&self, key: LatentVarKey) -> Range { + self.per_latent_var.get(key).unwrap().range.clone() + } } #[derive(Clone, Debug)] -pub struct DissectedPageVar { +pub struct DissectedPageVar { // These vecs should have the same length. pub ans_vals: Vec, pub ans_bits: Vec, - pub offsets: Vec, + pub offsets: DynLatents, pub offset_bits: Vec, pub ans_final_states: [AnsState; ANS_INTERLEAVING], } #[derive(Clone, Debug)] -pub struct DissectedPage { +pub struct DissectedPage { pub page_n: usize, - pub per_latent_var: Vec>, // one per latent variable + pub per_latent_var: PerLatentVar, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -58,5 +73,5 @@ pub(crate) struct Bid { // information (inv_base) not captured entirely in the mode. This extra // information is an implementation detail of the compressor, not part of the // format itself, and is not / does not need to be known to the decompressor. 
- pub split_fn: Box Vec>>, + pub split_fn: Box SplitLatents>, } diff --git a/pco/src/compression_table.rs b/pco/src/compression_table.rs index acb9d0da..0b1b9c7e 100644 --- a/pco/src/compression_table.rs +++ b/pco/src/compression_table.rs @@ -1,7 +1,7 @@ use crate::compression_intermediates::BinCompressionInfo; use crate::data_types::Latent; -#[derive(Debug, Clone)] +#[derive(Clone, Debug)] pub struct CompressionTable { pub search_size_log: usize, pub search_lowers: Vec, diff --git a/pco/src/constants.rs b/pco/src/constants.rs index 853a952f..d010b363 100644 --- a/pco/src/constants.rs +++ b/pco/src/constants.rs @@ -6,14 +6,18 @@ pub(crate) type Bitlen = u32; // must be u32 or larger // exposed in public API pub(crate) type Weight = u32; +pub(crate) type DeltaLookback = u32; // compatibility -pub const CURRENT_FORMAT_VERSION: u8 = 2; +pub const CURRENT_FORMAT_VERSION: u8 = 3; // bit lengths pub const BITS_TO_ENCODE_ANS_SIZE_LOG: Bitlen = 4; +pub const BITS_TO_ENCODE_MODE_VARIANT: Bitlen = 4; +pub const BITS_TO_ENCODE_DELTA_ENCODING_VARIANT: Bitlen = 4; pub const BITS_TO_ENCODE_DELTA_ENCODING_ORDER: Bitlen = 3; -pub const BITS_TO_ENCODE_MODE: Bitlen = 4; +pub const BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG: Bitlen = 5; +pub const BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG: Bitlen = 4; pub const BITS_TO_ENCODE_N_BINS: Bitlen = 15; // conservative: wide enough to support quantizing float datasets with 255 unused bits of precision pub const BITS_TO_ENCODE_QUANTIZE_K: Bitlen = 8; diff --git a/pco/src/data_types/dynamic.rs b/pco/src/data_types/dynamic.rs index f6d8d2c5..348d98b5 100644 --- a/pco/src/data_types/dynamic.rs +++ b/pco/src/data_types/dynamic.rs @@ -1,8 +1,13 @@ -use crate::data_types::Number; -use crate::macros::define_number_enum; +use crate::data_types::{Latent, Number}; +use crate::macros::{define_latent_enum, define_number_enum}; define_number_enum!( #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u8)] pub NumberType = NUMBER_TYPE_BYTE ); + +define_latent_enum!( + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub LatentType +); diff --git a/pco/src/data_types/floats.rs b/pco/src/data_types/floats.rs index 707e4fed..d27536ac 100644 --- a/pco/src/data_types/floats.rs +++ b/pco/src/data_types/floats.rs @@ -10,7 +10,8 @@ use crate::data_types::{split_latents_classic, Float, Latent, Number}; use crate::describers::LatentDescriber; use crate::errors::{PcoError, PcoResult}; use crate::float_mult_utils::FloatMultConfig; -use crate::metadata::{ChunkMeta, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatents, Mode}; use crate::{describers, float_mult_utils, float_quant_utils, sampling, ChunkConfig}; fn filter_sample(num: &F) -> Option { @@ -28,7 +29,7 @@ fn filter_sample(num: &F) -> Option { fn choose_mode_and_split_latents( nums: &[F], chunk_config: &ChunkConfig, -) -> PcoResult> { +) -> PcoResult { match chunk_config.mode_spec { ModeSpec::Auto => { // up to 3 bids: classic, float mult, float quant modes @@ -318,7 +319,7 @@ macro_rules! impl_float_number { type L = $latent; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, " ULPs") .or_else(|| describers::match_float_modes::(meta)) .expect("invalid mode for float type") @@ -338,7 +339,7 @@ macro_rules! 
impl_float_number { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { choose_mode_and_split_latents(nums, config) } @@ -363,7 +364,7 @@ macro_rules! impl_float_number { mem_layout ^ $sign_bit_mask } } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::FloatMult(dyn_latent) => { diff --git a/pco/src/data_types/mod.rs b/pco/src/data_types/mod.rs index 1b564d1c..0e58eff0 100644 --- a/pco/src/data_types/mod.rs +++ b/pco/src/data_types/mod.rs @@ -5,12 +5,15 @@ use std::ops::{ Rem, RemAssign, Shl, Shr, Sub, SubAssign, }; -pub use dynamic::NumberType; +pub use dynamic::{LatentType, NumberType}; use crate::constants::Bitlen; use crate::describers::LatentDescriber; use crate::errors::PcoResult; +use crate::metadata::dyn_latents::DynLatents; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::{ChunkMeta, Mode}; +use crate::split_latents::SplitLatents; use crate::ChunkConfig; mod dynamic; @@ -18,7 +21,7 @@ mod floats; mod signeds; mod unsigneds; -pub(crate) type ModeAndLatents = (Mode, Vec>); +pub(crate) type ModeAndLatents = (Mode, SplitLatents); /// This is used internally for compressing and decompressing with /// float modes. @@ -74,7 +77,7 @@ pub(crate) trait Float: fn from_latent_numerical(l: Self::L) -> Self; } -/// *unstable API* Trait for data types that behave like unsigned integers. +/// **unstable API** Trait for data types that behave like unsigned integers. /// /// This is used extensively in `pco` to guarantee that bitwise /// operations like `>>` and `|=` are available and that certain properties @@ -129,17 +132,18 @@ pub trait Latent: } } -/// *unstable API* Trait for data types supported for compression/decompression. +/// **unstable API** Trait for data types supported for compression/decompression. /// -/// If you have a new data type you would like to add to the library or -/// implement as custom in your own, these are the questions you need to -/// answer: +/// If you have a new data type you would like to add to the library or, +/// these are the questions you need to answer: /// * What is the corresponding latent type? This is probably the /// smallest unsigned integer with enough bits to represent the number. /// * How can I convert to this latent representation and back /// in *a way that preserves ordering*? For instance, transmuting `f32` to `u32` /// wouldn't preserve ordering and would cause pco to fail. In this example, /// one needs to flip the sign bit and, if negative, the rest of the bits. +/// +/// Custom data types (defined outside of pco) are not currently supported. pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + 'static { /// A number from 1-255 that corresponds to the number's data type. /// @@ -154,13 +158,10 @@ pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + ' /// `pco` data type implementation. const NUMBER_TYPE_BYTE: u8; - /// The latent this type can convert between to do - /// bitwise logic and such. + /// The latent this type can convert between to do bitwise logic and such. type L: Latent; - /// Returns a `LatentDescriber` for each latent variable in the chunk - /// metadata. 
- fn get_latent_describers(meta: &ChunkMeta) -> Vec>; + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar; fn mode_is_valid(mode: Mode) -> bool; /// Breaks the numbers into latent variables for better compression. @@ -168,21 +169,26 @@ pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + ' /// Returns /// * mode: the [`Mode`] that will be stored alongside the data /// for decompression - /// * latents: a list of latent variables, each of which contains a latent per - /// num in `nums` + /// * latents: a primary and optionally secondary latent variable, each of + /// which contains a latent per num in `nums`. Primary must be of the same + /// latent type as T. fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult>; + ) -> PcoResult; fn from_latent_ordered(l: Self::L) -> Self; fn to_latent_ordered(self) -> Self::L; - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]); + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>); fn transmute_to_latents(slice: &mut [Self]) -> &mut [Self::L]; fn transmute_to_latent(self) -> Self::L; } -pub(crate) fn split_latents_classic(nums: &[T]) -> Vec> { - vec![nums.iter().map(|&x| x.to_latent_ordered()).collect()] +pub(crate) fn split_latents_classic(nums: &[T]) -> SplitLatents { + let primary = DynLatents::new(nums.iter().map(|&x| x.to_latent_ordered()).collect()).unwrap(); + SplitLatents { + primary, + secondary: None, + } } diff --git a/pco/src/data_types/signeds.rs b/pco/src/data_types/signeds.rs index 692ac8b6..211d7c1d 100644 --- a/pco/src/data_types/signeds.rs +++ b/pco/src/data_types/signeds.rs @@ -3,7 +3,8 @@ use std::mem; use crate::data_types::{unsigneds, ModeAndLatents, Number}; use crate::describers::LatentDescriber; use crate::errors::PcoResult; -use crate::metadata::{ChunkMeta, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatents, Mode}; use crate::{describers, int_mult_utils, ChunkConfig}; macro_rules! impl_signed { @@ -13,9 +14,9 @@ macro_rules! impl_signed { type L = $latent; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, "") - .or_else(|| describers::match_int_modes(meta, true)) + .or_else(|| describers::match_int_modes::(meta, true)) .expect("invalid mode for signed type") } @@ -29,7 +30,7 @@ macro_rules! impl_signed { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { unsigneds::choose_mode_and_split_latents(&nums, config) } @@ -41,7 +42,7 @@ macro_rules! 
impl_signed { fn to_latent_ordered(self) -> Self::L { self.wrapping_sub(Self::MIN) as $latent } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::IntMult(dyn_latent) => { diff --git a/pco/src/data_types/unsigneds.rs b/pco/src/data_types/unsigneds.rs index 04d351fb..ccac4929 100644 --- a/pco/src/data_types/unsigneds.rs +++ b/pco/src/data_types/unsigneds.rs @@ -1,16 +1,16 @@ +use super::ModeAndLatents; use crate::constants::Bitlen; use crate::data_types::{split_latents_classic, Latent, Number}; use crate::describers::LatentDescriber; use crate::errors::{PcoError, PcoResult}; -use crate::metadata::{ChunkMeta, DynLatent, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatent, DynLatents, Mode}; use crate::{describers, int_mult_utils, ChunkConfig, ModeSpec}; -use super::ModeAndLatents; - pub fn choose_mode_and_split_latents( nums: &[T], config: &ChunkConfig, -) -> PcoResult> { +) -> PcoResult { match config.mode_spec { ModeSpec::Auto => { if let Some(base) = int_mult_utils::choose_base(nums) { @@ -83,9 +83,9 @@ macro_rules! impl_unsigned_number { type L = Self; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, "") - .or_else(|| describers::match_int_modes(meta, false)) + .or_else(|| describers::match_int_modes::(meta, false)) .expect("invalid mode for unsigned type") } @@ -99,7 +99,7 @@ macro_rules! impl_unsigned_number { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { choose_mode_and_split_latents(nums, config) } @@ -111,7 +111,7 @@ macro_rules! impl_unsigned_number { fn to_latent_ordered(self) -> Self::L { self } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::IntMult(dyn_latent) => { diff --git a/pco/src/delta.rs b/pco/src/delta.rs index 6a87e44d..6c05f1b6 100644 --- a/pco/src/delta.rs +++ b/pco/src/delta.rs @@ -1,13 +1,15 @@ +use crate::constants::{Bitlen, DeltaLookback}; use crate::data_types::Latent; +use crate::macros::match_latent_enum; +use crate::metadata::delta_encoding::DeltaLookbackConfig; +use crate::metadata::dyn_latents::DynLatents; +use crate::metadata::DeltaEncoding; +use crate::FULL_BATCH_N; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::{array, cmp, mem}; -#[derive(Clone, Debug, Default)] -pub(crate) struct DeltaMoments(pub(crate) Vec); - -impl DeltaMoments { - pub fn order(&self) -> usize { - self.0.len() - } -} +pub type DeltaState = DynLatents; // Without this, deltas in, say, [-5, 5] would be split out of order into // [U::MAX - 4, U::MAX] and [0, 5]. @@ -21,35 +23,39 @@ pub fn toggle_center_in_place(latents: &mut [L]) { } } -fn first_order_encode_in_place(latents: &mut [L]) { +fn first_order_encode_consecutive_in_place(latents: &mut [L]) { if latents.is_empty() { return; } - for i in 0..latents.len() - 1 { - latents[i] = latents[i + 1].wrapping_sub(latents[i]); + for i in (1..latents.len()).rev() { + latents[i] = latents[i].wrapping_sub(latents[i - 1]); } } -// used for a single page, so we return the delta moments +// Used for a single page, so we return the delta moments. 
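// For example, order-1 encoding of [3, 5, 4, 9] returns the page moment [3]
// and, before centering, leaves [3, 2, -1, 5] (wrapping arithmetic) in place,
// with the leading slot now junk.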
+// All encode in place functions leave junk data (`order` +// latents in this case) at the front of the latents. +// Using the front instead of the back is preferable because it makes the lookback +// encode function simpler and faster. #[inline(never)] -pub(crate) fn encode_in_place(mut latents: &mut [L], order: usize) -> DeltaMoments { +fn encode_consecutive_in_place(order: usize, mut latents: &mut [L]) -> Vec { // TODO this function could be made faster by doing all steps on mini batches // of ~512 at a time let mut page_moments = Vec::with_capacity(order); for _ in 0..order { page_moments.push(latents.first().copied().unwrap_or(L::ZERO)); - first_order_encode_in_place(latents); - let truncated_len = latents.len().saturating_sub(1); - latents = &mut latents[0..truncated_len]; + first_order_encode_consecutive_in_place(latents); + let truncated_start = cmp::min(latents.len(), 1); + latents = &mut latents[truncated_start..]; } toggle_center_in_place(latents); - DeltaMoments(page_moments) + page_moments } -fn first_order_decode_in_place(moment: &mut L, latents: &mut [L]) { +fn first_order_decode_consecutive_in_place(moment: &mut L, latents: &mut [L]) { for delta in latents.iter_mut() { let tmp = *delta; *delta = *moment; @@ -59,34 +65,345 @@ fn first_order_decode_in_place(moment: &mut L, latents: &mut [L]) { // used for a single batch, so we mutate the delta moments #[inline(never)] -pub(crate) fn decode_in_place(delta_moments: &mut DeltaMoments, latents: &mut [L]) { +pub(crate) fn decode_consecutive_in_place(delta_moments: &mut [L], latents: &mut [L]) { toggle_center_in_place(latents); - for moment in delta_moments.0.iter_mut().rev() { - first_order_decode_in_place(moment, latents); + for moment in delta_moments.iter_mut().rev() { + first_order_decode_consecutive_in_place(moment, latents); } } +// there are 3 types of proposed lookbacks: +// * brute force: just try the most recent few latents +// * repeating: try the most recent lookbacks we actually used +// * hash: look up similar values by hash +const PROPOSED_LOOKBACKS: usize = 16; +const BRUTE_LOOKBACKS: usize = 6; +const REPEATING_LOOKBACKS: usize = 4; +// To help locate similar latents for lookback encoding, we hash each latent at +// different "coarsenesses" and write them into a vector. e.g. a coarseness +// of 8 means that (l >> 8) gets hashed, so we can lookup recent values by +// quotient by 256. 
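// For example, with coarseness 8 the latents 0x0001_2345 and 0x0001_23ff fall
// in the same bucket (l >> 8 == 0x0001_23), so a recent occurrence of one can
// be proposed as a lookback for the other even though their low bytes differ.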
+const COARSENESSES: [Bitlen; 2] = [0, 8]; + +fn lookback_hash_lookup( + l: u64, + i: usize, + hash_table_n: usize, + window_n: usize, + idx_hash_table: &mut [usize], + proposed_lookbacks: &mut [usize; PROPOSED_LOOKBACKS], +) { + let hash_mask = hash_table_n - 1; + // might be possible to improve this hash fn + let hash_fn = |mut x: u64| { + // constant is roughly 2**64 / phi + x = (x ^ (x >> 32)).wrapping_mul(11400714819323197441); + x = x ^ (x >> 32); + x as usize & hash_mask + }; + + let mut proposal_idx = BRUTE_LOOKBACKS + REPEATING_LOOKBACKS; + let mut offset = 0; + for coarseness in COARSENESSES { + let bucket = l >> coarseness; + let buckets = [bucket.wrapping_sub(1), bucket, bucket.wrapping_add(1)]; + let hashes = buckets.map(hash_fn); + for h in hashes { + let lookback_to_last_instance = unsafe { i - *idx_hash_table.get_unchecked(offset + h) }; + proposed_lookbacks[proposal_idx] = if lookback_to_last_instance <= window_n { + lookback_to_last_instance + } else { + cmp::min(proposal_idx, i) + }; + proposal_idx += 1; + } + let h = hashes[1]; + unsafe { + *idx_hash_table.get_unchecked_mut(offset + h) = i; + } + offset += hash_table_n; + } +} + +fn lookback_compute_goodness( + l: L, + i: usize, + latents: &[L], + proposed_lookbacks: &[usize; PROPOSED_LOOKBACKS], + lookback_counts: &mut [u32], + goodnesses: &mut [Bitlen; PROPOSED_LOOKBACKS], +) { + for lookback_idx in 0..PROPOSED_LOOKBACKS { + let lookback = proposed_lookbacks[lookback_idx]; + let lookback_count = lookback_counts[lookback - 1]; + let other = unsafe { *latents.get_unchecked(i - lookback) }; + let lookback_goodness = Bitlen::BITS - lookback_count.leading_zeros(); + let delta = L::min(l.wrapping_sub(other), other.wrapping_sub(l)); + let delta_goodness = delta.leading_zeros(); + goodnesses[lookback_idx] = lookback_goodness + delta_goodness; + } +} + +fn lookback_goodness_argmax(goodnesses: &[Bitlen; PROPOSED_LOOKBACKS]) -> usize { + let mut best_goodness = goodnesses[0]; + let mut best_idx = 0; + + for (i, &goodness) in goodnesses.iter().enumerate().skip(1) { + if goodness > best_goodness { + best_goodness = goodness; + best_idx = i; + } + } + + best_idx +} + +#[inline(never)] +fn choose_lookbacks(config: DeltaLookbackConfig, latents: &[L]) -> Vec { + let state_n = config.state_n(); + + if latents.len() <= state_n { + return vec![]; + } + + let hash_table_n_log = config.window_n_log + 1; + let hash_table_n = 1 << hash_table_n_log; + let window_n = config.window_n(); + assert!( + window_n >= PROPOSED_LOOKBACKS, + "we do not support tiny windows during compression" + ); + + let mut lookback_counts = vec![1_u32; cmp::min(window_n, latents.len())]; + let mut lookbacks = vec![MaybeUninit::uninit(); latents.len() - state_n]; + let mut idx_hash_table = vec![0_usize; COARSENESSES.len() * hash_table_n]; + let mut proposed_lookbacks = array::from_fn::<_, PROPOSED_LOOKBACKS, _>(|i| (i + 1).min(state_n)); + let mut goodnesses = [0; PROPOSED_LOOKBACKS]; + let mut best_lookback = 1; + let mut repeating_lookback_idx: usize = 0; + for i in state_n..latents.len() { + let l = latents[i]; + + let new_brute_lookback = i.min(PROPOSED_LOOKBACKS); + proposed_lookbacks[new_brute_lookback - 1] = new_brute_lookback; + + lookback_hash_lookup( + l.to_u64(), + i, + hash_table_n, + window_n, + &mut idx_hash_table, + &mut proposed_lookbacks, + ); + lookback_compute_goodness( + l, + i, + latents, + &proposed_lookbacks, + &mut lookback_counts, + &mut goodnesses, + ); + let best_lookback_idx = lookback_goodness_argmax(&goodnesses); + let new_best_lookback = 
proposed_lookbacks[best_lookback_idx]; + if new_best_lookback != best_lookback { + repeating_lookback_idx += 1; + } + proposed_lookbacks[BRUTE_LOOKBACKS + (repeating_lookback_idx) % REPEATING_LOOKBACKS] = + new_best_lookback; + best_lookback = new_best_lookback; + lookbacks[i - state_n] = MaybeUninit::new(best_lookback as DeltaLookback); + lookback_counts[best_lookback - 1] += 1; + } + + unsafe { mem::transmute::>, Vec>(lookbacks) } +} + +// All encode in place functions leave junk data (`state_n` latents in this +// case) at the front of the latents. +// Using the front instead of the back is preferable because it means we don't +// need an extra copy of the latents in this case. +#[inline(never)] +fn encode_with_lookbacks_in_place( + config: DeltaLookbackConfig, + lookbacks: &[DeltaLookback], + latents: &mut [L], +) -> Vec { + let state_n = config.state_n(); + let real_state_n = cmp::min(latents.len(), state_n); + // TODO make this fast + for i in (real_state_n..latents.len()).rev() { + let lookback = lookbacks[i - state_n] as usize; + latents[i] = latents[i].wrapping_sub(latents[i - lookback]) + } + + let mut state = vec![L::ZERO; state_n]; + state[state_n - real_state_n..].copy_from_slice(&latents[..real_state_n]); + + toggle_center_in_place(latents); + + state +} + +pub fn new_lookback_window_buffer_and_pos( + config: DeltaLookbackConfig, + state: &[L], +) -> (Vec, usize) { + let window_n = config.window_n(); + let buffer_n = cmp::max(window_n, FULL_BATCH_N) * 2; + // TODO better default window + let mut res = vec![L::ZERO; buffer_n]; + res[window_n - state.len()..window_n].copy_from_slice(state); + (res, window_n) +} + +// returns the new position +pub fn decode_with_lookbacks_in_place( + config: DeltaLookbackConfig, + lookbacks: &[DeltaLookback], + window_buffer_pos: &mut usize, + window_buffer: &mut [L], + latents: &mut [L], +) { + toggle_center_in_place(latents); + + let (window_n, state_n) = (config.window_n(), config.state_n()); + let mut pos = *window_buffer_pos; + let batch_n = latents.len(); + if pos + batch_n > window_buffer.len() { + // we need to cycle the buffer + for i in 0..window_n { + window_buffer[i] = window_buffer[i + pos - window_n]; + } + pos = window_n; + } + + for (i, (&latent, &lookback)) in latents.iter().zip(lookbacks).enumerate() { + window_buffer[pos + i] = latent.wrapping_add(window_buffer[pos + i - lookback as usize]); + } + + let new_pos = pos + batch_n; + latents.copy_from_slice(&window_buffer[pos - state_n..new_pos - state_n]); + *window_buffer_pos = new_pos; +} + +pub fn compute_delta_latent_var( + delta_encoding: DeltaEncoding, + primary_latents: &mut DynLatents, + range: Range, +) -> Option { + match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => None, + DeltaEncoding::Lookback(config) => { + let res = match_latent_enum!( + primary_latents, + DynLatents(inner) => { + let latents = &mut inner[range]; + DynLatents::new(choose_lookbacks(config, latents)).unwrap() + } + ); + Some(res) + } + } +} + +pub fn encode_in_place( + delta_encoding: DeltaEncoding, + delta_latents: Option<&DynLatents>, + range: Range, + latents: &mut DynLatents, +) -> DeltaState { + match_latent_enum!( + latents, + DynLatents(inner) => { + let delta_state = match delta_encoding { + DeltaEncoding::None => Vec::::new(), + DeltaEncoding::Consecutive(config) => { + encode_consecutive_in_place(config.order, &mut inner[range]) + } + DeltaEncoding::Lookback(config) => { + let lookbacks = delta_latents.unwrap().downcast_ref::().unwrap(); + 
encode_with_lookbacks_in_place(config, lookbacks, &mut inner[range]) + } + }; + DynLatents::new(delta_state).unwrap() + } + ) +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_delta_encode_decode() { + fn test_consecutive_encode_decode() { let orig_latents: Vec = vec![2, 2, 1, u32::MAX, 0]; - let mut deltas = orig_latents.to_vec(); + let mut deltas = orig_latents.clone(); let order = 2; - let zero_delta = u32::MID; - let mut moments = encode_in_place(&mut deltas, order); + let mut moments = encode_consecutive_in_place(order, &mut deltas); - // add back some padding we lose during compression + // Encoding left junk deltas at the front, + // but for decoding we need junk deltas at the end. + let mut deltas_to_decode = Vec::new(); + deltas_to_decode.extend(&deltas[order..]); for _ in 0..order { - deltas.push(zero_delta); + deltas_to_decode.push(1337); } + let mut deltas = deltas_to_decode; - decode_in_place::(&mut moments, &mut deltas[..3]); + // decode in two parts to show we keep state properly + decode_consecutive_in_place::(&mut moments, &mut deltas[..3]); assert_eq!(&deltas[..3], &orig_latents[..3]); - decode_in_place::(&mut moments, &mut deltas[3..]); + decode_consecutive_in_place::(&mut moments, &mut deltas[3..]); assert_eq!(&deltas[3..5], &orig_latents[3..5]); } + + #[test] + fn test_lookback_encode_decode() { + let original_latents = { + let mut res = vec![100_u32; 100]; + res[1] = 200; + res[2] = 201; + res[3] = 202; + res[5] = 203; + res[15] = 204; + res[50] = 205; + res + }; + let config = DeltaLookbackConfig { + window_n_log: 4, + state_n_log: 1, + secondary_uses_delta: false, + }; + + let mut deltas = original_latents.clone(); + let lookbacks = choose_lookbacks(config, &original_latents); + assert_eq!(lookbacks[0], 1); // 201 -> 200 + assert_eq!(lookbacks[2], 4); // 0 -> 0 + assert_eq!(lookbacks[13], 10); // 204 -> 203 + assert_eq!(lookbacks[48], 1); // 205 -> 0; 204 was outside window + + let state = encode_with_lookbacks_in_place(config, &lookbacks, &mut deltas); + assert_eq!(state, vec![100, 200]); + + // Encoding left junk deltas at the front, + // but for decoding we need junk deltas at the end. + let mut deltas_to_decode = Vec::::new(); + deltas_to_decode.extend(&deltas[2..]); + for _ in 0..2 { + deltas_to_decode.push(1337); + } + + let (mut window_buffer, mut pos) = new_lookback_window_buffer_and_pos(config, &state); + assert_eq!(pos, 16); + decode_with_lookbacks_in_place( + config, + &lookbacks, + &mut pos, + &mut window_buffer, + &mut deltas_to_decode, + ); + assert_eq!(deltas_to_decode, original_latents); + assert_eq!(pos, 16 + original_latents.len()); + } } diff --git a/pco/src/describers.rs b/pco/src/describers.rs index b8b3b739..05a03618 100644 --- a/pco/src/describers.rs +++ b/pco/src/describers.rs @@ -1,12 +1,13 @@ -use crate::constants::Bitlen; +use crate::constants::{Bitlen, DeltaLookback}; use crate::data_types::{Float, Latent, Number}; -use crate::metadata::{ChunkMeta, DeltaEncoding, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynLatent, LatentVarKey, Mode}; use std::marker::PhantomData; /// Interprets the meaning of latent variables and values from [`ChunkMeta`]. /// /// Obtainable via [`crate::data_types::Number::get_latent_describers`]. -pub trait DescribeLatent { +pub trait DescribeLatent { /// Returns a description for this latent variable. 
fn latent_var(&self) -> String; /// Returns a description for this latent variable's units, when formatted @@ -16,32 +17,49 @@ pub trait DescribeLatent { /// numbers. fn latent_units(&self) -> String; /// Returns a more easily interpretable description for the latent. - fn latent(&self, latent: L) -> String; + fn latent(&self, latent: DynLatent) -> String; } -pub type LatentDescriber = Box>; +pub type LatentDescriber = Box; + +fn delta_latent_describer(delta_encoding: DeltaEncoding) -> Option { + match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => None, + DeltaEncoding::Lookback(_) => { + let describer = IntDescriber { + description: "lookback".to_string(), + units: "".to_string(), + center: 0 as DeltaLookback, + is_signed: false, + }; + Some(Box::new(describer)) + } + } +} pub(crate) fn match_classic_mode( meta: &ChunkMeta, delta_units: &'static str, -) -> Option>> { - match (meta.mode, meta.delta_encoding) { - (Mode::Classic, DeltaEncoding::None) => { - let describer = Box::new(ClassicDescriber::::default()); - Some(vec![describer]) - } +) -> Option> { + let primary: LatentDescriber = match (meta.mode, meta.delta_encoding) { + (Mode::Classic, DeltaEncoding::None) => Box::new(ClassicDescriber::::default()), (Mode::Classic, _) => { - let describer = centered_delta_describer("delta".to_string(), delta_units.to_string()); - Some(vec![describer]) + centered_delta_describer::("delta".to_string(), delta_units.to_string()) } - _ => None, - } + _ => return None, + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: None, + }) } pub(crate) fn match_int_modes( meta: &ChunkMeta, is_signed: bool, -) -> Option>> { +) -> Option> { match meta.mode { Mode::IntMult(dyn_latent) => { let base = *dyn_latent.downcast_ref::().unwrap(); @@ -56,29 +74,47 @@ pub(crate) fn match_int_modes( is_signed, }) } else { - centered_delta_describer( + centered_delta_describer::( format!("multiplier delta [x{}]", base), "x".to_string(), ) }; - let secondary = Box::new(IntDescriber { - description: "adjustment".to_string(), - units: "".to_string(), - center: adj_center, - is_signed: false, - }); - Some(vec![primary, secondary]) + + let secondary: LatentDescriber = if meta + .delta_encoding + .applies_to_latent_var(LatentVarKey::Secondary) + { + centered_delta_describer::( + "adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "adjustment".to_string(), + units: "".to_string(), + center: adj_center, + is_signed: false, + }) + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } _ => None, } } -pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option>> { +pub(crate) fn match_float_modes( + meta: &ChunkMeta, +) -> Option> { match meta.mode { Mode::FloatMult(dyn_latent) => { let base_latent = *dyn_latent.downcast_ref::().unwrap(); let base_string = F::from_latent_ordered(base_latent).to_string(); - let primary: LatentDescriber = if matches!(meta.delta_encoding, DeltaEncoding::None) { + let primary: LatentDescriber = if matches!(meta.delta_encoding, DeltaEncoding::None) { Box::new(FloatMultDescriber { base_string, phantom: PhantomData::, @@ -91,13 +127,29 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option( + "adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "adjustment".to_string(), + units: " ULPs".to_string(), + center: F::L::MID, + is_signed: true, + }) 
+ }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } Mode::FloatQuant(k) => { let primary = if matches!(meta.delta_encoding, DeltaEncoding::None) { @@ -106,19 +158,34 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option, }) } else { - centered_delta_describer( + centered_delta_describer::( format!("quantums delta [<<{}]", k), "q".to_string(), ) }; - let secondary = Box::new(IntDescriber { - description: "magnitude adjustment".to_string(), - units: " ULPs".to_string(), - center: F::L::ZERO, - is_signed: false, - }); - Some(vec![primary, secondary]) + let secondary: LatentDescriber = if meta + .delta_encoding + .applies_to_latent_var(LatentVarKey::Secondary) + { + centered_delta_describer::( + "magnitude adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "magnitude adjustment".to_string(), + units: " ULPs".to_string(), + center: F::L::ZERO, + is_signed: false, + }) + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } _ => None, } @@ -127,7 +194,7 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option(PhantomData); -impl DescribeLatent for ClassicDescriber { +impl DescribeLatent for ClassicDescriber { fn latent_var(&self) -> String { "primary".to_string() } @@ -136,8 +203,8 @@ impl DescribeLatent for ClassicDescriber { "".to_string() } - fn latent(&self, latent: T::L) -> String { - T::from_latent_ordered(latent).to_string() + fn latent(&self, latent: DynLatent) -> String { + T::from_latent_ordered(latent.downcast::().unwrap()).to_string() } } @@ -148,7 +215,7 @@ struct IntDescriber { is_signed: bool, } -impl DescribeLatent for IntDescriber { +impl DescribeLatent for IntDescriber { fn latent_var(&self) -> String { self.description.to_string() } @@ -157,8 +224,8 @@ impl DescribeLatent for IntDescriber { self.units.to_string() } - fn latent(&self, latent: L) -> String { - let centered = latent.wrapping_sub(self.center); + fn latent(&self, latent: DynLatent) -> String { + let centered = latent.downcast::().unwrap().wrapping_sub(self.center); if centered < L::MID || !self.is_signed { centered.to_string() } else { @@ -167,7 +234,7 @@ impl DescribeLatent for IntDescriber { } } -fn centered_delta_describer(description: String, units: String) -> LatentDescriber { +fn centered_delta_describer(description: String, units: String) -> LatentDescriber { Box::new(IntDescriber { description, units, @@ -181,7 +248,7 @@ struct FloatMultDescriber { phantom: PhantomData, } -impl DescribeLatent for FloatMultDescriber { +impl DescribeLatent for FloatMultDescriber { fn latent_var(&self) -> String { format!("multiplier [x{}]", self.base_string) } @@ -190,8 +257,8 @@ impl DescribeLatent for FloatMultDescriber { "x".to_string() } - fn latent(&self, latent: F::L) -> String { - F::int_float_from_latent(latent).to_string() + fn latent(&self, latent: DynLatent) -> String { + F::int_float_from_latent(latent.downcast::().unwrap()).to_string() } } @@ -200,7 +267,7 @@ struct FloatQuantDescriber { phantom: PhantomData, } -impl DescribeLatent for FloatQuantDescriber { +impl DescribeLatent for FloatQuantDescriber { fn latent_var(&self) -> String { "quantized".to_string() } @@ -209,8 +276,8 @@ impl DescribeLatent for FloatQuantDescriber { "".to_string() } - fn latent(&self, latent: F::L) -> String { - let shifted = latent << self.k; + fn latent(&self, latent: DynLatent) -> String { + let shifted = 
latent.downcast::().unwrap() << self.k; if shifted >= F::L::MID { F::from_latent_ordered(shifted).to_string() } else { diff --git a/pco/src/float_mult_utils.rs b/pco/src/float_mult_utils.rs index b65db320..3e0338d7 100644 --- a/pco/src/float_mult_utils.rs +++ b/pco/src/float_mult_utils.rs @@ -4,12 +4,18 @@ use std::mem; use crate::compression_intermediates::Bid; use crate::constants::{Bitlen, MULT_REQUIRED_BITS_SAVED_PER_NUM}; use crate::data_types::{Float, Latent}; -use crate::metadata::Mode; +use crate::metadata::{DynLatents, Mode}; use crate::sampling::PrimaryLatentAndSavings; +use crate::split_latents::SplitLatents; use crate::{int_mult_utils, sampling}; #[inline(never)] -pub(crate) fn join_latents(base: F, primary: &mut [F::L], secondary: &[F::L]) { +pub(crate) fn join_latents( + base: F, + primary: &mut [F::L], + secondary: Option<&DynLatents>, +) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); for (mult_and_dst, &adj) in primary.iter_mut().zip(secondary.iter()) { let unadjusted = F::int_float_from_latent(*mult_and_dst) * base; *mult_and_dst = unadjusted @@ -19,10 +25,7 @@ pub(crate) fn join_latents(base: F, primary: &mut [F::L], secondary: & } } -pub(crate) fn split_latents( - page_nums: &[F], - config: FloatMultConfig, -) -> Vec> { +pub(crate) fn split_latents(page_nums: &[F], config: FloatMultConfig) -> SplitLatents { let FloatMultConfig { base, inv_base } = config; let n = page_nums.len(); let uninit_vec = || unsafe { @@ -45,7 +48,11 @@ pub(crate) fn split_latents( // that 0 is in the middle of the range .toggle_center(); } - vec![primary, adjustments] + + SplitLatents { + primary: DynLatents::new(primary).unwrap(), + secondary: Some(DynLatents::new(adjustments).unwrap()), + } } // The rest of this file concerns automatically detecting the float `base` diff --git a/pco/src/float_quant_utils.rs b/pco/src/float_quant_utils.rs index 0d06330c..80b2b6b3 100644 --- a/pco/src/float_quant_utils.rs +++ b/pco/src/float_quant_utils.rs @@ -1,14 +1,20 @@ use crate::compression_intermediates::Bid; use crate::constants::{Bitlen, QUANT_REQUIRED_BITS_SAVED_PER_NUM}; use crate::data_types::{Float, Latent}; -use crate::metadata::Mode; +use crate::metadata::{DynLatents, Mode}; use crate::sampling::{self, PrimaryLatentAndSavings}; +use crate::split_latents::SplitLatents; use std::cmp; const REQUIRED_QUANTIZED_PROPORTION: f64 = 0.95; #[inline(never)] -pub(crate) fn join_latents(k: Bitlen, primary: &mut [F::L], secondary: &[F::L]) { +pub(crate) fn join_latents( + k: Bitlen, + primary: &mut [F::L], + secondary: Option<&DynLatents>, +) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); // For any float `num` such that `split_latents([num], k) == [[y], [m]]`, we have // num.is_sign_positive() == (y >= sign_cutoff) let sign_cutoff = F::L::MID >> k; @@ -28,7 +34,7 @@ pub(crate) fn join_latents(k: Bitlen, primary: &mut [F::L], secondary: } } -pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> Vec> { +pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> SplitLatents { let n = page_nums.len(); let uninit_vec = || unsafe { let mut res = Vec::::with_capacity(n); @@ -55,7 +61,11 @@ pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> Vec(sample: &[F]) -> Option> { @@ -179,16 +189,15 @@ mod test { let (nums, (_expected_ys, _expected_ms)): (Vec<_>, (Vec<_>, Vec<_>)) = expected.iter().cloned().unzip(); let k: Bitlen = 5; - if let [ref mut ys, ms] = &mut split_latents(&nums, k)[..] 
{ - let actual: Vec<_> = nums - .iter() - .cloned() - .zip(ys.iter().cloned().zip(ms.iter().cloned())) - .collect(); - assert_eq!(expected, actual); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { primary, secondary } = split_latents(&nums, k); + let primary = primary.downcast::().unwrap(); + let secondary = secondary.unwrap().downcast::().unwrap(); + let actual: Vec<_> = nums + .iter() + .cloned() + .zip(primary.iter().cloned().zip(secondary.iter().cloned())) + .collect(); + assert_eq!(expected, actual); } #[test] @@ -198,11 +207,12 @@ mod test { .iter() .map(|&num| num as f64) .collect(); - if let [_, ms] = &split_latents(&nums, k)[..] { - assert!(ms.iter().all(|&m| m == 0u64)); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { + primary: _primary, + secondary, + } = split_latents(&nums, k); + let secondary = secondary.unwrap().downcast::().unwrap(); + assert!(secondary.iter().all(|&m| m == 0u64)); } #[test] @@ -214,12 +224,10 @@ mod test { .collect::>(); let k: Bitlen = 5; - if let [ref mut ys, ms] = &mut split_latents(&nums, k)[..] { - join_latents::(k, ys, &ms); - assert_eq!(uints, *ys); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { primary, secondary } = split_latents(&nums, k); + let mut primary = primary.downcast::().unwrap(); + join_latents::(k, &mut primary, secondary.as_ref()); + assert_eq!(uints, primary); } #[test] diff --git a/pco/src/int_mult_utils.rs b/pco/src/int_mult_utils.rs index 2b6f81a6..24e308f5 100644 --- a/pco/src/int_mult_utils.rs +++ b/pco/src/int_mult_utils.rs @@ -5,14 +5,16 @@ use std::mem; use crate::constants::MULT_REQUIRED_BITS_SAVED_PER_NUM; use crate::data_types::{Latent, Number}; +use crate::metadata::DynLatents; use crate::sampling::{self, PrimaryLatentAndSavings}; +use crate::split_latents::SplitLatents; // riemann zeta function const ZETA_OF_2: f64 = PI * PI / 6.0; const LCB_RATIO: f64 = 1.0; #[inline(never)] -pub fn split_latents(nums: &[T], base: T::L) -> Vec> { +pub fn split_latents(nums: &[T], base: T::L) -> SplitLatents { let n = nums.len(); let mut mults = Vec::with_capacity(n); let mut adjs = Vec::with_capacity(n); @@ -26,11 +28,16 @@ pub fn split_latents(nums: &[T], base: T::L) -> Vec> { *mult_dst = u / base; *adj_dst = u % base; } - vec![mults, adjs] + + SplitLatents { + primary: DynLatents::new(mults).unwrap(), + secondary: Some(DynLatents::new(adjs).unwrap()), + } } #[inline(never)] -pub(crate) fn join_latents(base: L, primary: &mut [L], secondary: &[L]) { +pub(crate) fn join_latents(base: L, primary: &mut [L], secondary: Option<&DynLatents>) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); for (mult_and_dst, &adj) in primary.iter_mut().zip(secondary.iter()) { *mult_and_dst = (*mult_and_dst * base).wrapping_add(adj); } @@ -249,15 +256,19 @@ mod tests { let nums = vec![8_u32, 1, 5]; let base = 4_u32; let latents = split_latents(&nums, base); - assert_eq!(latents.len(), 2); - assert_eq!(latents[0], vec![2_u32, 0, 1]); - assert_eq!(latents[1], vec![0_u32, 1, 1]); + let mut primary = latents.primary.downcast::().unwrap(); + let secondary = latents.secondary.unwrap().downcast::().unwrap(); + assert_eq!(&primary, &vec![2_u32, 0, 1]); + assert_eq!(&secondary, &vec![0_u32, 1, 1]); // JOIN - let mut primary_and_dst = latents[0].to_vec(); - join_latents(base, &mut primary_and_dst, &latents[1]); + join_latents( + base, + &mut primary, + 
DynLatents::new(secondary).as_ref(), + ); - assert_eq!(primary_and_dst, nums); + assert_eq!(primary, nums); } #[test] diff --git a/pco/src/latent_batch_dissector.rs b/pco/src/latent_batch_dissector.rs index f0db662e..a5f2a636 100644 --- a/pco/src/latent_batch_dissector.rs +++ b/pco/src/latent_batch_dissector.rs @@ -104,12 +104,7 @@ impl<'a, L: Latent> LatentBatchDissector<'a, L> { } } - pub fn dissect_latent_batch( - &mut self, - latents: &[L], - base_i: usize, - dst: &mut DissectedPageVar, - ) { + pub fn dissect_latent_batch(&mut self, latents: &[L], base_i: usize, dst: &mut DissectedPageVar) { let DissectedPageVar { ans_vals, ans_bits, @@ -127,6 +122,7 @@ impl<'a, L: Latent> LatentBatchDissector<'a, L> { &mut offset_bits[base_i..end_i], ); + let offsets = offsets.downcast_mut::().unwrap(); self.set_offsets(latents, &mut offsets[base_i..end_i]); self.encode_ans_in_reverse( diff --git a/pco/src/latent_chunk_compressor.rs b/pco/src/latent_chunk_compressor.rs index 3e5af990..b10679d0 100644 --- a/pco/src/latent_chunk_compressor.rs +++ b/pco/src/latent_chunk_compressor.rs @@ -6,10 +6,13 @@ use crate::constants::{Bitlen, Weight, ANS_INTERLEAVING, PAGE_PADDING}; use crate::data_types::Latent; use crate::errors::PcoResult; use crate::latent_batch_dissector::LatentBatchDissector; +use crate::macros::{define_latent_enum, match_latent_enum}; +use crate::metadata::dyn_latents::DynLatents; use crate::metadata::{bins, Bin}; use crate::read_write_uint::ReadWriteUint; use crate::{ans, bit_reader, bit_writer, read_write_uint, FULL_BATCH_N}; use std::io::Write; +use std::ops::Range; // This would be very hard to combine with write_uints because it makes use of // an optimization that only works easily for single-u64 writes of 56 bits or @@ -79,14 +82,15 @@ pub(crate) struct TrainedBins { pub struct LatentChunkCompressor { table: CompressionTable, pub encoder: ans::Encoder, - pub avg_bits_per_delta: f64, + pub avg_bits_per_latent: f64, is_trivial: bool, needs_ans: bool, max_u64s_per_offset: usize, + latents: Vec, } impl LatentChunkCompressor { - pub(crate) fn new(trained: TrainedBins, bins: &[Bin]) -> PcoResult { + pub(crate) fn new(trained: TrainedBins, bins: &[Bin], latents: Vec) -> PcoResult { let needs_ans = bins.len() != 1; let table = CompressionTable::from(trained.infos); @@ -100,20 +104,21 @@ impl LatentChunkCompressor { Ok(LatentChunkCompressor { table, encoder, - avg_bits_per_delta: bins::avg_bits_per_latent(bins, trained.ans_size_log), + avg_bits_per_latent: bins::avg_bits_per_latent(bins, trained.ans_size_log), is_trivial: bins::are_trivial(bins), needs_ans, max_u64s_per_offset, + latents, }) } - pub fn dissect_page(&self, page_latents: &[L]) -> DissectedPageVar { + pub fn dissect_page(&self, page_range: Range) -> DissectedPageVar { let uninit_dissected_page_var = |n, ans_default_state| { let ans_final_states = [ans_default_state; ANS_INTERLEAVING]; DissectedPageVar { ans_vals: uninit_vec(n), ans_bits: uninit_vec(n), - offsets: uninit_vec(n), + offsets: DynLatents::new(uninit_vec::(n)).unwrap(), offset_bits: uninit_vec(n), ans_final_states, } @@ -124,13 +129,17 @@ impl LatentChunkCompressor { } let mut dissected_page_var = uninit_dissected_page_var( - page_latents.len(), + page_range.len(), self.encoder.default_state(), ); // we go through in reverse for ANS! 
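    // (tANS coding is stack-like: symbols are encoded last-to-first so that the
    // decoder can read the stream front-to-back and emit them in forward order.)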
let mut lbd = LatentBatchDissector::new(&self.table, &self.encoder); - for (batch_idx, batch) in page_latents.chunks(FULL_BATCH_N).enumerate().rev() { + for (batch_idx, batch) in self.latents[page_range] + .chunks(FULL_BATCH_N) + .enumerate() + .rev() + { let base_i = batch_idx * FULL_BATCH_N; lbd.dissect_latent_batch(batch, base_i, &mut dissected_page_var) } @@ -139,7 +148,7 @@ impl LatentChunkCompressor { pub fn write_dissected_batch( &self, - dissected_page_var: &DissectedPageVar, + dissected_page_var: &DissectedPageVar, batch_start: usize, writer: &mut BitWriter, ) -> PcoResult<()> { @@ -165,33 +174,43 @@ impl LatentChunkCompressor { // write offsets (writer.stale_byte_idx, writer.bits_past_byte) = unsafe { - match self.max_u64s_per_offset { - 0 => (writer.stale_byte_idx, writer.bits_past_byte), - 1 => write_short_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - 2 => write_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - 3 => write_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - _ => panic!("[ChunkCompressor] data type is too large"), - } + match_latent_enum!( + &dissected_page_var.offsets, + DynLatents(offsets) => { + match self.max_u64s_per_offset { + 0 => (writer.stale_byte_idx, writer.bits_past_byte), + 1 => write_short_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + 2 => write_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + 3 => write_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + _ => panic!("[ChunkCompressor] data type is too large"), + } + } + ) }; Ok(()) } } + +define_latent_enum!( + #[derive(Clone, Debug)] + pub DynLatentChunkCompressor(LatentChunkCompressor) +); diff --git a/pco/src/latent_batch_decompressor.rs b/pco/src/latent_page_decompressor.rs similarity index 74% rename from pco/src/latent_batch_decompressor.rs rename to pco/src/latent_page_decompressor.rs index 53027f13..951ea20d 100644 --- a/pco/src/latent_batch_decompressor.rs +++ b/pco/src/latent_page_decompressor.rs @@ -2,11 +2,11 @@ use std::fmt::Debug; use crate::ans::{AnsState, Spec}; use crate::bit_reader::BitReader; -use crate::constants::{Bitlen, ANS_INTERLEAVING, FULL_BATCH_N}; +use crate::constants::{Bitlen, DeltaLookback, ANS_INTERLEAVING, FULL_BATCH_N}; use crate::data_types::Latent; use crate::errors::PcoResult; -use crate::metadata::{bins, Bin}; -use crate::{ans, bit_reader, read_write_uint}; +use crate::metadata::{bins, Bin, DeltaEncoding, DynLatents}; +use crate::{ans, bit_reader, delta, read_write_uint}; // Default here is meaningless and should only be used to fill in empty // vectors. 
@@ -16,8 +16,8 @@ pub struct BinDecompressionInfo { pub offset_bits: Bitlen, } -impl From<&Bin> for BinDecompressionInfo { - fn from(bin: &Bin) -> Self { +impl BinDecompressionInfo { + fn new(bin: &Bin) -> Self { Self { lower: bin.lower, offset_bits: bin.offset_bits, @@ -31,7 +31,10 @@ struct State { offset_bits_csum_scratch: [Bitlen; FULL_BATCH_N], offset_bits_scratch: [Bitlen; FULL_BATCH_N], lowers_scratch: [L; FULL_BATCH_N], - state_idxs: [AnsState; ANS_INTERLEAVING], + + ans_state_idxs: [AnsState; ANS_INTERLEAVING], + delta_state: Vec, + delta_state_pos: usize, } impl State { @@ -47,38 +50,50 @@ impl State { // LatentBatchDecompressor does the main work of decoding bytes into Latents #[derive(Clone, Debug)] -pub struct LatentBatchDecompressor { +pub struct LatentPageDecompressor { // known information about this latent variable u64s_per_offset: usize, infos: Vec>, needs_ans: bool, decoder: ans::Decoder, + delta_encoding: DeltaEncoding, pub maybe_constant_value: Option, // mutable state state: State, } -impl LatentBatchDecompressor { +impl LatentPageDecompressor { pub fn new( ans_size_log: Bitlen, bins: &[Bin], + delta_encoding: DeltaEncoding, ans_final_state_idxs: [AnsState; ANS_INTERLEAVING], + stored_delta_state: Vec, ) -> PcoResult { let u64s_per_offset = read_write_uint::calc_max_u64s(bins::max_offset_bits(bins)); let infos = bins .iter() - .map(BinDecompressionInfo::from) + .map(BinDecompressionInfo::new) .collect::>(); let weights = bins::weights(bins); let ans_spec = Spec::from_weights(ans_size_log, weights)?; let decoder = ans::Decoder::new(&ans_spec); + let (working_delta_state, delta_state_pos) = match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => (stored_delta_state, 0), + DeltaEncoding::Lookback(config) => { + delta::new_lookback_window_buffer_and_pos(config, &stored_delta_state) + } + }; + let mut state = State { offset_bits_csum_scratch: [0; FULL_BATCH_N], offset_bits_scratch: [0; FULL_BATCH_N], lowers_scratch: [L::ZERO; FULL_BATCH_N], - state_idxs: ans_final_state_idxs, + ans_state_idxs: ans_final_state_idxs, + delta_state: working_delta_state, + delta_state_pos, }; let needs_ans = bins.len() != 1; @@ -94,17 +109,19 @@ impl LatentBatchDecompressor { } } - let maybe_constant_value = if bins::are_trivial(bins) { - bins.first().map(|bin| bin.lower) - } else { - None - }; + let maybe_constant_value = + if bins::are_trivial(bins) && matches!(delta_encoding, DeltaEncoding::None) { + bins.first().map(|bin| bin.lower) + } else { + None + }; Ok(Self { u64s_per_offset, infos, needs_ans, decoder, + delta_encoding, maybe_constant_value, state, }) @@ -123,7 +140,7 @@ impl LatentBatchDecompressor { let mut bits_past_byte = reader.bits_past_byte; let mut offset_bit_idx = 0; let [mut state_idx_0, mut state_idx_1, mut state_idx_2, mut state_idx_3] = - self.state.state_idxs; + self.state.ans_state_idxs; let infos = self.infos.as_slice(); let ans_nodes = self.decoder.nodes.as_slice(); for base_i in (0..FULL_BATCH_N).step_by(ANS_INTERLEAVING) { @@ -154,7 +171,7 @@ impl LatentBatchDecompressor { reader.stale_byte_idx = stale_byte_idx; reader.bits_past_byte = bits_past_byte; - self.state.state_idxs = [state_idx_0, state_idx_1, state_idx_2, state_idx_3]; + self.state.ans_state_idxs = [state_idx_0, state_idx_1, state_idx_2, state_idx_3]; } // This implementation handles arbitrary batch size and looks simpler, but is @@ -165,7 +182,7 @@ impl LatentBatchDecompressor { let mut stale_byte_idx = reader.stale_byte_idx; let mut bits_past_byte = reader.bits_past_byte; let 
mut offset_bit_idx = 0; - let mut state_idxs = self.state.state_idxs; + let mut state_idxs = self.state.ans_state_idxs; for i in 0..batch_n { let j = i % 4; stale_byte_idx += bits_past_byte as usize / 8; @@ -182,7 +199,7 @@ impl LatentBatchDecompressor { reader.stale_byte_idx = stale_byte_idx; reader.bits_past_byte = bits_past_byte; - self.state.state_idxs = state_idxs; + self.state.ans_state_idxs = state_idxs; } #[inline(never)] @@ -224,13 +241,9 @@ impl LatentBatchDecompressor { // If hits a corruption, it returns an error and leaves reader and self unchanged. // May contaminate dst. - pub unsafe fn decompress_latent_batch( - &mut self, - reader: &mut BitReader, - dst: &mut [L], - ) -> PcoResult<()> { + pub unsafe fn decompress_batch_pre_delta(&mut self, reader: &mut BitReader, dst: &mut [L]) { if dst.is_empty() { - return Ok(()); + return; } if self.needs_ans { @@ -258,7 +271,45 @@ impl LatentBatchDecompressor { } self.add_lowers(dst); + } - Ok(()) + pub unsafe fn decompress_batch( + &mut self, + delta_latents: Option<&DynLatents>, + n_remaining_in_page: usize, + reader: &mut BitReader, + dst: &mut [L], + ) { + let n_remaining_pre_delta = + n_remaining_in_page.saturating_sub(self.delta_encoding.n_latents_per_state()); + let pre_delta_len = if dst.len() <= n_remaining_pre_delta { + dst.len() + } else { + // If we're at the end, this won't initialize the last + // few elements before delta decoding them, so we do that manually here to + // satisfy MIRI. This step isn't really necessary. + dst[n_remaining_pre_delta..].fill(L::default()); + n_remaining_pre_delta + }; + self.decompress_batch_pre_delta(reader, &mut dst[..pre_delta_len]); + + match self.delta_encoding { + DeltaEncoding::None => (), + DeltaEncoding::Consecutive(_) => { + delta::decode_consecutive_in_place(&mut self.state.delta_state, dst) + } + DeltaEncoding::Lookback(config) => { + delta::decode_with_lookbacks_in_place( + config, + delta_latents + .unwrap() + .downcast_ref::() + .unwrap(), + &mut self.state.delta_state_pos, + &mut self.state.delta_state, + dst, + ); + } + } } } diff --git a/pco/src/lib.rs b/pco/src/lib.rs index 8204a428..d1fab610 100644 --- a/pco/src/lib.rs +++ b/pco/src/lib.rs @@ -12,14 +12,15 @@ #![deny(clippy::unused_unit)] #![deny(dead_code)] -pub use chunk_config::{ChunkConfig, DeltaSpec, ModeSpec, PagingSpec}; -pub use constants::{DEFAULT_COMPRESSION_LEVEL, DEFAULT_MAX_PAGE_N, FULL_BATCH_N}; -pub use progress::Progress; - #[doc = include_str!("../README.md")] #[cfg(doctest)] struct ReadmeDoctest; +pub use chunk_config::{ChunkConfig, DeltaSpec, ModeSpec, PagingSpec}; +pub use constants::{DEFAULT_COMPRESSION_LEVEL, DEFAULT_MAX_PAGE_N, FULL_BATCH_N}; +pub use progress::Progress; +pub use split_latents::SplitLatents; + pub mod data_types; /// for inspecting certain types of Pco metadata pub mod describers; @@ -45,14 +46,15 @@ mod float_mult_utils; mod float_quant_utils; mod histograms; mod int_mult_utils; -mod latent_batch_decompressor; mod latent_batch_dissector; mod latent_chunk_compressor; +mod latent_page_decompressor; mod macros; mod progress; mod read_write_uint; mod sampling; mod sort_utils; +mod split_latents; #[cfg(test)] mod tests; diff --git a/pco/src/macros.rs b/pco/src/macros.rs index 19fb58e1..27c351a1 100644 --- a/pco/src/macros.rs +++ b/pco/src/macros.rs @@ -2,7 +2,7 @@ dtype_dispatch::build_dtype_macros!( #[doc = "\ - Defines enums holding a container generic to `Number`. + **unstable API** Defines enums holding a container generic to `Number`. 
"] #[doc = "\ You'll only want to use this if you're using pco's low level APIs. @@ -37,14 +37,6 @@ dtype_dispatch::build_dtype_macros!( ); dtype_dispatch::build_dtype_macros!( - #[doc = "\ - Defines enums holding a container generic to `Latent`. - "] - #[doc = "\ - You'll only want to use this if you're using pco's low level APIs. - See the dtype_dispatch crate for more details. - "] - #[macro_export] define_latent_enum, #[doc = "\ diff --git a/pco/src/metadata/bin.rs b/pco/src/metadata/bin.rs index 431b52e2..f0d2e9cc 100644 --- a/pco/src/metadata/bin.rs +++ b/pco/src/metadata/bin.rs @@ -6,7 +6,6 @@ use crate::data_types::Latent; /// Part of [`ChunkLatentVarMeta`][`crate::metadata::ChunkLatentVarMeta`] representing /// a numerical range. #[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct Bin { /// The number of occurrences of this bin in the asymmetric numeral system /// table. diff --git a/pco/src/metadata/chunk.rs b/pco/src/metadata/chunk.rs index 318861a7..eea7e8a8 100644 --- a/pco/src/metadata/chunk.rs +++ b/pco/src/metadata/chunk.rs @@ -4,129 +4,92 @@ use better_io::BetterBufRead; use crate::bit_reader::BitReaderBuilder; use crate::bit_writer::BitWriter; -use crate::constants::*; -use crate::data_types::Latent; -use crate::errors::{PcoError, PcoResult}; +use crate::data_types::LatentType; +use crate::errors::PcoResult; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; use crate::metadata::delta_encoding::DeltaEncoding; -use crate::metadata::dyn_latent::DynLatent; use crate::metadata::format_version::FormatVersion; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::Mode; /// The metadata of a pco chunk. #[derive(Clone, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct ChunkMeta { /// The formula `pco` used to compress each number at a low level. pub mode: Mode, - /// How many times delta encoding was applied during compression. - /// This is between 0 and 7, inclusive. - /// - /// See [`ChunkConfig`][crate::ChunkConfig] for more details. + /// How delta encoding was applied. pub delta_encoding: DeltaEncoding, /// Metadata about the interleaved streams needed by `pco` to /// compress/decompress the inputs /// according to the formula used by `mode`. 
- pub per_latent_var: Vec, + pub per_latent_var: PerLatentVar, } impl ChunkMeta { - pub(crate) fn new( - mode: Mode, - delta_encoding: DeltaEncoding, - per_latent_var: Vec, - ) -> Self { - ChunkMeta { - mode, - delta_encoding, - per_latent_var, - } - } - pub(crate) fn exact_size(&self) -> usize { - let extra_bits_for_mode = match self.mode { - Mode::Classic => 0, - Mode::IntMult(inner) => inner.bits(), - Mode::FloatMult(inner) => inner.bits(), - Mode::FloatQuant(_) => BITS_TO_ENCODE_QUANTIZE_K, - }; - let bits_for_latent_vars: usize = self + let bits_for_latent_vars = self .per_latent_var - .iter() - .map(ChunkLatentVarMeta::exact_bit_size) + .as_ref() + .map(|_, var_meta| var_meta.exact_bit_size()) .sum(); - let n_bits = BITS_TO_ENCODE_MODE as usize - + extra_bits_for_mode as usize - + BITS_TO_ENCODE_DELTA_ENCODING_ORDER as usize + let n_bits = self.mode.exact_bit_size() as usize + + self.delta_encoding.exact_bit_size() as usize + bits_for_latent_vars; n_bits.div_ceil(8) } pub(crate) fn exact_page_meta_size(&self) -> usize { - let bit_size: usize = self + let bit_size = self .per_latent_var - .iter() - .enumerate() - .map(|(latent_var_idx, latent_var)| { - let delta_encoding = self - .mode - .delta_encoding_for_latent_var(latent_var_idx, self.delta_encoding); - latent_var.exact_page_meta_bit_size(delta_encoding) + .as_ref() + .map(|key, var_meta| { + let delta_encoding = self.delta_encoding.for_latent_var(key); + var_meta.exact_page_meta_bit_size(delta_encoding) }) .sum(); bit_size.div_ceil(8) } - pub(crate) unsafe fn read_from( + pub(crate) unsafe fn read_from( reader_builder: &mut BitReaderBuilder, version: &FormatVersion, + latent_type: LatentType, ) -> PcoResult { let (mode, delta_encoding) = reader_builder.with_reader(|reader| { - let mode = match reader.read_usize(BITS_TO_ENCODE_MODE) { - 0 => Ok(Mode::Classic), - 1 => { - if version.used_old_gcds() { - return Err(PcoError::compatibility( - "unable to decompress data from v0.0.0 of pco with different GCD encoding", - )); - } - - let base = DynLatent::read_uncompressed_from::(reader); - Ok(Mode::IntMult(base)) - } - 2 => { - let base_latent = DynLatent::read_uncompressed_from::(reader); - Ok(Mode::FloatMult(base_latent)) - } - 3 => { - let k = reader.read_bitlen(BITS_TO_ENCODE_QUANTIZE_K); - Ok(Mode::FloatQuant(k)) - } - value => Err(PcoError::corruption(format!( - "unknown mode value {}", - value - ))), - }?; - - let delta_encoding_order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); - let delta_encoding = if delta_encoding_order == 0 { - DeltaEncoding::None - } else { - DeltaEncoding::Consecutive(delta_encoding_order) - }; + let mode = Mode::read_from(reader, version, latent_type)?; + let delta_encoding = DeltaEncoding::read_from(version, reader)?; Ok((mode, delta_encoding)) })?; - let n_latent_vars = mode.n_latent_vars(); + let delta = if let Some(delta_latent_type) = delta_encoding.latent_type() { + Some(ChunkLatentVarMeta::read_from::( + reader_builder, + delta_latent_type, + )?) + } else { + None + }; - let mut per_latent_var = Vec::with_capacity(n_latent_vars); + let primary = ChunkLatentVarMeta::read_from::( + reader_builder, + mode.primary_latent_type(latent_type), + )?; - for _ in 0..n_latent_vars { - per_latent_var.push(ChunkLatentVarMeta::read_from::( + let secondary = if let Some(secondary_latent_type) = mode.secondary_latent_type(latent_type) { + Some(ChunkLatentVarMeta::read_from::( reader_builder, + secondary_latent_type, )?) 
- } + } else { + None + }; + + let per_latent_var = PerLatentVar { + delta, + primary, + secondary, + }; reader_builder.with_reader(|reader| { reader.drain_empty_byte("nonzero bits in end of final byte of chunk metadata") @@ -140,36 +103,12 @@ impl ChunkMeta { } pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) -> PcoResult<()> { - let mode_value = match self.mode { - Mode::Classic => 0, - Mode::IntMult(_) => 1, - Mode::FloatMult { .. } => 2, - Mode::FloatQuant { .. } => 3, - }; - writer.write_usize(mode_value, BITS_TO_ENCODE_MODE); - match self.mode { - Mode::Classic => (), - Mode::IntMult(base) => { - base.write_uncompressed_to(writer); - } - Mode::FloatMult(base_latent) => { - base_latent.write_uncompressed_to(writer); - } - Mode::FloatQuant(k) => { - writer.write_uint(k, BITS_TO_ENCODE_QUANTIZE_K); - } - }; - - match self.delta_encoding { - DeltaEncoding::None => writer.write_usize(0, BITS_TO_ENCODE_DELTA_ENCODING_ORDER), - DeltaEncoding::Consecutive(order) => { - writer.write_usize(order, BITS_TO_ENCODE_DELTA_ENCODING_ORDER) - } - } + self.mode.write_to(writer); + self.delta_encoding.write_to(writer); writer.flush()?; - for latents in &self.per_latent_var { + for (_, latents) in self.per_latent_var.as_ref().enumerated() { latents.write_to(writer)?; } @@ -177,23 +116,20 @@ impl ChunkMeta { writer.flush()?; Ok(()) } - - pub(crate) fn delta_encoding_for_latent_var(&self, latent_idx: usize) -> DeltaEncoding { - self - .mode - .delta_encoding_for_latent_var(latent_idx, self.delta_encoding) - } } #[cfg(test)] mod tests { use super::*; + use crate::constants::ANS_INTERLEAVING; + use crate::data_types::Latent; use crate::macros::match_latent_enum; + use crate::metadata::delta_encoding::DeltaConsecutiveConfig; use crate::metadata::dyn_bins::DynBins; use crate::metadata::dyn_latents::DynLatents; use crate::metadata::page::PageMeta; use crate::metadata::page_latent_var::PageLatentVarMeta; - use crate::metadata::Bin; + use crate::metadata::{Bin, DynLatent}; fn check_exact_sizes(meta: &ChunkMeta) -> PcoResult<()> { let buffer_size = 8192; @@ -207,30 +143,26 @@ mod tests { let mut dst = Vec::new(); let mut writer = BitWriter::new(&mut dst, buffer_size); let page_meta = PageMeta { - per_latent_var: (0..meta.per_latent_var.len()) - .map(|latent_var_idx| { - let delta_encoding = meta - .mode - .delta_encoding_for_latent_var(latent_var_idx, meta.delta_encoding); - let delta_moments = match_latent_enum!( - &meta.per_latent_var[latent_var_idx].bins, - DynBins(_bins) => { - DynLatents::new(vec![L::ZERO; delta_encoding.n_latents_per_state()]).unwrap() - } - ); - PageLatentVarMeta { - delta_moments, - ans_final_state_idxs: [0; ANS_INTERLEAVING], + per_latent_var: meta.per_latent_var.as_ref().map(|key, latent_var_meta| { + let delta_encoding = meta.delta_encoding.for_latent_var(key); + let delta_moments = match_latent_enum!( + &latent_var_meta.bins, + DynBins(_bins) => { + DynLatents::new(vec![L::ZERO; delta_encoding.n_latents_per_state()]).unwrap() } - }) - .collect(), + ); + PageLatentVarMeta { + delta_state: delta_moments, + ans_final_state_idxs: [0; ANS_INTERLEAVING], + } + }), }; unsafe { page_meta.write_to( meta .per_latent_var - .iter() - .map(|var_meta| var_meta.ans_size_log), + .as_ref() + .map(|_, var_meta| var_meta.ans_size_log), &mut writer, ) }; @@ -243,11 +175,18 @@ mod tests { fn exact_size_binless() -> PcoResult<()> { let meta = ChunkMeta { mode: Mode::Classic, - delta_encoding: DeltaEncoding::Consecutive(5), - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: 
DynBins::U32(vec![]), - }], + delta_encoding: DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: 5, + secondary_uses_delta: false, + }), + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::U32(vec![]), + }, + secondary: None, + }, }; check_exact_sizes(&meta) @@ -258,14 +197,18 @@ mod tests { let meta = ChunkMeta { mode: Mode::Classic, delta_encoding: DeltaEncoding::None, - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: DynBins::U64(vec![Bin { - weight: 1, - lower: 77_u64, - offset_bits: 0, - }]), - }], + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::U64(vec![Bin { + weight: 1, + lower: 77_u64, + offset_bits: 0, + }]), + }, + secondary: None, + }, }; check_exact_sizes(&meta) @@ -275,9 +218,13 @@ mod tests { fn exact_size_float_mult() -> PcoResult<()> { let meta = ChunkMeta { mode: Mode::FloatMult(DynLatent::U32(777_u32)), - delta_encoding: DeltaEncoding::Consecutive(3), - per_latent_var: vec![ - ChunkLatentVarMeta { + delta_encoding: DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: 3, + secondary_uses_delta: false, + }), + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { ans_size_log: 7, bins: DynBins::U32(vec![ Bin { @@ -292,7 +239,7 @@ mod tests { }, ]), }, - ChunkLatentVarMeta { + secondary: Some(ChunkLatentVarMeta { ans_size_log: 3, bins: DynBins::U32(vec![ Bin { @@ -306,8 +253,8 @@ mod tests { offset_bits: 0, }, ]), - }, - ], + }), + }, }; check_exact_sizes(&meta) diff --git a/pco/src/metadata/chunk_latent_var.rs b/pco/src/metadata/chunk_latent_var.rs index 6bf4a487..febc44d4 100644 --- a/pco/src/metadata/chunk_latent_var.rs +++ b/pco/src/metadata/chunk_latent_var.rs @@ -5,7 +5,7 @@ use crate::constants::{ Bitlen, Weight, ANS_INTERLEAVING, BITS_TO_ENCODE_ANS_SIZE_LOG, BITS_TO_ENCODE_N_BINS, FULL_BIN_BATCH_SIZE, MAX_ANS_BITS, }; -use crate::data_types::Latent; +use crate::data_types::{Latent, LatentType}; use crate::errors::{PcoError, PcoResult}; use crate::macros::match_latent_enum; use crate::metadata::dyn_bins::DynBins; @@ -75,7 +75,6 @@ unsafe fn write_bins( /// /// This is mainly useful for inspecting how compression was done. #[derive(Clone, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct ChunkLatentVarMeta { /// The log2 of the number of the number of states in this chunk's tANS /// table. 
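  /// For example, an `ans_size_log` of 10 corresponds to a table of
  /// `1 << 10 == 1024` states.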
@@ -88,8 +87,16 @@ pub struct ChunkLatentVarMeta { } impl ChunkLatentVarMeta { - pub(crate) unsafe fn read_from( + pub(crate) fn latent_type(&self) -> LatentType { + match_latent_enum!( + &self.bins, + DynBins(_inner) => { LatentType::new::().unwrap() } + ) + } + + pub(crate) unsafe fn read_from( reader_builder: &mut BitReaderBuilder, + latent_type: LatentType, ) -> PcoResult { let (ans_size_log, n_bins) = reader_builder.with_reader(|reader| { let ans_size_log = reader.read_bitlen(BITS_TO_ENCODE_ANS_SIZE_LOG); @@ -116,18 +123,23 @@ impl ChunkLatentVarMeta { ))); } - let mut bins = Vec::with_capacity(n_bins); - while bins.len() < n_bins { - let batch_size = min(n_bins - bins.len(), FULL_BIN_BATCH_SIZE); - read_bin_batch::( - reader_builder, - ans_size_log, - batch_size, - &mut bins, - )?; - } + let bins = match_latent_enum!( + latent_type, + LatentType => { + let mut bins = Vec::with_capacity(n_bins); + while bins.len() < n_bins { + let batch_size = min(n_bins - bins.len(), FULL_BIN_BATCH_SIZE); + read_bin_batch::( + reader_builder, + ans_size_log, + batch_size, + &mut bins, + )?; + } - let bins = DynBins::new(bins).unwrap(); + DynBins::new(bins).unwrap() + } + ); Ok(Self { bins, ans_size_log }) } @@ -155,13 +167,11 @@ impl ChunkLatentVarMeta { } pub(crate) fn exact_page_meta_bit_size(&self, delta_encoding: DeltaEncoding) -> usize { - let bits_per_delta = match_latent_enum!( + let bits_per_latent = match_latent_enum!( &self.bins, - DynBins(_bins) => { - L::BITS - } + DynBins(_bins) => { L::BITS } ); self.ans_size_log as usize * ANS_INTERLEAVING - + bits_per_delta as usize * delta_encoding.n_latents_per_state() + + bits_per_latent as usize * delta_encoding.n_latents_per_state() } } diff --git a/pco/src/metadata/delta_encoding.rs b/pco/src/metadata/delta_encoding.rs index 7e839ea6..2308ed85 100644 --- a/pco/src/metadata/delta_encoding.rs +++ b/pco/src/metadata/delta_encoding.rs @@ -1,29 +1,253 @@ -/// How Pco does +use crate::bit_reader::BitReader; +use crate::bit_writer::BitWriter; +use crate::constants::{ + Bitlen, BITS_TO_ENCODE_DELTA_ENCODING_ORDER, BITS_TO_ENCODE_DELTA_ENCODING_VARIANT, + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG, BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG, +}; +use crate::data_types::LatentType; +use crate::errors::{PcoError, PcoResult}; +use crate::metadata::delta_encoding::DeltaEncoding::*; +use crate::metadata::format_version::FormatVersion; +use crate::metadata::per_latent_var::LatentVarKey; +use std::io::Write; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct DeltaConsecutiveConfig { + /// The number of times consecutive deltas were taken. + /// For instance, 2nd order delta encoding is delta-of-deltas. + /// + /// This is always positive, between 1 and 7. + pub order: usize, + pub secondary_uses_delta: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct DeltaLookbackConfig { + /// The log2 of the number of latents explicitly stored in page metadata + /// to prepopulate the lookback window. + pub state_n_log: Bitlen, + /// The log2 of the maximum possible lookback. + pub window_n_log: Bitlen, + pub secondary_uses_delta: bool, +} + +impl DeltaLookbackConfig { + pub(crate) fn state_n(&self) -> usize { + 1 << self.state_n_log + } + + pub(crate) fn window_n(&self) -> usize { + 1 << self.window_n_log + } +} + +/// How Pco did /// [delta encoding](https://en.wikipedia.org/wiki/Delta_encoding) on this /// chunk. 
/// /// Delta encoding optionally takes differences between nearby numbers, /// greatly reducing the entropy of the data distribution in some cases. /// This stage of processing happens after applying the -/// [`Mode`][crate::metadata::Mode]. +/// [`Mode`][crate::metadata::Mode] during compression. #[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] pub enum DeltaEncoding { /// No delta encoding; the values are encoded as-is. /// /// This is best if your data is in random order. None, - /// Encodes the differences between values (or differences between those, - /// etc.). + /// Encodes the differences between consecutive values (or differences + /// between those, etc.). + /// + /// This is best if your numbers have high variance overall, but adjacent + /// numbers are close in value, e.g. an arithmetic sequence. + Consecutive(DeltaConsecutiveConfig), + /// Encodes an extra "lookback" latent variable and the differences + /// `x[i] - x[i - lookback[i]]` between values. /// - /// This order is always positive, between 1 and 7. - Consecutive(usize), + /// This is best if your numbers have complex repeating patterns + /// beyond just adjacent elements. + /// It is in spirit similar to LZ77 compression, but only stores lookbacks + /// (AKA match offsets) and no match lengths. + Lookback(DeltaLookbackConfig), } impl DeltaEncoding { + unsafe fn read_from_pre_v3(reader: &mut BitReader) -> Self { + let order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); + match order { + 0 => None, + _ => Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: false, + }), + } + } + + pub(crate) unsafe fn read_from( + version: &FormatVersion, + reader: &mut BitReader, + ) -> PcoResult { + if !version.supports_delta_variants() { + return Ok(Self::read_from_pre_v3(reader)); + } + + let delta_encoding_variant = reader.read_bitlen(BITS_TO_ENCODE_DELTA_ENCODING_VARIANT); + + let res = match delta_encoding_variant { + 0 => None, + 1 => { + let order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); + if order == 0 { + return Err(PcoError::corruption( + "Consecutive delta encoding order must not be 0", + )); + } else { + Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: reader.read_bool(), + }) + } + } + 2 => { + let window_n_log = 1 + reader.read_bitlen(BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG); + let state_n_log = reader.read_bitlen(BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG); + if state_n_log > window_n_log { + return Err(PcoError::corruption(format!( + "LZ delta encoding state size log exceeded window size log: {} vs {}", + state_n_log, window_n_log + ))); + } + Lookback(DeltaLookbackConfig { + window_n_log, + state_n_log, + secondary_uses_delta: reader.read_bool(), + }) + } + value => { + return Err(PcoError::corruption(format!( + "unknown delta encoding value: {}", + value + ))) + } + }; + Ok(res) + } + + pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) { + let variant = match self { + None => 0, + Consecutive(_) => 1, + Lookback(_) => 2, + }; + writer.write_bitlen( + variant, + BITS_TO_ENCODE_DELTA_ENCODING_VARIANT, + ); + + match self { + None => (), + Consecutive(config) => { + writer.write_usize( + config.order, + BITS_TO_ENCODE_DELTA_ENCODING_ORDER, + ); + writer.write_bool(config.secondary_uses_delta); + } + Lookback(config) => { + writer.write_bitlen( + config.window_n_log - 1, + BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG, + ); + writer.write_bitlen( + config.state_n_log, + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG, + ); + 
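        // (The window size log is written minus one and the read side adds
        // the one back, since it is always at least 1. The final bit below
        // records whether the secondary latent variable is delta-encoded too.)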
writer.write_bool(config.secondary_uses_delta); + } + } + } + + pub(crate) fn latent_type(&self) -> Option { + match self { + None | Consecutive(_) => Option::None, + Lookback(_) => Some(LatentType::U32), + } + } + + pub(crate) fn applies_to_latent_var(&self, key: LatentVarKey) -> bool { + match (self, key) { + // We never recursively delta encode. + (_, LatentVarKey::Delta) => false, + // We always apply the DeltaEncoding to the primary latents. + (_, LatentVarKey::Primary) => true, + (None, LatentVarKey::Secondary) => false, + (Consecutive(config), LatentVarKey::Secondary) => config.secondary_uses_delta, + (Lookback(config), LatentVarKey::Secondary) => config.secondary_uses_delta, + } + } + + pub(crate) fn for_latent_var(self, key: LatentVarKey) -> DeltaEncoding { + if self.applies_to_latent_var(key) { + self + } else { + None + } + } + pub(crate) fn n_latents_per_state(&self) -> usize { match self { - Self::None => 0, - Self::Consecutive(order) => *order, + None => 0, + Consecutive(config) => config.order, + Lookback(config) => 1 << config.state_n_log, } } + + pub(crate) fn exact_bit_size(&self) -> Bitlen { + let payload_bits = match self { + None => 0, + // For nontrivial encodings, we have a +1 bit for whether the + // secondary latent is delta-encoded or not. + Consecutive(_) => BITS_TO_ENCODE_DELTA_ENCODING_ORDER + 1, + Lookback(_) => BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG + 1, + }; + BITS_TO_ENCODE_DELTA_ENCODING_VARIANT + payload_bits + } +} + +#[cfg(test)] +mod tests { + use crate::bit_writer::BitWriter; + use crate::metadata::delta_encoding::{DeltaConsecutiveConfig, DeltaLookbackConfig}; + use crate::metadata::DeltaEncoding; + + fn check_bit_size(encoding: DeltaEncoding) { + let mut bytes = Vec::new(); + let mut writer = BitWriter::new(&mut bytes, 100); + unsafe { + encoding.write_to(&mut writer); + } + assert_eq!( + encoding.exact_bit_size() as usize, + writer.bit_idx(), + ); + } + + #[test] + fn test_bit_size() { + check_bit_size(DeltaEncoding::None); + check_bit_size(DeltaEncoding::Consecutive( + DeltaConsecutiveConfig { + order: 3, + secondary_uses_delta: false, + }, + )); + check_bit_size(DeltaEncoding::Lookback( + DeltaLookbackConfig { + window_n_log: 8, + state_n_log: 1, + secondary_uses_delta: true, + }, + )); + } } diff --git a/pco/src/metadata/dyn_latents.rs b/pco/src/metadata/dyn_latents.rs index 783bb3cc..67ecd608 100644 --- a/pco/src/metadata/dyn_latents.rs +++ b/pco/src/metadata/dyn_latents.rs @@ -10,6 +10,13 @@ define_latent_enum!( ); impl DynLatents { + pub(crate) fn len(&self) -> usize { + match_latent_enum!( + self, + DynLatents(inner) => { inner.len() } + ) + } + pub(crate) unsafe fn read_uncompressed_from( reader: &mut BitReader, len: usize, diff --git a/pco/src/metadata/format_version.rs b/pco/src/metadata/format_version.rs index 10c425af..43258f60 100644 --- a/pco/src/metadata/format_version.rs +++ b/pco/src/metadata/format_version.rs @@ -46,4 +46,8 @@ impl FormatVersion { pub(crate) fn used_old_gcds(&self) -> bool { self.0 == 0 } + + pub(crate) fn supports_delta_variants(&self) -> bool { + self.0 >= 3 + } } diff --git a/pco/src/metadata/mod.rs b/pco/src/metadata/mod.rs index 87e87072..74323a04 100644 --- a/pco/src/metadata/mod.rs +++ b/pco/src/metadata/mod.rs @@ -1,10 +1,12 @@ pub use bin::Bin; pub use chunk::ChunkMeta; pub use chunk_latent_var::ChunkLatentVarMeta; -pub use delta_encoding::DeltaEncoding; +pub use delta_encoding::{DeltaConsecutiveConfig, DeltaEncoding, DeltaLookbackConfig}; pub use dyn_bins::DynBins; pub 
use dyn_latent::DynLatent; +pub use dyn_latents::DynLatents; pub use mode::Mode; +pub use per_latent_var::{LatentVarKey, PerLatentVar}; pub(crate) mod bin; pub(crate) mod bins; @@ -18,3 +20,4 @@ pub(crate) mod format_version; pub(crate) mod mode; pub(crate) mod page; pub(crate) mod page_latent_var; +pub(crate) mod per_latent_var; diff --git a/pco/src/metadata/mode.rs b/pco/src/metadata/mode.rs index 7590c3d4..74ed51ce 100644 --- a/pco/src/metadata/mode.rs +++ b/pco/src/metadata/mode.rs @@ -1,9 +1,14 @@ -use std::fmt::Debug; - -use crate::constants::Bitlen; -use crate::data_types::Float; +use crate::bit_reader::BitReader; +use crate::bit_writer::BitWriter; +use crate::constants::{Bitlen, BITS_TO_ENCODE_MODE_VARIANT, BITS_TO_ENCODE_QUANTIZE_K}; +use crate::data_types::{Float, LatentType}; +use crate::errors::{PcoError, PcoResult}; +use crate::macros::match_latent_enum; use crate::metadata::dyn_latent::DynLatent; -use crate::metadata::DeltaEncoding; +use crate::metadata::format_version::FormatVersion; +use crate::metadata::Mode::*; +use std::fmt::Debug; +use std::io::Write; // Internally, here's how we should model each mode: // @@ -74,39 +79,125 @@ pub enum Mode { } impl Mode { - pub(crate) fn n_latent_vars(&self) -> usize { - use Mode::*; + pub(crate) unsafe fn read_from( + reader: &mut BitReader, + version: &FormatVersion, + latent_type: LatentType, + ) -> PcoResult { + let read_latent = |reader| { + match_latent_enum!( + latent_type, + LatentType => { + DynLatent::read_uncompressed_from::(reader) + } + ) + }; + + let mode = match reader.read_bitlen(BITS_TO_ENCODE_MODE_VARIANT) { + 0 => Classic, + 1 => { + if version.used_old_gcds() { + return Err(PcoError::compatibility( + "unable to decompress data from v0.0.0 of pco with different GCD encoding", + )); + } + + let base = read_latent(reader); + IntMult(base) + } + 2 => { + let base_latent = read_latent(reader); + FloatMult(base_latent) + } + 3 => { + let k = reader.read_bitlen(BITS_TO_ENCODE_QUANTIZE_K); + FloatQuant(k) + } + value => { + return Err(PcoError::corruption(format!( + "unknown mode value {}", + value + ))) + } + }; + Ok(mode) + } + pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) { + let mode_value = match self { + Classic => 0, + IntMult(_) => 1, + FloatMult { .. } => 2, + FloatQuant { .. } => 3, + }; + writer.write_bitlen(mode_value, BITS_TO_ENCODE_MODE_VARIANT); match self { - Classic => 1, - FloatMult(_) | IntMult(_) => 2, // multiplier, adjustment - FloatQuant(_) => 2, // quantums, adjustment - } + Classic => (), + IntMult(base) => { + base.write_uncompressed_to(writer); + } + FloatMult(base_latent) => { + base_latent.write_uncompressed_to(writer); + } + &FloatQuant(k) => { + writer.write_uint(k, BITS_TO_ENCODE_QUANTIZE_K); + } + }; } - pub(crate) fn delta_encoding_for_latent_var( - &self, - latent_var_idx: usize, - delta_encoding: DeltaEncoding, - ) -> DeltaEncoding { - use Mode::*; + pub(crate) fn primary_latent_type(&self, number_latent_type: LatentType) -> LatentType { + match self { + Classic | FloatMult(_) | FloatQuant(_) | IntMult(_) => number_latent_type, + } + } - match (self, latent_var_idx) { - // In all currently-available modes, the overall `delta_order` is really the delta-order of - // the first latent. 
- (Classic, 0) | (FloatMult(_), 0) | (FloatQuant(_), 0) | (IntMult(_), 0) => delta_encoding, - // In FloatMult, IntMult, and FloatQuant, the second latent is essentially a remainder or - // adjustment; there isn't any a priori reason that deltas should be useful for that kind of - // term and we do not attempt them. - (FloatMult(_), 1) | (IntMult(_), 1) | (FloatQuant(_), 1) => DeltaEncoding::None, - _ => unreachable!( - "unknown latent {:?}/{}", - self, latent_var_idx - ), + pub(crate) fn secondary_latent_type(&self, number_latent_type: LatentType) -> Option { + match self { + Classic => None, + FloatMult(_) | FloatQuant(_) | IntMult(_) => Some(number_latent_type), } } pub(crate) fn float_mult(base: F) -> Self { - Self::FloatMult(DynLatent::new(base.to_latent_ordered()).unwrap()) + FloatMult(DynLatent::new(base.to_latent_ordered()).unwrap()) + } + + pub(crate) fn exact_bit_size(&self) -> Bitlen { + let payload_bits = match self { + Classic => 0, + IntMult(base) | FloatMult(base) => base.bits(), + FloatQuant(_) => BITS_TO_ENCODE_QUANTIZE_K, + }; + BITS_TO_ENCODE_MODE_VARIANT + payload_bits + } +} + +#[cfg(test)] +mod tests { + use crate::bit_writer::BitWriter; + use crate::metadata::{DynLatent, Mode}; + + fn check_bit_size(mode: Mode) { + let mut bytes = Vec::new(); + let mut writer = BitWriter::new(&mut bytes, 100); + unsafe { + mode.write_to(&mut writer); + } + assert_eq!( + mode.exact_bit_size() as usize, + writer.bit_idx() + ); + } + + #[test] + fn test_bit_size() { + check_bit_size(Mode::Classic); + check_bit_size(Mode::IntMult( + DynLatent::new(77_u32).unwrap(), + )); + check_bit_size(Mode::FloatMult( + DynLatent::new(77_u32).unwrap(), + )); + check_bit_size(Mode::FloatQuant(7)); } } diff --git a/pco/src/metadata/page.rs b/pco/src/metadata/page.rs index eeff5776..9f7b39f0 100644 --- a/pco/src/metadata/page.rs +++ b/pco/src/metadata/page.rs @@ -3,9 +3,9 @@ use std::io::Write; use crate::bit_reader::BitReader; use crate::bit_writer::BitWriter; use crate::constants::Bitlen; -use crate::data_types::Latent; use crate::errors::PcoResult; use crate::metadata::page_latent_var::PageLatentVarMeta; +use crate::metadata::per_latent_var::{PerLatentVar, PerLatentVarBuilder}; use crate::metadata::ChunkMeta; // Data page metadata is slightly semantically different from chunk metadata, @@ -15,37 +15,45 @@ use crate::metadata::ChunkMeta; // (wrapped mode). 
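// Concretely, each page stores, for every latent variable present, the delta
// state needed to seed decoding plus the ANS_INTERLEAVING final ANS states
// (see PageLatentVarMeta).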
#[derive(Clone, Debug)] pub struct PageMeta { - pub per_latent_var: Vec, + pub per_latent_var: PerLatentVar, } impl PageMeta { - pub unsafe fn write_to, W: Write>( + pub unsafe fn write_to( &self, - ans_size_logs: I, + ans_size_logs: PerLatentVar, writer: &mut BitWriter, ) { - for (latent_idx, ans_size_log) in ans_size_logs.enumerate() { - self.per_latent_var[latent_idx].write_to(ans_size_log, writer); + for (_, (ans_size_log, latent_var_meta)) in ans_size_logs + .zip_exact(self.per_latent_var.as_ref()) + .enumerated() + { + latent_var_meta.write_to(ans_size_log, writer); } writer.finish_byte(); } - pub unsafe fn read_from( - reader: &mut BitReader, - chunk_meta: &ChunkMeta, - ) -> PcoResult { - let mut per_latent_var = Vec::with_capacity(chunk_meta.per_latent_var.len()); - for (latent_idx, chunk_latent_var_meta) in chunk_meta.per_latent_var.iter().enumerate() { - per_latent_var.push(PageLatentVarMeta::read_from::( - reader, - chunk_meta - .delta_encoding_for_latent_var(latent_idx) - .n_latents_per_state(), - chunk_latent_var_meta.ans_size_log, - )?); + pub unsafe fn read_from(reader: &mut BitReader, chunk_meta: &ChunkMeta) -> PcoResult { + let mut per_latent_var_builder = PerLatentVarBuilder::default(); + for (key, chunk_latent_var_meta) in chunk_meta.per_latent_var.as_ref().enumerated() { + let n_latents_per_state = chunk_meta + .delta_encoding + .for_latent_var(key) + .n_latents_per_state(); + per_latent_var_builder.set( + key, + PageLatentVarMeta::read_from( + reader, + chunk_latent_var_meta.latent_type(), + n_latents_per_state, + chunk_latent_var_meta.ans_size_log, + ), + ) } reader.drain_empty_byte("non-zero bits at end of data page metadata")?; - Ok(Self { per_latent_var }) + Ok(Self { + per_latent_var: per_latent_var_builder.into(), + }) } } diff --git a/pco/src/metadata/page_latent_var.rs b/pco/src/metadata/page_latent_var.rs index dfb3dfa7..2813b1c7 100644 --- a/pco/src/metadata/page_latent_var.rs +++ b/pco/src/metadata/page_latent_var.rs @@ -2,20 +2,21 @@ use crate::ans::AnsState; use crate::bit_reader::BitReader; use crate::bit_writer::BitWriter; use crate::constants::{Bitlen, ANS_INTERLEAVING}; -use crate::data_types::Latent; -use crate::errors::PcoResult; +use crate::data_types::LatentType; +use crate::delta::DeltaState; +use crate::macros::match_latent_enum; use crate::metadata::dyn_latents::DynLatents; use std::io::Write; #[derive(Clone, Debug)] pub struct PageLatentVarMeta { - pub delta_moments: DynLatents, + pub delta_state: DeltaState, pub ans_final_state_idxs: [AnsState; ANS_INTERLEAVING], } impl PageLatentVarMeta { pub unsafe fn write_to(&self, ans_size_log: Bitlen, writer: &mut BitWriter) { - self.delta_moments.write_uncompressed_to(writer); + self.delta_state.write_uncompressed_to(writer); // write the final ANS state, moving it down the range [0, table_size) for state_idx in self.ans_final_state_idxs { @@ -23,19 +24,25 @@ impl PageLatentVarMeta { } } - pub unsafe fn read_from( + pub unsafe fn read_from( reader: &mut BitReader, + latent_type: LatentType, n_latents_per_delta_state: usize, ans_size_log: Bitlen, - ) -> PcoResult { - let delta_moments = DynLatents::read_uncompressed_from::(reader, n_latents_per_delta_state); + ) -> Self { + let delta_state = match_latent_enum!( + latent_type, + LatentType => { + DynLatents::read_uncompressed_from::(reader, n_latents_per_delta_state) + } + ); let mut ans_final_state_idxs = [0; ANS_INTERLEAVING]; for state in &mut ans_final_state_idxs { *state = reader.read_uint::(ans_size_log); } - Ok(Self { - delta_moments, + Self { + 
delta_state, ans_final_state_idxs, - }) + } } } diff --git a/pco/src/metadata/per_latent_var.rs b/pco/src/metadata/per_latent_var.rs new file mode 100644 index 00000000..e15b0f30 --- /dev/null +++ b/pco/src/metadata/per_latent_var.rs @@ -0,0 +1,149 @@ +use std::fmt::Debug; +use std::iter::Sum; + +/// The possible kinds of latent variables present in a chunk. +/// +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LatentVarKey { + /// Used by certain types of + /// [delta encodings][crate::metadata::DeltaEncoding]. E.g. lookback delta + /// encoding uses this to store lookbacks. + Delta, + /// The only required latent variable, used by + /// [modes][crate::metadata::Mode] to represent number values. + /// + /// Always has the same precision as the encoded numbers. + Primary, + /// An optional additional latent variable, used by certain + /// [modes][crate::metadata::Mode] to represent number values. + Secondary, +} + +/// A generic container holding a value for each applicable latent variable. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct PerLatentVar { + pub delta: Option, + pub primary: T, + pub secondary: Option, +} + +#[derive(Clone, Debug)] +pub(crate) struct PerLatentVarBuilder { + pub delta: Option, + pub primary: Option, + pub secondary: Option, +} + +impl Default for PerLatentVarBuilder { + fn default() -> Self { + Self { + delta: None, + primary: None, + secondary: None, + } + } +} + +impl PerLatentVarBuilder { + pub fn set(&mut self, key: LatentVarKey, value: T) { + match key { + LatentVarKey::Delta => self.delta = Some(value), + LatentVarKey::Primary => self.primary = Some(value), + LatentVarKey::Secondary => self.secondary = Some(value), + } + } +} + +impl From> for PerLatentVar { + fn from(value: PerLatentVarBuilder) -> Self { + PerLatentVar { + delta: value.delta, + primary: value.primary.unwrap(), + secondary: value.secondary, + } + } +} + +impl PerLatentVar { + pub(crate) fn map S>(self, f: F) -> PerLatentVar { + PerLatentVar { + delta: self.delta.map(|delta| f(LatentVarKey::Delta, delta)), + primary: f(LatentVarKey::Primary, self.primary), + secondary: self + .secondary + .map(|secondary| f(LatentVarKey::Secondary, secondary)), + } + } + + /// Returns a new `PerLatentVar` where each entry has been wrapped in a + /// reference. + pub fn as_ref(&self) -> PerLatentVar<&T> { + PerLatentVar { + delta: self.delta.as_ref(), + primary: &self.primary, + secondary: self.secondary.as_ref(), + } + } + + pub(crate) fn as_mut(&mut self) -> PerLatentVar<&mut T> { + PerLatentVar { + delta: self.delta.as_mut(), + primary: &mut self.primary, + secondary: self.secondary.as_mut(), + } + } + + pub(crate) fn get(&self, key: LatentVarKey) -> Option<&T> { + match key { + LatentVarKey::Delta => self.delta.as_ref(), + LatentVarKey::Primary => Some(&self.primary), + LatentVarKey::Secondary => self.secondary.as_ref(), + } + } + + /// Zips each element of this `PerLatentVar` with each element of the other. + /// + /// Will panic if either one has a latent variable that the other does not. 
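  /// For example, zipping `{delta: None, primary: 1, secondary: Some(2)}`
  /// with `{delta: None, primary: 10, secondary: Some(20)}` yields
  /// `{delta: None, primary: (1, 10), secondary: Some((2, 20))}`.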
+ pub fn zip_exact(self, other: PerLatentVar) -> PerLatentVar<(T, S)> { + let zip_option = |a: Option, b: Option| match (a, b) { + (Some(a), Some(b)) => Some((a, b)), + (None, None) => None, + _ => panic!("expected values of left and right sides to match"), + }; + + PerLatentVar { + delta: zip_option(self.delta, other.delta), + primary: (self.primary, other.primary), + secondary: zip_option(self.secondary, other.secondary), + } + } + + /// Returns a vector of the defined `LatentVarKey`s and values, in order + /// of appearance in the file. + pub fn enumerated(self) -> Vec<(LatentVarKey, T)> { + let mut res = Vec::with_capacity(3); + if let Some(value) = self.delta { + res.push((LatentVarKey::Delta, value)); + } + res.push((LatentVarKey::Primary, self.primary)); + if let Some(value) = self.secondary { + res.push((LatentVarKey::Secondary, value)); + } + res + } + + pub(crate) fn sum(self) -> T + where + T: Sum, + { + let mut values = Vec::with_capacity(3); + if let Some(value) = self.delta { + values.push(value); + } + values.push(self.primary); + if let Some(value) = self.secondary { + values.push(value); + } + T::sum(values.into_iter()) + } +} diff --git a/pco/src/split_latents.rs b/pco/src/split_latents.rs new file mode 100644 index 00000000..a3352e4b --- /dev/null +++ b/pco/src/split_latents.rs @@ -0,0 +1,7 @@ +use crate::metadata::DynLatents; + +#[derive(Clone, Debug)] +pub struct SplitLatents { + pub primary: DynLatents, + pub secondary: Option, +} diff --git a/pco/src/standalone/compressor.rs b/pco/src/standalone/compressor.rs index c0bcfbec..4f68edfc 100644 --- a/pco/src/standalone/compressor.rs +++ b/pco/src/standalone/compressor.rs @@ -2,7 +2,7 @@ use std::io::Write; use crate::bit_writer::BitWriter; use crate::chunk_config::PagingSpec; -use crate::data_types::{Latent, Number}; +use crate::data_types::Number; use crate::errors::PcoResult; use crate::metadata::ChunkMeta; use crate::standalone::constants::*; @@ -83,7 +83,7 @@ impl FileCompressor { &self, nums: &[T], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { let mut config = config.clone(); config.paging_spec = PagingSpec::Exact(vec![nums.len()]); @@ -106,12 +106,12 @@ impl FileCompressor { /// Holds metadata about a chunk and supports compression. #[derive(Clone, Debug)] -pub struct ChunkCompressor { - inner: wrapped::ChunkCompressor, +pub struct ChunkCompressor { + inner: wrapped::ChunkCompressor, number_type_byte: u8, } -impl ChunkCompressor { +impl ChunkCompressor { /// Returns pre-computed information about the chunk. 
pub fn meta(&self) -> &ChunkMeta { self.inner.meta() diff --git a/pco/src/tests/compatibility.rs b/pco/src/tests/compatibility.rs index 078f9f00..f2b51f56 100644 --- a/pco/src/tests/compatibility.rs +++ b/pco/src/tests/compatibility.rs @@ -1,11 +1,11 @@ +use crate::data_types::Number; +use crate::errors::PcoResult; +use crate::{standalone, ChunkConfig, DeltaSpec, ModeSpec}; +use half::f16; use std::fs; use std::path::PathBuf; use std::str::FromStr; -use crate::data_types::Number; -use crate::errors::PcoResult; -use crate::{standalone, ChunkConfig}; - fn get_asset_dir() -> PathBuf { PathBuf::from_str(env!("CARGO_MANIFEST_DIR")) .unwrap() @@ -66,122 +66,131 @@ fn simple_write_if_version_matches( Ok(()) } -#[cfg(test)] -mod tests { - use crate::chunk_config::DeltaSpec; - use crate::errors::PcoResult; - use crate::tests::compatibility::{assert_compatible, simple_write_if_version_matches}; - use crate::{ChunkConfig, ModeSpec}; - use half::f16; - - #[test] - fn v0_0_0_classic() -> PcoResult<()> { - let version = "0.0.0"; - let name = "classic"; - let nums = (0_i32..1000).chain(2000..3000).collect::>(); - let config = ChunkConfig { - delta_spec: DeltaSpec::None, - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_0_0_classic() -> PcoResult<()> { + let version = "0.0.0"; + let name = "classic"; + let nums = (0_i32..1000).chain(2000..3000).collect::>(); + let config = ChunkConfig { + delta_spec: DeltaSpec::None, + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_0_0_delta_float_mult() -> PcoResult<()> { - let version = "0.0.0"; - let name = "delta_float_mult"; - let mut nums = (0..2000).map(|i| i as f32).collect::>(); - nums[1337] += 1.001; - let config = ChunkConfig { - delta_spec: DeltaSpec::TryConsecutive(1), - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_0_0_delta_float_mult() -> PcoResult<()> { + let version = "0.0.0"; + let name = "delta_float_mult"; + let mut nums = (0..2000).map(|i| i as f32).collect::>(); + nums[1337] += 1.001; + let config = ChunkConfig { + delta_spec: DeltaSpec::TryConsecutive(1), + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_1_0_delta_int_mult() -> PcoResult<()> { - // starting at 0.1.0 because 0.0.0 had GCD mode (no longer supported) - // instead of int mult - let version = "0.1.0"; - let name = "delta_int_mult"; - let mut nums = (0..2000).map(|i| i * 1000).collect::>(); - nums[1337] -= 1; - let config = ChunkConfig { - delta_spec: DeltaSpec::TryConsecutive(1), - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_1_0_delta_int_mult() -> PcoResult<()> { + // starting at 0.1.0 because 0.0.0 had GCD mode (no longer supported) + // instead of int mult + let version = "0.1.0"; + let name = "delta_int_mult"; + let mut nums = (0..2000).map(|i| i * 1000).collect::>(); + nums[1337] -= 1; + let config = ChunkConfig { + delta_spec: DeltaSpec::TryConsecutive(1), + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; 
+ Ok(()) +} - #[test] - fn v0_1_1_classic() -> PcoResult<()> { - // v0.1.1 introduced standalone versioning, separate from wrapped versioning - let version = "0.1.1"; - let name = "standalone_versioned"; - let nums = vec![]; - let config = ChunkConfig::default(); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_1_1_classic() -> PcoResult<()> { + // v0.1.1 introduced standalone versioning, separate from wrapped versioning + let version = "0.1.1"; + let name = "standalone_versioned"; + let nums = vec![]; + let config = ChunkConfig::default(); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - fn generate_pseudorandom_f16s() -> Vec { - // makes a variety of floats approximately uniformly distributed - // between (-2.0, -1.0] U [1.0, 2.0) - let mut num = 0.1_f32; - let mut nums = vec![]; - for _ in 0..2000 { - num = ((num * 77.7) + 0.1) % 2.0; - if num < 1.0 { - nums.push(f16::from_f32(-1.0 - num)); - } else { - nums.push(f16::from_f32(num)); - } +fn generate_pseudorandom_f16s() -> Vec { + // makes a variety of floats approximately uniformly distributed + // between (-2.0, -1.0] U [1.0, 2.0) + let mut num = 0.1_f32; + let mut nums = vec![]; + for _ in 0..2000 { + num = ((num * 77.7) + 0.1) % 2.0; + if num < 1.0 { + nums.push(f16::from_f32(-1.0 - num)); + } else { + nums.push(f16::from_f32(num)); } - nums } + nums +} - #[test] - fn v0_3_0_f16() -> PcoResult<()> { - // v0.3.0 introduced 16-bit data types, including f16, which requires the - // half crate - let version = "0.3.0"; - let name = "f16"; - let config = ChunkConfig::default(); - let nums = generate_pseudorandom_f16s(); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_3_0_f16() -> PcoResult<()> { + // v0.3.0 introduced 16-bit data types, including f16, which requires the + // half crate + let version = "0.3.0"; + let name = "f16"; + let config = ChunkConfig::default(); + let nums = generate_pseudorandom_f16s(); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_3_0_float_quant() -> PcoResult<()> { - // v0.3.0 introduced float quantization mode - let version = "0.3.0"; - let name = "float_quant"; - let nums = generate_pseudorandom_f16s() - .into_iter() - .map(|x| { - let x = x.to_f32(); - if x.abs() < 1.1 { - f32::from_bits(x.to_bits() + 1) - } else { - x - } - }) - .collect::>(); - let config = ChunkConfig::default().with_mode_spec(ModeSpec::TryFloatQuant( - f32::MANTISSA_DIGITS - f16::MANTISSA_DIGITS, - )); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_3_0_float_quant() -> PcoResult<()> { + // v0.3.0 introduced float quantization mode + let version = "0.3.0"; + let name = "float_quant"; + let nums = generate_pseudorandom_f16s() + .into_iter() + .map(|x| { + let x = x.to_f32(); + if x.abs() < 1.1 { + f32::from_bits(x.to_bits() + 1) + } else { + x + } + }) + .collect::>(); + let config = ChunkConfig::default().with_mode_spec(ModeSpec::TryFloatQuant( + f32::MANTISSA_DIGITS - f16::MANTISSA_DIGITS, + )); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} + +#[test] +fn v0_4_0_lookback_delta() -> PcoResult<()> { + // v0.4.0 
introduced lookback delta encoding + let version = "0.4.0"; + let name = "lookback_delta"; + + // randomly generated ahead of time + let nums: Vec = vec![ + 1121827092, 729032807, 3968137854, 2875434067, 3775328080, 431649926, 1048116090, 1906978350, + 14752788, 1180462487, + ] + .repeat(100); + let config = ChunkConfig::default().with_delta_spec(DeltaSpec::TryLookback); + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) } diff --git a/pco/src/tests/recovery.rs b/pco/src/tests/recovery.rs index 24ef80ff..f6f73d61 100644 --- a/pco/src/tests/recovery.rs +++ b/pco/src/tests/recovery.rs @@ -6,7 +6,7 @@ use crate::chunk_config::{ChunkConfig, DeltaSpec}; use crate::constants::Bitlen; use crate::data_types::Number; use crate::errors::PcoResult; -use crate::metadata::{ChunkMeta, DynLatent, Mode}; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynLatent, Mode}; use crate::standalone::{simple_compress, simple_decompress, FileCompressor}; use crate::ModeSpec; @@ -241,8 +241,9 @@ fn recover_with_alternating_nums(offset_bits: Bitlen, name: &str) -> PcoResult<( ..Default::default() }, )?; - assert_eq!(meta.per_latent_var.len(), 1); - let latent_var = &meta.per_latent_var[0]; + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); + let latent_var = &meta.per_latent_var.primary; let bins = latent_var.bins.downcast_ref::().unwrap(); assert_eq!(bins.len(), 1); assert_eq!(bins[0].offset_bits, offset_bits); @@ -356,3 +357,22 @@ fn test_trivial_first_latent_var() -> PcoResult<()> { assert_nums_eq(&decompressed, &nums, "trivial_first_latent")?; Ok(()) } + +#[test] +fn test_lookback_delta_encoding() -> PcoResult<()> { + let mut nums = Vec::new(); + for i in 0..100 { + nums.push(i % 9); + } + let (compressed, meta) = compress_w_meta( + &nums, + &ChunkConfig::default().with_delta_spec(DeltaSpec::TryLookback), + )?; + assert!(matches!( + meta.delta_encoding, + DeltaEncoding::Lookback(_) + )); + let decompressed = simple_decompress(&compressed)?; + assert_nums_eq(&decompressed, &nums, "trivial_first_latent")?; + Ok(()) +} diff --git a/pco/src/tests/stability.rs b/pco/src/tests/stability.rs index 3bd19a40..a0feba12 100644 --- a/pco/src/tests/stability.rs +++ b/pco/src/tests/stability.rs @@ -44,9 +44,12 @@ fn test_insufficient_data_short_bins() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - assert_eq!(meta.per_latent_var.len(), 1); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!( - meta.per_latent_var[0] + meta + .per_latent_var + .primary .bins .downcast_ref::() .unwrap() @@ -64,9 +67,12 @@ fn test_insufficient_data_sparse() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - assert_eq!(meta.per_latent_var.len(), 1); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!( - meta.per_latent_var[0] + meta + .per_latent_var + .primary .bins .downcast_ref::() .unwrap() @@ -85,8 +91,14 @@ fn test_insufficient_data_long_offsets() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - let bins = meta.per_latent_var[0].bins.downcast_ref::().unwrap(); - assert_eq!(meta.per_latent_var.len(), 1); + let bins = meta + .per_latent_var + .primary + .bins + .downcast_ref::() + .unwrap(); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!(bins.len(), 1); assert_eq!(bins[0].offset_bits, 64); Ok(()) diff --git 
a/pco/src/wrapped/chunk_compressor.rs b/pco/src/wrapped/chunk_compressor.rs index f3939bae..9ce5557d 100644 --- a/pco/src/wrapped/chunk_compressor.rs +++ b/pco/src/wrapped/chunk_compressor.rs @@ -1,34 +1,57 @@ -use std::cmp::min; -use std::io::Write; - use crate::bit_writer::BitWriter; use crate::chunk_config::DeltaSpec; -use crate::compression_intermediates::BinCompressionInfo; +use crate::compression_intermediates::{BinCompressionInfo, PageInfoVar}; use crate::compression_intermediates::{DissectedPage, PageInfo}; use crate::constants::{ - Bitlen, Weight, ANS_INTERLEAVING, LIMITED_UNOPTIMIZED_BINS_LOG, MAX_COMPRESSION_LEVEL, - MAX_DELTA_ENCODING_ORDER, MAX_ENTRIES, OVERSHOOT_PADDING, PAGE_PADDING, + Bitlen, Weight, LIMITED_UNOPTIMIZED_BINS_LOG, MAX_COMPRESSION_LEVEL, MAX_DELTA_ENCODING_ORDER, + MAX_ENTRIES, OVERSHOOT_PADDING, PAGE_PADDING, }; -use crate::data_types::{Latent, Number}; -use crate::delta::DeltaMoments; +use crate::data_types::{Latent, LatentType, Number}; +use crate::delta::DeltaState; use crate::errors::{PcoError, PcoResult}; use crate::histograms::histogram; -use crate::latent_chunk_compressor::{LatentChunkCompressor, TrainedBins}; +use crate::latent_chunk_compressor::{ + DynLatentChunkCompressor, LatentChunkCompressor, TrainedBins, +}; use crate::macros::match_latent_enum; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; +use crate::metadata::delta_encoding::{DeltaConsecutiveConfig, DeltaLookbackConfig}; use crate::metadata::dyn_bins::DynBins; use crate::metadata::dyn_latents::DynLatents; use crate::metadata::page::PageMeta; use crate::metadata::page_latent_var::PageLatentVarMeta; +use crate::metadata::per_latent_var::{LatentVarKey, PerLatentVar, PerLatentVarBuilder}; use crate::metadata::{Bin, ChunkMeta, DeltaEncoding, Mode}; +use crate::split_latents::SplitLatents; use crate::wrapped::guarantee; -use crate::{ans, bin_optimization, data_types, delta, ChunkConfig, PagingSpec, FULL_BATCH_N}; +use crate::{ + ans, bin_optimization, bits, data_types, delta, ChunkConfig, PagingSpec, FULL_BATCH_N, +}; +use std::cmp::min; +use std::io::Write; // if it looks like the average page of size n will use k bits, hint that it // will be PAGE_SIZE_OVERESTIMATION * k bits. const PAGE_SIZE_OVERESTIMATION: f64 = 1.2; const N_PER_EXTRA_DELTA_GROUP: usize = 10000; const DELTA_GROUP_SIZE: usize = 200; +const LOOKBACK_MAX_WINDOW_N_LOG: Bitlen = 15; +const LOOKBACK_MIN_WINDOW_N_LOG: Bitlen = 4; +const LOOKBACK_REQUIRED_BYTE_SAVINGS_PER_N: f64 = 0.25; + +// TODO taking deltas of secondary latents has been proven to help slightly +// in some cases, so we should consider it in the future + +fn new_lz_delta_encoding(n: usize) -> DeltaEncoding { + DeltaEncoding::Lookback(DeltaLookbackConfig { + window_n_log: bits::bits_to_encode_offset(n as u32 - 1).clamp( + LOOKBACK_MIN_WINDOW_N_LOG, + LOOKBACK_MAX_WINDOW_N_LOG, + ), + state_n_log: 0, + secondary_uses_delta: false, + }) +} // returns table size log fn quantize_weights( @@ -96,14 +119,10 @@ fn train_infos( /// Holds metadata about a chunk and supports compression. 
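 /// Internally, it holds one trained latent chunk compressor per latent
 /// variable (delta, primary, and secondary where present), along with
 /// per-page bookkeeping.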
#[derive(Clone, Debug)] -pub struct ChunkCompressor { +pub struct ChunkCompressor { meta: ChunkMeta, - latent_chunk_compressors: Vec>, + latent_chunk_compressors: PerLatentVar, page_infos: Vec, - // n_latent_vars x n_deltas - deltas: Vec>, - // n_pages x n_latent_vars - delta_moments: Vec>>, } fn bins_from_compression_infos(infos: &[BinCompressionInfo]) -> Vec> { @@ -147,153 +166,207 @@ fn validate_chunk_size(n: usize) -> PcoResult<()> { Ok(()) } -#[inline(never)] -fn collect_contiguous_deltas( - deltas: &[L], +fn collect_contiguous_latents( + latents: &[L], page_infos: &[PageInfo], - latent_idx: usize, + latent_var_key: LatentVarKey, ) -> Vec { - let mut res = Vec::with_capacity(deltas.len()); + let mut res = Vec::with_capacity(latents.len()); for page in page_infos { - res.extend(&deltas[page.start_idx..page.end_idx_per_var[latent_idx]]); + let range = page.range_for_latent_var(latent_var_key); + res.extend(&latents[range]); } res } -fn build_page_infos_and_delta_moments( - mode: Mode, +fn delta_encode_and_build_page_infos( delta_encoding: DeltaEncoding, n_per_page: &[usize], - latents: &mut [Vec], - // TODO put delta state into page info -) -> (Vec, Vec>>) { + latents: SplitLatents, +) -> (PerLatentVar, Vec) { + let n = latents.primary.len(); + let mut latents = PerLatentVar { + delta: None, + primary: latents.primary, + secondary: latents.secondary, + }; let n_pages = n_per_page.len(); let mut page_infos = Vec::with_capacity(n_pages); - let mut delta_moments = vec![Vec::new(); n_pages]; // delta encoding let mut start_idx = 0; - for (&page_n, delta_moments) in n_per_page.iter().zip(delta_moments.iter_mut()) { - let mut end_idx_per_var = Vec::new(); - for (latent_var_idx, latents) in latents.iter_mut().enumerate() { - let var_delta_encoding = mode.delta_encoding_for_latent_var(latent_var_idx, delta_encoding); - - let moments = match var_delta_encoding { - DeltaEncoding::None => DeltaMoments::default(), - DeltaEncoding::Consecutive(order) => delta::encode_in_place( - &mut latents[start_idx..start_idx + page_n], - order, - ), - }; - delta_moments.push(moments); - end_idx_per_var - .push(start_idx + page_n.saturating_sub(var_delta_encoding.n_latents_per_state())); + let mut delta_latents = delta_encoding.latent_type().map(|ltype| { + match_latent_enum!( + ltype, + LatentType => { DynLatents::new(Vec::::with_capacity(n)).unwrap() } + ) + }); + for &page_n in n_per_page { + let end_idx = start_idx + page_n; + + let page_delta_latents = delta::compute_delta_latent_var( + delta_encoding, + &mut latents.primary, + start_idx..end_idx, + ); + + let mut per_latent_var = latents.as_mut().map(|key, var_latents| { + let encoding_for_var = delta_encoding.for_latent_var(key); + let delta_state = delta::encode_in_place( + encoding_for_var, + page_delta_latents.as_ref(), + start_idx..end_idx, + var_latents, + ); + // delta encoding in place leaves junk in the first n_latents_per_state + let stored_start_idx = min( + start_idx + encoding_for_var.n_latents_per_state(), + end_idx, + ); + let range = stored_start_idx..end_idx; + PageInfoVar { delta_state, range } + }); + + if let Some(delta_latents) = delta_latents.as_mut() { + match_latent_enum!( + delta_latents, + DynLatents(delta_latents) => { + let page_delta_latents = page_delta_latents.unwrap().downcast::().unwrap(); + let delta_state = DeltaState::new(Vec::::new()).unwrap(); + let range = delta_latents.len()..delta_latents.len() + page_delta_latents.len(); + per_latent_var.delta = Some(PageInfoVar { delta_state, range }); + 
delta_latents.extend(&page_delta_latents); + } + ) } + page_infos.push(PageInfo { page_n, - start_idx, - end_idx_per_var, + per_latent_var, }); - start_idx += page_n; + start_idx = end_idx; } + latents.delta = delta_latents; - (page_infos, delta_moments) + (latents, page_infos) } -fn new_candidate_w_split_and_delta_encoding( - mut latents: Vec>, // start out plain, gets delta encoded in place +fn new_candidate_w_split_and_delta_encoding( + latents: SplitLatents, // start out plain, gets delta encoded in place paging_spec: &PagingSpec, mode: Mode, delta_encoding: DeltaEncoding, unoptimized_bins_log: Bitlen, -) -> PcoResult<(ChunkCompressor, Vec>)> { - let chunk_n = latents[0].len(); +) -> PcoResult<(ChunkCompressor, PerLatentVar>)> { + let chunk_n = latents.primary.len(); let n_per_page = paging_spec.n_per_page(chunk_n)?; - let n_latent_vars = mode.n_latent_vars(); - let (page_infos, delta_moments) = build_page_infos_and_delta_moments( - mode, - delta_encoding, - &n_per_page, - &mut latents, - ); - let deltas = latents; + // delta encoding + let (latents, page_infos) = + delta_encode_and_build_page_infos(delta_encoding, &n_per_page, latents); // training bins - let mut var_metas = Vec::with_capacity(n_latent_vars); - let mut latent_chunk_compressors = Vec::with_capacity(n_latent_vars); - let mut bin_counts = Vec::with_capacity(n_latent_vars); - for (latent_idx, deltas) in deltas.iter().enumerate() { - // secondary latents should be compressed faster - let unoptimized_bins_log = if latent_idx == 0 { - unoptimized_bins_log - } else { - min( + let mut var_metas = PerLatentVarBuilder::default(); + let mut latent_chunk_compressors = PerLatentVarBuilder::default(); + let mut bin_countss = PerLatentVarBuilder::default(); + for (key, latents) in latents.enumerated() { + let unoptimized_bins_log = match key { + // primary latents are generally the most important to compress, and + // delta latents typically have a small number of discrete values, so + // aren't slow to optimize anyway + LatentVarKey::Delta | LatentVarKey::Primary => unoptimized_bins_log, + // secondary latents should be compressed faster + LatentVarKey::Secondary => min( unoptimized_bins_log, LIMITED_UNOPTIMIZED_BINS_LOG, - ) + ), }; - let contiguous_deltas = collect_contiguous_deltas(deltas, &page_infos, latent_idx); - let trained = train_infos(contiguous_deltas, unoptimized_bins_log)?; - let bins = bins_from_compression_infos(&trained.infos); - - let ans_size_log = trained.ans_size_log; - bin_counts.push(trained.counts.to_vec()); - latent_chunk_compressors.push(LatentChunkCompressor::new(trained, &bins)?); - let latent_meta = ChunkLatentVarMeta { - bins: DynBins::new(bins).unwrap(), - ans_size_log, - }; - var_metas.push(latent_meta); + match_latent_enum!( + latents, + DynLatents(latents) => { + let contiguous_deltas = collect_contiguous_latents(&latents, &page_infos, key); + let trained = train_infos(contiguous_deltas, unoptimized_bins_log)?; + + let bins = bins_from_compression_infos(&trained.infos); + + let ans_size_log = trained.ans_size_log; + let bin_counts = trained.counts.to_vec(); + let lcc = DynLatentChunkCompressor::new( + LatentChunkCompressor::new(trained, &bins, latents)? 
+ ).unwrap(); + let var_meta = ChunkLatentVarMeta { + bins: DynBins::new(bins).unwrap(), + ans_size_log, + }; + var_metas.set(key, var_meta); + latent_chunk_compressors.set(key, lcc); + bin_countss.set(key, bin_counts); + } + ) } - let meta = ChunkMeta::new(mode, delta_encoding, var_metas); + let var_metas = var_metas.into(); + let latent_chunk_compressors = latent_chunk_compressors.into(); + let bin_countss = bin_countss.into(); + + let meta = ChunkMeta { + mode, + delta_encoding, + per_latent_var: var_metas, + }; let chunk_compressor = ChunkCompressor { meta, latent_chunk_compressors, page_infos, - deltas, - delta_moments, }; - Ok((chunk_compressor, bin_counts)) + Ok((chunk_compressor, bin_countss)) } -fn choose_delta_sample( - primary_latents: &[L], +fn choose_delta_sample( + primary_latents: &DynLatents, group_size: usize, n_extra_groups: usize, -) -> Vec { +) -> DynLatents { let n = primary_latents.len(); let nominal_sample_size = (n_extra_groups + 1) * group_size; - let mut sample = Vec::with_capacity(nominal_sample_size); let group_padding = if n_extra_groups == 0 { 0 } else { n.saturating_sub(nominal_sample_size) / n_extra_groups }; - sample.extend(primary_latents.iter().take(group_size)); let mut i = group_size; - for _ in 0..n_extra_groups { - i += group_padding; - sample.extend(primary_latents.iter().skip(i).take(group_size)); - i += group_size; - } - sample + match_latent_enum!( + primary_latents, + DynLatents(primary_latents) => { + let mut sample = Vec::::with_capacity(nominal_sample_size); + sample.extend(primary_latents.iter().take(group_size)); + for _ in 0..n_extra_groups { + i += group_padding; + sample.extend(primary_latents.iter().skip(i).take(group_size)); + i += group_size; + } + DynLatents::new(sample).unwrap() + } + ) } -fn calculate_compressed_sample_size( - sample: &[L], +fn calculate_compressed_sample_size( + sample: &DynLatents, unoptimized_bins_log: Bitlen, delta_encoding: DeltaEncoding, ) -> PcoResult { + let sample_n = sample.len(); let (sample_cc, _) = new_candidate_w_split_and_delta_encoding( - vec![sample.to_vec()], - &PagingSpec::Exact(vec![sample.len()]), + SplitLatents { + primary: sample.clone(), + secondary: None, + }, + &PagingSpec::Exact(vec![sample_n]), Mode::Classic, delta_encoding, unoptimized_bins_log, @@ -301,19 +374,18 @@ fn calculate_compressed_sample_size( Ok(sample_cc.chunk_meta_size_hint() + sample_cc.page_size_hint_inner(0, 1.0)) } -// Right now this is entirely based on the primary latents since no existing -// modes apply deltas to secondary latents. Might want to change this -// eventually? #[inline(never)] -fn choose_delta_encoding( - primary_latents: &[L], +fn choose_delta_encoding( + primary_latents: &DynLatents, unoptimized_bins_log: Bitlen, ) -> PcoResult { + let n = primary_latents.len(); let sample = choose_delta_sample( primary_latents, DELTA_GROUP_SIZE, - 1 + primary_latents.len() / N_PER_EXTRA_DELTA_GROUP, + 1 + n / N_PER_EXTRA_DELTA_GROUP, ); + let sample_n = sample.len(); let mut best_encoding = DeltaEncoding::None; let mut best_size = calculate_compressed_sample_size( @@ -322,8 +394,22 @@ fn choose_delta_encoding( DeltaEncoding::None, )?; + let lz_penalty = (LOOKBACK_REQUIRED_BYTE_SAVINGS_PER_N * sample_n as f64) as usize; + if best_size > lz_penalty { + let lz_encoding = new_lz_delta_encoding(sample_n); + let lz_penalized_size_estimate = + calculate_compressed_sample_size(&sample, unoptimized_bins_log, lz_encoding)? 
+ lz_penalty; + if lz_penalized_size_estimate < best_size { + best_encoding = new_lz_delta_encoding(primary_latents.len()); + best_size = lz_penalized_size_estimate; + } + } + for delta_encoding_order in 1..MAX_DELTA_ENCODING_ORDER + 1 { - let encoding = DeltaEncoding::Consecutive(delta_encoding_order); + let encoding = DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: delta_encoding_order, + secondary_uses_delta: false, + }); let size_estimate = calculate_compressed_sample_size(&sample, unoptimized_bins_log, encoding)?; if size_estimate < best_size { best_encoding = encoding; @@ -352,17 +438,21 @@ fn choose_unoptimized_bins_log(compression_level: usize, n: usize) -> Bitlen { // and we don't need a specialization for each full number type. // Returns a chunk compressor and the counts (per latent var) of numbers in // each bin. -fn new_candidate_w_split( +fn new_candidate_w_split( mode: Mode, - latents: Vec>, + latents: SplitLatents, config: &ChunkConfig, -) -> PcoResult<(ChunkCompressor, Vec>)> { - let unoptimized_bins_log = - choose_unoptimized_bins_log(config.compression_level, latents[0].len()); +) -> PcoResult<(ChunkCompressor, PerLatentVar>)> { + let n = latents.primary.len(); + let unoptimized_bins_log = choose_unoptimized_bins_log(config.compression_level, n); let delta_encoding = match config.delta_spec { - DeltaSpec::Auto => choose_delta_encoding(&latents[0], unoptimized_bins_log)?, + DeltaSpec::Auto => choose_delta_encoding(&latents.primary, unoptimized_bins_log)?, DeltaSpec::None | DeltaSpec::TryConsecutive(0) => DeltaEncoding::None, - DeltaSpec::TryConsecutive(order) => DeltaEncoding::Consecutive(order), + DeltaSpec::TryConsecutive(order) => DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: false, + }), + DeltaSpec::TryLookback => new_lz_delta_encoding(n), }; new_candidate_w_split_and_delta_encoding( @@ -374,45 +464,52 @@ fn new_candidate_w_split( ) } -fn fallback_chunk_compressor( - mut latents: Vec>, +fn fallback_chunk_compressor( + latents: SplitLatents, config: &ChunkConfig, -) -> PcoResult> { - let n = latents[0].len(); +) -> PcoResult { + let n = latents.primary.len(); let n_per_page = config.paging_spec.n_per_page(n)?; - let (page_infos, delta_moments) = build_page_infos_and_delta_moments( - Mode::Classic, - DeltaEncoding::None, - &n_per_page, - &mut latents, + let (latents, page_infos) = + delta_encode_and_build_page_infos(DeltaEncoding::None, &n_per_page, latents); + + let (meta, lcc) = match_latent_enum!( + latents.primary, + DynLatents(latents) => { + let infos = vec![BinCompressionInfo:: { + weight: 1, + symbol: 0, + ..Default::default() + }]; + let meta = guarantee::baseline_chunk_meta::(); + let latent_var_meta = &meta.per_latent_var.primary; + + let lcc = LatentChunkCompressor::new( + TrainedBins { + infos, + ans_size_log: 0, + counts: vec![n as Weight], + }, + latent_var_meta.bins.downcast_ref::().unwrap(), + latents, + )?; + (meta, DynLatentChunkCompressor::new(lcc).unwrap()) + } ); - let infos = vec![BinCompressionInfo:: { - weight: 1, - symbol: 0, - ..Default::default() - }]; - let meta = guarantee::baseline_chunk_meta::(); - let latent_var_meta = &meta.per_latent_var[0]; - - let lcc = LatentChunkCompressor::new( - TrainedBins { - infos, - ans_size_log: 0, - counts: vec![n as Weight], - }, - latent_var_meta.bins.downcast_ref::().unwrap(), - )?; + Ok(ChunkCompressor { meta, - latent_chunk_compressors: vec![lcc], + latent_chunk_compressors: PerLatentVar { + delta: None, + primary: lcc, + secondary: None, + }, 
page_infos, - deltas: latents, - delta_moments, }) } // Should this take nums as a slice of slices instead of having a config.paging_spec? -pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult> { +pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult { validate_config(config)?; let n = nums.len(); validate_chunk_size(n)?; @@ -420,16 +517,25 @@ pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult().unwrap(), + n, + bin_counts, + ) { + let split_latents = data_types::split_latents_classic(nums); + return fallback_chunk_compressor(split_latents, config); } Ok(candidate) } -impl ChunkCompressor { - fn should_fallback(&self, n: usize, bin_counts_per_latent_var: Vec>) -> bool { +impl ChunkCompressor { + fn should_fallback( + &self, + latent_type: LatentType, + n: usize, + bin_counts_per_latent_var: PerLatentVar>, + ) -> bool { let meta = &self.meta; if meta.delta_encoding == DeltaEncoding::None && meta.mode == Mode::Classic { // we already have a size guarantee in this case @@ -440,10 +546,11 @@ impl ChunkCompressor { // worst case trailing bytes after bit packing let mut worst_case_body_bit_size = 7 * n_pages; - for (latent_var_meta, bin_counts) in meta + for (_, (latent_var_meta, bin_counts)) in meta .per_latent_var - .iter() - .zip(bin_counts_per_latent_var.iter()) + .as_ref() + .zip_exact(bin_counts_per_latent_var.as_ref()) + .enumerated() { match_latent_enum!(&latent_var_meta.bins, DynBins(bins) => { for (bin, &count) in bins.iter().zip(bin_counts) { @@ -456,12 +563,12 @@ impl ChunkCompressor { let worst_case_size = meta.exact_size() + n_pages * meta.exact_page_meta_size() + worst_case_body_bit_size.div_ceil(8); - let baseline_size = guarantee::chunk_size::(n); - worst_case_size > baseline_size - } - fn page_moments(&self, page_idx: usize, latent_var_idx: usize) -> &DeltaMoments { - &self.delta_moments[page_idx][latent_var_idx] + let baseline_size = match_latent_enum!( + latent_type, + LatentType => { guarantee::chunk_size::(n) } + ); + worst_case_size > baseline_size } /// Returns the count of numbers this chunk will contain in each page. @@ -494,25 +601,24 @@ impl ChunkCompressor { Ok(writer.into_inner()) } - fn dissect_page(&self, page_idx: usize) -> PcoResult> { + fn dissect_page(&self, page_idx: usize) -> PcoResult { let Self { latent_chunk_compressors, - deltas, page_infos, .. 
} = self; let page_info = &page_infos[page_idx]; - let mut per_latent_var = Vec::new(); - for ((lcc, &delta_end), var_deltas) in latent_chunk_compressors - .iter() - .zip(page_info.end_idx_per_var.iter()) - .zip(deltas) - { - let page_deltas = &var_deltas[page_info.start_idx..delta_end]; - per_latent_var.push(lcc.dissect_page(page_deltas)); - } + let per_latent_var = latent_chunk_compressors.as_ref().map(|key, lcc| { + let range = page_info.range_for_latent_var(key); + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { + inner.dissect_page(range) + } + ) + }); Ok(DissectedPage { page_n: page_info.page_n, @@ -531,13 +637,18 @@ impl ChunkCompressor { fn page_size_hint_inner(&self, page_idx: usize, page_size_overestimation: f64) -> usize { let page_info = &self.page_infos[page_idx]; let mut body_bit_size = 0; - for (lcc, &end_idx) in self + for (_, (lcc, page_info_var)) in self .latent_chunk_compressors - .iter() - .zip(&page_info.end_idx_per_var) + .as_ref() + .zip_exact(page_info.per_latent_var.as_ref()) + .enumerated() { - let page_n_deltas = end_idx - page_info.start_idx; - let nums_bit_size = page_n_deltas as f64 * lcc.avg_bits_per_delta; + let n_stored_latents = page_info_var.range.len(); + let avg_bits_per_latent = match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { inner.avg_bits_per_latent } + ); + let nums_bit_size = n_stored_latents as f64 * avg_bits_per_latent; body_bit_size += (nums_bit_size * page_size_overestimation).ceil() as usize; } self.meta.exact_page_meta_size() + body_bit_size.div_ceil(8) @@ -546,7 +657,7 @@ impl ChunkCompressor { #[inline(never)] fn write_dissected_page( &self, - dissected_page: DissectedPage, + dissected_page: DissectedPage, writer: &mut BitWriter, ) -> PcoResult<()> { let mut batch_start = 0; @@ -555,12 +666,18 @@ impl ChunkCompressor { batch_start + FULL_BATCH_N, dissected_page.page_n, ); - for (dissected_page_var, lcc) in dissected_page + for (_, (dissected_page_var, lcc)) in dissected_page .per_latent_var - .iter() - .zip(&self.latent_chunk_compressors) + .as_ref() + .zip_exact(self.latent_chunk_compressors.as_ref()) + .enumerated() { - lcc.write_dissected_batch(dissected_page_var, batch_start, writer)?; + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { + inner.write_dissected_batch(dissected_page_var, batch_start, writer)?; + } + ); } batch_start = batch_end; } @@ -582,31 +699,33 @@ impl ChunkCompressor { let mut writer = BitWriter::new(dst, PAGE_PADDING); let dissected_page = self.dissect_page(page_idx)?; + let page_info = &self.page_infos[page_idx]; - let n_latents = self.meta.mode.n_latent_vars(); - let mut per_latent_var = Vec::with_capacity(n_latents); - for latent_idx in 0..n_latents { - let delta_moments = self.page_moments(page_idx, latent_idx).clone(); - let base_state = self.latent_chunk_compressors[latent_idx] - .encoder - .default_state(); + let ans_default_state_and_size_log = self.latent_chunk_compressors.as_ref().map(|_, lcc| { + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { (inner.encoder.default_state(), inner.encoder.size_log()) } + ) + }); - let ans_final_state_idxs = dissected_page - .per_latent_var - .get(latent_idx) - .map(|dissected| dissected.ans_final_states.map(|state| state - base_state)) - .unwrap_or([0; ANS_INTERLEAVING]); - per_latent_var.push(PageLatentVarMeta { - delta_moments: DynLatents::new(delta_moments.0).unwrap(), - ans_final_state_idxs, + let per_latent_var = page_info + .per_latent_var + .as_ref() + 
.zip_exact(ans_default_state_and_size_log.as_ref()) + .zip_exact(dissected_page.per_latent_var.as_ref()) + .map(|_, tuple| { + let ((page_info_var, (ans_default_state, _)), dissected) = tuple; + let ans_final_state_idxs = dissected + .ans_final_states + .map(|state| state - ans_default_state); + PageLatentVarMeta { + delta_state: page_info_var.delta_state.clone(), + ans_final_state_idxs, + } }); - } - let page_meta = PageMeta { per_latent_var }; - let ans_size_logs = self - .latent_chunk_compressors - .iter() - .map(|config| config.encoder.size_log()); + let page_meta = PageMeta { per_latent_var }; + let ans_size_logs = ans_default_state_and_size_log.map(|_, (_, size_log)| size_log); unsafe { page_meta.write_to(ans_size_logs, &mut writer) }; self.write_dissected_page(dissected_page, &mut writer)?; @@ -623,25 +742,33 @@ mod tests { #[test] fn test_choose_delta_sample() { - let latents = vec![0_u32, 1]; + let latents = DynLatents::new(vec![0_u32, 1]).unwrap(); assert_eq!( - choose_delta_sample(&latents, 100, 0), + choose_delta_sample(&latents, 100, 0) + .downcast::() + .unwrap(), vec![0, 1] ); assert_eq!( - choose_delta_sample(&latents, 100, 1), + choose_delta_sample(&latents, 100, 1) + .downcast::() + .unwrap(), vec![0, 1] ); - let latents = (0..300).collect::>(); - let sample = choose_delta_sample(&latents, 100, 1); + let latents = DynLatents::new((0..300).collect::>()).unwrap(); + let sample = choose_delta_sample(&latents, 100, 1) + .downcast::() + .unwrap(); assert_eq!(sample.len(), 200); assert_eq!(&sample[..3], &[0, 1, 2]); assert_eq!(&sample[197..], &[297, 298, 299]); - let latents = (0..8).collect::>(); + let latents = DynLatents::new((0..8).collect::>()).unwrap(); assert_eq!( - choose_delta_sample(&latents, 2, 2), + choose_delta_sample(&latents, 2, 2) + .downcast::() + .unwrap(), vec![0, 1, 3, 4, 6, 7] ); } diff --git a/pco/src/wrapped/file_compressor.rs b/pco/src/wrapped/file_compressor.rs index 3e0cf047..b9820838 100644 --- a/pco/src/wrapped/file_compressor.rs +++ b/pco/src/wrapped/file_compressor.rs @@ -70,7 +70,7 @@ impl FileCompressor { &self, nums: &[T], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { chunk_compressor::new(nums, config) } } diff --git a/pco/src/wrapped/file_decompressor.rs b/pco/src/wrapped/file_decompressor.rs index 90719bb7..6be9926d 100644 --- a/pco/src/wrapped/file_decompressor.rs +++ b/pco/src/wrapped/file_decompressor.rs @@ -5,7 +5,7 @@ use better_io::BetterBufRead; use crate::bit_reader; use crate::bit_reader::BitReaderBuilder; use crate::constants::{CHUNK_META_PADDING, HEADER_PADDING}; -use crate::data_types::Number; +use crate::data_types::{LatentType, Number}; use crate::errors::PcoResult; use crate::metadata::chunk::ChunkMeta; use crate::metadata::format_version::FormatVersion; @@ -49,8 +49,14 @@ impl FileDecompressor { ) -> PcoResult<(ChunkDecompressor, R)> { bit_reader::ensure_buf_read_capacity(&mut src, CHUNK_META_PADDING); let mut reader_builder = BitReaderBuilder::new(src, CHUNK_META_PADDING, 0); - let chunk_meta = - unsafe { ChunkMeta::read_from::(&mut reader_builder, &self.format_version)? }; + let latent_type = LatentType::new::().unwrap(); + let chunk_meta = unsafe { + ChunkMeta::read_from::( + &mut reader_builder, + &self.format_version, + latent_type, + )? 
+ }; let cd = ChunkDecompressor::new(chunk_meta)?; Ok((cd, reader_builder.into_inner())) } diff --git a/pco/src/wrapped/guarantee.rs b/pco/src/wrapped/guarantee.rs index 06d6e775..f19bc5a4 100644 --- a/pco/src/wrapped/guarantee.rs +++ b/pco/src/wrapped/guarantee.rs @@ -1,5 +1,6 @@ use crate::data_types::Latent; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::{Bin, ChunkMeta, DeltaEncoding, DynBins, Mode}; /// Returns the maximum possible byte size of a wrapped header. @@ -8,26 +9,30 @@ pub fn header_size() -> usize { } pub(crate) fn baseline_chunk_meta() -> ChunkMeta { + let primary = ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::new(vec![Bin { + weight: 1, + lower: L::ZERO, + offset_bits: L::BITS, + }]) + .unwrap(), + }; + ChunkMeta { mode: Mode::Classic, delta_encoding: DeltaEncoding::None, - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: DynBins::new(vec![Bin { - weight: 1, - lower: L::ZERO, - offset_bits: L::BITS, - }]) - .unwrap(), - }], + per_latent_var: PerLatentVar { + delta: None, + primary, + secondary: None, + }, } } /// Returns the maximum possible byte size of a wrapped chunk for a given /// latent type (e.g. u32 or u64) and count of numbers. pub fn chunk_size(n: usize) -> usize { - // TODO if we ever add Numbers that are smaller than their Latents, we - // may want to make this more generic baseline_chunk_meta::().exact_size() + n * L::BITS.div_ceil(8) as usize } diff --git a/pco/src/wrapped/page_decompressor.rs b/pco/src/wrapped/page_decompressor.rs index 8b8e53b9..902ee0f4 100644 --- a/pco/src/wrapped/page_decompressor.rs +++ b/pco/src/wrapped/page_decompressor.rs @@ -5,65 +5,49 @@ use std::marker::PhantomData; use better_io::BetterBufRead; use crate::bit_reader; -use crate::bit_reader::{BitReader, BitReaderBuilder}; +use crate::bit_reader::BitReaderBuilder; use crate::constants::{FULL_BATCH_N, PAGE_PADDING}; use crate::data_types::{Latent, Number}; -use crate::delta; -use crate::delta::DeltaMoments; use crate::errors::{PcoError, PcoResult}; -use crate::latent_batch_decompressor::LatentBatchDecompressor; +use crate::latent_page_decompressor::LatentPageDecompressor; +use crate::macros::{define_latent_enum, match_latent_enum}; use crate::metadata::page::PageMeta; -use crate::metadata::{ChunkMeta, DeltaEncoding, Mode}; +use crate::metadata::per_latent_var::{PerLatentVar, PerLatentVarBuilder}; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynBins, DynLatents, Mode}; use crate::progress::Progress; const PERFORMANT_BUF_READ_CAPACITY: usize = 8192; -#[derive(Clone, Debug)] -pub struct State { - n_processed: usize, - latent_batch_decompressors: Vec>, - delta_momentss: Vec>, // one per latent variable - secondary_latents: [L; FULL_BATCH_N], +#[derive(Debug)] +struct LatentScratch { + is_constant: bool, + dst: DynLatents, } -/// Holds metadata about a page and supports decompression. 
-pub struct PageDecompressor { +define_latent_enum!( + #[derive()] + DynLatentPageDecompressor(LatentPageDecompressor) +); + +struct PageDecompressorInner { // immutable n: usize, mode: Mode, delta_encoding: DeltaEncoding, - maybe_constant_latents: Vec>, // 1 per latent var - phantom: PhantomData, // mutable reader_builder: BitReaderBuilder, - state: State, + n_processed: usize, + // TODO make these heap allocated + latent_decompressors: PerLatentVar, + delta_scratch: Option, + secondary_scratch: Option, } -unsafe fn decompress_latents_w_delta( - reader: &mut BitReader, - delta_encoding: DeltaEncoding, - n_remaining: usize, - delta_state: &mut DeltaMoments, - lbd: &mut LatentBatchDecompressor, - dst: &mut [L], -) -> PcoResult<()> { - let n_remaining_pre_delta = n_remaining.saturating_sub(delta_state.order()); - let pre_delta_len = if dst.len() <= n_remaining_pre_delta { - dst.len() - } else { - // If we're at the end, LatentBatchdDecompressor won't initialize the last - // few elements before delta decoding them, so we do that manually here to - // satisfy MIRI. This step isn't really necessary. - dst[n_remaining_pre_delta..].fill(L::default()); - n_remaining_pre_delta - }; - lbd.decompress_latent_batch(reader, &mut dst[..pre_delta_len])?; - match delta_encoding { - DeltaEncoding::None => (), - DeltaEncoding::Consecutive(_) => delta::decode_in_place(delta_state, dst), - } - Ok(()) +/// Holds metadata about a page and supports decompression. +pub struct PageDecompressor { + inner: PageDecompressorInner, + phantom: PhantomData, } fn convert_from_latents_to_numbers(dst: &mut [T]) { @@ -73,127 +57,190 @@ fn convert_from_latents_to_numbers(dst: &mut [T]) { } } -impl PageDecompressor { +impl PageDecompressorInner { pub(crate) fn new(mut src: R, chunk_meta: &ChunkMeta, n: usize) -> PcoResult { bit_reader::ensure_buf_read_capacity(&mut src, PERFORMANT_BUF_READ_CAPACITY); let mut reader_builder = BitReaderBuilder::new(src, PAGE_PADDING, 0); - let page_meta = reader_builder - .with_reader(|reader| unsafe { PageMeta::read_from::(reader, chunk_meta) })?; + let page_meta = + reader_builder.with_reader(|reader| unsafe { PageMeta::read_from(reader, chunk_meta) })?; let mode = chunk_meta.mode; - let delta_momentss = page_meta + + let mut states = PerLatentVarBuilder::default(); + for (key, (chunk_latent_var_meta, page_latent_var_meta)) in chunk_meta .per_latent_var - .iter() - .map(|latent_var_meta| { - let moments = latent_var_meta - .delta_moments - .downcast_ref::() - .unwrap() - .clone(); - DeltaMoments(moments) - }) - .collect::>(); - - let mut latent_batch_decompressors = Vec::new(); - for latent_idx in 0..mode.n_latent_vars() { - let chunk_latent_meta = &chunk_meta.per_latent_var[latent_idx]; - - // this will change to dynamically typed soon - let bins = chunk_latent_meta.bins.downcast_ref::().unwrap(); - let n_in_body = n.saturating_sub(chunk_meta.delta_encoding.n_latents_per_state()); - if bins.is_empty() && n_in_body > 0 { - return Err(PcoError::corruption(format!( - "unable to decompress chunk with no bins and {} latents", - n_in_body - ))); - } - - latent_batch_decompressors.push(LatentBatchDecompressor::new( - chunk_latent_meta.ans_size_log, - bins, - page_meta.per_latent_var[latent_idx].ans_final_state_idxs, - )?); + .as_ref() + .zip_exact(page_meta.per_latent_var.as_ref()) + .enumerated() + { + let var_delta_encoding = chunk_meta.delta_encoding.for_latent_var(key); + let n_in_body = n.saturating_sub(var_delta_encoding.n_latents_per_state()); + let state = match_latent_enum!( + 
&chunk_latent_var_meta.bins, + DynBins(bins) => { + let delta_state = page_latent_var_meta + .delta_state + .downcast_ref::() + .unwrap() + .clone(); + + if bins.is_empty() && n_in_body > 0 { + return Err(PcoError::corruption(format!( + "unable to decompress chunk with no bins and {} latents", + n_in_body + ))); + } + + let lpd = LatentPageDecompressor::new( + chunk_latent_var_meta.ans_size_log, + bins, + var_delta_encoding, + page_latent_var_meta.ans_final_state_idxs, + delta_state, + )?; + + DynLatentPageDecompressor::new(lpd).unwrap() + } + ); + + states.set(key, state); } + let latent_decompressors: PerLatentVar = states.into(); - let maybe_constant_secondary = - if latent_batch_decompressors.len() >= 2 && delta_momentss[1].order() == 0 { - latent_batch_decompressors[1].maybe_constant_value - } else { - None - }; - let maybe_constant_latents = vec![None, maybe_constant_secondary]; + fn make_latent_scratch(lpd: Option<&DynLatentPageDecompressor>) -> Option { + let lpd = lpd?; + + match_latent_enum!( + lpd, + DynLatentPageDecompressor(inner) => { + let maybe_constant_value = inner.maybe_constant_value; + Some(LatentScratch { + is_constant: maybe_constant_value.is_some(), + dst: DynLatents::new(vec![maybe_constant_value.unwrap_or_default(); FULL_BATCH_N]).unwrap(), + }) + } + ) + } + let delta_scratch = make_latent_scratch(latent_decompressors.delta.as_ref()); + let secondary_scratch = make_latent_scratch(latent_decompressors.secondary.as_ref()); // we don't store the whole ChunkMeta because it can get large due to bins - let secondary_default = maybe_constant_secondary.unwrap_or(T::L::default()); Ok(Self { n, mode, delta_encoding: chunk_meta.delta_encoding, - maybe_constant_latents, - phantom: PhantomData, reader_builder, - state: State { - n_processed: 0, - latent_batch_decompressors, - delta_momentss, - secondary_latents: [secondary_default; FULL_BATCH_N], - }, + n_processed: 0, + latent_decompressors, + delta_scratch, + secondary_scratch, + }) + } + + fn n_remaining(&self) -> usize { + self.n - self.n_processed + } +} + +impl PageDecompressor { + #[inline(never)] + pub(crate) fn new(src: R, chunk_meta: &ChunkMeta, n: usize) -> PcoResult { + Ok(Self { + inner: PageDecompressorInner::new(src, chunk_meta, n)?, + phantom: PhantomData::, }) } fn decompress_batch(&mut self, dst: &mut [T]) -> PcoResult<()> { let batch_n = dst.len(); - let n = self.n; - let mode = self.mode; - let State { - latent_batch_decompressors, - delta_momentss, - secondary_latents, - n_processed, - .. - } = &mut self.state; + let inner = &mut self.inner; + let n = inner.n; + let n_remaining = inner.n_remaining(); + let mode = inner.mode; - let secondary_latents = &mut secondary_latents[..batch_n]; - let n_latents = latent_batch_decompressors.len(); + // DELTA LATENTS + if let Some(LatentScratch { + is_constant: false, + dst, + }) = &mut inner.delta_scratch + { + let dyn_lpd = inner.latent_decompressors.delta.as_mut().unwrap(); + let limit = min( + n_remaining.saturating_sub(inner.delta_encoding.n_latents_per_state()), + batch_n, + ); + inner.reader_builder.with_reader(|reader| unsafe { + match_latent_enum!( + dyn_lpd, + DynLatentPageDecompressor(lpd) => { + // Delta latents only line up with pre-delta length of the other + // latents. 
+ // We never apply delta encoding to delta latents, so we just + // skip straight to the inner LatentBatchDecompressor + lpd.decompress_batch_pre_delta( + reader, + &mut dst.downcast_mut::().unwrap()[..limit] + ) + } + ); + Ok(()) + })?; + } + let delta_latents = inner.delta_scratch.as_ref().map(|scratch| &scratch.dst); - self.reader_builder.with_reader(|reader| { + // PRIMARY LATENTS + inner.reader_builder.with_reader(|reader| unsafe { let primary_dst = T::transmute_to_latents(dst); - unsafe { - decompress_latents_w_delta( - reader, - mode.delta_encoding_for_latent_var(0, self.delta_encoding), - n - *n_processed, - &mut delta_momentss[0], - &mut latent_batch_decompressors[0], - primary_dst, - ) - } + let dyn_lpd = inner + .latent_decompressors + .primary + .downcast_mut::() + .unwrap(); + dyn_lpd.decompress_batch( + delta_latents, + n_remaining, + reader, + primary_dst, + ); + Ok(()) })?; - if n_latents >= 2 && self.maybe_constant_latents[1].is_none() { - self.reader_builder.with_reader(|reader| unsafe { - decompress_latents_w_delta( - reader, - mode.delta_encoding_for_latent_var(1, self.delta_encoding), - n - *n_processed, - &mut delta_momentss[1], - &mut latent_batch_decompressors[1], - secondary_latents, - ) + // SECONDARY LATENTS + if let Some(LatentScratch { + is_constant: false, + dst, + }) = &mut inner.secondary_scratch + { + let dyn_lpd = inner.latent_decompressors.secondary.as_mut().unwrap(); + inner.reader_builder.with_reader(|reader| unsafe { + match_latent_enum!( + dyn_lpd, + DynLatentPageDecompressor(lpd) => { + // We never apply delta encoding to delta latents, so we just + // skip straight to the inner LatentBatchDecompressor + lpd.decompress_batch( + delta_latents, + n_remaining, + reader, + &mut dst.downcast_mut::().unwrap()[..batch_n] + ) + } + ); + Ok(()) })?; } T::join_latents( mode, T::transmute_to_latents(dst), - secondary_latents, + inner.secondary_scratch.as_ref().map(|scratch| &scratch.dst), ); convert_from_latents_to_numbers(dst); - *n_processed += batch_n; - if *n_processed == n { - self.reader_builder.with_reader(|reader| { + inner.n_processed += batch_n; + if inner.n_processed == n { + inner.reader_builder.with_reader(|reader| { reader.drain_empty_byte("expected trailing bits at end of page to be empty") })?; } @@ -209,17 +256,18 @@ impl PageDecompressor { /// `dst` must have length either a multiple of 256 or be at least the count /// of numbers remaining in the page. pub fn decompress(&mut self, num_dst: &mut [T]) -> PcoResult { - if num_dst.len() % FULL_BATCH_N != 0 && num_dst.len() < self.n_remaining() { + let n_remaining = self.inner.n_remaining(); + if num_dst.len() % FULL_BATCH_N != 0 && num_dst.len() < n_remaining { return Err(PcoError::invalid_argument(format!( "num_dst's length must either be a multiple of {} or be \ at least the count of numbers remaining ({} < {})", FULL_BATCH_N, num_dst.len(), - self.n_remaining(), + n_remaining, ))); } - let n_to_process = min(num_dst.len(), self.n_remaining()); + let n_to_process = min(num_dst.len(), n_remaining); let mut n_processed = 0; while n_processed < n_to_process { @@ -230,16 +278,12 @@ impl PageDecompressor { Ok(Progress { n_processed, - finished: self.n_remaining() == 0, + finished: self.inner.n_remaining() == 0, }) } - fn n_remaining(&self) -> usize { - self.n - self.state.n_processed - } - /// Returns the rest of the compressed data source. 
pub fn into_src(self) -> R { - self.reader_builder.into_inner() + self.inner.reader_builder.into_inner() } } diff --git a/pco_cli/src/dtypes.rs b/pco_cli/src/dtypes.rs index 5ac74578..1c26885c 100644 --- a/pco_cli/src/dtypes.rs +++ b/pco_cli/src/dtypes.rs @@ -299,5 +299,9 @@ pub fn to_arrow(dtype: NumberType) -> ArrowDataType { NumberType::U16 => ArrowDataType::UInt16, NumberType::U32 => ArrowDataType::UInt32, NumberType::U64 => ArrowDataType::UInt64, + other => panic!( + "number type {:?} not yet supported in pco_cli", + other + ), } } diff --git a/pco_cli/src/input/mod.rs b/pco_cli/src/input/mod.rs index be73a544..3a2beae0 100644 --- a/pco_cli/src/input/mod.rs +++ b/pco_cli/src/input/mod.rs @@ -441,6 +441,12 @@ impl PcoColumnReader { U64 => Arc::new(UInt64Array::from(simple_decompress::( &compressed, )?)), + other => { + return Err(anyhow!( + "number type {:?} not yet supported in pco_cli", + other + )) + } }; Ok(array) } diff --git a/pco_cli/src/inspect/handler.rs b/pco_cli/src/inspect/handler.rs index bc2ff3c2..3a2640e3 100644 --- a/pco_cli/src/inspect/handler.rs +++ b/pco_cli/src/inspect/handler.rs @@ -7,7 +7,8 @@ use tabled::settings::{Alignment, Modify, Style}; use tabled::{Table, Tabled}; use pco::data_types::{Latent, Number}; -use pco::metadata::ChunkMeta; +use pco::match_latent_enum; +use pco::metadata::{ChunkMeta, DynBins, DynLatent, LatentVarKey}; use pco::standalone::{FileDecompressor, MaybeChunkDecompressor}; use crate::core_handlers::CoreHandlerImpl; @@ -39,8 +40,10 @@ pub struct BinSummary { #[derive(Serialize)] pub struct LatentVarSummary { + name: String, n_bins: usize, ans_size_log: u32, + approx_avg_bits: f64, bins: String, } @@ -75,31 +78,53 @@ fn measure_bytes_read(src: &[u8], prev_src_len: &mut usize) -> usize { fn build_latent_var_summaries(meta: &ChunkMeta) -> BTreeMap { let describers = T::get_latent_describers(meta); let mut summaries = BTreeMap::new(); - for (latent_var_idx, latent_var_meta) in meta.per_latent_var.iter().enumerate() { - let describer = &describers[latent_var_idx]; + for (key, (latent_var_meta, describer)) in meta + .per_latent_var + .as_ref() + .zip_exact(describers) + .enumerated() + { let unit = describer.latent_units(); - let bins = latent_var_meta.bins.downcast_ref::().unwrap(); - let mut bin_summaries = Vec::new(); - for bin in bins { - bin_summaries.push(BinSummary { - weight: bin.weight, - lower: format!("{}{}", describer.latent(bin.lower), unit), - offset_bits: bin.offset_bits, - }); - } + let mut approx_total_bits = 0.0; + let bin_summaries = match_latent_enum!( + &latent_var_meta.bins, + DynBins(bins) => { + let mut bin_summaries = Vec::new(); + for bin in bins { + bin_summaries.push(BinSummary { + weight: bin.weight, + lower: format!("{}{}", describer.latent(DynLatent::new(bin.lower).unwrap()), unit), + offset_bits: bin.offset_bits, + }); + let weight = bin.weight as f64; + approx_total_bits += weight * (bin.offset_bits as f64 + latent_var_meta.ans_size_log as f64 - weight.log2()); + } + bin_summaries + } + ); + let n_bins = bin_summaries.len(); let bins_table = Table::new(bin_summaries) .with(Style::rounded()) .with(Modify::new(Columns::new(0..3)).with(Alignment::right())) .to_string(); + let total_weight = (1 << latent_var_meta.ans_size_log) as f64; let summary = LatentVarSummary { - n_bins: bins.len(), + name: describer.latent_var(), + n_bins, ans_size_log: latent_var_meta.ans_size_log, + approx_avg_bits: approx_total_bits / total_weight, bins: bins_table.to_string(), }; - summaries.insert(describer.latent_var(), summary); + let 
key_name = match key { + LatentVarKey::Delta => "delta", + LatentVarKey::Primary => "primary", + LatentVarKey::Secondary => "secondary", + }; + + summaries.insert(key_name.to_string(), summary); } summaries diff --git a/pco_python/README.md b/pco_python/README.md index 00b2fa1f..38cb4c75 100644 --- a/pco_python/README.md +++ b/pco_python/README.md @@ -22,7 +22,7 @@ Pcodec is a codec for numerical sequences. Example usage: >>> # compress >>> compressed = standalone.simple_compress(nums, ChunkConfig()) >>> print(f'compressed to {len(compressed)} bytes') -compressed to 6946257 bytes +compressed to 6946258 bytes >>> >>> # decompress >>> recovered = standalone.simple_decompress(compressed) diff --git a/pco_python/src/config.rs b/pco_python/src/config.rs index a658fe4b..166ea652 100644 --- a/pco_python/src/config.rs +++ b/pco_python/src/config.rs @@ -67,6 +67,12 @@ impl PyDeltaSpec { fn try_consecutive(order: usize) -> Self { Self(DeltaSpec::TryConsecutive(order)) } + + /// :returns: a DeltaSpec that tries to use delta lookbacks, if possible. + #[staticmethod] + fn try_lookback() -> Self { + Self(DeltaSpec::TryLookback) + } } #[pyclass(name = "PagingSpec")] diff --git a/pco_python/src/wrapped/compressor.rs b/pco_python/src/wrapped/compressor.rs index 8155519b..1933efcb 100644 --- a/pco_python/src/wrapped/compressor.rs +++ b/pco_python/src/wrapped/compressor.rs @@ -5,9 +5,9 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyModule}; use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use pco::data_types::{Latent, Number, NumberType}; +use pco::data_types::{Number, NumberType}; use pco::wrapped::{ChunkCompressor, FileCompressor}; -use pco::{match_latent_enum, match_number_enum, ChunkConfig}; +use pco::{match_number_enum, ChunkConfig}; use crate::utils::pco_err_to_py; use crate::{utils, PyChunkConfig}; @@ -18,15 +18,10 @@ struct PyFc { inner: FileCompressor, } -pco::define_latent_enum!( - #[derive()] - DynCc(ChunkCompressor) -); - // can't pass inner directly since pyo3 only supports unit variant enums /// Holds metadata about a chunk and supports compressing one page at a time. #[pyclass(name = "ChunkCompressor")] -struct PyCc(DynCc); +struct PyCc(ChunkCompressor); impl PyFc { fn chunk_compressor_generic( @@ -34,7 +29,7 @@ impl PyFc { py: Python, arr: &Bound>, config: &ChunkConfig, - ) -> PyResult> { + ) -> PyResult { let arr_ro = arr.readonly(); let src = arr_ro.as_slice()?; py.allow_threads(|| self.inner.chunk_compressor(src, config)) @@ -87,7 +82,7 @@ impl PyFc { number_type, NumberType => { let cc = self.chunk_compressor_generic::(py, nums.downcast::>()?, &config)?; - Ok(PyCc(DynCc::new(cc).unwrap())) + Ok(PyCc(cc)) } ) } @@ -99,22 +94,14 @@ impl PyCc { /// /// :raises: TypeError, RuntimeError fn write_chunk_meta<'py>(&self, py: Python<'py>) -> PyResult> { - match_latent_enum!( - &self.0, - DynCc(cc) => { - let mut res = Vec::new(); - cc.write_chunk_meta(&mut res).map_err(pco_err_to_py)?; - Ok(PyBytes::new_bound(py, &res)) - } - ) + let mut res = Vec::new(); + self.0.write_chunk_meta(&mut res).map_err(pco_err_to_py)?; + Ok(PyBytes::new_bound(py, &res)) } /// :returns: a list containing the count of numbers in each page. fn n_per_page(&self) -> Vec { - match_latent_enum!( - &self.0, - DynCc(cc) => { cc.n_per_page() } - ) + self.0.n_per_page() } /// :param page_idx: an int for which page you want to write. 
@@ -123,15 +110,10 @@ impl PyCc { /// /// :raises: TypeError, RuntimeError fn write_page<'py>(&self, py: Python<'py>, page_idx: usize) -> PyResult> { - match_latent_enum!( - &self.0, - DynCc(cc) => { - let mut res = Vec::new(); - py.allow_threads(|| cc.write_page(page_idx, &mut res)) - .map_err(pco_err_to_py)?; - Ok(PyBytes::new_bound(py, &res)) - } - ) + let mut res = Vec::new(); + py.allow_threads(|| self.0.write_page(page_idx, &mut res)) + .map_err(pco_err_to_py)?; + Ok(PyBytes::new_bound(py, &res)) } } diff --git a/pco_python/test/test_standalone.py b/pco_python/test/test_standalone.py index 3b5085a6..d5561686 100644 --- a/pco_python/test/test_standalone.py +++ b/pco_python/test/test_standalone.py @@ -15,6 +15,7 @@ ) all_dtypes = ("f2", "f4", "f8", "i2", "i4", "i8", "u2", "u4", "u8") + @pytest.mark.parametrize("length", all_lengths) @pytest.mark.parametrize("dtype", all_dtypes) def test_round_trip_decompress_into(length, dtype): @@ -96,23 +97,22 @@ def test_compression_options(): # this is mostly just to check that there is no error, but these settings # should give worse compression than the defaults - assert ( - len( - standalone.simple_compress( - data, - ChunkConfig( - compression_level=0, - delta_spec=DeltaSpec.try_consecutive(1), - mode_spec=ModeSpec.classic(), - paging_spec=PagingSpec.equal_pages_up_to(77), - ), - ) + for delta_spec in [DeltaSpec.try_consecutive(1), DeltaSpec.try_lookback()]: + compressed = standalone.simple_compress( + data, + ChunkConfig( + compression_level=0, + delta_spec=delta_spec, + mode_spec=ModeSpec.classic(), + paging_spec=PagingSpec.equal_pages_up_to(77), + ), ) - > default_size - ) + assert len(compressed) > default_size -@pytest.mark.parametrize("mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_int_mult(10)]) +@pytest.mark.parametrize( + "mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_int_mult(10)] +) def test_compression_int_mode_spec_options(mode_spec): data = (np.random.normal(size=100) * 1000).astype(np.int32) @@ -128,7 +128,15 @@ def test_compression_int_mode_spec_options(mode_spec): np.testing.assert_array_equal(data, out) -@pytest.mark.parametrize("mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_float_mult(10.0), ModeSpec.try_float_quant(4)]) +@pytest.mark.parametrize( + "mode_spec", + [ + ModeSpec.auto(), + ModeSpec.classic(), + ModeSpec.try_float_mult(10.0), + ModeSpec.try_float_quant(4), + ], +) def test_compression_float_mode_spec_options(mode_spec): data = (np.random.normal(size=100) * 1000).astype(np.int32) * np.pi