diff --git a/Makefile b/Makefile
index 514f80bba..b356c4f0f 100644
--- a/Makefile
+++ b/Makefile
@@ -118,6 +118,7 @@ SUBDIR += tests/equals
 SUBDIR += tests/subtract
 SUBDIR += tests/detect_required
 SUBDIR += tests/determinise
+SUBDIR += tests/eager_output
 SUBDIR += tests/endids
 SUBDIR += tests/epsilons
 SUBDIR += tests/fsm
diff --git a/fuzz/target.c b/fuzz/target.c
index 543891bb9..d56a9bf82 100644
--- a/fuzz/target.c
+++ b/fuzz/target.c
@@ -26,10 +26,21 @@
 /* 10 seconds */
 #define TIMEOUT_USEC (10ULL * 1000 * 1000)
 
+static bool verbosity_checked = false;	/* set once VERBOSE has been looked up */
+static bool verbose = false;		/* true when VERBOSE is set in the environment */
+/* Print to stderr, printf-style, but only when verbose is set. */
+#define LOG(...)					\
+	do {						\
+		if (verbose) {				\
+			fprintf(stderr, __VA_ARGS__);	\
+		}					\
+	} while (0)
+
 enum run_mode {
 	MODE_DEFAULT,
 	MODE_SHUFFLE_MINIMISE,
 	MODE_ALL_PRINT_FUNCTIONS,
+	MODE_EAGER_OUTPUT,
 };
 
 
@@ -344,6 +355,508 @@ fuzz_all_print_functions(FILE *f, const char *pattern, bool det, bool min, const
 	return EXIT_SUCCESS;
 }
 
+#define MAX_PATTERNS 4	/* upper bound on patterns taken from one fuzzer input */
+struct eager_output_cb_info {	/* set of distinct eager output IDs collected by append_eager_output_cb */
+	size_t used;	/* number of valid entries in ids[] */
+	fsm_output_id_t ids[MAX_PATTERNS];	/* collected IDs, capacity MAX_PATTERNS */
+};
+
+static void
+reset_eager_output_info(struct eager_output_cb_info *info)
+{
+	info->used = 0;	/* forget collected IDs; the contents of ids[] are left as-is */
+}
+
+struct feo_env {	/* state shared by fuzz_eager_output and its generate-matches callbacks */
+	bool ok;	/* cleared by the callbacks when a check fails */
+	size_t pattern_count;	/* entries used in patterns[] */
+	size_t fsm_count;	/* entries used in fsms[] */
+	size_t max_match_count;	/* callbacks halt generation past this many matches */
+	size_t max_steps;	/* callbacks halt generation past this many steps */
+
+	char *patterns[MAX_PATTERNS];	/* malloc'd, '\0'-terminated pattern strings */
+	struct fsm *fsms[MAX_PATTERNS];	/* DFAs compiled from the patterns */
+	struct fsm *combined;	/* DFA built from clones of fsms[] */
+
+	/* which pattern is being used for generation, (size_t)-1 for combined */
+	size_t current_pattern;
+
+	struct eager_output_cb_info outputs;	/* IDs collected while running an individual DFA */
+	struct eager_output_cb_info outputs_combined;	/* IDs collected while running the combined DFA */
+};
+
+void
+append_eager_output_cb(fsm_output_id_t id, void *opaque)	/* opaque: struct eager_output_cb_info * */
+{
+	struct eager_output_cb_info *info = (struct eager_output_cb_info *)opaque;
+	/* Eager output callback: record id unless it was already collected. */
+	for (size_t i = 0; i < info->used; i++) {
+		if (info->ids[i] == id) {
+			return;	/* already present */
+		}
+	}
+
+	assert(info->used < MAX_PATTERNS);	/* ids[] has room for MAX_PATTERNS distinct IDs only */
+	info->ids[info->used++] = id;
+}
+
+static enum fsm_generate_matches_cb_res
+gen_combined_check_individual_cb(const struct fsm *fsm,
+    size_t depth, size_t match_count, size_t steps,
+    const char *input, size_t input_length,
+    fsm_state_t end_state, void *opaque);
+
+static enum fsm_generate_matches_cb_res
+gen_individual_check_combined_cb(const struct fsm *fsm,
+    size_t depth, size_t match_count, size_t steps,
+    const char *input, size_t input_length,
+    fsm_state_t end_state, void *opaque);
+
+#define DEF_MAX_STEPS 100000
+#define DEF_MAX_MATCH_COUNT 1000
+
+/* This isn't part of the public interface, per se. */
+void
+fsm_eager_output_dump(FILE *f, const struct fsm *fsm);
+
+/* Fuzz eager outputs: split data into up to MAX_PATTERNS regexes, build a
+ * minimised DFA per regex (tagged with its pattern index as eager output ID)
+ * and a combined DFA from clones of them, then use fsm_generate_matches to
+ * cross-check the IDs reported by the combined and the individual DFAs.
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE once env.ok has been cleared. */
+static int
+fuzz_eager_output(const uint8_t *data, size_t size)
+{
+	struct feo_env env = {
+		.ok = true,
+		.pattern_count = 0,
+		.max_steps = DEF_MAX_STEPS,
+		.max_match_count = DEF_MAX_MATCH_COUNT,
+	};
+
+	{
+		const char *steps = getenv("STEPS");
+		const char *matches = getenv("MATCHES");
+		if (steps != NULL) {
+			env.max_steps = strtoul(steps, NULL, 10);
+			assert(env.max_steps > 0);
+		}
+		if (matches != NULL) {
+			env.max_match_count = strtoul(matches, NULL, 10);
+			assert(env.max_match_count > 0);
+		}
+	}
+
+	size_t max_pattern_length = 0;
+
+	/* chop data into a series of patterns */
+	{
+		size_t prev = 0;
+		size_t offset = 0;
+
+		/* Patterns with lots of '.' can take a while to determinise.
+		 * That slows down fuzzer coverage, but isn't interesting here. */
+		size_t dots = 0;
+
+		while (offset < size && env.pattern_count < MAX_PATTERNS) {
+#define MAX_DOTS 4
+			if (data[offset] == '.') { dots++; }
+
+			if (data[offset] == '\0' || data[offset] == '\n' || offset == size - 1) {
+				size_t len = offset - prev;
+
+				if (dots > MAX_DOTS) {
+					/* ignored; count dots afresh so later patterns are not skipped too */
+					prev = offset;
+					dots = 0;
+				} else if (len > 0) {
+					char *pattern = malloc(len + 1);
+					assert(pattern != NULL);
+
+					memcpy(pattern, &data[prev], len);
+					if (pattern[len - 1] == '\n') {	/* last copied byte; pattern[len] is not initialised yet */
+						len--; /* drop trailing newline */
+					}
+					pattern[len] = '\0';
+
+					/* ignore unhandled parser errors from "\x", see #386 */
+					bool keep = true;
+					for (size_t i = 0; i + 1 < len; i++) {
+						if (pattern[i] == '\\' && pattern[i + 1] == 'x') {
+							keep = false;
+						}
+					}
+
+					if (keep) {
+						env.patterns[env.pattern_count++] = pattern;
+						if (len > max_pattern_length) {
+							max_pattern_length = len;
+						}
+					} else {
+						free(pattern);
+					}
+					prev = offset;	/* NOTE(review): the delimiter byte becomes the start of the next pattern */
+					dots = 0;
+				}
+			}
+
+			offset++;
+		}
+	}
+
+	struct re_anchoring_info anchorage[MAX_PATTERNS] = {0};
+
+	/* For each pattern, attempt to compile to a DFA. env.fsms[] and
+	 * anchorage[] are packed: both are indexed by env.fsm_count, so a
+	 * skipped pattern leaves no gap and its anchoring info is simply
+	 * overwritten by the next candidate. */
+	for (size_t p_i = 0; p_i < env.pattern_count; p_i++) {
+		const char *p = env.patterns[p_i];
+
+		if (!re_is_anchored(RE_PCRE, fsm_sgetc, &p, 0, NULL, &anchorage[env.fsm_count])) {
+			continue; /* unsupported regex */
+		}
+
+		p = env.patterns[p_i];
+		struct fsm *fsm = re_comp(RE_PCRE, fsm_sgetc, &p, NULL, 0, NULL);
+
+		LOG("%s: pattern %zu: '%s' => %p\n", __func__, p_i, env.patterns[p_i], (void *)fsm);
+
+		if (fsm == NULL) {
+			continue; /* invalid regex */
+		}
+
+		const fsm_output_id_t endid = (fsm_output_id_t)p_i;
+		int res = fsm_seteageroutputonends(fsm, endid);
+		assert(res == 1);
+
+		if (verbose) {
+			fprintf(stderr, "==== pattern %zu, pre det\n", p_i);
+			fsm_dump(stderr, fsm);
+			fsm_eager_output_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+
+			fsm_state_t c = fsm_countstates(fsm);
+			for (fsm_state_t i = 0; i < c; i++) {
+				fprintf(stderr, "-- %u: end? %d\n", i, fsm_isend(fsm, i));
+			}
+		}
+
+		res = fsm_determinise(fsm);
+		assert(res == 1);
+
+		res = fsm_minimise(fsm);
+		assert(res == 1);
+
+		fsm_state_t start;
+		if (!fsm_getstart(fsm, &start)) {
+			fsm_free(fsm);
+			continue;
+		}
+
+		if (verbose) {
+			fprintf(stderr, "==== pattern %zu, post det\n", p_i);
+			fsm_dump(stderr, fsm);
+			fsm_eager_output_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+
+			fsm_state_t c = fsm_countstates(fsm);
+			for (fsm_state_t i = 0; i < c; i++) {
+				fprintf(stderr, "-- %u: end? %d\n", i, fsm_isend(fsm, i));
+			}
+		}
+
+		fsm_eager_output_set_cb(fsm, append_eager_output_cb, &env.outputs);
+		env.fsms[env.fsm_count++] = fsm;
+	}
+
+	/* don't bother checking combined behavior unless there's multiple DFAs */
+	if (env.fsm_count < 2) { goto cleanup; }
+
+	/* copy and combine fsms into one DFA */
+	{
+		size_t used = 0;
+		struct fsm_union_entry entries[MAX_PATTERNS] = {0};
+
+		for (size_t i = 0; i < env.fsm_count; i++) {
+			/* fsms[] is packed, so no gaps are expected below fsm_count */
+			if (env.fsms[i] == NULL) { continue; }
+
+			fsm_state_t start;
+			if (!fsm_getstart(env.fsms[i], &start)) {
+				assert(!"hit");
+			}
+
+			struct fsm *cp = fsm_clone(env.fsms[i]);
+			assert(cp != NULL);
+
+			if (verbose) {
+				fprintf(stderr, "==== cp %zu\n", i);
+				fsm_dump(stderr, cp);
+				fsm_eager_output_dump(stderr, cp);
+				fprintf(stderr, "====\n");
+
+				fsm_state_t c = fsm_countstates(cp);
+				for (fsm_state_t s = 0; s < c; s++) {
+					fprintf(stderr, "-- %u: end? %d\n", s, fsm_isend(cp, s));
+				}
+			}
+
+			entries[used].fsm = cp;
+			entries[used].anchored_start = anchorage[i].start;
+			entries[used].anchored_end = anchorage[i].end;
+			used++;
+		}
+
+		if (used == 0) { goto cleanup; } /* nothing to do */
+
+		/* consumes entries[] */
+		struct fsm *fsm = fsm_union_repeated_pattern_group(used, entries, NULL);
+		assert(fsm != NULL);
+
+		if (verbose) {
+			fprintf(stderr, "==== combined (pre-det)\n");
+			fsm_dump(stderr, fsm);
+			fsm_eager_output_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+		}
+
+		if (!fsm_determinise(fsm)) {
+			assert(!"failed to determinise");
+		}
+
+		if (!fsm_minimise(fsm)) {
+			assert(!"failed to minimise");
+		}
+
+		LOG("%s: combined state_count %d\n", __func__, fsm_countstates(fsm));
+		env.combined = fsm;
+		/* no callback is set here: match_input_get_eager_outputs installs one before each run */
+
+		if (verbose) {
+			fprintf(stderr, "==== combined\n");
+			fsm_dump(stderr, env.combined);
+			fsm_eager_output_dump(stderr, env.combined);
+			fprintf(stderr, "====\n");
+		}
+	}
+
+	/* Use fsm_generate_matches to check for matches that got lost
+	 * and false positives introduced while combining the DFAs.
+	 * Use the combined DFA to generate matches, check that the
+	 * match behavior agrees with the individual DFA copies. */
+	env.current_pattern = (size_t)-1;
+	if (!fsm_generate_matches(env.combined, max_pattern_length, gen_combined_check_individual_cb, &env)) {
+		env.ok = false;	/* generation itself failed; still reported as a failure */
+		goto cleanup;
+	}
+
+	if (!env.ok) { goto cleanup; }
+
+	/* Likewise, use every individual DFA to generate matches and
+	 * check behavior against the combined DFA. */
+	for (size_t i = 0; i < env.fsm_count; i++) {
+		env.current_pattern = i;
+		if (!fsm_generate_matches(env.fsms[i], max_pattern_length, gen_individual_check_combined_cb, &env)) {
+			env.ok = false;	/* generation itself failed; still reported as a failure */
+			goto cleanup;
+		}
+	}
+
+cleanup:
+	for (size_t i = 0; i < MAX_PATTERNS; i++) {
+		free(env.patterns[i]);	/* free(NULL) is a no-op */
+		env.patterns[i] = NULL;
+		if (env.fsms[i] != NULL) {
+			fsm_free(env.fsms[i]);
+		}
+	}
+	if (env.combined != NULL) {
+		fsm_free(env.combined);
+	}
+
+	return env.ok ? EXIT_SUCCESS : EXIT_FAILURE;	/* never a leftover library return code */
+}
+
+static int
+cmp_output_id(const void *pa, const void *pb)	/* qsort comparator: ascending fsm_output_id_t */
+{
+	const fsm_output_id_t a = *(const fsm_output_id_t *)pa;	/* keep the const of the qsort arguments */
+	const fsm_output_id_t b = *(const fsm_output_id_t *)pb;
+	return a < b ? -1 : a > b ? 1 : 0;
+}
+
+static bool
+match_input_get_eager_outputs(struct fsm *fsm, const char *input, size_t input_length,
+    struct eager_output_cb_info *dst)
+{
+	(void)input_length;	/* unused */
+	fsm_state_t end;
+	/* Start from an empty ID set and route this FSM's eager outputs into dst. */
+	reset_eager_output_info(dst);
+
+	fsm_eager_output_set_cb(fsm, append_eager_output_cb, dst);
+	const int ret = fsm_exec(fsm, fsm_sgetc, &input, &end, NULL);
+	if (ret == 0) {
+		return false; /* no match; dst keeps whatever was collected, unsorted */
+	} else {
+		assert(ret == 1); /* match */
+	}
+
+	/* sort the IDs, to make comparison cheaper */
+	qsort(dst->ids, dst->used, sizeof(dst->ids[0]), cmp_output_id);
+	return true;	/* match */
+}
+
+/* For a given matching input generated by the combined DFA, check that
+ * only the expected individual source DFAs match. */
+static enum fsm_generate_matches_cb_res
+gen_combined_check_individual_cb(const struct fsm *fsm,
+    size_t depth, size_t match_count, size_t steps,
+    const char *input, size_t input_length,
+    fsm_state_t end_state, void *opaque)
+{
+	(void)fsm;
+	(void)depth;
+	(void)end_state;
+
+	struct feo_env *env = opaque;
+	assert(env->current_pattern == (size_t)-1);	/* only valid while generating from the combined DFA */
+	/* Stop generating once the configured match/step budget is exceeded. */
+	if (match_count > env->max_match_count) { return FSM_GENERATE_MATCHES_CB_RES_HALT; }
+	if (steps > env->max_steps) { return FSM_GENERATE_MATCHES_CB_RES_HALT; }
+
+	/* execute, to set eager outputs */
+	if (!match_input_get_eager_outputs(env->combined, input, input_length, &env->outputs_combined)) {
+		env->ok = false;	/* the combined DFA rejected an input it generated itself */
+		return FSM_GENERATE_MATCHES_CB_RES_HALT;
+	}
+
+	size_t individual_outputs_used = 0;
+	fsm_output_id_t individual_outputs[MAX_PATTERNS];
+	/* Run every source DFA on the same input and gather the IDs they emit. */
+	for (size_t i = 0; i < env->pattern_count; i++) {
+		struct fsm *fsm = env->fsms[i];	/* shadows the (unused) fsm parameter */
+		if (fsm == NULL) { continue; }
+
+		if (!match_input_get_eager_outputs(fsm, input, input_length, &env->outputs)) {
+			env->ok = false;	/* NOTE(review): any source DFA that does not match fails the run — confirm this is intended */
+			return FSM_GENERATE_MATCHES_CB_RES_HALT;
+		}
+
+		if (env->outputs.used > 0) {
+			assert(env->outputs.used == 1);	/* a source DFA is expected to emit a single ID */
+			individual_outputs[individual_outputs_used++] = env->outputs.ids[0];
+		}
+	}
+	/* The ID sets must agree: same size, and every combined ID also seen individually. */
+	bool match = true;
+	if (env->outputs_combined.used != individual_outputs_used) {
+		match = false;
+	}
+
+	for (size_t cmb_i = 0; cmb_i < env->outputs_combined.used; cmb_i++) {
+		const fsm_output_id_t cur = env->outputs_combined.ids[cmb_i];
+		assert(env->fsms[cmb_i] != NULL);	/* NOTE(review): indexed by position in ids[], not by the ID value */
+		bool found = false;
+		for (size_t i = 0; i < individual_outputs_used; i++) {
+			if (individual_outputs[i] == cur) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			match = false;
+			break;
+		}
+	}
+
+	if (!match) {
+		fprintf(stderr, "%s: combined <-> individual mismatch for input '%s'(%zd)!\n", __func__, input, input_length);
+
+		fprintf(stderr, "-- combined: %zu IDs:", env->outputs_combined.used);
+		for (size_t cmb_i = 0; cmb_i < env->outputs_combined.used; cmb_i++) {
+			fprintf(stderr, " %d", env->outputs_combined.ids[cmb_i]);
+		}
+		fprintf(stderr, "\n");
+		fprintf(stderr, "-- individiual: %zu IDs:", individual_outputs_used);
+		for (size_t i = 0; i < individual_outputs_used; i++) {
+			fprintf(stderr, " %d", individual_outputs[i]);
+		}
+		fprintf(stderr, "\n");
+		goto fail;
+	}
+
+	return FSM_GENERATE_MATCHES_CB_RES_CONTINUE;
+
+fail:
+	env->ok = false;	/* record the mismatch for fuzz_eager_output */
+	return FSM_GENERATE_MATCHES_CB_RES_HALT;
+}
+
+/* For a given matching input generated by one of the source DFAs, check that
+ * the combined DFA also matches, and that the only other source DFAs that match
+ * are ones that should according to the combined DFA. */
+static enum fsm_generate_matches_cb_res
+gen_individual_check_combined_cb(const struct fsm *fsm,
+    size_t depth, size_t match_count, size_t steps,
+    const char *input, size_t input_length,
+    fsm_state_t end_state, void *opaque)
+{
+	(void)fsm;
+	(void)depth;
+	(void)end_state;
+
+	struct feo_env *env = opaque;
+	assert(env->current_pattern < env->pattern_count);	/* must name a source DFA, not (size_t)-1 */
+	if (match_count > env->max_match_count) { return FSM_GENERATE_MATCHES_CB_RES_HALT; }
+	if (steps > env->max_steps) { return FSM_GENERATE_MATCHES_CB_RES_HALT; }
+
+	struct fsm *cur_fsm = env->fsms[env->current_pattern];
+	if (cur_fsm == NULL) { return FSM_GENERATE_MATCHES_CB_RES_CONTINUE; }	/* nothing to check for this slot */
+
+	/* execute, to set eager outputs */
+	if (!match_input_get_eager_outputs(cur_fsm, input, input_length, &env->outputs)) {
+		goto fail;	/* the current source DFA does not match the input */
+	}
+	if (!match_input_get_eager_outputs(env->combined, input, input_length, &env->outputs_combined)) {
+		goto fail;	/* the combined DFA does not match the input */
+	}
+
+	assert(env->outputs.used == 1);	/* a source DFA is expected to emit a single ID */
+	/* That ID must be among the IDs emitted by the combined DFA. */
+	bool found = false;
+	for (size_t i = 0; i < env->outputs_combined.used; i++) {
+		if (env->outputs_combined.ids[i] == env->outputs.ids[0]) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		fprintf(stderr, "%s: combined <-> individual mismatch for input '%s'(%zd)!\n", __func__, input, input_length);
+
+		fprintf(stderr, "-- combined: %zu IDs:", env->outputs_combined.used);
+		for (size_t cmb_i = 0; cmb_i < env->outputs_combined.used; cmb_i++) {
+			fprintf(stderr, " %d", env->outputs_combined.ids[cmb_i]);
+		}
+		fprintf(stderr, "\n");
+		fprintf(stderr, "-- pattern %zd: %zu IDs:", env->current_pattern, env->outputs.used);
+		for (size_t i = 0; i < env->outputs.used; i++) {
+			fprintf(stderr, " %d", env->outputs.ids[i]);
+		}
+		fprintf(stderr, "\n");
+		goto fail;
+	}
+
+	return FSM_GENERATE_MATCHES_CB_RES_CONTINUE;
+
+fail:
+	env->ok = false;	/* record the mismatch for fuzz_eager_output */
+	return FSM_GENERATE_MATCHES_CB_RES_HALT;
+}
+#undef MAX_PATTERNS
+
 #define MAX_FUZZER_DATA (64 * 1024)
 static uint8_t data_buf[MAX_FUZZER_DATA + 1];
 
@@ -358,6 +871,7 @@ get_run_mode(void)
 	switch (mode[0]) {
 	case 'm': return MODE_SHUFFLE_MINIMISE;
 	case 'p': return MODE_ALL_PRINT_FUNCTIONS;
+	case 'E': return MODE_EAGER_OUTPUT;
 	case 'd':
 	default:
 		return MODE_DEFAULT;
@@ -373,6 +887,11 @@ harness_fuzzer_target(const uint8_t *data, size_t size)
 		return EXIT_SUCCESS;
 	}
 
+	if (!verbosity_checked) {
+		verbosity_checked = true;
+		verbose = getenv("VERBOSE") != NULL;
+	}
+
 	/* Ensure that input is '\0'-terminated. */
 	if (size > MAX_FUZZER_DATA) {
 		size = MAX_FUZZER_DATA;
@@ -392,6 +911,9 @@ harness_fuzzer_target(const uint8_t *data, size_t size)
 	case MODE_SHUFFLE_MINIMISE:
 		return shuffle_minimise(pattern);
 
+	case MODE_EAGER_OUTPUT:
+		return fuzz_eager_output(data, size);
+
 	case MODE_ALL_PRINT_FUNCTIONS:
 	{
 		if (dev_null == NULL) {
@@ -403,7 +925,7 @@ harness_fuzzer_target(const uint8_t *data, size_t size)
 		const bool det = b0 & 0x1;
 		const bool min = b0 & 0x2;
 		const enum fsm_io io_mode = (b0 >> 2) % 3;
-		
+
 		const char *shifted_pattern = (const char *)&data_buf[1];
 		int res = fuzz_all_print_functions(dev_null, shifted_pattern, det, min, io_mode);
 		return res;
diff --git a/include/fsm/bool.h b/include/fsm/bool.h
index d92518297..4d9f1889a 100644
--- a/include/fsm/bool.h
+++ b/include/fsm/bool.h
@@ -52,6 +52,16 @@ struct fsm *
 fsm_union_array(size_t fsm_count,
     struct fsm **fsms, struct fsm_combined_base_pair *bases);
 
+struct fsm_union_entry {	/* one input to fsm_union_repeated_pattern_group */
+	struct fsm *fsm;	/* automaton to combine */
+	bool anchored_start;	/* anchoring flag for the start of the source pattern */
+	bool anchored_end;	/* anchoring flag for the end of the source pattern */
+};
+
+struct fsm *
+fsm_union_repeated_pattern_group(size_t entry_count,
+    struct fsm_union_entry *entries, struct fsm_combined_base_pair *bases);
+
 struct fsm *
 fsm_intersect(struct fsm *a, struct fsm *b);
 
diff --git a/include/fsm/fsm.h b/include/fsm/fsm.h
index 877d5c1bf..701efe70b 100644
--- a/include/fsm/fsm.h
+++ b/include/fsm/fsm.h
@@ -7,6 +7,7 @@
 #ifndef FSM_H
 #define FSM_H
 
+#include <stdlib.h>
 #include <stdbool.h>
 
 struct fsm;
@@ -27,6 +28,9 @@ typedef unsigned int fsm_state_t;
  * original FSM(s) matched when executing a combined FSM. */
 typedef unsigned int fsm_end_id_t;
 
+/* Eager output ID. */
+typedef unsigned int fsm_output_id_t;
+
 #define FSM_END_ID_MAX UINT_MAX
 
 /*
@@ -266,6 +270,39 @@ fsm_mapendids(struct fsm * fsm, fsm_endid_remap_fun remap, void *opaque);
 void
 fsm_increndids(struct fsm * fsm, int delta);
 
+/* Associate an eagerly matched numeric ID with the end states in an fsm.
+ *
+ * This is similar to fsm_setendid, but has different performance
+ * trade-offs. In particular, it can become extremely expensive to
+ * combine multiple DFAs with endids on their end states when they
+ * represent regexes with unanchored ends, because the FSM has to
+ * explicitly represent all the possible combinations of matches by
+ * copying the entire path to every reachable end state. Eager endids
+ * are associated with the edge leaving the main pattern match.
+ *
+ * Returns 1 on success, 0 on error.
+ * */
+int
+fsm_seteagerendid(struct fsm *fsm, fsm_end_id_t id);
+
+/* Set an eager output ID to emit every time the state is entered.
+ * This turns the automaton into a Moore machine. */
+int
+fsm_seteageroutput(struct fsm *fsm, fsm_state_t state, fsm_output_id_t id);
+
+/* Set an eager output ID on all current end states. */
+int
+fsm_seteageroutputonends(struct fsm *fsm, fsm_output_id_t id);
+
+/* HACK */
+typedef void
+fsm_eager_output_cb(fsm_output_id_t id, void *opaque);
+void
+fsm_eager_output_set_cb(struct fsm *fsm, fsm_eager_output_cb *cb, void *opaque);
+
+void
+fsm_eager_output_get_cb(const struct fsm *fsm, fsm_eager_output_cb **cb, void **opaque);
+
 /*
  * Find the state (if there is just one), or add epsilon edges from all states,
  * for which the given predicate is true.
@@ -436,6 +473,15 @@ fsm_shortest(const struct fsm *fsm,
 	fsm_state_t start, fsm_state_t goal,
 	unsigned (*cost)(fsm_state_t from, fsm_state_t to, char c));
 
+/* HACK */
+typedef void
+fsm_eager_endid_cb(fsm_end_id_t id, void *opaque);
+void
+fsm_eager_endid_set_cb(struct fsm *fsm, fsm_eager_endid_cb *cb, void *opaque);
+
+void
+fsm_eager_endid_get_cb(const struct fsm *fsm, fsm_eager_endid_cb **cb, void **opaque);
+
 /*
  * Execute an FSM reading input from the user-specified callback fsm_getc().
  * fsm_getc() is passed the opaque pointer given, and is expected to return
diff --git a/include/fsm/print.h b/include/fsm/print.h
index 9f7264e81..10244129b 100644
--- a/include/fsm/print.h
+++ b/include/fsm/print.h
@@ -45,6 +45,9 @@ enum fsm_print_lang {
 struct fsm_state_metadata {
 	const fsm_end_id_t *end_ids;
 	size_t end_id_count;
+
+	const fsm_output_id_t *eager_output_ids;
+	size_t eager_output_count;
 };
 
 /*
diff --git a/include/re/re.h b/include/re/re.h
index 20408e98a..a3e1f7e0c 100644
--- a/include/re/re.h
+++ b/include/re/re.h
@@ -136,6 +136,21 @@ re_comp(enum re_dialect dialect,
 	const struct fsm_alloc *alloc,
 	enum re_flags flags, struct re_err *err);
 
+struct re_anchoring_info {	/* anchoring flags written by re_is_anchored */
+	int start;	/* flag for anchoring at the start of the regex */
+	int end;	/* flag for anchoring at the end of the regex */
+	/* FIXME: this could also check for AST_FLAG_NULLABLE, AST_FLAG_UNSATISFIABLE,
+	 * AST_FLAG_ALWAYS_CONSUMES, AST_FLAG_CAN_CONSUME */
+};
+
+/* Parse and analyze the regex enough to determine whether it is
+ * anchored at the start and/or end. Returns 0 if the regex is not
+ * supported, otherwise returns 1 and writes anchoring flags into *info. */
+int
+re_is_anchored(enum re_dialect dialect, re_getchar_fun *f, void *opaque,
+	enum re_flags flags, struct re_err *err,
+	struct re_anchoring_info *info);
+
 /*
  * Return a human-readable string describing a given error code. The string
  * returned has static storage, and must not be freed.
diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile
index 5e2ed57e3..c7782f0ff 100644
--- a/src/libfsm/Makefile
+++ b/src/libfsm/Makefile
@@ -8,6 +8,7 @@ SRC += src/libfsm/consolidate.c
 SRC += src/libfsm/clone.c
 SRC += src/libfsm/closure.c
 SRC += src/libfsm/detect_required.c
+SRC += src/libfsm/eager_output.c
 SRC += src/libfsm/edge.c
 SRC += src/libfsm/empty.c
 SRC += src/libfsm/end.c
diff --git a/src/libfsm/clone.c b/src/libfsm/clone.c
index 9fd236a4d..2161599ae 100644
--- a/src/libfsm/clone.c
+++ b/src/libfsm/clone.c
@@ -19,6 +19,7 @@
 #include "internal.h"
 #include "capture.h"
 #include "endids.h"
+#include "eager_output.h"
 
 #define LOG_CLONE_ENDIDS 0
 
@@ -28,6 +29,9 @@ copy_capture_actions(struct fsm *dst, const struct fsm *src);
 static int
 copy_end_ids(struct fsm *dst, const struct fsm *src);
 
+static int
+copy_eager_output_ids(struct fsm *dst, const struct fsm *src);
+
 struct fsm *
 fsm_clone(const struct fsm *fsm)
 {
@@ -80,6 +84,12 @@ fsm_clone(const struct fsm *fsm)
 			fsm_free(new);
 			return NULL;
 		}
+
+		/* does not copy callback */
+		if (!copy_eager_output_ids(new, fsm)) {
+			fsm_free(new);
+			return NULL;
+		}
 	}
 
 	return new;
@@ -159,3 +169,31 @@ copy_end_ids(struct fsm *dst, const struct fsm *src)
 
 	return env.ok;
 }
+
+struct copy_eager_output_ids_env {	/* context handed to copy_eager_output_ids_cb */
+	bool ok;	/* cleared when fsm_seteageroutput fails */
+	struct fsm *dst;	/* FSM receiving the copied IDs */
+};
+
+static int
+copy_eager_output_ids_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)	/* opaque: struct copy_eager_output_ids_env * */
+{
+	struct copy_eager_output_ids_env *env = opaque;
+	if (!fsm_seteageroutput(env->dst, state, id)) {	/* same state number in the destination */
+		env->ok = false;
+		return 0;	/* report the failure to the iterator */
+	}
+
+	return 1;
+}
+
+static int
+copy_eager_output_ids(struct fsm *dst, const struct fsm *src)
+{
+	/* The callback clears .ok if any ID cannot be attached to dst. */
+	struct copy_eager_output_ids_env state = {
+		.ok = true, .dst = dst,
+	};
+	fsm_eager_output_iter_all(src, copy_eager_output_ids_cb, &state);
+	return state.ok;
+}
diff --git a/src/libfsm/consolidate.c b/src/libfsm/consolidate.c
index 236a4f6f5..b7a8905b2 100644
--- a/src/libfsm/consolidate.c
+++ b/src/libfsm/consolidate.c
@@ -25,6 +25,7 @@
 #include "internal.h"
 #include "capture.h"
 #include "endids.h"
+#include "eager_output.h"
 
 #define LOG_MAPPING 0
 #define LOG_CONSOLIDATE_CAPTURES 0
@@ -53,6 +54,10 @@ static int
 consolidate_end_ids(struct fsm *dst, const struct fsm *src,
     const fsm_state_t *mapping, size_t mapping_count);
 
+static int
+consolidate_eager_output_ids(struct fsm *dst, const struct fsm *src,
+    const fsm_state_t *mapping, size_t mapping_count);
+
 static fsm_state_t
 mapping_cb(fsm_state_t id, const void *opaque)
 {
@@ -154,6 +159,10 @@ fsm_consolidate(const struct fsm *src,
 		}
 	}
 
+	if (!consolidate_eager_output_ids(dst, src, mapping, mapping_count)) {
+		goto cleanup;
+	}
+
 	f_free(src->alloc, seen);
 
 	return dst;
@@ -270,3 +279,40 @@ consolidate_end_ids(struct fsm *dst, const struct fsm *src,
 
 	return ret;
 }
+
+struct consolidate_eager_output_ids_env {	/* context handed to consolidate_eager_output_ids_cb */
+	bool ok;	/* cleared when fsm_seteageroutput fails */
+	struct fsm *dst;	/* FSM receiving the remapped IDs */
+	const fsm_state_t *mapping;	/* mapping[src_state] gives the destination state */
+	size_t mapping_count;	/* number of entries in mapping[] */
+};
+
+static int
+consolidate_eager_output_ids_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)	/* opaque: struct consolidate_eager_output_ids_env * */
+{
+	struct consolidate_eager_output_ids_env *env = opaque;
+	assert(state < env->mapping_count);	/* state must have a mapping entry */
+	const fsm_state_t dst_state = env->mapping[state];
+	/* Attach the ID to the state that the source state was mapped to. */
+	if (!fsm_seteageroutput(env->dst, dst_state, id)) {
+		env->ok = false;
+		return 0;	/* report the failure to the iterator */
+	}
+
+	return 1;
+}
+
+static int
+consolidate_eager_output_ids(struct fsm *dst, const struct fsm *src,
+    const fsm_state_t *mapping, size_t mapping_count)
+{
+	struct consolidate_eager_output_ids_env env = {
+		.ok = true,	/* cleared by the callback on failure */
+		.dst = dst,
+		.mapping = mapping,
+		.mapping_count = mapping_count,
+	};
+	fsm_eager_output_iter_all(src, consolidate_eager_output_ids_cb, &env);	/* visit src's eager output IDs */
+	return env.ok;	/* nonzero unless attaching an ID failed */
+}
+
diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c
index 42992b6bc..8978ce06c 100644
--- a/src/libfsm/determinise.c
+++ b/src/libfsm/determinise.c
@@ -6,6 +6,9 @@
 
 #include "determinise_internal.h"
 
+#include <fsm/print.h>
+#include <fsm/options.h>
+
 static void
 dump_labels(FILE *f, const uint64_t labels[4])
 {
@@ -253,6 +256,10 @@ fsm_determinise(struct fsm *nfa)
 			goto cleanup;
 		}
 
+		if (!remap_eager_outputs(&map, issp, dfa, nfa)) {
+			goto cleanup;
+		}
+
 		fsm_move(nfa, dfa);
 	}
 
@@ -334,6 +341,22 @@ add_reverse_mapping(const struct fsm_alloc *alloc,
 	return 1;
 }
 
+static void
+free_reverse_mappings(const struct fsm_alloc *alloc, size_t map_count, struct reverse_mapping *rmaps)	/* frees every entry's list, then rmaps */
+{
+	if (rmaps == NULL) { return; }
+
+	for (size_t map_i = 0; map_i < map_count; map_i++) {
+		struct reverse_mapping *rmap = &rmaps[map_i];
+		if (rmap->list != NULL) {	/* one list per entry: free it once, whatever count is */
+			f_free(alloc, rmap->list);
+			rmap->list = NULL;
+		}
+		rmap->count = 0;
+	}
+	f_free(alloc, rmaps);
+}
+
 static int
 det_copy_capture_actions_cb(fsm_state_t state,
     enum capture_action_type type, unsigned capture_id, fsm_state_t to,
@@ -405,7 +428,7 @@ hash_iss(interned_state_set_id iss)
 }
 
 static struct mapping *
-map_first(struct map *map, struct map_iter *iter)
+map_first(const struct map *map, struct map_iter *iter)
 {
 	iter->m = map;
 	iter->i = 0;
@@ -641,22 +664,14 @@ stack_pop(struct mappingstack *stack)
 	return item;
 }
 
-static int
-remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
-    struct fsm *dst_dfa, struct fsm *src_nfa)
+static struct reverse_mapping *
+build_reverse_mappings(const struct map *map, struct interned_state_set_pool *issp,
+    struct fsm *dst_dfa, const struct fsm *src_nfa)
 {
+	struct reverse_mapping *reverse_mappings = NULL;
 	struct map_iter it;
 	struct state_iter si;
 	struct mapping *m;
-	struct reverse_mapping *reverse_mappings;
-	fsm_state_t state;
-	const size_t capture_count = fsm_countcaptures(src_nfa);
-	size_t i, j;
-	int res = 0;
-
-	if (capture_count == 0) {
-		return 1;
-	}
 
 	/* This is not 1 to 1 -- if state X is now represented by multiple
 	 * states Y in the DFA, and state X has action(s) when transitioning
@@ -667,9 +682,7 @@ remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
 	 * checking reachability from every X, but the actual path
 	 * handling later will also check reachability. */
 	reverse_mappings = f_calloc(dst_dfa->alloc, src_nfa->statecount, sizeof(reverse_mappings[0]));
-	if (reverse_mappings == NULL) {
-		return 0;
-	}
+	if (reverse_mappings == NULL) { goto cleanup; }
 
 	/* build reverse mappings table: for every NFA state X, if X is part
 	 * of the new DFA state Y, then add Y to a list for X */
@@ -679,6 +692,7 @@ remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
 		assert(m->dfastate < dst_dfa->statecount);
 		ss = interned_state_set_get_state_set(issp, iss_id);
 
+		fsm_state_t state;
 		for (state_set_reset(ss, &si); state_set_next(&si, &state); ) {
 			if (!add_reverse_mapping(dst_dfa->alloc,
 				reverse_mappings,
@@ -688,33 +702,47 @@ remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
 		}
 	}
 
-#if LOG_DETERMINISE_CAPTURES
+#if LOG_BUILD_REVERSE_MAPPING
 	fprintf(stderr, "#### reverse mapping for %zu states\n", src_nfa->statecount);
-	for (i = 0; i < src_nfa->statecount; i++) {
+	for (size_t i = 0; i < src_nfa->statecount; i++) {
 		struct reverse_mapping *rm = &reverse_mappings[i];
 		fprintf(stderr, "%lu:", i);
-		for (j = 0; j < rm->count; j++) {
+		for (size_t j = 0; j < rm->count; j++) {
 			fprintf(stderr, " %u", rm->list[j]);
 		}
 		fprintf(stderr, "\n");
 	}
-#else
-	(void)j;
 #endif
 
+	return reverse_mappings;
+
+cleanup:
+	free_reverse_mappings(dst_dfa->alloc, src_nfa->statecount, reverse_mappings);
+	return NULL;
+}
+
+static int
+remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
+    struct fsm *dst_dfa, struct fsm *src_nfa)
+{
+	const size_t capture_count = fsm_countcaptures(src_nfa);
+	int res = 0;
+
+	if (capture_count == 0) {
+		return 1;
+	}
+
+	struct reverse_mapping *reverse_mappings = build_reverse_mappings(map, issp, dst_dfa, src_nfa);
+	if (reverse_mappings == NULL) { goto cleanup; }
+
 	if (!det_copy_capture_actions(reverse_mappings, dst_dfa, src_nfa)) {
 		goto cleanup;
 	}
 
 	res = 1;
-cleanup:
-	for (i = 0; i < src_nfa->statecount; i++) {
-		if (reverse_mappings[i].list != NULL) {
-			f_free(dst_dfa->alloc, reverse_mappings[i].list);
-		}
-	}
-	f_free(dst_dfa->alloc, reverse_mappings);
 
+cleanup:
+	free_reverse_mappings(dst_dfa->alloc, src_nfa->statecount, reverse_mappings);
 	return res;
 }
 
@@ -2528,3 +2556,50 @@ analyze_closures__grow_outputs(struct analyze_closures_env *env)
 	env->output_ceil = nceil;
 	return 1;
 }
+
+struct remap_eager_output_env {	/* context handed to remap_eager_output_cb */
+	bool ok;	/* cleared when fsm_seteageroutput fails */
+	struct fsm *dst;	/* FSM receiving the IDs */
+	fsm_state_t dst_state;	/* state in dst that receives the IDs */
+};
+
+static int
+remap_eager_output_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)	/* opaque: struct remap_eager_output_env * */
+{
+	(void)state;	/* the target is env->dst_state, not the visited state */
+	struct remap_eager_output_env *env = opaque;
+	if (!fsm_seteageroutput(env->dst, env->dst_state, id)) {
+		env->ok = false;
+		return 0;	/* report the failure to the iterator */
+	}
+
+	return 1;
+}
+
+static int
+remap_eager_outputs(const struct map *map, struct interned_state_set_pool *issp,
+	struct fsm *dst_dfa, const struct fsm *src_nfa)
+{
+	/* Every DFA state stands for a set of NFA states (looked up through the
+	 * map and issp); give it the eager output IDs of each member. */
+	struct map_iter m_iter;
+	struct mapping *cur = map_first(map, &m_iter);
+	for (; cur != NULL; cur = map_next(&m_iter)) {
+		struct state_set *members = interned_state_set_get_state_set(issp, cur->iss);
+		assert(members != NULL);
+
+		struct remap_eager_output_env env = {
+			.ok = true,
+			.dst = dst_dfa,
+			.dst_state = cur->dfastate,
+		};
+		struct state_iter s_iter;
+		fsm_state_t nfa_state;
+		for (state_set_reset(members, &s_iter); state_set_next(&s_iter, &nfa_state); ) {
+			fsm_eager_output_iter_state(src_nfa, nfa_state, remap_eager_output_cb, &env);
+			if (!env.ok) { return 0; }
+		}
+	}
+
+	return 1;
+}
diff --git a/src/libfsm/determinise_internal.h b/src/libfsm/determinise_internal.h
index cfd4ea663..2e925d28c 100644
--- a/src/libfsm/determinise_internal.h
+++ b/src/libfsm/determinise_internal.h
@@ -23,6 +23,7 @@
 #include "internal.h"
 #include "capture.h"
 #include "endids.h"
+#include "eager_output.h"
 
 #include <ctype.h>
 
@@ -35,6 +36,7 @@
 #define LOG_AC 0
 #define LOG_GROUPING 0
 #define LOG_ANALYSIS_STATS 0
+#define LOG_BUILD_REVERSE_MAPPING 0
 
 #if LOG_DETERMINISE_CAPTURES || LOG_INPUT
 #include <fsm/print.h>
@@ -72,7 +74,7 @@ struct map {
 };
 
 struct map_iter {
-	struct map *m;
+	const struct map *m;
 	size_t i;
 };
 
@@ -304,7 +306,7 @@ static void
 map_free(struct map *map);
 
 static struct mapping *
-map_first(struct map *map, struct map_iter *iter);
+map_first(const struct map *map, struct map_iter *iter);
 
 static struct mapping *
 map_next(struct map_iter *iter);
@@ -325,6 +327,10 @@ static int
 remap_capture_actions(struct map *map, struct interned_state_set_pool *issp,
 	struct fsm *dst_dfa, struct fsm *src_nfa);
 
+static int
+remap_eager_outputs(const struct map *map, struct interned_state_set_pool *issp,
+	struct fsm *dst_dfa, const struct fsm *src_nfa);
+
 static struct mappingstack *
 stack_init(const struct fsm_alloc *alloc);
 
diff --git a/src/libfsm/eager_output.c b/src/libfsm/eager_output.c
new file mode 100644
index 000000000..e37a8a4bf
--- /dev/null
+++ b/src/libfsm/eager_output.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2024 Scott Vokes
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#include <stdio.h>
+#include <assert.h>
+
+#include "internal.h"
+
+#include <fsm/pred.h>
+#include <fsm/print.h>
+
+#include <adt/edgeset.h>
+#include <adt/hash.h>
+#include <adt/stateset.h>
+
+#include "eager_output.h"
+
+#define LOG_LEVEL 0
+
+/* must be a power of 2 */
+#define DEF_BUCKET_COUNT 4
+#define DEF_ENTRY_CEIL 2
+
+struct eager_output_info {
+	fsm_eager_output_cb *cb;	/* stored by fsm_eager_output_set_cb */
+	void *opaque;			/* stored and returned alongside cb */
+
+	struct eager_output_htab {
+		size_t bucket_count;	/* asserted to be a power of 2 */
+		size_t buckets_used;	/* incremented when a bucket gets an entry */
+		/* empty if entry is NULL, otherwise keyed by state */
+		struct eager_output_bucket {
+			fsm_state_t state;
+			struct eager_output_entry {
+				unsigned used;	/* number of IDs stored in ids[] */
+				unsigned ceil;	/* number of IDs ids[] has room for */
+				fsm_end_id_t ids[];	/* NOTE(review): setters take fsm_output_id_t -- confirm the types agree */
+			} *entry;
+		} *buckets;
+	} htab;
+};
+
+void
+fsm_eager_output_set_cb(struct fsm *fsm, fsm_eager_output_cb *cb, void *opaque)
+{
+#if LOG_LEVEL > 2
+	fprintf(stderr, "-- fsm_eager_output_set_cb %p\n", (void *)fsm);
+#endif
+	assert(fsm != NULL);
+	assert(fsm->eager_output_info != NULL);
+	fsm->eager_output_info->cb = cb;
+	fsm->eager_output_info->opaque = opaque;
+}
+
+void
+fsm_eager_output_get_cb(const struct fsm *fsm, fsm_eager_output_cb **cb, void **opaque)
+{
+	*cb = fsm->eager_output_info->cb;
+	*opaque = fsm->eager_output_info->opaque;
+}
+
+int
+fsm_eager_output_init(struct fsm *fsm)
+{
+	/* Allocate the eager output info and an empty table of
+	 * DEF_BUCKET_COUNT buckets, then attach them to fsm.
+	 * Returns 1 on success, 0 if either allocation fails. */
+	struct eager_output_info *info = f_calloc(fsm->alloc, 1, sizeof(*info));
+	if (info == NULL) {
+		return 0;
+	}
+
+	info->htab.buckets = f_calloc(fsm->alloc,
+	    DEF_BUCKET_COUNT, sizeof(info->htab.buckets[0]));
+	if (info->htab.buckets == NULL) {
+		f_free(fsm->alloc, info);
+		return 0;
+	}
+	info->htab.bucket_count = DEF_BUCKET_COUNT;
+#if LOG_LEVEL > 2
+	fprintf(stderr, "-- fsm_eager_output_init %p\n", (void *)fsm);
+#endif
+	fsm->eager_output_info = info;
+	return 1;
+}
+
+void
+fsm_eager_output_free(struct fsm *fsm)
+{
+	/* Release every bucket's entry, the bucket array, and the info
+	 * struct itself. A NULL fsm or missing info is a no-op. */
+	if (fsm == NULL || fsm->eager_output_info == NULL) { return; }
+	struct eager_output_info *info = fsm->eager_output_info;
+	for (size_t b_i = 0; b_i < info->htab.bucket_count; b_i++) {
+		struct eager_output_entry *entry = info->htab.buckets[b_i].entry;
+		if (entry != NULL) { f_free(fsm->alloc, entry); }
+	}
+	f_free(fsm->alloc, info->htab.buckets);
+	f_free(fsm->alloc, info);
+#if LOG_LEVEL > 2
+	fprintf(stderr, "-- fsm_eager_output_free %p\n", (void *)fsm);
+#endif
+	fsm->eager_output_info = NULL;
+}
+
+int
+fsm_seteageroutputonends(struct fsm *fsm, fsm_output_id_t id)
+{
+	/* Attach id as an eager output to every end state; 0 on failure. */
+	assert(fsm != NULL);
+	const size_t state_count = fsm_countstates(fsm);
+	for (size_t s = 0; s < state_count; s++) {
+		if (!fsm_isend(fsm, s)) { continue; }
+		if (!fsm_seteageroutput(fsm, s, id)) { return 0; }
+	}
+	return 1;
+}
+
+static bool
+grow_htab(const struct fsm_alloc *alloc, struct eager_output_htab *htab)
+{
+	/* Double the bucket array and re-insert every occupied bucket by
+	 * linear probing. Entries are moved, not copied. Returns false
+	 * (leaving htab untouched) if the new array cannot be allocated. */
+	const size_t old_count = htab->bucket_count;
+	const size_t new_count = 2*old_count;
+	assert(new_count != 0);
+
+	struct eager_output_bucket *grown = f_calloc(alloc, new_count, sizeof(grown[0]));
+	if (grown == NULL) { return false; }
+
+	const uint64_t new_mask = new_count - 1;
+	assert((new_mask & new_count) == 0); /* power of 2 */
+
+	for (size_t i = 0; i < old_count; i++) {
+		const struct eager_output_bucket *src = &htab->buckets[i];
+		if (src->entry == NULL) { continue; }
+		const uint64_t hash = hash_id(src->state);
+		for (size_t probes = 0; probes < new_count; probes++) {
+			struct eager_output_bucket *dst = &grown[(hash + probes) & new_mask];
+			if (dst->entry != NULL) {
+				assert(dst->state != src->state);
+				continue;
+			}
+			*dst = *src;
+			break;
+		}
+	}
+
+	f_free(alloc, htab->buckets);
+	htab->buckets = grown;
+	htab->bucket_count = new_count;
+	return true;
+}
+
+int
+fsm_seteageroutput(struct fsm *fsm, fsm_state_t state, fsm_output_id_t id)
+{
+	/* Add id to the eager output IDs for state, creating the state's
+	 * table entry on first use. Re-adding an ID that is already present
+	 * is a no-op. Returns 1 on success, 0 on allocation failure. */
+	assert(fsm != NULL);
+	struct eager_output_info *info = fsm->eager_output_info;
+	assert(info != NULL);
+	assert(info->htab.bucket_count > 0);
+
+	/* Keep the table at most half full, so probing always finds a slot. */
+	if (info->htab.buckets_used >= info->htab.bucket_count/2) {
+		if (!grow_htab(fsm->alloc, &info->htab)) { return 0; }
+	}
+
+	const uint64_t hash = hash_id(state);
+	const uint64_t mask = info->htab.bucket_count - 1;
+	assert((mask & info->htab.bucket_count) == 0); /* power of 2 */
+
+	for (size_t probes = 0; probes < info->htab.bucket_count; probes++) {
+		const size_t b_i = (hash + probes) & mask;
+		struct eager_output_bucket *b = &info->htab.buckets[b_i];
+		struct eager_output_entry *e = b->entry;
+		if (e == NULL) { /* empty: claim this bucket for state */
+			const size_t alloc_sz = sizeof(*e)
+			    + DEF_ENTRY_CEIL * sizeof(e->ids[0]);
+			e = f_calloc(fsm->alloc, 1, alloc_sz);
+			if (e == NULL) {
+				return 0;
+			}
+			e->ceil = DEF_ENTRY_CEIL;
+			b->state = state;
+			b->entry = e;
+			info->htab.buckets_used++;
+		} else if (b->state != state) { /* collision */
+			continue;
+		}
+
+		/* Ignore duplicates. This is checked before growing the
+		 * entry, so re-adding an ID never allocates and cannot fail. */
+		for (size_t i = 0; i < e->used; i++) {
+			if (e->ids[i] == id) { return 1; }
+		}
+
+		if (e->used == e->ceil) {
+			const size_t nceil = 2 * e->ceil;
+			const size_t nsize = sizeof(*e)
+			    + nceil * sizeof(e->ids[0]);
+			struct eager_output_entry *nentry = f_realloc(fsm->alloc, e, nsize);
+			if (nentry == NULL) { return 0; } /* b->entry is still valid */
+			nentry->ceil = nceil;
+			b->entry = nentry;
+			e = nentry;
+		}
+
+		e->ids[e->used++] = id;
+		fsm->states[state].has_eager_outputs = 1;
+		return 1;
+	}
+
+	/* Not reached (table is at most half full); the ID was not stored. */
+	assert(!"unreachable");
+	return 0;
+}
+
+bool
+fsm_eager_output_has_eager_output(const struct fsm *fsm)
+{
+	/* True if any state in the table has at least one eager output ID. */
+	assert(fsm->eager_output_info != NULL);
+	const struct eager_output_htab *htab = &fsm->eager_output_info->htab;
+
+	for (size_t i = 0; i < htab->bucket_count; i++) {
+		const struct eager_output_entry *e = htab->buckets[i].entry;
+		if (e != NULL && e->used > 0) { return true; }
+	}
+	return false;
+}
+
+bool
+fsm_eager_output_state_has_eager_output(const struct fsm *fsm, fsm_state_t state)
+{
+	assert(state < fsm->statecount);
+	return fsm->states[state].has_eager_outputs;
+}
+
+void
+fsm_eager_output_iter_state(const struct fsm *fsm,
+    fsm_state_t state, fsm_eager_output_iter_cb *cb, void *opaque)
+{
+	/* Call cb once for each eager output ID attached to state, stopping
+	 * early if cb returns 0. Does nothing if state has no entry. */
+	assert(fsm != NULL);
+	assert(cb != NULL);
+	assert(fsm->eager_output_info != NULL);
+
+	const uint64_t hash = hash_id(state);
+	const struct eager_output_info *info = fsm->eager_output_info;
+	const uint64_t mask = info->htab.bucket_count - 1;
+	assert((mask & info->htab.bucket_count) == 0); /* power of 2 */
+
+	for (size_t probes = 0; probes < info->htab.bucket_count; probes++) {
+		const size_t b_i = (hash + probes) & mask;
+		const struct eager_output_bucket *b = &info->htab.buckets[b_i];
+		const struct eager_output_entry *e = b->entry;
+		if (e == NULL) { /* empty: state is not in the table */
+			return;
+		} else if (b->state != state) { /* collision */
+			continue;
+		}
+
+		assert(e->used == 0 || fsm->states[state].has_eager_outputs);
+		for (size_t i = 0; i < e->used; i++) {
+			if (!cb(state, e->ids[i], opaque)) { return; }
+		}
+		return;	/* keys are unique (grow_htab asserts it): stop probing */
+	}
+}
+
+void
+fsm_eager_output_iter_all(const struct fsm *fsm,
+    fsm_eager_output_iter_cb *cb, void *opaque)
+{
+	/* Call cb for every (state, eager output ID) pair in the table, in
+	 * bucket order, stopping as soon as cb returns 0. */
+	assert(fsm != NULL);
+	assert(cb != NULL);
+	assert(fsm->eager_output_info != NULL);
+
+	const struct eager_output_htab *htab = &fsm->eager_output_info->htab;
+
+	for (size_t slot = 0; slot < htab->bucket_count; slot++) {
+		const struct eager_output_bucket *b = &htab->buckets[slot];
+		const struct eager_output_entry *e = b->entry;
+		if (e == NULL) { continue; } /* empty bucket */
+
+		assert(e->used == 0 || fsm->states[b->state].has_eager_outputs);
+
+		unsigned i = 0;
+		while (i < e->used) {
+			if (!cb(b->state, e->ids[i], opaque)) { return; }
+			i++;
+		}
+	}
+}
+
+struct dump_env {
+	FILE *f;
+	size_t count;
+};
+
+/* fsm_eager_output_iter_all callback: print one pair and count it. */
+static int
+dump_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	struct dump_env *env = opaque;
+	fprintf(env->f, "-- %u: id %u\n", (unsigned)state, (unsigned)id);
+	env->count++;
+	return 1;	/* keep iterating */
+}
+
+void
+fsm_eager_output_dump(FILE *f, const struct fsm *fsm)
+{
+	struct dump_env env = { .f = f };
+	fprintf(f, "%s:\n", __func__);
+	fsm_eager_output_iter_all(fsm, dump_cb, (void *)&env);
+	fprintf(f, "== %zu total\n", env.count);
+}
+
+static int
+inc_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	/* Iteration callback: bump the size_t counter behind opaque. */
+	(void)state; (void)id;
+	size_t *counter = opaque;
+	*counter += 1;
+	return 1;
+}
+
+bool
+fsm_eager_output_has_any(const struct fsm *fsm, fsm_state_t state, size_t *count)
+{
+	/* Count state's eager output IDs; *count is optional (may be NULL). */
+	size_t total = 0;
+	fsm_eager_output_iter_state(fsm, state, inc_cb, &total);
+	if (count != NULL) { *count = total; }
+	return total != 0;
+}
+
+int
+fsm_eager_output_compact(struct fsm *fsm, fsm_state_t *mapping, size_t mapping_count)
+{
+	/* Re-key the table after state renumbering: the entry for old state s
+	 * moves to mapping[s]; entries mapped to FSM_STATE_REMAP_NO_STATE are
+	 * freed. Returns 1 on success, 0 (table unchanged) on alloc failure. */
+	/* Don't reallocate unless something has actually changed. */
+	bool changes = false;
+	for (size_t i = 0; i < mapping_count; i++) {
+		if (mapping[i] != i) {
+			changes = true;
+			break;
+		}
+	}
+	if (!changes) { return 1; }
+
+	struct eager_output_info *eoi = fsm->eager_output_info;
+	struct eager_output_bucket *nbuckets = f_calloc(fsm->alloc,
+	    eoi->htab.bucket_count, sizeof(nbuckets[0]));
+	if (nbuckets == NULL) { return 0; }
+
+	const uint64_t mask = eoi->htab.bucket_count - 1;
+	assert((eoi->htab.bucket_count & mask) == 0);
+
+	for (size_t ob_i = 0; ob_i < eoi->htab.bucket_count; ob_i++) {
+		const struct eager_output_bucket *ob = &eoi->htab.buckets[ob_i];
+		if (ob->entry == NULL) { continue; }
+		assert(ob->state < mapping_count);
+		const fsm_state_t nstate = mapping[ob->state];
+		if (nstate == FSM_STATE_REMAP_NO_STATE) {
+			/* State removed: free its entry here, since nothing
+			 * refers to it once the old bucket array is freed. */
+			f_free(fsm->alloc, ob->entry);
+			eoi->htab.buckets_used--;
+			continue;
+		}
+
+		const uint64_t hash = hash_id(nstate);
+		bool placed = false;
+		for (size_t probes = 0; probes < eoi->htab.bucket_count; probes++) {
+			struct eager_output_bucket *nb = &nbuckets[(hash + probes) & mask];
+			if (nb->entry != NULL) { continue; }
+			nb->state = nstate;
+			nb->entry = ob->entry;
+			placed = true;
+			break;
+		}
+		assert(placed);
+	}
+
+	f_free(fsm->alloc, eoi->htab.buckets);
+	eoi->htab.buckets = nbuckets;
+	return 1;
+}
diff --git a/src/libfsm/eager_output.h b/src/libfsm/eager_output.h
new file mode 100644
index 000000000..1b48ba4c4
--- /dev/null
+++ b/src/libfsm/eager_output.h
@@ -0,0 +1,46 @@
+#ifndef EAGER_OUTPUT_H
+#define EAGER_OUTPUT_H
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <inttypes.h>
+
+struct eager_output_info;
+
+int
+fsm_eager_output_init(struct fsm *fsm);
+
+void
+fsm_eager_output_free(struct fsm *fsm);
+
+bool
+fsm_eager_output_has_eager_output(const struct fsm *fsm);
+
+bool
+fsm_eager_output_state_has_eager_output(const struct fsm *fsm, fsm_state_t state);
+
+void
+fsm_eager_output_dump(FILE *f, const struct fsm *fsm);
+
+/* Callback for fsm_eager_output_iter_*.
+ * The return value indicates whether iteration should continue.
+ * The results may not be sorted in any particular order. */
+typedef int
+fsm_eager_output_iter_cb(fsm_state_t state, fsm_output_id_t id, void *opaque);
+
+void
+fsm_eager_output_iter_state(const struct fsm *fsm,
+    fsm_state_t state, fsm_eager_output_iter_cb *cb, void *opaque);
+
+void
+fsm_eager_output_iter_all(const struct fsm *fsm,
+    fsm_eager_output_iter_cb *cb, void *opaque);
+
+bool
+fsm_eager_output_has_any(const struct fsm *fsm,
+    fsm_state_t state, size_t *count);
+
+int
+fsm_eager_output_compact(struct fsm *fsm, fsm_state_t *mapping, size_t mapping_count);
+
+#endif
diff --git a/src/libfsm/epsilons.c b/src/libfsm/epsilons.c
index 9394a2d9b..adfcdec2a 100644
--- a/src/libfsm/epsilons.c
+++ b/src/libfsm/epsilons.c
@@ -9,24 +9,42 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <errno.h>
+#include <string.h>
 
 #include <fsm/fsm.h>
 #include <fsm/pred.h>
+#include <fsm/print.h>
 
 #include <adt/alloc.h>
 #include <adt/set.h>
 #include <adt/edgeset.h>
 #include <adt/stateset.h>
+#include <adt/u64bitset.h>
 
 #include "internal.h"
 #include "capture.h"
 #include "endids.h"
+#include "eager_output.h"
 
 #define DUMP_EPSILON_CLOSURES 0
 #define DEF_PENDING_CAPTURE_ACTIONS_CEIL 2
 #define LOG_RM_EPSILONS_CAPTURES 0
 #define DEF_CARRY_ENDIDS_COUNT 2
 
+#define LOG_LEVEL 0
+
+#if LOG_LEVEL > 0
+static bool log_it;
+#define LOG(LVL, ...)					\
+	do {						\
+		if (log_it && LVL <= LOG_LEVEL) {	\
+			fprintf(stderr, __VA_ARGS__);	\
+		}					\
+	} while (0)
+#else
+#define LOG(_LVL, ...)
+#endif
+
 struct remap_env {
 #ifndef NDEBUG
 	char tag;
@@ -57,6 +75,49 @@ static int
 carry_endids(struct fsm *fsm, struct state_set *states,
     fsm_state_t s);
 
+static void
+mark_states_reachable_by_label(const struct fsm *nfa, uint64_t *reachable_by_label);
+
+struct eager_output_buf {
+#define DEF_EAGER_OUTPUT_BUF_CEIL 8
+	bool ok;
+	const struct fsm_alloc *alloc;
+	size_t ceil;
+	size_t used;
+	fsm_output_id_t *ids;
+};
+
+static bool
+append_eager_output_id(struct eager_output_buf *buf, fsm_output_id_t id)
+{
+	/* Add id to buf unless already present (checked first, so duplicates
+	 * never grow the buffer). On alloc failure, clears buf->ok; false. */
+	for (size_t i = 0; i < buf->used; i++) {
+		if (buf->ids[i] == id) { return true; }
+	}
+
+	if (buf->used == buf->ceil) {
+		const size_t nceil = buf->ceil == 0 ? DEF_EAGER_OUTPUT_BUF_CEIL : 2*buf->ceil;
+		fsm_output_id_t *nids = f_realloc(buf->alloc, buf->ids, nceil * sizeof(nids[0]));
+		if (nids == NULL) {
+			buf->ok = false;
+			return false;
+		}
+		buf->ids = nids;
+		buf->ceil = nceil;
+	}
+	buf->ids[buf->used++] = id;
+	return true;
+}
+
+static int
+collect_eager_output_ids_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	/* Iteration callback: gather id into the buffer; stop (0) on failure. */
+	(void)state;
+	return append_eager_output_id(opaque, id) ? 1 : 0;
+}
+
 int
 fsm_remove_epsilons(struct fsm *nfa)
 {
@@ -64,9 +125,20 @@ fsm_remove_epsilons(struct fsm *nfa)
 	int res = 0;
 	struct state_set **eclosures = NULL;
 	fsm_state_t s;
+	struct eager_output_buf eager_output_buf = {
+		.ok = true,
+		.alloc = nfa->alloc,
+	};
+	uint64_t *reachable_by_label = NULL;
+
+	LOG(2, "%s: starting\n", __func__);
 
 	INIT_TIMERS();
 
+#if LOG_LEVEL > 0
+	log_it = getenv("LOG") != NULL;
+#endif
+
 	assert(nfa != NULL);
 
 	TIME(&pre);
@@ -94,6 +166,17 @@ fsm_remove_epsilons(struct fsm *nfa)
 	}
 #endif
 
+	const size_t state_words = u64bitset_words(state_count);
+	reachable_by_label = f_calloc(nfa->alloc, state_words, sizeof(reachable_by_label[0]));
+	if (reachable_by_label == NULL) { goto cleanup; }
+
+	mark_states_reachable_by_label(nfa, reachable_by_label);
+
+	fsm_state_t start;
+	if (!fsm_getstart(nfa, &start)) {
+		goto cleanup;	/* no start state */
+	}
+
 	for (s = 0; s < state_count; s++) {
 		struct state_iter si;
 		fsm_state_t es_id;
@@ -101,6 +184,12 @@ fsm_remove_epsilons(struct fsm *nfa)
 		struct edge_group_iter egi;
 		struct edge_group_iter_info info;
 
+		/* If the state isn't reachable by a label and isn't the start state,
+		 * skip processing -- it will soon become garbage. */
+		if (!u64bitset_get(reachable_by_label, s) && s != start) {
+			continue;
+		}
+
 		/* Process the epsilon closure. */
 		state_set_reset(eclosures[s], &si);
 		while (state_set_next(&si, &es_id)) {
@@ -129,6 +218,16 @@ fsm_remove_epsilons(struct fsm *nfa)
 				}
 			}
 
+			/* Collect every eager output ID from any state
+			 * in the current state's epsilon closure to the
+			 * current state. These will be added at the end. */
+			{
+				if (fsm_eager_output_has_any(nfa, es_id, NULL)) {
+					fsm_eager_output_iter_state(nfa, es_id, collect_eager_output_ids_cb, &eager_output_buf);
+					if (!eager_output_buf.ok) { goto cleanup; }
+				}
+			}
+
 			/* For every state in this state's transitive
 			 * epsilon closure, add all of their sets of
 			 * labeled edges. */
@@ -144,6 +243,13 @@ fsm_remove_epsilons(struct fsm *nfa)
 				}
 			}
 		}
+
+		for (size_t i = 0; i < eager_output_buf.used; i++) {
+			if (!fsm_seteageroutput(nfa, s, eager_output_buf.ids[i])) {
+				goto cleanup;
+			}
+		}
+		eager_output_buf.used = 0; /* clear */
 	}
 
 	/* Remove the epsilon-edge state sets from everything.
@@ -170,13 +276,53 @@ fsm_remove_epsilons(struct fsm *nfa)
 
 	res = 1;
 cleanup:
+	LOG(2, "%s: finishing\n", __func__);
 	if (eclosures != NULL) {
 		closure_free(nfa, eclosures, state_count);
 	}
+	f_free(nfa->alloc, reachable_by_label);
+	f_free(nfa->alloc, eager_output_buf.ids);
 
 	return res;
 }
 
+/* For every state, mark every state reached by a labeled edge as
+ * reachable. This doesn't check that the FROM state is reachable from
+ * the start state (trim will do that soon enough), it's just used to
+ * check which states will become unreachable once epsilon edges are
+ * removed. We don't need to add eager output IDs for them, because
+ * they will soon be disconnected from the epsilon-free NFA. */
+static void
+mark_states_reachable_by_label(const struct fsm *nfa, uint64_t *reachable_by_label)
+{
+	fsm_state_t start;
+	if (!fsm_getstart(nfa, &start)) {
+		return;		/* nothing reachable */
+	}
+	u64bitset_set(reachable_by_label, start);	/* the start state always counts */
+
+	const fsm_state_t state_count = fsm_countstates(nfa);
+
+	for (size_t s_i = 0; s_i < state_count; s_i++) {
+		struct edge_group_iter egi;
+		struct edge_group_iter_info info;
+
+		struct fsm_state *s = &nfa->states[s_i];
+
+		/* Reset the visited flag; it is expected to be clear already. */
+#if 1
+		assert(s->visited == 0); /* stale */
+#endif
+		s->visited = 0;
+
+		edge_set_group_iter_reset(s->edges, EDGE_GROUP_ITER_ALL, &egi);
+		while (edge_set_group_iter_next(&egi, &info)) {	/* each labeled edge group */
+			LOG(1, "%s: reachable: %d\n", __func__, info.to);
+			u64bitset_set(reachable_by_label, info.to);	/* destination of a labeled edge */
+		}
+	}
+}
+
 static int
 remap_capture_actions(struct fsm *nfa, struct state_set **eclosures)
 {
@@ -425,4 +571,3 @@ carry_endids(struct fsm *fsm, struct state_set *states,
 
 	return env.ok;
 }
-
diff --git a/src/libfsm/exec.c b/src/libfsm/exec.c
index 9f7b21802..077494b8f 100644
--- a/src/libfsm/exec.c
+++ b/src/libfsm/exec.c
@@ -20,9 +20,12 @@
 
 #include "internal.h"
 #include "capture.h"
+#include "eager_output.h"
 
 #define LOG_EXEC 0
 
+#define LOG_EAGER 0
+
 static int
 transition(const struct fsm *fsm, fsm_state_t state, int c,
 	size_t offset, struct fsm_capture *captures,
@@ -43,6 +46,44 @@ transition(const struct fsm *fsm, fsm_state_t state, int c,
 	return 1;
 }
 
+struct check_eager_outputs_for_state_env {
+	const struct fsm *fsm;
+	fsm_eager_output_cb *cb;
+	void *opaque;
+};
+
+static int
+match_eager_outputs_for_state_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	/* Iteration callback: forward id to the FSM's registered callback. */
+	struct check_eager_outputs_for_state_env *env = opaque;
+#if LOG_EAGER
+	fprintf(stderr, "%s: state %u, id %u\n", __func__, (unsigned)state, (unsigned)id);
+#endif
+	(void)state;
+	env->cb(id, env->opaque);
+	return 1;
+}
+
+static int
+match_eager_outputs_for_state(const struct fsm *fsm, fsm_state_t state)
+{
+	/* Report every eager output ID on state to the callback registered
+	 * on fsm, if there is one. Currently always returns 1.
+	 * HACK update the types here once it's working */
+	struct check_eager_outputs_for_state_env env = { .fsm = fsm };
+
+	/* Fill in the registered callback, which may be unset. */
+	fsm_eager_output_get_cb(fsm, &env.cb, &env.opaque);
+	if (env.cb == NULL) {
+		return 1;	/* nothing to do */
+	}
+
+	fsm_eager_output_iter_state(fsm,
+	    state, match_eager_outputs_for_state_cb, &env);
+	return 1;
+}
+
 int
 fsm_exec(const struct fsm *fsm,
 	int (*fsm_getc)(void *opaque), void *opaque,
@@ -73,6 +114,7 @@ fsm_exec(const struct fsm *fsm,
 		errno = EINVAL;
 		return -1;
 	}
+	const fsm_state_t start = state;
 
 	for (i = 0; i < capture_count; i++) {
 		captures[i].pos[0] = FSM_CAPTURE_NO_POS;
@@ -83,6 +125,12 @@ fsm_exec(const struct fsm *fsm,
 	fprintf(stderr, "fsm_exec: starting at %d\n", state);
 #endif
 
+	if (fsm->states[start].has_eager_outputs) {
+		if (!match_eager_outputs_for_state(fsm, start)) {
+			return 0;
+		}
+	}
+
 	while (c = fsm_getc(opaque), c != EOF) {
 		if (!transition(fsm, state, c, offset, captures, &state)) {
 #if LOG_EXEC
@@ -91,6 +139,12 @@ fsm_exec(const struct fsm *fsm,
 			return 0;
 		}
 
+		if (fsm->states[state].has_eager_outputs) {
+			if (!match_eager_outputs_for_state(fsm, state)) {
+				return 0;
+			}
+		}
+
 #if LOG_EXEC
 		fprintf(stderr, "fsm_exec: @ %zu, input '%c', new state %u\n",
 		    offset, c, state);
@@ -113,4 +167,3 @@ fsm_exec(const struct fsm *fsm,
 	*end = state;
 	return 1;
 }
-
diff --git a/src/libfsm/fsm.c b/src/libfsm/fsm.c
index ba2d2db26..c442c8262 100644
--- a/src/libfsm/fsm.c
+++ b/src/libfsm/fsm.c
@@ -21,6 +21,7 @@
 #include "internal.h"
 #include "capture.h"
 #include "endids.h"
+#include "eager_output.h"
 
 /* guess for default state allocation */
 #define FSM_DEFAULT_STATEALLOC 128
@@ -39,6 +40,7 @@ free_contents(struct fsm *fsm)
 
 	fsm_capture_free(fsm);
 	fsm_endid_free(fsm);
+	fsm_eager_output_free(fsm);
 
 	f_free(fsm->alloc, fsm->states);
 }
@@ -92,6 +94,14 @@ fsm_new_statealloc(const struct fsm_alloc *alloc, size_t statealloc)
 		return NULL;
 	}
 
+	if (!fsm_eager_output_init(new)) {
+		fsm_capture_free(new);	/* these read new, so run them first */
+		fsm_endid_free(new);
+		f_free(new->alloc, new->states);
+		f_free(new->alloc, new);
+		return NULL;
+	}
+
 	return new;
 }
 
@@ -133,6 +143,7 @@ fsm_move(struct fsm *dst, struct fsm *src)
 
 	dst->capture_info = src->capture_info;
 	dst->endid_info = src->endid_info;
+	dst->eager_output_info = src->eager_output_info;
 
 	f_free(src->alloc, src);
 }
diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h
index f84bbef0f..46997c82a 100644
--- a/src/libfsm/internal.h
+++ b/src/libfsm/internal.h
@@ -60,6 +60,10 @@ struct fsm_state {
 
 	/* meaningful within one particular transformation only */
 	unsigned int visited:1;
+
+	/* If 0, then this state has no need for checking
+	 * the fsm->eager_output_info struct. */
+	unsigned int has_eager_outputs:1;
 };
 
 struct fsm {
@@ -75,6 +79,7 @@ struct fsm {
 
 	struct fsm_capture_info *capture_info;
 	struct endid_info *endid_info;
+	struct eager_output_info *eager_output_info;
 };
 
 struct fsm *
diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms
index 34be09e77..75c20eb64 100644
--- a/src/libfsm/libfsm.syms
+++ b/src/libfsm/libfsm.syms
@@ -2,6 +2,7 @@
 fsm_complement
 fsm_union
 fsm_union_array
+fsm_union_repeated_pattern_group
 fsm_intersect
 fsm_intersect_charset
 
@@ -72,6 +73,8 @@ fsm_removestate
 fsm_shuffle
 fsm_vacuum
 
+fsm_new_statealloc
+
 fsm_addedge_any
 fsm_addedge_epsilon
 fsm_addedge_literal
@@ -95,6 +98,14 @@ fsm_setendid
 fsm_mapendids
 fsm_increndids
 
+fsm_endid_dump
+
+fsm_seteageroutput
+fsm_seteageroutputonends
+# short term hack
+fsm_eager_output_set_cb
+fsm_eager_output_dump
+
 fsm_countedges
 fsm_countstates
 
diff --git a/src/libfsm/merge.c b/src/libfsm/merge.c
index 8c972c145..ccc1568ff 100644
--- a/src/libfsm/merge.c
+++ b/src/libfsm/merge.c
@@ -22,6 +22,7 @@
 #include "capture.h"
 #include "internal.h"
 #include "endids.h"
+#include "eager_output.h"
 
 #define LOG_MERGE_ENDIDS 0
 
@@ -39,6 +40,9 @@ copy_capture_actions(struct fsm *dst, struct fsm *src);
 static int
 copy_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src);
 
+static int
+copy_eager_output_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src);
+
 static struct fsm *
 merge(struct fsm *dst, struct fsm *src,
 	fsm_state_t *base_dst, fsm_state_t *base_src,
@@ -113,6 +117,11 @@ merge(struct fsm *dst, struct fsm *src,
 		return NULL;
 	}
 
+	if (!copy_eager_output_ids(dst, src, *base_src)) {
+		/* non-recoverable -- destructive operation */
+		return NULL;
+	}
+
 	f_free(src->alloc, src->states);
 	src->states = NULL;
 	src->statealloc = 0;
@@ -194,6 +203,39 @@ copy_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src)
 	return fsm_endid_iter_bulk(src, copy_end_ids_cb, &env);
 }
 
+struct copy_eager_output_ids_env {
+	bool ok;
+	struct fsm *dst;
+	struct fsm *src;
+	fsm_state_t base_src;
+};
+
+static int
+copy_eager_output_ids_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	/* Iteration callback: re-add id on dst, with the source state
+	 * number shifted by base_src. Stops (and clears ok) on failure. */
+	struct copy_eager_output_ids_env *env = opaque;
+	const fsm_state_t dst_state = state + env->base_src;
+
+	if (fsm_seteageroutput(env->dst, dst_state, id)) { return 1; }
+	env->ok = false;
+	return 0;
+}
+
+static int
+copy_eager_output_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src)
+{
+	/* Copy src's eager output IDs to dst, offset by base_src; 0 on failure. */
+	struct copy_eager_output_ids_env env;
+	env.ok = true;
+	env.dst = dst;
+	env.src = src;
+	env.base_src = base_src;
+	fsm_eager_output_iter_all(src, copy_eager_output_ids_cb, &env);
+	return env.ok;
+}
+
 struct fsm *
 fsm_mergeab(struct fsm *a, struct fsm *b,
 	fsm_state_t *base_b)
diff --git a/src/libfsm/minimise.c b/src/libfsm/minimise.c
index a8d53c57e..86f00b46f 100644
--- a/src/libfsm/minimise.c
+++ b/src/libfsm/minimise.c
@@ -25,6 +25,8 @@
 
 #include "internal.h"
 #include "capture.h"
+#include "eager_output.h"
+#include "endids.h"
 
 #define LOG_MAPPINGS 0
 #define LOG_STEPS 0
@@ -54,12 +56,21 @@ struct end_metadata {
 		unsigned count;
 		fsm_end_id_t *ids;
 	} end;
+
+	struct end_metadata_eager_outputs {
+		unsigned count;
+		fsm_output_id_t *ids;
+	} eager_outputs;
 };
 
 static int
 collect_end_ids(const struct fsm *fsm, fsm_state_t s,
 	struct end_metadata_end *e);
 
+static int
+collect_eager_output_ids(const struct fsm *fsm, fsm_state_t s,
+	struct end_metadata_eager_outputs *e);
+
 int
 fsm_minimise(struct fsm *fsm)
 {
@@ -122,6 +133,10 @@ fsm_minimise(struct fsm *fsm)
 	/* Minimisation should never add states. */
 	assert(minimised_states <= orig_states);
 
+	for (size_t i = 0; i < fsm->statecount; i++) {
+		assert(mapping[i] < fsm->statecount);
+	}
+
 	/* Use the mapping to consolidate the current states
 	 * into a new DFA, combining states that could not be
 	 * proven distinguishable. */
@@ -693,6 +708,9 @@ same_end_metadata(const struct end_metadata *a, const struct end_metadata *b)
 	if (a->end.count != b->end.count) {
 		return 0;
 	}
+	if (a->eager_outputs.count != b->eager_outputs.count) {
+		return 0;
+	}
 
 	/* compare -- these must be sorted */
 
@@ -702,6 +720,12 @@ same_end_metadata(const struct end_metadata *a, const struct end_metadata *b)
 		}
 	}
 
+	for (size_t i = 0; i < a->eager_outputs.count; i++) {
+		if (a->eager_outputs.ids[i] != b->eager_outputs.ids[i]) {
+			return 0;
+		}
+	}
+
 	return 1;
 }
 
@@ -750,14 +774,21 @@ split_ecs_by_end_metadata(struct min_env *env, const struct fsm *fsm)
 #endif
 		while (s != NO_ID) {
 			struct end_metadata *e = &end_md[s];
-			if (!fsm_isend(fsm, s)) {
-				break; /* this EC has non-end states, skip */
+			const bool is_end = fsm_isend(fsm, s);
+			const bool has_eager_outputs = fsm_eager_output_state_has_eager_output(fsm, s);
+
+			if (!is_end && !has_eager_outputs) {
+				break; /* skip */
 			}
 
 			if (!collect_end_ids(fsm, s, &e->end)) {
 				goto cleanup;
 			}
 
+			if (!collect_eager_output_ids(fsm, s, &e->eager_outputs)) {
+				goto cleanup;
+			}
+
 			s = env->jump[s];
 		}
 	}
@@ -789,6 +820,10 @@ split_ecs_by_end_metadata(struct min_env *env, const struct fsm *fsm)
 				incremental_hash_of_ids(&hash, s_md->end.ids[eid_i]);
 			}
 
+			for (size_t eo_i = 0; eo_i < s_md->eager_outputs.count; eo_i++) {
+				incremental_hash_of_ids(&hash, s_md->eager_outputs.ids[eo_i]);
+			}
+
 			for (size_t b_i = 0; b_i < bucket_count; b_i++) {
 				fsm_state_t *b = &htab[(b_i + hash) & mask];
 				const fsm_state_t other = *b;
@@ -932,6 +967,9 @@ split_ecs_by_end_metadata(struct min_env *env, const struct fsm *fsm)
 			if (e->end.ids != NULL) {
 				f_free(fsm->alloc, e->end.ids);
 			}
+			if (e->eager_outputs.ids != NULL) {
+				f_free(fsm->alloc, e->eager_outputs.ids);
+			}
 		}
 		f_free(fsm->alloc, end_md);
 	}
@@ -959,7 +997,7 @@ collect_end_ids(const struct fsm *fsm, fsm_state_t s,
 
 #if LOG_ECS
 	fprintf(stderr, "%d:", s);
-	for (size_t i = 0; i < written; i++) {
+	for (size_t i = 0; i < e->count; i++) {
 		fprintf(stderr, " %u", e->ids[i]);
 	}
 	fprintf(stderr, "\n");
@@ -968,6 +1006,41 @@ collect_end_ids(const struct fsm *fsm, fsm_state_t s,
 	return 1;
 }
 
+static int
+collect_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	struct end_metadata_eager_outputs *dst = opaque;
+	dst->ids[dst->count++] = id;	/* ids[] was sized by the caller */
+	(void)state;
+	return 1;
+}
+
+static int cmp_eager_output_id(const void *pa, const void *pb)
+{
+	/* qsort comparator: ascending order of fsm_output_id_t values. */
+	const fsm_output_id_t *lhs = pa, *rhs = pb;
+	return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+static int
+collect_eager_output_ids(const struct fsm *fsm, fsm_state_t state,
+	struct end_metadata_eager_outputs *e)
+{
+	/* Store a sorted copy of state's eager output IDs in e, leaving e
+	 * untouched if there are none. Returns 0 on allocation failure. */
+	size_t id_count = 0;
+	const bool any = fsm_eager_output_has_any(fsm, state, &id_count);
+	if (!any) { return 1; }
+
+	e->ids = f_malloc(fsm->alloc, id_count * sizeof(e->ids[0]));
+	if (e->ids == NULL) { return 0; }
+
+	/* Gather, then sort so equal sets compare equal element-wise. */
+	fsm_eager_output_iter_state(fsm, state, collect_cb, e);
+	qsort(e->ids, e->count, sizeof(e->ids[0]), cmp_eager_output_id);
+	return 1;
+}
+
 #if EXPENSIVE_CHECKS
 static void
 check_done_ec_offset(const struct min_env *env)
diff --git a/src/libfsm/print/c.c b/src/libfsm/print/c.c
index 22b03963e..cc3927dc6 100644
--- a/src/libfsm/print/c.c
+++ b/src/libfsm/print/c.c
@@ -222,6 +222,14 @@ print_case(FILE *f, const struct ir *ir,
 	assert(f != NULL);
 	assert(cs != NULL);
 
+	if (cs->eager_outputs != NULL && opt->fragment) {
+		/* If .fragment is set and the state has eager outputs, then emit a call to a
+		 * macro (the caller is expected to define). This is a temporary interface. */
+		for (size_t i = 0; i < cs->eager_outputs->count; i++) {
+			fprintf(f, "\t\t\tFSM_SET_EAGER_OUTPUT(%u);\n", cs->eager_outputs->ids[i]);
+		}
+	}
+
 	switch (cs->strategy) {
 	case IR_NONE:
 		fprintf(f, "\t\t\t");
@@ -377,6 +385,11 @@ print_endstates(FILE *f,
 		const struct fsm_state_metadata state_metadata = {
 			.end_ids = ir->states[i].endids.ids,
 			.end_id_count = ir->states[i].endids.count,
+
+			.eager_output_count = (ir->states[i].eager_outputs == NULL
+			    ? 0 : ir->states[i].eager_outputs->count),
+			.eager_output_ids = (ir->states[i].eager_outputs == NULL
+			    ? NULL : ir->states[i].eager_outputs->ids),
 		};
 
 		if (-1 == print_hook_accept(f, opt, hooks,
diff --git a/src/libfsm/print/ir.c b/src/libfsm/print/ir.c
index 457716dcc..81d5890e0 100644
--- a/src/libfsm/print/ir.c
+++ b/src/libfsm/print/ir.c
@@ -26,6 +26,7 @@
 #include <adt/edgeset.h>
 
 #include "libfsm/internal.h"
+#include "libfsm/eager_output.h"
 
 #include "ir.h"
 
@@ -505,6 +506,23 @@ make_example(const struct fsm *fsm, fsm_state_t s, char **example)
 	return 0;
 }
 
+static int
+append_eager_output_cb(fsm_state_t state, fsm_output_id_t id, void *opaque)
+{
+	struct ir_state_eager_output *outputs = opaque;
+	(void)state;
+	outputs->ids[outputs->count++] = id;
+	return 1;
+}
+
+static int
+cmp_fsm_output_id_t(const void *pa, const void *pb)
+{
+	/* qsort comparator: ascending order of fsm_output_id_t values. */
+	const fsm_output_id_t *lhs = pa, *rhs = pb;
+	return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
 struct ir *
 make_ir(const struct fsm *fsm, const struct fsm_options *opt)
 {
@@ -544,6 +562,8 @@ make_ir(const struct fsm *fsm, const struct fsm_options *opt)
 		ir->states[i].endids.ids = NULL;
 		ir->states[i].endids.count = 0;
 
+		ir->states[i].eager_outputs = NULL;
+
 		if (fsm_isend(fsm, i)) {
 			fsm_end_id_t *ids;
 			size_t count;
@@ -567,6 +587,20 @@ make_ir(const struct fsm *fsm, const struct fsm_options *opt)
 			ir->states[i].endids.count = count;
 		}
 
+		size_t count;
+		if (fsm_eager_output_has_any(fsm, i, &count)) {
+			struct ir_state_eager_output *outputs = f_malloc(fsm->alloc,
+			    sizeof(*outputs) + count * sizeof(outputs->ids[0]));
+			if (outputs == NULL) {
+				goto error;
+			}
+			outputs->count = 0;
+			fsm_eager_output_iter_state(fsm, i, append_eager_output_cb, outputs);
+			assert(outputs->count == count);
+			qsort(outputs->ids, outputs->count, sizeof(outputs->ids[0]), cmp_fsm_output_id_t);
+			ir->states[i].eager_outputs = outputs;
+		}
+
 		if (make_state(fsm, i, &ir->states[i]) == -1) {
 			goto error;
 		}
@@ -630,6 +664,7 @@ free_ir(const struct fsm *fsm, struct ir *ir)
 	for (i = 0; i < ir->n; i++) {
 		f_free(fsm->alloc, (void *) ir->states[i].example);
 		f_free(fsm->alloc, (void *) ir->states[i].endids.ids);
+		f_free(fsm->alloc, (void *) ir->states[i].eager_outputs);
 
 		switch (ir->states[i].strategy) {
 		case IR_TABLE:
diff --git a/src/libfsm/print/ir.h b/src/libfsm/print/ir.h
index b375ba850..7678d3f35 100644
--- a/src/libfsm/print/ir.h
+++ b/src/libfsm/print/ir.h
@@ -59,6 +59,11 @@ struct ir_state {
 		size_t count;
 	} endids;
 
+	struct ir_state_eager_output {
+		size_t count;
+		fsm_output_id_t ids[];
+	} *eager_outputs;	/* NULL -> 0 */
+
 	unsigned int isend:1;
 
 	enum ir_strategy strategy;
diff --git a/src/libfsm/state.c b/src/libfsm/state.c
index c845cbe46..d96c33653 100644
--- a/src/libfsm/state.c
+++ b/src/libfsm/state.c
@@ -19,6 +19,7 @@
 
 #include "internal.h"
 #include "endids.h"
+#include "eager_output.h"
 
 int
 fsm_addstate(struct fsm *fsm, fsm_state_t *state)
@@ -44,6 +45,7 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state)
 
 		for (i = fsm->statealloc; i < n; i++) {
 			tmp[i].has_capture_actions = 0;
+			tmp[i].has_eager_outputs = 0;
 		}
 
 		fsm->statealloc = n;
@@ -87,6 +89,8 @@ fsm_addstate_bulk(struct fsm *fsm, size_t n)
 			new->visited  = 0;
 			new->epsilons = NULL;
 			new->edges    = NULL;
+
+			new->has_eager_outputs = 0;
 		}
 
 		fsm->statecount += n;
@@ -259,6 +263,10 @@ fsm_compact_states(struct fsm *fsm,
 	if (!fsm_endid_compact(fsm, mapping, orig_statecount)) {
 		return 0;
 	}
+	if (!fsm_eager_output_compact(fsm, mapping, orig_statecount)) {
+		return 0;
+	}
+
 	assert(dst == kept);
 	assert(kept == fsm->statecount);
 
diff --git a/src/libfsm/union.c b/src/libfsm/union.c
index a3b4b230c..0b18cd30c 100644
--- a/src/libfsm/union.c
+++ b/src/libfsm/union.c
@@ -15,9 +15,14 @@
 #include <fsm/capture.h>
 #include <fsm/bool.h>
 #include <fsm/pred.h>
+#include <fsm/options.h>
+#include <fsm/print.h>
 
 #include "internal.h"
 
+#include <adt/edgeset.h>
+#include "eager_output.h"
+
 #define LOG_UNION_ARRAY 0
 
 struct fsm *
@@ -151,3 +156,231 @@ fsm_union_array(size_t fsm_count,
 
 	return res;
 }
+
+#define LOG_UNION_REPEATED_PATTERN_GROUP 0
+
+/* Combine an array of FSMs into a single FSM in one pass, with an extra loop so that more than one
+ * pattern with eager outputs can match. With entry_count > 1 each entries[i].fsm is taken over (set to NULL). */
+struct fsm *
+fsm_union_repeated_pattern_group(size_t entry_count,
+    struct fsm_union_entry *entries, struct fsm_combined_base_pair *bases)
+{
+	const struct fsm_alloc *alloc = entries[0].fsm->alloc;
+	const bool log = 0 || LOG_UNION_REPEATED_PATTERN_GROUP;
+
+	if (entry_count == 1) {
+		return entries[0].fsm;
+	}
+
+	size_t est_total_states = 0;
+	for (size_t i = 0; i < entry_count; i++) {
+		assert(entries[i].fsm);
+		if (entries[i].fsm->alloc != alloc) {
+			errno = EINVAL;
+			return NULL;
+		}
+		const size_t count = fsm_countstates(entries[i].fsm);
+		est_total_states += count;
+	}
+
+	est_total_states += 5;	/* new start and end, new unanchored start and end loops */
+
+	struct fsm *res = fsm_new_statealloc(alloc, est_total_states);
+	if (res == NULL) { return NULL; }
+
+	/* collected end states */
+	struct ends_buf {
+		size_t ceil;
+		size_t used;
+		fsm_state_t *states;
+	} ends = { .ceil = 0 };
+
+	/* The new overall start state, which will have an epsilon edge to... */
+	fsm_state_t global_start;
+	if (!fsm_addstate(res, &global_start)) { goto fail; }
+
+	/* states linking to the starts of unanchored and anchored subgraphs, respectively. */
+	fsm_state_t global_start_loop, global_start_anchored;
+	if (!fsm_addstate(res, &global_start_loop)) { goto fail; }
+	if (!fsm_addstate(res, &global_start_anchored)) { goto fail; }
+
+	/* The unanchored end loop state, and an end state with no outgoing edges. */
+	fsm_state_t global_end_loop, global_end;
+	if (!fsm_addstate(res, &global_end)) { goto fail; }
+	if (!fsm_addstate(res, &global_end_loop)) { goto fail; }
+
+	/* link the start to the start loop and anchored start, and the start loop to itself */
+	if (log) {
+		fprintf(stderr, "link_before: global_start %d -> global_start_loop %d and global_start_anchored %d\n",
+		    global_start, global_start_loop, global_start_anchored);
+	}
+	if (!fsm_addedge_epsilon(res, global_start, global_start_loop)) { goto fail; }
+	if (!fsm_addedge_epsilon(res, global_start, global_start_anchored)) { goto fail; }
+	if (!fsm_addedge_any(res, global_start_loop, global_start_loop)) { goto fail; }
+
+	/* link the end loop and end */
+	if (log) {
+		fprintf(stderr, "link_before: global_end_loop %d -> global_end %d (and -> self)\n", global_end_loop, global_end);
+	}
+	if (!fsm_addedge_epsilon(res, global_end_loop, global_end)) { goto fail; }
+	if (!fsm_addedge_any(res, global_end_loop, global_end_loop)) { goto fail; }
+
+	if (bases != NULL) {
+		memset(bases, 0x00, entry_count * sizeof(bases[0]));
+	}
+
+	for (size_t fsm_i = 0; fsm_i < entry_count; fsm_i++) {
+		ends.used = 0;	/* reset */
+
+		struct fsm *fsm = entries[fsm_i].fsm;
+		entries[fsm_i].fsm = NULL; /* transfer ownership */
+
+		const size_t state_count = fsm_countstates(fsm);
+
+		fsm_state_t fsm_start;
+		if (!fsm_getstart(fsm, &fsm_start)) {
+			fsm_free(fsm);		      /* no start, just discard */
+			continue;
+		}
+
+		for (fsm_state_t s_i = 0; s_i < state_count; s_i++) {
+			if (fsm_isend(fsm, s_i)) {
+				if (ends.used == ends.ceil) { /* grow? */
+					size_t nceil = (ends.ceil == 0 ? 4 : 2*ends.ceil);
+					fsm_state_t *nstates = f_realloc(alloc,
+					    ends.states, nceil * sizeof(nstates[0]));
+					if (nstates == NULL) { goto fail; }
+					ends.ceil = nceil;
+					ends.states = nstates;
+				}
+				ends.states[ends.used++] = s_i;
+			}
+		}
+
+		if (ends.used == 0) {
+			fsm_free(fsm);		      /* no ends, just discard */
+			continue;
+		}
+
+		/* When combining these, remove self-edges from any states on the FSMs to be
+		 * combined that also have eager output IDs. We are about to add an epsilon edge
+		 * from each to a shared state that won't have eager output IDs.
+		 *
+		 * Eager output matching should be idempotent, so carrying it to other reachable
+		 * state is redundant, and it leads to a combinatorial explosion that blows up the
+		 * state count while determinising the combined FSM otherwise.
+		 *
+		 * For example, if /aaa/, /bbb/, and /ccc/ are combined into a DFA that repeats
+		 * the sub-patterns (like `^.*(?:(aaa)|(bbb)|(ccc))+.*$`), the self-edge at each
+		 * eager output state would combine with every reachable state from then on,
+		 * leading to a copy of the whole reachable subgraph colored by every
+		 * combination of eager output IDs: aaa, bbb, ccc, aaa+bbb, aaa+ccc,
+		 * bbb+ccc, aaa+bbb+ccc. Instead of three relatively separate subgraphs
+		 * that set the eager output at their last state, one for each pattern,
+		 * it leads to 8 (2**3) subgraph clusters because it encodes _each
+		 * distinct combination_ in the DFA. This becomes incredibly expensive
+		 * as the combined pattern count increases; it's essentially what I'm
+		 * trying to avoid by adding eager output support in the first place.
+		 *
+		 * FIXME: instead of actively removing these, filter in fsm_determinise? */
+		if (fsm_eager_output_has_eager_output(fsm)) {
+			/* for any state that has eager outputs and a self edge,
+			 * remove the self edge before further linkage */
+			for (fsm_state_t s = 0; s < fsm->statecount; s++) {
+				if (!fsm_eager_output_has_any(fsm, s, NULL)) { continue; }
+				struct edge_set *edges = fsm->states[s].edges;
+				struct edge_set *new = edge_set_new();
+
+				struct edge_group_iter iter;
+				struct edge_group_iter_info info;
+				edge_set_group_iter_reset(edges, EDGE_GROUP_ITER_ALL, &iter);
+				while (edge_set_group_iter_next(&iter, &info)) {
+					if (info.to != s) {
+						if (!edge_set_add_bulk(&new, fsm->alloc,
+							info.symbols, info.to)) {
+							goto fail;
+						}
+					}
+				}
+				edge_set_free(fsm->alloc, edges);
+				fsm->states[s].edges = new;
+			}
+		}
+
+		/* call fsm_merge; we really don't care which is which */
+		struct fsm_combine_info combine_info;
+		struct fsm *merged = fsm_merge(res, fsm, &combine_info);
+		if (merged == NULL) { goto fail; }
+
+		/* update offsets if res had its state IDs shifted forward */
+		global_start += combine_info.base_a;
+		global_start_loop += combine_info.base_a;
+		global_start_anchored += combine_info.base_a;
+		global_end += combine_info.base_a;
+		global_end_loop += combine_info.base_a;
+
+		/* also update offsets for the FSM's states */
+		fsm_start += combine_info.base_b;
+		for (size_t i = 0; i < ends.used; i++) {
+			ends.states[i] += combine_info.base_b;
+		}
+
+		if (bases != NULL) {
+			bases[fsm_i].state = combine_info.base_b;
+			bases[fsm_i].capture = combine_info.capture_base_b;
+		}
+
+		if (log) {
+			fprintf(stderr, "%s: fsm[%zu].start: %d\n", __func__, fsm_i, fsm_start);
+			for (size_t i = 0; i < ends.used; i++) {
+				fprintf(stderr, "%s: fsm[%zu].ends[%zu]: %d\n", __func__, fsm_i, i, ends.states[i]);
+			}
+		}
+
+		/* link to the FSM's start state */
+		const fsm_state_t start_src = entries[fsm_i].anchored_start ? global_start_anchored : global_start_loop;
+		if (!fsm_addedge_epsilon(merged, start_src, fsm_start)) { goto fail; }
+		if (log) {
+			fprintf(stderr, "%s: linking %s %d to fsm[%zu]'s start %d (anchored? %d)\n",
+			    __func__,
+			    entries[fsm_i].anchored_start ? "global_start_anchored" : "global_start_loop",
+			    start_src, fsm_i, fsm_start, entries[fsm_i].anchored_start);
+		}
+
+		/* link from the FSM's ends */
+		const fsm_state_t end_dst = entries[fsm_i].anchored_end ? global_end : global_end_loop;
+		for (size_t i = 0; i < ends.used; i++) {
+			if (log) {
+				fprintf(stderr, "%s: linking fsm[%zu]'s end[%zu] %d (anchored? %d) to %s %d\n",
+				    __func__, fsm_i, i, ends.states[i], entries[fsm_i].anchored_end,
+				    entries[fsm_i].anchored_end ? "global_end" : "global_end_loop",
+				    end_dst);
+			}
+			if (!fsm_addedge_epsilon(merged, ends.states[i], end_dst)) { goto fail; }
+		}
+
+		res = merged;
+	}
+
+	/* Link from the global_end_loop to the global_start_loop, so patterns with an
+	 * unanchored start can follow other patterns with an unanchored end. */
+	if (log) {
+		fprintf(stderr, "%s: g_start %d, g_start_loop %d, g_start_anchored %d, g_end_loop %d, g_end %d (after all merging)\n",
+		    __func__, global_start, global_start_loop, global_start_anchored, global_end_loop, global_end);
+		fprintf(stderr, "%s: linking global_end_loop %d to global_start_loop %d\n",
+		    __func__, global_end_loop, global_start_loop);
+		fprintf(stderr, "%s: setting global_start %d and end %d\n", __func__, global_start, global_end);
+	}
+	if (!fsm_addedge_epsilon(res, global_end_loop, global_start_loop)) { goto fail; }
+
+	/* This needs to be set after merging, because that clears the start state. */
+	fsm_setstart(res, global_start);
+	fsm_setend(res, global_end, 1);
+
+	f_free(alloc, ends.states);
+	return res;
+
+fail:	/* NOTE(review): only ends.states is released here; res and any FSM already taken from entries[] are not freed -- confirm intent */
+	f_free(alloc, ends.states);
+	return NULL;
+}
diff --git a/src/libre/libre.syms b/src/libre/libre.syms
index a4f1a223b..9d381cb0f 100644
--- a/src/libre/libre.syms
+++ b/src/libre/libre.syms
@@ -3,6 +3,7 @@ re_is_literal
 re_flags
 re_strerror
 re_perror
+re_is_anchored
 
 ast_print
 ast_print_dot
diff --git a/src/libre/re.c b/src/libre/re.c
index 15af848b5..c19183dcc 100644
--- a/src/libre/re.c
+++ b/src/libre/re.c
@@ -335,3 +335,37 @@ re_is_literal(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque,
 	return -1;
 }
 
+/* FIXME: placeholder interface. Parses the regex and stores in *info whether the top-level expression carries AST_FLAG_ANCHORED_START / AST_FLAG_ANCHORED_END. Returns 1 on success, 0 on an unknown dialect or parse failure. */
+int
+re_is_anchored(enum re_dialect dialect, re_getchar_fun *getc, void *opaque,
+	enum re_flags flags, struct re_err *err,
+	struct re_anchoring_info *info)
+{
+	/* FIXME: copy/pasted from above, factor out common */
+
+	struct ast *ast;
+	const struct dialect *m;
+	int unsatisfiable;
+
+	assert(getc != NULL);
+	assert(info != NULL);
+
+	m = re_dialect(dialect);
+	if (m == NULL) {
+		if (err != NULL) { err->e = RE_EBADDIALECT; }
+		return 0;
+	}
+
+	flags |= m->flags;	/* merge in the dialect's own flags before parsing */
+
+	ast = re_parse(dialect, getc, opaque, flags, err, &unsatisfiable);
+	if (ast == NULL) {
+		return 0;
+	}
+
+	info->start = (ast->expr->flags & AST_FLAG_ANCHORED_START) != 0;
+	info->end = (ast->expr->flags & AST_FLAG_ANCHORED_END) != 0;
+
+	ast_free(ast);	/* the AST was only needed for its anchoring flags */
+	return 1;
+}
diff --git a/tests/eager_output/Makefile b/tests/eager_output/Makefile
new file mode 100644
index 000000000..a650bf802
--- /dev/null
+++ b/tests/eager_output/Makefile
@@ -0,0 +1,22 @@
+.include "../../share/mk/top.mk"
+
+TEST.tests/eager_output != ls -1 tests/eager_output/eager_output*.c
+TEST_SRCDIR.tests/eager_output = tests/eager_output
+TEST_OUTDIR.tests/eager_output = ${BUILD}/tests/eager_output
+
+.for n in ${TEST.tests/eager_output:T:R:C/^eager_output//}
+INCDIR.${TEST_SRCDIR.tests/eager_output}/eager_output${n}.c += src/adt
+.endfor
+
+SRC += ${TEST_SRCDIR.tests/eager_output}/utils.c
+
+.for n in ${TEST.tests/eager_output:T:R:C/^eager_output//}
+test:: ${TEST_OUTDIR.tests/eager_output}/res${n}
+SRC += ${TEST_SRCDIR.tests/eager_output}/eager_output${n}.c
+CFLAGS.${TEST_SRCDIR.tests/eager_output}/eager_output${n}.c += -UNDEBUG
+
+${TEST_OUTDIR.tests/eager_output}/run${n}: ${TEST_OUTDIR.tests/eager_output}/eager_output${n}.o ${TEST_OUTDIR.tests/eager_output}/utils.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a
+	${CC} ${CFLAGS} ${CFLAGS.${TEST_SRCDIR.tests/eager_output}/eager_output${n}.c} -o ${TEST_OUTDIR.tests/eager_output}/run${n} ${TEST_OUTDIR.tests/eager_output}/eager_output${n}.o ${TEST_OUTDIR.tests/eager_output}/utils.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a
+${TEST_OUTDIR.tests/eager_output}/res${n}: ${TEST_OUTDIR.tests/eager_output}/run${n}
+	( ${TEST_OUTDIR.tests/eager_output}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/eager_output}/res${n}
+.endfor
diff --git a/tests/eager_output/eager_output1.c b/tests/eager_output/eager_output1.c
new file mode 100644
index 000000000..f20ef77b7
--- /dev/null
+++ b/tests/eager_output/eager_output1.c
@@ -0,0 +1,12 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "abc" },
+		.inputs = {
+			{ .input = "abc", .expected_ids = { 1 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output2.c b/tests/eager_output/eager_output2.c
new file mode 100644
index 000000000..cdac204e2
--- /dev/null
+++ b/tests/eager_output/eager_output2.c
@@ -0,0 +1,17 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "ab(c|d|e)" },
+		.inputs = {
+			{ .input = "abc", .expected_ids = { 1 } },
+			{ .input = "abd", .expected_ids = { 1 } },
+			{ .input = "abe", .expected_ids = { 1 } },
+			{ .input = "Xabe", .expected_ids = { 1 } },
+			{ .input = "abeX", .expected_ids = { 1 } },
+			{ .input = "XabeX", .expected_ids = { 1 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output3.c b/tests/eager_output/eager_output3.c
new file mode 100644
index 000000000..c11bc58a4
--- /dev/null
+++ b/tests/eager_output/eager_output3.c
@@ -0,0 +1,16 @@
+#include "utils.h"
+
+/* test that eager endids are correctly propagated through fsm_determinise() and fsm_minimise() */
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "ab(c|d|e)?" },
+		.inputs = {
+			{ .input = "ab", .expected_ids = { 1 } },
+			{ .input = "abc", .expected_ids = { 1 } },
+			{ .input = "abd", .expected_ids = { 1 } },
+			{ .input = "abe", .expected_ids = { 1 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output4.c b/tests/eager_output/eager_output4.c
new file mode 100644
index 000000000..47cd32029
--- /dev/null
+++ b/tests/eager_output/eager_output4.c
@@ -0,0 +1,13 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "abcde$" },
+		.inputs = {
+			{ .input = "abcde", .expected_ids = { 1 } },
+			{ .input = "Xabcde", .expected_ids = { 1 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output5.c b/tests/eager_output/eager_output5.c
new file mode 100644
index 000000000..4551c68b1
--- /dev/null
+++ b/tests/eager_output/eager_output5.c
@@ -0,0 +1,14 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "^abc$", "^ab*c$" },
+		.inputs = {
+			{ .input = "ac", .expected_ids = { 2 } },
+			{ .input = "abc", .expected_ids = { 1, 2 } },
+			{ .input = "abbc", .expected_ids = { 2 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output6.c b/tests/eager_output/eager_output6.c
new file mode 100644
index 000000000..5431d0981
--- /dev/null
+++ b/tests/eager_output/eager_output6.c
@@ -0,0 +1,34 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  {
+			"apple",
+			"banana",
+			"carrot",
+			"durian",
+			"eggplant",
+			"fig",
+			"grapefruit",
+			"hazelnut",
+			"iceberg lettuce",
+			"jicama",
+		},
+		.inputs = {
+			{ .input = "apple", .expected_ids = { 1 } },
+			{ .input = "banana", .expected_ids = { 2 } },
+			{ .input = "carrot", .expected_ids = { 3 } },
+			{ .input = "durian", .expected_ids = { 4 } },
+			{ .input = "eggplant", .expected_ids = { 5 } },
+			{ .input = "fig", .expected_ids = { 6 } },
+			{ .input = "grapefruit", .expected_ids = { 7 } },
+			{ .input = "hazelnut", .expected_ids = { 8 } },
+			{ .input = "iceberg lettuce", .expected_ids = { 9 } },
+			{ .input = "jicama", .expected_ids = { 10 } },
+			{ .input = "apple banana carrot", .expected_ids = { 1, 2, 3 } },
+		},
+	};
+
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output7.c b/tests/eager_output/eager_output7.c
new file mode 100644
index 000000000..3d123878b
--- /dev/null
+++ b/tests/eager_output/eager_output7.c
@@ -0,0 +1,103 @@
+#include "utils.h"
+
+int main(void)
+{
+	/* Run this test with env FORCE_ENDIDS=N ... to see how much more
+	 * expensive it is to combine the first N patterns using endids,
+	 * rather than eager_outputs. It becomes VERY slow for >= 9 or so.
+	 * (Note that the checks probably will not pass for N < 4, because
+	 * it will start skipping patterns that appear in the early test inputs.) */
+	bool force_endids = false;
+	size_t force_endid_count = 0;
+	{
+		const char *str = getenv("FORCE_ENDIDS");
+		if (str != NULL) {
+			force_endid_count = atoi(str);
+			if (force_endid_count == 0) {
+				force_endid_count = 26;
+			}
+			force_endids = true;
+		}
+	}
+
+	struct eager_output_test test = {
+		.patterns =  {
+			[0] = "apple",
+			[1] = "banana",
+			[2] = "carrot",
+			[3] = "durian",
+			[4] = "eggplant",
+			[5] = "fig",
+			[6] = "grapefruit",
+			[7] = "hazelnut",
+			[8] = "iceberg lettuce",
+			[9] = "jicama",
+			[10] = "kiwano",
+			[11] = "lemon",
+			[12] = "mango",
+			[13] = "nectarine",
+			[14] = "orange",
+			[15] = "plum",
+			[16] = "quince",
+			[17] = "radish",
+			[18] = "strawberry",
+			[19] = "turnip",
+			[20] = "ube",
+			[21] = "vanilla",
+			[22] = "watermelon",
+			[23] = "xigua watermelon",
+			[24] = "yam",
+			[25] = "zucchini",
+		},
+		.inputs = {
+			/* Note: expected IDs are shifted by 1, it's 0-terminated. */
+			{ .input = "apple", .expected_ids = { 1 } },
+			{ .input = "banana", .expected_ids = { 2 } },
+			{ .input = "carrot", .expected_ids = { 3 } },
+			{ .input = "apple banana", .expected_ids = { 1, 2 } },
+			{ .input = "carrot durian apple", .expected_ids = { 1, 3, 4 } },
+			{ .input = "carrot fig apple", .expected_ids = { 1, 3, 6 } },
+
+			/* leading characters and an incomplete trailing match */
+			{ .input = "mumble mumble fig hazelnut banana xigua watermelo", .expected_ids = { 2, 6, 8 } },
+
+			/* redundant matches */
+			{ .input = "ube ube ube ube ube", .expected_ids = { 21 } },
+
+			/* everything */
+			{ .input =
+			  "apple banana carrot durian eggplant fig grapefruit "
+			  "hazelnut iceberg lettuce jicamaa kiwano lemon mango "
+			  "nectarine orange plum quince radish strawberry "
+			  "turnip ube vanilla watermelon xigua watermelon yam zucchini",
+			  .expected_ids = {
+				  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+				  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+			  },
+			},
+			/* everything, only spaces appearing in patterns */
+			{ .input =
+			  "applebananacarrotdurianeggplantfiggrapefruit"
+			  "hazelnuticeberg lettucejicamaakiwanolemonmango"
+			  "nectarineorangeplumquinceradishstrawberry"
+			  "turnipubevanillawatermelonxigua watermelonyamzucchini",
+			  .expected_ids = {
+				  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+				  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+			  },
+			},
+		},
+	};
+
+	/* truncate patterns to the first N */
+	if (force_endids) {
+		assert(force_endid_count > 0 && force_endid_count <= 26);
+		test.patterns[force_endid_count] = NULL;
+
+		/* truncate test inputs to just the first couple, since
+		 * later inputs use later patterns */
+		test.inputs[5].input = NULL;
+	}
+
+	return run_test(&test, false, force_endids);
+}
diff --git a/tests/eager_output/eager_output_at_start.c b/tests/eager_output/eager_output_at_start.c
new file mode 100644
index 000000000..407aa4e77
--- /dev/null
+++ b/tests/eager_output/eager_output_at_start.c
@@ -0,0 +1,12 @@
+#include "utils.h"
+
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "" },
+		.inputs = {
+			{ .input = "", .expected_ids = { 1 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output_fr1.c b/tests/eager_output/eager_output_fr1.c
new file mode 100644
index 000000000..e8e5f3395
--- /dev/null
+++ b/tests/eager_output/eager_output_fr1.c
@@ -0,0 +1,13 @@
+#include "utils.h"
+
+/* Fuzzer regression */
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "ab", "" },
+		.inputs = {
+			{ .input = "ab", .expected_ids = { 1, 2 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output_fr2.c b/tests/eager_output/eager_output_fr2.c
new file mode 100644
index 000000000..404e98644
--- /dev/null
+++ b/tests/eager_output/eager_output_fr2.c
@@ -0,0 +1,13 @@
+#include "utils.h"
+
+/* Fuzzer regression */
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "", "" },
+		.inputs = {
+			{ .input = "", .expected_ids = { 1, 2 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output_fr3.c b/tests/eager_output/eager_output_fr3.c
new file mode 100644
index 000000000..c7e4127a6
--- /dev/null
+++ b/tests/eager_output/eager_output_fr3.c
@@ -0,0 +1,13 @@
+#include "utils.h"
+
+/* Fuzzer regression */
+int main(void)
+{
+	struct eager_output_test test = {
+		.patterns =  { "^", "" },
+		.inputs = {
+			{ .input = "", .expected_ids = { 1, 2 } },
+		},
+	};
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/eager_output_mixed_anchored_unanchored.c b/tests/eager_output/eager_output_mixed_anchored_unanchored.c
new file mode 100644
index 000000000..a586f9840
--- /dev/null
+++ b/tests/eager_output/eager_output_mixed_anchored_unanchored.c
@@ -0,0 +1,46 @@
+#include "utils.h"
+
+int main(void)
+{
+	/* fprintf(stderr, "%s: skipping for now, this doesn't pass yet.\n", __FILE__); */
+	/* return EXIT_SUCCESS; */
+
+	struct eager_output_test test = {
+		.patterns =  {
+			"^abc$",
+			"def",
+			"^ghi",
+			"jkl$",
+			"mno",
+		},
+		.inputs = {
+			{ .input = "abc", .expected_ids = { 1 } },
+			{ .input = "def", .expected_ids = { 2 } },
+			{ .input = "ghi", .expected_ids = { 3 } },
+			{ .input = "jkl", .expected_ids = { 4 } },
+			{ .input = "mno", .expected_ids = { 5 } },
+
+			{ .input = "defmno", .expected_ids = { 2, 5 } },
+			{ .input = " def mno ", .expected_ids = { 2, 5 } },
+
+			/* Matching a start-anchored pattern followed by
+			 * unanchored ones should just work. */
+			{ .input = "ghi def", .expected_ids = { 2, 3 } },
+
+			/* An unanchored pattern before a start-anchored pattern
+			 * should only match the unanchored pattern. */
+			{ .input = "def ghi", .expected_ids = { 2 } },
+
+			/* Matching an unanchored pattern before an
+			 * end-anchored one is fine. */
+			{ .input = "mno jkl", .expected_ids = { 4, 5 } },
+
+			/* This should match "mno" with the "jkl" prefix
+			 * ignored by the unanchored start, which does
+			 * not count as a match for "jkl$". */
+			{ .input = "jkl mno", .expected_ids = { 5 } },
+		},
+	};
+
+	return run_test(&test, false, false);
+}
diff --git a/tests/eager_output/utils.c b/tests/eager_output/utils.c
new file mode 100644
index 000000000..4bee8d848
--- /dev/null
+++ b/tests/eager_output/utils.c
@@ -0,0 +1,278 @@
+#include "utils.h"
+
+void
+fsm_eager_output_dump(FILE *f, const struct fsm *fsm);
+
+void
+fsm_endid_dump(FILE *f, const struct fsm *fsm);
+
+void
+append_eager_output_cb(fsm_output_id_t id, void *opaque)
+{
+	struct cb_info *info = (struct cb_info *)opaque;
+	/* Record id once: duplicates are ignored, so they must not trip the capacity check below. */
+	for (size_t i = 0; i < info->used; i++) {
+		if (info->ids[i] == id) {
+			return;	/* already present */
+		}
+	}
+
+	assert(info->used < MAX_IDS);	/* only a genuinely new id needs a free slot */
+	info->ids[info->used++] = id;
+}
+
+int
+cmp_output(const void *pa, const void *pb)
+{
+	const fsm_output_id_t a = *(const fsm_output_id_t *)pa;	/* keep qsort()'s const qualification */
+	const fsm_output_id_t b = *(const fsm_output_id_t *)pb;
+	return a < b ? -1 : a > b ? 1 : 0;	/* ascending order, for qsort() */
+}
+
+struct fsm_options print_options = {
+	.consolidate_edges = 1,
+	.comments = 0,
+	.group_edges = 1,
+};
+
+void
+dump(const struct fsm *fsm)
+{
+	fsm_print(stderr, fsm,
+	    &print_options, NULL, FSM_PRINT_DOT);
+}
+
+int
+run_test(const struct eager_output_test *test, bool allow_extra_outputs, bool force_endids)
+{
+	struct fsm_union_entry entries[MAX_PATTERNS] = {0};
+	/* NOTE(review): the caller's allow_extra_outputs is unconditionally overridden below -- confirm intent */
+	allow_extra_outputs = false;
+
+	size_t fsms_used = 0;
+	int ret = 0;
+
+	int log = 0;
+	{
+		const char *logstr = getenv("LOG");
+		if (logstr != NULL) {
+			if (logstr[0] == 'y') { /* make "y" or "yes" non-zero */
+				logstr = "1";
+			}
+			log = atoi(logstr);
+		}
+	}
+
+	for (size_t i = 0; i < MAX_PATTERNS; i++) {
+		const char *p = test->patterns[i];
+		if (test->patterns[i] == NULL) { break; }
+		const size_t len = strlen(p);
+		struct fsm_union_entry *e = &entries[fsms_used];
+
+		/* For sake of these patterns, they are anchored if the first/last
+		 * character is '^' and '$', respectively. This is too simplistic
+		 * for the general case, though. */
+		if (len > 0) {
+			if (p[0] == '^') { e->anchored_start = true; }
+			if (p[len - 1] == '$') { e->anchored_end = true; }
+			/* fprintf(stderr, "%s: p[%zd]: '%s', start %d, end %d\n", */
+			/*     __func__, fsms_used, p, e->anchored_start, e->anchored_end); */
+		}
+
+		struct fsm *fsm = re_comp(RE_PCRE, fsm_sgetc, &p, NULL, 0, NULL);
+		assert(fsm != NULL);
+
+		/* Zero is used to terminate expected_ids, so don't use it here. */
+		const fsm_output_id_t output_id = (fsm_output_id_t) (i + 1);
+		const fsm_end_id_t end_id = (fsm_end_id_t) (i + 1);
+
+		/* Set either an end ID or an eager output ID, depending on
+		 * whether the fsm is anchored at the end or not. */
+		if (e->anchored_end || force_endids) {
+			ret = fsm_setendid(fsm, end_id);
+		} else {
+			ret = fsm_seteageroutputonends(fsm, output_id);
+		}
+		assert(ret == 1);
+
+		if (log) {
+			fprintf(stderr, "==== source DFA %zu (pre det+min)\n", i);
+			if (log > 1) { dump(fsm); }
+			fsm_eager_output_dump(stderr, fsm);
+			fsm_endid_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+		}
+
+		ret = fsm_determinise(fsm);
+		assert(ret == 1);
+
+		if (log) {
+			fprintf(stderr, "==== source DFA %zu (post det)\n", i);
+			if (log > 1) { dump(fsm); }
+			fsm_eager_output_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+		}
+
+		ret = fsm_minimise(fsm);
+		assert(ret == 1);
+
+		if (log) {
+			fprintf(stderr, "==== source DFA %zu (post det+min)\n", i);
+			if (log > 1) { dump(fsm); }
+			fsm_eager_output_dump(stderr, fsm);
+			fprintf(stderr, "====\n");
+		}
+
+		e->fsm = fsm;
+		fsms_used++;
+	}
+
+	/* If there's only one pattern this just returns fsms[0]. */
+	struct fsm *fsm = fsm_union_repeated_pattern_group(fsms_used, entries, NULL);
+	assert(fsm != NULL);
+
+	if (log) {
+		fprintf(stderr, "==== combined (pre det+min)\n");
+		if (log > 1) { dump(fsm); }
+		fsm_eager_output_dump(stderr, fsm);
+		fprintf(stderr, "--- endids:\n");
+		fsm_endid_dump(stderr, fsm);
+		fprintf(stderr, "====\n");
+	}
+
+	if (log) {
+		fprintf(stderr, "=== determinising combined... NFA has %u states\n", fsm_countstates(fsm));
+	}
+	ret = fsm_determinise(fsm);
+	assert(ret == 1);
+	if (log) {
+		fprintf(stderr, "=== determinising combined...done, DFA has %u states\n", fsm_countstates(fsm));
+	}
+
+	if (log) {
+		fprintf(stderr, "==== combined (post det)\n");
+		if (log > 1) { dump(fsm); }
+		fsm_eager_output_dump(stderr, fsm);
+		fprintf(stderr, "====\n");
+	}
+
+	ret = fsm_minimise(fsm);
+	if (log) {
+		fprintf(stderr, "=== minimised combined...done, DFA has %u states\n", fsm_countstates(fsm));
+	}
+	assert(ret == 1);
+
+	if (log) {
+		fprintf(stderr, "==== combined (post det+min)\n");
+		if (log > 1) { dump(fsm); }
+		fsm_eager_output_dump(stderr, fsm);
+		fprintf(stderr, "--- endids:\n");
+		fsm_endid_dump(stderr, fsm);
+		fprintf(stderr, "====\n");
+	}
+
+	struct cb_info outputs = { 0 };
+	fsm_eager_output_set_cb(fsm, append_eager_output_cb, &outputs);
+
+	for (size_t i_i = 0; i_i < MAX_INPUTS; i_i++) {
+		outputs.used = 0;
+		const char *input = test->inputs[i_i].input;
+		if (input == NULL) { break; }
+
+		size_t expected_id_count = 0;
+		for (size_t id_i = 0; id_i < MAX_ENDIDS; id_i++) {
+			const fsm_output_id_t id = test->inputs[i_i].expected_ids[id_i];
+			if (id == 0) { break; }
+			expected_id_count++;
+
+			/* must be ascending */
+			if (id_i > 0) {
+				assert(id > test->inputs[i_i].expected_ids[id_i - 1]);
+			}
+		}
+
+		if (log) {
+			fprintf(stderr, "%s: input %zu: \"%s\", expecting %zu ids:",
+			    __func__, i_i, input, expected_id_count);
+			for (size_t i = 0; i < expected_id_count; i++) {
+				fprintf(stderr, " %u", test->inputs[i_i].expected_ids[i]);
+			}
+		}
+
+		if (test->inputs[i_i].expect_fail) {
+			expected_id_count = 0;
+		}
+
+		fsm_state_t end; /* only set on match */
+		ret = fsm_exec(fsm, fsm_sgetc, &input, &end, NULL);
+
+		if (ret == 1) {
+#define ENDID_BUF_SIZE 32
+			fsm_end_id_t endid_buf[ENDID_BUF_SIZE] = {0};
+			const size_t endid_count = fsm_endid_count(fsm, end);
+			/* fprintf(stderr, "%s: endid_count %zd for state %d\n", __func__, endid_count, end); */
+			assert(endid_count < ENDID_BUF_SIZE);
+			if (!fsm_endid_get(fsm, end, /*ENDID_BUF_SIZE*/ endid_count, endid_buf)) {
+				assert(!"fsm_endid_get failed");
+			}
+
+			/* Copy endid outputs into outputs.ids[], since for testing
+			 * purposes we don't care about the difference between eager
+			 * output and endids here -- the values don't overlap. */
+			assert(outputs.used + endid_count <= MAX_IDS);
+			for (size_t endid_i = 0; endid_i < endid_count; endid_i++) {
+				if (log) {
+					fprintf(stderr, "-- adding endid %zu: %u\n", endid_i, endid_buf[endid_i]);
+				}
+				outputs.ids[outputs.used++] = (fsm_output_id_t)endid_buf[endid_i];
+			}
+		}
+
+		if (ret == 0) {
+			/* if it didn't match, ignore the eager output IDs. this should
+			 * eventually happen internal to fsm_exec or codegen. */
+			outputs.used = 0;
+		}
+
+		/* Next, match IDs; sort the outputs[] buffer first. */
+		qsort(outputs.ids, outputs.used, sizeof(outputs.ids[0]), cmp_output);
+
+		if (log) {
+			fprintf(stderr, "-- got %zu:", outputs.used);
+			for (size_t i = 0; i < outputs.used; i++) {
+				fprintf(stderr, " %u", outputs.ids[i]);
+			}
+			fprintf(stderr, "\n");
+		}
+
+		if (expected_id_count == 0) {
+			assert(ret == 0 || outputs.used == 0); /* no match */
+			continue;
+		} else {
+			assert(ret == 1);
+		}
+
+		if (!allow_extra_outputs) {
+			assert(outputs.used == expected_id_count);
+		} else {
+			assert(outputs.used >= expected_id_count);
+		}
+
+		size_t floor = 0;	/* both lists are ascending, so resume scanning after the last hit */
+		for (size_t exp_i = 0; exp_i < expected_id_count; exp_i++) {
+			bool found = false;
+			for (size_t got_i = floor; got_i < outputs.used; got_i++) {
+				if (outputs.ids[got_i] == test->inputs[i_i].expected_ids[exp_i]) {
+					floor = got_i + 1;
+					found = true;
+					break;
+				}
+			}
+			assert(found);
+		}
+	}
+
+	fsm_free(fsm);
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/eager_output/utils.h b/tests/eager_output/utils.h
new file mode 100644
index 000000000..672c01977
--- /dev/null
+++ b/tests/eager_output/utils.h
@@ -0,0 +1,64 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <errno.h>
+
+#include <assert.h>
+
+#include <re/re.h>
+
+#include <fsm/fsm.h>
+#include <fsm/bool.h>
+#include <fsm/pred.h>
+#include <fsm/options.h>
+#include <fsm/print.h>
+#include <fsm/walk.h>
+
+#define MAX_IDS 32
+
+#include <assert.h>
+
+#include <fsm/fsm.h>
+
+#define MAX_PATTERNS 150
+#define MAX_INPUTS 64
+#define MAX_ENDIDS 32
+
+struct eager_output_test {
+	const char *patterns[MAX_PATTERNS];
+
+	struct {
+		const char *input;
+		bool expect_fail;
+		/* Terminated by 0. pattern[i] => id of i+1. Must be sorted. */
+		fsm_output_id_t expected_ids[MAX_ENDIDS];
+	} inputs[MAX_INPUTS];
+};
+
+void
+append_eager_output_cb(fsm_output_id_t id, void *opaque);
+
+int
+cmp_output(const void *pa, const void *pb);
+
+int
+run_test(const struct eager_output_test *test, bool allow_extra_outputs, bool force_endids);
+
+struct cb_info {
+	size_t used;	/* number of valid entries in ids[] */
+	fsm_output_id_t ids[MAX_IDS];	/* stored and sorted as fsm_output_id_t by utils.c */
+};
+
+void
+dump(const struct fsm *fsm);
+
+void
+append_eager_output_cb(fsm_output_id_t id, void *opaque);
+
+#endif