diff --git a/Makefile b/Makefile
index 499239fd9..e6d578f2c 100644
--- a/Makefile
+++ b/Makefile
@@ -114,6 +114,7 @@ SUBDIR += tests/intersect
 SUBDIR += tests/eclosure
 SUBDIR += tests/equals
 SUBDIR += tests/subtract
+SUBDIR += tests/detect_required
 SUBDIR += tests/determinise
 SUBDIR += tests/endids
 SUBDIR += tests/epsilons
diff --git a/include/fsm/walk.h b/include/fsm/walk.h
index b433380d5..bc030a017 100644
--- a/include/fsm/walk.h
+++ b/include/fsm/walk.h
@@ -7,6 +7,8 @@
 #ifndef FSM_WALK_H
 #define FSM_WALK_H
 
+#include <adt/bitmap.h>
+
 struct fsm;
 struct fsm_state;
 
@@ -128,5 +130,34 @@ fsm_generate_matches_cb fsm_generate_cb_printf;
  * to escape all characters or just nonprintable ones. */
 fsm_generate_matches_cb fsm_generate_cb_printf_escaped;
 
+/* Walk a DFA and detect which characters MUST appear in the input for a
+ * match to be possible. For example, if input for the DFA corresponding
+ * to /^(abc|dbe)$/ does not contain 'b' at all, there's no way it can
+ * ever match, so executing the regex is unnecessary. This does not detect
+ * which characters must appear before/after others or how many times, just
+ * which must be present.
+ *
+ * The input must be a DFA. When run with EXPENSIVE_CHECKS this will
+ * check and return ERROR_MISUSE if it is not, otherwise this is an
+ * unchecked error.
+ *
+ * The bitmap will be cleared before populating. Afterward,
+ * bm_count(bitmap) will return how many required characters were
+ * found.
+ *
+ * There is an optional step_limit -- if this is reached, then it will
+ * return FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED and a
+ * cleared bitmap, because any partial information could still have been
+ * contradicted later. If the step_limit is 0 it will be ignored. */
+enum fsm_detect_required_characters_res {
+	FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN,	/* analysis completed; bitmap holds the result */
+	FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED,	/* stopped at step_limit; bitmap left cleared */
+	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE = -1,	/* e.g. not a DFA (EXPENSIVE_CHECKS only), or no start state */
+	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC = -2,	/* an allocation failed */
+};
+enum fsm_detect_required_characters_res
+fsm_detect_required_characters(const struct fsm *dfa, size_t step_limit,
+    struct bm *bitmap);
+
 #endif
 
diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile
index 9af51a5a4..1fe70bec4 100644
--- a/src/libfsm/Makefile
+++ b/src/libfsm/Makefile
@@ -6,6 +6,7 @@ SRC += src/libfsm/complete.c
 SRC += src/libfsm/consolidate.c
 SRC += src/libfsm/clone.c
 SRC += src/libfsm/closure.c
+SRC += src/libfsm/detect_required.c
 SRC += src/libfsm/edge.c
 SRC += src/libfsm/empty.c
 SRC += src/libfsm/end.c
diff --git a/src/libfsm/detect_required.c b/src/libfsm/detect_required.c
new file mode 100644
index 000000000..3d6bf9edd
--- /dev/null
+++ b/src/libfsm/detect_required.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright 2024 Scott Vokes
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include <fsm/fsm.h>
+#include <fsm/walk.h>
+#include <fsm/pred.h>
+
+#include <adt/edgeset.h>
+#include <adt/queue.h>
+#include <adt/u64bitset.h>
+
+#include "internal.h"
+
+#define END_OF_FREELIST ((fsm_state_t)-1)
+#define DEF_CURSOR_CEIL 1	/* force frequent realloc */
+
+#define LOG_BASE 0
+#define LOG_RMAP (LOG_BASE + 0)
+#define LOG_QUEUE (LOG_BASE + 0)
+#define LOG_MERGE (LOG_BASE + 0)
+#define LOG_CURSOR (LOG_BASE + 0)
+#define LOG_PROGRESS (LOG_BASE + 0)
+
+#define USE_UNIQUE_ID 1
+
+struct drc_cursor {	/* one backward path being walked from an end state */
+	struct bm bitmap;	/* characters marked required along this path so far */
+	fsm_state_t state;	/* or freelist id, or END_OF_FREELIST */
+#if USE_UNIQUE_ID
+	size_t unique_id;	/* taken from unique_id_counter when a cursor is cloned */
+#endif
+	uint64_t visited[/* u64bitset_words(state_count) */];	/* states already on this path */
+};
+
+struct drc_state {
+	const struct fsm_alloc *alloc;	/* used for the cursor pointer array */
+	const struct fsm *dfa;
+	const size_t state_count;
+	const size_t visited_words;	/* uint64_t words in each cursor's visited[] */
+
+#if USE_UNIQUE_ID
+	size_t unique_id_counter;	/* next unique_id to hand out */
+#endif
+
+	struct {
+		size_t ceil;	/* number of cursors allocated */
+		struct drc_cursor **cursors;	/* indexed by cursor ID */
+
+		/* This (and the cursor->state field) are used as a
+		 * freelist of cursor IDs, which are held in an fsm_state_t.
+		 * NOTE(review): a cursor is also requested per reverse edge
+		 * followed, so the count can exceed the end states -- confirm. */
+		fsm_state_t freelist;
+	} cursor;
+
+	size_t edge_count;	/* entries in rmap (self-edges are excluded) */
+	size_t unique_edge_count;	/* rmap entries carrying exactly one label */
+	struct edge_alist {
+		fsm_state_t from;
+		fsm_state_t to;
+		bool unique;	/* the edge group has exactly one label */
+		uint8_t unique_char;	/* that label; only assigned when unique */
+	} *rmap;	/* sorted by (to, from) once initialized */
+
+	/* Accumulator for intersection of bitmaps. Set to the bitmap of
+	 * the first cursor to reach the start state, and intersected
+	 * thereafter. */
+	struct bm accum;
+};
+
+static int
+cmp_rmap_by_to(const void *pa, const void *pb)
+{
+	const struct edge_alist *lhs = pa;
+	const struct edge_alist *rhs = pb;
+
+	/* Reverse mapping: the destination state is the primary sort key,
+	 * and the source state only breaks ties. */
+	if (lhs->to != rhs->to) {
+		return lhs->to < rhs->to ? -1 : 1;
+	}
+	if (lhs->from != rhs->from) {
+		return lhs->from < rhs->from ? -1 : 1;
+	}
+	return 0;	/* not expected: each (from, to) pair should be unique */
+}
+
+static bool
+init_rmap(const struct fsm *dfa, struct drc_state *state)
+{
+	/* Fill state->rmap with every non-self edge group, sorted by (to, from). */
+	struct edge_group_iter iter;
+	struct edge_group_iter_info info;
+
+	state->edge_count = 0;
+	state->unique_edge_count = 0;
+	/* first pass: count edges */
+	for (fsm_state_t s_i = 0; s_i < state->state_count; s_i++) {
+		edge_set_group_iter_reset(dfa->states[s_i].edges,
+		    EDGE_GROUP_ITER_ALL, &iter);
+		while (edge_set_group_iter_next(&iter, &info)) {
+			if (info.to == s_i) { continue; } /* ignored */
+			state->edge_count++;
+		}
+	}
+	/* Never request 0 bytes: malloc(0) may return NULL without failing,
+	 * which would be misreported as an allocation error. */
+	const size_t rmap_ceil = state->edge_count > 0 ? state->edge_count : 1;
+	struct edge_alist *rmap = malloc(rmap_ceil * sizeof(rmap[0]));
+	if (rmap == NULL) { return false; }
+	/* second pass: populate */
+	size_t rmap_used = 0;
+	for (fsm_state_t s_i = 0; s_i < state->state_count; s_i++) {
+		edge_set_group_iter_reset(dfa->states[s_i].edges,
+		    EDGE_GROUP_ITER_ALL, &iter);
+		while (edge_set_group_iter_next(&iter, &info)) {
+			/* filter self-edges, they don't impact the result */
+			if (info.to == s_i) { continue; }
+
+			struct edge_alist *elt = &rmap[rmap_used];
+			elt->from = s_i;
+			elt->to = info.to;
+
+			size_t label_count = 0;
+			for (size_t i = 0; i < 4; i++) {
+				label_count += (size_t)u64bitset_popcount(info.symbols[i]);
+			}
+			assert(label_count > 0);
+
+			if (label_count == 1) {
+				state->unique_edge_count++;
+				elt->unique = true;
+				bool unique_char_found = false;
+				for (size_t i = 0; i < 4; i++) {
+					const uint64_t w = info.symbols[i];
+					if (w == 0) { continue; }
+					for (uint64_t bit_i = 0; bit_i < 64; bit_i++) {
+						if (w & (1ULL << bit_i)) {
+							elt->unique_char = (uint8_t)(64*i + bit_i);
+							unique_char_found = true;
+							break;
+						}
+					}
+				}
+				assert(unique_char_found);
+			} else {
+				elt->unique = false;
+			}
+
+			rmap_used++;
+		}
+	}
+	assert(rmap_used == state->edge_count);	/* both passes must agree */
+	/* invert mapping */
+	qsort(rmap, state->edge_count, sizeof(rmap[0]), cmp_rmap_by_to);
+
+#if LOG_RMAP
+	for (size_t i = 0; i < rmap_used; i++) {
+		struct edge_alist *elt = &rmap[i];
+		fprintf(stderr, "%s: rmap[%zu]: from %u, to %u, unique ? %d",
+		    __func__, i, elt->from, elt->to, elt->unique);
+		if (elt->unique) {
+			fprintf(stderr, " -- 0x%02x\n", elt->unique_char);
+		} else {
+			fprintf(stderr, "\n");
+		}
+	}
+#endif
+
+	state->rmap = rmap;
+	return true;
+}
+
+static bool
+request_cursor(struct drc_state *state, fsm_state_t *cursor_id)
+{
+	/* Pop a cleared cursor off the freelist into *cursor_id, growing the
+	 * pool first if the freelist is empty. Returns false on alloc failure. */
+	fsm_state_t freelist = state->cursor.freelist;
+	if (freelist == END_OF_FREELIST) {
+		const size_t oceil = state->cursor.ceil;
+		const size_t nceil = oceil == 0
+		    ? DEF_CURSOR_CEIL
+		    : 2*state->cursor.ceil;
+
+#if LOG_CURSOR
+		fprintf(stderr, "%s: growing %zu -> %zu\n", __func__, oceil, nceil);
+#endif
+
+		struct drc_cursor **ncursors = f_realloc(state->alloc,
+		    state->cursor.cursors, nceil * sizeof(ncursors[0]));
+		if (ncursors == NULL) { return false; }
+
+		/* The old array pointer is stale once realloc succeeds, so
+		 * publish the new one before anything else can fail. */
+		state->cursor.cursors = ncursors;
+
+		/* allocate new cursors, chaining each onto the freelist */
+		for (size_t i = oceil; i < nceil; i++) {
+			struct drc_cursor *c = malloc(sizeof(*c)
+			    + state->visited_words * sizeof(c->visited[0]));
+			if (c == NULL) {
+				/* free this batch; cursor.ceil still excludes it */
+				while (i > oceil) { free(ncursors[--i]); }
+				return false;
+			}
+			c->state = (i + 1 == nceil) ? END_OF_FREELIST : (fsm_state_t)(i + 1);
+			ncursors[i] = c;
+		}
+
+		state->cursor.ceil = nceil;
+		state->cursor.freelist = oceil;
+		freelist = state->cursor.freelist;
+	}
+
+	assert(freelist < state->cursor.ceil);
+	struct drc_cursor *c = state->cursor.cursors[freelist];
+	state->cursor.freelist = c->state; /* next link */
+	c->state = (fsm_state_t)-2;	/* placeholder until the caller assigns a state */
+	bm_clear(&c->bitmap);
+	memset(c->visited, 0x00, state->visited_words * sizeof(c->visited[0]));
+
+#if LOG_CURSOR > 1
+	fprintf(stderr, "%s: requested cursor_id %u\n", __func__, freelist);
+#endif
+	*cursor_id = freelist;
+	return true;
+}
+
+static void
+release_cursor(struct drc_state *state, fsm_state_t cursor_id)
+{
+	/* Push onto the freelist head; a free cursor's state field is the next link. */
+	assert(cursor_id < state->cursor.ceil);
+	const fsm_state_t old_head = state->cursor.freelist;
+	state->cursor.cursors[cursor_id]->state = old_head;
+	state->cursor.freelist = cursor_id;
+#if LOG_CURSOR > 1
+	fprintf(stderr, "%s: released cursor_id %u\n", __func__, cursor_id);
+#endif
+}
+
+static size_t
+rmap_seek(const struct edge_alist *rmap, size_t edge_count, fsm_state_t state)
+{
+	size_t lo = 0, hi = edge_count;	/* rmap is sorted by .to: find the lower bound */
+	while (lo < hi) {
+		const size_t mid = lo + (hi - lo)/2;
+		if (rmap[mid].to < state) { lo = mid + 1; } else { hi = mid; }
+	}
+	return (lo < edge_count && rmap[lo].to == state) ? lo : edge_count;	/* edge_count: not found */
+}
+
+static struct drc_cursor *
+get_cursor(struct drc_state *state, fsm_state_t cursor_id)
+{
+	struct drc_cursor **const by_id = state->cursor.cursors;	/* indexed by cursor ID */
+	assert(cursor_id < state->cursor.ceil);	/* this wrapper exists for the check */
+	return by_id[cursor_id];
+}
+
+enum fsm_detect_required_characters_res
+fsm_detect_required_characters(const struct fsm *dfa, size_t step_limit, struct bm *bitmap)
+{
+	assert(dfa != NULL);
+	assert(bitmap != NULL);
+	/* Contract is documented in <fsm/walk.h>; the walk runs backward from each end state. */
+	#if EXPENSIVE_CHECKS
+	if (!fsm_all(dfa, fsm_isdfa)) {
+		return FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE;
+	}
+	#endif
+
+	enum fsm_detect_required_characters_res res = FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE;
+
+	const size_t state_count = fsm_countstates(dfa);
+	/* cleanup reads state and q, so both are set up before the first goto */
+	struct drc_state state = {
+		.alloc = dfa->opt->alloc,
+		.dfa = dfa,
+		.state_count = state_count,
+		.visited_words = u64bitset_words(state_count),
+		.cursor.freelist = END_OF_FREELIST,
+	};
+	struct queue *q = NULL;
+
+	bm_clear(bitmap);
+
+	fsm_state_t start_state;
+	if (!fsm_getstart(dfa, &start_state)) {
+		goto cleanup;
+	}
+
+	q = queue_new_dynamic(dfa->opt->alloc, state_count);
+	if (q == NULL) {
+		res = FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC;
+		goto cleanup;
+	}
+
+	if (!init_rmap(dfa, &state)) {
+		res = FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC;
+		goto cleanup;
+	}
+
+	/* If the DFA doesn't have any single-label edges, then walking
+	 * the paths from every end state won't add any constraints. */
+	if (state.unique_edge_count == 0) {
+		res = FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN;
+		goto cleanup;
+	}
+
+	size_t steps = 0;
+
+	/* Do the analysis for each end state, adding extra cursors
+	 * wherever the path diverges, and intersect the requirement
+	 * bitmaps across all cursors for all end states. The total cost
+	 * is proportional to the number of states and the number of end
+	 * states. */
+	bool first_path = true;
+	for (fsm_state_t s_i = 0; s_i < state_count; s_i++) {
+		if (!fsm_isend(dfa, s_i)) { continue; }
+
+#if LOG_PROGRESS
+		fprintf(stderr, "-- analyzing end-state %u\n", s_i);
+#endif
+
+		fsm_state_t s;
+		assert(!queue_pop(q, &s)); /* empty */
+
+		/* This is managed by ID rather than pointer because the
+		 * pointers become stale whenever the array is reallocated. */
+		fsm_state_t cursor_id;
+		if (!request_cursor(&state, &cursor_id)) {
+			res = FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC;
+			goto cleanup;
+		}
+
+		{
+			struct drc_cursor *cursor = get_cursor(&state, cursor_id);
+			cursor->state = s_i;
+
+#if LOG_QUEUE
+			fprintf(stderr, "%s: queue pushing %u (at end state)\n", __func__, cursor_id);
+#endif
+			if (!queue_push(q, cursor_id)) {
+				assert(!"internal error");
+				goto cleanup;
+			}
+
+#if LOG_PROGRESS > 1
+			fprintf(stderr, "%s: marking end state %u visited on cursor %u\n", __func__, s_i, cursor_id);
+#endif
+			u64bitset_set(cursor->visited, s_i);
+		}
+
+		while (queue_pop(q, &cursor_id)) {
+			steps++;
+#if LOG_PROGRESS
+			if ((steps % 10000) == 0) { fprintf(stderr, " -- %zu steps...\n", steps); }
+#endif
+			if (steps == step_limit) {
+				res = FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED;
+				/* Note: this does not copy the partial info, since
+				 * further processing might find alternate routes that
+				 * clear the currently set constraints. */
+				goto cleanup;
+			}
+
+			struct drc_cursor *cursor = get_cursor(&state, cursor_id);
+#if LOG_QUEUE
+			fprintf(stderr, "%s: queue popped %u -- state %u\n", __func__, cursor_id, cursor->state);
+#endif
+			assert(cursor->state < state_count);
+
+			if (cursor->state == start_state) {
+#if LOG_MERGE
+				fprintf(stderr, "%s: cursor %u reached start_state %u with bitmap ",
+				    __func__, cursor_id, start_state);
+				bm_print(stderr, state.dfa->opt, &cursor->bitmap, 0, fsm_escputc);
+				fprintf(stderr, "\n");
+#endif
+				if (first_path) {
+					bm_copy(&state.accum, &cursor->bitmap);
+					first_path = false;
+				} else {
+					bm_intersect(&state.accum, &cursor->bitmap);
+				}
+
+#if LOG_MERGE
+				fprintf(stderr, "%s: merged accumulator is now ", __func__);
+				bm_print(stderr, state.dfa->opt, &state.accum, 0, fsm_escputc);
+				fprintf(stderr, "\n");
+#endif
+
+				if (!bm_any(&state.accum)) {
+					/* unconstrained path found -- further work cannot
+					 * add any new information, so we're done */
+#if LOG_PROGRESS
+					fprintf(stderr, "%s: unconstrained path found, we're done\n", __func__);
+#endif
+					res = FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN;
+					goto cleanup;
+				}
+
+				release_cursor(&state, cursor_id);
+				continue;
+			}
+
+			/* start of reverse edges */
+			size_t offset = rmap_seek(state.rmap, state.edge_count, cursor->state);
+
+			while (offset < state.edge_count) {
+				struct edge_alist *elt = &state.rmap[offset];
+
+#if LOG_PROGRESS > 1
+				fprintf(stderr, "%s: offset %zu, elt->from %u, elt->to %u, cursor->state %u, cursor->bitmap ",
+				    __func__, offset, elt->from, elt->to, cursor->state);
+				bm_print(stderr, state.dfa->opt, &cursor->bitmap, 0, fsm_escputc);
+				fprintf(stderr, "\n");
+#endif
+
+				if (elt->to != cursor->state) {
+					break;
+				}
+				assert(elt->to != elt->from); /* self-edges were filtered before */
+
+				if (u64bitset_get(cursor->visited, elt->from)) {
+#if LOG_PROGRESS > 2
+					fprintf(stderr, "%s: skipping %u, visited\n", __func__, elt->from);
+#endif
+					offset++;
+					continue;
+				}
+
+				fsm_state_t other_cursor_id;
+				if (!request_cursor(&state, &other_cursor_id)) {
+					res = FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC;
+					goto cleanup;
+				}
+
+				struct drc_cursor *ocursor = get_cursor(&state, other_cursor_id);
+				ocursor->state = elt->from;
+				bm_copy(&ocursor->bitmap, &cursor->bitmap);
+				memcpy(ocursor->visited, cursor->visited,
+				    state.visited_words * sizeof(cursor->visited[0]));
+
+#if USE_UNIQUE_ID
+				ocursor->unique_id = state.unique_id_counter++;
+#endif
+
+#if LOG_PROGRESS > 1
+				fprintf(stderr, "%s: marking %u visited on cursor %u\n", __func__, elt->from, other_cursor_id);
+#endif
+				u64bitset_set(ocursor->visited, elt->from);
+
+				if (elt->unique) {
+#if LOG_PROGRESS
+					fprintf(stderr, "%s: marking 0x%02x (%c) as required on cursor %u\n",
+					    __func__, elt->unique_char,
+					    isprint(elt->unique_char) ? elt->unique_char : '.',
+					    other_cursor_id);
+#endif
+					bm_set(&ocursor->bitmap, (size_t)elt->unique_char);
+				}
+
+#if LOG_QUEUE
+				fprintf(stderr, "%s: queue pushing %u, state %u (backlink %u -> %u)\n",
+				    __func__, other_cursor_id, ocursor->state, elt->from, elt->to);
+#endif
+				/* fprintf(stdout, "-- %u <- %u, %zu to %zu\n", elt->to, elt->from, cursor->unique_id, ocursor->unique_id); */
+
+				if (!queue_push(q, other_cursor_id)) {
+					assert(!"internal error");
+					goto cleanup;
+				}
+
+				offset++;
+			}
+
+			release_cursor(&state, cursor_id);
+		}
+	}
+
+	/* The final result is the intersection of every bitmap
+	 * reaching the start state. */
+	bm_copy(bitmap, &state.accum);
+
+#if LOG_PROGRESS
+	fprintf(stderr, "%s: final result: ", __func__);
+	bm_print(stderr, state.dfa->opt, &state.accum, 0, fsm_escputc);
+	fprintf(stderr, "\n");
+#endif
+
+	res = FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN;
+
+cleanup:
+	free(state.rmap);
+	for (size_t i = 0; i < state.cursor.ceil; i++) {
+		free(state.cursor.cursors[i]);
+	}
+	f_free(state.alloc, state.cursor.cursors);	/* the array came from f_realloc */
+	if (q != NULL) { queue_free(q); }	/* q is still NULL on the earliest exits */
+
+	return res;
+}
diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms
index a2570b8c9..1004bea42 100644
--- a/src/libfsm/libfsm.syms
+++ b/src/libfsm/libfsm.syms
@@ -15,6 +15,7 @@ fsm_reachableall
 fsm_reachableany
 fsm_walk_edges
 fsm_walk_states
+fsm_detect_required_characters
 
 # <fsm/pred.h>
 fsm_epsilonsonly
diff --git a/tests/detect_required/Makefile b/tests/detect_required/Makefile
new file mode 100644
index 000000000..34214f07e
--- /dev/null
+++ b/tests/detect_required/Makefile
@@ -0,0 +1,26 @@
+.include "../../share/mk/top.mk"
+
+TEST.tests/detect_required != ls -1 tests/detect_required/detect_required*.c
+TEST_SRCDIR.tests/detect_required = tests/detect_required
+TEST_OUTDIR.tests/detect_required = ${BUILD}/tests/detect_required
+
+.for n in ${TEST.tests/detect_required:T:R:C/^detect_required//}
+test:: ${TEST_OUTDIR.tests/detect_required}/res${n}
+SRC += ${TEST_SRCDIR.tests/detect_required}/detect_required${n}.c
+CFLAGS.${TEST_SRCDIR.tests/detect_required}/detect_required${n}.c = -UNDEBUG
+
+${TEST_OUTDIR.tests/detect_required}/run${n}: ${TEST_OUTDIR.tests/detect_required}/detect_required${n}.o ${TEST_OUTDIR.tests/detect_required}/testutil.o
+	${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/detect_required}/run${n} ${TEST_OUTDIR.tests/detect_required}/detect_required${n}.o ${TEST_OUTDIR.tests/detect_required}/testutil.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a
+
+${TEST_OUTDIR.tests/detect_required}/detect_required${n}.o: tests/detect_required/testutil.h
+
+${TEST_OUTDIR.tests/detect_required}/res${n}: ${TEST_OUTDIR.tests/detect_required}/run${n}
+	( ${TEST_OUTDIR.tests/detect_required}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/detect_required}/res${n}
+
+.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre}
+${TEST_OUTDIR.tests/detect_required}/run${n}: ${BUILD}/lib/${lib:R}.a
+.endfor
+.endfor
+
+${TEST_OUTDIR.tests/detect_required}/testutil.o: tests/detect_required/testutil.c
+	${CC} ${CFLAGS} -c -o ${TEST_OUTDIR.tests/detect_required}/testutil.o tests/detect_required/testutil.c
diff --git a/tests/detect_required/detect_required1.c b/tests/detect_required/detect_required1.c
new file mode 100644
index 000000000..52bb83477
--- /dev/null
+++ b/tests/detect_required/detect_required1.c
@@ -0,0 +1,32 @@
+#include "testutil.h"
+
+const struct testcase tests[] = {
+	{ .regex = "^$", .required = "" },	/* only the empty string matches */
+	{ .regex = "^a$", .required = "a" },
+	{ .regex = "^abcde$", .required = "abcde" },
+	{ .regex = "^(ab|cd)$", .required = "" },	/* the branches share no characters */
+	{ .regex = "^(ab|cd|ef)$", .required = "" },
+	{ .regex = "^(abc|def)$", .required = "" },
+	{ .regex = "^(abc|dbf)$", .required = "b" },	/* 'b' appears on both branches */
+	{ .regex = "^abc(def)*ghi$", .required = "abcghi" },	/* (def)* may match zero times */
+	{ .regex = "^abc(def)+ghi$", .required = "abcdefghi" },	/* (def)+ matches at least once */
+	{ .regex = "^ghi(def)abc$", .required = "abcdefghi" },	/* expected string is in ascending byte order */
+};
+
+int main()
+{
+	/* Run every table entry; with FIRST_FAIL set in the environment,
+	 * stop at the first failing case instead of running the rest. */
+	const bool stop_early = getenv("FIRST_FAIL") != NULL;
+	const struct testcase *const end = tests + sizeof(tests)/sizeof(tests[0]);
+	size_t failed = 0;
+
+	for (const struct testcase *tc = tests; tc != end; tc++) {
+		if (run_test(tc)) { continue; }
+		failed++;
+		if (stop_early) { break; }
+	}
+
+	if (failed != 0) { return EXIT_FAILURE; }
+	return EXIT_SUCCESS;
+}
diff --git a/tests/detect_required/detect_required_step_limit.c b/tests/detect_required/detect_required_step_limit.c
new file mode 100644
index 000000000..6e5808b1e
--- /dev/null
+++ b/tests/detect_required/detect_required_step_limit.c
@@ -0,0 +1,58 @@
+#include "testutil.h"
+
+#include <re/re.h>
+#include <fsm/fsm.h>
+#include <fsm/options.h>
+#include <fsm/walk.h>
+#include <adt/bitmap.h>
+#include <fsm/print.h>
+
+static const struct fsm_options opt;
+
+int main()
+{
+	const char *regex = "^abcde$";
+	const enum re_flags no_flags = 0;
+	struct re_err comp_err;
+
+	struct fsm *fsm = re_comp(RE_PCRE, fsm_sgetc, &regex, &opt, no_flags, &comp_err);
+	assert(fsm != NULL);
+
+	/* the analysis expects a DFA, so determinise and minimise first */
+	if (!fsm_determinise(fsm)) {
+		assert(!"determinise");
+		return EXIT_FAILURE;
+	}
+	if (!fsm_minimise(fsm)) {
+		assert(!"minimise");
+		return EXIT_FAILURE;
+	}
+
+	struct bm bitmap;
+
+	/* Retry with an ever smaller step limit until the analysis reports
+	 * hitting it, then verify that the bitmap came back empty. The
+	 * assert trips if the limit runs down to 0 without being hit. */
+	for (size_t step_limit = 25; ; step_limit--) {
+		assert(step_limit > 0);
+
+		const enum fsm_detect_required_characters_res res =
+		    fsm_detect_required_characters(fsm, step_limit, &bitmap);
+		if (res != FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED) {
+			continue;
+		}
+
+		/* a limited run must not leave partially complete information */
+		for (size_t word_i = 0; word_i < 4; word_i++) {
+			if (*bm_nth_word(&bitmap, word_i) == 0) {
+				continue;
+			}
+			fprintf(stderr, "-- Test failure: partial information set when step limit reached\n");
+			return EXIT_FAILURE;
+		}
+		break;
+	}
+
+	fsm_free(fsm);
+	return EXIT_SUCCESS;
+}
diff --git a/tests/detect_required/testutil.c b/tests/detect_required/testutil.c
new file mode 100644
index 000000000..c08160355
--- /dev/null
+++ b/tests/detect_required/testutil.c
@@ -0,0 +1,106 @@
+#include "testutil.h"
+
+#include <string.h>
+
+#include <re/re.h>
+#include <fsm/fsm.h>
+#include <fsm/options.h>
+#include <fsm/walk.h>
+#include <adt/bitmap.h>
+#include <fsm/print.h>
+
+#include <fsm/pred.h>
+
+static const struct fsm_options opt = {
+	.group_edges = 1,
+};
+
+bool
+run_test(const struct testcase *tc)	/* compile tc->regex, analyze it, compare with tc->required */
+{
+	bool test_res = false;	/* only set to true once the comparison succeeds */
+
+	enum re_flags flags = 0;
+	struct re_err err;
+	char *regex = (char *)tc->regex;
+	const char *required = tc->required ? tc->required : "";	/* NULL is treated as "" */
+	const size_t step_limit = tc->step_limit ? tc->step_limit : DEF_STEP_LIMIT;	/* 0 selects the default */
+
+	fprintf(stderr, "-- test: regex '%s', required '%s'\n", tc->regex, required);
+
+	struct fsm *fsm = re_comp(RE_PCRE, fsm_sgetc, &regex, &opt, flags, &err);
+	if (fsm == NULL) {
+		return false;
+	}
+	/* (a NULL fsm is reported as a failed test rather than asserted) */
+
+	if (!fsm_determinise(fsm)) {
+		assert(!"determinise");
+		return false;
+	}
+	if (!fsm_minimise(fsm)) {
+		assert(!"minimise");
+		return false;
+	}
+
+	if (getenv("PRINT_DOT")) {	/* optional debugging output */
+		fsm_print_dot(stderr, fsm);
+	}
+	if (getenv("PRINT_FSM")) {	/* optional debugging output */
+		fsm_print_fsm(stderr, fsm);
+	}
+
+	struct bm bitmap;
+	bm_clear(&bitmap);
+
+	{
+		const size_t statecount = fsm_countstates(fsm);
+		size_t ends = 0;
+		for (size_t i = 0; i < statecount; i++) {
+			if (fsm_isend(fsm, i)) {
+				ends++;
+			}
+		}
+		fprintf(stderr, "-- statecount %zu, %zu ends\n", statecount, ends);
+	}
+
+
+	const enum fsm_detect_required_characters_res res = fsm_detect_required_characters(fsm, step_limit, &bitmap);
+	if (res == FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED) {
+		fprintf(stderr, "-- step limit reached, halting\n");
+		goto cleanup;	/* test_res is still false: counted as a failure */
+	}
+	assert(res == FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN);
+
+	char buf[257] = {0};	/* zero-filled, so the collected bytes stay NUL-terminated */
+	size_t used = 0;
+	assert(!bm_get(&bitmap, 0)); /* does not contain 0x00 */
+
+	int i = 0;
+	for (;;) {	/* collect the set bits after bit 0 into buf, in ascending order */
+		const size_t next = bm_next(&bitmap, i, 1);
+		if (next > UCHAR_MAX) { break; }	/* presumably bm_next's "no more bits" result -- confirm */
+		buf[used++] = (char)next;
+		i = next;
+	}
+
+	if (0 != strcmp(required, buf)) {
+		fprintf(stderr, "Error: mismatch\n");
+		fprintf(stderr, "-- expected: [%s]\n", required);
+		fprintf(stderr, "-- got: [%s]\n", buf);
+		goto cleanup;
+	}
+
+	/* TODO: use fsm_generate_matches to check. it just yields one
+	 * character from an edge, so it won't indicate whether that
+	 * specific character is required, but if it generates without
+	 * something the test says is required that probably means the
+	 * test is wrong. */
+
+	test_res = true;
+
+cleanup:
+	fsm_free(fsm);
+
+	return test_res;
+}
diff --git a/tests/detect_required/testutil.h b/tests/detect_required/testutil.h
new file mode 100644
index 000000000..f9378c190
--- /dev/null
+++ b/tests/detect_required/testutil.h
@@ -0,0 +1,21 @@
+#ifndef TESTUTIL_H
+#define TESTUTIL_H
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdio.h>
+
+#define DEF_STEP_LIMIT 100000
+
+struct testcase {
+	const char *regex;	/* pattern handed to re_comp as RE_PCRE */
+	const char *required;	/* expected required bytes, ascending; NULL is treated as "" */
+	size_t max_gen_buffer;	/* 0: default */
+	size_t step_limit;	/* 0: use DEF_STEP_LIMIT */
+};
+
+bool
+run_test(const struct testcase *tc);
+
+#endif