Skip to content

Commit

Permalink
WIP: fsm_detect_required_characters
Browse files Browse the repository at this point in the history
  • Loading branch information
silentbicycle committed May 21, 2024
1 parent 4951120 commit 05786ed
Show file tree
Hide file tree
Showing 10 changed files with 792 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ SUBDIR += tests/intersect
SUBDIR += tests/eclosure
SUBDIR += tests/equals
SUBDIR += tests/subtract
SUBDIR += tests/detect_required
SUBDIR += tests/determinise
SUBDIR += tests/endids
SUBDIR += tests/epsilons
Expand Down
31 changes: 31 additions & 0 deletions include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef FSM_WALK_H
#define FSM_WALK_H

#include <adt/bitmap.h>

struct fsm;
struct fsm_state;

Expand Down Expand Up @@ -128,5 +130,34 @@ fsm_generate_matches_cb fsm_generate_cb_printf;
* to escape all characters or just nonprintable ones. */
fsm_generate_matches_cb fsm_generate_cb_printf_escaped;

/* Walk a DFA and detect which characters MUST appear in the input for a
* match to be possible. For example, if input for the DFA corresponding
* to /^(abc|dbe)$/ does not contain 'b' at all, there's no way it can
* ever match, so executing the regex is unnecessary. This does not detect
* which characters must appear before/after others or how many times, just
* which must be present.
*
* The input must be a DFA. When run with EXPENSIVE_CHECKS, a non-DFA
* input is detected and FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE is
* returned; without EXPENSIVE_CHECKS this precondition is not checked.
*
* The bitmap will be cleared before populating. Afterward,
* bm_count(bitmap) will return how many required characters were
* found.
*
* There is an optional step_limit -- if this is reached, then it will
* return FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED and a
* cleared bitmap, because any partial information could still have been
* contradicted later. If the step_limit is 0 it will be ignored. */
/* Result codes for fsm_detect_required_characters. The two non-error
 * outcomes are non-negative; the ERROR_* codes are negative. */
enum fsm_detect_required_characters_res {
	/* NOTE(review): presumably the walk completed and the bitmap
	 * holds the required characters -- confirm against the
	 * implementation. */
	FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN = 0,

	/* The optional step limit was reached; the bitmap is returned
	 * cleared rather than partially populated. */
	FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED = 1,

	/* The input was not a DFA (only detected when run with
	 * EXPENSIVE_CHECKS). */
	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE = -1,

	/* NOTE(review): presumably an allocation failure during the
	 * analysis -- confirm against the implementation. */
	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC = -2
};
/* Detect which characters must be present in an input for the DFA to
 * be able to match it.
 *
 * dfa:        the automaton to walk; must be a DFA.
 * step_limit: optional bound on the work done; 0 means no limit.
 * bitmap:     output; cleared before populating, and left cleared if
 *             the step limit is reached.
 *
 * Returns one of the enum fsm_detect_required_characters_res codes. */
enum fsm_detect_required_characters_res
fsm_detect_required_characters(const struct fsm *dfa, size_t step_limit,
struct bm *bitmap);

#endif

1 change: 1 addition & 0 deletions src/libfsm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SRC += src/libfsm/complete.c
SRC += src/libfsm/consolidate.c
SRC += src/libfsm/clone.c
SRC += src/libfsm/closure.c
SRC += src/libfsm/detect_required.c
SRC += src/libfsm/edge.c
SRC += src/libfsm/empty.c
SRC += src/libfsm/end.c
Expand Down
Loading

0 comments on commit 05786ed

Please sign in to comment.