Skip to content

Commit 9c33cca

Browse files
Multimodcrafter, shilangyu
authored and committed
Change tracking of look-around state to index
This makes it so we don't need to reset the lookaround state on each character advancement.
1 parent cccfc23 commit 9c33cca

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

regex-automata/src/nfa/thompson/pikevm.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,7 +1490,7 @@ impl PikeVM {
14901490
stack: &mut Vec<FollowEpsilon>,
14911491
curr: &mut ActiveStates,
14921492
next: &mut ActiveStates,
1493-
lookarounds: &mut Vec<bool>,
1493+
lookarounds: &mut Vec<Option<NonMaxUsize>>,
14941494
input: &Input<'_>,
14951495
at: usize,
14961496
slots: &mut [Option<NonMaxUsize>],
@@ -1527,7 +1527,7 @@ impl PikeVM {
15271527
stack: &mut Vec<FollowEpsilon>,
15281528
curr: &mut ActiveStates,
15291529
next: &mut ActiveStates,
1530-
lookarounds: &mut Vec<bool>,
1530+
lookarounds: &mut Vec<Option<NonMaxUsize>>,
15311531
input: &Input<'_>,
15321532
at: usize,
15331533
patset: &mut PatternSet,
@@ -1581,7 +1581,7 @@ impl PikeVM {
15811581
stack: &mut Vec<FollowEpsilon>,
15821582
curr_slot_table: &mut SlotTable,
15831583
next: &mut ActiveStates,
1584-
lookarounds: &mut Vec<bool>,
1584+
lookarounds: &mut Vec<Option<NonMaxUsize>>,
15851585
input: &Input<'_>,
15861586
at: usize,
15871587
sid: StateID,
@@ -1672,7 +1672,7 @@ impl PikeVM {
16721672
stack: &mut Vec<FollowEpsilon>,
16731673
curr_slots: &mut [Option<NonMaxUsize>],
16741674
next: &mut ActiveStates,
1675-
lookarounds: &mut Vec<bool>,
1675+
lookarounds: &mut Vec<Option<NonMaxUsize>>,
16761676
input: &Input<'_>,
16771677
at: usize,
16781678
sid: StateID,
@@ -1732,7 +1732,7 @@ impl PikeVM {
17321732
stack: &mut Vec<FollowEpsilon>,
17331733
curr_slots: &mut [Option<NonMaxUsize>],
17341734
next: &mut ActiveStates,
1735-
lookarounds: &mut Vec<bool>,
1735+
lookarounds: &mut Vec<Option<NonMaxUsize>>,
17361736
input: &Input<'_>,
17371737
at: usize,
17381738
mut sid: StateID,
@@ -1773,11 +1773,16 @@ impl PikeVM {
17731773
sid = next;
17741774
}
17751775
State::WriteLookaround { look_idx } => {
1776-
lookarounds[look_idx] = true;
1776+
// This is ok since `at` is always less than `usize::MAX`.
1777+
lookarounds[look_idx] = NonMaxUsize::new(at);
17771778
return;
17781779
}
17791780
State::CheckLookaround { look_idx, positive, next } => {
1780-
if lookarounds[look_idx] != positive {
1781+
let state = match lookarounds[look_idx] {
1782+
None => usize::MAX,
1783+
Some(pos) => pos.get(),
1784+
};
1785+
if (state == at) != positive {
17811786
return;
17821787
}
17831788
sid = next;
@@ -1963,9 +1968,10 @@ pub struct Cache {
19631968
/// The next set of states we're building that will be explored for the
19641969
/// next byte in the haystack.
19651970
next: ActiveStates,
1966-
/// This answers the question: "Does lookaround assertion x hold at the
1967-
/// current position in the haystack"
1968-
lookaround: Vec<bool>,
1971+
/// This answers the question: "What is the maximum position in the
1972+
/// haystack at which lookaround assertion x holds and which is <= the
1973+
/// current position"
1974+
lookaround: Vec<Option<NonMaxUsize>>,
19691975
}
19701976

19711977
impl Cache {
@@ -1984,7 +1990,7 @@ impl Cache {
19841990
next: ActiveStates::new(re),
19851991
lookaround: {
19861992
let mut res = Vec::new();
1987-
res.resize(re.lookaround_count().as_usize(), false);
1993+
res.resize(re.lookaround_count().as_usize(), None);
19881994
res
19891995
},
19901996
}

0 commit comments

Comments
 (0)