feat(match): limit regex buffer size
This commit is contained in:
parent
10d37d1fe6
commit
bfe6b13ae7
|
@ -53,6 +53,18 @@ impl Default for RegexMatcherState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct RegexMatcherOptions {
|
||||||
|
pub max_buffer_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for RegexMatcherOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
max_buffer_size: 30,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RegexMatcher<Id> {
|
pub struct RegexMatcher<Id> {
|
||||||
ids: Vec<Id>,
|
ids: Vec<Id>,
|
||||||
// The RegexSet is used to efficiently determine which regexes match
|
// The RegexSet is used to efficiently determine which regexes match
|
||||||
|
@ -60,6 +72,8 @@ pub struct RegexMatcher<Id> {
|
||||||
|
|
||||||
// The single regexes are then used to find the captures
|
// The single regexes are then used to find the captures
|
||||||
regexes: Vec<Regex>,
|
regexes: Vec<Regex>,
|
||||||
|
|
||||||
|
max_buffer_size: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, Id> Matcher<'a, RegexMatcherState, Id> for RegexMatcher<Id>
|
impl<'a, Id> Matcher<'a, RegexMatcherState, Id> for RegexMatcher<Id>
|
||||||
|
@ -83,6 +97,11 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep the buffer length in check
|
||||||
|
if buffer.len() > self.max_buffer_size {
|
||||||
|
buffer.remove(0);
|
||||||
|
}
|
||||||
|
|
||||||
// Find matches
|
// Find matches
|
||||||
if self.regex_set.is_match(&buffer) {
|
if self.regex_set.is_match(&buffer) {
|
||||||
let mut matches = Vec::new();
|
let mut matches = Vec::new();
|
||||||
|
@ -129,7 +148,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Id: Clone> RegexMatcher<Id> {
|
impl<Id: Clone> RegexMatcher<Id> {
|
||||||
pub fn new(matches: &[RegexMatch<Id>]) -> Self {
|
pub fn new(matches: &[RegexMatch<Id>], opt: RegexMatcherOptions) -> Self {
|
||||||
let mut ids = Vec::new();
|
let mut ids = Vec::new();
|
||||||
let mut regexes = Vec::new();
|
let mut regexes = Vec::new();
|
||||||
let mut good_regexes = Vec::new();
|
let mut good_regexes = Vec::new();
|
||||||
|
@ -153,6 +172,7 @@ impl<Id: Clone> RegexMatcher<Id> {
|
||||||
ids,
|
ids,
|
||||||
regex_set,
|
regex_set,
|
||||||
regexes,
|
regexes,
|
||||||
|
max_buffer_size: opt.max_buffer_size,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,10 +199,13 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn matcher_simple_matches() {
|
fn matcher_simple_matches() {
|
||||||
let matcher = RegexMatcher::new(&[
|
let matcher = RegexMatcher::new(
|
||||||
|
&[
|
||||||
RegexMatch::new(1, "hello"),
|
RegexMatch::new(1, "hello"),
|
||||||
RegexMatch::new(2, "num\\d{1,3}s"),
|
RegexMatch::new(2, "num\\d{1,3}s"),
|
||||||
]);
|
],
|
||||||
|
RegexMatcherOptions::default(),
|
||||||
|
);
|
||||||
assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
|
assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
get_matches_after_str("hello", &matcher),
|
get_matches_after_str("hello", &matcher),
|
||||||
|
@ -205,10 +228,13 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn matcher_with_variables() {
|
fn matcher_with_variables() {
|
||||||
let matcher = RegexMatcher::new(&[
|
let matcher = RegexMatcher::new(
|
||||||
|
&[
|
||||||
RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"),
|
RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"),
|
||||||
RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"),
|
RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"),
|
||||||
]);
|
],
|
||||||
|
RegexMatcherOptions::default(),
|
||||||
|
);
|
||||||
assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
|
assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
get_matches_after_str("say hello(mary)", &matcher),
|
get_matches_after_str("say hello(mary)", &matcher),
|
||||||
|
@ -224,4 +250,22 @@ mod tests {
|
||||||
)]
|
)]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn matcher_max_buffer_size() {
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
&[
|
||||||
|
RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"),
|
||||||
|
RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"),
|
||||||
|
],
|
||||||
|
RegexMatcherOptions {
|
||||||
|
max_buffer_size: 15
|
||||||
|
},
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
get_matches_after_str("say hello(mary)", &matcher),
|
||||||
|
vec![match_result(1, "hello(mary)", &[("name", "mary")])]
|
||||||
|
);
|
||||||
|
assert_eq!(get_matches_after_str("hello(very long name over buffer)", &matcher), vec![]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user