feat(match): limit regex buffer size

This commit is contained in:
Federico Terzi 2021-05-06 21:20:46 +02:00
parent 10d37d1fe6
commit bfe6b13ae7

View File

@ -53,6 +53,18 @@ impl Default for RegexMatcherState {
} }
} }
/// Tunable settings accepted by `RegexMatcher::new`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegexMatcherOptions {
    /// Upper bound for the matcher's internal buffer, compared against the
    /// buffer's `len()`. When the buffer exceeds this value, the matcher
    /// drops data from the front of the buffer.
    pub max_buffer_size: usize,
}
impl Default for RegexMatcherOptions {
    /// Builds the stock options: a buffer limit of 30.
    fn default() -> Self {
        // Limit applied when the caller does not choose one explicitly.
        let max_buffer_size = 30;
        Self { max_buffer_size }
    }
}
pub struct RegexMatcher<Id> { pub struct RegexMatcher<Id> {
ids: Vec<Id>, ids: Vec<Id>,
// The RegexSet is used to efficiently determine which regexes match // The RegexSet is used to efficiently determine which regexes match
@ -60,6 +72,8 @@ pub struct RegexMatcher<Id> {
// The single regexes are then used to find the captures // The single regexes are then used to find the captures
regexes: Vec<Regex>, regexes: Vec<Regex>,
max_buffer_size: usize,
} }
impl<'a, Id> Matcher<'a, RegexMatcherState, Id> for RegexMatcher<Id> impl<'a, Id> Matcher<'a, RegexMatcherState, Id> for RegexMatcher<Id>
@ -83,6 +97,11 @@ where
} }
} }
// Keep the buffer length in check
if buffer.len() > self.max_buffer_size {
buffer.remove(0);
}
// Find matches // Find matches
if self.regex_set.is_match(&buffer) { if self.regex_set.is_match(&buffer) {
let mut matches = Vec::new(); let mut matches = Vec::new();
@ -129,7 +148,7 @@ where
} }
impl<Id: Clone> RegexMatcher<Id> { impl<Id: Clone> RegexMatcher<Id> {
pub fn new(matches: &[RegexMatch<Id>]) -> Self { pub fn new(matches: &[RegexMatch<Id>], opt: RegexMatcherOptions) -> Self {
let mut ids = Vec::new(); let mut ids = Vec::new();
let mut regexes = Vec::new(); let mut regexes = Vec::new();
let mut good_regexes = Vec::new(); let mut good_regexes = Vec::new();
@ -153,6 +172,7 @@ impl<Id: Clone> RegexMatcher<Id> {
ids, ids,
regex_set, regex_set,
regexes, regexes,
max_buffer_size: opt.max_buffer_size,
} }
} }
} }
@ -179,10 +199,13 @@ mod tests {
#[test] #[test]
fn matcher_simple_matches() { fn matcher_simple_matches() {
let matcher = RegexMatcher::new(&[ let matcher = RegexMatcher::new(
RegexMatch::new(1, "hello"), &[
RegexMatch::new(2, "num\\d{1,3}s"), RegexMatch::new(1, "hello"),
]); RegexMatch::new(2, "num\\d{1,3}s"),
],
RegexMatcherOptions::default(),
);
assert_eq!(get_matches_after_str("hi", &matcher), vec![]); assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
assert_eq!( assert_eq!(
get_matches_after_str("hello", &matcher), get_matches_after_str("hello", &matcher),
@ -205,10 +228,13 @@ mod tests {
#[test] #[test]
fn matcher_with_variables() { fn matcher_with_variables() {
let matcher = RegexMatcher::new(&[ let matcher = RegexMatcher::new(
RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"), &[
RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"), RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"),
]); RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"),
],
RegexMatcherOptions::default(),
);
assert_eq!(get_matches_after_str("hi", &matcher), vec![]); assert_eq!(get_matches_after_str("hi", &matcher), vec![]);
assert_eq!( assert_eq!(
get_matches_after_str("say hello(mary)", &matcher), get_matches_after_str("say hello(mary)", &matcher),
@ -224,4 +250,22 @@ mod tests {
)] )]
); );
} }
#[test]
fn matcher_max_buffer_size() {
    // Cap the buffer at 15 so the second input below cannot fit in it.
    let options = RegexMatcherOptions {
        max_buffer_size: 15,
    };
    let patterns = [
        RegexMatch::new(1, "hello\\((?P<name>.*?)\\)"),
        RegexMatch::new(2, "multi\\((?P<name1>.*?),(?P<name2>.*?)\\)"),
    ];
    let matcher = RegexMatcher::new(&patterns, options);

    // This input is exactly 15 bytes long, so the match is still expected.
    let within_limit = get_matches_after_str("say hello(mary)", &matcher);
    assert_eq!(
        within_limit,
        vec![match_result(1, "hello(mary)", &[("name", "mary")])]
    );

    // This input is longer than the limit; no match is expected.
    let over_limit = get_matches_after_str("hello(very long name over buffer)", &matcher);
    assert_eq!(over_limit, vec![]);
}
} }