From bfe6b13ae7ae7277957ecf215e039c29f6140816 Mon Sep 17 00:00:00 2001 From: Federico Terzi Date: Thu, 6 May 2021 21:20:46 +0200 Subject: [PATCH] feat(match): limit regex buffer size --- espanso-match/src/regex/mod.rs | 62 +++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/espanso-match/src/regex/mod.rs b/espanso-match/src/regex/mod.rs index b0fd3f8..af34dd1 100644 --- a/espanso-match/src/regex/mod.rs +++ b/espanso-match/src/regex/mod.rs @@ -53,6 +53,18 @@ impl Default for RegexMatcherState { } } +pub struct RegexMatcherOptions { + pub max_buffer_size: usize, +} + +impl Default for RegexMatcherOptions { + fn default() -> Self { + Self { + max_buffer_size: 30, + } + } +} + pub struct RegexMatcher { ids: Vec, // The RegexSet is used to efficiently determine which regexes match @@ -60,6 +72,8 @@ pub struct RegexMatcher { // The single regexes are then used to find the captures regexes: Vec, + + max_buffer_size: usize, } impl<'a, Id> Matcher<'a, RegexMatcherState, Id> for RegexMatcher @@ -83,6 +97,11 @@ where } } + // Keep the buffer length in check + if buffer.len() > self.max_buffer_size { + buffer.remove(0); + } + // Find matches if self.regex_set.is_match(&buffer) { let mut matches = Vec::new(); @@ -129,7 +148,7 @@ where } impl RegexMatcher { - pub fn new(matches: &[RegexMatch]) -> Self { + pub fn new(matches: &[RegexMatch], opt: RegexMatcherOptions) -> Self { let mut ids = Vec::new(); let mut regexes = Vec::new(); let mut good_regexes = Vec::new(); @@ -153,6 +172,7 @@ impl RegexMatcher { ids, regex_set, regexes, + max_buffer_size: opt.max_buffer_size, } } } @@ -179,10 +199,13 @@ mod tests { #[test] fn matcher_simple_matches() { - let matcher = RegexMatcher::new(&[ - RegexMatch::new(1, "hello"), - RegexMatch::new(2, "num\\d{1,3}s"), - ]); + let matcher = RegexMatcher::new( + &[ + RegexMatch::new(1, "hello"), + RegexMatch::new(2, "num\\d{1,3}s"), + ], + RegexMatcherOptions::default(), + ); assert_eq!(get_matches_after_str("hi", &matcher), vec![]); assert_eq!( get_matches_after_str("hello", &matcher), @@ -205,10 +228,13 @@ mod tests { #[test] fn matcher_with_variables() { - let matcher = RegexMatcher::new(&[ - RegexMatch::new(1, "hello\\((?P.*?)\\)"), - RegexMatch::new(2, "multi\\((?P.*?),(?P.*?)\\)"), - ]); + let matcher = RegexMatcher::new( + &[ + RegexMatch::new(1, "hello\\((?P.*?)\\)"), + RegexMatch::new(2, "multi\\((?P.*?),(?P.*?)\\)"), + ], + RegexMatcherOptions::default(), + ); assert_eq!(get_matches_after_str("hi", &matcher), vec![]); assert_eq!( get_matches_after_str("say hello(mary)", &matcher), @@ -224,4 +250,22 @@ mod tests { )] ); } + + #[test] + fn matcher_max_buffer_size() { + let matcher = RegexMatcher::new( + &[ + RegexMatch::new(1, "hello\\((?P.*?)\\)"), + RegexMatch::new(2, "multi\\((?P.*?),(?P.*?)\\)"), + ], + RegexMatcherOptions { + max_buffer_size: 15 + }, + ); + assert_eq!( + get_matches_after_str("say hello(mary)", &matcher), + vec![match_result(1, "hello(mary)", &[("name", "mary")])] + ); + assert_eq!(get_matches_after_str("hello(very long name over buffer)", &matcher), vec![]); + } }