Skip to content

Commit

Permalink
fix: fixes for rule003 spelling
Browse files Browse the repository at this point in the history
- Calculating overlapping ignore ranges
- Bare prefixes
  • Loading branch information
charislam committed Dec 17, 2024
1 parent 2f6c6cb commit 30d3cda
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 52 deletions.
75 changes: 66 additions & 9 deletions src/geometry.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::mem;
use std::ops::{Add, Deref, DerefMut, Range, SubAssign};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -207,6 +208,14 @@ impl AdjustedRange {
let end = first.end.max(second.end);
Self(Range { start, end })
}

/// Reports whether this range and `other` overlap.
///
/// The check is symmetric: the range with the smaller `start` is compared
/// against the other one, and the result is `true` only when the earlier
/// range ends strictly after the later range starts. Ranges that merely
/// touch (one's `end` equals the other's `start`) yield `false`.
pub fn overlaps(&self, other: &Self) -> bool {
    // Order the pair by start offset so a single end/start comparison suffices.
    let (earlier, later) = if self.start > other.start {
        (other, self)
    } else {
        (self, other)
    };
    earlier.end > later.start
}
}

#[derive(Debug, Default)]
Expand Down Expand Up @@ -282,9 +291,27 @@ impl RangeSet {
}

pub(crate) fn push(&mut self, range: AdjustedRange) {
log::trace!("Pushing range: {range:?}");
match self.overlaps_impl(&range) {
Ok(index) => {
self.0[index] = AdjustedRange::span_between(&self.0[index], &range);
if index < self.0.len() - 1 && self.0[index].overlaps(&self.0[index + 1]) {
let taken_vec = mem::take(&mut self.0);
self.0 = taken_vec.into_iter().fold(Vec::new(), |mut accum, range| {
if !accum.is_empty() {
let last_index = accum.len() - 1;
if accum[last_index].overlaps(&range) {
accum[last_index] =
AdjustedRange::span_between(&accum[last_index], &range);
} else {
accum.push(range);
}
} else {
accum.push(range);
}
accum
});
}
}
Err(index) => {
self.0.insert(index, range);
Expand All @@ -303,15 +330,24 @@ impl RangeSet {
}

fn overlaps_impl(&self, range: &AdjustedRange) -> Result<usize, usize> {
self.0.binary_search_by(|probe| {
if probe.end < range.start {
Ordering::Less
} else if probe.start > range.end {
Ordering::Greater
} else {
Ordering::Equal
}
})
self.0
.binary_search_by(|probe| {
if probe.end < range.start {
Ordering::Less
} else if probe.start > range.end {
Ordering::Greater
} else {
Ordering::Equal
}
})
.map(|index| {
// Ensure we return the first matching index
let mut first_index = index;
while first_index > 0 && self.0[first_index - 1].overlaps(range) {
first_index -= 1;
}
first_index
})
}
}

Expand Down Expand Up @@ -360,6 +396,27 @@ mod tests {
assert_eq!(set.0[0].end, AdjustedOffset::from(8));
}

#[test]
fn test_range_set_merges_multiple_overlapping_ranges() {
    let mut set = super::RangeSet::new();

    // (start, end) pairs in insertion order. The wide 3..18 range goes in
    // last so that a single push has to bridge several existing entries.
    let insertion_order = [(0, 5), (10, 15), (17, 20), (23, 25), (3, 18)];
    for (start, end) in insertion_order {
        set.push(AdjustedRange::new(
            AdjustedOffset::from(start),
            AdjustedOffset::from(end),
        ));
    }

    // Everything up to offset 20 collapses into one range; 23..25 stays apart.
    assert_eq!(set.0.len(), 2);
    let merged = &set.0[0];
    assert_eq!(merged.start, AdjustedOffset::from(0));
    assert_eq!(merged.end, AdjustedOffset::from(20));
}

#[test]
fn test_range_set_merges_adjacent_ranges() {
let mut set = super::RangeSet::new();
Expand Down
83 changes: 67 additions & 16 deletions src/rules/rule003_spelling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use super::{

const DICTIONARY: &str = include_str!("./rule003_spelling/dictionary.txt");

#[derive(Debug, Clone)]
enum HyphenatedPart {
MaybePrefix,
MaybeSuffix,
Expand Down Expand Up @@ -161,7 +162,7 @@ impl Rule003Spelling {
continue;
}

if word_as_string.contains('-') {
if word_as_string.contains('-') && !self.is_correct_spelling(&word_as_string, None) {
// Deal with hyphenated words
let mut hyphenated_tokenizer = WordIterator::new(
word,
Expand Down Expand Up @@ -235,38 +236,46 @@ impl Rule003Spelling {
level: LintLevel,
errors: &mut Option<Vec<LintError>>,
) {
if word.len() < 2 {
if self.is_correct_spelling(word, hyphenation) {
return;
}

let error = LintError::new(
self.name(),
Rule003Spelling::message(&word),
level,
location,
None,
context,
);
errors.get_or_insert_with(Vec::new).push(error);
}

fn is_correct_spelling(&self, word: &str, hyphenation: Option<HyphenatedPart>) -> bool {
trace!("Checking spelling of word: {word} with hyphenation: {hyphenation:?}");
if word.len() < 2 {
return true;
}

if word
.chars()
.any(|c| !c.is_ascii_alphabetic() && !is_punctuation(&c))
.any(|c| !c.is_ascii_alphabetic() && !Self::is_included_punctuation(&c))
{
// Ignore words containing non-English alphabet and number
return;
return true;
}

let word = Self::normalize_word(word);
if self.dictionary.contains(word.as_ref()) {
return;
return true;
}

if let Some(HyphenatedPart::MaybePrefix) = hyphenation {
if self.prefixes.contains(word.as_ref()) {
return;
return true;
}
}

let error = LintError::new(
self.name(),
Rule003Spelling::message(&word),
level,
location,
None,
context,
);
errors.get_or_insert_with(Vec::new).push(error);
false
}

fn normalize_word_range(word: RopeSlice<'_>, offset: usize) -> AdjustedRange {
Expand Down Expand Up @@ -327,6 +336,21 @@ impl Rule003Spelling {
word
}
}

/// Returns `true` when `c` is accepted by `is_punctuation` and is also one
/// of the explicitly listed characters: a hyphen or dash variant, a straight
/// or curly single/double quote, or a full stop.
fn is_included_punctuation(c: &char) -> bool {
    // Anything the general punctuation test rejects is never included.
    if !is_punctuation(c) {
        return false;
    }

    matches!(
        *c,
        '-' | '–' | '—' | '―' | '\'' | '‘' | '’' | '“' | '”' | '"' | '.'
    )
}
}

#[cfg(test)]
Expand Down Expand Up @@ -686,4 +710,31 @@ mod tests {
);
assert!(errors.is_none());
}

// Checks that bare hyphenated prefixes ("pre-" standing alone, and "post-"
// attached to a word) produce no spelling errors once "pre" and "post" are
// configured as allowed prefixes.
#[test]
fn test_rule003_bare_prefixes() {
let mdx = "pre- and post-world";
let parse_result = parse(mdx).unwrap();
let context = RuleContext::new(parse_result, None).unwrap();

let mut rule = Rule003Spelling::default();
// Register the two prefixes under the "prefixes" settings key.
let settings = RuleSettings::with_array_of_strings("prefixes", vec!["pre", "post"]);
rule.setup(Some(&settings));

// The node under test is the first child of the first child of the AST root
// (presumably the text node inside the first paragraph — TODO confirm).
let errors = rule.check(
context
.ast()
.children()
.unwrap()
.get(0)
.unwrap()
.children()
.unwrap()
.get(0)
.unwrap(),
&context,
LintLevel::Error,
);
// No lint errors are expected for this input.
assert!(errors.is_none());
}
}
Loading

0 comments on commit 30d3cda

Please sign in to comment.