Skip to content

Commit 2e27e2a

Browse files
Add filters for excessive spaces and tabs
Co-authored-by: SamMorrowDrums <4811358+SamMorrowDrums@users.noreply.github.com>
1 parent 67d6012 commit 2e27e2a

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

pkg/filtering/content_filter.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ var (
3737

3838
// Excessive whitespace (more than 3 consecutive newlines)
3939
excessiveWhitespaceRegex = regexp.MustCompile(`\n{4,}`)
40+
41+
// Excessive spaces (15 or more consecutive spaces)
42+
excessiveSpacesRegex = regexp.MustCompile(` {15,}`)
43+
44+
// Excessive tabs (6 or more consecutive tabs)
45+
excessiveTabsRegex = regexp.MustCompile(`\t{6,}`)
4046
)
4147

4248
// Config holds configuration for content filtering
@@ -93,6 +99,12 @@ func FilterContent(input string, cfg *Config) string {
9399

94100
// Normalize excessive whitespace
95101
result = excessiveWhitespaceRegex.ReplaceAllString(result, "\n\n\n")
102+
103+
// Normalize excessive spaces
104+
result = excessiveSpacesRegex.ReplaceAllString(result, " ")
105+
106+
// Normalize excessive tabs
107+
result = excessiveTabsRegex.ReplaceAllString(result, " ")
96108

97109
return result
98110
}

pkg/filtering/content_filter_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ func TestFilterContent(t *testing.T) {
5959
expected: "Line 1\n\n\nLine 2",
6060
cfg: DefaultConfig(),
6161
},
62+
{
63+
name: "Text with excessive spaces",
64+
input: "Normal                    Excessive",
65+
expected: "Normal Excessive",
66+
cfg: DefaultConfig(),
67+
},
68+
{
69+
name: "Text with excessive tabs",
70+
input: "Normal\t\t\t\t\t\t\t\tExcessive",
71+
expected: "Normal Excessive",
72+
cfg: DefaultConfig(),
73+
},
6274
{
6375
name: "Text with HTML attributes",
6476
input: "<p data-hidden=\"true\" style=\"display:none\">Hidden paragraph</p>",

0 commit comments

Comments
 (0)