@@ -6,26 +6,30 @@ import 'package:html/dom.dart' as html;
6
6
class WhitespaceProcessing {
7
7
/// [processWhitespace] handles the removal of unnecessary whitespace from
8
8
/// a StyledElement tree.
9
+ ///
10
+ /// The criteria for determining which whitespace is replaceable are outlined
11
+ /// at https://www.w3.org/TR/css-text-3/
12
+ /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
9
13
static StyledElement processWhitespace(StyledElement tree) {
  // Each pass receives the tree returned by the previous one, in this order:
  // normalise whitespace inside text nodes, resolve whitespace between inline
  // siblings, trim the edges of block-level runs, then drop empty elements.
  final normalised = _processInternalWhitespace(tree);
  final inlineResolved = _processInlineWhitespace(normalised);
  final blockTrimmed = _processBlockWhitespace(inlineResolved);
  return _removeEmptyElements(blockTrimmed);
}
15
20
16
21
/// [_processInternalWhitespace] removes unnecessary whitespace from the StyledElement tree.
17
- ///
18
- /// The criteria for determining which whitespace is replaceable is outlined
19
- /// at https://www.w3.org/TR/css-text-3/
20
- /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
21
22
static StyledElement _processInternalWhitespace(StyledElement tree) {
  // Preformatted subtrees keep their whitespace exactly as written, so the
  // whole subtree is returned untouched.
  if (tree.style.whiteSpace == WhiteSpace.pre) {
    return tree;
  }

  if (tree is TextContentElement) {
    // `text` is nullable. Copy it to a local so it promotes, and skip the
    // node instead of throwing when there is no text to normalise.
    final text = tree.text;
    if (text != null) {
      tree.text = _removeUnnecessaryWhitespace(text);
    }
  } else {
    // Non-text elements carry no text of their own; recurse into children.
    for (final child in tree.children) {
      _processInternalWhitespace(child);
    }
  }

  return tree;
}
31
35
@@ -36,13 +40,95 @@ class WhitespaceProcessing {
36
40
return _processInlineWhitespaceRecursive (tree, Context (false ));
37
41
}
38
42
43
+ /// [_processBlockWhitespace] removes unnecessary whitespace from block
44
+ /// rendering contexts. Specifically, a space at the beginning and end of
45
+ /// each inline rendering context should be removed.
46
static StyledElement _processBlockWhitespace(StyledElement tree) {
  // Preformatted subtrees are left exactly as they are.
  if (tree.style.whiteSpace == WhiteSpace.pre) {
    return tree;
  }

  // Whether [node] is a block-level element or a `br`. A null neighbour
  // (before the first or after the last child) never matches.
  bool breaksLine(StyledElement? node) =>
      node?.style.display == Display.block || node?.name == "br";

  final kids = tree.children;

  // Handle every subtree first, so nested block contexts are already trimmed
  // by the time this level is examined.
  for (final kid in kids) {
    _processBlockWhitespace(kid);
  }

  // Nothing to trim at this level unless at least one direct child is a
  // block-level element or a `br`.
  if (!kids.any(breaksLine)) {
    return tree;
  }

  final count = kids.length;
  for (var index = 0; index < count; index++) {
    final current = kids[index];
    if (current.style.whiteSpace == WhiteSpace.pre) {
      continue;
    }

    final previous = index > 0 ? kids[index - 1] : null;
    final following = index + 1 < count ? kids[index + 1] : null;

    // A block child is trimmed on both of its own edges.
    if (current.style.display == Display.block) {
      _removeLeadingSpace(current);
      _removeTrailingSpace(current);
    }

    // A child that directly follows a block or `br` loses its leading space.
    if (breaksLine(previous)) {
      _removeLeadingSpace(current);
    }

    // A child that directly precedes a block or `br` loses its trailing space.
    if (breaksLine(following)) {
      _removeTrailingSpace(current);
    }
  }

  return tree;
}
90
+
91
+ /// [_removeLeadingSpace] removes any leading space
92
+ /// from the text of the tree at this level, no matter how deep in the tree
93
+ /// it may be.
94
static void _removeLeadingSpace(StyledElement element) {
  // Preformatted content keeps its whitespace.
  if (element.style.whiteSpace == WhiteSpace.pre) {
    return;
  }

  if (element is TextContentElement) {
    // Strip only collapsible document whitespace. `String.trimLeft` would
    // also remove U+00A0 (`&nbsp;`) and other Unicode spaces, which are not
    // collapsible and must stay in the text.
    element.text = element.text?.replaceFirst(RegExp(r"^[ \t\n\r\f]+"), "");
  } else if (element.children.isNotEmpty) {
    // The leading text of a container is the leading text of its first child.
    _removeLeadingSpace(element.children.first);
  }
}
105
+
106
+ /// [_removeTrailingSpace] removes any trailing space
107
+ /// from the text of the tree at this level, no matter how deep in the tree
108
+ /// it may be.
109
static void _removeTrailingSpace(StyledElement element) {
  // Preformatted content keeps its whitespace.
  if (element.style.whiteSpace == WhiteSpace.pre) {
    return;
  }

  if (element is TextContentElement) {
    // Strip only collapsible document whitespace. `String.trimRight` would
    // also remove U+00A0 (`&nbsp;`) and other Unicode spaces, which are not
    // collapsible and must stay in the text.
    element.text = element.text?.replaceFirst(RegExp(r"[ \t\n\r\f]+$"), "");
  } else if (element.children.isNotEmpty) {
    // The trailing text of a container is the trailing text of its last child.
    _removeTrailingSpace(element.children.last);
  }
}
120
+
39
121
/// [_processInlineWhitespaceRecursive] analyzes the whitespace between and among different
40
122
/// inline elements, and replaces any instance of two or more spaces with a single space, according
41
123
/// to the w3's HTML whitespace processing specification linked to above.
42
124
static StyledElement _processInlineWhitespaceRecursive (
43
125
StyledElement tree,
44
126
Context <bool > keepLeadingSpace,
45
127
) {
128
+ if (tree.style.whiteSpace == WhiteSpace .pre) {
129
+ return tree;
130
+ }
131
+
46
132
if (tree is TextContentElement ) {
47
133
/// initialize indices to negative numbers to make conditionals a little easier
48
134
int textIndex = - 1 ;
@@ -62,9 +148,9 @@ class WhitespaceProcessing {
62
148
final parentNodes = tree.element? .parent? .nodes;
63
149
64
150
/// find the index of the tree itself in the parent nodes
65
- if (( parentNodes? .length ?? 0 ) >= 1 ) {
151
+ if (parentNodes? .isNotEmpty ?? false ) {
66
152
elementIndex =
67
- parentNodes? .indexWhere ((element) => element == tree.element) ?? - 1 ;
153
+ parentNodes! .indexWhere ((element) => element == tree.element);
68
154
}
69
155
70
156
/// if the tree is any node except the last node in the node list and the
@@ -117,9 +203,7 @@ class WhitespaceProcessing {
117
203
/// update the [Context] to signify to that next text node whether it should
118
204
/// keep its whitespace. This is based on whether the current text ends with a
119
205
/// whitespace.
120
- if (textIndex ==
121
- ((tree.element? .nodes.length ?? 0 ) -
122
- 1 ) && //TODO is this the proper ??
206
+ if (textIndex == (tree.node.nodes.length - 1 ) &&
123
207
tree.element? .localName != "br" &&
124
208
parentAfterText.startsWith (' ' )) {
125
209
keepLeadingSpace.data = ! tree.text! .endsWith (' ' );
@@ -142,11 +226,11 @@ class WhitespaceProcessing {
142
226
/// (4) Replace any instances of two or more spaces with a single space.
143
227
static String _removeUnnecessaryWhitespace(String text) {
  // Dropping spaces around line feeds, turning line feeds and tabs into
  // spaces, and then collapsing repeated spaces always reduces each maximal
  // run of spaces, tabs and line feeds to exactly one space. A single
  // character-class pattern gives that result in one pass and avoids the
  // lookbehind assertion `(?<=\n)`, which fails at construction on
  // JavaScript engines without lookbehind support when compiled for the web.
  return text.replaceAll(RegExp(r"[ \t\n]+"), " ");
}
151
235
152
236
/// [_removeEmptyElements] recursively removes empty elements.
@@ -155,7 +239,7 @@ class WhitespaceProcessing {
155
239
/// or any block-level [TextContentElement] that contains only whitespace and doesn't follow
156
240
/// a block element or a line break.
157
241
static StyledElement _removeEmptyElements (StyledElement tree) {
158
- List <StyledElement > toRemove = < StyledElement > [] ;
242
+ Set <StyledElement > toRemove = < StyledElement > {} ;
159
243
bool lastChildBlock = true ;
160
244
tree.children.forEachIndexed ((index, child) {
161
245
if (child is EmptyContentElement ) {
0 commit comments