Skip to content

Commit cb04da1

Browse files
authored
Port path normalization optimization from strada (microsoft#389)
1 parent 3ffbd31 commit cb04da1

File tree

2 files changed

+254
-13
lines changed

2 files changed

+254
-13
lines changed

internal/tspath/path.go

Lines changed: 144 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,140 @@ func GetNormalizedPathComponents(path string, currentDirectory string) []string
321321
}
322322

323323
func GetNormalizedAbsolutePath(fileName string, currentDirectory string) string {
324-
return GetPathFromPathComponents(GetNormalizedPathComponents(fileName, currentDirectory))
324+
rootLength := GetRootLength(fileName)
325+
if rootLength == 0 && currentDirectory != "" {
326+
fileName = CombinePaths(currentDirectory, fileName)
327+
} else {
328+
// CombinePaths normalizes slashes, so not necessary in other branch
329+
fileName = NormalizeSlashes(fileName)
330+
}
331+
rootLength = GetRootLength(fileName)
332+
333+
if simpleNormalized, ok := simpleNormalizePath(fileName); ok {
334+
length := len(simpleNormalized)
335+
if length > rootLength {
336+
return RemoveTrailingDirectorySeparator(simpleNormalized)
337+
}
338+
if length == rootLength && rootLength != 0 {
339+
return EnsureTrailingDirectorySeparator(simpleNormalized)
340+
}
341+
return simpleNormalized
342+
}
343+
344+
length := len(fileName)
345+
root := fileName[:rootLength]
346+
// `normalized` is only initialized once `fileName` is determined to be non-normalized.
347+
// `changed` is set at the same time.
348+
var changed bool
349+
var normalized string
350+
var segmentStart int
351+
index := rootLength
352+
normalizedUpTo := index
353+
seenNonDotDotSegment := rootLength != 0
354+
for index < length {
355+
// At beginning of segment
356+
segmentStart = index
357+
ch := fileName[index]
358+
for ch == '/' {
359+
index++
360+
if index < length {
361+
ch = fileName[index]
362+
} else {
363+
break
364+
}
365+
}
366+
if index > segmentStart {
367+
// Seen superfluous separator
368+
if !changed {
369+
normalized = fileName[:max(rootLength, segmentStart-1)]
370+
changed = true
371+
}
372+
if index == length {
373+
break
374+
}
375+
segmentStart = index
376+
}
377+
// Past any superfluous separators
378+
segmentEnd := strings.IndexByte(fileName[index+1:], '/')
379+
if segmentEnd == -1 {
380+
segmentEnd = length
381+
} else {
382+
segmentEnd += index + 1
383+
}
384+
segmentLength := segmentEnd - segmentStart
385+
if segmentLength == 1 && fileName[index] == '.' {
386+
// "." segment (skip)
387+
if !changed {
388+
normalized = fileName[:normalizedUpTo]
389+
changed = true
390+
}
391+
} else if segmentLength == 2 && fileName[index] == '.' && fileName[index+1] == '.' {
392+
// ".." segment
393+
if !seenNonDotDotSegment {
394+
if changed {
395+
if len(normalized) == rootLength {
396+
normalized += ".."
397+
} else {
398+
normalized += "/.."
399+
}
400+
} else {
401+
normalizedUpTo = index + 2
402+
}
403+
} else if !changed {
404+
if normalizedUpTo-1 >= 0 {
405+
normalized = fileName[:max(rootLength, strings.LastIndexByte(fileName[:normalizedUpTo-1], '/'))]
406+
} else {
407+
normalized = fileName[:normalizedUpTo]
408+
}
409+
changed = true
410+
seenNonDotDotSegment = (len(normalized) != rootLength || rootLength != 0) && normalized != ".." && !strings.HasSuffix(normalized, "/..")
411+
} else {
412+
lastSlash := strings.LastIndexByte(normalized, '/')
413+
if lastSlash != -1 {
414+
normalized = normalized[:max(rootLength, lastSlash)]
415+
} else {
416+
normalized = root
417+
}
418+
seenNonDotDotSegment = (len(normalized) != rootLength || rootLength != 0) && normalized != ".." && !strings.HasSuffix(normalized, "/..")
419+
}
420+
} else if changed {
421+
if len(normalized) != rootLength {
422+
normalized += "/"
423+
}
424+
seenNonDotDotSegment = true
425+
normalized += fileName[segmentStart:segmentEnd]
426+
} else {
427+
seenNonDotDotSegment = true
428+
normalizedUpTo = segmentEnd
429+
}
430+
index = segmentEnd + 1
431+
}
432+
if changed {
433+
return normalized
434+
}
435+
if length > rootLength {
436+
return RemoveTrailingDirectorySeparators(fileName)
437+
}
438+
if length == rootLength {
439+
return EnsureTrailingDirectorySeparator(fileName)
440+
}
441+
return fileName
442+
}
443+
444+
func simpleNormalizePath(path string) (string, bool) {
445+
// Most paths don't require normalization
446+
if !hasRelativePathSegment(path) {
447+
return path, true
448+
}
449+
// Some paths only require cleanup of `/./` or leading `./`
450+
simplified := strings.ReplaceAll(path, "/./", "/")
451+
trimmed := strings.TrimPrefix(simplified, "./")
452+
if trimmed != path && !hasRelativePathSegment(trimmed) && !(trimmed != simplified && strings.HasPrefix(trimmed, "/")) {
453+
// If we trimmed a leading "./" and the path now starts with "/", we changed the meaning
454+
path = trimmed
455+
return path, true
456+
}
457+
return "", false
325458
}
326459

327460
func hasRelativePathSegment(p string) bool {
@@ -350,19 +483,10 @@ func hasRelativePathSegment(p string) bool {
350483

351484
func NormalizePath(path string) string {
352485
path = NormalizeSlashes(path)
353-
// Most paths don't require normalization
354-
if !hasRelativePathSegment(path) {
355-
return path
486+
if normalized, ok := simpleNormalizePath(path); ok {
487+
return normalized
356488
}
357-
// Some paths only require cleanup of `/./` or leading `./`
358-
simplified := strings.ReplaceAll(path, "/./", "/")
359-
simplified = strings.TrimPrefix(simplified, "./")
360-
if simplified != path && !hasRelativePathSegment(simplified) {
361-
path = simplified
362-
return path
363-
}
364-
// Other paths require full normalization
365-
normalized := GetPathFromPathComponents(reducePathComponents(GetPathComponents(path, "")))
489+
normalized := GetNormalizedAbsolutePath(path, "")
366490
if normalized != "" && HasTrailingDirectorySeparator(path) {
367491
normalized = EnsureTrailingDirectorySeparator(normalized)
368492
}
@@ -426,6 +550,13 @@ func (p Path) RemoveTrailingDirectorySeparator() Path {
426550
return Path(RemoveTrailingDirectorySeparator(string(p)))
427551
}
428552

553+
func RemoveTrailingDirectorySeparators(path string) string {
554+
for HasTrailingDirectorySeparator(path) {
555+
path = RemoveTrailingDirectorySeparator(path)
556+
}
557+
return path
558+
}
559+
429560
func EnsureTrailingDirectorySeparator(path string) string {
430561
if !HasTrailingDirectorySeparator(path) {
431562
return path + "/"

internal/tspath/path_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package tspath
22

33
import (
4+
"fmt"
45
"regexp"
56
"strings"
67
"testing"
@@ -377,6 +378,21 @@ func TestGetNormalizedAbsolutePath(t *testing.T) {
377378
assert.Equal(t, GetNormalizedAbsolutePath("/base/./a../b", ""), "/base/a../b")
378379
assert.Equal(t, GetNormalizedAbsolutePath("/base/../a../b", ""), "/a../b")
379380

381+
assert.Equal(t, GetNormalizedAbsolutePath("a/..", ""), "")
382+
assert.Equal(t, GetNormalizedAbsolutePath("/a//", ""), "/a")
383+
assert.Equal(t, GetNormalizedAbsolutePath("//a", "a"), "//a/")
384+
assert.Equal(t, GetNormalizedAbsolutePath("/\\", ""), "//")
385+
assert.Equal(t, GetNormalizedAbsolutePath("a///", "a"), "a/a")
386+
assert.Equal(t, GetNormalizedAbsolutePath("/.//", ""), "/")
387+
assert.Equal(t, GetNormalizedAbsolutePath("//\\\\", ""), "///")
388+
assert.Equal(t, GetNormalizedAbsolutePath(".//a", "."), "a")
389+
assert.Equal(t, GetNormalizedAbsolutePath("a/../..", ""), "..")
390+
assert.Equal(t, GetNormalizedAbsolutePath("../..", "\\a"), "/")
391+
assert.Equal(t, GetNormalizedAbsolutePath("a:", "b"), "a:/")
392+
assert.Equal(t, GetNormalizedAbsolutePath("a/../..", ".."), "../..")
393+
assert.Equal(t, GetNormalizedAbsolutePath("a/../..", "b"), "")
394+
assert.Equal(t, GetNormalizedAbsolutePath("a//../..", ".."), "../..")
395+
380396
// Consecutive intermediate slashes are normalized to a single slash.
381397
assert.Equal(t, GetNormalizedAbsolutePath("a//b", ""), "a/b")
382398
assert.Equal(t, GetNormalizedAbsolutePath("a///b", ""), "a/b")
@@ -401,6 +417,75 @@ func TestGetNormalizedAbsolutePath(t *testing.T) {
401417
assert.Equal(t, GetNormalizedAbsolutePath("\\\\a\\b\\\\c", ""), "//a/b/c")
402418
}
403419

420+
var getNormalizedAbsolutePathTests = map[string][][]string{
421+
"non-normalized inputs": {
422+
{"/.", ""},
423+
{"/./", ""},
424+
{"/../", ""},
425+
{"/a/", ""},
426+
{"/a/.", ""},
427+
{"/a/foo.", ""},
428+
{"/a/./", ""},
429+
{"/a/./b", ""},
430+
{"/a/./b/", ""},
431+
{"/a/..", ""},
432+
{"/a/../", ""},
433+
{"/a/../", ""},
434+
{"/a/../b", ""},
435+
{"/a/../b/", ""},
436+
{"/a/..", ""},
437+
{"/a/..", "/"},
438+
{"/a/..", "b/"},
439+
{"/a/..", "/b"},
440+
{"/a/.", "b"},
441+
{"/a/.", "."},
442+
},
443+
"normalized inputs": {
444+
{"/a/b", ""},
445+
{"/one/two/three", ""},
446+
{"/users/root/project/src/foo.ts", ""},
447+
},
448+
"normalized inputs (long)": {
449+
{"/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z", ""},
450+
{"/one/two/three/four/five/six/seven/eight/nine/ten/eleven/twelve/thirteen/fourteen/fifteen/sixteen/seventeen/eighteen/nineteen/twenty", ""},
451+
{"/users/root/project/src/foo/bar/baz/qux/quux/corge/grault/garply/waldo/fred/plugh/xyzzy/thud", ""},
452+
{"/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam", ""},
453+
},
454+
}
455+
456+
func BenchmarkGetNormalizedAbsolutePath(b *testing.B) {
457+
funcs := map[string]func(string, string) string{
458+
"GetNormalizedAbsolutePath": GetNormalizedAbsolutePath,
459+
"GetNormalizedAbsolutePath (old)": getNormalizedAbsolutePath_old,
460+
}
461+
for name, tests := range getNormalizedAbsolutePathTests {
462+
b.Run(name, func(b *testing.B) {
463+
for fnName, fn := range funcs {
464+
b.Run(fnName, func(b *testing.B) {
465+
b.ReportAllocs()
466+
for _, test := range tests {
467+
for range b.N {
468+
fn(test[0], test[1])
469+
}
470+
}
471+
})
472+
}
473+
})
474+
}
475+
}
476+
477+
func FuzzGetNormalizedAbsolutePath(f *testing.F) {
478+
for _, tests := range getNormalizedAbsolutePathTests {
479+
for _, test := range tests {
480+
f.Add(test[0], test[1])
481+
}
482+
}
483+
484+
f.Fuzz(func(t *testing.T, p string, dir string) {
485+
assert.Equal(t, GetNormalizedAbsolutePath(p, dir), getNormalizedAbsolutePath_old(p, dir), fmt.Sprintf("p=%q, dir=%q", p, dir))
486+
})
487+
}
488+
404489
func TestGetRelativePathToDirectoryOrUrl(t *testing.T) {
405490
t.Parallel()
406491
// !!!
@@ -594,3 +679,28 @@ func shortenName(name string) string {
594679
}
595680
return name
596681
}
682+
683+
func normalizePath_old(path string) string {
684+
path = NormalizeSlashes(path)
685+
// Most paths don't require normalization
686+
if !hasRelativePathSegment(path) {
687+
return path
688+
}
689+
// Some paths only require cleanup of `/./` or leading `./`
690+
simplified := strings.ReplaceAll(path, "/./", "/")
691+
simplified = strings.TrimPrefix(simplified, "./")
692+
if simplified != path && !hasRelativePathSegment(simplified) {
693+
path = simplified
694+
return path
695+
}
696+
// Other paths require full normalization
697+
normalized := GetPathFromPathComponents(reducePathComponents(GetPathComponents(path, "")))
698+
if normalized != "" && HasTrailingDirectorySeparator(path) {
699+
normalized = EnsureTrailingDirectorySeparator(normalized)
700+
}
701+
return normalized
702+
}
703+
704+
func getNormalizedAbsolutePath_old(fileName string, currentDirectory string) string {
705+
return GetPathFromPathComponents(GetNormalizedPathComponents(fileName, currentDirectory))
706+
}

0 commit comments

Comments
 (0)