From 54efa7ea1308929dfd2984a667f625446a963d1b Mon Sep 17 00:00:00 2001 From: lordwelch Date: Sun, 11 Oct 2020 21:35:12 -0700 Subject: [PATCH] Initial commit --- LICENSE | 21 ++++++++ README.md | 60 +++++++++++++++++++++ base.go | 75 ++++++++++++++++++++++++++ cmd/pathvalidate.go | 13 +++++ const.go | 50 +++++++++++++++++ go.mod | 5 ++ go.sum | 3 ++ pathvalidate.go | 126 +++++++++++++++++++++++++++++++++++++++++++ pathvalidate_test.go | 47 ++++++++++++++++ 9 files changed, 400 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 base.go create mode 100644 cmd/pathvalidate.go create mode 100644 const.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 pathvalidate.go create mode 100644 pathvalidate_test.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e0372c8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Timmy Welch + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..09f869e --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +# pathvalidate +[![PkgGoDev](https://pkg.go.dev/badge/github.com/lordwelch/pathvalidate)](https://pkg.go.dev/github.com/lordwelch/pathvalidate) +[![Go Report Card](https://goreportcard.com/badge/github.com/lordwelch/pathvalidate)](https://goreportcard.com/report/github.com/lordwelch/pathvalidate) + +Path sanitization based on pathvalidate from Python https://pypi.org/project/pathvalidate/ + +import path: `github.com/lordwelch/pathvalidate` + +Example: +```Go +// Validate Path +err := pathvalidate.ValidateFilepath("Simple/Name") +sanitized, err := pathvalidate.SanitizeFilepath("Simple/Name", '_') + +// Validate Filename +err := pathvalidate.ValidateFilename("Simple/Name") +sanitized, err := pathvalidate.SanitizeFilename("Simple/Name", '_') +``` +Output: +``` +# Validate Path +err: +sanitized: Simple/Name err: + +# Validate Filename +err: pathvalidate: invalid character: '/' (0x2f) +sanitized: Simple_Name err: +``` +## defaults +### Windows +Invalid Path: Unicode categories: Cc, Cf, Z excluding space + `:*?"<>|` + +Invalid Filename: Invalid Path + `/` + `\` + +Max Path Length: 260 +#### Reserved words + +NTFS Reserved Names: $MFT, $MFTMIRR, $LOGFILE, $VOLUME, $ATTRDEF, $BITMAP, $BOOT, $BADCLUS, $SECURE, $UPCASE, $EXTEND, $QUOTA, $OBJID, $REPARSE + +Windows Reserved Names: CON, PRN, AUX, CLOCK$, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7, COM8, COM9, COM10, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, LPT9, LPT10 + +### Linux +Invalid Path: Unicode categories: Cc, Cf, Z excluding space + + +Invalid Filename: Invalid Path + `/` + +Max Path Length: 4096 +#### Reserved words + +None + +### Darwin +Invalid Path: Unicode categories: Cc, Cf, Z excluding space + + +Invalid Filename: Invalid Path + `/` + +Max Path Length: 1024 +#### Reserved words + +`:` diff --git a/base.go b/base.go new file mode 100644 index 0000000..4881ab9 --- /dev/null +++ 
b/base.go @@ -0,0 +1,75 @@ +package pathvalidate + +import ( + "fmt" + "path/filepath" + "runtime" + "sort" + "strings" +) + +type BaseFile struct { + ReservedKeywords []string + MinLength int + MaxLength int +} + +var DefaultBaseFile = BaseFile{ + MaxLength: getDefaultMaxLength(runtime.GOOS), + ReservedKeywords: getDefaultKeywords(runtime.GOOS), + MinLength: 1, +} + +func getDefaultKeywords(platform string) []string { + switch platform { + case "windows": + return append(WindowsReserved, NTFSReserved...) + case "darwin": + return DarwinReserved + default: + return nil + } +} + +func getDefaultMaxLength(platform string) int { + switch platform { + case "linux": + return 4096 + case "windows": + return 260 + case "darwin": + return 1024 + default: + return DefaultMaxFilenameLength + } +} + +func (bf BaseFile) IsReservedKeyword(name string) bool { + sort.Strings(bf.ReservedKeywords) + index := sort.SearchStrings(bf.ReservedKeywords, strings.ToUpper(name)) + return index < len(bf.ReservedKeywords) && bf.ReservedKeywords[index] == strings.ToUpper(name) +} + +func (bf BaseFile) UpdateReservedKeywords(name, suffix string) string { + ext := filepath.Ext(name) + rootName := extractRootName(name) + if bf.IsReservedKeyword(strings.ToUpper(rootName)) { + return rootName + suffix + ext + } + + return name +} + +func (bf BaseFile) validateReservedKeywords(name string) error { + rootName := extractRootName(name) + if bf.IsReservedKeyword(strings.ToUpper(rootName)) { + return fmt.Errorf("%w: %s", ErrReservedWord, rootName) + } + + return nil +} + +func extractRootName(path string) string { + base := filepath.Base(filepath.Clean(path)) + return strings.TrimSuffix(base, filepath.Ext(base)) +} diff --git a/cmd/pathvalidate.go b/cmd/pathvalidate.go new file mode 100644 index 0000000..fecf867 --- /dev/null +++ b/cmd/pathvalidate.go @@ -0,0 +1,13 @@ +package main + +import ( + "fmt" + "os" + + "github.com/lordwelch/pathvalidate" +) + +func main() { + 
fmt.Println(pathvalidate.ValidateFilepath(os.Args[1])) + fmt.Println(pathvalidate.SanitizeFilepath(os.Args[1], '_')) +} diff --git a/const.go b/const.go new file mode 100644 index 0000000..563a236 --- /dev/null +++ b/const.go @@ -0,0 +1,50 @@ +package pathvalidate + +import ( + "errors" + "unicode" + + "golang.org/x/text/unicode/rangetable" +) + +var ( + NTFSReserved = []string{ + "$MFT", + "$MFTMIRR", + "$LOGFILE", + "$VOLUME", + "$ATTRDEF", + "$BITMAP", + "$BOOT", + "$BADCLUS", + "$SECURE", + "$UPCASE", + "$EXTEND", + "$QUOTA", + "$OBJID", + "$REPARSE", + } // Only in root directory + + WindowsReserved = []string{ + "CON", "PRN", "AUX", "CLOCK$", "NUL", + "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "COM10", + "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", "LPT10", + } + + DarwinReserved = []string{":"} // Is this needed? +) + +var ( + InvalidPath = rangetable.Merge(unicode.Cc, unicode.Cf, unicode.Z) + InvalidFilename = rangetable.Merge(InvalidPath, rangetable.New('/')) + InvalidWindowsPath = rangetable.Merge(InvalidPath, rangetable.New(':', '*', '?', '"', '<', '>', '|')) + InvalidWindowsFilename = rangetable.Merge(InvalidFilename, InvalidWindowsPath, rangetable.New('\\')) + DefaultMaxFilenameLength = 255 +) + +var ( + ErrInvalidChar = errors.New("pathvalidate: invalid character") + ErrMaxLength = errors.New("pathvalidate: max length exceeded") + ErrMinLength = errors.New("pathvalidate: min length not met") + ErrReservedWord = errors.New("pathvalidate: reserved word found") +) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3d4696a --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/lordwelch/pathvalidate + +go 1.15 + +require golang.org/x/text v0.3.3 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..fd5b10f --- /dev/null +++ b/go.sum @@ -0,0 +1,3 @@ +golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= +golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/pathvalidate.go b/pathvalidate.go new file mode 100644 index 0000000..3d5e99b --- /dev/null +++ b/pathvalidate.go @@ -0,0 +1,126 @@ +package pathvalidate + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "unicode" + "unicode/utf8" +) + +var ( + DefaultFilenameSanitizer = FilenameSanitizer{} + DefaultFilepathSanitizer = FilepathSanitizer{} +) + +type FilepathSanitizer struct { + FilenameSanitizer +} + +func (fps FilepathSanitizer) Sanitize(path string, replacement rune) (string, error) { + var ( + err error + ) + cleaned := filepath.Clean(path) + split := strings.Split(cleaned, string(os.PathSeparator)) + splitS := make([]string, 0, len(split)) + for _, name := range split { + name, err = fps.FilenameSanitizer.Sanitize(name, replacement) + if err != nil { + return path, err + } + splitS = append(splitS, name) + } + return filepath.Join(splitS...), nil +} + +func (fps FilepathSanitizer) Validate(path string) error { + cleaned := filepath.Clean(path) + split := strings.Split(cleaned, string(os.PathSeparator)) + for _, name := range split { + if err := fps.FilenameSanitizer.Validate(name); err != nil { + return err + } + } + return nil +} + +type FilenameSanitizer struct { + BaseFile +} + +func (f FilenameSanitizer) Sanitize(path string, replacement rune) (string, error) { + var ( + err error + ) + if f.BaseFile.MinLength == 0 { + f.BaseFile = DefaultBaseFile + } + replace := func(r rune) rune { + if unicode.Is(InvalidFilename, r) && r != ' ' { + return replacement + } + return r + } + sanitized := strings.Map(replace, path) + sanitized = f.UpdateReservedKeywords(sanitized, "_") + sanitized = strings.TrimSpace(sanitized) + err = f.Validate(sanitized) + if err != nil { + return path, fmt.Errorf("could not validate sanitized filename: %w", err) + } + return sanitized, nil +} + +func (f 
FilenameSanitizer) Validate(path string) error { + if f.BaseFile.MinLength == 0 { + f.BaseFile = DefaultBaseFile + } + nameLen := utf8.RuneCountInString(path) + cleaned := filepath.Clean(path) + + if nameLen > f.MaxLength { + return fmt.Errorf("%w: wanted <= %d, got = %d", ErrMaxLength, f.MaxLength, nameLen) + } + + if nameLen < f.MinLength { + return fmt.Errorf("%w: wanted >= %d, got = %d", ErrMinLength, f.MinLength, nameLen) + } + + err := f.validateReservedKeywords(cleaned) + if err != nil { + return err + } + + validate := func(r rune) bool { + return unicode.Is(InvalidFilename, r) && r != ' ' + } + if n := strings.IndexFunc(cleaned, validate); n != -1 { + r, _ := utf8.DecodeRuneInString(cleaned[n:]) + return fmt.Errorf("%w: '%s' (%#x)", ErrInvalidChar, string(r), r) + } + if cleaned[0] == ' ' { + return fmt.Errorf("%w: space at beginning of string", ErrInvalidChar) + } + if cleaned[len(cleaned)-1] == ' ' { + return fmt.Errorf("%w: space at end of string", ErrInvalidChar) + } + return nil +} + +func SanitizeFilename(path string, replacement rune) (string, error) { + return DefaultFilenameSanitizer.Sanitize(path, replacement) +} + +func ValidateFilename(path string) error { + return DefaultFilenameSanitizer.Validate(path) +} + +func SanitizeFilepath(path string, replacement rune) (string, error) { + return DefaultFilepathSanitizer.Sanitize(path, replacement) +} + +func ValidateFilepath(path string) error { + return DefaultFilepathSanitizer.Validate(path) +} diff --git a/pathvalidate_test.go b/pathvalidate_test.go new file mode 100644 index 0000000..7a91500 --- /dev/null +++ b/pathvalidate_test.go @@ -0,0 +1,47 @@ +package pathvalidate_test + +import ( + "errors" + "strings" + "testing" + + "github.com/lordwelch/pathvalidate" +) + +var tests = []struct { + path, sanitized string + err error +}{ + {"hello\t", "hello_", pathvalidate.ErrInvalidChar}, + {"hello\r", "hello_", pathvalidate.ErrInvalidChar}, + {"hello\n", "hello_", pathvalidate.ErrInvalidChar}, + {"hello 
", "hello", pathvalidate.ErrInvalidChar}, + {"hello/world", "hello_world", pathvalidate.ErrInvalidChar}, + {"nul", "nul_", pathvalidate.ErrReservedWord}, + {"nul.test", "nul_.test", pathvalidate.ErrReservedWord}, + {"hello" + strings.Repeat(" ", 4090) + "world", "hello" + strings.Repeat(" ", 4090) + "world", pathvalidate.ErrMaxLength}, + {"", "", pathvalidate.ErrMinLength}, + {"hello world", "hello world", nil}, +} + +func TestValidate(t *testing.T) { + pathvalidate.DefaultBaseFile.ReservedKeywords = pathvalidate.WindowsReserved + for _, test := range tests { + if err := pathvalidate.ValidateFilename(test.path); !errors.Is(err, test.err) { + t.Errorf("got %v, want %v", err, test.err) + } + } +} + +func TestSanitize(t *testing.T) { + + for _, test := range tests { + // Skips length tests as there is no way to intelligently sanitize them + if errors.Is(test.err, pathvalidate.ErrMaxLength) || errors.Is(test.err, pathvalidate.ErrMinLength) { + continue + } + if got, err := pathvalidate.SanitizeFilename(test.path, '_'); err != nil || got != test.sanitized { + t.Errorf("got value: %v; error: %v, want value: %v; error: %v", got, err, test.sanitized, nil) + } + } +}