Initial commit

This commit is contained in:
lordwelch 2020-10-11 21:35:12 -07:00
commit 54efa7ea13
9 changed files with 400 additions and 0 deletions

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 Timmy Welch
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

60
README.md Normal file
View File

@ -0,0 +1,60 @@
# pathvalidate
[![PkgGoDev](https://pkg.go.dev/badge/github.com/lordwelch/pathvalidate)](https://pkg.go.dev/github.com/lordwelch/pathvalidate)
[![Go Report Card](https://goreportcard.com/badge/github.com/lordwelch/pathvalidate)](https://goreportcard.com/report/github.com/lordwelch/pathvalidate)
Path sanitization based on the Python pathvalidate library: https://pypi.org/project/pathvalidate/
import path: `github.com/lordwelch/pathvalidate`
Example:
```Go
// Validate Path
err := pathvalidate.ValidateFilepath("Simple/Name")
sanitized, err := pathvalidate.SanitizeFilepath("Simple/Name", '_')
// Validate Filename
err = pathvalidate.ValidateFilename("Simple/Name")
sanitized, err = pathvalidate.SanitizeFilename("Simple/Name", '_')
```
Output:
```
# Validate Path
err: <nil>
sanitized: Simple/Name err: <nil>
# Validate Filename
err: pathvalidate: invalid character: '/' (0x2f)
sanitized: Simple_Name err: <nil>
```
## defaults
### Windows
Invalid Path: Unicode categories: Cc, Cf, Z excluding space + `:*?"<>|`
Invalid Filename: Invalid Path + `/` + `\`
Max Path Length: 260
#### Reserved words
NTFS Reserved Names: $MFT, $MFTMIRR, $LOGFILE, $VOLUME, $ATTRDEF, $BITMAP, $BOOT, $BADCLUS, $SECURE, $UPCASE, $EXTEND, $QUOTA, $OBJID, $REPARSE
Windows Reserved Names: CON, PRN, AUX, CLOCK$, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7, COM8, COM9, COM10, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, LPT9, LPT10
### Linux
Invalid Path: Unicode categories: Cc, Cf, Z excluding space
Invalid Filename: Invalid Path + `/`
Max Path Length: 4096
#### Reserved words
None
### Darwin
Invalid Path: Unicode categories: Cc, Cf, Z excluding space
Invalid Filename: Invalid Path + `/`
Max Path Length: 1024
#### Reserved words
`:`

75
base.go Normal file
View File

@ -0,0 +1,75 @@
package pathvalidate
import (
"fmt"
"path/filepath"
"runtime"
"sort"
"strings"
)
// BaseFile holds the limits and reserved names that are applied when a single
// file name is checked.
type BaseFile struct {
// ReservedKeywords lists names that are rejected when they match the root
// (extension-less) part of a file name. Candidates are upper-cased before
// being compared, so entries are expected to be upper case.
ReservedKeywords []string
// MinLength is the smallest accepted name length, counted in runes.
// FilenameSanitizer treats a MinLength of 0 as "not configured" and falls
// back to DefaultBaseFile.
MinLength int
// MaxLength is the largest accepted name length, counted in runes.
MaxLength int
}
// DefaultBaseFile is the configuration FilenameSanitizer falls back to when its
// own BaseFile has a MinLength of 0. The maximum length and the reserved names
// are picked from runtime.GOOS when the package is initialised; the minimum
// length is one rune.
var DefaultBaseFile = BaseFile{
MaxLength: getDefaultMaxLength(runtime.GOOS),
ReservedKeywords: getDefaultKeywords(runtime.GOOS),
MinLength: 1,
}
// getDefaultKeywords returns the reserved names for platform (a runtime.GOOS
// value): the Windows names followed by the NTFS names for "windows", the
// Darwin names for "darwin", and nil for every other platform.
//
// The result is always a freshly allocated slice, so callers may sort or modify
// it without disturbing the exported WindowsReserved, NTFSReserved or
// DarwinReserved lists.
func getDefaultKeywords(platform string) []string {
	switch platform {
	case "windows":
		// Appending directly to WindowsReserved could write into its backing
		// array if it ever has spare capacity, so build a separate slice.
		keywords := make([]string, 0, len(WindowsReserved)+len(NTFSReserved))
		keywords = append(keywords, WindowsReserved...)
		return append(keywords, NTFSReserved...)
	case "darwin":
		return append([]string(nil), DarwinReserved...)
	default:
		return nil
	}
}
// getDefaultMaxLength returns the default maximum length for platform (a
// runtime.GOOS value): 4096 for "linux", 260 for "windows" and 1024 for
// "darwin". Any other platform gets DefaultMaxFilenameLength.
func getDefaultMaxLength(platform string) int {
	limits := map[string]int{
		"linux":   4096,
		"windows": 260,
		"darwin":  1024,
	}
	if limit, known := limits[platform]; known {
		return limit
	}
	return DefaultMaxFilenameLength
}
// IsReservedKeyword reports whether name, after being upper-cased, is one of
// bf.ReservedKeywords.
//
// The keyword list is never modified. When it is not already sorted, a private
// copy is sorted for the binary search, so the caller's slice (often one of the
// exported package-level lists) keeps its order and concurrent calls do not
// race on it.
func (bf BaseFile) IsReservedKeyword(name string) bool {
	upper := strings.ToUpper(name)
	keywords := bf.ReservedKeywords
	if !sort.StringsAreSorted(keywords) {
		keywords = append([]string(nil), keywords...)
		sort.Strings(keywords)
	}
	index := sort.SearchStrings(keywords, upper)
	return index < len(keywords) && keywords[index] == upper
}
// UpdateReservedKeywords returns name with suffix inserted between its root and
// its extension when the root is a reserved keyword. Names whose root is not
// reserved are returned unchanged.
func (bf BaseFile) UpdateReservedKeywords(name, suffix string) string {
	root := extractRootName(name)
	if !bf.IsReservedKeyword(strings.ToUpper(root)) {
		return name
	}
	return root + suffix + filepath.Ext(name)
}
// validateReservedKeywords returns an error wrapping ErrReservedWord when the
// root (extension-less) part of name is a reserved keyword, and nil otherwise.
func (bf BaseFile) validateReservedKeywords(name string) error {
	if root := extractRootName(name); bf.IsReservedKeyword(strings.ToUpper(root)) {
		return fmt.Errorf("%w: %s", ErrReservedWord, root)
	}
	return nil
}
// extractRootName returns the last element of the cleaned path with its file
// extension (as reported by filepath.Ext) removed.
func extractRootName(path string) string {
	leaf := filepath.Base(filepath.Clean(path))
	ext := filepath.Ext(leaf)
	// filepath.Ext always returns a suffix of leaf, so slicing it off is safe.
	return leaf[:len(leaf)-len(ext)]
}

13
cmd/pathvalidate.go Normal file
View File

@ -0,0 +1,13 @@
package main
import (
"fmt"
"os"
"github.com/lordwelch/pathvalidate"
)
// main validates and then sanitizes the path given as the first command-line
// argument, printing the result of each step on its own line.
//
// When no argument is supplied it prints a usage message to standard error and
// exits with status 2 instead of panicking on the missing os.Args entry.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: pathvalidate <path>")
		os.Exit(2)
	}
	path := os.Args[1]
	fmt.Println(pathvalidate.ValidateFilepath(path))
	fmt.Println(pathvalidate.SanitizeFilepath(path, '_'))
}

50
const.go Normal file
View File

@ -0,0 +1,50 @@
package pathvalidate
import (
"errors"
"unicode"
"golang.org/x/text/unicode/rangetable"
)
// Reserved names per platform. Entries are written in upper case because
// BaseFile.IsReservedKeyword upper-cases the candidate name before comparing.
var (
// NTFSReserved lists the NTFS reserved names.
NTFSReserved = []string{
"$MFT",
"$MFTMIRR",
"$LOGFILE",
"$VOLUME",
"$ATTRDEF",
"$BITMAP",
"$BOOT",
"$BADCLUS",
"$SECURE",
"$UPCASE",
"$EXTEND",
"$QUOTA",
"$OBJID",
"$REPARSE",
} // Only in root directory
// WindowsReserved lists the Windows reserved device names.
WindowsReserved = []string{
"CON", "PRN", "AUX", "CLOCK$", "NUL",
"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "COM10",
"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", "LPT10",
}
// DarwinReserved lists the names reserved on Darwin.
DarwinReserved = []string{":"} // Is this needed?
)
// Range tables of characters that are not allowed, plus the fallback length
// limit.
var (
// InvalidPath merges the Unicode categories Cc, Cf and Z. Z contains the
// plain space character; FilenameSanitizer exempts ' ' explicitly wherever
// it consults InvalidFilename.
InvalidPath = rangetable.Merge(unicode.Cc, unicode.Cf, unicode.Z)
// InvalidFilename is InvalidPath plus the forward slash.
InvalidFilename = rangetable.Merge(InvalidPath, rangetable.New('/'))
// InvalidWindowsPath is InvalidPath plus the characters : * ? " < > |.
InvalidWindowsPath = rangetable.Merge(InvalidPath, rangetable.New(':', '*', '?', '"', '<', '>', '|'))
// InvalidWindowsFilename is InvalidFilename and InvalidWindowsPath plus the
// backslash.
InvalidWindowsFilename = rangetable.Merge(InvalidFilename, InvalidWindowsPath, rangetable.New('\\'))
// DefaultMaxFilenameLength is the maximum length getDefaultMaxLength uses
// for platforms it has no specific value for.
DefaultMaxFilenameLength = 255
)
// Sentinel errors. Validation failures wrap one of these with %w, so callers
// can test for them with errors.Is.
var (
// ErrInvalidChar is reported for a disallowed character, including a space
// at the beginning or end of a name.
ErrInvalidChar = errors.New("pathvalidate: invalid character")
// ErrMaxLength is reported when a name has more runes than MaxLength.
ErrMaxLength = errors.New("pathvalidate: max length exceeded")
// ErrMinLength is reported when a name has fewer runes than MinLength.
ErrMinLength = errors.New("pathvalidate: min length not met")
// ErrReservedWord is reported when the root of a name is a reserved keyword.
ErrReservedWord = errors.New("pathvalidate: reserved word found")
)

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module github.com/lordwelch/pathvalidate
go 1.15
require golang.org/x/text v0.3.3

3
go.sum Normal file
View File

@ -0,0 +1,3 @@
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

126
pathvalidate.go Normal file
View File

@ -0,0 +1,126 @@
package pathvalidate
import (
"fmt"
"os"
"path/filepath"
"strings"
"unicode"
"unicode/utf8"
)
// Zero-valued sanitizers used by the package-level helper functions. Because
// their MinLength is 0, FilenameSanitizer's methods substitute DefaultBaseFile
// on every call.
var (
// DefaultFilenameSanitizer backs SanitizeFilename and ValidateFilename.
DefaultFilenameSanitizer = FilenameSanitizer{}
// DefaultFilepathSanitizer backs SanitizeFilepath and ValidateFilepath.
DefaultFilepathSanitizer = FilepathSanitizer{}
)
// FilepathSanitizer sanitizes and validates whole paths by applying the
// embedded FilenameSanitizer to every name component of the path.
type FilepathSanitizer struct {
	FilenameSanitizer
}

// splitFilepath cleans path and splits it into a root prefix and its name
// components. The prefix is the volume name (if any) followed by a leading
// separator when the path is rooted; it is empty for relative paths. Keeping
// the prefix apart stops the root of an absolute path from being handed to the
// file name checks as an empty, and therefore invalid, name.
func splitFilepath(path string) (prefix string, names []string) {
	sep := string(os.PathSeparator)
	cleaned := filepath.Clean(path)

	prefix = filepath.VolumeName(cleaned)
	rest := cleaned[len(prefix):]
	if strings.HasPrefix(rest, sep) {
		prefix += sep
		rest = rest[len(sep):]
	}
	if rest == "" {
		// The path consists of a root only.
		return prefix, nil
	}
	return prefix, strings.Split(rest, sep)
}

// Sanitize cleans path, sanitizes each of its name components with the embedded
// FilenameSanitizer (replacing invalid characters with replacement) and joins
// the results again. A volume name and leading separator are preserved as they
// are. If any component cannot be sanitized, the original path is returned
// together with that component's error.
func (fps FilepathSanitizer) Sanitize(path string, replacement rune) (string, error) {
	prefix, names := splitFilepath(path)
	sanitized := make([]string, 0, len(names))
	for _, name := range names {
		clean, err := fps.FilenameSanitizer.Sanitize(name, replacement)
		if err != nil {
			return path, err
		}
		sanitized = append(sanitized, clean)
	}
	return prefix + filepath.Join(sanitized...), nil
}

// Validate cleans path and validates each of its name components with the
// embedded FilenameSanitizer, returning the first error found. A volume name
// and leading separator are not treated as names and are skipped.
func (fps FilepathSanitizer) Validate(path string) error {
	_, names := splitFilepath(path)
	for _, name := range names {
		if err := fps.FilenameSanitizer.Validate(name); err != nil {
			return err
		}
	}
	return nil
}
// FilenameSanitizer sanitizes and validates single file names using the limits
// and reserved names of the embedded BaseFile. When the embedded MinLength is
// 0, its methods use DefaultBaseFile instead.
type FilenameSanitizer struct {
BaseFile
}
// Sanitize turns path into a valid file name. Every character in
// InvalidFilename other than a plain space is replaced with replacement,
// surrounding white space is trimmed, and a name whose root is a reserved
// keyword gets "_" appended to that root. The result is validated before it is
// returned.
//
// If the sanitized name still fails validation, the original path is returned
// together with an error wrapping the validation failure.
func (f FilenameSanitizer) Sanitize(path string, replacement rune) (string, error) {
	if f.BaseFile.MinLength == 0 {
		f.BaseFile = DefaultBaseFile
	}
	replace := func(r rune) rune {
		if r != ' ' && unicode.Is(InvalidFilename, r) {
			return replacement
		}
		return r
	}
	sanitized := strings.Map(replace, path)
	// Trim before the reserved-word fix-up: trimming afterwards could expose a
	// reserved name (for example "nul " becoming "nul") that would then be
	// rejected by Validate instead of being repaired.
	sanitized = strings.TrimSpace(sanitized)
	sanitized = f.UpdateReservedKeywords(sanitized, "_")
	if err := f.Validate(sanitized); err != nil {
		return path, fmt.Errorf("could not validate sanitized filename: %w", err)
	}
	return sanitized, nil
}
// Validate checks that path is an acceptable file name. In order, it reports:
//
//   - ErrMaxLength or ErrMinLength when the rune count is outside the
//     configured limits,
//   - ErrReservedWord when the root of the name is a reserved keyword,
//   - ErrInvalidChar when the name contains a character from InvalidFilename
//     (a plain space is allowed inside the name) or starts or ends with a
//     space.
//
// The name is checked exactly as given. It is deliberately not passed through
// filepath.Clean first, because cleaning would drop separators such as the
// trailing slash in "name/" and let them slip past the character check.
func (f FilenameSanitizer) Validate(path string) error {
	if f.BaseFile.MinLength == 0 {
		f.BaseFile = DefaultBaseFile
	}
	nameLen := utf8.RuneCountInString(path)
	if nameLen > f.MaxLength {
		return fmt.Errorf("%w: wanted <= %d, got = %d", ErrMaxLength, f.MaxLength, nameLen)
	}
	if nameLen < f.MinLength {
		return fmt.Errorf("%w: wanted >= %d, got = %d", ErrMinLength, f.MinLength, nameLen)
	}
	if err := f.validateReservedKeywords(path); err != nil {
		return err
	}
	invalid := func(r rune) bool {
		return r != ' ' && unicode.Is(InvalidFilename, r)
	}
	if n := strings.IndexFunc(path, invalid); n != -1 {
		r, _ := utf8.DecodeRuneInString(path[n:])
		return fmt.Errorf("%w: '%s' (%#x)", ErrInvalidChar, string(r), r)
	}
	// HasPrefix/HasSuffix stay safe even when an empty name gets this far.
	if strings.HasPrefix(path, " ") {
		return fmt.Errorf("%w: space at beginning of string", ErrInvalidChar)
	}
	if strings.HasSuffix(path, " ") {
		return fmt.Errorf("%w: space at end of string", ErrInvalidChar)
	}
	return nil
}
// SanitizeFilename sanitizes path as a single file name using
// DefaultFilenameSanitizer, replacing invalid characters with replacement.
func SanitizeFilename(path string, replacement rune) (string, error) {
return DefaultFilenameSanitizer.Sanitize(path, replacement)
}
// ValidateFilename validates path as a single file name using
// DefaultFilenameSanitizer.
func ValidateFilename(path string) error {
return DefaultFilenameSanitizer.Validate(path)
}
// SanitizeFilepath sanitizes every component of path using
// DefaultFilepathSanitizer, replacing invalid characters with replacement.
func SanitizeFilepath(path string, replacement rune) (string, error) {
return DefaultFilepathSanitizer.Sanitize(path, replacement)
}
// ValidateFilepath validates every component of path using
// DefaultFilepathSanitizer.
func ValidateFilepath(path string) error {
return DefaultFilepathSanitizer.Validate(path)
}

47
pathvalidate_test.go Normal file
View File

@ -0,0 +1,47 @@
package pathvalidate_test
import (
"errors"
"strings"
"testing"
"github.com/lordwelch/pathvalidate"
)
// tests is the table shared by TestValidate and TestSanitize. For each case,
// path is the input, err is the sentinel ValidateFilename is expected to
// report for it (nil when the name is valid), and sanitized is the value
// SanitizeFilename is expected to return when '_' is the replacement.
var tests = []struct {
path, sanitized string
err error
}{
{"hello\t", "hello_", pathvalidate.ErrInvalidChar},
{"hello\r", "hello_", pathvalidate.ErrInvalidChar},
{"hello\n", "hello_", pathvalidate.ErrInvalidChar},
{"hello ", "hello", pathvalidate.ErrInvalidChar},
{"hello/world", "hello_world", pathvalidate.ErrInvalidChar},
{"nul", "nul_", pathvalidate.ErrReservedWord},
{"nul.test", "nul_.test", pathvalidate.ErrReservedWord},
{"hello" + strings.Repeat(" ", 4090) + "world", "hello" + strings.Repeat(" ", 4090) + "world", pathvalidate.ErrMaxLength},
{"", "", pathvalidate.ErrMinLength},
{"hello world", "hello world", nil},
}
// TestValidate checks that ValidateFilename reports the expected sentinel error
// for every entry of the shared table. It installs the Windows reserved words
// as the default keyword list first, so the reserved-name cases apply on every
// platform.
func TestValidate(t *testing.T) {
	pathvalidate.DefaultBaseFile.ReservedKeywords = pathvalidate.WindowsReserved
	for i := range tests {
		want := tests[i].err
		got := pathvalidate.ValidateFilename(tests[i].path)
		if errors.Is(got, want) {
			continue
		}
		t.Errorf("got %v, want %v", got, want)
	}
}
func TestSanitize(t *testing.T) {
for _, test := range tests {
// Skips length tests as there is no way to intelligently sanitize them
if errors.Is(test.err, pathvalidate.ErrMaxLength) || errors.Is(test.err, pathvalidate.ErrMinLength) {
continue
}
if got, err := pathvalidate.SanitizeFilename(test.path, '_'); err != nil || got != test.sanitized {
t.Errorf("got value: %v; error: %v, want value: %v; error: %v", got, err, test.sanitized, nil)
}
}
}