package main

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
	"strconv"
	"strings"
	"unicode/utf16"
	"unicode/utf8"
)

// This file implements xmlDecoder, an io.Reader filter that replaces runs of
// decimal character references ("&#NNN;") with the XML-escaped UTF-8 text
// they denote. Consecutive references that form a UTF-16 surrogate pair are
// combined into a single code point.

const (
	// lexEOF is returned by lex.next and lex.peek once the input is exhausted.
	lexEOF rune = -1

	// decimalDigits is the set of characters allowed inside a decimal reference.
	decimalDigits = "1234567890"

	// High (leading) surrogates occupy [highSurrogateMin, highSurrogateMax).
	highSurrogateMin = 0xD800
	highSurrogateMax = 0xDC00
)

// charRefPrefix is the byte sequence that opens a numeric character reference.
var charRefPrefix = []byte("&#")

// xmlDecoder satisfies io.Reader.
var _ io.Reader = (*xmlDecoder)(nil)

// lex is a minimal rune scanner over a byte slice.
type lex struct {
	start int    // start of the token currently being scanned
	input []byte // the bytes being scanned
	pos   int    // current position in the input
	width int    // width in bytes of the last rune returned by next
}

// next returns the rune at the current position and advances past it.
// At the end of the input it returns lexEOF and does not move.
func (l *lex) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return lexEOF
	}
	r, w := utf8.DecodeRune(l.input[l.pos:])
	l.width = w
	l.pos += w
	return r
}

// peek returns the rune at the current position without consuming it,
// or lexEOF at the end of the input.
func (l *lex) peek() rune {
	if l.pos >= len(l.input) {
		return lexEOF
	}
	r, _ := utf8.DecodeRune(l.input[l.pos:])
	return r
}

// backup undoes the most recent call to next. It is a no-op when next
// returned lexEOF, and calling it twice in a row only steps back once.
func (l *lex) backup() {
	l.pos -= l.width
	l.width = 0
}

// xmlDecoder wraps reader and decodes decimal character references on the fly.
type xmlDecoder struct {
	reader   io.Reader // underlying source
	previous []byte    // unprocessed input tail carried over to the next read
	temp     []byte    // copy of the bytes returned by the most recent Read
	pending  []byte    // decoded output not yet handed to the caller
	err      error     // sticky error, reported once pending is drained
}

// acceptRun consumes runes for as long as they are contained in valid.
func (l *lex) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// getResult returns the bytes scanned since the previous token and starts a
// new token at the current position.
func (l *lex) getResult() []byte {
	result := l.input[l.start:l.pos]
	l.start = l.pos
	return result
}

// getEntities scans the run of consecutive decimal character references at
// the current position.
//
// It returns:
//   - the complete references ("&#NNN;"), each as a sub-slice of the input;
//   - the prefix of the input covered by exactly those references;
//   - a copy of the unprocessed tail when the outcome depends on bytes that
//     have not arrived yet (a reference cut off by the end of the input, or a
//     trailing high surrogate whose partner may follow), otherwise nil. When
//     non-nil, the tail always extends to the end of the input.
//
// Anything that is not a decimal reference (a named entity, a hexadecimal
// reference, plain text) ends the run and is left unconsumed.
func (l *lex) getEntities() ([][]byte, []byte, []byte) {
	var (
		rest   []byte
		result [][]byte
	)

	for l.peek() == '&' {
		l.next()
		r := l.next()
		if r == '#' {
			l.acceptRun(decimalDigits)
			r = l.next()
			if r == ';' {
				result = append(result, l.getResult())
				continue
			}
		}
		// Not a complete decimal reference: rewind to its '&'.
		l.pos = l.start
		if r == lexEOF {
			// The input ended mid-reference; the rest may still arrive.
			rest = append([]byte(nil), l.input[l.pos:]...)
		}
		break
	}

	// A high surrogate can only be decoded together with the low surrogate
	// that follows it. If nothing definite follows the run, keep it back.
	if n := len(result); n > 0 && (rest != nil || l.pos == len(l.input)) && isHighSurrogateRef(result[n-1]) {
		l.pos -= len(result[n-1])
		l.start = l.pos
		rest = append([]byte(nil), l.input[l.pos:]...)
		result = result[:n-1]
	}

	return result, l.input[:l.pos], rest
}

// parseCharRef returns the numeric value of a reference of the form "&#NNN;".
// It fails when the digits are missing, overflow, or exceed utf8.MaxRune.
func parseCharRef(ref []byte) (rune, error) {
	v, err := strconv.ParseUint(string(ref[len(charRefPrefix):len(ref)-1]), 10, 32)
	if err != nil || v > utf8.MaxRune {
		return 0, fmt.Errorf("invalid character reference %q", ref)
	}
	return rune(v), nil
}

// isHighSurrogateRef reports whether ref is a well-formed reference whose
// value is a UTF-16 high surrogate.
func isHighSurrogateRef(ref []byte) bool {
	v, err := parseCharRef(ref)
	return err == nil && v >= highSurrogateMin && v < highSurrogateMax
}

// joinSurrogates combines adjacent surrogate pairs in units into single code
// points and replaces unpaired surrogates with utf8.RuneError. It works in
// place and returns the shortened slice.
func joinSurrogates(units []rune) []rune {
	out := units[:0]
	for i := 0; i < len(units); i++ {
		r := units[i]
		if utf16.IsSurrogate(r) {
			if i+1 < len(units) {
				if pair := utf16.DecodeRune(r, units[i+1]); pair != utf8.RuneError {
					out = append(out, pair)
					i++
					continue
				}
			}
			r = utf8.RuneError
		}
		out = append(out, r)
	}
	return out
}

// writeDecoded decodes a run of references and appends the XML-escaped text
// to out.
func writeDecoded(out *bytes.Buffer, entities [][]byte) error {
	units := make([]rune, 0, len(entities))
	for _, e := range entities {
		v, err := parseCharRef(e)
		if err != nil {
			return err
		}
		units = append(units, v)
	}
	return xml.EscapeText(out, []byte(string(joinSurrogates(units))))
}

// decodeChunk rewrites every run of decimal character references in src.
//
// It returns the converted bytes and, when final is false, the tail of src
// that cannot be converted until more input is available. When final is true
// no more input will come, so any such tail is emitted unchanged.
func decodeChunk(src []byte, final bool) (out, rest []byte, err error) {
	var buf bytes.Buffer
	buf.Grow(len(src))

	for {
		i := bytes.Index(src, charRefPrefix)
		if i < 0 {
			break
		}
		buf.Write(src[:i])
		src = src[i:]

		l := &lex{input: src}
		entities, consumed, tail := l.getEntities()
		if len(entities) > 0 {
			if err := writeDecoded(&buf, entities); err != nil {
				return buf.Bytes(), nil, err
			}
		}

		switch {
		case tail != nil && !final:
			return buf.Bytes(), tail, nil
		case tail != nil:
			buf.Write(tail)
			return buf.Bytes(), nil, nil
		case len(consumed) == 0:
			// "&#" that does not start a decimal reference (e.g. "&#x41;"):
			// copy it through so the scan makes progress.
			buf.Write(charRefPrefix)
			src = src[len(charRefPrefix):]
		default:
			src = src[len(consumed):]
		}
	}

	// A trailing '&' may be the first half of "&#" split across two reads.
	if !final && len(src) > 0 && src[len(src)-1] == '&' {
		buf.Write(src[:len(src)-1])
		return buf.Bytes(), []byte{'&'}, nil
	}
	buf.Write(src)
	return buf.Bytes(), nil, nil
}

// fill reads up to size bytes from the underlying reader, prepends the bytes
// carried over from the previous read, and stores the converted result in
// x.pending. It reports false when the reader returned no bytes and no error.
func (x *xmlDecoder) fill(size int) bool {
	held := len(x.previous)
	buf := make([]byte, held+size)
	copy(buf, x.previous)

	n, readErr := x.reader.Read(buf[held:])
	if n == 0 && readErr == nil {
		return false
	}

	// Any read error is treated as the end of the stream so that carried-over
	// bytes are flushed instead of being lost.
	out, rest, decodeErr := decodeChunk(buf[:held+n], readErr != nil)
	x.pending, x.previous = out, rest
	x.err = readErr
	if decodeErr != nil {
		x.err = decodeErr
	}
	return true
}

// Read implements io.Reader. It fills data with input from the underlying
// reader in which every run of decimal character references has been
// replaced by the XML-escaped text it denotes.
//
// A reference (or surrogate pair) split across two reads of the underlying
// reader is held back until it is complete. Errors from the underlying
// reader, and malformed references such as "&#;" or values above
// utf8.MaxRune, are returned after all output produced before them has been
// delivered; once returned, the same error is returned by every later call.
func (x *xmlDecoder) Read(data []byte) (n int, err error) {
	if len(data) == 0 {
		return 0, nil
	}

	for len(x.pending) == 0 {
		if x.err != nil {
			return 0, x.err
		}
		if !x.fill(len(data)) {
			// The source made no progress; report that instead of spinning.
			return 0, nil
		}
	}

	n = copy(data, x.pending)
	x.pending = x.pending[n:]
	x.temp = append([]byte(nil), data[:n]...)
	return n, nil
}