184 lines
3.1 KiB
Go
184 lines
3.1 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/xml"
|
|
"io"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf16"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// lex is a small cursor over a byte buffer. Tokens are cut out of input as
// the span between start and pos.
type lex struct {
	start int    // offset in input where the token being collected begins
	input []byte // bytes being scanned
	pos   int    // offset in input of the next byte to be read
	width int    // reset to 0 by next once the end of input is reached
}
|
|
|
|
func (l *lex) next() rune {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
|
|
return -1
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(l.input[l.pos:])
|
|
l.pos++
|
|
|
|
return r
|
|
}
|
|
|
|
func (l *lex) peek() rune {
|
|
if l.pos >= len(l.input) {
|
|
return -1
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(l.input[l.pos:])
|
|
|
|
return r
|
|
}
|
|
|
|
func (l *lex) backup() {
|
|
l.pos--
|
|
}
|
|
|
|
// xmlDecoder wraps reader. Its Read method uses previous and temp as state
// kept between calls.
type xmlDecoder struct {
	reader   io.Reader // underlying source that Read pulls bytes from
	previous []byte    // bytes held over by Read; copied to the front of the next buffer
	temp     []byte    // copy of the bytes returned by the latest successful Read
}
|
|
|
|
func (l *lex) acceptRun(valid string) {
|
|
for strings.ContainsRune(valid, l.next()) {
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
func (l *lex) getResult() []byte {
|
|
result := l.input[l.start:l.pos]
|
|
l.start = l.pos
|
|
|
|
return result
|
|
}
|
|
|
|
func (l *lex) getEntities() ([][]byte, []byte, []byte) {
|
|
var (
|
|
rest []byte
|
|
result = make([][]byte, 0)
|
|
)
|
|
|
|
for l.peek() == '&' {
|
|
l.next()
|
|
|
|
if l.next() != '#' {
|
|
l.backup()
|
|
l.backup()
|
|
|
|
break
|
|
}
|
|
|
|
l.acceptRun("1234567890")
|
|
|
|
if r := l.next(); r == ';' {
|
|
result = append(result, l.getResult())
|
|
} else {
|
|
l.pos = l.start
|
|
rest = make([]byte, len(l.input)-l.pos)
|
|
copy(rest, l.input[l.pos:])
|
|
|
|
break
|
|
}
|
|
}
|
|
|
|
if len(result) > 0 && string(result[len(result)-1]) == "�" {
|
|
rest = result[len(result)-1]
|
|
result = result[:len(result)-1]
|
|
}
|
|
|
|
return result, l.input[:l.pos], rest
|
|
}
|
|
|
|
func (x *xmlDecoder) Read(data []byte) (n int, err error) {
|
|
start := 0
|
|
|
|
if x.previous != nil {
|
|
start = len(x.previous)
|
|
copy(data, x.previous)
|
|
x.previous = nil
|
|
}
|
|
|
|
n, err = x.reader.Read(data[start:])
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
|
|
resultLen := n
|
|
i := 0
|
|
data = data[:n]
|
|
workingData := data
|
|
|
|
for index := bytes.Index(workingData, []byte("&#")); index >= 0; i++ {
|
|
var (
|
|
entities [][]byte
|
|
xmlEntity []byte
|
|
l = &lex{
|
|
input: workingData[index:],
|
|
}
|
|
)
|
|
|
|
entities, xmlEntity, x.previous = l.getEntities()
|
|
if x.previous != nil {
|
|
resultLen -= len(x.previous)
|
|
|
|
break
|
|
}
|
|
|
|
result := &strings.Builder{}
|
|
entitiesUINT16 := []uint16{}
|
|
|
|
for i, e := range entities {
|
|
if len(e) > 2 {
|
|
e = e[2 : len(e)-1]
|
|
entities[i] = entities[i][0:0]
|
|
|
|
v, err := strconv.Atoi(string(e))
|
|
if err != nil {
|
|
os.Exit(91)
|
|
}
|
|
|
|
entitiesUINT16 = append(entitiesUINT16, uint16(v))
|
|
}
|
|
}
|
|
|
|
runes := utf16.Decode(entitiesUINT16)
|
|
|
|
err = xml.EscapeText(result, []byte(string(runes)))
|
|
if err != nil {
|
|
os.Exit(92)
|
|
}
|
|
|
|
resultBytes := []byte(result.String())
|
|
if len(xmlEntity) == len(resultBytes) {
|
|
copy(xmlEntity, resultBytes)
|
|
} else {
|
|
copy(workingData[index:], resultBytes)
|
|
copy(workingData[index+len(resultBytes):], workingData[index+len(xmlEntity):])
|
|
resultLen += len(resultBytes) - len(xmlEntity)
|
|
workingData = workingData[:len(workingData)+(len(resultBytes)-len(xmlEntity))]
|
|
}
|
|
|
|
workingData = workingData[index+len(resultBytes):]
|
|
index = bytes.Index(workingData, []byte("&#"))
|
|
}
|
|
|
|
data = data[:resultLen]
|
|
x.temp = make([]byte, resultLen)
|
|
copy(x.temp, data)
|
|
x.previous = nil
|
|
|
|
return resultLen, nil
|
|
}
|