sms/xml.go
2022-10-21 14:23:42 -07:00

184 lines
3.1 KiB
Go

package main
import (
"bytes"
"encoding/xml"
"io"
"os"
"strconv"
"strings"
"unicode/utf16"
"unicode/utf8"
)
// lex is a small scanner over a byte slice. Its methods step through input
// one byte at a time and cut tokens out of it between start and pos.
type lex struct {
// start marks where the pending token begins; getResult returns
// input[start:pos] and then moves start up to pos.
start int
input []byte // the bytes being scanned
pos int // current byte offset into input; next moves it forward by one, backup moves it back by one
width int // reset to 0 by next at end of input; nothing in the visible code reads it
}
// next decodes the rune that begins at the current position and then moves
// the position forward by exactly one byte (not by the rune's encoded width),
// which keeps it symmetric with backup. At end of input it resets width to 0,
// leaves the position untouched and returns -1.
func (l *lex) next() rune {
	if l.pos < len(l.input) {
		decoded, _ := utf8.DecodeRune(l.input[l.pos:])
		l.pos++
		return decoded
	}
	l.width = 0
	return -1
}
// peek reports the rune that begins at the current position without moving
// the position. It returns -1 when the position is at or past the end of the
// input.
func (l *lex) peek() rune {
	if l.pos < len(l.input) {
		ahead, _ := utf8.DecodeRune(l.input[l.pos:])
		return ahead
	}
	return -1
}
// backup moves the position back by one byte. It does not check that a
// matching call to next actually advanced, nor that the position stays
// non-negative.
func (l *lex) backup() {
	l.pos -= 1
}
// xmlDecoder wraps a reader. Its Read method looks for decimal character
// references ("&#NNN;") in the bytes it reads, decodes their values as UTF-16
// code units and replaces them with the XML-escaped text they stand for.
type xmlDecoder struct {
// reader is the underlying source that Read pulls bytes from.
reader io.Reader
// previous holds bytes carried from one Read call to the next; Read copies
// them to the front of the caller's buffer before reading from reader.
previous []byte
// temp receives a copy of the bytes a Read call returns; nothing in the
// visible code reads it back.
temp []byte
}
// acceptRun consumes consecutive characters for as long as each one appears
// in valid, and leaves the position on the first character that does not.
//
// The rejected character is un-read with backup only when next really
// advanced. At end of input next returns -1 without moving, so backing up
// there would step back over the last accepted byte (or to -1 on empty input).
func (l *lex) acceptRun(valid string) {
	for {
		before := l.pos
		if strings.ContainsRune(valid, l.next()) {
			continue
		}
		if l.pos != before {
			l.backup()
		}
		return
	}
}
// getResult hands back the pending token, input[start:pos], and marks it as
// taken by moving start up to pos. The returned slice shares memory with the
// input.
func (l *lex) getResult() []byte {
	begin, end := l.start, l.pos
	token := l.input[begin:end]
	l.start = end
	return token
}
// getEntities scans a run of back-to-back decimal character references
// ("&#" digits ";") beginning at the current position.
//
// It returns three values:
//   - the complete references found, each still carrying its "&#" prefix and
//     ";" suffix (the slices alias the input);
//   - the part of the input consumed so far, l.input[:l.pos];
//   - rest, which is non-nil when a candidate starts with "&#" but no ';'
//     follows its digits. In that case the position is rewound to the start
//     of the pending token and rest is a copy of the input from there to the
//     end.
//
// Zero digits are accepted, so "&#;" counts as a complete reference.
func (l *lex) getEntities() ([][]byte, []byte, []byte) {
	var (
		rest   []byte
		result = make([][]byte, 0)
	)
	for l.peek() == '&' {
		// Remember where this candidate begins. Rewinding with two backup
		// calls would overshoot by one byte when the '&' is the last byte of
		// the input, because next does not advance at end of input.
		mark := l.pos
		l.next()
		if l.next() != '#' {
			l.pos = mark
			break
		}
		l.acceptRun("1234567890")
		if l.next() != ';' {
			l.pos = l.start
			rest = make([]byte, len(l.input)-l.pos)
			copy(rest, l.input[l.pos:])
			break
		}
		result = append(result, l.getResult())
	}
	// NOTE(review): the literal below is the single character U+FFFD. Every
	// token collected above starts with "&#" and ends with ";", so this
	// comparison cannot succeed as written. It looks as if the intended
	// literal was mangled at some point - confirm what it was meant to be.
	if len(result) > 0 && string(result[len(result)-1]) == "�" {
		rest = result[len(result)-1]
		result = result[:len(result)-1]
	}
	return result, l.input[:l.pos], rest
}
// Read implements io.Reader. It reads from the wrapped reader into data and
// rewrites every run of decimal character references ("&#NNN;") in place:
// the values are decoded as UTF-16 code units, so a surrogate pair written as
// two references becomes one character, and the result is written back
// XML-escaped.
//
// A reference may be cut in two by the end of a read. The unfinished tail
// (a trailing "&", "&#" plus digits, or a high surrogate whose partner has
// not arrived yet) is withheld from the caller, kept in x.previous and put in
// front of the next read. Nothing is withheld once the wrapped reader has
// returned an error; whatever is pending is then passed through as it is.
//
// Text that starts with "&#" but is not a decimal reference (for example
// "&#x41;") is left untouched. If the caller's buffer is too small to hold
// the withheld bytes plus at least one new byte, those bytes are returned
// unconverted instead of stalling.
//
// It returns the number of bytes placed in data and the error reported by the
// wrapped reader, if any. A reference whose digits cannot be parsed ends the
// process with exit status 91, and a failure to escape with status 92.
func (x *xmlDecoder) Read(data []byte) (n int, err error) {
	// Upper bound on an unfinished tail that is worth carrying over; the
	// longest decimal reference to a valid code point is "&#1114111;".
	const maxHeld = 32

	// Bytes withheld by the previous call go first so that a reference split
	// across two reads is seen whole.
	start := copy(data, x.previous)
	if start < len(x.previous) || (start > 0 && start == len(data)) {
		// No room left to read anything behind the carried bytes: hand them
		// back as they are rather than make no progress.
		x.previous = x.previous[start:]
		if len(x.previous) == 0 {
			x.previous = nil
		}
		x.temp = make([]byte, start)
		copy(x.temp, data[:start])
		return start, nil
	}
	x.previous = nil

	n, err = x.reader.Read(data[start:])
	end := start + n // data[:end] is the valid region
	if end == 0 {
		return 0, err
	}
	// After an error there is no guarantee of more input, so nothing may be
	// withheld from the caller.
	final := err != nil

	// partial reports whether b could be the beginning of a decimal reference
	// whose remainder has not been read yet.
	partial := func(b []byte) bool {
		if final || len(b) > maxHeld {
			return false
		}
		switch len(b) {
		case 0:
			return true
		case 1:
			return b[0] == '&'
		}
		return b[0] == '&' && b[1] == '#' && len(bytes.TrimLeft(b[2:], "0123456789")) == 0
	}

	// escape turns reference values into XML-escaped text, treating them as
	// UTF-16 code units. Values above 0xFFFF are split into a surrogate pair
	// first instead of being truncated to 16 bits.
	escape := func(values []int) []byte {
		units := make([]uint16, 0, len(values))
		for _, v := range values {
			switch {
			case v <= 0xFFFF:
				units = append(units, uint16(v))
			case v <= 0x10FFFF:
				hi, lo := utf16.EncodeRune(rune(v))
				units = append(units, uint16(hi), uint16(lo))
			default:
				units = append(units, 0xFFFD)
			}
		}
		var buf bytes.Buffer
		if escErr := xml.EscapeText(&buf, []byte(string(utf16.Decode(units)))); escErr != nil {
			os.Exit(92)
		}
		return buf.Bytes()
	}

	pos := 0   // data[:pos] is finished output
	hold := -1 // offset where the withheld tail begins, -1 while undecided
	for hold < 0 {
		rel := bytes.Index(data[pos:end], []byte("&#"))
		if rel < 0 {
			break
		}
		at := pos + rel
		l := &lex{input: data[at:end]}
		entities, _, _ := l.getEntities()
		if len(entities) == 0 {
			// "&#" without a complete decimal reference behind it: either the
			// rest has not arrived yet, or it is not a decimal reference.
			if partial(data[at:end]) {
				hold = at
			} else {
				pos = at + 2
			}
			continue
		}

		// The references are contiguous from at; measure the run from their
		// own lengths. Each token is at least "&#;" long.
		values := make([]int, len(entities))
		runEnd := at
		for i, e := range entities {
			v, convErr := strconv.Atoi(string(e[2 : len(e)-1]))
			if convErr != nil {
				os.Exit(91)
			}
			values[i] = v
			runEnd += len(e)
		}

		// A high surrogate at the end of the run stays behind when its
		// partner may still be on its way.
		last := len(entities) - 1
		heldPair := false
		if v := values[last]; v >= 0xD800 && v <= 0xDBFF && partial(data[runEnd:end]) {
			runEnd -= len(entities[last])
			values = values[:last]
			heldPair = true
		}

		if consumed := runEnd - at; consumed > 0 {
			repl := escape(values)
			grow := len(repl) - consumed
			if grow <= 0 || end+grow <= len(data) {
				// Move the tail before writing: a longer replacement would
				// otherwise overwrite the bytes that follow the run.
				copy(data[at+len(repl):end+grow], data[runEnd:end])
				copy(data[at:], repl)
				end += grow
				pos = at + len(repl)
			} else {
				// No room for the longer escaped form; keep the references
				// as written.
				pos = runEnd
			}
		} else {
			pos = at
		}
		if heldPair {
			hold = pos
		}
	}
	if hold < 0 {
		hold = end
		// A trailing '&' may be the first byte of a reference.
		if !final && end > pos && data[end-1] == '&' {
			hold = end - 1
		}
	}
	if hold < end {
		x.previous = append([]byte(nil), data[hold:end]...)
	}
	if hold == 0 && err == nil && n > 0 {
		// Everything read so far is an unfinished reference; fetch more
		// input rather than report an empty read.
		return x.Read(data)
	}
	x.temp = make([]byte, hold)
	copy(x.temp, data[:hold])
	return hold, err
}