Lots of stuff

This commit is contained in:
Simon Martens
2025-06-24 18:20:06 +02:00
parent 3127446dab
commit 9563145aeb
29 changed files with 1694 additions and 1386 deletions

39
xmlparsing/helpers.go Normal file
View File

@@ -0,0 +1,39 @@
package xmlparsing
import (
"encoding/xml"
"io"
"log/slog"
"os"
"path/filepath"
)
// UnmarshalFile reads the XML document stored at filename and decodes it
// into data.
//
// Decoding targets the address of the function-local copy of data, so data
// should be a pointer (or wrap one); a plain struct value would only be
// populated locally and the caller would never see the result.
// Errors from opening, reading or decoding are returned unchanged.
func UnmarshalFile[T any](filename string, data T) error {
	slog.Debug("Unmarshalling file: ", "file", filename)

	f, openErr := os.Open(filename)
	if openErr != nil {
		return openErr
	}
	defer f.Close()

	raw, readErr := io.ReadAll(f)
	if readErr != nil {
		return readErr
	}

	return xml.Unmarshal(raw, &data)
}
// XMLFilesForPath returns the paths of all "*.xml" entries directly inside
// the directory path.
//
// Any failure to stat path is reported, not only "does not exist":
// filepath.Glob ignores I/O errors, so without this check an unreadable or
// otherwise broken path would look like an empty directory.
func XMLFilesForPath(path string) ([]string, error) {
	if _, err := os.Stat(path); err != nil {
		return nil, err
	}
	return filepath.Glob(filepath.Join(path, "*.xml"))
}

12
xmlparsing/item.go Normal file
View File

@@ -0,0 +1,12 @@
package xmlparsing
// ItemInfo records where a stored item came from.
type ItemInfo struct {
	// Source is the path of the file the item was read from.
	Source string
	// Parse is the metadata of the parse run that produced the item.
	Parse ParseMeta
}
// XMLRootElement is implemented by the root element of an XML file.
//
// INFO: These are just root elements that hold the data of the XML files.
// They get discarded after a parse.
type XMLRootElement[T any] interface {
	// Children returns the items contained in the root element.
	Children() []T
}

15
xmlparsing/library.go Normal file
View File

@@ -0,0 +1,15 @@
package xmlparsing
import "sync"
// Library keeps the history of parse runs.
type Library struct {
	// pmux is not locked by any code visible in this file; presumably it is
	// meant to guard Parses — TODO confirm against the callers.
	pmux sync.Mutex
	// Parses lists the parse runs; Latest treats the last entry as the most
	// recent one.
	Parses []ParseMeta
}
// Latest returns the last entry of l.Parses, or the zero ParseMeta when no
// parse has been recorded.
func (l *Library) Latest() ParseMeta {
	n := len(l.Parses)
	if n == 0 {
		return ParseMeta{}
	}
	return l.Parses[n-1]
}

32
xmlparsing/models.go Normal file
View File

@@ -0,0 +1,32 @@
package xmlparsing
import "fmt"
// IXMLItem is the contract every parsed XML item has to fulfil so it can be
// stored, looked up and printed.
type IXMLItem interface {
	fmt.Stringer
	// Keys returns the identifiers under which the item is stored.
	// INFO:
	// - Keys should be unique
	// - Keys[0] has the special meaning of the primary key (for FTS etc.)
	Keys() []any
	// Type returns the type name of the item; it is used as the first-level
	// key when looking up references to the item.
	Type() string
}
// ILibrary is implemented by anything that can run a parse described by a
// ParseMeta.
type ILibrary interface {
	// Parse performs the parse described by meta and reports any failure.
	Parse(meta ParseMeta) error
}
// ResolvingMap groups the references an item declares. The map key is used as
// the type name when the references are added to a Resolver.
type ResolvingMap[T IXMLItem] map[string][]Resolved[T]
// ReferenceResolver is an optional interface for items that reference other
// items. Items implementing it have their references indexed for reverse
// lookups.
type ReferenceResolver[T IXMLItem] interface {
	// References returns the references declared by the item.
	References() ResolvingMap[T]
}
// Resolved describes a single reference from one item to another.
type Resolved[T IXMLItem] struct {
	// Item points to the item that declares the reference; it is filled in
	// when the reference is indexed.
	Item *T
	// Reference is the ID of the referenced item and serves as the index key.
	Reference string
	// The remaining fields are carried along untouched by this package.
	// NOTE(review): their exact meaning is defined by the item types —
	// confirm against the data model.
	Category   string
	Cert       bool
	Conjecture bool
	Comment    string
	MetaData   map[string]string
}

View File

@@ -0,0 +1,57 @@
package xmlparsing
import (
"encoding/xml"
"strings"
)
// OptionalBool is a tri-state boolean: besides true and false it can express
// that no value was given.
type OptionalBool int

const (
	// Unspecified is the zero value: no (recognised) value was given.
	Unspecified OptionalBool = iota
	// True represents an explicit "true".
	True
	// False represents an explicit "false".
	False
)
// IsTrue reports whether b is explicitly True.
func (b OptionalBool) IsTrue() bool {
	return b == True
}
// IsFalse reports whether b is False or Unspecified; a missing value counts
// as false.
func (b OptionalBool) IsFalse() bool {
	switch b {
	case False, Unspecified:
		return true
	default:
		return false
	}
}
// UnmarshalXML decodes an element carrying a "value" attribute. The attribute
// is compared case-insensitively: "true" yields True, "false" yields False and
// anything else (including a missing attribute) yields Unspecified.
func (b *OptionalBool) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	var element struct {
		Value string `xml:"value,attr"`
	}
	err := d.DecodeElement(&element, &start)
	if err != nil {
		return err
	}

	parsed := Unspecified
	switch strings.ToLower(element.Value) {
	case "true":
		parsed = True
	case "false":
		parsed = False
	}
	*b = parsed
	return nil
}
// MarshalXML writes b as an element with a "value" attribute of "true" or
// "false". An Unspecified value writes nothing at all.
func (b OptionalBool) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	type alias struct {
		Value string `xml:"value,attr"`
	}

	switch b {
	case Unspecified:
		return nil
	case True:
		return e.EncodeElement(alias{Value: "true"}, start)
	default:
		return e.EncodeElement(alias{Value: "false"}, start)
	}
}

210
xmlparsing/parser.go Normal file
View File

@@ -0,0 +1,210 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// Parser turns an XML document into a growing list of Tokens and tracks the
// element nesting while doing so.
type Parser struct {
	// Stack holds the start elements that are currently open, outermost first.
	Stack []*Token
	// LastCharData collects every character-data token produced so far.
	LastCharData []*Token
	// pipeline holds all tokens produced so far, in document order.
	pipeline []*Token
	// decoder is the token source; nil for parsers built from existing tokens.
	decoder *xml.Decoder
}
// NewFromTokens creates a Parser over an already parsed token list. The
// slice is used as the pipeline as-is (not copied), and no decoder is
// attached, so the parser never produces tokens beyond the given ones.
func NewFromTokens(tokens []*Token) *Parser {
	return &Parser{
		Stack:        make([]*Token, 0, len(tokens)),
		LastCharData: make([]*Token, 0, len(tokens)),
		pipeline:     tokens,
		decoder:      nil, // No decoder needed for pre-parsed tokens
	}
}
// NewParser creates a Parser that reads tokens lazily from xmlData.
func NewParser(xmlData string) *Parser {
	return &Parser{
		decoder: xml.NewDecoder(strings.NewReader(xmlData)),
	}
}
// GetStack returns the parser's current element stack. The slice is returned
// without copying.
func (p *Parser) GetStack() []*Token {
	return p.Stack
}
// Pipeline returns all tokens produced so far. The slice is returned without
// copying.
func (p *Parser) Pipeline() []*Token {
	return p.pipeline
}
// PeekFrom returns an iterator that starts at pipeline position index, first
// replaying the tokens already in the pipeline and then pulling further
// tokens from the decoder until the input ends.
//
// If index is outside the pipeline at the time PeekFrom is called, the
// iterator yields a single (nil, nil) pair. A decoding error is yielded once
// as (nil, err) and ends the iteration.
func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
	// The range check is made now, not when the iterator is consumed.
	inRange := index >= 0 && index < len(p.pipeline)

	return func(yield func(*Token, error) bool) {
		if !inRange {
			yield(nil, nil) // No tokens to yield
			return
		}

		// The pipeline may grow while the consumer runs, so its length is
		// re-read on every step.
		for pos := index; pos < len(p.pipeline); pos++ {
			if !yield(p.pipeline[pos], nil) {
				return
			}
		}

		for {
			next, err := p.Token()
			switch {
			case err != nil:
				yield(nil, err)
				return
			case next == nil:
				// EOF
				return
			case !yield(next, nil):
				return
			}
		}
	}
}
// Reset empties the element stack. The pipeline, the collected character
// data and the decoder position are left untouched.
func (p *Parser) Reset() {
	p.Stack = []*Token{}
}
// Token reads the next token from the decoder, wraps it in a Token, appends
// it to the pipeline and returns it.
//
// It returns (nil, nil) when the parser has no decoder or the input is
// exhausted, and (nil, err) on a decoding error.
//
// Besides wrapping the token it maintains the nesting state:
//   - a start element is added as a child of the innermost open element and
//     pushed onto the stack;
//   - an end element pops the stack and marks the popped element's children
//     and character data as complete;
//   - character data is appended to every open element whose character data
//     is not complete yet.
func (p *Parser) Token() (*Token, error) {
	if p.decoder == nil {
		return nil, nil // No more tokens to parse
	}

	start := p.decoder.InputOffset()
	raw, err := p.decoder.Token()
	end := p.decoder.InputOffset()
	if err == io.EOF {
		return nil, nil
	} else if err != nil {
		return nil, err
	}

	// The byte slices inside a decoder token are only valid until the next
	// call to decoder.Token. The token is kept in Inner for the lifetime of
	// the pipeline, so it has to be copied first.
	token := xml.CopyToken(raw)

	customToken := Token{
		parser:      p,
		Index:       len(p.pipeline),
		Inner:       token,
		StartOffset: start + 1,
		EndOffset:   end,
		Stack:       make([]*Token, len(p.Stack)),
	}

	// INFO: these are just pointers, so it should go fast
	copy(customToken.Stack, p.Stack)

	switch t := token.(type) {
	case xml.StartElement:
		attr := mapAttributes(t.Attr)
		customToken.Name = t.Name.Local
		customToken.Attributes = attr
		customToken.Type = StartElement
		if len(p.Stack) > 0 && !p.Stack[len(p.Stack)-1].childrenParsed {
			p.Stack[len(p.Stack)-1].children = append(p.Stack[len(p.Stack)-1].children, &customToken)
		}
		p.Stack = append(p.Stack, &customToken)

	case xml.EndElement:
		if len(p.Stack) > 0 {
			element := p.Stack[len(p.Stack)-1]
			element.childrenParsed = true
			element.chardataParsed = true
			p.Stack = p.Stack[:len(p.Stack)-1]
		}
		customToken.Name = t.Name.Local
		customToken.Attributes = map[string]string{}
		customToken.Type = EndElement

	case xml.CharData:
		text := string(t)
		if text != "" && len(p.Stack) > 0 {
			for i := range p.Stack {
				if !p.Stack[i].chardataParsed {
					p.Stack[i].charData += text
				}
			}
		}
		customToken.Data = text
		customToken.Type = CharData
		p.LastCharData = append(p.LastCharData, &customToken)

	case xml.Comment:
		customToken.Type = Comment
		customToken.Data = string(t)

	case xml.ProcInst:
		customToken.Name = t.Target
		customToken.Data = string(t.Inst)
		customToken.Type = ProcInst

	case xml.Directive:
		customToken.Data = string(t)
		customToken.Type = Directive
	}

	p.pipeline = append(p.pipeline, &customToken)
	return &customToken, nil
}
// Previous returns the tokens that precede pipeline position index. It
// returns nil when index is outside the pipeline. The result aliases the
// pipeline's backing array.
func (p *Parser) Previous(index int) (tokens []*Token) {
	if index >= 0 && index < len(p.pipeline) {
		tokens = p.pipeline[:index]
	}
	return tokens
}
// All drains the parser and returns the complete pipeline. The first
// decoding error aborts the run and is returned with a nil slice.
func (p *Parser) All() ([]*Token, error) {
	var failure error
	// Drive the iterator directly; it stops as soon as an error shows up.
	p.Iterate()(func(_ *Token, err error) bool {
		failure = err
		return err == nil
	})
	if failure != nil {
		return nil, failure
	}
	return p.pipeline, nil
}
// Iterate returns an iterator over all tokens of the document: tokens already
// in the pipeline are replayed, further ones are read from the decoder on
// demand. A decoding error is yielded once as (nil, err) and ends the
// iteration.
//
// NOTE(review): cursor lives outside the returned function, so ranging over
// the same returned iterator a second time continues where the first range
// stopped instead of starting over — confirm that this is intended.
func (p *Parser) Iterate() iter.Seq2[*Token, error] {
	var cursor int
	return func(yield func(*Token, error) bool) {
		for {
			var token *Token
			// INFO: cursor should be max. len(p.pipeline)
			if cursor >= len(p.pipeline) {
				// Nothing buffered at this position: read a fresh token.
				t, err := p.Token()
				if err != nil {
					yield(nil, err)
					return
				}
				if t == nil {
					return // EOF
				}
				token = t
			} else {
				token = p.pipeline[cursor]
			}
			// Advance before yielding so an early stop does not repeat the token.
			cursor++
			if !yield(token, nil) {
				return
			}
		}
	}
}
// mapAttributes flattens a list of XML attributes into a lookup table keyed
// by the attribute's local name. The namespace is ignored; if two attributes
// share a local name the later one wins. The result is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	byName := map[string]string{}
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}

97
xmlparsing/parser_test.go Normal file
View File

@@ -0,0 +1,97 @@
package xmlparsing
import (
"testing"
)
// TestState is the state value handed to Iterate by the tests below.
type TestState struct {
	// ParsedElements collects the names of the tokens seen during a run.
	ParsedElements []string
}
// TestIterate_ValidXML feeds a well-formed document containing an element,
// a comment, a processing instruction and a directive through Iterate and
// expects tokens without any error.
//
// NOTE(review): the package-level Iterate(xmlData, state) called here is not
// defined in the files visible alongside this test — confirm it exists.
func TestIterate_ValidXML(t *testing.T) {
	xmlData := `<root>
<child attr="value">Text</child>
<!-- This is a comment -->
<?xml-stylesheet type="text/css" href="style.css"?>
<!DOCTYPE note>
</root>`
	state := TestState{}
	for tokenResult, err := range Iterate(xmlData, state) {
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if tokenResult == nil {
			t.Fatal("Received nil token result")
		}
		state.ParsedElements = append(state.ParsedElements, tokenResult.Token.Name)
	}
	// At least one token must have been produced.
	if len(state.ParsedElements) == 0 {
		t.Fatal("No elements were parsed")
	}
}
// TestIterate_InvalidXML feeds a document with a mismatched closing tag
// through Iterate and expects at least one error to be reported.
func TestIterate_InvalidXML(t *testing.T) {
	xmlData := `<root><child></root>`
	state := TestState{}
	// global remembers the last error seen during the iteration.
	var global error
	for _, err := range Iterate(xmlData, state) {
		if err != nil {
			global = err
		}
	}
	if global == nil {
		t.Fatal("Expected error, but got nil")
	}
}
// TestIterate_EmptyXML checks that iterating over empty input ends quietly:
// no iteration step may report an error.
func TestIterate_EmptyXML(t *testing.T) {
	xmlData := ""
	state := TestState{}
	for _, err := range Iterate(xmlData, state) {
		if err != nil {
			// The test expects no error at all; the message says so.
			t.Fatalf("Expected no error for empty input, but got: %v", err)
		}
	}
}
// TestIterate_CharDataTracking counts the character-data tokens of a small
// document. Besides the two text nodes, the three line breaks between the
// tags are character data as well, hence five tokens.
//
// NOTE(review): the count relies on Iterate reporting character data with
// the token name "CharData" — confirm against Iterate's implementation.
func TestIterate_CharDataTracking(t *testing.T) {
	const wantCharData = 5

	xmlData := `<root>
<child>First</child>
<child>Second</child>
</root>`
	state := TestState{}
	charDataCount := 0
	for tokenResult, err := range Iterate(xmlData, state) {
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if tokenResult.Token.Name == "CharData" {
			charDataCount++
		}
	}
	if charDataCount != wantCharData {
		// The message is derived from the asserted value so both cannot drift apart.
		t.Fatalf("Expected %d CharData elements, got %d", wantCharData, charDataCount)
	}
}
// TestIterate_AttributeParsing checks that the attributes of the <child>
// start element are exposed under their names. The test also fails when the
// element is never produced, so it cannot pass vacuously.
func TestIterate_AttributeParsing(t *testing.T) {
	xmlData := `<root>
<child attr1="value1" attr2="value2">Content</child>
</root>`
	state := TestState{}
	sawChild := false
	for tokenResult, err := range Iterate(xmlData, state) {
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if tokenResult.Token.Name == "child" && tokenResult.Token.Type == StartElement {
			sawChild = true
			if tokenResult.Token.Attributes["attr1"] != "value1" || tokenResult.Token.Attributes["attr2"] != "value2" {
				t.Fatalf("Incorrect attributes parsed: %v", tokenResult.Token.Attributes)
			}
		}
	}
	if !sawChild {
		t.Fatal("Start element <child> was never produced")
	}
}

48
xmlparsing/resolver.go Normal file
View File

@@ -0,0 +1,48 @@
package xmlparsing
// INFO: This is used to resolve references (back-links) between XML items.
import (
"fmt"
"sync"
)
// Resolver is an index of references, guarded by a read/write mutex.
// Use NewResolver to create one; the zero value has no index map.
type Resolver[T IXMLItem] struct {
	// INFO: map[type][ID]
	index map[string]map[any][]Resolved[T]
	// mu guards index.
	mu sync.RWMutex
}
// NewResolver returns an empty, ready-to-use Resolver.
func NewResolver[T IXMLItem]() *Resolver[T] {
	return &Resolver[T]{index: make(map[string]map[any][]Resolved[T])}
}
// Add records item as a reference to the entry refID of type typeName.
// The per-type index is created on first use.
func (r *Resolver[T]) Add(typeName, refID string, item Resolved[T]) {
	r.mu.Lock()
	defer r.mu.Unlock()

	byID, known := r.index[typeName]
	if !known {
		byID = make(map[any][]Resolved[T])
		r.index[typeName] = byID
	}
	byID[refID] = append(byID[refID], item)
}
// Get returns the references recorded for the entry refID of type typeName.
// It returns an error when no index exists for the type or when nothing was
// recorded for refID. The returned slice is the stored one, not a copy.
//
// Add stores its keys as strings, so a refID of any other dynamic type never
// matches.
func (r *Resolver[T]) Get(typeName string, refID any) ([]Resolved[T], error) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	typeIndex, exists := r.index[typeName]
	if !exists {
		return nil, fmt.Errorf("no index exists for type '%s'", typeName)
	}
	items, ok := typeIndex[refID]
	if !ok {
		// refID is an arbitrary value; %v formats every type sensibly, where
		// %s would print "%!s(...)" for anything that is not a string.
		return nil, fmt.Errorf("no references found for refID '%v' of type '%s'", refID, typeName)
	}
	return items, nil
}
// Clear drops all recorded references by replacing the index with a fresh,
// empty map.
func (r *Resolver[T]) Clear() {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.index = make(map[string]map[any][]Resolved[T])
}

126
xmlparsing/token.go Normal file
View File

@@ -0,0 +1,126 @@
package xmlparsing
import (
"encoding/xml"
"iter"
"strings"
)
// TokenType classifies a Token; the values mirror the token kinds produced
// by encoding/xml.
type TokenType int

const (
	// StartElement is an opening tag.
	StartElement TokenType = iota
	// EndElement is a closing tag.
	EndElement
	// CharData is text between tags.
	CharData
	// Comment is an XML comment.
	Comment
	// ProcInst is a processing instruction.
	ProcInst
	// Directive is an XML directive.
	Directive
)
// Token is a single XML token together with its position and nesting
// context.
type Token struct {
	// Name is the local element name (start/end elements) or the target of a
	// processing instruction; empty for other token types.
	Name string
	// Attributes maps local attribute names to values (start elements only).
	Attributes map[string]string
	// Inner is the underlying encoding/xml token.
	Inner xml.Token
	// Type is the kind of the token.
	Type TokenType
	// Data is the text of character data, comments, processing instructions
	// and directives.
	Data string
	// Stack is a snapshot of the elements that were open when the token was read.
	Stack []*Token
	// StartOffset and EndOffset are derived from the decoder's input offset
	// before and after the token was read.
	StartOffset int64
	EndOffset   int64
	// Index is the token's position in the parser's pipeline.
	Index int

	// charData accumulates the text inside the element.
	charData string
	// children collects the start elements directly inside the element.
	children []*Token
	// parser is the Parser that produced the token.
	parser *Parser
	// childrenParsed and chardataParsed are set once children and charData
	// are complete.
	childrenParsed bool
	chardataParsed bool
}
// String currently always returns the empty string: nothing is written to
// the builder.
// TODO: produce a real textual representation of the token.
func (t *Token) String() string {
	builder := strings.Builder{}
	return builder.String()
}
// Element returns all tokens of the element opened by t, from t itself up to
// and including its matching end tag. Tokens that have not been read yet are
// pulled from the parser on demand.
//
// It returns nil if t is not a start element. If the input ends or fails
// before the matching end tag, the tokens collected so far are returned.
func (t *Token) Element() (tokens []*Token) {
	if t.Type != StartElement {
		return
	}

	// depth counts open elements named like t, so that a nested element with
	// the same name does not end the collection at its own end tag.
	depth := 0
	for token, err := range t.parser.PeekFrom(t.Index) {
		if err != nil || token == nil {
			return tokens
		}
		tokens = append(tokens, token)

		if token.Name != t.Name {
			continue
		}
		switch token.Type {
		case StartElement:
			depth++
		case EndElement:
			depth--
			if depth <= 0 {
				return tokens
			}
		}
	}
	return
}
// Next returns an iterator over the pipeline starting at t's own index, so
// the first token yielded is the one stored at that position.
func (t *Token) Next() iter.Seq2[*Token, error] {
	return t.parser.PeekFrom(t.Index)
}
// Previous returns the tokens that precede t in the parser's pipeline, or
// nil when t is the first token.
func (t *Token) Previous() (tokens []*Token) {
	if t.Index > 0 {
		tokens = t.parser.Previous(t.Index)
	}
	return tokens
}
// Children returns the start elements directly inside the element opened by
// t. If the element has not been read completely yet, the parser is advanced
// up to its end tag first.
//
// For tokens that are not start elements the result is empty.
func (t *Token) Children() (tokens []*Token) {
	if t.childrenParsed {
		return t.children
	}

	tokens = t.Element()
	if len(tokens) == 0 {
		return
	}

	// Reading the element through the parser records the direct children on
	// t as a side effect and marks them complete at the end tag. Re-adding
	// tokens here would duplicate them.
	if t.childrenParsed {
		return t.children
	}

	// Fallback (end tag never seen by the parser): rebuild the list from the
	// collected tokens, keeping only start elements one level below t.
	rest := tokens
	if rest[0] == t {
		rest = rest[1:]
	}
	direct := make([]*Token, 0, len(rest))
	depth := 0
	for _, token := range rest {
		switch token.Type {
		case StartElement:
			if depth == 0 {
				direct = append(direct, token)
			}
			depth++
		case EndElement:
			depth--
		}
	}

	t.children = direct
	t.childrenParsed = true
	return t.children
}
// CharData returns the text belonging to t. For character data, comments,
// processing instructions and directives this is the token's own Data. For
// elements it is the concatenated character data of everything inside the
// element; the result is cached on the token.
func (t *Token) CharData() string {
	switch t.Type {
	case CharData, ProcInst, Comment, Directive:
		return t.Data
	}

	if t.chardataParsed {
		return t.charData
	}

	tokens := t.Element()
	if len(tokens) == 0 {
		return ""
	}

	var sb strings.Builder
	for _, tok := range tokens {
		if tok.Type != CharData {
			continue
		}
		sb.WriteString(tok.Data)
	}

	text := sb.String()
	t.charData = text
	t.chardataParsed = true
	return text
}
// SubParser returns a Parser restricted to the tokens of the element opened
// by t, or nil if t is not a start element.
func (t *Token) SubParser() *Parser {
	if t.Type != StartElement {
		return nil
	}
	tokens := t.Element()
	return NewFromTokens(tokens)
}

215
xmlparsing/xmlprovider.go Normal file
View File

@@ -0,0 +1,215 @@
package xmlparsing
import (
"iter"
"slices"
"sync"
"time"
)
// ParseSource tells what a parse run was based on.
type ParseSource int

const (
	// SourceUnknown is the zero value.
	SourceUnknown ParseSource = iota
	// Path marks a parse of a directory.
	Path
	// Commit marks a parse of a commit.
	Commit
)

// ParseMeta describes a single parse run.
type ParseMeta struct {
	Source  ParseSource
	BaseDir string
	Commit  string
	Date    time.Time
	// FailedPaths lists the files that could not be parsed in this run.
	FailedPaths []string
}

// Equals reports whether p and other describe the same parse run.
// FailedPaths is not part of the comparison.
//
// The dates are compared with time.Time.Equal: comparing time values with ==
// also compares the location and the monotonic clock reading, so two values
// for the same instant could be reported as different.
func (p ParseMeta) Equals(other ParseMeta) bool {
	return p.Source == other.Source &&
		p.BaseDir == other.BaseDir &&
		p.Commit == other.Commit &&
		p.Date.Equal(other.Date)
}

// Failed reports whether path is listed in p.FailedPaths.
func (p ParseMeta) Failed(path string) bool {
	return slices.Contains(p.FailedPaths, path)
}
// An XMLParser holds the items of one type that were read from XML files.
// It combines several parse runs: if the latest run failed to read a path,
// the items previously read from that path are kept.
type XMLParser[T IXMLItem] struct {
	// INFO: map is type map[string]*T
	Items sync.Map
	// INFO: map is type [string]ItemInfo
	Infos sync.Map

	// INFO: Resolver is used to resolve references (back-links) between XML items.
	Resolver Resolver[T]

	// mu guards array.
	mu sync.RWMutex
	// TODO: This array is meant to be for iteration purposes, since iteration over the sync.Map is slow.
	array []T
}
// NewXMLParser returns an empty XMLParser with an initialised Resolver.
//
// The Resolver is built in place: it contains a mutex, and copying it out of
// a separately allocated value would copy that lock.
func NewXMLParser[T IXMLItem]() *XMLParser[T] {
	return &XMLParser[T]{
		Resolver: Resolver[T]{index: make(map[string]map[any][]Resolved[T])},
	}
}
// Prepare starts a new parse run: it empties the iteration array (keeping
// room for as many items as before) and clears the resolver.
//
// INFO: To parse something, we call Prepare, then Serialize, then Cleanup.
// Prepare & Cleanup are called once per parse. Serialize is called for every
// path and can be called concurrently.
func (p *XMLParser[T]) Prepare() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.array = make([]T, 0, len(p.array))
	p.Resolver.Clear()
}
// Serialize reads the file at path into dataholder and stores the items it
// contains: every item is registered under each of its keys, its references
// are indexed, and it is appended to the iteration array. The items are
// tagged with path and the parse run latest.
//
// An error from reading the file is returned without storing anything.
func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, latest ParseMeta) error {
	err := UnmarshalFile(path, dataholder)
	if err != nil {
		return err
	}

	parsed := dataholder.Children()
	origin := ItemInfo{Source: path, Parse: latest}
	for _, item := range parsed {
		// All keys of an item share one pointer.
		ref := &item
		// INFO: Mostly it's just one ID, so the double loop is not that bad.
		for _, id := range item.Keys() {
			p.Infos.Store(id, origin)
			p.Items.Store(id, ref)
		}
		p.addResolvable(item)
	}

	p.mu.Lock()
	defer p.mu.Unlock()
	p.array = append(p.array, parsed...)
	return nil
}
// Cleanup finishes a parse run; it is called after all paths have been
// serialized.
//
// Items that do not stem from the run latest are removed, unless their source
// file is marked as failed in latest. Those survivors are kept: they are
// appended to the iteration array and their references are indexed again.
func (p *XMLParser[T]) Cleanup(latest ParseMeta) {
	var stale []any
	var survivors []*T

	p.Infos.Range(func(key, value any) bool {
		info := value.(ItemInfo)
		if info.Parse.Equals(latest) {
			return true
		}
		if !latest.Failed(info.Source) {
			stale = append(stale, key)
			return true
		}
		stored, found := p.Items.Load(key)
		if !found {
			return true
		}
		// An item is stored once per key; keep it only once.
		if ptr := stored.(*T); !slices.Contains(survivors, ptr) {
			survivors = append(survivors, ptr)
		}
		return true
	})

	for _, key := range stale {
		p.Infos.Delete(key)
		p.Items.Delete(key)
	}

	p.mu.Lock()
	defer p.mu.Unlock()
	for _, ptr := range survivors {
		p.array = append(p.array, *ptr)
		p.addResolvable(*ptr)
	}
}
// addResolvable indexes the references declared by item. Items that do not
// implement ReferenceResolver are ignored. Every indexed reference points
// back to this function's copy of item.
func (p *XMLParser[T]) addResolvable(item T) {
	source, ok := any(item).(ReferenceResolver[T])
	if !ok {
		return
	}

	for name, refs := range source.References() {
		for i := range refs {
			res := refs[i]
			res.Item = &item
			p.Resolver.Add(name, res.Reference, res)
		}
	}
}
// ReverseLookup returns the references that point to item. The resolver is
// queried once for each of the item's keys and the hits are concatenated;
// keys without recorded references are skipped.
func (p *XMLParser[T]) ReverseLookup(item IXMLItem) (ret []Resolved[T]) {
	// NOTE(review): an earlier comment said this runs only for the first
	// key, but the loop covers every key — confirm which is intended.
	keys := item.Keys()
	for _, key := range keys {
		r, err := p.Resolver.Get(item.Type(), key)
		if err == nil {
			ret = append(ret, r...)
		}
	}
	return
}
// String concatenates the string forms of all items in the iteration array,
// in order, while holding the read lock.
func (p *XMLParser[T]) String() (s string) {
	p.mu.RLock()
	defer p.mu.RUnlock()

	for i := range p.array {
		s += p.array[i].String()
	}
	return s
}
// Info returns the ItemInfo stored for id, or the zero ItemInfo when nothing
// is stored under that key.
func (p *XMLParser[T]) Info(id string) ItemInfo {
	if stored, found := p.Infos.Load(id); found {
		return stored.(ItemInfo)
	}
	return ItemInfo{}
}
// Item returns the item stored under id, or nil when there is none.
func (p *XMLParser[T]) Item(id any) *T {
	if stored, found := p.Items.Load(id); found {
		return stored.(*T)
	}
	return nil
}
// Filter returns an iterator over the items for which f returns true. The
// read lock is held for as long as the iteration runs.
func (p *XMLParser[T]) Filter(f func(T) bool) iter.Seq[T] {
	return func(yield func(T) bool) {
		p.mu.RLock()
		defer p.mu.RUnlock()

		for _, candidate := range p.array {
			if !f(candidate) {
				continue
			}
			if !yield(candidate) {
				return
			}
		}
	}
}
// Iterate returns an iterator over all items in the iteration array. The
// read lock is held for as long as the iteration runs, including the time
// spent in the loop body.
func (p *XMLParser[T]) Iterate() iter.Seq[T] {
	return func(yield func(T) bool) {
		p.mu.RLock()
		defer p.mu.RUnlock()
		for _, v := range p.array {
			if !yield(v) {
				return
			}
		}
	}
}
// Count returns the number of items in the iteration array.
func (p *XMLParser[T]) Count() int {
	p.RLock()
	defer p.RUnlock()
	return len(p.array)
}
// RLock acquires the parser's read lock.
// INFO: These are reading locks.
func (p *XMLParser[T]) RLock() {
	p.mu.RLock()
}
// RUnlock releases the parser's read lock.
func (p *XMLParser[T]) RUnlock() {
	p.mu.RUnlock()
}

1
xmlparsing/xmlsort.go Normal file
View File

@@ -0,0 +1 @@
package xmlparsing

474
xmlparsing/xsdtime.go Normal file
View File

@@ -0,0 +1,474 @@
package xmlparsing
import (
"errors"
"fmt"
"math"
"strconv"
)
// An implementation of the xsd 1.1 datatypes:
// date, gDay, gMonth, gMonthDay, gYear, gYearMonth.

// XSDDatetype names the XSD datatype a parsed value belongs to.
type XSDDatetype int

// Seperator is a single separator byte.
// NOTE(review): not used by the code visible in this file.
type Seperator byte
// Defaults and the ASCII byte values the parser looks for.
const (
	// NOTE(review): the DEFAULT_* values are not referenced by the code
	// visible in this file.
	DEFAULT_YEAR  = 0
	DEFAULT_DAY   = 1
	DEFAULT_MONTH = 1

	MIN_ALLOWED_NUMBER = 0x30 // '0', lowest digit
	MAX_ALLOWED_NUMBER = 0x39 // '9', highest digit
	SIGN               = 0x2D // '-', sign of a year or timezone offset
	SEPERATOR          = 0x2D // '-', separator between date parts
	PLUS               = 0x2B // '+', sign of a timezone offset
	COLON              = 0x3A // ':', separator inside a timezone offset
	TIMEZONE           = 0x5A // 'Z', UTC marker
	NONE               = 0x00 // NUL byte
)
// The datatypes a value can be classified as.
const (
	// Unknown is the zero value: the value has not been classified yet.
	Unknown XSDDatetype = iota
	// Invalid marks a value that failed parsing or validation.
	Invalid
	// Date has year, month and day.
	Date
	// GDay has only a day.
	GDay
	// GMonth has only a month.
	GMonth
	// GYear has only a year.
	GYear
	// GMonthDay has month and day.
	GMonthDay
	// GYearMonth has year and month.
	GYearMonth
)
// XSDDate is a parsed XSD date value. Which of Year, Month and Day are
// meaningful depends on the parts present in the input.
type XSDDate struct {
	// base is the raw input handed to Parse.
	base []byte

	Year  int
	Month int
	Day   int

	// The has* flags record which parts were present in the input.
	hasTimezone bool
	hasYear     bool
	hasMonth    bool
	hasDay      bool

	// TZH and TZM are the hour and minute of the timezone offset; TZH
	// carries the sign.
	TZH int
	TZM int

	// state caches the result of the last validation.
	state XSDDatetype
	// error is set when parsing failed.
	error bool

	// INFO: XSD Date Datatypes typically describe a duration in the value space.
	// TimeError bool
	// BaseTime time.Time
	// BaseDuration time.Duration
}
// Sanity check:
// MONTH DAY + Date: Sanity check Month and Day. Additional checks:
// - Month: 2 - Day < 30
// - Month: 4, 6, 9, 11 - Day < 31
// - Month: 1, 3, 5, 7, 8, 10, 12 - Day < 32
// YEAR + Date: Sanity check Year + February 29. Check zero padding.
// Additional checks:
// - Feb 29 on leap years: y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)
// -> Check last 2 digits: if both are zero, check first two digits.
// Else if last digit is n % 4 == 0, the second to last digit m % 2 == 0
// Else if last digit is n % 4 == 2, the second to last digit m % 2 == 1
// Else it's not a leap year.
// - no 0000 Year

// New parses s and returns the resulting XSDDate together with the parse
// error, if any. The returned value is populated as far as parsing got, even
// when an error is returned.
func New(s string) (XSDDate, error) {
	dt := XSDDate{base: []byte(s)}
	err := dt.Parse(dt.base)
	return dt, err
}
// String returns the lexical form of d: the year, month and day parts that
// are non-zero, with "-" standing in for missing leading parts (e.g. "--03",
// "---29"), followed by the timezone if one was parsed. A zero offset is
// written as "Z".
//
// The year is written with at least four digits, as the XSD lexical form
// requires; without the padding a year such as 5 would be printed as "5",
// which Parse rejects.
func (d XSDDate) String() string {
	var s string

	if d.Year != 0 {
		// The sign must not count towards the four digits.
		if d.Year < 0 {
			s += fmt.Sprintf("-%04d", -d.Year)
		} else {
			s += fmt.Sprintf("%04d", d.Year)
		}
	}

	if d.Month != 0 {
		if d.Year == 0 {
			s += "-"
		}
		s += fmt.Sprintf("-%02d", d.Month)
	}

	if d.Day != 0 {
		if d.Year == 0 && d.Month == 0 {
			s += "--"
		}
		s += fmt.Sprintf("-%02d", d.Day)
	}

	if d.hasTimezone {
		if d.TZH == 0 && d.TZM == 0 {
			s += "Z"
		} else {
			sign := "+"
			hours := d.TZH
			if hours < 0 {
				sign = "-"
				hours = -hours
			}
			s += fmt.Sprintf("%s%02d:%02d", sign, hours, d.TZM)
		}
	}

	return s
}
// UnmarshalText implements encoding.TextUnmarshaler by parsing text.
//
// Parse keeps its input as the value's base, and a TextUnmarshaler must copy
// the text if it retains it after returning, so a private copy is parsed.
func (d *XSDDate) UnmarshalText(text []byte) error {
	return d.Parse(append([]byte(nil), text...))
}
// MarshalText implements encoding.TextMarshaler; it returns the bytes of
// d.String() and never fails.
func (d XSDDate) MarshalText() ([]byte, error) {
	return []byte(d.String()), nil
}
// Parse reads s as one of the XSD date forms (date, gYear, gYearMonth,
// gMonth, gMonthDay, gDay), optionally followed by "Z" or a "+hh:mm" /
// "-hh:mm" timezone, and stores the parts in xsdd. s is retained as the
// value's base.
//
// Any previous content of xsdd is discarded first. On malformed input an
// error is returned and the value is marked invalid. Parse only checks the
// shape of the input; range checks happen in Validate.
func (xsdd *XSDDate) Parse(s []byte) error {
	// Start from a clean slate so a reused value carries nothing over from
	// an earlier parse (e.g. a stale timezone or error flag).
	*xsdd = XSDDate{base: s}

	// The smallest possible date is 4 chars long
	if len(s) < 4 {
		return xsdd.parseError("Date too short")
	}

	// Check for Z, then check for timezone
	if len(s) >= 5 && s[len(s)-1] == TIMEZONE {
		xsdd.hasTimezone = true
		s = s[:len(s)-1]
	} else if len(s) >= 10 {
		// A failed attempt just means there is no timezone suffix.
		if err := xsdd.parseTimezone(s[len(s)-6:]); err == nil {
			s = s[:len(s)-6]
		}
	}

	// Year
	if s[1] != SEPERATOR {
		// The first three bytes belong to the year in any case (sign and/or
		// digits); scan on to the first byte that is not a digit.
		i := 3
		for ; i < len(s); i++ {
			if s[i] < MIN_ALLOWED_NUMBER || s[i] > MAX_ALLOWED_NUMBER {
				break
			}
		}

		yint, err := Btoi(s[:i])
		if err != nil {
			return xsdd.parseError(fmt.Sprintf("Invalid year: %q", s[:i]))
		}
		xsdd.Year = yint
		xsdd.hasYear = true

		if i == len(s) {
			return nil
		}

		// The year must be followed by a separator and at least one more
		// byte; slicing past the end would make the checks below panic.
		if s[i] != SEPERATOR || i+1 == len(s) {
			return xsdd.parseError(fmt.Sprintf("Invalid date ending: %q", s[i:]))
		}
		s = s[i+1:]
	} else {
		s = s[2:]
	}

	// Left are 02 (Month), -02 (Day), 02-02 (Date)
	if s[0] != SEPERATOR {
		if len(s) < 2 {
			return xsdd.parseError(fmt.Sprintf("Invalid month: %q", s))
		}
		mstr := s[:2]
		mint, err := Btoi(mstr)
		if err != nil {
			return xsdd.parseError(fmt.Sprintf("Invalid month: %q", mstr))
		}
		xsdd.Month = mint
		xsdd.hasMonth = true

		s = s[2:]
		if len(s) == 0 {
			return nil
		} else if len(s) != 3 || s[0] != SEPERATOR {
			return xsdd.parseError(fmt.Sprintf("Invalid date ending: %q", s))
		}
	}

	s = s[1:]

	// Left is 02 Day
	dint, err := Btoi(s)
	if err != nil {
		return xsdd.parseError(fmt.Sprintf("Invalid day: %q", s))
	}

	// INFO: We do not check len here, it is handled above
	xsdd.Day = dint
	xsdd.hasDay = true

	return nil
}
// WD_CALC_MATRIX holds the per-month offsets used by Weekday (January first).
var WD_CALC_MATRIX = []int{0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4}

// Weekday returns the day of the week of xsdd as a number from 0 (Sunday) to
// 6 (Saturday), computed from Year, Month and Day.
//
// It returns -1 when Month is not in 1..12, which includes values without a
// month such as gYear or gDay; indexing the offset table with such a month
// would panic.
//
// NOTE(review): for negative years the arithmetic below can produce a
// negative or shifted result — confirm whether those need to be supported.
func (xsdd XSDDate) Weekday() int {
	if xsdd.Month < 1 || xsdd.Month > len(WD_CALC_MATRIX) {
		return -1
	}

	y := xsdd.Year
	// January and February count towards the previous year, so that a leap
	// day only affects the months after it.
	if xsdd.Month < 3 {
		y--
	}
	return (y + y/4 - y/100 + y/400 + WD_CALC_MATRIX[xsdd.Month-1] + xsdd.Day) % 7
}
// Base returns the raw input the value was parsed from. The slice is
// returned without copying.
func (xsdd XSDDate) Base() []byte {
	return xsdd.base
}
// Type returns the datatype of xsdd. A value that has not been classified
// yet is validated on the spot; because the receiver is a copy, that result
// is not stored in the caller's value.
func (xsdd XSDDate) Type() XSDDatetype {
	if xsdd.state != Unknown {
		return xsdd.state
	}
	xsdd.Validate()
	return xsdd.state
}
// Validate classifies xsdd, stores the result in its state and reports
// whether the value is valid. A value that failed parsing or has no input
// is always invalid.
func (xsdd *XSDDate) Validate() bool {
	if xsdd.error || len(xsdd.base) == 0 {
		xsdd.state = Invalid
	} else {
		xsdd.state = xsdd.inferState()
	}
	return xsdd.state != Invalid
}
// parseError marks xsdd as failed and invalid and returns an error carrying
// the message s.
func (xsdd *XSDDate) parseError(s string) error {
	xsdd.error = true
	xsdd.state = Invalid
	return errors.New(s)
}
// parseTimezone reads a six-byte "+hh:mm" or "-hh:mm" offset from s and
// stores it in xsdd. The hour keeps the sign. On malformed input an error is
// returned and xsdd is left unchanged. The values are not range-checked.
func (xsdd *XSDDate) parseTimezone(s []byte) error {
	// INFO: We assume the check for 'Z' has already been done
	wellFormed := len(s) == 6 && s[3] == COLON && (s[0] == PLUS || s[0] == SIGN)
	if !wellFormed {
		return fmt.Errorf("Invalid timezone")
	}

	hourPart, minutePart := s[:3], s[4:]

	hours, err := Btoi(hourPart)
	if err != nil {
		return fmt.Errorf("Invalid hour: %v", hourPart)
	}

	minutes, err := Btoi(minutePart)
	if err != nil {
		return fmt.Errorf("Invalid minute: %v", minutePart)
	}

	xsdd.hasTimezone = true
	xsdd.TZH = hours
	xsdd.TZM = minutes
	return nil
}
// inferState derives the datatype from the parts that were present in the
// input and checks those parts for plausibility. It returns Invalid when a
// check fails or when no part is present.
func (xsdd XSDDate) inferState() XSDDatetype {
	y, m, d := xsdd.hasYear, xsdd.hasMonth, xsdd.hasDay

	var (
		kind  XSDDatetype
		valid bool
	)
	// The first matching combination wins, most specific first.
	switch {
	case y && m && d:
		kind, valid = Date, validDayMonthYear(xsdd.Year, xsdd.Month, xsdd.Day)
	case y && m:
		kind, valid = GYearMonth, validMonth(xsdd.Month) && validYear(xsdd.Year)
	case m && d:
		kind, valid = GMonthDay, validDayMonth(xsdd.Day, xsdd.Month)
	case y:
		kind, valid = GYear, validYear(xsdd.Year)
	case m:
		kind, valid = GMonth, validMonth(xsdd.Month)
	case d:
		kind, valid = GDay, validDay(xsdd.Day)
	default:
		return Invalid
	}

	if !valid {
		return Invalid
	}
	return kind
}
// Before reports whether xsdd sorts strictly before other, comparing year,
// then month, then day. Timezones and validity are not taken into account.
func (xsdd XSDDate) Before(other XSDDate) bool {
	if xsdd.Year != other.Year {
		return xsdd.Year < other.Year
	}
	if xsdd.Month != other.Month {
		return xsdd.Month < other.Month
	}
	return xsdd.Day < other.Day
}
// Compare orders xsddate against other by year, month and day and returns
// -1, 0 or 1. Both values are validated first: an invalid receiver yields -1
// and, failing that, an invalid argument yields 1.
func (xsddate *XSDDate) Compare(other *XSDDate) int {
	if !xsddate.Validate() {
		return -1
	}
	if !other.Validate() {
		return 1
	}

	parts := [...][2]int{
		{xsddate.Year, other.Year},
		{xsddate.Month, other.Month},
		{xsddate.Day, other.Day},
	}
	for _, pair := range parts {
		switch {
		case pair[0] < pair[1]:
			return -1
		case pair[0] > pair[1]:
			return 1
		}
	}
	return 0
}
// validDay reports whether i is a possible day of a month (1..31).
func validDay(i int) bool {
	return i >= 1 && i <= 31
}

// validMonth reports whether i is a month number (1..12).
func validMonth(i int) bool {
	return i >= 1 && i <= 12
}

// validYear reports whether i is an allowed year; only year 0 is rejected.
func validYear(i int) bool {
	return i != 0
}

// validDayMonth reports whether day d exists in month m of some year:
// February has at most 29 days, April, June, September and November have 30.
func validDayMonth(d int, m int) bool {
	if !validDay(d) || !validMonth(m) {
		return false
	}
	switch m {
	case 2:
		return d <= 29
	case 4, 6, 9, 11:
		return d <= 30
	}
	return true
}

// validDayMonthYear reports whether the date y-m-d exists. On top of the
// month-length check shared with validDayMonth, February 29 is accepted in
// leap years only.
func validDayMonthYear(y int, m int, d int) bool {
	// Reuse the month-length check; checking the three ranges separately
	// would let dates such as February 30 or April 31 through.
	if !validYear(y) || !validDayMonth(d, m) {
		return false
	}
	if m == 2 && d == 29 {
		return y%4 == 0 && (y%100 != 0 || y%400 == 0)
	}
	return true
}
// ErrNoNumber is returned by Btoi when the input is not a decimal integer.
var ErrNoNumber = errors.New("Byte input is NaN")

// ErrOverflow is returned by Btoi when the number does not fit into an int.
var ErrOverflow = errors.New("Byte input overflows int")

// Btoi converts the ASCII decimal number in bs, with an optional leading '+'
// or '-', to an int.
//
// Short inputs are converted in place without allocating. Inputs long enough
// to possibly overflow an int are handed to strconv.ParseInt, whose errors
// are returned unchanged. Empty input, a sign without digits and any
// non-digit byte yield ErrNoNumber.
func Btoi(bs []byte) (int, error) {
	l := len(bs)
	if l == 0 {
		return 0, ErrNoNumber
	}

	// slow path for large numbers (-> strconv.Atoi):
	if strconv.IntSize == 32 && l > 9 || strconv.IntSize == 64 && l > 18 {
		i, err := strconv.ParseInt(string(bs), 10, 64)
		if err != nil {
			return 0, err
		}
		// On 64-bit platforms every int64 fits into an int; only 32-bit
		// platforms need a range check.
		if strconv.IntSize == 32 && (i > math.MaxInt32 || i < math.MinInt32) {
			return 0, ErrOverflow
		}
		return int(i), nil
	}

	negative := false
	switch bs[0] {
	case '+':
		bs = bs[1:]
	case '-':
		bs = bs[1:]
		negative = true
	}

	// A sign on its own is not a number.
	if len(bs) == 0 {
		return 0, ErrNoNumber
	}

	var ret int
	for _, b := range bs {
		if b < '0' || b > '9' {
			return 0, ErrNoNumber
		}
		ret = ret*10 + int(b-'0')
	}

	if negative {
		ret = -ret
	}
	return ret, nil
}

View File

@@ -0,0 +1,69 @@
package xmlparsing
import "testing"
// Test is one row of the parsing test table.
type Test struct {
	// Input is the lexical form handed to New.
	Input string
	// Output holds the expected field values.
	Output XSDDate
	// Type is the datatype the input belongs to.
	Type XSDDatetype
}
// tests lists inputs covering every supported datatype, with and without a
// timezone, together with the expected parse result.
var tests = []Test{
	// A full year-month-day value is a Date, not a GYear.
	{"2006-01-02", XSDDate{Year: 2006, Month: 1, Day: 2}, Date},
	{"-1222-01-02", XSDDate{Year: -1222, Month: 1, Day: 2}, Date},
	{"-2777", XSDDate{Year: -2777}, GYear},
	{"1988-12:30", XSDDate{Year: 1988, hasTimezone: true, TZH: -12, TZM: 30}, GYear},
	{"--03+05:00", XSDDate{Month: 3, hasTimezone: true, TZH: 5, TZM: 0}, GMonth},
	{"---29", XSDDate{Day: 29}, GDay},
	{"-1234567-12Z", XSDDate{Year: -1234567, Month: 12, hasTimezone: true, TZH: 0, TZM: 0}, GYearMonth},
	{"-1234567-12+05:00", XSDDate{Year: -1234567, Month: 12, hasTimezone: true, TZH: 5, TZM: 0}, GYearMonth},
	{"--12-31", XSDDate{Month: 12, Day: 31}, GMonthDay},
}

// TestXSDTimeParse parses every table entry and compares the date parts, the
// timezone and the inferred datatype with the expectation.
func TestXSDTimeParse(t *testing.T) {
	for _, test := range tests {
		dt, err := New(test.Input)
		if err != nil {
			t.Errorf("Error parsing %v: %v", test.Input, err)
			continue
		}
		if dt.Year != test.Output.Year {
			t.Errorf("Year mismatch for %v: expected %v, got %v", test.Input, test.Output.Year, dt.Year)
		}
		if dt.Month != test.Output.Month {
			t.Errorf("Month mismatch for %v: expected %v, got %v", test.Input, test.Output.Month, dt.Month)
		}
		if dt.Day != test.Output.Day {
			t.Errorf("Day mismatch for %v: expected %v, got %v", test.Input, test.Output.Day, dt.Day)
		}
		if dt.hasTimezone != test.Output.hasTimezone {
			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.hasTimezone, dt.hasTimezone)
		}
		if dt.TZH != test.Output.TZH {
			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.TZH, dt.TZH)
		}
		if dt.TZM != test.Output.TZM {
			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.TZM, dt.TZM)
		}
		// The Type column was never checked before, which let a wrong
		// expectation go unnoticed.
		if got := dt.Type(); got != test.Type {
			t.Errorf("Type mismatch for %v: expected %v, got %v", test.Input, test.Type, got)
		}
	}
}
// TestXSDTimeString checks the round trip: formatting a parsed value must
// reproduce the input exactly.
func TestXSDTimeString(t *testing.T) {
	for _, test := range tests {
		dt, err := New(test.Input)
		if err != nil {
			t.Errorf("Error parsing %v: %v", test.Input, err)
			continue
		}
		if dt.String() == test.Input {
			continue
		}
		t.Errorf("String mismatch for %v: expected %v, got %v", test.Input, test.Input, dt.String())
	}
}