Lots of stuff

2025-12-16 06:25:31 +00:00 · 2025-06-24 18:20:06 +02:00
parent 3127446dab
commit 9563145aeb
29 changed files with 1694 additions and 1386 deletions
--- a/xmlparsing/helpers.go
+++ b/xmlparsing/helpers.go
@@ -0,0 +1,39 @@
+package xmlparsing
+
+import (
+	"encoding/xml"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+)
+
+// UnmarshalFile reads the file at filename in full and decodes its XML content
+// into data with encoding/xml. The first error from opening, reading or
+// decoding is returned unchanged.
+//
+// NOTE(review): data is received by value and decoded through &data, so T
+// presumably needs to be a pointer (or an interface holding one) for the
+// caller to see the decoded result - confirm against callers.
+func UnmarshalFile[T any](filename string, data T) error {
+	slog.Debug("Unmarshalling file: ", "file", filename)
+
+	f, err := os.Open(filename)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	raw, err := io.ReadAll(f)
+	if err != nil {
+		return err
+	}
+
+	return xml.Unmarshal(raw, &data)
+}
+
+// XMLFilesForPath returns the entries directly inside path whose name matches
+// "*.xml", as reported by filepath.Glob.
+//
+// It returns an error when path cannot be stat'ed for any reason. Reporting
+// every Stat failure (not only "does not exist") matters because
+// filepath.Glob ignores I/O errors and would otherwise turn an unreadable
+// path into an empty, error-free result.
+func XMLFilesForPath(path string) ([]string, error) {
+	if _, err := os.Stat(path); err != nil {
+		return nil, err
+	}
+
+	return filepath.Glob(filepath.Join(path, "*.xml"))
+}
--- a/xmlparsing/item.go
+++ b/xmlparsing/item.go
@@ -0,0 +1,12 @@
+package xmlparsing
+
+// ItemInfo describes where a stored item came from: Source is the path of the
+// file it was read from and Parse is the metadata of the parse run that
+// produced it (XMLParser.Serialize stores one ItemInfo per item key).
+type ItemInfo struct {
+	Source string
+	Parse  ParseMeta
+}
+
+// XMLRootElement is implemented by the root elements that hold the data of an
+// XML file while it is decoded.
+// INFO: These are just root elements that hold the data of the XML files.
+// They get discarded after a parse.
+type XMLRootElement[T any] interface {
+	// Children returns the items contained in the root element.
+	Children() []T
+}
--- a/xmlparsing/library.go
+++ b/xmlparsing/library.go
@@ -0,0 +1,15 @@
+package xmlparsing
+
+import "sync"
+
+// Library keeps the list of parse runs in Parses; Latest returns the last
+// entry.
+// NOTE(review): pmux is presumably meant to guard Parses, but nothing in this
+// file locks it - confirm against the code that appends to Parses.
+type Library struct {
+	pmux   sync.Mutex
+	Parses []ParseMeta
+}
+
+// Latest returns the last entry of l.Parses, or the zero ParseMeta when no
+// parse has been recorded. It does not take l.pmux.
+func (l *Library) Latest() ParseMeta {
+	if n := len(l.Parses); n > 0 {
+		return l.Parses[n-1]
+	}
+	return ParseMeta{}
+}
--- a/xmlparsing/models.go
+++ b/xmlparsing/models.go
@@ -0,0 +1,32 @@
+package xmlparsing
+
+import "fmt"
+
+// IXMLItem is the contract every item stored in an XMLParser has to fulfil.
+type IXMLItem interface {
+	fmt.Stringer
+	// Keys returns the identifiers under which the item is stored.
+	// INFO:
+	// - Keys should be unique
+	// - Keys[0] has the special meaning of the primary key (for FTS etc.)
+	Keys() []any
+	// Type returns the type name of the item; XMLParser.ReverseLookup passes
+	// it to the Resolver as the index name.
+	Type() string
+}
+
+// ILibrary is implemented by types that can run a parse described by a
+// ParseMeta.
+type ILibrary interface {
+	// Parse runs a parse for meta and reports any failure.
+	Parse(meta ParseMeta) error
+}
+
+// ResolvingMap groups the references of an item by name; XMLParser uses the
+// map key as the Resolver type name when it registers the references.
+type ResolvingMap[T IXMLItem] map[string][]Resolved[T]
+
+// ReferenceResolver is implemented by items that reference other items. The
+// XMLParser registers the returned references with its Resolver.
+type ReferenceResolver[T IXMLItem] interface {
+	References() ResolvingMap[T]
+}
+
+// Resolved is one reference from an item to another item.
+//
+// Item points to the referencing item (it is filled in by
+// XMLParser.addResolvable) and Reference is the ID of the referenced item,
+// used as the Resolver key. The remaining fields carry additional data about
+// the reference; their exact meaning is not visible in this package.
+type Resolved[T IXMLItem] struct {
+	Item       *T
+	Reference  string
+	Category   string
+	Cert       bool
+	Conjecture bool
+	Comment    string
+	MetaData   map[string]string
+}
--- a/xmlparsing/optionalbool.go
+++ b/xmlparsing/optionalbool.go
@@ -0,0 +1,57 @@
+package xmlparsing
+
+import (
+	"encoding/xml"
+	"strings"
+)
+
+// OptionalBool is a three-state boolean read from the "value" attribute of an
+// XML element: explicitly true, explicitly false, or not specified.
+type OptionalBool int
+
+const (
+	// Unspecified is the zero value: no (recognised) value was given.
+	Unspecified OptionalBool = iota
+	True
+	False
+)
+
+// IsTrue reports whether b is exactly True.
+func (b OptionalBool) IsTrue() bool {
+	return b == True
+}
+
+// IsFalse reports whether b is False or Unspecified.
+func (b OptionalBool) IsFalse() bool {
+	switch b {
+	case False, Unspecified:
+		return true
+	}
+	return false
+}
+
+// UnmarshalXML decodes the element and maps its "value" attribute, compared
+// case-insensitively, to True or False. Any other value, including a missing
+// attribute, yields Unspecified.
+func (b *OptionalBool) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	var elem struct {
+		Value string `xml:"value,attr"`
+	}
+	if err := d.DecodeElement(&elem, &start); err != nil {
+		return err
+	}
+
+	lowered := strings.ToLower(elem.Value)
+	if lowered == "true" {
+		*b = True
+		return nil
+	}
+	if lowered == "false" {
+		*b = False
+		return nil
+	}
+	*b = Unspecified
+	return nil
+}
+
+// MarshalXML writes the element with value="true" for True and value="false"
+// for every other value except Unspecified, for which nothing is written.
+func (b OptionalBool) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
+	type alias struct {
+		Value string `xml:"value,attr"`
+	}
+
+	switch b {
+	case Unspecified:
+		return nil
+	case True:
+		return e.EncodeElement(alias{Value: "true"}, start)
+	default:
+		return e.EncodeElement(alias{Value: "false"}, start)
+	}
+}
--- a/xmlparsing/parser.go
+++ b/xmlparsing/parser.go
@@ -0,0 +1,210 @@
+package xmlparsing
+
+import (
+	"encoding/xml"
+	"io"
+	"iter"
+	"strings"
+)
+
+// Parser wraps an xml.Decoder and records every token it produces.
+//
+// Stack holds the start elements that are currently open, LastCharData every
+// character-data token seen so far, and pipeline all tokens in document
+// order. decoder is nil for parsers created from pre-parsed tokens.
+type Parser struct {
+	Stack        []*Token
+	LastCharData []*Token
+	pipeline     []*Token
+	decoder      *xml.Decoder
+}
+
+// NewFromTokens returns a Parser whose pipeline is the given, already parsed
+// token slice. The parser has no decoder, so Token never produces new tokens.
+// The tokens themselves are not modified.
+func NewFromTokens(tokens []*Token) *Parser {
+	n := len(tokens)
+
+	p := new(Parser)
+	p.pipeline = tokens
+	p.Stack = make([]*Token, 0, n)
+	p.LastCharData = make([]*Token, 0, n)
+	return p
+}
+
+// NewParser returns a Parser that lazily decodes xmlData. No token is read
+// until Token, Iterate, All or PeekFrom is used.
+func NewParser(xmlData string) *Parser {
+	reader := strings.NewReader(xmlData)
+	return &Parser{decoder: xml.NewDecoder(reader)}
+}
+
+// GetStack returns the parser's current stack of open start elements. The
+// slice is returned as is, not copied.
+func (p *Parser) GetStack() []*Token {
+	return p.Stack
+}
+
+// Pipeline returns all tokens recorded so far, in the order they were parsed.
+// The slice is returned as is, not copied.
+func (p *Parser) Pipeline() []*Token {
+	return p.pipeline
+}
+
+// PeekFrom returns a sequence that first yields the recorded tokens starting
+// at index and then keeps parsing new tokens until the input ends, an error
+// occurs or the consumer stops.
+//
+// If index is outside the recorded pipeline at the time PeekFrom is called,
+// the sequence yields a single (nil, nil) pair. A parse error is yielded as
+// (nil, err) and ends the sequence.
+func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
+	// The range check is done once, when PeekFrom is called.
+	outOfRange := index < 0 || index >= len(p.pipeline)
+
+	return func(yield func(*Token, error) bool) {
+		if outOfRange {
+			yield(nil, nil) // No tokens to yield
+			return
+		}
+
+		// Replay what has been recorded already. The length is re-read on
+		// every step because the pipeline may grow while iterating.
+		pos := index
+		for pos < len(p.pipeline) {
+			if !yield(p.pipeline[pos], nil) {
+				return
+			}
+			pos++
+		}
+
+		// Then continue with tokens that still have to be parsed.
+		for {
+			tok, err := p.Token()
+			switch {
+			case err != nil:
+				yield(nil, err)
+				return
+			case tok == nil:
+				// EOF
+				return
+			case !yield(tok, nil):
+				return
+			}
+		}
+	}
+}
+
+// Reset replaces the stack of open elements with a new, empty one. The
+// recorded pipeline and the decoder position are left untouched.
+func (p *Parser) Reset() {
+	p.Stack = make([]*Token, 0)
+}
+
+// Token reads the next token from the decoder, wraps it in a *Token, records
+// it in the pipeline and returns it.
+//
+// It returns (nil, nil) when the parser has no decoder or the input is
+// exhausted, and (nil, err) for any other decoder error.
+//
+// Besides creating the token it maintains the parser state: start elements
+// are pushed on the stack and registered as child of the enclosing element,
+// end elements pop the stack and mark the closed element as complete, and
+// character data is appended to every open element that is not complete yet.
+func (p *Parser) Token() (*Token, error) {
+	if p.decoder == nil {
+		return nil, nil // No more tokens to parse
+	}
+
+	start := p.decoder.InputOffset()
+	raw, err := p.decoder.Token()
+	end := p.decoder.InputOffset()
+	if err == io.EOF {
+		return nil, nil
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	customToken := &Token{
+		parser: p,
+		Index:  len(p.pipeline),
+		// The bytes behind CharData, Comment, ProcInst and Directive tokens
+		// are only valid until the next call to decoder.Token, so the stored
+		// token has to be a copy.
+		Inner:       xml.CopyToken(raw),
+		StartOffset: start + 1,
+		EndOffset:   end,
+		Stack:       make([]*Token, len(p.Stack)),
+	}
+
+	// INFO: these are just pointers, so it should go fast
+	copy(customToken.Stack, p.Stack)
+
+	switch t := raw.(type) {
+	case xml.StartElement:
+		customToken.Name = t.Name.Local
+		customToken.Attributes = mapAttributes(t.Attr)
+		customToken.Type = StartElement
+		if n := len(p.Stack); n > 0 && !p.Stack[n-1].childrenParsed {
+			parent := p.Stack[n-1]
+			parent.children = append(parent.children, customToken)
+		}
+		p.Stack = append(p.Stack, customToken)
+
+	case xml.EndElement:
+		if n := len(p.Stack); n > 0 {
+			element := p.Stack[n-1]
+			element.childrenParsed = true
+			element.chardataParsed = true
+			p.Stack = p.Stack[:n-1]
+		}
+		customToken.Name = t.Name.Local
+		customToken.Attributes = map[string]string{}
+		customToken.Type = EndElement
+
+	case xml.CharData:
+		text := string(t)
+		if text != "" {
+			for _, open := range p.Stack {
+				if !open.chardataParsed {
+					open.charData += text
+				}
+			}
+		}
+		customToken.Data = text
+		customToken.Type = CharData
+		p.LastCharData = append(p.LastCharData, customToken)
+
+	case xml.Comment:
+		customToken.Type = Comment
+		customToken.Data = string(t)
+
+	case xml.ProcInst:
+		customToken.Name = t.Target
+		customToken.Data = string(t.Inst)
+		customToken.Type = ProcInst
+
+	case xml.Directive:
+		customToken.Data = string(t)
+		customToken.Type = Directive
+	}
+
+	p.pipeline = append(p.pipeline, customToken)
+	return customToken, nil
+}
+
+// Previous returns the recorded tokens before index, or nil when index is
+// outside the pipeline.
+//
+// The result shares its elements with the pipeline, but its capacity is
+// clamped to its length so that an append by the caller allocates a new
+// slice instead of overwriting the pipeline entries that follow.
+func (p *Parser) Previous(index int) (tokens []*Token) {
+	if index < 0 || index >= len(p.pipeline) {
+		return nil
+	}
+
+	return p.pipeline[:index:index]
+}
+
+// All parses the remaining input and returns every token in the pipeline. It
+// stops at the first error and returns it with a nil slice.
+func (p *Parser) All() ([]*Token, error) {
+	var firstErr error
+	p.Iterate()(func(_ *Token, err error) bool {
+		firstErr = err
+		return err == nil
+	})
+
+	if firstErr != nil {
+		return nil, firstErr
+	}
+	return p.pipeline, nil
+}
+
+// Iterate returns a sequence over all tokens: tokens already in the pipeline
+// are replayed, the rest is parsed on demand. A parse error is yielded as
+// (nil, err) and ends the sequence.
+//
+// The position lives outside the returned function, so ranging over the same
+// returned sequence a second time continues where the first run stopped.
+func (p *Parser) Iterate() iter.Seq2[*Token, error] {
+	cursor := 0
+	return func(yield func(*Token, error) bool) {
+		for {
+			var current *Token
+			// INFO: cursor should be max. len(p.pipeline)
+			if cursor < len(p.pipeline) {
+				current = p.pipeline[cursor]
+			} else {
+				parsed, err := p.Token()
+				if err != nil {
+					yield(nil, err)
+					return
+				}
+				if parsed == nil {
+					return // EOF
+				}
+				current = parsed
+			}
+
+			cursor++
+			if !yield(current, nil) {
+				return
+			}
+		}
+	}
+}
+
+// mapAttributes builds a map from local attribute name to value. The result
+// is never nil; for duplicate local names the last attribute wins.
+func mapAttributes(attrs []xml.Attr) map[string]string {
+	out := make(map[string]string, len(attrs))
+	for i := range attrs {
+		out[attrs[i].Name.Local] = attrs[i].Value
+	}
+	return out
+}
--- a/xmlparsing/parser_test.go
+++ b/xmlparsing/parser_test.go
@@ -0,0 +1,97 @@
+package xmlparsing
+
+import (
+	"testing"
+)
+
+// TestState is the state value handed to Iterate in the tests below;
+// ParsedElements collects the token names seen while iterating.
+type TestState struct {
+	ParsedElements []string
+}
+
+// TestIterate_ValidXML iterates over a well-formed document containing an
+// element with an attribute, a comment, a processing instruction and a
+// directive, and expects no error, no nil result and at least one token.
+// NOTE(review): Iterate is not defined in the files visible here - confirm
+// it still exists with this signature.
+func TestIterate_ValidXML(t *testing.T) {
+	xmlData := `<root>
+		<child attr="value">Text</child>
+		<!-- This is a comment -->
+		<?xml-stylesheet type="text/css" href="style.css"?>
+		<!DOCTYPE note>
+	</root>`
+
+	state := TestState{}
+	for tokenResult, err := range Iterate(xmlData, state) {
+		if err != nil {
+			t.Fatalf("Unexpected error: %v", err)
+		}
+		if tokenResult == nil {
+			t.Fatal("Received nil token result")
+		}
+		state.ParsedElements = append(state.ParsedElements, tokenResult.Token.Name)
+	}
+
+	// Only checks that something was produced, not which tokens.
+	if len(state.ParsedElements) == 0 {
+		t.Fatal("No elements were parsed")
+	}
+}
+
+// TestIterate_InvalidXML expects iteration over a document with a mismatched
+// end tag to report an error at some point.
+func TestIterate_InvalidXML(t *testing.T) {
+	xmlData := `<root><child></root>`
+	state := TestState{}
+
+	var lastErr error
+	for _, err := range Iterate(xmlData, state) {
+		if err == nil {
+			continue
+		}
+		lastErr = err
+	}
+
+	if lastErr == nil {
+		t.Fatal("Expected error, but got nil")
+	}
+}
+
+// TestIterate_EmptyXML expects iteration over an empty document to finish
+// without reporting any error.
+func TestIterate_EmptyXML(t *testing.T) {
+	xmlData := ""
+	state := TestState{}
+	for _, err := range Iterate(xmlData, state) {
+		if err != nil {
+			// The test expects no error at all; the previous message named
+			// an iter.ErrEnd value that does not exist.
+			t.Fatalf("Expected no error for empty input, but got: %v", err)
+		}
+	}
+}
+
+// TestIterate_CharDataTracking counts the tokens named "CharData". The
+// document contains five character-data runs: the two element texts plus the
+// three whitespace runs between the tags.
+func TestIterate_CharDataTracking(t *testing.T) {
+	xmlData := `<root>
+		<child>First</child>
+		<child>Second</child>
+	</root>`
+
+	state := TestState{}
+	charDataCount := 0
+	for tokenResult, err := range Iterate(xmlData, state) {
+		if err != nil {
+			t.Fatalf("Unexpected error: %v", err)
+		}
+		if tokenResult.Token.Name == "CharData" {
+			charDataCount++
+		}
+	}
+
+	// The failure message now states the count that is actually asserted.
+	const want = 5
+	if charDataCount != want {
+		t.Fatalf("Expected %d CharData elements, got %d", want, charDataCount)
+	}
+}
+
+// TestIterate_AttributeParsing checks that both attributes of the <child>
+// start element are exposed with their values.
+func TestIterate_AttributeParsing(t *testing.T) {
+	xmlData := `<root>
+		<child attr1="value1" attr2="value2">Content</child>
+	</root>`
+
+	state := TestState{}
+	for tokenResult, err := range Iterate(xmlData, state) {
+		if err != nil {
+			t.Fatalf("Unexpected error: %v", err)
+		}
+
+		// Only the start tag of <child> is of interest.
+		if tokenResult.Token.Name != "child" || tokenResult.Token.Type != StartElement {
+			continue
+		}
+
+		attrs := tokenResult.Token.Attributes
+		if attrs["attr1"] != "value1" || attrs["attr2"] != "value2" {
+			t.Fatalf("Incorrect attributes parsed: %v", attrs)
+		}
+	}
+}
--- a/xmlparsing/resolver.go
+++ b/xmlparsing/resolver.go
@@ -0,0 +1,48 @@
+package xmlparsing
+
+// INFO: This is used to resolve references (back-links) between XML items.
+
+import (
+	"fmt"
+	"sync"
+)
+
+// Resolver is a two-level index of references: type name first, then
+// reference ID, each mapping to the list of Resolved entries added for it.
+// All access to index goes through mu.
+type Resolver[T IXMLItem] struct {
+	// INFO: map[type][ID]
+	index map[string]map[any][]Resolved[T]
+	mu    sync.RWMutex
+}
+
+// NewResolver returns a Resolver with an empty, ready-to-use index.
+func NewResolver[T IXMLItem]() *Resolver[T] {
+	r := new(Resolver[T])
+	r.index = map[string]map[any][]Resolved[T]{}
+	return r
+}
+
+// Add appends item to the entries stored for refID under typeName, creating
+// the per-type index on first use. It takes the write lock.
+func (r *Resolver[T]) Add(typeName, refID string, item Resolved[T]) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	byID, ok := r.index[typeName]
+	if !ok {
+		byID = make(map[any][]Resolved[T])
+		r.index[typeName] = byID
+	}
+	byID[refID] = append(byID[refID], item)
+}
+
+// Get returns the entries stored for refID under typeName. It returns an
+// error when there is no index for typeName or no entry for refID. It takes
+// the read lock.
+func (r *Resolver[T]) Get(typeName string, refID any) ([]Resolved[T], error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	typeIndex, exists := r.index[typeName]
+	if !exists {
+		return nil, fmt.Errorf("no index exists for type '%s'", typeName)
+	}
+
+	items, ok := typeIndex[refID]
+	if !ok {
+		// refID is an arbitrary value, so it is formatted with %v; %s would
+		// render non-string keys as "%!s(int=1)".
+		return nil, fmt.Errorf("no references found for refID '%v' of type '%s'", refID, typeName)
+	}
+	return items, nil
+}
+
+// Clear drops all stored references by replacing the index with a new, empty
+// map. It takes the write lock.
+func (r *Resolver[T]) Clear() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.index = map[string]map[any][]Resolved[T]{}
+}
--- a/xmlparsing/token.go
+++ b/xmlparsing/token.go
@@ -0,0 +1,126 @@
+package xmlparsing
+
+import (
+	"encoding/xml"
+	"iter"
+	"strings"
+)
+
+// TokenType identifies the kind of XML token a Token wraps. The values
+// correspond to the token types of encoding/xml with the same names.
+type TokenType int
+
+const (
+	StartElement TokenType = iota
+	EndElement
+	CharData
+	Comment
+	ProcInst
+	Directive
+)
+
+// Token is one parsed XML token together with its position and context.
+//
+// Name is the local element name (the target for processing instructions),
+// Attributes the attributes by local name, Inner the underlying encoding/xml
+// token and Data the text of character data, comments, processing
+// instructions and directives. Stack is a snapshot of the elements that were
+// open when the token was read, StartOffset/EndOffset are taken from the
+// decoder's input offsets and Index is the token's position in the parser's
+// pipeline.
+//
+// The unexported fields cache the accumulated text and the child elements of
+// a start element; the *Parsed flags record whether those caches are final.
+type Token struct {
+	Name           string
+	Attributes     map[string]string
+	Inner          xml.Token
+	Type           TokenType
+	Data           string
+	Stack          []*Token
+	StartOffset    int64
+	EndOffset      int64
+	Index          int
+	charData       string
+	children       []*Token
+	parser         *Parser
+	childrenParsed bool
+	chardataParsed bool
+}
+
+// String implements fmt.Stringer.
+// NOTE(review): nothing is ever written to the builder, so this currently
+// always returns the empty string - looks like an unfinished stub.
+func (t *Token) String() string {
+	builder := strings.Builder{}
+	return builder.String()
+}
+
+// Element returns all tokens of the element opened by t, from t itself up to
+// and including its end tag, parsing ahead if necessary. It returns nil when
+// t is not a start element. If the input ends or fails before the end tag is
+// reached, the tokens collected so far are returned.
+//
+// The end tag is found by tracking nesting depth rather than by name, so an
+// element that contains elements of the same name (<a><a/></a>) is returned
+// completely instead of being cut off at the first inner end tag.
+func (t *Token) Element() (tokens []*Token) {
+	if t.Type != StartElement {
+		return nil
+	}
+
+	depth := 0
+	for token, err := range t.parser.PeekFrom(t.Index) {
+		if err != nil || token == nil {
+			return tokens
+		}
+
+		tokens = append(tokens, token)
+		switch token.Type {
+		case StartElement:
+			depth++
+		case EndElement:
+			depth--
+			if depth <= 0 {
+				return tokens
+			}
+		}
+	}
+
+	return tokens
+}
+
+// Next returns a sequence over the tokens starting at t's own pipeline index,
+// continuing with tokens that still have to be parsed.
+// NOTE(review): the sequence starts at t.Index, so the first token yielded is
+// t itself, not its successor - confirm this is intended.
+func (t *Token) Next() iter.Seq2[*Token, error] {
+	return t.parser.PeekFrom(t.Index)
+}
+
+// Previous returns the tokens recorded before t, or nil for the first token.
+func (t *Token) Previous() (tokens []*Token) {
+	if t.Index > 0 {
+		tokens = t.parser.Previous(t.Index)
+	}
+	return tokens
+}
+
+// Children returns the start elements that are direct children of t, parsing
+// the element to its end if that has not happened yet. It returns nil for
+// tokens that are not start elements.
+func (t *Token) Children() (tokens []*Token) {
+	if t.childrenParsed {
+		return t.children
+	}
+
+	tokens = t.Element()
+	if len(tokens) == 0 {
+		return tokens
+	}
+
+	// Reading the element up to its end tag normally lets Parser.Token
+	// record the direct children and mark them complete. Adding to that list
+	// again would duplicate every child.
+	if t.childrenParsed {
+		return t.children
+	}
+
+	// Fallback for when the parser did not track the element (for example
+	// after Parser.Reset): rebuild the list from the token range. tokens[0]
+	// is t itself at depth 0, so direct children are the start elements
+	// found at depth 1. t and deeper descendants are not children.
+	var direct []*Token
+	depth := 0
+	for _, token := range tokens {
+		switch token.Type {
+		case StartElement:
+			if depth == 1 {
+				direct = append(direct, token)
+			}
+			depth++
+		case EndElement:
+			depth--
+		}
+	}
+
+	t.children = direct
+	t.childrenParsed = true
+	return t.children
+}
+
+// CharData returns the text of the token. For character data, processing
+// instructions, comments and directives that is the token's own Data. For a
+// start element it is the concatenation of all character data inside the
+// element, computed once and cached. Other tokens yield "".
+func (t *Token) CharData() string {
+	switch t.Type {
+	case CharData, ProcInst, Comment, Directive:
+		return t.Data
+	}
+
+	if t.chardataParsed {
+		return t.charData
+	}
+
+	element := t.Element()
+	if len(element) == 0 {
+		return ""
+	}
+
+	var text strings.Builder
+	for _, tok := range element {
+		if tok.Type != CharData {
+			continue
+		}
+		text.WriteString(tok.Data)
+	}
+
+	t.charData = text.String()
+	t.chardataParsed = true
+	return t.charData
+}
+
+// SubParser returns a decoder-less Parser over the tokens of the element
+// opened by t, or nil when t is not a start element.
+func (t *Token) SubParser() *Parser {
+	if t.Type == StartElement {
+		return NewFromTokens(t.Element())
+	}
+	return nil
+}
--- a/xmlparsing/xmlprovider.go
+++ b/xmlparsing/xmlprovider.go
@@ -0,0 +1,215 @@
+package xmlparsing
+
+import (
+	"iter"
+	"slices"
+	"sync"
+	"time"
+)
+
+// ParseSource tells what kind of source a parse was started from.
+type ParseSource int
+
+const (
+	SourceUnknown ParseSource = iota
+	Path
+	Commit
+)
+
+// ParseMeta identifies one parse run and records the paths that failed
+// during it.
+type ParseMeta struct {
+	Source  ParseSource
+	BaseDir string
+	Commit  string
+	Date    time.Time
+
+	FailedPaths []string
+}
+
+// Equals reports whether p and other describe the same parse run, i.e. agree
+// in Source, BaseDir, Commit and Date. FailedPaths is not compared.
+//
+// Date is compared with time.Time.Equal: == on time.Time also compares the
+// location and the monotonic clock reading, so the same instant could
+// otherwise compare as different.
+func (p ParseMeta) Equals(other ParseMeta) bool {
+	return p.Source == other.Source &&
+		p.BaseDir == other.BaseDir &&
+		p.Commit == other.Commit &&
+		p.Date.Equal(other.Date)
+}
+
+// Failed reports whether path is listed in p.FailedPaths.
+func (p ParseMeta) Failed(path string) bool {
+	return slices.Contains(p.FailedPaths, path)
+}
+
+// An XMLParser holds the deserialized XML items of one type T. It combines
+// multiple parses: items from an earlier parse are kept when the latest parse
+// failed for the path they were read from (see Cleanup).
+type XMLParser[T IXMLItem] struct {
+	// INFO: values are of type *T, stored under every key returned by the
+	// item's Keys method.
+	Items sync.Map
+	// INFO: values are of type ItemInfo, stored under the same keys as Items.
+	Infos sync.Map
+
+	// INFO: Resolver is used to resolve references (back-links) between XML items.
+	Resolver Resolver[T]
+
+	// mu guards array.
+	mu sync.RWMutex
+	// TODO: This array is meant to be for iteration purposes, since iteration over the sync.Map is slow.
+	array []T
+}
+
+// NewXMLParser returns an empty XMLParser whose Resolver is ready for use.
+//
+// The embedded Resolver is initialised in place with Clear, which installs a
+// new empty index. Dereferencing the result of NewResolver instead would copy
+// a struct that contains a sync.RWMutex.
+func NewXMLParser[T IXMLItem]() *XMLParser[T] {
+	p := &XMLParser[T]{}
+	p.Resolver.Clear()
+	return p
+}
+
+// Prepare starts a new parse: it replaces the iteration array with an empty
+// one of the previous size and clears the Resolver.
+//
+// INFO: To parse something, call Prepare, then Serialize, then Cleanup.
+// Prepare and Cleanup are called once per parse. Serialize is called for
+// every path and can be called concurrently.
+func (p *XMLParser[T]) Prepare() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	previous := len(p.array)
+	p.array = make([]T, 0, previous)
+	p.Resolver.Clear()
+}
+
+// Serialize decodes the file at path into dataholder and stores every item
+// returned by dataholder.Children: the item and an ItemInfo (path and latest)
+// are stored under each of the item's keys, its references are registered
+// with the Resolver, and all items are appended to the iteration array.
+// If the file cannot be decoded the error is returned and nothing is stored.
+func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, latest ParseMeta) error {
+	if err := UnmarshalFile(path, dataholder); err != nil {
+		return err
+	}
+
+	newItems := dataholder.Children()
+
+	for _, item := range newItems {
+		// INFO: Mostly it's just one ID, so the double loop is not that bad.
+		for _, id := range item.Keys() {
+			p.Infos.Store(id, ItemInfo{Source: path, Parse: latest})
+			// All keys of one item store the same pointer; Cleanup relies on
+			// that to recognise an item it has already seen.
+			p.Items.Store(id, &item)
+		}
+
+		p.addResolvable(item)
+	}
+
+	// Only the append to the shared array needs p.mu; the sync.Maps and the
+	// Resolver do their own locking.
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.array = append(p.array, newItems...)
+	return nil
+}
+
+// INFO: Cleanup is called after all paths have been serialized.
+// It deletes all items that have not been parsed in the last commit,
+// and whose filepath has not been marked as failed.
+// Items from an older parse whose path did fail in the latest parse are kept
+// and added again to the iteration array and the Resolver.
+func (p *XMLParser[T]) Cleanup(latest ParseMeta) {
+	todelete := make([]any, 0)
+	toappend := make([]*T, 0)
+	p.Infos.Range(func(key, value interface{}) bool {
+		info := value.(ItemInfo)
+		// Entries written by the latest parse are current and left alone.
+		if !info.Parse.Equals(latest) {
+			if !latest.Failed(info.Source) {
+				// The source file was parsed fine but no longer contains
+				// this key: the entry is stale.
+				todelete = append(todelete, key)
+			} else {
+				// The source file failed this time: keep the old item.
+				item, ok := p.Items.Load(key)
+				if ok {
+					i := item.(*T)
+					// An item with several keys shows up once per key;
+					// collect each pointer only once.
+					if !slices.Contains(toappend, i) {
+						toappend = append(toappend, i)
+					}
+				}
+			}
+		}
+		return true
+	})
+
+	// Deletion happens after Range has finished.
+	for _, key := range todelete {
+		p.Infos.Delete(key)
+		p.Items.Delete(key)
+	}
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for _, item := range toappend {
+		p.array = append(p.array, *item)
+		p.addResolvable(*item)
+	}
+}
+
+// addResolvable registers the references of item with the Resolver. Items
+// that do not implement ReferenceResolver are ignored.
+func (p *XMLParser[T]) addResolvable(item T) {
+	rr, ok := any(item).(ReferenceResolver[T])
+	if !ok {
+		return
+	}
+
+	for name, refs := range rr.References() {
+		for _, ref := range refs {
+			// Point every reference back at the item it was found in.
+			ref.Item = &item
+			p.Resolver.Add(name, ref.Reference, ref)
+		}
+	}
+}
+
+// ReverseLookup returns all references registered for any key of item, looked
+// up under item.Type(). Keys without references are skipped.
+func (p *XMLParser[T]) ReverseLookup(item IXMLItem) (ret []Resolved[T]) {
+	// INFO: every key of the item is looked up, not only the first one.
+	for _, key := range item.Keys() {
+		hits, err := p.Resolver.Get(item.Type(), key)
+		if err != nil {
+			continue
+		}
+		ret = append(ret, hits...)
+	}
+
+	return ret
+}
+
+// String concatenates the String output of all items in the iteration array,
+// in order, while holding the read lock.
+func (p *XMLParser[T]) String() (s string) {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	for i := range p.array {
+		s += p.array[i].String()
+	}
+	return s
+}
+
+// Info returns the ItemInfo stored for id, or the zero ItemInfo when there is
+// none.
+func (p *XMLParser[T]) Info(id string) ItemInfo {
+	if v, found := p.Infos.Load(id); found {
+		return v.(ItemInfo)
+	}
+	return ItemInfo{}
+}
+
+// Item returns the item stored under id, or nil when there is none.
+func (p *XMLParser[T]) Item(id any) *T {
+	if v, found := p.Items.Load(id); found {
+		return v.(*T)
+	}
+	return nil
+}
+
+// Filter returns a sequence over the items for which f returns true. The read
+// lock is held for the whole duration of an iteration.
+func (p *XMLParser[T]) Filter(f func(T) bool) iter.Seq[T] {
+	return func(yield func(T) bool) {
+		p.mu.RLock()
+		defer p.mu.RUnlock()
+
+		items := p.array
+		for i := range items {
+			item := items[i]
+			if !f(item) {
+				continue
+			}
+			if !yield(item) {
+				return
+			}
+		}
+	}
+}
+
+// Iterate returns a sequence over all items in the iteration array. The read
+// lock is held for the whole duration of an iteration.
+func (p *XMLParser[T]) Iterate() iter.Seq[T] {
+	return func(yield func(T) bool) {
+		p.mu.RLock()
+		defer p.mu.RUnlock()
+
+		items := p.array
+		for i := range items {
+			if !yield(items[i]) {
+				return
+			}
+		}
+	}
+}
+
+// Count returns the number of items in the iteration array, read under the
+// read lock.
+func (p *XMLParser[T]) Count() int {
+	p.RLock()
+	defer p.RUnlock()
+	return len(p.array)
+}
+
+// INFO: These are reading locks.
+
+// RLock takes the read lock that guards the iteration array.
+func (p *XMLParser[T]) RLock() {
+	p.mu.RLock()
+}
+
+// RUnlock releases the read lock taken with RLock.
+func (p *XMLParser[T]) RUnlock() {
+	p.mu.RUnlock()
+}
--- a/xmlparsing/xmlsort.go
+++ b/xmlparsing/xmlsort.go
@@ -0,0 +1 @@
+package xmlparsing
--- a/xmlparsing/xsdtime.go
+++ b/xmlparsing/xsdtime.go
@@ -0,0 +1,474 @@
+package xmlparsing
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"strconv"
+)
+
+// An implementation of the xsd 1.1 datatypes:
+// date, gDay, gMonth, gMonthDay, gYear, gYearMonth.
+
+// XSDDatetype names the XSD date datatype an XSDDate value represents.
+type XSDDatetype int
+
+// Seperator is a byte-sized type; it is not referenced by the code in this
+// file.
+type Seperator byte
+
+const (
+	DEFAULT_YEAR  = 0
+	DEFAULT_DAY   = 1
+	DEFAULT_MONTH = 1
+
+	// ASCII byte values used while scanning a date.
+	MIN_ALLOWED_NUMBER = 0x30 // '0'
+	MAX_ALLOWED_NUMBER = 0x39 // '9'
+	SIGN               = 0x2D // '-'
+	SEPERATOR          = 0x2D // '-'
+	PLUS               = 0x2B // '+'
+	COLON              = 0x3A // ':'
+	TIMEZONE           = 0x5A // 'Z'
+	NONE               = 0x00 // NUL byte
+)
+
+// The XSDDatetype values. Unknown is the zero value and means the type has
+// not been determined yet; Invalid marks a value that failed to parse or
+// validate. The remaining values are the datatypes told apart by inferState.
+const (
+	Unknown XSDDatetype = iota
+	Invalid
+	Date
+	GDay
+	GMonth
+	GYear
+	GMonthDay
+	GYearMonth
+)
+
+// XSDDate is a parsed XSD date value (date, gDay, gMonth, gMonthDay, gYear or
+// gYearMonth).
+//
+// base is the byte slice the value was parsed from. Year, Month and Day hold
+// the parsed components, and the has* flags record which components (and
+// whether a timezone) were present in the input. TZH and TZM are the hours
+// and minutes of the timezone offset. state caches the inferred datatype and
+// error is set once parsing has failed.
+type XSDDate struct {
+	base []byte
+
+	Year  int
+	Month int
+	Day   int
+
+	hasTimezone bool
+	hasYear     bool
+	hasMonth    bool
+	hasDay      bool
+
+	TZH int
+	TZM int
+
+	state XSDDatetype
+	error bool
+
+	// INFO: XSD Date Datatypes typically describe a duration in the value space.
+	// TimeError  bool
+	// BaseTime     time.Time
+	// BaseDuration time.Duration
+}
+
+// Sanity check:
+// MONTH DAY + Date: Sanity check Month and Day. Additional checks:
+//		- Month: 2 - Day < 30
+// 		- Month: 4, 6, 9, 11 - Day < 31
+// 		- Month: 1, 3, 5, 7, 8, 10, 12 - Day < 32
+// YEAR + Date: Sanity check Year + February 29. Check zero padding.
+// Additional checks:
+//		- Feb 29 on leap years: y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)
+//		-> Check last 2 digits: if both are zero, check first two digits.
+//			 Else if last digit is n % 4 == 0, the second to last digit m % 2 == 0
+//			 Else if last digit is n % 4 == 2, the second to last digit m % 2 == 1
+//			 Else its not a leap year.
+//		- no 0000 Year
+//
+
+// New parses s into an XSDDate. The value is returned together with any
+// parse error; after an error it may be partially filled.
+func New(s string) (XSDDate, error) {
+	var dt XSDDate
+	err := dt.Parse([]byte(s))
+	return dt, err
+}
+
+// String renders the date in XSD lexical form, e.g. "2006-01-02", "--03",
+// "---29" or "-1234567-12Z". Components that are zero are left out. A
+// timezone is written as "Z" when the offset is zero and as "+hh:mm" or
+// "-hh:mm" otherwise.
+//
+// The year is written with at least four digits, as the XSD lexical form
+// requires: year 99 becomes "0099", not "99".
+func (d XSDDate) String() string {
+	var s string
+	if d.Year != 0 {
+		// Pad the magnitude and put the sign in front, so -99 is "-0099".
+		if d.Year < 0 {
+			s += fmt.Sprintf("-%04d", -d.Year)
+		} else {
+			s += fmt.Sprintf("%04d", d.Year)
+		}
+	}
+
+	if d.Month != 0 {
+		if d.Year == 0 {
+			// gMonth and gMonthDay start with "--".
+			s += "-"
+		}
+		s += fmt.Sprintf("-%02d", d.Month)
+	}
+
+	if d.Day != 0 {
+		if d.Year == 0 && d.Month == 0 {
+			// gDay starts with "---".
+			s += "--"
+		}
+		s += fmt.Sprintf("-%02d", d.Day)
+	}
+
+	if d.hasTimezone {
+		if d.TZH == 0 && d.TZM == 0 {
+			s += "Z"
+		} else {
+			sign := "+"
+			hours := d.TZH
+			if hours < 0 {
+				sign = "-"
+				hours = -hours
+			}
+			s += fmt.Sprintf("%s%02d:%02d", sign, hours, d.TZM)
+		}
+	}
+
+	return s
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler by parsing text as an XSD
+// date.
+//
+// Parse keeps the slice it is given as the value's base, while the
+// TextUnmarshaler contract requires implementations to copy text they retain
+// after returning. Parse therefore gets a private copy.
+func (d *XSDDate) UnmarshalText(text []byte) error {
+	return d.Parse(append([]byte(nil), text...))
+}
+
+// MarshalText implements encoding.TextMarshaler by returning the bytes of
+// d.String(). It never returns an error.
+func (d XSDDate) MarshalText() ([]byte, error) {
+	return []byte(d.String()), nil
+}
+
+// Parse reads an XSD date (date, gYear, gYearMonth, gMonth, gMonthDay or
+// gDay, each with an optional timezone) from s and fills in the components
+// it finds. s is kept as the value's base.
+//
+// It returns an error, and marks the value invalid, when s is shorter than
+// four bytes or a component cannot be read. Truncated input such as "2006-"
+// or "2006-1" is reported as an error instead of indexing past the end of
+// the slice. Component ranges are not checked here; see Validate.
+func (xsdd *XSDDate) Parse(s []byte) error {
+	xsdd.base = s
+
+	// The smallest possible date is 4 chars long
+	if len(s) < 4 {
+		return xsdd.parseError("Date too short")
+	}
+
+	// Check for Z, then check for timezone
+	if len(s) >= 5 && s[len(s)-1] == TIMEZONE {
+		xsdd.hasTimezone = true
+		s = s[:len(s)-1]
+	} else if len(s) >= 10 {
+		// A tail that is not a timezone simply stays part of the date.
+		if err := xsdd.parseTimezone(s[len(s)-6:]); err == nil {
+			s = s[:len(s)-6]
+		}
+	}
+
+	// Year. s still has at least four bytes here.
+	if s[1] != SEPERATOR {
+		// The first three bytes (optional sign and digits) are validated by
+		// Btoi below; from the fourth on, scan to the end of the digits.
+		i := 3
+		for ; i < len(s); i++ {
+			if s[i] < MIN_ALLOWED_NUMBER || s[i] > MAX_ALLOWED_NUMBER {
+				break
+			}
+		}
+
+		yint, err := Btoi(s[:i])
+		if err != nil {
+			return xsdd.parseError(fmt.Sprintf("Invalid year: %q", s[:i]))
+		}
+		xsdd.Year = yint
+		xsdd.hasYear = true
+
+		if i == len(s) {
+			return nil
+		}
+
+		s = s[i+1:]
+	} else {
+		s = s[2:]
+	}
+
+	// Nothing may be left after the year's separator (e.g. "2006-").
+	if len(s) == 0 {
+		return xsdd.parseError("Date ends after separator")
+	}
+
+	// Left are 02 (Month), -02 (Day), 02-02 (Date)
+	if s[0] != SEPERATOR {
+		if len(s) < 2 {
+			return xsdd.parseError(fmt.Sprintf("Invalid month: %q", s))
+		}
+
+		mstr := s[:2]
+		mint, err := Btoi(mstr)
+		if err != nil {
+			return xsdd.parseError(fmt.Sprintf("Invalid month: %q", mstr))
+		}
+
+		xsdd.Month = mint
+		xsdd.hasMonth = true
+		s = s[2:]
+		if len(s) == 0 {
+			return nil
+		} else if len(s) != 3 || s[0] != SEPERATOR {
+			return xsdd.parseError(fmt.Sprintf("Invalid date ending: %q", s))
+		}
+	}
+
+	s = s[1:]
+
+	// Left is 02 Day
+	dint, err := Btoi(s)
+	if err != nil {
+		return xsdd.parseError(fmt.Sprintf("Invalid day: %q", s))
+	}
+
+	xsdd.Day = dint
+	xsdd.hasDay = true
+
+	return nil
+}
+
+// WD_CALC_MATRIX holds the per-month offsets used by Weekday.
+var WD_CALC_MATRIX = []int{0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4}
+
+// Weekday returns the day of the week as a number from 0 to 6. The table and
+// formula match Sakamoto's method, in which 0 is Sunday.
+//
+// It returns -1 when Month is outside 1..12 (for example for a gYear value)
+// instead of indexing outside the table.
+// NOTE(review): results for years below 1 are normalised into 0..6 but have
+// not been verified against a calendar.
+func (xsdd XSDDate) Weekday() int {
+	if xsdd.Month < 1 || xsdd.Month > len(WD_CALC_MATRIX) {
+		return -1
+	}
+
+	y := xsdd.Year
+	if xsdd.Month < 3 {
+		y--
+	}
+
+	wd := (y + y/4 - y/100 + y/400 + WD_CALC_MATRIX[xsdd.Month-1] + xsdd.Day) % 7
+	if wd < 0 {
+		// Go's % keeps the sign of the dividend.
+		wd += 7
+	}
+	return wd
+}
+
+// Base returns the byte slice the value was parsed from. The slice is
+// returned as is, not copied.
+func (xsdd XSDDate) Base() []byte {
+	return xsdd.base
+}
+
+// Type returns the datatype of the value. If it has not been determined yet
+// it is inferred on the fly; because of the value receiver that result is
+// not stored in the caller's value.
+func (xsdd XSDDate) Type() XSDDatetype {
+	if xsdd.state != Unknown {
+		return xsdd.state
+	}
+
+	xsdd.Validate()
+	return xsdd.state
+}
+
+// Validate determines the datatype of the value, stores it and reports
+// whether the value is valid. A value that failed to parse or was never
+// parsed is invalid.
+func (xsdd *XSDDate) Validate() bool {
+	if xsdd.error || len(xsdd.base) == 0 {
+		xsdd.state = Invalid
+		return false
+	}
+
+	xsdd.state = xsdd.inferState()
+	return xsdd.state != Invalid
+}
+
+// parseError marks the value as failed and invalid and returns a new error
+// with message s.
+func (xsdd *XSDDate) parseError(s string) error {
+	xsdd.error = true
+	xsdd.state = Invalid
+	return errors.New(s)
+}
+
+// parseTimezone reads a timezone offset of the form "+hh:mm" or "-hh:mm" from
+// s and stores it in TZH and TZM. The value is only modified on success.
+//
+// TZH carries the sign of the offset, so an offset such as "-00:30" loses
+// its sign. Hour and minute ranges are not checked.
+func (xsdd *XSDDate) parseTimezone(s []byte) error {
+	// INFO: We assume the check for 'Z' has already been done
+	if len(s) != 6 || s[3] != COLON || (s[0] != PLUS && s[0] != SIGN) {
+		return fmt.Errorf("Invalid timezone")
+	}
+
+	// The hour includes the sign byte. The offending text is quoted with %q;
+	// %v would print the byte values as a list of numbers.
+	h, err := Btoi(s[:3])
+	if err != nil {
+		return fmt.Errorf("Invalid hour: %q", s[:3])
+	}
+
+	m, err := Btoi(s[4:])
+	if err != nil {
+		return fmt.Errorf("Invalid minute: %q", s[4:])
+	}
+
+	xsdd.hasTimezone = true
+	xsdd.TZH = h
+	xsdd.TZM = m
+
+	return nil
+}
+
+// inferState derives the datatype from the components that were present in
+// the input and checks them with the matching validator. It returns Invalid
+// when the check fails or no component is present.
+func (xsdd XSDDate) inferState() XSDDatetype {
+	var (
+		kind  XSDDatetype
+		valid bool
+	)
+
+	// The cases are ordered from most to least specific.
+	switch {
+	case xsdd.hasYear && xsdd.hasMonth && xsdd.hasDay:
+		kind, valid = Date, validDayMonthYear(xsdd.Year, xsdd.Month, xsdd.Day)
+	case xsdd.hasYear && xsdd.hasMonth:
+		kind, valid = GYearMonth, validMonth(xsdd.Month) && validYear(xsdd.Year)
+	case xsdd.hasMonth && xsdd.hasDay:
+		kind, valid = GMonthDay, validDayMonth(xsdd.Day, xsdd.Month)
+	case xsdd.hasYear:
+		kind, valid = GYear, validYear(xsdd.Year)
+	case xsdd.hasMonth:
+		kind, valid = GMonth, validMonth(xsdd.Month)
+	case xsdd.hasDay:
+		kind, valid = GDay, validDay(xsdd.Day)
+	default:
+		return Invalid
+	}
+
+	if !valid {
+		return Invalid
+	}
+	return kind
+}
+
+// Before reports whether xsdd sorts strictly before other, comparing year,
+// then month, then day. Timezones and validity are not considered.
+func (xsdd XSDDate) Before(other XSDDate) bool {
+	switch {
+	case xsdd.Year != other.Year:
+		return xsdd.Year < other.Year
+	case xsdd.Month != other.Month:
+		return xsdd.Month < other.Month
+	default:
+		return xsdd.Day < other.Day
+	}
+}
+
+// Compare returns -1, 0 or 1 depending on whether xsddate sorts before,
+// equal to or after other, comparing year, then month, then day.
+//
+// Both values are validated first: an invalid receiver yields -1, otherwise
+// an invalid other yields 1.
+func (xsddate *XSDDate) Compare(other *XSDDate) int {
+	if !xsddate.Validate() {
+		return -1
+	}
+	if !other.Validate() {
+		return 1
+	}
+
+	components := [3][2]int{
+		{xsddate.Year, other.Year},
+		{xsddate.Month, other.Month},
+		{xsddate.Day, other.Day},
+	}
+	for _, c := range components {
+		if c[0] < c[1] {
+			return -1
+		}
+		if c[0] > c[1] {
+			return 1
+		}
+	}
+
+	return 0
+}
+
+// validDay reports whether i is a possible day of a month (1..31).
+func validDay(i int) bool {
+	return i >= 1 && i <= 31
+}
+
+// validMonth reports whether i is a month number (1..12).
+func validMonth(i int) bool {
+	return i >= 1 && i <= 12
+}
+
+// validYear reports whether i is an allowed year; only year 0 is rejected.
+func validYear(i int) bool {
+	return i != 0
+}
+
+// validDayMonth reports whether day d exists in month m of some year:
+// February allows up to 29 days, April, June, September and November up to
+// 30 and the other months up to 31.
+func validDayMonth(d int, m int) bool {
+	if !validDay(d) || !validMonth(m) {
+		return false
+	}
+
+	switch m {
+	case 2:
+		return d <= 29
+	case 4, 6, 9, 11:
+		return d <= 30
+	}
+
+	return true
+}
+
+// validDayMonthYear reports whether y-m-d is an existing calendar date.
+//
+// The day is checked against the length of the month, so February 30 or
+// April 31 are rejected, and February 29 is only accepted in leap years.
+func validDayMonthYear(y int, m int, d int) bool {
+	if !validYear(y) || !validDayMonth(d, m) {
+		return false
+	}
+
+	if m == 2 && d == 29 {
+		return y%4 == 0 && (y%100 != 0 || y%400 == 0)
+	}
+
+	return true
+}
+
+// ErrNoNumber is returned by Btoi when the input is not a decimal number.
+var ErrNoNumber = errors.New("Byte input is NaN")
+
+// ErrOverflow is returned by Btoi when the number does not fit into an int.
+var ErrOverflow = errors.New("Byte input overflows int")
+
+// Btoi converts ASCII bytes holding a decimal number with an optional leading
+// '+' or '-' to an int. Short inputs are converted without allocating.
+//
+// It returns ErrNoNumber for empty input, for a sign without digits and for
+// input containing a non-digit. Inputs too long for the fast path go through
+// strconv.ParseInt, whose error is returned as is.
+// INFO: converts ASCII []byte to the integer represented by the string w/o alloc.
+func Btoi(bs []byte) (int, error) {
+	l := len(bs)
+	if l == 0 {
+		return 0, ErrNoNumber
+	}
+
+	// slow path for large numbers (-> strconv.Atoi):
+	if strconv.IntSize == 32 && l > 9 || strconv.IntSize == 64 && l > 18 {
+		i, err := strconv.ParseInt(string(bs), 10, 64)
+		if err != nil {
+			return 0, err
+		}
+
+		if i > math.MaxInt || i < math.MinInt {
+			return 0, ErrOverflow
+		}
+
+		return int(i), nil
+	}
+
+	negative := false
+	switch bs[0] {
+	case '+':
+		bs = bs[1:]
+	case '-':
+		bs = bs[1:]
+		negative = true
+	}
+
+	// A lone sign is not a number.
+	if len(bs) == 0 {
+		return 0, ErrNoNumber
+	}
+
+	// At most 18 (9 on 32-bit) bytes reach this point, so the sum cannot
+	// overflow.
+	var ret int
+	for _, b := range bs {
+		if b < '0' || b > '9' {
+			return 0, ErrNoNumber
+		}
+
+		ret = ret*10 + int(b-'0')
+	}
+
+	if negative {
+		ret = -ret
+	}
+
+	return ret, nil
+}
--- a/xmlparsing/xsdtime_test.go
+++ b/xmlparsing/xsdtime_test.go
@@ -0,0 +1,69 @@
+package xmlparsing
+
+import "testing"
+
+// Test is one table entry: the Input string, the XSDDate fields expected
+// after parsing it, and the datatype the input represents.
+type Test struct {
+	Input  string
+	Output XSDDate
+	Type   XSDDatetype
+}
+
+// tests lists inputs with the components and datatype they are expected to
+// parse to. Every input is also expected to survive a parse/String round
+// trip unchanged.
+var tests = []Test{
+	// A full year-month-day value is a Date, not a GYear.
+	{"2006-01-02", XSDDate{Year: 2006, Month: 1, Day: 2}, Date},
+	{"-1222-01-02", XSDDate{Year: -1222, Month: 1, Day: 2}, Date},
+	{"-2777", XSDDate{Year: -2777}, GYear},
+	{"1988-12:30", XSDDate{Year: 1988, hasTimezone: true, TZH: -12, TZM: 30}, GYear},
+	{"--03+05:00", XSDDate{Month: 3, hasTimezone: true, TZH: 5, TZM: 0}, GMonth},
+	{"---29", XSDDate{Day: 29}, GDay},
+	{"-1234567-12Z", XSDDate{Year: -1234567, Month: 12, hasTimezone: true, TZH: 0, TZM: 0}, GYearMonth},
+	{"-1234567-12+05:00", XSDDate{Year: -1234567, Month: 12, hasTimezone: true, TZH: 5, TZM: 0}, GYearMonth},
+	{"--12-31", XSDDate{Month: 12, Day: 31}, GMonthDay},
+}
+
+// TestXSDTimeParse parses every table input and compares year, month, day
+// and the timezone fields with the expected value. The Type column of the
+// table is not checked here.
+func TestXSDTimeParse(t *testing.T) {
+	for _, test := range tests {
+		dt, err := New(test.Input)
+		if err != nil {
+			t.Errorf("Error parsing %v: %v", test.Input, err)
+			continue
+		}
+
+		if dt.Year != test.Output.Year {
+			t.Errorf("Year mismatch for %v: expected %v, got %v", test.Input, test.Output.Year, dt.Year)
+		}
+
+		if dt.Month != test.Output.Month {
+			t.Errorf("Month mismatch for %v: expected %v, got %v", test.Input, test.Output.Month, dt.Month)
+		}
+
+		if dt.Day != test.Output.Day {
+			t.Errorf("Day mismatch for %v: expected %v, got %v", test.Input, test.Output.Day, dt.Day)
+		}
+
+		// Presence of a timezone.
+		if dt.hasTimezone != test.Output.hasTimezone {
+			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.hasTimezone, dt.hasTimezone)
+		}
+
+		// Hours of the timezone offset.
+		if dt.TZH != test.Output.TZH {
+			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.TZH, dt.TZH)
+		}
+
+		// Minutes of the timezone offset.
+		if dt.TZM != test.Output.TZM {
+			t.Errorf("Timezone mismatch for %v: expected %v, got %v", test.Input, test.Output.TZM, dt.TZM)
+		}
+	}
+}
+
+// TestXSDTimeString checks the round trip: parsing a table input and
+// formatting it with String must reproduce the input exactly.
+func TestXSDTimeString(t *testing.T) {
+	for _, test := range tests {
+		dt, err := New(test.Input)
+		if err != nil {
+			t.Errorf("Error parsing %v: %v", test.Input, err)
+			continue
+		}
+
+		if dt.String() != test.Input {
+			t.Errorf("String mismatch for %v: expected %v, got %v", test.Input, test.Input, dt.String())
+		}
+	}
+}