diff --git a/xml/parser.go b/xml/parser.go new file mode 100644 index 0000000..977cd74 --- /dev/null +++ b/xml/parser.go @@ -0,0 +1,130 @@ +package xmlparsing + +import ( + "encoding/xml" + "io" + "iter" + "strings" +) + +type TokenType int + +const ( + StartElement TokenType = iota + EndElement + CharData + Comment + ProcInst + Directive +) + +type Element struct { + Name string + Attributes map[string]string + CharData string +} + +type Token struct { + Name string + Attributes map[string]string + Inner xml.Token + Type TokenType + Data string +} + +type TokenResult[T any] struct { + State T + Token Token + Stack []Element +} + +func Iterate[T any](xmlData string, initialState T) iter.Seq2[*TokenResult[T], error] { + decoder := xml.NewDecoder(strings.NewReader(xmlData)) + stack := []Element{} + state := initialState + return iter.Seq2[*TokenResult[T], error](func(yield func(*TokenResult[T], error) bool) { + for { + token, err := decoder.Token() + if err == io.EOF { + return + } + if err != nil { + yield(nil, err) + return + } + + var customToken Token + switch t := token.(type) { + case xml.StartElement: + elem := Element{ + Name: t.Name.Local, + Attributes: mapAttributes(t.Attr), + CharData: "", + } + stack = append(stack, elem) + customToken = Token{ + Name: t.Name.Local, + Attributes: elem.Attributes, + Inner: t, + Type: StartElement, + } + case xml.EndElement: + if len(stack) > 0 { + stack = stack[:len(stack)-1] + } + customToken = Token{Name: t.Name.Local, Inner: t, Type: EndElement} + case xml.CharData: + text := strings.TrimSpace(string(t)) + if text != "" && len(stack) > 0 { + stack[len(stack)-1].CharData += text + " " + } + customToken = Token{ + Name: "CharData", + Inner: t, + Data: text, + Type: CharData, + } + case xml.Comment: + customToken = Token{ + Name: "Comment", + Inner: t, + Data: string(t), + Type: Comment, + } + case xml.ProcInst: + customToken = Token{ + Name: t.Target, + Inner: t, + Data: string(t.Inst), + Type: ProcInst, + } + case xml.Directive: + customToken = Token{ + Name: "Directive", + Inner: t, + Data: string(t), + Type: Directive, + } + } + + result := &TokenResult[T]{ + State: state, + Token: customToken, + Stack: append([]Element{}, stack...), + } + + if !yield(result, nil) { + return + } + } + }) +} + +// mapAttributes converts xml.Attr to a map[string]string. +func mapAttributes(attrs []xml.Attr) map[string]string { + attrMap := make(map[string]string) + for _, attr := range attrs { + attrMap[attr.Name.Local] = attr.Value + } + return attrMap +} diff --git a/xml/parser_test.go b/xml/parser_test.go new file mode 100644 index 0000000..9551620 --- /dev/null +++ b/xml/parser_test.go @@ -0,0 +1,97 @@ +package xmlparsing + +import ( + "testing" +) + +type TestState struct { + ParsedElements []string +} + +func TestIterate_ValidXML(t *testing.T) { + xmlData := ` + Text + + + + ` + + state := TestState{} + for tokenResult, err := range Iterate(xmlData, state) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if tokenResult == nil { + t.Fatal("Received nil token result") + } + state.ParsedElements = append(state.ParsedElements, tokenResult.Token.Name) + } + + if len(state.ParsedElements) == 0 { + t.Fatal("No elements were parsed") + } +} + +func TestIterate_InvalidXML(t *testing.T) { + xmlData := `` + state := TestState{} + var global error + for _, err := range Iterate(xmlData, state) { + if err != nil { + global = err + } + } + if global == nil { + t.Fatal("Expected error, but got nil") + } +} + +func TestIterate_EmptyXML(t *testing.T) { + xmlData := "" + state := TestState{} + for _, err := range Iterate(xmlData, state) { + if err != nil { + t.Fatalf("Expected iter.ErrEnd, but got: %v", err) + } + } +} + +func TestIterate_CharDataTracking(t *testing.T) { + xmlData := ` + First + Second + ` + + state := TestState{} + charDataCount := 0 + for tokenResult, err := range Iterate(xmlData, state) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if tokenResult.Token.Name == "CharData" { + charDataCount++ + } + } + + if charDataCount != 5 { + t.Fatalf("Expected 2 CharData elements, got %d", charDataCount) + } +} + +func TestIterate_AttributeParsing(t *testing.T) { + xmlData := ` + Content + ` + + state := TestState{} + for tokenResult, err := range Iterate(xmlData, state) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if tokenResult.Token.Name == "child" && tokenResult.Token.Type == StartElement { + if tokenResult.Token.Attributes["attr1"] != "value1" || tokenResult.Token.Attributes["attr2"] != "value2" { + t.Fatalf("Incorrect attributes parsed: %v", tokenResult.Token.Attributes) + } + } + } +}