package xmlparsing import ( "iter" "slices" "sync" "time" ) type ParseSource int const ( SourceUnknown ParseSource = iota Path Commit ) type ParseMeta struct { Source ParseSource BaseDir string Commit string Date time.Time FailedPaths []string } func (p ParseMeta) Equals(other ParseMeta) bool { return p.Source == other.Source && p.BaseDir == other.BaseDir && p.Commit == other.Commit && p.Date == other.Date } func (p ParseMeta) Failed(path string) bool { return slices.Contains(p.FailedPaths, path) } // An XMLParser is a struct that holds holds serialized XML data of a specific type. It combines multiple parses IF a succeeded parse can not serialize the data from a path. type XMLParser[T IXMLItem] struct { // INFO: map is type map[string]*T Items sync.Map // INFO: map is type [string]ItemInfo Infos sync.Map // INFO: Resolver is used to resolve references (back-links) between XML items. Resolver Resolver[T] mu sync.RWMutex // TODO: This array is meant to be for iteration purposes, since iteration over the sync.Map is slow. array []T } func NewXMLParser[T IXMLItem]() *XMLParser[T] { return &XMLParser[T]{Resolver: *NewResolver[T]()} } // INFO: To parse sth, we call Prepare, then Serialize, then Cleanup. // Prepare & Cleanup are called once per parse. Serialize is called for every path. // and can be called concurretly. func (p *XMLParser[T]) Prepare() { p.mu.Lock() defer p.mu.Unlock() p.array = make([]T, 0, len(p.array)) p.Resolver.Clear() } func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, latest ParseMeta) error { if err := UnmarshalFile(path, dataholder); err != nil { return err } newItems := dataholder.Children() for _, item := range newItems { // INFO: Mostly it's just one ID, so the double loop is not that bad. for _, id := range item.Keys() { p.Infos.Store(id, ItemInfo{Source: path, Parse: latest}) p.Items.Store(id, &item) } p.addResolvable(item) } p.mu.Lock() defer p.mu.Unlock() p.array = append(p.array, newItems...) return nil } // INFO: Cleanup is called after all paths have been serialized. // It deletes all items that have not been parsed in the last commit, // and whose filepath has not been marked as failed. func (p *XMLParser[T]) Cleanup(latest ParseMeta) { todelete := make([]any, 0) toappend := make([]*T, 0) p.Infos.Range(func(key, value interface{}) bool { info := value.(ItemInfo) if !info.Parse.Equals(latest) { if !latest.Failed(info.Source) { todelete = append(todelete, key) } else { item, ok := p.Items.Load(key) if ok { i := item.(*T) if !slices.Contains(toappend, i) { toappend = append(toappend, i) } } } } return true }) for _, key := range todelete { p.Infos.Delete(key) p.Items.Delete(key) } p.mu.Lock() defer p.mu.Unlock() for _, item := range toappend { p.array = append(p.array, *item) p.addResolvable(*item) } } func (p *XMLParser[T]) addResolvable(item T) { // INFO: If the item has a GetReferences method, we add the references to the resolver. if rr, ok := any(item).(ReferenceResolver[T]); ok { for name, ids := range rr.References() { for _, res := range ids { res.Item = &item p.Resolver.Add(name, res.Reference, res) } } } } func (p *XMLParser[T]) ReverseLookup(item IXMLItem) (ret []Resolved[T]) { // INFO: this runs just once for the first key keys := item.Keys() for _, key := range keys { r, err := p.Resolver.Get(item.Type(), key) if err == nil { ret = append(ret, r...) } } return } func (a *XMLParser[T]) String() (s string) { a.RLock() defer a.RUnlock() for _, item := range a.array { s += item.String() } return } func (p *XMLParser[T]) Info(id string) ItemInfo { info, ok := p.Infos.Load(id) if !ok { return ItemInfo{} } return info.(ItemInfo) } func (p *XMLParser[T]) Item(id any) *T { item, ok := p.Items.Load(id) if !ok { return nil } i := item.(*T) return i } func (p *XMLParser[T]) Filter(f func(T) bool) iter.Seq[T] { return func(yield func(T) bool) { p.mu.RLock() defer p.mu.RUnlock() for _, v := range p.array { if f(v) && !yield(v) { return } } } } func (p *XMLParser[T]) Iterate() iter.Seq[T] { return func(yield func(T) bool) { p.mu.RLock() defer p.mu.RUnlock() for _, v := range p.array { if !yield(v) { return } } } } func (p *XMLParser[T]) Count() int { p.RLock() defer p.RUnlock() return len(p.array) } // INFO: These are reading locks. func (p *XMLParser[T]) RLock() { p.mu.RLock() } func (p *XMLParser[T]) RUnlock() { p.mu.RUnlock() }