Files
kgpz_web/providers/geonames/geonames.go
Simon Martens f90468085c orte provider
2025-09-25 15:01:26 +02:00

266 lines
5.7 KiB
Go

package geonames
import (
"encoding/json"
"errors"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
)
// Connection settings for the Geonames web service used by fetchPlace.
const (
// GEONAMES_API_URL is the endpoint that is queried with a geonameId and a
// username query parameter.
GEONAMES_API_URL = "http://api.geonames.org/getJSON"
// GEONAMES_USERNAME is the account name sent as the username query
// parameter with every request.
GEONAMES_USERNAME = "theodorspringmans"
)
// GeonamesProvider keeps Place values fetched from the Geonames API in memory,
// can persist them to / load them from a folder of JSON files, and remembers
// which Geonames URLs failed to fetch.
type GeonamesProvider struct {
// Mutex is for file reading & writing, not place map access
mu sync.Mutex
// Places maps a Geonames URL (Place.KGPZURL) to its Place value.
Places sync.Map
// INFO: this holds errors that occurred during fetching
// and is used to prevent further fetches of the same place.
// Keys are Geonames URLs, values are HTTP status codes; only non-200
// responses with a status below 500 are recorded. Guarded by errmu.
errmu sync.Mutex
errs map[string]int
}
// NewGeonamesProvider returns a provider with an empty place cache and an
// initialized, empty fetch-error map.
func NewGeonamesProvider() *GeonamesProvider {
	provider := new(GeonamesProvider)
	provider.errs = map[string]int{}
	return provider
}
// ReadCache loads all cached places from folder into p.Places. It holds the
// file mutex for the whole operation and returns whatever readPlaces returns.
func (p *GeonamesProvider) ReadCache(folder string) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	return p.readPlaces(folder)
}
// readPlaces reads every "*.json" file in folder concurrently and stores the
// decoded places in p.Places.
//
// If folder does not exist it is created and nothing is read. If folder exists
// but is not a directory the function returns nil without reading anything.
// Failures on individual files are logged by readPlace and not returned.
func (p *GeonamesProvider) readPlaces(folder string) error {
	stat, err := os.Stat(folder)
	switch {
	case os.IsNotExist(err):
		return os.MkdirAll(folder, 0755)
	case err != nil:
		return err
	case !stat.IsDir():
		return nil
	}

	// TODO: try to recover by recreating the folder
	matches, err := filepath.Glob(filepath.Join(folder, "*.json"))
	if err != nil {
		return err
	}

	// One goroutine per cache file; wait for all of them before returning.
	var pending sync.WaitGroup
	for _, match := range matches {
		pending.Add(1)
		go func(path string) {
			defer pending.Done()
			p.readPlace(path)
		}(match)
	}
	pending.Wait()

	return nil
}
// readPlace decodes a single JSON cache file into a Place and stores it in
// p.Places under its KGPZURL. Errors are logged and the file is skipped;
// places without a KGPZURL are silently ignored.
func (p *GeonamesProvider) readPlace(file string) {
	handle, err := os.Open(file)
	if err != nil {
		logging.Error(err, "Error opening file for reading: "+file)
		return
	}
	defer handle.Close()

	raw, err := io.ReadAll(handle)
	if err != nil {
		logging.Error(err, "Error reading file: "+file)
		return
	}

	var cached Place
	if err = json.Unmarshal(raw, &cached); err != nil {
		logging.Error(err, "Error unmarshalling file:"+file)
		return
	}

	// Without a URL there is no key to store the place under.
	if cached.KGPZURL == "" {
		return
	}
	p.Places.Store(cached.KGPZURL, cached)
}
// WriteCache persists every place in p.Places to folder. It holds the file
// mutex for the whole operation and returns whatever writePlaces returns.
func (p *GeonamesProvider) WriteCache(folder string) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	return p.writePlaces(folder)
}
// writePlaces writes every place in p.Places to its own JSON file in folder,
// one goroutine per place, and waits for all writes to finish.
//
// INFO: this writes all places to the cache folder.
// We do that on every fetch, it's easier that way.
//
// If folder does not exist it is created first and the places are then
// written into it. If folder exists but is not a directory, nothing is written
// and nil is returned (mirrors readPlaces). Failures on individual files are
// logged by writePlace and not returned.
func (p *GeonamesProvider) writePlaces(folder string) error {
	info, err := os.Stat(folder)
	switch {
	case os.IsNotExist(err):
		// os.Stat wraps the sentinel in a *PathError, so a plain
		// `err == os.ErrNotExist` comparison never matches.
		if err := os.MkdirAll(folder, 0755); err != nil {
			return err
		}
	case err != nil:
		return err
	case !info.IsDir():
		return nil
	}

	var wg sync.WaitGroup
	p.Places.Range(func(key, value interface{}) bool {
		wg.Add(1)
		go func(key string, value Place) {
			defer wg.Done()
			p.writePlace(folder, key, value)
		}(key.(string), value.(Place))
		return true
	})
	wg.Wait()

	return nil
}
// writePlace serializes place as JSON into "<folder>/<place.KGPZID>.json".
// id is only used in log messages. Errors are logged, not returned.
//
// INFO: this overwrites any existing files
func (p *GeonamesProvider) writePlace(folder, id string, place Place) {
	// Marshal before touching the file system: os.Create truncates an
	// existing file, so a marshalling failure must not be able to leave an
	// empty cache file behind.
	data, err := json.Marshal(place)
	if err != nil {
		logging.Error(err, "Error marshalling place: "+id)
		return
	}

	target := filepath.Join(folder, place.KGPZID+".json")
	f, err := os.Create(target)
	if err != nil {
		logging.Error(err, "Error creating file for writing: "+id)
		return
	}

	_, writeErr := f.Write(data)
	// Close explicitly: for a written file, Close can report a write failure.
	closeErr := f.Close()
	if writeErr != nil {
		logging.Error(writeErr, "Error writing file: "+id)
		return
	}
	if closeErr != nil {
		logging.Error(closeErr, "Error writing file: "+id)
	}
}
// Place returns a pointer to a copy of the place stored under id (the key
// used in p.Places), or nil if no such place is cached.
func (p *GeonamesProvider) Place(id string) *Place {
	if stored, found := p.Places.Load(id); found {
		result := stored.(Place)
		return &result
	}
	return nil
}
// FetchPlaces fetches every place in places that is not cached yet, one
// goroutine per place, and blocks until all fetches have finished.
//
// Entries are skipped when their ID or Geonames URL is empty, when the URL is
// already present in p.Places, or when an earlier fetch of the URL was
// recorded in p.errs.
func (p *GeonamesProvider) FetchPlaces(places []GeonamesData) {
	var wg sync.WaitGroup
	for _, place := range places {
		if place.ID == "" || place.Geonames == "" {
			continue
		}

		// TODO: place already fetched; check for updates??
		if _, ok := p.Places.Load(place.Geonames); ok {
			continue
		}

		// Release the lock before branching: skipping the entry while still
		// holding errmu would deadlock on the next Lock call.
		p.errmu.Lock()
		_, failed := p.errs[place.Geonames]
		p.errmu.Unlock()
		if failed {
			continue
		}

		wg.Add(1)
		// Pass the values, not the address of the loop variable, so every
		// goroutine works on its own place regardless of Go version.
		go func(id, url string) {
			defer wg.Done()
			p.fetchPlace(id, url)
		}(place.ID, place.Geonames)
	}
	wg.Wait()
}
// fetchPlace requests the Geonames record for GeonamesURL and stores the
// decoded Place in p.Places under that URL, with KGPZID set to ID and KGPZURL
// set to GeonamesURL.
//
// The Geonames ID is taken from the last "/"-separated segment of
// GeonamesURL. The request is attempted up to three times, with a growing
// pause between attempts. A final non-200 response with a status below 500 is
// recorded in p.errs so the URL is not fetched again. All failures are logged;
// nothing is returned.
func (p *GeonamesProvider) fetchPlace(ID, GeonamesURL string) {
	segments := strings.Split(GeonamesURL, "/")
	if len(segments) < 2 {
		logging.Error(nil, "Error parsing Geonames ID from: "+GeonamesURL)
		return
	}
	GeonamesID := segments[len(segments)-1]

	requestURL := GEONAMES_API_URL + "?geonameId=" + GeonamesID + "&username=" + GEONAMES_USERNAME
	logging.Debug("Fetching place: " + ID + " with URL: " + requestURL)

	request, err := http.NewRequest("GET", requestURL, nil)
	if err != nil {
		logging.Error(err, "Error creating request: "+ID)
		return
	}

	// NOTE(review): http.DefaultClient has no timeout; a hanging request
	// blocks the caller indefinitely.
	const maxAttempts = 3
	var response *http.Response
	// INFO: we do 3 attempts with increasing time between them
	for attempt := 0; attempt < maxAttempts; attempt++ {
		response, err = http.DefaultClient.Do(request)
		if err == nil && response.StatusCode < 400 {
			if attempt > 0 {
				logging.Info("Successfully fetched place: " + ID + " after " + strconv.Itoa(attempt) + " retries")
			}
			break
		}

		// Last attempt: keep err/response for the handling below and do not
		// sleep for a retry that never happens.
		if attempt == maxAttempts-1 {
			break
		}

		// A failed response is about to be replaced by the next attempt;
		// drain and close its body so the connection is not leaked.
		if err == nil {
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
		}

		logging.Error(err, "Retry fetching place: "+ID)
		time.Sleep(time.Duration(attempt+1) * time.Second)
	}

	if err != nil {
		logging.Error(err, "Error fetching place: "+ID)
		return
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		// Statuses below 500 are remembered so the place is not requested
		// again; server errors are not recorded.
		if response.StatusCode < 500 {
			p.errmu.Lock()
			p.errs[GeonamesURL] = response.StatusCode
			p.errmu.Unlock()
		}
		logging.Error(errors.New("Error fetching place: " + ID + " with status code: " + http.StatusText(response.StatusCode)))
		return
	}

	body, err := io.ReadAll(response.Body)
	if err != nil {
		logging.Error(err, "Error reading response body: "+ID)
		return
	}

	// For debug purposes: Write response body to file:
	// os.WriteFile("geonames_responses/"+ID+".json", body, 0644)

	geonamesPlace := Place{}
	if err := json.Unmarshal(body, &geonamesPlace); err != nil {
		logging.Error(err, "Error unmarshalling response body: "+ID)
		return
	}

	geonamesPlace.KGPZID = ID
	geonamesPlace.KGPZURL = GeonamesURL
	p.Places.Store(GeonamesURL, geonamesPlace)
}