mirror of
				https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
				synced 2025-10-30 17:45:30 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			268 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			268 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package geonames
 | |
| 
 | |
| import (
 | |
| 	"encoding/json"
 | |
| 	"errors"
 | |
| 	"io"
 | |
| 	"net/http"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
 | |
| )
 | |
| 
 | |
// Settings used to build requests against the GeoNames web service.
const (
	// GEONAMES_USERNAME is the account name sent as the "username" query
	// parameter of every request.
	GEONAMES_USERNAME = "theodorspringmans"
	// GEONAMES_API_URL is the endpoint that is queried with a "geonameId"
	// parameter to retrieve a single place as JSON.
	GEONAMES_API_URL = "http://api.geonames.org/getJSON"
)
 | |
| 
 | |
// GeonamesProvider keeps places fetched from GeoNames in memory and mirrors
// them to a cache folder on disk.
type GeonamesProvider struct {
	// Places maps the GeoNames URL of a place to its Place value. Being a
	// sync.Map it is accessed without holding mu.
	Places sync.Map

	// mu serializes reading and writing of the cache files; it does not
	// guard Places.
	mu sync.Mutex

	// errs remembers, per GeoNames URL, the HTTP status code of a failed
	// fetch so the same place is not requested again. Guarded by errmu.
	errs  map[string]int
	errmu sync.Mutex
}
 | |
| 
 | |
| func NewGeonamesProvider() *GeonamesProvider {
 | |
| 	return &GeonamesProvider{
 | |
| 		errs: make(map[string]int),
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) ReadCache(folder string) error {
 | |
| 	p.mu.Lock()
 | |
| 	defer p.mu.Unlock()
 | |
| 	if err := p.readPlaces(folder); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) readPlaces(folder string) error {
 | |
| 	info, err := os.Stat(folder)
 | |
| 	if os.IsNotExist(err) {
 | |
| 		return os.MkdirAll(folder, 0755)
 | |
| 	}
 | |
| 	if err != nil || !info.IsDir() {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	files, err := filepath.Glob(filepath.Join(folder, "*.json"))
 | |
| 	// TODO: try to recover by recreating the folder
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	wg := sync.WaitGroup{}
 | |
| 	wg.Add(len(files))
 | |
| 
 | |
| 	for _, file := range files {
 | |
| 		go func(file string) {
 | |
| 			p.readPlace(file)
 | |
| 			wg.Done()
 | |
| 		}(file)
 | |
| 	}
 | |
| 
 | |
| 	wg.Wait()
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) readPlace(file string) {
 | |
| 	place := Place{}
 | |
| 	f, err := os.Open(file)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error opening file for reading: "+file)
 | |
| 		return
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 
 | |
| 	bytevalue, err := io.ReadAll(f)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error reading file: "+file)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if err := json.Unmarshal(bytevalue, &place); err != nil {
 | |
| 		logging.Error(err, "Error unmarshalling file:"+file)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if place.KGPZURL != "" {
 | |
| 		p.Places.Store(place.KGPZURL, place)
 | |
| 		return
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) WriteCache(folder string) error {
 | |
| 	p.mu.Lock()
 | |
| 	defer p.mu.Unlock()
 | |
| 	if err := p.writePlaces(folder); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // INFO: this writes all places to the cache folder
 | |
| // We do that on every fetch, it's easier that way
 | |
| func (p *GeonamesProvider) writePlaces(folder string) error {
 | |
| 	info, err := os.Stat(folder)
 | |
| 	if err == os.ErrNotExist {
 | |
| 		return os.MkdirAll(folder, 0755)
 | |
| 	}
 | |
| 	if err != nil || !info.IsDir() {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	wg := sync.WaitGroup{}
 | |
| 	p.Places.Range(func(key, value interface{}) bool {
 | |
| 		wg.Add(1)
 | |
| 		go func(key string, value Place) {
 | |
| 			p.writePlace(folder, key, value)
 | |
| 			wg.Done()
 | |
| 		}(key.(string), value.(Place))
 | |
| 		return true
 | |
| 	})
 | |
| 
 | |
| 	wg.Wait()
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // INFO: this overwrites any existing files
 | |
| func (p *GeonamesProvider) writePlace(folder, id string, place Place) {
 | |
| 	// JSON marshalling of the place and sanity check:
 | |
| 	filepath := filepath.Join(folder, place.KGPZID+".json")
 | |
| 	f, err := os.Create(filepath)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error creating file for writing: "+id)
 | |
| 		return
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 
 | |
| 	bytevalue, err := json.Marshal(place)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error marshalling place: "+id)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if _, err := f.Write(bytevalue); err != nil {
 | |
| 		logging.Error(err, "Error writing file: "+id)
 | |
| 		return
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) Place(id string) *Place {
 | |
| 	place, ok := p.Places.Load(id)
 | |
| 	if !ok {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	plc := place.(Place)
 | |
| 	return &plc
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) FetchPlaces(places []GeonamesData) {
 | |
| 	wg := sync.WaitGroup{}
 | |
| 	for _, place := range places {
 | |
| 		if place.ID == "" || place.Geonames == "" {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// TODO: place already fetched; check for updates??
 | |
| 		if _, ok := p.Places.Load(place.Geonames); ok {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		p.errmu.Lock()
 | |
| 		if _, ok := p.errs[place.Geonames]; ok {
 | |
| 			continue
 | |
| 		}
 | |
| 		p.errmu.Unlock()
 | |
| 
 | |
| 		wg.Add(1)
 | |
| 		go func(place *GeonamesData) {
 | |
| 			defer wg.Done()
 | |
| 			p.fetchPlace(place.ID, place.Geonames)
 | |
| 		}(&place)
 | |
| 	}
 | |
| 	wg.Wait()
 | |
| }
 | |
| 
 | |
| func (p *GeonamesProvider) fetchPlace(ID, GeonamesURL string) {
 | |
| 	// Remove trailing slash if present
 | |
| 	cleanURL := strings.TrimSuffix(GeonamesURL, "/")
 | |
| 	SPLITURL := strings.Split(cleanURL, "/")
 | |
| 	if len(SPLITURL) < 2 {
 | |
| 		logging.Error(nil, "Error parsing Geonames ID from: "+GeonamesURL)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	GeonamesID := SPLITURL[len(SPLITURL)-1]
 | |
| 
 | |
| 	requestURL := GEONAMES_API_URL + "?geonameId=" + GeonamesID + "&username=" + GEONAMES_USERNAME
 | |
| 	logging.Debug("Fetching place: " + ID + " with URL: " + requestURL)
 | |
| 	request, err := http.NewRequest("GET", requestURL, nil)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error creating request: "+ID)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	var response *http.Response
 | |
| 
 | |
| 	// INFO: we do 3 retries with increasing time between them
 | |
| 	for i := 0; i < 3; i++ {
 | |
| 		response, err = http.DefaultClient.Do(request)
 | |
| 		if err == nil && response.StatusCode < 400 {
 | |
| 			if i > 0 {
 | |
| 				logging.Info("Successfully fetched place: " + ID + " after " + strconv.Itoa(i) + " retries")
 | |
| 			}
 | |
| 			break
 | |
| 		}
 | |
| 
 | |
| 		time.Sleep(time.Duration(i+1) * time.Second)
 | |
| 		logging.Error(err, "Retry fetching place: "+ID)
 | |
| 	}
 | |
| 
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error fetching place: "+ID)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	defer response.Body.Close()
 | |
| 
 | |
| 	if response.StatusCode != http.StatusOK {
 | |
| 		if response.StatusCode < 500 {
 | |
| 			p.errmu.Lock()
 | |
| 			p.errs[GeonamesURL] = response.StatusCode
 | |
| 			p.errmu.Unlock()
 | |
| 		}
 | |
| 		logging.Error(errors.New("Error fetching place: " + ID + " with status code: " + http.StatusText(response.StatusCode)))
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	body, err := io.ReadAll(response.Body)
 | |
| 	if err != nil {
 | |
| 		logging.Error(err, "Error reading response body: "+ID)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// For debug purposes: Write response body to file:
 | |
| 	// os.WriteFile("geonames_responses/"+ID+".json", body, 0644)
 | |
| 
 | |
| 	geonamesPlace := Place{}
 | |
| 	if err := json.Unmarshal(body, &geonamesPlace); err != nil {
 | |
| 		logging.Error(err, "Error unmarshalling response body: "+ID)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	geonamesPlace.KGPZID = ID
 | |
| 	geonamesPlace.KGPZURL = GeonamesURL
 | |
| 	p.Places.Store(GeonamesURL, geonamesPlace)
 | |
| } | 
