mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-29 09:05:30 +00:00
orte provider
This commit is contained in:
266
providers/geonames/geonames.go
Normal file
266
providers/geonames/geonames.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package geonames
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
|
||||
)
|
||||
|
||||
// GEONAMES_API_URL is the Geonames endpoint that is queried for a single
// place record; the query string is appended by the caller.
const GEONAMES_API_URL = "http://api.geonames.org/getJSON"

// GEONAMES_USERNAME is the account name sent as the `username` query
// parameter with every request.
const GEONAMES_USERNAME = "theodorspringmans"
|
||||
|
||||
// GeonamesProvider keeps Geonames place records in memory and mirrors them
// to JSON files in a cache folder.
type GeonamesProvider struct {
	// mu serializes reading and writing of the cache folder. It does not
	// guard Places, which is safe for concurrent use on its own.
	mu sync.Mutex
	// Places maps the Geonames URL of a place to its Place value.
	Places sync.Map

	// errmu guards errs.
	errmu sync.Mutex
	// errs remembers, per Geonames URL, the HTTP status code of a fetch that
	// failed, so that the same place is not requested again.
	errs map[string]int
}
|
||||
|
||||
func NewGeonamesProvider() *GeonamesProvider {
|
||||
return &GeonamesProvider{
|
||||
errs: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) ReadCache(folder string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if err := p.readPlaces(folder); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) readPlaces(folder string) error {
|
||||
info, err := os.Stat(folder)
|
||||
if os.IsNotExist(err) {
|
||||
return os.MkdirAll(folder, 0755)
|
||||
}
|
||||
if err != nil || !info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
files, err := filepath.Glob(filepath.Join(folder, "*.json"))
|
||||
// TODO: try to recover by recreating the folder
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(files))
|
||||
|
||||
for _, file := range files {
|
||||
go func(file string) {
|
||||
p.readPlace(file)
|
||||
wg.Done()
|
||||
}(file)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) readPlace(file string) {
|
||||
place := Place{}
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error opening file for reading: "+file)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bytevalue, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error reading file: "+file)
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bytevalue, &place); err != nil {
|
||||
logging.Error(err, "Error unmarshalling file:"+file)
|
||||
return
|
||||
}
|
||||
|
||||
if place.KGPZURL != "" {
|
||||
p.Places.Store(place.KGPZURL, place)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) WriteCache(folder string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if err := p.writePlaces(folder); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// INFO: this writes all places to the cache folder
|
||||
// We do that on every fetch, it's easier that way
|
||||
func (p *GeonamesProvider) writePlaces(folder string) error {
|
||||
info, err := os.Stat(folder)
|
||||
if err == os.ErrNotExist {
|
||||
return os.MkdirAll(folder, 0755)
|
||||
}
|
||||
if err != nil || !info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
p.Places.Range(func(key, value interface{}) bool {
|
||||
wg.Add(1)
|
||||
go func(key string, value Place) {
|
||||
p.writePlace(folder, key, value)
|
||||
wg.Done()
|
||||
}(key.(string), value.(Place))
|
||||
return true
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
// INFO: this overwrites any existing files
|
||||
func (p *GeonamesProvider) writePlace(folder, id string, place Place) {
|
||||
// JSON marshalling of the place and sanity check:
|
||||
filepath := filepath.Join(folder, place.KGPZID+".json")
|
||||
f, err := os.Create(filepath)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error creating file for writing: "+id)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bytevalue, err := json.Marshal(place)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error marshalling place: "+id)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := f.Write(bytevalue); err != nil {
|
||||
logging.Error(err, "Error writing file: "+id)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) Place(id string) *Place {
|
||||
place, ok := p.Places.Load(id)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
plc := place.(Place)
|
||||
return &plc
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) FetchPlaces(places []GeonamesData) {
|
||||
wg := sync.WaitGroup{}
|
||||
for _, place := range places {
|
||||
if place.ID == "" || place.Geonames == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO: place already fetched; check for updates??
|
||||
if _, ok := p.Places.Load(place.Geonames); ok {
|
||||
continue
|
||||
}
|
||||
|
||||
p.errmu.Lock()
|
||||
if _, ok := p.errs[place.Geonames]; ok {
|
||||
continue
|
||||
}
|
||||
p.errmu.Unlock()
|
||||
|
||||
wg.Add(1)
|
||||
go func(place *GeonamesData) {
|
||||
defer wg.Done()
|
||||
p.fetchPlace(place.ID, place.Geonames)
|
||||
}(&place)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (p *GeonamesProvider) fetchPlace(ID, GeonamesURL string) {
|
||||
SPLITURL := strings.Split(GeonamesURL, "/")
|
||||
if len(SPLITURL) < 2 {
|
||||
logging.Error(nil, "Error parsing Geonames ID from: "+GeonamesURL)
|
||||
return
|
||||
}
|
||||
|
||||
GeonamesID := SPLITURL[len(SPLITURL)-1]
|
||||
|
||||
requestURL := GEONAMES_API_URL + "?geonameId=" + GeonamesID + "&username=" + GEONAMES_USERNAME
|
||||
logging.Debug("Fetching place: " + ID + " with URL: " + requestURL)
|
||||
request, err := http.NewRequest("GET", requestURL, nil)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error creating request: "+ID)
|
||||
return
|
||||
}
|
||||
|
||||
var response *http.Response
|
||||
|
||||
// INFO: we do 3 retries with increasing time between them
|
||||
for i := 0; i < 3; i++ {
|
||||
response, err = http.DefaultClient.Do(request)
|
||||
if err == nil && response.StatusCode < 400 {
|
||||
if i > 0 {
|
||||
logging.Info("Successfully fetched place: " + ID + " after " + strconv.Itoa(i) + " retries")
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
time.Sleep(time.Duration(i+1) * time.Second)
|
||||
logging.Error(err, "Retry fetching place: "+ID)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logging.Error(err, "Error fetching place: "+ID)
|
||||
return
|
||||
}
|
||||
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode != http.StatusOK {
|
||||
if response.StatusCode < 500 {
|
||||
p.errmu.Lock()
|
||||
p.errs[GeonamesURL] = response.StatusCode
|
||||
p.errmu.Unlock()
|
||||
}
|
||||
logging.Error(errors.New("Error fetching place: " + ID + " with status code: " + http.StatusText(response.StatusCode)))
|
||||
return
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error reading response body: "+ID)
|
||||
return
|
||||
}
|
||||
|
||||
// For debug purposes: Write response body to file:
|
||||
// os.WriteFile("geonames_responses/"+ID+".json", body, 0644)
|
||||
|
||||
geonamesPlace := Place{}
|
||||
if err := json.Unmarshal(body, &geonamesPlace); err != nil {
|
||||
logging.Error(err, "Error unmarshalling response body: "+ID)
|
||||
return
|
||||
}
|
||||
|
||||
geonamesPlace.KGPZID = ID
|
||||
geonamesPlace.KGPZURL = GeonamesURL
|
||||
p.Places.Store(GeonamesURL, geonamesPlace)
|
||||
}
|
||||
5
providers/geonames/helpers.go
Normal file
5
providers/geonames/helpers.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package geonames
|
||||
|
||||
// GeonamesData is one fetch request: the project-side identifier of a place
// together with the Geonames URL it refers to.
type GeonamesData struct {
	// ID is stored as KGPZID on the fetched place.
	ID string
	// Geonames is the Geonames URL; its last path segment is the Geonames ID.
	Geonames string
}
|
||||
85
providers/geonames/model.go
Normal file
85
providers/geonames/model.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package geonames
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Place struct {
|
||||
KGPZID string `json:"kgpzid"`
|
||||
KGPZURL string `json:"kgpzurl"`
|
||||
GeonameId int `json:"geonameId,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
AsciiName string `json:"asciiName,omitempty"`
|
||||
ToponymName string `json:"toponymName,omitempty"`
|
||||
Lat string `json:"lat,omitempty"`
|
||||
Lng string `json:"lng,omitempty"`
|
||||
CountryName string `json:"countryName,omitempty"`
|
||||
CountryCode string `json:"countryCode,omitempty"`
|
||||
CountryId string `json:"countryId,omitempty"`
|
||||
Population int `json:"population,omitempty"`
|
||||
WikipediaURL string `json:"wikipediaURL,omitempty"`
|
||||
Timezone Timezone `json:"timezone,omitempty"`
|
||||
Bbox BoundingBox `json:"bbox,omitempty"`
|
||||
Fcode string `json:"fcode,omitempty"`
|
||||
FcodeName string `json:"fcodeName,omitempty"`
|
||||
Fcl string `json:"fcl,omitempty"`
|
||||
FclName string `json:"fclName,omitempty"`
|
||||
ContinentCode string `json:"continentCode,omitempty"`
|
||||
AdminName1 string `json:"adminName1,omitempty"`
|
||||
AdminName2 string `json:"adminName2,omitempty"`
|
||||
AdminName3 string `json:"adminName3,omitempty"`
|
||||
AdminName4 string `json:"adminName4,omitempty"`
|
||||
AdminName5 string `json:"adminName5,omitempty"`
|
||||
AdminCode1 string `json:"adminCode1,omitempty"`
|
||||
AdminCode2 string `json:"adminCode2,omitempty"`
|
||||
AdminCode3 string `json:"adminCode3,omitempty"`
|
||||
AdminCode4 string `json:"adminCode4,omitempty"`
|
||||
AdminId1 string `json:"adminId1,omitempty"`
|
||||
AdminId2 string `json:"adminId2,omitempty"`
|
||||
AdminId3 string `json:"adminId3,omitempty"`
|
||||
AdminId4 string `json:"adminId4,omitempty"`
|
||||
AdminCodes1 AdminCodes1 `json:"adminCodes1,omitempty"`
|
||||
AlternateNames []AlternateName `json:"alternateNames,omitempty"`
|
||||
Astergdem int `json:"astergdem,omitempty"`
|
||||
Srtm3 int `json:"srtm3,omitempty"`
|
||||
}
|
||||
|
||||
// Timezone is the `timezone` object of a Geonames record: the zone id and
// its GMT and DST offsets.
type Timezone struct {
	TimeZoneId string  `json:"timeZoneId,omitempty"`
	GmtOffset  float64 `json:"gmtOffset,omitempty"`
	DstOffset  float64 `json:"dstOffset,omitempty"`
}
|
||||
|
||||
// BoundingBox is the `bbox` object of a Geonames record: the four edges of
// the box and the accuracy level reported with it.
type BoundingBox struct {
	East          float64 `json:"east,omitempty"`
	West          float64 `json:"west,omitempty"`
	North         float64 `json:"north,omitempty"`
	South         float64 `json:"south,omitempty"`
	AccuracyLevel int     `json:"accuracyLevel,omitempty"`
}
|
||||
|
||||
// AdminCodes1 is the `adminCodes1` object of a Geonames record; it carries
// the value found under the `ISO3166_2` key.
type AdminCodes1 struct {
	ISO3166_2 string `json:"ISO3166_2,omitempty"`
}
|
||||
|
||||
// AlternateName is one entry of the `alternateNames` list of a Geonames
// record: a name, its language tag and two flags describing the name.
type AlternateName struct {
	Name            string `json:"name,omitempty"`
	Lang            string `json:"lang,omitempty"`
	IsPreferredName bool   `json:"isPreferredName,omitempty"`
	IsShortName     bool   `json:"isShortName,omitempty"`
}
|
||||
|
||||
func (p Place) String() string {
|
||||
return fmt.Sprintf("Place{KGPZID: %v, Name: %v, GeonameId: %v, CountryName: %v, Lat: %v, Lng: %v, Population: %v, WikipediaURL: %v}",
|
||||
p.KGPZID, p.Name, p.GeonameId, p.CountryName, p.Lat, p.Lng, p.Population, p.WikipediaURL)
|
||||
}
|
||||
|
||||
func (p Place) PlaceName() string {
|
||||
if p.Name != "" {
|
||||
return p.Name
|
||||
}
|
||||
if p.AsciiName != "" {
|
||||
return p.AsciiName
|
||||
}
|
||||
return p.ToponymName
|
||||
}
|
||||
Reference in New Issue
Block a user