mirror of
				https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
				synced 2025-10-31 01:55:29 +00:00 
			
		
		
		
	orte provider
This commit is contained in:
		
							
								
								
									
										266
									
								
								providers/geonames/geonames.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										266
									
								
								providers/geonames/geonames.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,266 @@ | ||||
| package geonames | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	GEONAMES_API_URL = "http://api.geonames.org/getJSON" | ||||
| 	GEONAMES_USERNAME = "theodorspringmans" | ||||
| ) | ||||
|  | ||||
| type GeonamesProvider struct { | ||||
| 	// Mutex is for file reading & writing, not place map access | ||||
| 	mu     sync.Mutex | ||||
| 	Places sync.Map | ||||
|  | ||||
| 	// INFO: this holds all errors that occurred during fetching | ||||
| 	// and is used to prevent further fetches of the same place. | ||||
| 	errmu sync.Mutex | ||||
| 	errs  map[string]int | ||||
| } | ||||
|  | ||||
| func NewGeonamesProvider() *GeonamesProvider { | ||||
| 	return &GeonamesProvider{ | ||||
| 		errs: make(map[string]int), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) ReadCache(folder string) error { | ||||
| 	p.mu.Lock() | ||||
| 	defer p.mu.Unlock() | ||||
| 	if err := p.readPlaces(folder); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) readPlaces(folder string) error { | ||||
| 	info, err := os.Stat(folder) | ||||
| 	if os.IsNotExist(err) { | ||||
| 		return os.MkdirAll(folder, 0755) | ||||
| 	} | ||||
| 	if err != nil || !info.IsDir() { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	files, err := filepath.Glob(filepath.Join(folder, "*.json")) | ||||
| 	// TODO: try to recover by recreating the folder | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	wg := sync.WaitGroup{} | ||||
| 	wg.Add(len(files)) | ||||
|  | ||||
| 	for _, file := range files { | ||||
| 		go func(file string) { | ||||
| 			p.readPlace(file) | ||||
| 			wg.Done() | ||||
| 		}(file) | ||||
| 	} | ||||
|  | ||||
| 	wg.Wait() | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) readPlace(file string) { | ||||
| 	place := Place{} | ||||
| 	f, err := os.Open(file) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error opening file for reading: "+file) | ||||
| 		return | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	bytevalue, err := io.ReadAll(f) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error reading file: "+file) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if err := json.Unmarshal(bytevalue, &place); err != nil { | ||||
| 		logging.Error(err, "Error unmarshalling file:"+file) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if place.KGPZURL != "" { | ||||
| 		p.Places.Store(place.KGPZURL, place) | ||||
| 		return | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) WriteCache(folder string) error { | ||||
| 	p.mu.Lock() | ||||
| 	defer p.mu.Unlock() | ||||
| 	if err := p.writePlaces(folder); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // INFO: this writes all places to the cache folder | ||||
| // We do that on every fetch, it's easier that way | ||||
| func (p *GeonamesProvider) writePlaces(folder string) error { | ||||
| 	info, err := os.Stat(folder) | ||||
| 	if err == os.ErrNotExist { | ||||
| 		return os.MkdirAll(folder, 0755) | ||||
| 	} | ||||
| 	if err != nil || !info.IsDir() { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	wg := sync.WaitGroup{} | ||||
| 	p.Places.Range(func(key, value interface{}) bool { | ||||
| 		wg.Add(1) | ||||
| 		go func(key string, value Place) { | ||||
| 			p.writePlace(folder, key, value) | ||||
| 			wg.Done() | ||||
| 		}(key.(string), value.(Place)) | ||||
| 		return true | ||||
| 	}) | ||||
|  | ||||
| 	wg.Wait() | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // INFO: this overwrites any existing files | ||||
| func (p *GeonamesProvider) writePlace(folder, id string, place Place) { | ||||
| 	// JSON marshalling of the place and sanity check: | ||||
| 	filepath := filepath.Join(folder, place.KGPZID+".json") | ||||
| 	f, err := os.Create(filepath) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error creating file for writing: "+id) | ||||
| 		return | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	bytevalue, err := json.Marshal(place) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error marshalling place: "+id) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if _, err := f.Write(bytevalue); err != nil { | ||||
| 		logging.Error(err, "Error writing file: "+id) | ||||
| 		return | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) Place(id string) *Place { | ||||
| 	place, ok := p.Places.Load(id) | ||||
| 	if !ok { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	plc := place.(Place) | ||||
| 	return &plc | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) FetchPlaces(places []GeonamesData) { | ||||
| 	wg := sync.WaitGroup{} | ||||
| 	for _, place := range places { | ||||
| 		if place.ID == "" || place.Geonames == "" { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// TODO: place already fetched; check for updates?? | ||||
| 		if _, ok := p.Places.Load(place.Geonames); ok { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		p.errmu.Lock() | ||||
| 		if _, ok := p.errs[place.Geonames]; ok { | ||||
| 			continue | ||||
| 		} | ||||
| 		p.errmu.Unlock() | ||||
|  | ||||
| 		wg.Add(1) | ||||
| 		go func(place *GeonamesData) { | ||||
| 			defer wg.Done() | ||||
| 			p.fetchPlace(place.ID, place.Geonames) | ||||
| 		}(&place) | ||||
| 	} | ||||
| 	wg.Wait() | ||||
| } | ||||
|  | ||||
| func (p *GeonamesProvider) fetchPlace(ID, GeonamesURL string) { | ||||
| 	SPLITURL := strings.Split(GeonamesURL, "/") | ||||
| 	if len(SPLITURL) < 2 { | ||||
| 		logging.Error(nil, "Error parsing Geonames ID from: "+GeonamesURL) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	GeonamesID := SPLITURL[len(SPLITURL)-1] | ||||
|  | ||||
| 	requestURL := GEONAMES_API_URL + "?geonameId=" + GeonamesID + "&username=" + GEONAMES_USERNAME | ||||
| 	logging.Debug("Fetching place: " + ID + " with URL: " + requestURL) | ||||
| 	request, err := http.NewRequest("GET", requestURL, nil) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error creating request: "+ID) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	var response *http.Response | ||||
|  | ||||
| 	// INFO: we do 3 retries with increasing time between them | ||||
| 	for i := 0; i < 3; i++ { | ||||
| 		response, err = http.DefaultClient.Do(request) | ||||
| 		if err == nil && response.StatusCode < 400 { | ||||
| 			if i > 0 { | ||||
| 				logging.Info("Successfully fetched place: " + ID + " after " + strconv.Itoa(i) + " retries") | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		time.Sleep(time.Duration(i+1) * time.Second) | ||||
| 		logging.Error(err, "Retry fetching place: "+ID) | ||||
| 	} | ||||
|  | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error fetching place: "+ID) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	defer response.Body.Close() | ||||
|  | ||||
| 	if response.StatusCode != http.StatusOK { | ||||
| 		if response.StatusCode < 500 { | ||||
| 			p.errmu.Lock() | ||||
| 			p.errs[GeonamesURL] = response.StatusCode | ||||
| 			p.errmu.Unlock() | ||||
| 		} | ||||
| 		logging.Error(errors.New("Error fetching place: " + ID + " with status code: " + http.StatusText(response.StatusCode))) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	body, err := io.ReadAll(response.Body) | ||||
| 	if err != nil { | ||||
| 		logging.Error(err, "Error reading response body: "+ID) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	// For debug purposes: Write response body to file: | ||||
| 	// os.WriteFile("geonames_responses/"+ID+".json", body, 0644) | ||||
|  | ||||
| 	geonamesPlace := Place{} | ||||
| 	if err := json.Unmarshal(body, &geonamesPlace); err != nil { | ||||
| 		logging.Error(err, "Error unmarshalling response body: "+ID) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	geonamesPlace.KGPZID = ID | ||||
| 	geonamesPlace.KGPZURL = GeonamesURL | ||||
| 	p.Places.Store(GeonamesURL, geonamesPlace) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Simon Martens
					Simon Martens