heko_webcrawler/app/webcrawler.py
2024-11-13 18:40:55 +01:00

128 lines
5.5 KiB
Python

import csv
import os
import requests
from .models import db, Job
from flask import current_app
# Directory (relative to the working directory) that uploaded CSV files are read from.
UPLOAD_FOLDER = 'uploads'
# Directory (relative to the working directory) that result CSV files are written to.
RESULT_FOLDER = 'results'
# Google Maps Platform API key sent with every Places request.
# SECURITY: a credential should not live in source control. The key is taken
# from the GOOGLE_MAPS_API_KEY environment variable when it is set; the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed once deployments provide the environment variable.
API_KEY = os.environ.get(
    'GOOGLE_MAPS_API_KEY',
    'AIzaSyAIf0yXJTwo87VMWLBtq2m2LqE-OaPGbzw',
)
def _fetch_place_contact(place_id):
    """Return ``(phone, website)`` for *place_id* from the Place Details endpoint.

    A value is ``'N/A'`` when the response does not contain it. Both values
    are ``'N/A'`` when the request fails, returns a non-200 status, or its
    body is not valid JSON, so a single bad lookup never aborts the caller's
    loop over the remaining places.
    """
    details_url = "https://maps.googleapis.com/maps/api/place/details/json"
    details_params = {
        'place_id': place_id,
        'fields': 'formatted_phone_number,website',
        'key': API_KEY,
    }
    try:
        details_response = requests.get(details_url, params=details_params, timeout=5)
        if details_response.status_code != 200:
            return 'N/A', 'N/A'
        details_data = details_response.json().get('result', {})
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Anfragefehler für {details_url}: {e}")
        return 'N/A', 'N/A'
    return (
        details_data.get('formatted_phone_number', 'N/A'),
        details_data.get('website', 'N/A'),
    )


def get_place_details(street, city_zip):
    """Look up places for an address and return those with a name and a phone number.

    A text search is issued for ``"<street>, <city_zip>"``; for every place in
    the response a second request fetches its phone number and website.

    Args:
        street: Street part of the address (e.g. street name and house number).
        city_zip: City/postal-code part of the address.

    Returns:
        A list of dicts with the keys ``'Name'``, ``'Address'``, ``'Phone'``
        and ``'Website'``. Places without a name or without a phone number are
        omitted. The list is empty when the search request fails, returns a
        non-200 status, or yields no usable place.
    """
    address = f"{street}, {city_zip}"
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {'query': address, 'key': API_KEY}

    try:
        response = requests.get(url, params=params, timeout=5)
        if response.status_code != 200:
            print(f"Fehler beim Abrufen der URL: {url} - Statuscode: {response.status_code}")
            return []
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Anfragefehler für {url}: {e}")
        return []

    print(f"API Response Data for {address}: {data}")

    results = []
    for place in data.get('results', []):
        name = place.get('name', 'N/A')
        place_id = place.get('place_id')
        formatted_address = place.get('formatted_address', 'N/A')

        # Second request for more detailed information. It is skipped when the
        # place has no name, because such a place is discarded below anyway.
        phone, website = 'N/A', 'N/A'
        if place_id and name != 'N/A':
            phone, website = _fetch_place_contact(place_id)

        # Keep the place only when both a name and a phone number are present.
        if name != 'N/A' and phone != 'N/A':
            results.append({
                'Name': name,
                'Address': formatted_address,
                'Phone': phone,
                'Website': website,
            })

    return results
def _row_value(row, key):
    """Return ``row[key]`` as a string, or ``''`` when the column is missing or empty.

    ``csv.DictReader`` fills columns that are absent from a short row with
    ``None``; mapping that to ``''`` keeps the text "None" out of addresses.
    """
    return row.get(key) or ''


def process_file(filename, job_id, app):
    """Process an uploaded address CSV for a job and write the matching places to a result CSV.

    The upload is read from ``UPLOAD_FOLDER`` as a ``;``-delimited, ISO-8859-1
    encoded CSV with the columns ``Straße``, ``Hausnummer``, ``PLZ``, ``Stadt``
    and ``Zusatz``. Each row is looked up via ``get_place_details`` and every
    hit is written, together with the row's address columns, to
    ``results_<filename>`` in ``RESULT_FOLDER``.

    The job's ``status`` is set to ``"In Progress"`` at the start, then to
    ``"Completed"`` (with ``result_filename`` set) on success, or ``"Failed"``
    when the upload cannot be read, no results were found, or writing fails.
    If the job row no longer exists, processing stops without writing anything.

    Args:
        filename: Name of the uploaded file inside ``UPLOAD_FOLDER``.
        job_id: Primary key of the ``Job`` row tracking this run.
        app: Application object whose ``app_context()`` is entered for the
            duration of the run.

    Returns:
        None.
    """
    with app.app_context():
        print(f"Starte Prozess für Job-ID: {job_id}")
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        results = []

        job = Job.query.get(job_id)
        if not job:
            print("Job wurde abgebrochen, bevor er starten konnte.")
            return

        job.status = "In Progress"
        db.session.commit()

        # Read all rows up front so the file is closed before the slow API
        # calls start, and so a read error marks the job as failed instead of
        # leaving it "In Progress" forever.
        try:
            with open(filepath, newline='', encoding='ISO-8859-1') as csvfile:
                rows = list(csv.DictReader(csvfile, delimiter=';'))
        except (OSError, csv.Error) as e:
            print(f"Fehler beim Lesen der Upload-Datei: {e}")
            job.status = "Failed"
            db.session.commit()
            return

        print(f"Insgesamt zu verarbeitende Zeilen: {len(rows)}")

        for row in rows:
            # Re-check that the job still exists; a missing row means it was cancelled.
            job = Job.query.get(job_id)
            if not job:
                print("Job wurde abgebrochen.")
                return

            plz = _row_value(row, 'PLZ')
            stadt = _row_value(row, 'Stadt')
            strasse = _row_value(row, 'Straße')
            hausnummer = _row_value(row, 'Hausnummer')
            zusatz = _row_value(row, 'Zusatz')

            # Build the full address for the lookup.
            street = f"{strasse} {hausnummer}".strip()
            city_zip = f"{plz} {stadt}".strip()
            print(f"Verarbeite Adresse: {street}, {city_zip}")

            for result in get_place_details(street, city_zip):
                # Keep a result only when both a name and a phone number are present.
                if result['Name'] != 'N/A' and result['Phone'] != 'N/A':
                    result.update({
                        'PLZ': plz,
                        'Stadt': stadt,
                        'Straße': strasse,
                        'Hausnummer': hausnummer,
                        'Zusatz': zusatz,
                    })
                    results.append(result)

        # The result file name is derived from the upload file name.
        result_file = f"results_{filename}"
        result_path = os.path.join(RESULT_FOLDER, result_file)

        try:
            # Create the result directory if needed. This happens inside the
            # try so a failure here also marks the job as failed; exist_ok
            # tolerates another worker creating the directory concurrently.
            if not os.path.exists(RESULT_FOLDER):
                os.makedirs(RESULT_FOLDER, exist_ok=True)
                print(f"Erstelle Ergebnisverzeichnis: {RESULT_FOLDER}")

            if results:  # Only write a file when there is something to save.
                with open(result_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
                    writer = csv.DictWriter(
                        csvfile,
                        fieldnames=['Name', 'Address', 'Phone', 'Website', 'PLZ', 'Stadt', 'Straße', 'Hausnummer', 'Zusatz'],
                    )
                    writer.writeheader()
                    writer.writerows(results)
                print(f"Ergebnisdatei erfolgreich gespeichert unter: {result_path}")
                job.status = "Completed"
                job.result_filename = result_file
                db.session.commit()
            else:
                print("Keine relevanten Ergebnisse zum Speichern vorhanden. Markiere den Job als 'Failed'.")
                job.status = "Failed"
                db.session.commit()
        except Exception as e:
            # Top-level boundary of the job: record the failure on the job row.
            print(f"Fehler beim Schreiben der Ergebnisdatei: {e}")
            job.status = "Failed"
            db.session.commit()