diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2dc2c9d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+# Python image
+FROM python:3.10-slim
+
+# Set the working directory
+WORKDIR /app
+
+# Install dependencies
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the application files
+COPY . .
+
+# Set the Flask environment variable
+ENV FLASK_APP=app
+
+# Start Flask
+CMD ["flask", "run", "--host=0.0.0.0", "--port=5000"]
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..23efef7
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,70 @@
+import os
+from flask import Flask, redirect, url_for, request
+from flask_sqlalchemy import SQLAlchemy
+from flask_login import LoginManager, current_user
+from .models import db, User
+
+# Configuration for the upload and result folders
+UPLOAD_FOLDER = '/app/uploads'
+RESULT_FOLDER = '/app/results'
+
+def create_app():
+    """Build and configure the Flask application.
+
+    Sets up the database, Flask-Login, a global login guard, the upload and
+    result folders, the route blueprint and the database tables.
+
+    Returns:
+        The configured Flask application instance.
+    """
+    app = Flask(__name__)
+    # Prefer a secret supplied through the environment. The literal fallback
+    # keeps existing deployments working, but it is committed to version
+    # control and should be rotated.
+    app.config['SECRET_KEY'] = os.environ.get(
+        'SECRET_KEY', '008e7369b075886d5f494c8813efdfb17155da6af12b3fe8ee')
+    app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///users.db'
+    app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+    app.config['RESULT_FOLDER'] = RESULT_FOLDER
+
+    db.init_app(app)
+
+    # Flask-Login setup
+    login_manager = LoginManager()
+    login_manager.login_view = 'auth.login'
+    login_manager.init_app(app)
+
+    @login_manager.user_loader
+    def load_user(user_id):
+        # Flask-Login expects None (not an exception) for an unusable ID.
+        try:
+            return db.session.get(User, int(user_id))
+        except (TypeError, ValueError):
+            return None
+
+    # Redirect unauthenticated users, except for static files and the
+    # login/signup routes
+    @app.before_request
+    def require_login():
+        allowed_routes = ('auth.login', 'auth.signup')
+
+        # Let the request through if the user is authenticated or asks for
+        # an allowed route or a static file
+        if (not current_user.is_authenticated
+                and request.endpoint not in allowed_routes
+                and not request.path.startswith('/static/')):
+            return redirect(url_for('auth.login'))
+
+    # Create the folders if they do not exist yet
+    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+    os.makedirs(app.config['RESULT_FOLDER'], exist_ok=True)
+
+    # Register the routes
+    from . import routes
+    app.register_blueprint(routes.bp)
+
+    # Create the database tables
+    with app.app_context():
+        db.create_all()
+
+    return app
diff --git a/app/__pycache__/__init__.cpython-310.pyc b/app/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..41c64c9
Binary files /dev/null and b/app/__pycache__/__init__.cpython-310.pyc differ
diff --git a/app/__pycache__/models.cpython-310.pyc b/app/__pycache__/models.cpython-310.pyc
new file mode 100644
index 0000000..641a174
Binary files /dev/null and b/app/__pycache__/models.cpython-310.pyc differ
diff --git a/app/__pycache__/routes.cpython-310.pyc b/app/__pycache__/routes.cpython-310.pyc
new file mode 100644
index 0000000..38d5d36
Binary files /dev/null and b/app/__pycache__/routes.cpython-310.pyc differ
diff --git a/app/__pycache__/webcrawler.cpython-310.pyc b/app/__pycache__/webcrawler.cpython-310.pyc
new file mode 100644
index 0000000..b2349e9
Binary files /dev/null and b/app/__pycache__/webcrawler.cpython-310.pyc differ
diff --git a/app/models.py b/app/models.py
new file mode 100644
index 0000000..6a5c7f4
--- /dev/null
+++ b/app/models.py
@@ -0,0 +1,24 @@
+from flask_sqlalchemy import SQLAlchemy
+from flask_login import UserMixin
+from datetime import datetime
+
+db = SQLAlchemy()
+
+class User(UserMixin, db.Model):
+    """Account that can log in and own scraping jobs."""
+    id = db.Column(db.Integer, primary_key=True)
+    username = db.Column(db.String(150), unique=True, nullable=False)
+    # Holds the password hash produced at signup, not the plain-text password
+    password = db.Column(db.String(150), nullable=False)
+
+class Job(db.Model):
+    """One uploaded CSV file and the state of its processing."""
+    id = db.Column(db.Integer, primary_key=True)
+    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
+    filename = db.Column(db.String(150), nullable=False)
+    # Status values written by the application: Pending, In Progress, Completed, Failed
+    status = db.Column(db.String(50), default="Pending")
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    result_filename = db.Column(db.String(150), nullable=True)
+
+    user = db.relationship('User', backref=db.backref('jobs', lazy=True))
diff --git a/app/routes.py b/app/routes.py
new file mode 100644
index 0000000..779d11c
--- /dev/null
+++ b/app/routes.py
@@ -0,0 +1,167 @@
+import csv
+import os
+import threading
+from flask import Blueprint, request, redirect, url_for, flash, render_template, send_file, current_app
+from flask_login import login_user, logout_user, login_required, current_user
+from werkzeug.utils import secure_filename
+from werkzeug.security import generate_password_hash, check_password_hash
+from .models import db, User, Job
+from .webcrawler import process_file  # Import the web-scraping function
+
+UPLOAD_FOLDER = 'uploads'
+RESULT_FOLDER = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'results')
+
+# Create the blueprint for auth
+bp = Blueprint('auth', __name__)
+
+@bp.route('/login', methods=['GET', 'POST'])
+def login():
+    """Render the login form and, on POST, authenticate the user."""
+    if request.method == 'POST':
+        username = request.form['username']
+        password = request.form['password']
+        user = User.query.filter_by(username=username).first()
+        if user and check_password_hash(user.password, password):
+            login_user(user)
+            return redirect(url_for('auth.job_status'))
+        flash('Login fehlgeschlagen. Überprüfen Sie Benutzername und Passwort.')
+    return render_template('login.html')
+
+@bp.route('/signup', methods=['GET', 'POST'])
+def signup():
+    """Render the signup form and, on POST, create a new user."""
+    if request.method == 'POST':
+        username = request.form['username']
+        # User.username is unique; report a duplicate to the user instead
+        # of letting the commit fail with a server error.
+        if User.query.filter_by(username=username).first():
+            flash('Benutzername ist bereits vergeben.')
+            return redirect(url_for('auth.signup'))
+        # Plain 'sha256' is not accepted by newer Werkzeug releases;
+        # 'pbkdf2:sha256' is the explicit, salted method name.
+        password = generate_password_hash(request.form['password'], method='pbkdf2:sha256')
+        new_user = User(username=username, password=password)
+        db.session.add(new_user)
+        db.session.commit()
+        flash('Benutzer erfolgreich erstellt! Sie können sich jetzt einloggen.')
+        return redirect(url_for('auth.login'))
+    return render_template('signup.html')
+
+@bp.route('/logout')
+@login_required
+def logout():
+    """Log the current user out and return to the login page."""
+    logout_user()
+    return redirect(url_for('auth.login'))
+
+@bp.route('/jobs')
+@login_required
+def job_status():
+    """List the jobs that belong to the current user."""
+    jobs = Job.query.filter_by(user_id=current_user.id).all()
+    return render_template('jobs.html', jobs=jobs)
+
+# Upload the file and process it in the background
+@bp.route('/upload', methods=['GET', 'POST'])
+@login_required
+def upload():
+    """Accept a CSV upload, record a job and start processing it in a thread."""
+    if request.method == 'POST':
+        file = request.files['file']
+        filename = secure_filename(file.filename)
+        # Compare case-insensitively so that e.g. "DATA.CSV" is accepted too
+        if not filename.lower().endswith('.csv'):
+            flash('Bitte eine CSV-Datei hochladen')
+            return redirect(url_for('auth.upload'))
+
+        # The relative folder may not exist yet in the working directory
+        os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+        file_path = os.path.join(UPLOAD_FOLDER, filename)
+        file.save(file_path)
+        flash('Datei erfolgreich hochgeladen und Job gestartet')
+
+        # Create the new job
+        new_job = Job(user_id=current_user.id, filename=filename, status="Pending")
+        db.session.add(new_job)
+        db.session.commit()
+
+        # Debug output to verify that the thread gets created
+        print(f"Starte Scraping-Thread für Job-ID: {new_job.id}")
+
+        # Run the scraping in a background thread; pass the real application
+        # object so the thread can push its own application context
+        thread = threading.Thread(target=process_file, args=(filename, new_job.id, current_app._get_current_object()))
+        thread.start()
+
+        # Debug output after the thread has been started
+        print(f"Thread für Job {new_job.id} erfolgreich gestartet.")
+
+        return redirect(url_for('auth.job_status'))
+
+    return render_template('upload.html')
+
+@bp.route('/download/<int:job_id>', methods=['GET'])
+@login_required
+def download_result(job_id):
+    """Send the result file of a job to the user who owns the job."""
+    job = Job.query.get_or_404(job_id)
+    print(f"Job ID: {job.id} - User ID: {job.user_id} - Current User ID: {current_user.id}")
+
+    # Check that the job belongs to the current user
+    if job.user_id != current_user.id:
+        flash("Sie haben keine Berechtigung, dieses Ergebnis herunterzuladen.")
+        return redirect(url_for('auth.job_status'))
+
+    # Check that a result has been recorded
+    if not job.result_filename:
+        flash("Das Ergebnis ist noch nicht verfügbar.")
+        return redirect(url_for('auth.job_status'))
+
+    # Check that the file exists at the expected path
+    result_path = os.path.join(current_app.config['RESULT_FOLDER'], job.result_filename)
+    print(f"Versuche, Datei herunterzuladen von: {result_path}")
+
+    if os.path.exists(result_path):
+        print("Datei existiert und wird zum Download bereitgestellt.")
+        return send_file(result_path, as_attachment=True)
+    else:
+        print("Datei nicht gefunden. Ergebnisverzeichnis oder Pfad prüfen.")
+        flash("Ergebnisdatei nicht gefunden.")
+        return redirect(url_for('auth.job_status'))
+
+
+@bp.route('/delete_job/<int:job_id>', methods=['POST'])
+@login_required
+def delete_job(job_id):
+    """Delete a job of the current user together with its files."""
+    job = Job.query.get_or_404(job_id)
+    if job.user_id != current_user.id:
+        flash("Sie haben keine Berechtigung, diesen Job zu löschen.")
+        return redirect(url_for('auth.job_status'))
+
+    # Delete the uploaded file
+    upload_path = os.path.join(UPLOAD_FOLDER, job.filename)
+    if os.path.exists(upload_path):
+        os.remove(upload_path)
+        print(f"Upload-Datei gelöscht: {upload_path}")
+    else:
+        print(f"Upload-Datei nicht gefunden: {upload_path}")
+
+    # Delete the result file, if any. Use the configured folder, the same
+    # one download_result() reads from.
+    if job.result_filename:
+        result_path = os.path.join(current_app.config['RESULT_FOLDER'], job.result_filename)
+        if os.path.exists(result_path):
+            try:
+                os.remove(result_path)
+                print(f"Ergebnisdatei gelöscht: {result_path}")
+            except Exception as e:
+                print(f"Fehler beim Löschen der Ergebnisdatei: {e}")
+        else:
+            print(f"Ergebnisdatei nicht gefunden: {result_path}")
+
+    # Delete the job from the database
+    db.session.delete(job)
+    db.session.commit()
+    flash("Job erfolgreich gelöscht.")
+    return redirect(url_for('auth.job_status'))
diff --git a/app/static/styles.css b/app/static/styles.css new file mode 100644 index 0000000..20a6174 --- /dev/null +++ b/app/static/styles.css @@
-0,0 +1,166 @@ +* { + box-sizing: border-box; + margin: 0; + padding: 0; + font-family: 'Roboto', sans-serif; +} + +body { + display: flex; + flex-direction: column; + min-height: 100vh; + background-color: #f5f5f7; + color: #333; +} + +header { + background-color: #007aff; + color: white; + padding: 1em; +} + +nav ul { + display: flex; + justify-content: space-around; + list-style: none; +} + +nav ul li a { + color: white; + text-decoration: none; + font-weight: 500; + padding: 0.5em 1em; + transition: background-color 0.3s; +} + +nav ul li a:hover { + background-color: #005bb5; + border-radius: 4px; +} + +/* Vollbild-Layout */ +.container { + width: 90%; + margin: 2em auto; + flex: 1; +} + +/* Boxed-Design für Login und Signup */ +.form-container { + max-width: 400px; + margin: auto; + padding: 2em; + background: white; + border-radius: 10px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); + text-align: center; +} + +h2 { + font-weight: 500; + color: #1d1d1f; + margin-bottom: 1.5em; +} + +label { + display: block; + font-size: 0.9em; + color: #6e6e73; + margin-bottom: 0.5em; +} + +input { + width: 100%; + padding: 0.75em; + margin-bottom: 1em; + border: 1px solid #d1d1d6; + border-radius: 8px; + font-size: 1em; + color: #333; +} + +input:focus { + border-color: #007aff; + outline: none; + box-shadow: 0 0 0 3px rgba(0, 122, 255, 0.2); +} + +button { + width: 100%; + padding: 0.75em; + font-size: 1em; + font-weight: 500; + color: white; + background-color: #007aff; + border: none; + border-radius: 8px; + cursor: pointer; + transition: background-color 0.2s ease-in-out; +} + +button:hover { + background-color: #005bb5; +} + +p { + text-align: center; + margin-top: 1em; +} + +a { + color: #007aff; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +.table-container { + width: 90%; + margin: 2em auto; +} + +table { + width: 100%; + border-collapse: collapse; + margin-top: 1em; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); +} + +table, th, td { + 
border: 1px solid #d1d1d6; +} + +th, td { + padding: 0.75em; + text-align: left; +} + +th { + background-color: #f1f1f1; + color: #333; +} + +td { + background-color: white; +} + +tr:nth-child(even) td { + background-color: #f9f9f9; +} + +.delete-btn { + background-color: #ff4d4d; + color: white; + padding: 0.5em; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 0.9em; + transition: background-color 0.2s ease; +} + +.delete-btn:hover { + background-color: #e60000; +} diff --git a/app/templates/base.html b/app/templates/base.html new file mode 100644 index 0000000..1f06c06 --- /dev/null +++ b/app/templates/base.html @@ -0,0 +1,27 @@ + + + + + + {{ title }} + + + + + {% if current_user.is_authenticated %} +
+ +
+ {% endif %} + +
+ {% block content %}{% endblock %} +
+ + diff --git a/app/templates/jobs.html b/app/templates/jobs.html new file mode 100644 index 0000000..34c8bd0 --- /dev/null +++ b/app/templates/jobs.html @@ -0,0 +1,61 @@ +{% extends "base.html" %} + +{% block content %} +
+

Ihre Aufträge

+ + + + + + + + + + + + {% for job in jobs %} + + + + + + + + {% endfor %} + +
DateinameStatusErstellt amErgebnisAktionen
{{ job.filename }}{{ job.status }}{{ job.created_at.strftime('%Y-%m-%d %H:%M:%S') }} + {% if job.status == "Completed" %} + Download + {% else %} + Noch nicht verfügbar + {% endif %} + +
+ +
+
+
+ + +{% endblock %} diff --git a/app/templates/login.html b/app/templates/login.html new file mode 100644 index 0000000..905a1f2 --- /dev/null +++ b/app/templates/login.html @@ -0,0 +1,16 @@ +{% extends "base.html" %} +{% block content %} +
+

Anmelden

+
+ + + + + + + +
+

Noch keinen Account? Registrieren

+
+{% endblock %} diff --git a/app/templates/signup.html b/app/templates/signup.html new file mode 100644 index 0000000..58f11cb --- /dev/null +++ b/app/templates/signup.html @@ -0,0 +1,16 @@ +{% extends "base.html" %} +{% block content %} +
+

Registrieren

+
+ + + + + + + +
+

Bereits registriert? Login

+
+{% endblock %} diff --git a/app/templates/upload.html b/app/templates/upload.html new file mode 100644 index 0000000..279c7af --- /dev/null +++ b/app/templates/upload.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} +{% block content %} +
+

Datei hochladen

+
+ + + +
+
+{% endblock %}
diff --git a/app/webcrawler.py b/app/webcrawler.py
new file mode 100644
index 0000000..c4b4aff
--- /dev/null
+++ b/app/webcrawler.py
@@ -0,0 +1,166 @@
+import csv
+import os
+import requests
+from .models import db, Job
+from flask import current_app
+
+UPLOAD_FOLDER = 'uploads'
+RESULT_FOLDER = 'results'
+# Prefer a key supplied through the environment. The literal fallback keeps
+# existing deployments working, but it is committed to version control and
+# should be rotated.
+API_KEY = os.environ.get('GOOGLE_API_KEY', 'AIzaSyAIf0yXJTwo87VMWLBtq2m2LqE-OaPGbzw')
+
+def get_place_details(street, city_zip):
+    """Look up places for an address with the Google Places web service.
+
+    Runs a text search for the address and, for every hit that has a place
+    ID, a details request for the phone number and website.
+
+    Args:
+        street: Street and house number.
+        city_zip: Postal code and city.
+
+    Returns:
+        A list of dicts with the keys 'Name', 'Address', 'Phone' and
+        'Website'. Only places that have both a name and a phone number are
+        included. If a request fails, the places collected so far are
+        returned.
+    """
+    address = f"{street}, {city_zip}"
+    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
+    params = {'query': address, 'key': API_KEY}
+
+    results = []
+    try:
+        response = requests.get(url, params=params, timeout=5)
+        if response.status_code == 200:
+            data = response.json()
+            print(f"API Response Data for {address}: {data}")
+
+            for place in data.get('results', []):
+                name = place.get('name', 'N/A')
+                place_id = place.get('place_id')
+                formatted_address = place.get('formatted_address', 'N/A')
+
+                # Second request for more detailed information
+                phone, website = 'N/A', 'N/A'
+                if place_id:
+                    details_url = "https://maps.googleapis.com/maps/api/place/details/json"
+                    details_params = {
+                        'place_id': place_id,
+                        'fields': 'formatted_phone_number,website',
+                        'key': API_KEY
+                    }
+                    details_response = requests.get(details_url, params=details_params, timeout=5)
+                    if details_response.status_code == 200:
+                        details_data = details_response.json().get('result', {})
+                        phone = details_data.get('formatted_phone_number', 'N/A')
+                        website = details_data.get('website', 'N/A')
+
+                # Keep the place only if name and phone number are present
+                if name != 'N/A' and phone != 'N/A':
+                    results.append({
+                        'Name': name,
+                        'Address': formatted_address,
+                        'Phone': phone,
+                        'Website': website
+                    })
+        else:
+            print(f"Fehler beim Abrufen der URL: {url} - Statuscode: {response.status_code}")
+    except (requests.exceptions.RequestException, ValueError) as e:
+        # ValueError covers a response body that is not valid JSON
+        print(f"Anfragefehler für {url}: {e}")
+
+    return results
+
+def process_file(filename, job_id, app):
+    """Process an uploaded CSV file and write the places found to a result CSV.
+
+    Meant to run in a background thread: it pushes its own application
+    context, marks the job "In Progress", looks up every address row and
+    finally marks the job "Completed" or "Failed".
+
+    Args:
+        filename: Name of the uploaded file inside UPLOAD_FOLDER.
+        job_id: Primary key of the Job row to update.
+        app: The Flask application object.
+    """
+    with app.app_context():
+        print(f"Starte Prozess für Job-ID: {job_id}")
+        filepath = os.path.join(UPLOAD_FOLDER, filename)
+        # Write to the folder configured on the app when there is one, so the
+        # result ends up where the rest of the application reads it
+        result_folder = app.config.get('RESULT_FOLDER', RESULT_FOLDER)
+        results = []
+
+        job = Job.query.filter_by(id=job_id).first()
+        if not job:
+            print("Job wurde abgebrochen, bevor er starten konnte.")
+            return
+        job.status = "In Progress"
+        db.session.commit()
+
+        try:
+            with open(filepath, newline='', encoding='ISO-8859-1') as csvfile:
+                reader = csv.DictReader(csvfile, delimiter=';')
+                rows = list(reader)
+            total_rows = len(rows)
+            print(f"Insgesamt zu verarbeitende Zeilen: {total_rows}")
+
+            for row in rows:
+                # Check again that the job still exists. A filtered query
+                # always asks the database, whereas a primary-key get() may
+                # be answered from the session without a new SELECT.
+                job = Job.query.filter_by(id=job_id).first()
+                if not job:
+                    print("Job wurde abgebrochen.")
+                    return
+
+                # Build the full address
+                street = f"{row.get('Straße', '')} {row.get('Hausnummer', '')}".strip()
+                city_zip = f"{row.get('PLZ', '')} {row.get('Stadt', '')}".strip()
+                print(f"Verarbeite Adresse: {street}, {city_zip}")
+
+                # get_place_details() only returns places that have a name
+                # and a phone number, so no further filtering is needed
+                for result in get_place_details(street, city_zip):
+                    result.update({
+                        'PLZ': row.get('PLZ', ''),
+                        'Stadt': row.get('Stadt', ''),
+                        'Straße': row.get('Straße', ''),
+                        'Hausnummer': row.get('Hausnummer', ''),
+                        'Zusatz': row.get('Zusatz', '')
+                    })
+                    results.append(result)
+
+            # Result file name is derived from the upload file name
+            result_file = f"results_{filename}"
+            result_path = os.path.join(result_folder, result_file)
+
+            if results:  # Only write a file when there are results
+                # Create the result directory if necessary
+                if not os.path.exists(result_folder):
+                    os.makedirs(result_folder, exist_ok=True)
+                    print(f"Erstelle Ergebnisverzeichnis: {result_folder}")
+                with open(result_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
+                    writer = csv.DictWriter(csvfile, fieldnames=['Name', 'Address', 'Phone', 'Website', 'PLZ', 'Stadt', 'Straße', 'Hausnummer', 'Zusatz'])
+                    writer.writeheader()
+                    writer.writerows(results)
+                print(f"Ergebnisdatei erfolgreich gespeichert unter: {result_path}")
+                job.status = "Completed"
+                job.result_filename = result_file
+            else:
+                print("Keine relevanten Ergebnisse zum Speichern vorhanden. Markiere den Job als 'Failed'.")
+                job.status = "Failed"
+            db.session.commit()
+        except Exception as e:
+            # Any failure (unreadable upload, write error, database error)
+            # must still leave the job in a final state instead of
+            # "In Progress"
+            print(f"Fehler bei der Verarbeitung von Job {job_id}: {e}")
+            db.session.rollback()
+            job = Job.query.filter_by(id=job_id).first()
+            if job:
+                job.status = "Failed"
+                db.session.commit()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..e11dd79
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,11 @@
+version: '3'
+services:
+  web:
+    build: .
+    ports:
+      - "5000:5000"
+    environment:
+      - FLASK_APP=app
+    command: flask run --host=0.0.0.0 --port=5000
+    volumes:
+      - .:/app
diff --git a/instance/users.db b/instance/users.db
new file mode 100644
index 0000000..5f9b034
Binary files /dev/null and b/instance/users.db differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..979acdc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+Flask==2.2.5
+Flask-Login==0.6.2
+Flask-SQLAlchemy==3.0.3
+Werkzeug==2.2.2
+pandas
+requests
+beautifulsoup4