webcrawler v1.0

This commit is contained in:
mkrieger 2024-11-14 10:20:42 +01:00
parent 008e2bc274
commit 6b057fb941
19 changed files with 814 additions and 112 deletions

162
.gitignore vendored Normal file
View file

@ -0,0 +1,162 @@
### Flask ###
instance/*
!instance/.gitignore
.webassets-cache
.env
### Flask.Python Stack ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# Uploads / Results
uploads/
results/

View file

@ -2,20 +2,25 @@ import os
from flask import Flask, redirect, url_for, request from flask import Flask, redirect, url_for, request
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask_login import LoginManager, current_user from flask_login import LoginManager, current_user
from .models import db, User from flask_migrate import Migrate
# Konfiguration für Upload- und Ergebnis-Ordner # Konfiguration für Upload- und Ergebnis-Ordner
UPLOAD_FOLDER = '/app/uploads' UPLOAD_FOLDER = '/app/uploads'
RESULT_FOLDER = '/app/results' RESULT_FOLDER = '/app/results'
db = SQLAlchemy()
migrate = Migrate()
def create_app(): def create_app():
app = Flask(__name__) app = Flask(__name__)
app.config['SECRET_KEY'] = '008e7369b075886d5f494c8813efdfb17155da6af12b3fe8ee' app.config['SECRET_KEY'] = '008e7369b075886d5f494c8813efdfb17155da6af12b3fe8ee'
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///users.db' app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///users.db'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['RESULT_FOLDER'] = RESULT_FOLDER app.config['RESULT_FOLDER'] = RESULT_FOLDER
app.config['ALLOW_USER_SIGNUP'] = False
db.init_app(app) db.init_app(app)
migrate.init_app(app, db)
# Flask-Login Setup # Flask-Login Setup
login_manager = LoginManager() login_manager = LoginManager()
@ -24,14 +29,13 @@ def create_app():
@login_manager.user_loader @login_manager.user_loader
def load_user(user_id): def load_user(user_id):
from .models import User
return User.query.get(int(user_id)) return User.query.get(int(user_id))
# Umleitung nicht authentifizierter Benutzer, statische Dateien und bestimmte Routen ausnehmen # Umleitung nicht authentifizierter Benutzer, statische Dateien und bestimmte Routen ausnehmen
@app.before_request @app.before_request
def require_login(): def require_login():
allowed_routes = ['auth.login', 'auth.signup'] allowed_routes = ['auth.login', 'auth.signup']
# Prüfen, ob der Benutzer authentifiziert ist oder eine erlaubte Route anfragt
if (not current_user.is_authenticated if (not current_user.is_authenticated
and request.endpoint not in allowed_routes and request.endpoint not in allowed_routes
and not request.path.startswith('/static/')): and not request.path.startswith('/static/')):

View file

@ -1,19 +1,18 @@
from flask_sqlalchemy import SQLAlchemy
from flask_login import UserMixin from flask_login import UserMixin
from datetime import datetime from datetime import datetime
from . import db
db = SQLAlchemy()
class User(UserMixin, db.Model): class User(UserMixin, db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
username = db.Column(db.String(150), unique=True, nullable=False) username = db.Column(db.String(150), unique=True, nullable=False)
password = db.Column(db.String(150), nullable=False) password = db.Column(db.String(150), nullable=False)
is_admin = db.Column(db.Boolean, default=False)
class Job(db.Model): class Job(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False) user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
filename = db.Column(db.String(150), nullable=False) filename = db.Column(db.String(150), nullable=False)
status = db.Column(db.String(50), default="Pending") # Status: Pending, In Progress, Completed status = db.Column(db.String(50), default="Pending")
created_at = db.Column(db.DateTime, default=datetime.utcnow) created_at = db.Column(db.DateTime, default=datetime.utcnow)
result_filename = db.Column(db.String(150), nullable=True) result_filename = db.Column(db.String(150), nullable=True)

View file

@ -1,3 +1,4 @@
import time
import csv import csv
import os import os
import threading import threading
@ -9,7 +10,7 @@ from .models import db, User, Job
from .webcrawler import process_file # Importiere die Funktion für das Webscraping from .webcrawler import process_file # Importiere die Funktion für das Webscraping
UPLOAD_FOLDER = 'uploads' UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'results') RESULT_FOLDER = 'results'
# Blueprint für auth erstellen # Blueprint für auth erstellen
bp = Blueprint('auth', __name__) bp = Blueprint('auth', __name__)
@ -28,6 +29,10 @@ def login():
@bp.route('/signup', methods=['GET', 'POST']) @bp.route('/signup', methods=['GET', 'POST'])
def signup(): def signup():
if not current_app.config['ALLOW_USER_SIGNUP']:
flash("Registrierung ist derzeit deaktiviert.")
return redirect(url_for('auth.login'))
if request.method == 'POST': if request.method == 'POST':
username = request.form['username'] username = request.form['username']
password = generate_password_hash(request.form['password'], method='sha256') password = generate_password_hash(request.form['password'], method='sha256')
@ -36,6 +41,7 @@ def signup():
db.session.commit() db.session.commit()
flash('Benutzer erfolgreich erstellt! Sie können sich jetzt einloggen.') flash('Benutzer erfolgreich erstellt! Sie können sich jetzt einloggen.')
return redirect(url_for('auth.login')) return redirect(url_for('auth.login'))
return render_template('signup.html') return render_template('signup.html')
@bp.route('/logout') @bp.route('/logout')
@ -50,18 +56,24 @@ def job_status():
jobs = Job.query.filter_by(user_id=current_user.id).all() jobs = Job.query.filter_by(user_id=current_user.id).all()
return render_template('jobs.html', jobs=jobs) return render_template('jobs.html', jobs=jobs)
# Hochladen und Verarbeiten der Datei im Hintergrund
@bp.route('/upload', methods=['GET', 'POST']) @bp.route('/upload', methods=['GET', 'POST'])
@login_required @login_required
def upload(): def upload():
if request.method == 'POST': if request.method == 'POST':
file = request.files['file'] file = request.files['file']
filename = secure_filename(file.filename) filename = secure_filename(file.filename)
if not filename.endswith('.csv'):
flash('Bitte eine CSV-Datei hochladen')
return redirect(url_for('auth.upload'))
file_path = os.path.join(UPLOAD_FOLDER, filename) # Überprüfen, ob eine Datei mit dem gleichen Namen bereits existiert
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
if os.path.exists(file_path):
# Wenn eine Datei mit dem gleichen Namen existiert, einen Zeitstempel hinzufügen
name, ext = os.path.splitext(filename)
timestamp = time.strftime("%Y%m%d-%H%M%S") # Zeitstempel im Format JahrMonatTag-StundenMinutenSekunden
filename = f"{name}_{timestamp}{ext}"
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
flash(f"Eine Datei mit gleichem Namen existierte bereits. Die Datei wurde als '{filename}' gespeichert.")
# Speichern der Datei
file.save(file_path) file.save(file_path)
flash('Datei erfolgreich hochgeladen und Job gestartet') flash('Datei erfolgreich hochgeladen und Job gestartet')
@ -122,7 +134,7 @@ def delete_job(job_id):
return redirect(url_for('auth.job_status')) return redirect(url_for('auth.job_status'))
# Löschen der Upload-Datei # Löschen der Upload-Datei
upload_path = os.path.join(UPLOAD_FOLDER, job.filename) upload_path = os.path.join(current_app.config['UPLOAD_FOLDER'], job.filename)
if os.path.exists(upload_path): if os.path.exists(upload_path):
os.remove(upload_path) os.remove(upload_path)
print(f"Upload-Datei gelöscht: {upload_path}") print(f"Upload-Datei gelöscht: {upload_path}")
@ -131,7 +143,9 @@ def delete_job(job_id):
# Löschen der Results-Datei, falls vorhanden # Löschen der Results-Datei, falls vorhanden
if job.result_filename: if job.result_filename:
result_path = os.path.join(RESULT_FOLDER, job.result_filename) result_path = os.path.join(current_app.config['RESULT_FOLDER'], job.result_filename)
print(f"Versuche Ergebnisdatei zu löschen: {result_path}")
if os.path.exists(result_path): if os.path.exists(result_path):
try: try:
os.remove(result_path) os.remove(result_path)
@ -139,10 +153,71 @@ def delete_job(job_id):
except Exception as e: except Exception as e:
print(f"Fehler beim Löschen der Ergebnisdatei: {e}") print(f"Fehler beim Löschen der Ergebnisdatei: {e}")
else: else:
print(f"Ergebnisdatei nicht gefunden: {result_path}") print(f"Ergebnisdatei nicht gefunden im Pfad: {result_path}")
# Job aus der Datenbank löschen # Job aus der Datenbank löschen
db.session.delete(job) db.session.delete(job)
db.session.commit() db.session.commit()
flash("Job erfolgreich gelöscht.") flash("Job erfolgreich gelöscht.")
return redirect(url_for('auth.job_status')) return redirect(url_for('auth.job_status'))
@bp.route('/admin', methods=['GET'])
@login_required
def admin_panel():
    """Render the user-administration page.

    Administrators get ``admin_panel.html`` populated with every ``User``
    row. Any other logged-in user is sent back to the job overview with a
    flash message.
    """
    if current_user.is_admin:
        return render_template('admin_panel.html', users=User.query.all())

    # Non-admins never see the panel.
    flash("Keine Berechtigung.")
    return redirect(url_for('auth.job_status'))
@bp.route('/admin/create_user', methods=['POST'])
@login_required
def create_user():
    """Create a user account from the admin-panel form.

    Reads ``username``, ``password`` and the optional ``is_admin`` checkbox
    from the submitted form. Only administrators may create accounts.

    Returns:
        A redirect to the admin panel in every case; the outcome is reported
        through a flash message.
    """
    if not current_user.is_admin:
        flash("Keine Berechtigung.")
        return redirect(url_for('auth.admin_panel'))

    username = request.form.get('username', '')
    password = request.form.get('password', '')
    # A checkbox is only part of the form data when it is ticked.
    is_admin = 'is_admin' in request.form

    # The HTML "required" attribute is client-side only; enforce it here too.
    if not username or not password:
        flash("Benutzername und Passwort dürfen nicht leer sein.")
        return redirect(url_for('auth.admin_panel'))

    # User.username is UNIQUE. Check up front so a duplicate produces a
    # readable message instead of an IntegrityError (HTTP 500) on commit.
    if User.query.filter_by(username=username).first() is not None:
        flash(f"Benutzer {username} existiert bereits.")
        return redirect(url_for('auth.admin_panel'))

    # 'sha256' is a single-round salted hash (deprecated in Werkzeug 2.3,
    # removed in 3.0). PBKDF2 is the key-stretching method Werkzeug supports
    # across all of these versions; check_password_hash() reads the method
    # from the stored hash, so existing accounts keep working.
    hashed_password = generate_password_hash(password, method='pbkdf2:sha256')

    new_user = User(username=username, password=hashed_password, is_admin=is_admin)
    db.session.add(new_user)
    db.session.commit()

    flash(f"Benutzer {username} wurde erstellt.")
    return redirect(url_for('auth.admin_panel'))
@bp.route('/admin/reset_password/<int:user_id>', methods=['POST'])
@login_required
def reset_password(user_id):
    """Set a new password for the user identified by ``user_id``.

    Only administrators may reset passwords. The new password is taken from
    the ``new_password`` form field.

    Args:
        user_id: Primary key of the user whose password is replaced; an
            unknown id results in a 404.

    Returns:
        A redirect to the admin panel; the outcome is reported through a
        flash message.
    """
    if not current_user.is_admin:
        flash("Keine Berechtigung.")
        return redirect(url_for('auth.admin_panel'))

    user = User.query.get_or_404(user_id)

    new_password = request.form.get('new_password', '')
    # Never store the hash of an empty password; "required" in the form is
    # only enforced by the browser.
    if not new_password:
        flash("Das neue Passwort darf nicht leer sein.")
        return redirect(url_for('auth.admin_panel'))

    # 'sha256' is a single-round salted hash (deprecated in Werkzeug 2.3,
    # removed in 3.0); PBKDF2 is supported across all of these versions.
    user.password = generate_password_hash(new_password, method='pbkdf2:sha256')
    db.session.commit()

    flash(f"Passwort für Benutzer {user.username} wurde zurückgesetzt.")
    return redirect(url_for('auth.admin_panel'))
@bp.route('/admin/delete_user/<int:user_id>', methods=['POST'])
@login_required
def delete_user(user_id):
    """Delete a non-admin user together with the jobs that belong to them.

    Only administrators may delete users, and administrator accounts
    themselves cannot be deleted through this endpoint.

    Args:
        user_id: Primary key of the user to delete; an unknown id results
            in a 404.

    Returns:
        A redirect to the admin panel; the outcome is reported through a
        flash message.
    """
    if not current_user.is_admin:
        flash("Keine Berechtigung.")
        return redirect(url_for('auth.admin_panel'))

    user = User.query.get_or_404(user_id)
    if user.is_admin:
        flash("Administratoren können nicht gelöscht werden.")
        return redirect(url_for('auth.admin_panel'))

    # Remember the name now; the instance is gone from the session once the
    # delete has been committed.
    username = user.username

    # Job.user_id is a NOT NULL foreign key. Leaving the rows behind would
    # orphan them, and on SQLite a freed primary key can be handed to a new
    # account, which would then see the old user's jobs.
    for job in Job.query.filter_by(user_id=user.id).all():
        _remove_job_files(job)
        db.session.delete(job)

    db.session.delete(user)
    db.session.commit()

    flash(f"Benutzer {username} wurde gelöscht.")
    return redirect(url_for('auth.admin_panel'))


def _remove_job_files(job):
    """Best-effort removal of the upload and result files stored for *job*."""
    paths = [os.path.join(current_app.config['UPLOAD_FOLDER'], job.filename)]
    if job.result_filename:
        paths.append(os.path.join(current_app.config['RESULT_FOLDER'], job.result_filename))

    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up; the file was never written or is already gone.
            continue
        except OSError as exc:
            # A leftover file must not block deleting the account.
            print(f"Fehler beim Löschen der Datei {path}: {exc}")

View file

@ -164,3 +164,135 @@ tr:nth-child(even) td {
.delete-btn:hover { .delete-btn:hover {
background-color: #e60000; background-color: #e60000;
} }
/* Flash badge: fixed toast in the top-right corner, hidden until .show is added */
.flash-badge {
position: fixed;
top: 20px;
right: 20px;
background-color: #f44336; /* Material Design red */
color: white;
padding: 12px 24px;
border-radius: 8px;
font-family: 'Roboto', sans-serif;
font-weight: 500;
box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.2);
z-index: 1000;
opacity: 0;
transform: translateY(-20px);
transition: opacity 0.4s ease, transform 0.4s ease;
}
/* Fade-in state */
.flash-badge.show {
opacity: 1;
transform: translateY(0);
}
/* Fade-out state */
.flash-badge.hide {
opacity: 0;
transform: translateY(-20px);
}
.admin-panel {
max-width: 800px;
margin: 2em auto;
padding: 2em;
background: white;
border-radius: 8px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
}
.admin-panel h2 {
font-weight: 500;
color: #1d1d1f;
margin-bottom: 1em;
}
.user-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 2em;
}
.user-table th, .user-table td {
padding: 0.75em;
text-align: left;
border: 1px solid #d1d1d6;
}
.user-table th {
background-color: #f1f1f1;
color: #333;
}
.user-table td {
background-color: white;
}
.user-table tr:nth-child(even) td {
background-color: #f9f9f9;
}
.reset-btn, .delete-btn, .create-btn {
padding: 0.5em 1em;
font-size: 0.9em;
font-weight: 500;
border: none;
border-radius: 4px;
cursor: pointer;
transition: background-color 0.2s ease-in-out;
}
.reset-btn {
background-color: #4caf50;
color: white;
}
.reset-btn:hover {
background-color: #388e3c;
}
.delete-btn {
background-color: #f44336;
color: white;
}
.delete-btn:hover {
background-color: #d32f2f;
}
.create-btn {
background-color: #007aff;
color: white;
padding: 0.75em;
margin-top: 1em;
display: block;
width: 100%;
font-size: 1em;
}
.create-btn:hover {
background-color: #005bb5;
}
.create-user-form {
margin-top: 1.5em;
}
.create-user-form input[type="text"],
.create-user-form input[type="password"] {
width: 100%;
padding: 0.75em;
margin-bottom: 1em;
border: 1px solid #d1d1d6;
border-radius: 8px;
}
.create-user-form label {
font-size: 0.9em;
color: #6e6e73;
display: block;
margin-bottom: 1em;
}

View file

@ -0,0 +1,50 @@
{% extends "base.html" %}
{% block content %}
<div class="admin-panel">
    <h2>Benutzerverwaltung</h2>

    <!-- User management table: one row per account with its actions -->
    <table class="user-table">
        <thead>
            <tr>
                <th>ID</th>
                <th>Benutzername</th>
                <th>Admin</th>
                <th>Aktionen</th>
            </tr>
        </thead>
        <tbody>
            {% for user in users %}
            <tr>
                <td>{{ user.id }}</td>
                <td>{{ user.username }}</td>
                <td>{{ 'Ja' if user.is_admin else 'Nein' }}</td>
                <td>
                    <form action="{{ url_for('auth.reset_password', user_id=user.id) }}" method="post" style="display:inline;">
                        <!-- type="password" so the new password is masked, matching the create-user form below -->
                        <input type="password" name="new_password" placeholder="Neues Passwort" autocomplete="new-password" required>
                        <button type="submit" class="reset-btn">Passwort zurücksetzen</button>
                    </form>
                    <!-- Administrator accounts cannot be deleted, so no delete button is offered for them -->
                    {% if not user.is_admin %}
                    <form action="{{ url_for('auth.delete_user', user_id=user.id) }}" method="post" style="display:inline;">
                        <button type="submit" class="delete-btn">Benutzer löschen</button>
                    </form>
                    {% endif %}
                </td>
            </tr>
            {% endfor %}
        </tbody>
    </table>

    <!-- Form for creating a new user -->
    <h3>Neuen Benutzer erstellen</h3>
    <form action="{{ url_for('auth.create_user') }}" method="post" class="create-user-form">
        <input type="text" name="username" placeholder="Benutzername" required>
        <input type="password" name="password" placeholder="Passwort" required>
        <label>
            <input type="checkbox" name="is_admin"> Admin
        </label>
        <button type="submit" class="create-btn">Benutzer erstellen</button>
    </form>
</div>
{% endblock %}

View file

@ -14,14 +14,50 @@
<ul> <ul>
<li><a href="{{ url_for('auth.job_status') }}">Jobs</a></li> <li><a href="{{ url_for('auth.job_status') }}">Jobs</a></li>
<li><a href="{{ url_for('auth.upload') }}">Upload</a></li> <li><a href="{{ url_for('auth.upload') }}">Upload</a></li>
{% if current_user.is_admin %}
<li><a href="{{ url_for('auth.admin_panel') }}">Admin</a></li> <!-- Admin-Bereich Link -->
{% endif %}
<li><a href="{{ url_for('auth.logout') }}">Logout</a></li> <li><a href="{{ url_for('auth.logout') }}">Logout</a></li>
</ul> </ul>
</nav> </nav>
</header> </header>
{% endif %} {% endif %}
<!-- Flash-Nachrichten -->
{% with messages = get_flashed_messages() %}
{% if messages %}
<div id="flash-badge-container">
{% for message in messages %}
<div class="flash-badge">{{ message }}</div>
{% endfor %}
</div>
{% endif %}
{% endwith %}
<div class="{% if request.endpoint in ['auth.login', 'auth.signup'] %}form-container{% else %}container{% endif %}"> <div class="{% if request.endpoint in ['auth.login', 'auth.signup'] %}form-container{% else %}container{% endif %}">
{% block content %}{% endblock %} {% block content %}{% endblock %}
</div> </div>
<!-- JavaScript für Ein- und Ausblendanimation des Flash-Badges -->
<script>
document.addEventListener("DOMContentLoaded", function() {
var flashBadges = document.querySelectorAll('.flash-badge');
flashBadges.forEach(function(badge) {
// Einblendung mit Verzögerung
setTimeout(function() {
badge.classList.add('show');
}, 100);
// Ausblendung nach 5 Sekunden und Entfernen aus dem DOM
setTimeout(function() {
badge.classList.remove('show');
badge.classList.add('hide');
setTimeout(function() {
badge.remove();
}, 400); // Zeit für die Ausblendanimation
}, 5000);
});
});
</script>
</body> </body>
</html> </html>

View file

@ -6,123 +6,133 @@ from flask import current_app
UPLOAD_FOLDER = 'uploads' UPLOAD_FOLDER = 'uploads'
RESULT_FOLDER = 'results' RESULT_FOLDER = 'results'
API_KEY = 'AIzaSyAIf0yXJTwo87VMWLBtq2m2LqE-OaPGbzw' API_KEY = 'AIzaSyAIf0yXJTwo87VMWLBtq2m2LqE-OaPGbzw'
def get_place_details(street, city_zip): processed_companies = set()
address = f"{street}, {city_zip}"
url = f"https://maps.googleapis.com/maps/api/place/textsearch/json" def get_geocode(address):
params = {'query': address, 'key': API_KEY} url = f"https://maps.googleapis.com/maps/api/geocode/json"
params = {'address': address, 'key': API_KEY}
results = []
try: try:
response = requests.get(url, params=params, timeout=5) response = requests.get(url, params=params, timeout=5)
if response.status_code == 200: if response.status_code == 200:
data = response.json() data = response.json()
print(f"API Response Data for {address}: {data}") if data['status'] == 'OK':
location = data['results'][0]['geometry']['location']
return location['lat'], location['lng']
except requests.RequestException as e:
print(f"Geocode API Fehler für {address}: {e}")
return None, None
for place in data.get('results', []): def get_nearby_places(lat, lng):
name = place.get('name', 'N/A') places_url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json"
place_id = place.get('place_id') params = {
formatted_address = place.get('formatted_address', 'N/A') 'location': f"{lat},{lng}",
'radius': 10,
'type': 'point_of_interest',
'key': API_KEY
}
# Zweite Anfrage für detailliertere Informationen try:
phone, website = 'N/A', 'N/A' response = requests.get(places_url, params=params, timeout=5)
if place_id: if response.status_code == 200:
details_url = f"https://maps.googleapis.com/maps/api/place/details/json" return response.json().get('results', [])
details_params = { except requests.RequestException as e:
'place_id': place_id, print(f"Nearby Places API Fehler für Standort {lat},{lng}: {e}")
'fields': 'formatted_phone_number,website', return []
'key': API_KEY
}
details_response = requests.get(details_url, params=details_params, timeout=5)
if details_response.status_code == 200:
details_data = details_response.json().get('result', {})
phone = details_data.get('formatted_phone_number', 'N/A')
website = details_data.get('website', 'N/A')
# Speichern nur, wenn Name und Telefonnummer vorhanden sind def get_place_details(place_id):
if name != 'N/A' and phone != 'N/A': details_url = f"https://maps.googleapis.com/maps/api/place/details/json"
results.append({ params = {
'Name': name, 'place_id': place_id,
'Address': formatted_address, 'fields': 'formatted_phone_number,website',
'Phone': phone, 'key': API_KEY
'Website': website }
})
else:
print(f"Fehler beim Abrufen der URL: {url} - Statuscode: {response.status_code}")
except requests.exceptions.RequestException as e:
print(f"Anfragefehler für {url}: {e}")
return results try:
response = requests.get(details_url, params=params, timeout=5)
if response.status_code == 200:
result = response.json().get('result', {})
return result.get('formatted_phone_number', 'N/A'), result.get('website', 'N/A')
except requests.RequestException as e:
print(f"Place Details API Fehler für Place ID {place_id}: {e}")
return 'N/A', 'N/A'
def process_file(filename, job_id, app): def process_file(filename, job_id, app):
with app.app_context(): with app.app_context():
print(f"Starte Prozess für Job-ID: {job_id}")
filepath = os.path.join(UPLOAD_FOLDER, filename) filepath = os.path.join(UPLOAD_FOLDER, filename)
results = [] results = []
job = Job.query.get(job_id) job = Job.query.get(job_id)
if not job: if not job:
print("Job wurde abgebrochen, bevor er starten konnte.") print("Job wurde abgebrochen.")
return return
job.status = "In Progress" job.status = "In Progress"
db.session.commit() db.session.commit()
with open(filepath, newline='', encoding='ISO-8859-1') as csvfile: with open(filepath, newline='', encoding='ISO-8859-1') as csvfile:
reader = csv.DictReader(csvfile, delimiter=';') reader = csv.DictReader(csvfile, delimiter=';')
rows = list(reader) headers = reader.fieldnames
total_rows = len(rows)
print(f"Insgesamt zu verarbeitende Zeilen: {total_rows}")
for index, row in enumerate(rows): if not all(field in headers for field in ['PLZ', 'Straße', 'Hausnummer']):
# Job-Verfügbarkeit erneut prüfen print("CSV-Datei enthält nicht alle notwendigen Spalten.")
job = Job.query.get(job_id)
if not job:
print("Job wurde abgebrochen.")
return
# Vollständige Adresse erstellen
street = f"{row.get('Straße', '')} {row.get('Hausnummer', '')}".strip()
city_zip = f"{row.get('PLZ', '')} {row.get('Stadt', '')}".strip()
print(f"Verarbeite Adresse: {street}, {city_zip}")
address_results = get_place_details(street, city_zip)
for result in address_results:
# Ergebnisse nur speichern, wenn Name und Telefonnummer vorhanden sind
if result['Name'] != 'N/A' and result['Phone'] != 'N/A':
result.update({
'PLZ': row.get('PLZ', ''),
'Stadt': row.get('Stadt', ''),
'Straße': row.get('Straße', ''),
'Hausnummer': row.get('Hausnummer', ''),
'Zusatz': row.get('Zusatz', '')
})
results.append(result)
# Results-Dateiname basierend auf dem Upload-Dateinamen
result_file = f"results_{filename}"
result_path = os.path.join(RESULT_FOLDER, result_file)
# Prüfen und erstellen des Ergebnisverzeichnisses
if not os.path.exists(RESULT_FOLDER):
os.makedirs(RESULT_FOLDER)
print(f"Erstelle Ergebnisverzeichnis: {RESULT_FOLDER}")
try:
if results: # Nur speichern, wenn Ergebnisse vorhanden sind
with open(result_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=['Name', 'Address', 'Phone', 'Website', 'PLZ', 'Stadt', 'Straße', 'Hausnummer', 'Zusatz'])
writer.writeheader()
writer.writerows(results)
print(f"Ergebnisdatei erfolgreich gespeichert unter: {result_path}")
job.status = "Completed"
job.result_filename = result_file
db.session.commit()
else:
print("Keine relevanten Ergebnisse zum Speichern vorhanden. Markiere den Job als 'Failed'.")
job.status = "Failed" job.status = "Failed"
db.session.commit() db.session.commit()
except Exception as e: return
print(f"Fehler beim Schreiben der Ergebnisdatei: {e}")
for row in reader:
plz = row.get('PLZ', '').strip()
city = row.get('Stadt', row.get('Bezirk', '')).strip()
street = row.get('Straße', '').strip()
house_number = row.get('Hausnummer', '').strip()
additional = row.get('Zusatz', '').strip()
if not all([plz, city, street, house_number]):
continue
full_address = f"{street} {house_number} {additional}, {plz} {city}"
lat, lng = get_geocode(full_address)
if lat is None or lng is None:
continue
nearby_places = get_nearby_places(lat, lng)
for place in nearby_places:
company_name = place['name']
if company_name in processed_companies:
continue
processed_companies.add(company_name)
company_address = place.get('vicinity', 'N/A').split(',')[0]
place_id = place.get('place_id')
company_phone, company_website = get_place_details(place_id) if place_id else ('N/A', 'N/A')
results.append({
'PLZ': plz,
'Stadt': city,
'Straße': street,
'Hausnummer': house_number,
'Zusatz': additional,
'Company Name': company_name,
'Company Address': company_address,
'Company Phone': company_phone,
'Company Website': company_website
})
if results:
result_file = f"results_{os.path.splitext(filename)[0]}.csv"
result_path = os.path.join(RESULT_FOLDER, result_file)
with open(result_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=[
'PLZ', 'Stadt', 'Straße', 'Hausnummer', 'Zusatz',
'Company Name', 'Company Address', 'Company Phone', 'Company Website'
])
writer.writeheader()
writer.writerows(results)
job.status = "Completed"
job.result_filename = result_file
db.session.commit()
else:
job.status = "Failed" job.status = "Failed"
db.session.commit() db.session.commit()

Binary file not shown.

1
migrations/README Normal file
View file

@ -0,0 +1 @@
Single-database configuration for Flask.

50
migrations/alembic.ini Normal file
View file

@ -0,0 +1,50 @@
# A generic, single database configuration.
[alembic]
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic,flask_migrate
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[logger_flask_migrate]
level = INFO
handlers =
qualname = flask_migrate
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

113
migrations/env.py Normal file
View file

@ -0,0 +1,113 @@
import logging
from logging.config import fileConfig
from flask import current_app
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
def get_engine():
    """Return the SQLAlchemy engine of the database registered with Flask-Migrate.

    Calls ``db.get_engine()`` first and falls back to the ``db.engine``
    attribute when that call raises ``TypeError`` or ``AttributeError``.
    """
    try:
        # Works with Flask-SQLAlchemy<3 and Alchemical.
        engine = current_app.extensions['migrate'].db.get_engine()
    except (TypeError, AttributeError):
        # Works with Flask-SQLAlchemy>=3.
        engine = current_app.extensions['migrate'].db.engine
    return engine
def get_engine_url():
    """Return the engine URL as a string with every ``%`` doubled.

    The URL is rendered with the password included. If the URL object has
    no ``render_as_string`` method, ``str()`` is used instead.
    """
    try:
        escaped_url = get_engine().url.render_as_string(
            hide_password=False
        ).replace('%', '%%')
    except AttributeError:
        escaped_url = str(get_engine().url).replace('%', '%%')
    return escaped_url
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
config.set_main_option('sqlalchemy.url', get_engine_url())
target_db = current_app.extensions['migrate'].db
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def get_metadata():
    """Return the metadata object Alembic should compare the database against.

    Uses the ``None`` entry of ``target_db.metadatas`` when that attribute
    exists, and ``target_db.metadata`` otherwise.
    """
    return (
        target_db.metadatas[None]
        if hasattr(target_db, 'metadatas')
        else target_db.metadata
    )
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The Alembic context is configured from the ``sqlalchemy.url`` option
    alone, so no Engine (and no DBAPI) is required. Statements passed to
    ``context.execute()`` are emitted to the script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=get_metadata(),
        literal_binds=True,
    )

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run migrations in 'online' mode.

    Obtains an Engine, opens a connection and binds it to the Alembic
    context before running the migrations inside a transaction.
    """

    def _skip_empty_autogenerate(context, revision, directives):
        # Prevents an auto-generated revision file from being written when
        # the schema has not changed.
        # Reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
        if not getattr(config.cmd_opts, 'autogenerate', False):
            return
        if directives[0].upgrade_ops.is_empty():
            directives[:] = []
            logger.info('No changes in schema detected.')

    configure_kwargs = current_app.extensions['migrate'].configure_args
    # Install the callback only when no callback has been configured.
    if configure_kwargs.get("process_revision_directives") is None:
        configure_kwargs["process_revision_directives"] = _skip_empty_autogenerate

    engine = get_engine()

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            **configure_kwargs
        )

        with context.begin_transaction():
            context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

24
migrations/script.py.mako Normal file
View file

@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,45 @@
"""Add is_admin column to User model
Revision ID: 10331d61a25d
Revises:
Create Date: 2024-11-14 08:36:27.125841
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '10331d61a25d'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
    """Add the ``is_admin`` flag to the existing ``user`` table.

    The auto-generated body dropped the ``user`` and ``job`` tables, which
    would have wiped all accounts and jobs instead of adding the column
    this revision is named after.
    """
    # NOTE(review): assumes the `user` table already exists, as the old
    # auto-generated commands implied; confirm before running on a fresh DB.
    # batch_alter_table keeps the operation portable to SQLite, which has
    # only limited ALTER TABLE support.
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.add_column(
            sa.Column(
                'is_admin',
                sa.Boolean(),
                nullable=True,
                # Existing rows become non-admin rather than NULL.
                server_default=sa.false(),
            )
        )


def downgrade():
    """Remove the ``is_admin`` column again, leaving all other data intact."""
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.drop_column('is_admin')

View file

@ -5,3 +5,4 @@ Werkzeug==2.2.2
pandas pandas
requests requests
beautifulsoup4 beautifulsoup4
Flask-Migrate