"""Periodically capture website screenshots listed in a CSV file.

Configuration is read from environment variables:

* ``URLS_FILE``        - path of the CSV file (default ``/app/urls.csv``)
* ``OUTPUT_DIR``       - directory the screenshots are written to
* ``INTERVAL_MINUTES`` - default capture interval for rows without their own
* ``SCALE``            - default scale factor for rows without their own

CSV columns read by this script: ``url``, ``filename``, ``scale``,
``selector``, ``element_width``, ``element_height``, ``interval_minutes``.
"""

import asyncio
import contextlib
import csv
import os
import time

import schedule
from PIL import Image, ImageChops
from playwright.async_api import async_playwright

# --- Configuration ---
URLS_FILE = os.environ.get("URLS_FILE", "/app/urls.csv")
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/output")
DEFAULT_INTERVAL = int(os.environ.get("INTERVAL_MINUTES", "60"))
DEFAULT_SCALE = float(os.environ.get("SCALE", "1.0"))


# --- Load CSV ---
def _parse_positive(raw, cast, default, field):
    """Convert a CSV cell to a positive number, or return *default*.

    Empty or missing cells yield *default* silently. Cells that cannot be
    converted with *cast*, or that are not strictly positive, yield *default*
    and print a warning, so that one bad cell does not abort the whole load.
    """
    text = (raw or "").strip()
    if not text:
        return default
    try:
        value = cast(text)
    except ValueError:
        value = None
    # "not value > 0" also rejects NaN, which compares false to everything.
    if value is None or not value > 0:
        print(f"⚠️ Ungültiger Wert '{text}' für '{field}' – Standardwert wird verwendet")
        return default
    return value


def load_urls():
    """Read ``URLS_FILE`` and return the list of capture entries.

    Returns:
        A list of dicts with the keys ``url``, ``filename``, ``scale``,
        ``selector``, ``element_width``, ``element_height`` and
        ``interval_minutes``. Rows without a URL or a filename are dropped.
        An empty list is returned when the file does not exist.
    """
    entries = []
    if not os.path.exists(URLS_FILE):
        return entries

    # "utf-8-sig" strips a leading BOM if present; without this the first
    # header would become "\ufeffurl" and every row would be discarded.
    with open(URLS_FILE, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing cells of short rows with None, so every
            # value is normalised with "or ''" before calling .strip().
            url = (row.get("url") or "").strip()
            filename = (row.get("filename") or "").strip()
            if not (url and filename):
                continue
            entries.append({
                "url": url,
                "filename": filename,
                "scale": _parse_positive(
                    row.get("scale"), float, DEFAULT_SCALE, "scale"),
                "selector": (row.get("selector") or "").strip(),
                "element_width": _parse_positive(
                    row.get("element_width"), int, None, "element_width"),
                "element_height": _parse_positive(
                    row.get("element_height"), int, None, "element_height"),
                "interval_minutes": _parse_positive(
                    row.get("interval_minutes"), int, DEFAULT_INTERVAL,
                    "interval_minutes"),
            })
    return entries


# --- Check whether the image has changed ---
def images_different(path1, path2):
    """Return True if the images at *path1* and *path2* differ.

    A missing *path2* counts as "different". Images of different size are
    always different. Otherwise both images are compared pixel by pixel,
    including the alpha channel.
    """
    if not os.path.exists(path2):
        return True

    with Image.open(path1) as img1, Image.open(path2) as img2:
        # ImageChops.difference only looks at the overlapping region, so a
        # pure size change has to be detected up front.
        if img1.size != img2.size:
            return True
        # Normalise the mode: difference() rejects images of differing modes.
        diff = ImageChops.difference(img1.convert("RGBA"), img2.convert("RGBA"))

    # getbbox() on an RGBA image may consider the alpha band only, which is
    # all zero in the difference of two opaque screenshots. Check the colour
    # bands and the alpha band separately instead.
    colour_changed = diff.convert("RGB").getbbox() is not None
    alpha_changed = diff.getchannel("A").getbbox() is not None
    return colour_changed or alpha_changed


# --- Take screenshot ---
async def _take_screenshot(url, selector, width, height, temp_path):
    """Open *url* in headless Chromium and write a screenshot to *temp_path*.

    If *selector* matches an element, only that element is captured (after
    optionally forcing its CSS width/height); otherwise the full page is.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                viewport={"width": 1920, "height": 1080})
            page = await context.new_page()
            await page.goto(url, wait_until="networkidle", timeout=60000)

            element = None
            if selector:
                element = await page.query_selector(selector)
                if element is None:
                    print(f"❌ Selector '{selector}' nicht gefunden, ganze Seite wird genutzt")

            if element is None:
                await page.screenshot(path=temp_path, full_page=True)
                return

            # Resize the element if a size was given.
            if width or height:
                js_width = f"{width}px" if width else "auto"
                js_height = f"{height}px" if height else "auto"
                await page.eval_on_selector(
                    selector,
                    f"(el) => {{ el.style.width = '{js_width}'; el.style.height = '{js_height}'; }}")
            await element.screenshot(path=temp_path)
        finally:
            # Close exactly once, on success and on failure alike.
            await browser.close()


def _rescale_in_place(path, scale):
    """Resize the image at *path* by *scale*; a factor of ~1.0 is a no-op."""
    if abs(scale - 1.0) <= 0.001:
        return
    if scale <= 0:
        raise ValueError(f"Ungültiger Skalierungsfaktor: {scale}")
    with Image.open(path) as img:
        # Clamp to 1px so a very small factor cannot produce a 0-sized image.
        new_size = (max(1, int(img.width * scale)),
                    max(1, int(img.height * scale)))
        resized = img.resize(new_size, Image.Resampling.LANCZOS)
    # Save after the source handle is closed, since it is the same file.
    resized.save(path, optimize=True, quality=90)


async def capture_page(entry):
    """Capture one entry and store the result if it changed.

    The screenshot is first written to a temporary file next to the target,
    optionally scaled, and only moved over the existing file when the image
    content differs. Any error is reported on stdout and swallowed so that a
    single failing URL does not stop the scheduler.

    Args:
        entry: dict as produced by ``load_urls`` (requires ``url``,
            ``filename`` and ``scale``; ``selector``, ``element_width`` and
            ``element_height`` are optional).
    """
    url = entry["url"]
    filename = entry["filename"]
    scale = entry["scale"]
    selector = entry.get("selector")
    width = entry.get("element_width")
    height = entry.get("element_height")

    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Screenshot {url} → {filename} (Selector: '{selector}')")

    # --- Temporary file ---
    base, ext = os.path.splitext(filename)
    if ext.lower() not in (".png", ".jpg", ".jpeg"):
        ext = ".png"
    temp_path = os.path.join(OUTPUT_DIR, f"{base}.tmp{ext}")
    output_path = os.path.join(OUTPUT_DIR, filename)

    try:
        # Allow filenames that contain a sub-directory.
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

        await _take_screenshot(url, selector, width, height, temp_path)

        # --- Scaling ---
        _rescale_in_place(temp_path, scale)

        # --- Only store the screenshot if it has changed ---
        if images_different(temp_path, output_path):
            os.replace(temp_path, output_path)
            print(f"→ {filename} gespeichert ({scale*100:.0f}% Größe).")
        else:
            os.remove(temp_path)
            print(f"→ {filename} unverändert, nicht gespeichert.")
    except Exception as e:
        print(f"❌ Fehler bei {url}: {e}")
        # Do not leave a stale temporary file behind after a failure.
        with contextlib.suppress(OSError):
            os.remove(temp_path)


# --- Scheduler per URL ---
def _run_capture(entry):
    """Run ``capture_page`` for *entry* to completion on a fresh event loop."""
    asyncio.run(capture_page(entry))


def schedule_screenshots(entries):
    """Capture every entry once, then keep re-capturing on its interval.

    This function never returns: after the initial captures it loops forever,
    running due jobs once per second.
    """
    for entry in entries:
        # Take one screenshot immediately.
        _run_capture(entry)
        # Schedule the recurring capture.
        interval = entry.get("interval_minutes", DEFAULT_INTERVAL)
        schedule.every(interval).minutes.do(_run_capture, entry)

    while True:
        schedule.run_pending()
        time.sleep(1)


def main():
    """Script entry point: load the CSV and start the scheduler."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    entries = load_urls()
    if not entries:
        print("Keine Einträge in CSV gefunden!")
    else:
        schedule_screenshots(entries)


if __name__ == "__main__":
    main()