"""Report which product fields are missing, grouped by source store."""

import os
from contextlib import closing
from typing import Any, Dict, List, Optional, Tuple

import psycopg2
from psycopg2.extras import RealDictCursor


def _env_str(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* when it is unset."""
    return os.environ.get(name, default)


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset or is not a valid
    integer literal (including the empty string).
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


def get_connection():
    """Open a new psycopg2 connection configured from ``PW_DB_*`` variables.

    Every setting has a built-in default (``localhost:5432``, database, user
    and password all ``pricewatch``). The caller owns the returned connection
    and is responsible for closing it.
    """
    return psycopg2.connect(
        host=_env_str("PW_DB_HOST", "localhost"),
        port=_env_int("PW_DB_PORT", 5432),
        dbname=_env_str("PW_DB_NAME", "pricewatch"),
        user=_env_str("PW_DB_USER", "pricewatch"),
        password=_env_str("PW_DB_PASSWORD", "pricewatch"),
    )


def gather(limit: Optional[int] = None):
    """Fetch products with their latest price row and image/spec flags.

    Rows are ordered by ``p.last_updated_at`` descending and returned as
    dict-like records (``RealDictCursor``).

    Args:
        limit: Maximum number of rows to return. ``None`` or ``0`` means
            no limit (the historical truthiness check is kept so existing
            callers behave the same).

    Returns:
        The list produced by ``cursor.fetchall()``.
    """
    query = """
        SELECT
            COALESCE(p.source, 'unknown') AS source,
            p.id,
            p.reference,
            p.title,
            p.description,
            p.category,
            p.msrp,
            EXISTS (
                SELECT 1 FROM product_images WHERE product_id = p.id LIMIT 1
            ) AS has_image,
            EXISTS (
                SELECT 1 FROM product_specs WHERE product_id = p.id LIMIT 1
            ) AS has_specs,
            ph.price,
            ph.stock_status
        FROM products p
        LEFT JOIN LATERAL (
            SELECT price, stock_status
            FROM price_history
            WHERE product_id = p.id
            ORDER BY fetched_at DESC
            LIMIT 1
        ) ph ON TRUE
        ORDER BY p.last_updated_at DESC
    """

    # Bind LIMIT as a query parameter instead of formatting it into the SQL
    # text, so the value is always adapted by the driver.
    params: Optional[Tuple[int, ...]] = None
    if limit:
        query += " LIMIT %s"
        params = (limit,)

    # psycopg2's ``with connection:`` only ends the transaction (commit or
    # rollback); it does not close the connection. ``closing`` guarantees the
    # connection is released even if the query raises.
    with closing(get_connection()) as conn:
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(query, params)
                return cur.fetchall()


def summarize(rows):
    """Group rows by source store and list the products missing each field.

    Args:
        rows: Iterable of dict-like rows exposing ``source``, ``id``,
            ``reference`` and ``title`` plus the audited fields, as
            returned by :func:`gather`.

    Returns:
        A ``(fields, stores)`` tuple. ``fields`` is the ordered list of
        ``(field_name, label)`` pairs that were audited. ``stores`` maps
        each store name to ``{"total": int, "details": {field: [item]}}``
        where every item holds the ``id``, ``reference`` and ``title`` of
        a product missing that field.
    """
    fields: List[Tuple[str, str]] = [
        ("price", "Prix absent"),
        ("stock_status", "Statut stock manquant"),
        ("description", "Description manquante"),
        ("category", "Catégorie manquante"),
        ("msrp", "Prix conseillé absent"),
        ("has_image", "Images absentes"),
        ("has_specs", "Caractéristiques absentes"),
    ]
    # Boolean flags count as missing when falsy; all other fields only when
    # they are None, an empty string or an empty list (so 0 is a valid value).
    flag_fields = ("has_image", "has_specs")

    stores: Dict[str, Dict[str, Any]] = {}
    for row in rows:
        store = row["source"] or "unknown"
        entry = stores.get(store)
        if entry is None:
            entry = stores[store] = {
                "total": 0,
                "details": {field: [] for field, _ in fields},
            }
        entry["total"] += 1

        for field, _label in fields:
            value = row.get(field)
            if field in flag_fields:
                missing = not value
            else:
                missing = value in (None, "", [])
            if missing:
                entry["details"][field].append(
                    {
                        "id": row["id"],
                        "reference": row["reference"],
                        "title": row["title"] or "Sans titre",
                    }
                )
    return fields, stores


def pretty_print(fields, stores, sample_size: int = 5):
    """Print, per store, the missing-field counts and a few sample products.

    Args:
        fields: Ordered ``(field_name, label)`` pairs from :func:`summarize`.
        stores: Per-store summary mapping from :func:`summarize`.
        sample_size: Maximum number of products listed under each field.
    """
    for store, data in stores.items():
        print(f"\n=== Store: {store} ({data['total']} produits) ===")
        for field, label in fields:
            items = data["details"][field]
            count = len(items)
            print(f" {label}: {count}")
            for item in items[:sample_size]:
                print(f" - [{item['id']}] {item['reference']} · {item['title']}")


def main():
    """Audit up to 1000 of the most recently updated products and print it."""
    rows = gather(limit=1000)
    fields, stores = summarize(rows)
    pretty_print(fields, stores)


if __name__ == "__main__":
    main()