Files
scrap/scripts/missing_data_by_store.py
Gilles Soulier cf7c415e22 before claude
2026-01-17 13:40:26 +01:00

122 lines
3.3 KiB
Python

import os
from typing import Dict, Optional
import psycopg2
from psycopg2.extras import RealDictCursor
def _env_str(name: str, default: str) -> str:
    """Return the environment variable *name*, or *default* when it is unset."""
    return os.getenv(name, default)
def _env_int(name: str, default: int) -> int:
    """Return the environment variable *name* parsed as an integer.

    Falls back to *default* when the variable is unset or when its value
    cannot be parsed by ``int()`` (``ValueError``).
    """
    try:
        parsed = int(os.environ.get(name, default))
    except ValueError:
        # Unparseable value: behave as if the variable were not set.
        parsed = default
    return parsed
def get_connection():
    """Open a new psycopg2 connection configured from ``PW_DB_*`` variables.

    Each setting is read from the environment and falls back to a built-in
    default: host ``localhost``, port ``5432``, and ``pricewatch`` for the
    database name, user and password.

    Returns:
        Whatever ``psycopg2.connect`` returns for those settings.
    """
    settings = {
        "host": _env_str("PW_DB_HOST", "localhost"),
        "port": _env_int("PW_DB_PORT", 5432),
        "dbname": _env_str("PW_DB_NAME", "pricewatch"),
        "user": _env_str("PW_DB_USER", "pricewatch"),
        "password": _env_str("PW_DB_PASSWORD", "pricewatch"),
    }
    return psycopg2.connect(**settings)
def gather(limit: Optional[int] = None):
    """Fetch products together with their latest price and presence flags.

    Every returned row carries the product's ``source`` (``'unknown'`` when
    NULL), ``id``, ``reference``, ``title``, ``description``, ``category``
    and ``msrp``; the booleans ``has_image`` / ``has_specs`` (whether at
    least one row exists in ``product_images`` / ``product_specs``); and the
    ``price`` / ``stock_status`` of the most recent ``price_history`` row
    (by ``fetched_at``), both NULL when the product has no history.
    Rows are ordered by ``last_updated_at``, newest first.

    Args:
        limit: Maximum number of rows to return. ``None`` (or any falsy
            value such as ``0``) means no limit.

    Returns:
        The result of ``fetchall()`` on a ``RealDictCursor``: a list of
        dict-like rows keyed by column name.
    """
    query = """
SELECT
COALESCE(p.source, 'unknown') AS source,
p.id,
p.reference,
p.title,
p.description,
p.category,
p.msrp,
EXISTS (
SELECT 1 FROM product_images WHERE product_id = p.id LIMIT 1
) AS has_image,
EXISTS (
SELECT 1 FROM product_specs WHERE product_id = p.id LIMIT 1
) AS has_specs,
ph.price,
ph.stock_status
FROM products p
LEFT JOIN LATERAL (
SELECT price, stock_status
FROM price_history
WHERE product_id = p.id
ORDER BY fetched_at DESC
LIMIT 1
) ph ON TRUE
ORDER BY p.last_updated_at DESC
"""
    params = None
    if limit:
        # Bind the limit as a query parameter instead of formatting it into
        # the SQL text, so a non-integer value can never alter the statement.
        query += " LIMIT %s"
        params = (limit,)

    conn = get_connection()
    try:
        # ``with conn`` only commits or rolls back the transaction; it does
        # not close the connection, hence the explicit close() below.
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(query, params)
                return cur.fetchall()
    finally:
        conn.close()
def summarize(rows):
    """Group products by store and record which ones lack each tracked field.

    Args:
        rows: Iterable of mappings providing ``source``, ``id``,
            ``reference`` and ``title`` keys, plus (optionally) the tracked
            fields listed below.

    Returns:
        A ``(fields, stores)`` tuple. ``fields`` is the ordered list of
        ``(field_name, label)`` pairs. ``stores`` maps each store name to a
        dict with ``total`` (number of products seen) and ``details``
        (field name -> list of ``{"id", "reference", "title"}`` entries for
        products missing that field).
    """
    fields = [
        ("price", "Prix absent"),
        ("stock_status", "Statut stock manquant"),
        ("description", "Description manquante"),
        ("category", "Catégorie manquante"),
        ("msrp", "Prix conseillé absent"),
        ("has_image", "Images absentes"),
        ("has_specs", "Caractéristiques absentes"),
    ]
    # Boolean flags are missing when falsy; every other field is missing
    # only when it is None, an empty string or an empty list.
    presence_flags = ("has_image", "has_specs")
    stores: Dict[str, Dict[str, object]] = {}

    for product in rows:
        store_name = product["source"] or "unknown"
        if store_name not in stores:
            stores[store_name] = {
                "total": 0,
                "details": {name: [] for name, _ in fields},
            }
        bucket = stores[store_name]
        bucket["total"] += 1

        for name, _ in fields:
            current = product.get(name)
            if name in presence_flags:
                present = bool(current)
            else:
                present = current not in (None, "", [])
            if present:
                continue
            bucket["details"][name].append(
                {
                    "id": product["id"],
                    "reference": product["reference"],
                    "title": product["title"] or "Sans titre",
                }
            )

    return fields, stores
def pretty_print(fields, stores):
    """Print the per-store report of missing fields to standard output.

    For every store, a header line with its product total is printed,
    followed by one line per field giving the number of affected products
    and at most five example products for that field.

    Args:
        fields: Ordered ``(field_name, label)`` pairs.
        stores: Mapping of store name to a dict with ``total`` and
            ``details`` (field name -> list of dicts with ``id``,
            ``reference`` and ``title``).
    """
    for store in stores:
        data = stores[store]
        print(f"\n=== Store: {store} ({data['total']} produits) ===")
        for field, label in fields:
            affected = data["details"][field]
            unit = len(affected)
            print(f" {label}: {unit}")
            # Show only a short sample so the report stays readable.
            for item in affected[:5]:
                print(f" - [{item['id']}] {item['reference']} · {item['title']}")
def main():
    """Fetch up to 1000 products and print the per-store missing-data report."""
    report_fields, per_store = summarize(gather(limit=1000))
    pretty_print(report_fields, per_store)


# Run the report only when the file is executed as a script.
if __name__ == "__main__":
    main()