before claude
This commit is contained in:
121
scripts/missing_data_by_store.py
Normal file
121
scripts/missing_data_by_store.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import os
|
||||
from typing import Dict, Optional
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
|
||||
def _env_str(name: str, default: str) -> str:
|
||||
return os.environ.get(name, default)
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.environ.get(name, default))
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def get_connection():
    """Open a new psycopg2 connection configured from the environment.

    Each setting is read from a ``PW_DB_*`` variable and falls back to a
    local-development default when the variable is unset:
    ``PW_DB_HOST`` (localhost), ``PW_DB_PORT`` (5432), ``PW_DB_NAME``,
    ``PW_DB_USER`` and ``PW_DB_PASSWORD`` (all "pricewatch").

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    settings = {
        "host": _env_str("PW_DB_HOST", "localhost"),
        "port": _env_int("PW_DB_PORT", 5432),
        "dbname": _env_str("PW_DB_NAME", "pricewatch"),
        "user": _env_str("PW_DB_USER", "pricewatch"),
        "password": _env_str("PW_DB_PASSWORD", "pricewatch"),
    }
    return psycopg2.connect(**settings)
|
||||
|
||||
|
||||
def gather(limit: Optional[int] = None):
    """Fetch products together with their most recent price observation.

    For every row of ``products`` the query returns the source (or
    ``'unknown'`` when it is NULL), id, reference, title, description,
    category and msrp, two booleans telling whether at least one row
    exists in ``product_images`` / ``product_specs`` for the product, and
    the ``price`` / ``stock_status`` of the latest ``price_history`` row
    (by ``fetched_at``). Products without any price history still appear,
    with NULL price and stock status. Rows are ordered by
    ``last_updated_at``, newest first.

    Args:
        limit: Maximum number of rows to return. A falsy value (``None``
            or ``0``) means no limit.

    Returns:
        The list of rows produced by ``cursor.fetchall()`` using
        ``RealDictCursor`` (one mapping per product, keyed by column name).
    """
    query = """
        SELECT
            COALESCE(p.source, 'unknown') AS source,
            p.id,
            p.reference,
            p.title,
            p.description,
            p.category,
            p.msrp,
            EXISTS (
                SELECT 1 FROM product_images WHERE product_id = p.id LIMIT 1
            ) AS has_image,
            EXISTS (
                SELECT 1 FROM product_specs WHERE product_id = p.id LIMIT 1
            ) AS has_specs,
            ph.price,
            ph.stock_status
        FROM products p
        LEFT JOIN LATERAL (
            SELECT price, stock_status
            FROM price_history
            WHERE product_id = p.id
            ORDER BY fetched_at DESC
            LIMIT 1
        ) ph ON TRUE
        ORDER BY p.last_updated_at DESC
    """
    params = None
    if limit:
        # Bind the limit as a query parameter instead of formatting it into
        # the SQL text, so the value can never alter the statement.
        query += " LIMIT %s"
        params = (limit,)

    conn = get_connection()
    try:
        # psycopg2's connection context manager only commits or rolls back
        # the transaction; it does not close the connection, hence the
        # explicit close() in the finally clause.
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(query, params)
                return cur.fetchall()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def summarize(rows):
    """Group products by store and record which ones lack each tracked field.

    Args:
        rows: Iterable of mappings exposing ``source``, ``id``,
            ``reference`` and ``title`` keys plus the tracked fields
            (looked up with ``.get``, so absent keys count as missing).

    Returns:
        A ``(fields, stores)`` tuple. ``fields`` is the ordered list of
        ``(column, label)`` pairs that were checked. ``stores`` maps each
        store name to ``{"total": <row count>, "details": {column: [...]}}``
        where each list holds ``{"id", "reference", "title"}`` entries for
        the products missing that column.
    """
    fields = [
        ("price", "Prix absent"),
        ("stock_status", "Statut stock manquant"),
        ("description", "Description manquante"),
        ("category", "Catégorie manquante"),
        ("msrp", "Prix conseillé absent"),
        ("has_image", "Images absentes"),
        ("has_specs", "Caractéristiques absentes"),
    ]
    # Boolean columns: any falsy value means the related data is absent.
    flag_columns = ("has_image", "has_specs")
    stores: Dict[str, Dict[str, object]] = {}

    for product in rows:
        store_name = product["source"] or "unknown"
        if store_name not in stores:
            stores[store_name] = {
                "total": 0,
                "details": {column: [] for column, _ in fields},
            }
        bucket = stores[store_name]
        bucket["total"] += 1

        for column, _ in fields:
            current = product.get(column)
            if column in flag_columns:
                is_missing = not current
            else:
                # Only None, empty string and empty list count as missing,
                # so a numeric 0 is still treated as present.
                is_missing = current in (None, "", [])
            if not is_missing:
                continue
            bucket["details"][column].append(
                {
                    "id": product["id"],
                    "reference": product["reference"],
                    "title": product["title"] or "Sans titre",
                }
            )

    return fields, stores
|
||||
|
||||
|
||||
def pretty_print(fields, stores):
    """Print a per-store report of missing data to standard output.

    For each store a header line with the product total is printed, then
    one line per tracked field with the number of affected products,
    followed by at most five sample products for that field.

    Args:
        fields: Ordered ``(column, label)`` pairs, as returned by
            ``summarize``.
        stores: Mapping of store name to its ``total`` / ``details``
            summary, as returned by ``summarize``.
    """
    sample_size = 5
    for store_name, info in stores.items():
        total = info["total"]
        print(f"\n=== Store: {store_name} ({total} produits) ===")
        for column, caption in fields:
            offenders = info["details"][column]
            count = len(offenders)
            print(f" {caption}: {count}")
            for product in offenders[:sample_size]:
                ident = product["id"]
                reference = product["reference"]
                title = product["title"]
                print(f" - [{ident}] {reference} · {title}")
|
||||
|
||||
|
||||
def main():
    """Report missing data for the 1000 most recently updated products."""
    fields, stores = summarize(gather(limit=1000))
    pretty_print(fields, stores)
|
||||
|
||||
|
||||
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user