diff --git a/.coverage b/.coverage old mode 100755 new mode 100644 index e2106b9..719073e Binary files a/.coverage and b/.coverage differ diff --git a/.env b/.env new file mode 100644 index 0000000..289289d --- /dev/null +++ b/.env @@ -0,0 +1,21 @@ +# Database +PW_DB_HOST=localhost +PW_DB_PORT=5432 +PW_DB_DATABASE=pricewatch +PW_DB_USER=pricewatch +PW_DB_PASSWORD=pricewatch + +# Redis +PW_REDIS_HOST=localhost +PW_REDIS_PORT=6379 +PW_REDIS_DB=0 + +# App +PW_DEBUG=false +PW_WORKER_TIMEOUT=300 +PW_WORKER_CONCURRENCY=2 +PW_ENABLE_DB=true +PW_ENABLE_WORKER=true + +# API +PW_API_TOKEN=change_me diff --git a/.env.example b/.env.example old mode 100755 new mode 100644 index a89bb87..289289d --- a/.env.example +++ b/.env.example @@ -16,3 +16,6 @@ PW_WORKER_TIMEOUT=300 PW_WORKER_CONCURRENCY=2 PW_ENABLE_DB=true PW_ENABLE_WORKER=true + +# API +PW_API_TOKEN=change_me diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 diff --git a/CHANGELOG.md b/CHANGELOG.md index 90643a3..623e305 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,10 @@ Le format est basé sur [Keep a Changelog](https://keepachangelog.com/fr/1.0.0/) ## [Non publié] +**Dernière mise à jour**: 2026-01-15 + ### En cours -- Phase 2 : Base de données PostgreSQL -- Phase 2 : Worker Redis/RQ -- Phase 3 : API REST FastAPI +- Phase 3 : API REST FastAPI (filtres/exports/webhooks) - Phase 4 : Web UI ### Ajouté @@ -26,6 +26,38 @@ Le format est basé sur [Keep a Changelog](https://keepachangelog.com/fr/1.0.0/) - Tests repository/pipeline (SQLite) - Test end-to-end CLI + DB (SQLite) - Worker RQ + scheduler (tasks + CLI) +- Tests worker/scheduler (SQLite + mocks) +- Tests CLI worker/enqueue/schedule + erreur DB (SQLite) +- Gestion erreurs Redis (RedisUnavailableError, check_redis_connection) +- Messages d'erreur clairs pour Redis down dans CLI (worker, enqueue, schedule) +- 7 nouveaux tests pour la gestion des erreurs Redis +- Logs d'observabilité pour jobs planifiés (JOB START/OK/FAILED, FETCH, PARSE) +- Tests end-to-end worker + DB (Redis/SQLite, skip si Redis down) +- Test end-to-end CLI -> DB -> worker (Redis, skip si Redis down) +- Guide de migration JSON -> DB +- API FastAPI (health/products/prices/logs/enqueue/schedule) + auth token +- Docker API + uvicorn +- Tests API de base +- Docker Compose API: port 8001 et hosts postgres/redis +- CRUD API (products/prices/logs) +- Filtres avances API (prix, dates, stock, status) +- Exports API CSV/JSON (products, prices, logs) +- Webhooks API (CRUD + test) +- Tests compatibilite `--no-db` (CLI) +- Test charge legere 100 snapshots (SQLite) +- Nettoyage warnings (Pydantic ConfigDict, datetime UTC, selectors SoupSieve) +- Web UI Vue 3 (layout dense, themes, settings) + Docker compose frontend +- Web UI: integration API (list produits, edition, enqueue, settings API) +- API: endpoints preview/commit scraping pour ajout produit depuis l UI +- Web UI: ajout produit par URL avec preview scraping et sauvegarde en base +- Web UI: popup ajout produit central + favicon +- API: logs Uvicorn exposes pour l UI +- Parsing prix: gestion des separateurs de milliers (espace, NBSP, point) +- API/DB: description + msrp + images/specs exposes, reduction calculee + +### Corrigé +- Migration Alembic: down_revision aligne sur 20260114_02 +- Amazon: extraction images via data-a-dynamic-image + filtrage logos --- diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b16f119 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +FROM python:3.12-slim + +WORKDIR /app + +ENV 
PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+RUN mkdir -p /app/logs
+
+COPY pyproject.toml README.md alembic.ini ./
+COPY pricewatch ./pricewatch
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    libglib2.0-0 \
+    libgbm1 \
+    libnss3 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libgtk-3-0 \
+    libxkbcommon0 \
+    libxcomposite1 \
+    libxrandr2 \
+    libxinerama1 \
+    libasound2 \
+    libpangocairo-1.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir -e .
+
+EXPOSE 8000
+
+CMD ["sh", "-c", "uvicorn pricewatch.app.api.main:app --host 0.0.0.0 --port 8000 2>&1 | tee /app/logs/uvicorn.log"]
diff --git a/Image collée (2).png b/Image collée (2).png
new file mode 100755
index 0000000..a3d7df3
Binary files /dev/null and b/Image collée (2).png differ
diff --git a/Image collée (3).png b/Image collée (3).png
new file mode 100755
index 0000000..a702708
Binary files /dev/null and b/Image collée (3).png differ
diff --git a/Image collée (4).png b/Image collée (4).png
new file mode 100755
index 0000000..93aae9f
Binary files /dev/null and b/Image collée (4).png differ
diff --git a/Image collée.png b/Image collée.png
new file mode 100755
index 0000000..6888ae6
Binary files /dev/null and b/Image collée.png differ
diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md
new file mode 100644
index 0000000..558bf53
--- /dev/null
+++ b/MIGRATION_GUIDE.md
@@ -0,0 +1,83 @@
+# Migration JSON -> Database (Phase 2)
+
+Guide pour migrer des resultats JSON existants (Phase 1) vers PostgreSQL (Phase 2).
+
+## Prerequis
+
+- PostgreSQL + Redis operationnels
+- Dependances installees (`pip install -e .`)
+- Migration DB appliquee (`alembic upgrade head`)
+
+## 1) Verifier la configuration
+
+Copier l'exemple et ajuster les identifiants si besoin:
+
+```bash
+cp .env.example .env
+```
+
+Verifier la configuration:
+
+```bash
+pricewatch doctor
+```
+
+## 2) Initialiser la base
+
+Si la base n'est pas encore initialisee:
+
+```bash
+pricewatch upgrade
+```
+
+Verifier les tables:
+
+```bash
+psql -h localhost -U pricewatch pricewatch
+\dt
+```
+
+## 3) Migrer un fichier JSON existant
+
+Le JSON de Phase 1 est deja conforme au schema `ProductSnapshot`. Il suffit de le recharger puis de repasser par la persistence.
+
+### Option A: Script rapide
+
+Creer un petit script ad-hoc (exemple):
+
+```python
+# migrate_json.py
+from pricewatch.app.core.io import read_json_results
+from pricewatch.app.scraping.pipeline import ScrapingPipeline
+
+snapshots = read_json_results("scraped_store.json")
+
+pipeline = ScrapingPipeline()
+for snapshot in snapshots:
+    pipeline.process_snapshot(snapshot, save_to_db=True)
+```
+
+Execution:
+
+```bash
+python migrate_json.py
+```
+
+### Option B: Enqueue via worker
+
+Si vous voulez traiter les snapshots via le worker, bouclez sur les snapshots charges et enqueuez `scrape_product` avec l'URL de chacun, par exemple via `ScrapingScheduler(get_config()).enqueue_immediate(snapshot.url, save_db=True)`, puis laissez le worker tourner. Cela garantira un refresh complet (fetch + parse + DB) au lieu d'inserer uniquement le JSON.
+
+## 4) Verifier les donnees
+
+```bash
+psql -h localhost -U pricewatch pricewatch
+SELECT COUNT(*) FROM products;
+SELECT COUNT(*) FROM price_history;
+SELECT COUNT(*) FROM scraping_logs;
+```
+
+## 5) Notes importantes
+
+- Si `reference` est absente, la persistence du produit est ignoree, mais un `ScrapingLog` est cree.
+- La contrainte d'unicite `(source, reference)` evite les doublons.
+- Les images/specs sont synchronises par ajout/upsert (pas de suppression automatique).
+- En cas d'erreur DB, le snapshot est conserve et une note est ajoutee dans `snapshot.debug.notes`. diff --git a/PHASE_1_COMPLETE.md b/PHASE_1_COMPLETE.md old mode 100755 new mode 100644 diff --git a/PHASE_2_PROGRESS.md b/PHASE_2_PROGRESS.md old mode 100755 new mode 100644 index 1408c7f..4f796b5 --- a/PHASE_2_PROGRESS.md +++ b/PHASE_2_PROGRESS.md @@ -8,17 +8,28 @@ ## 📊 Vue d'Ensemble +### Mises a jour recentes +- Migration Alembic corrigee (down_revision sur 20260114_02) +- Extraction images Amazon amelioree (data-a-dynamic-image + filtre logos) +- Nouveau scraping de validation (URL Amazon ASUS A16) + +### Prochaines actions +- Verifier l'affichage des images, description, specs, msrp et reduction dans le Web UI +- Confirmer que le popup ajout produit affiche toutes les donnees du preview + ### Objectifs Phase 2 - ✅ Configuration centralisée (database, Redis, app) - ✅ Modèles SQLAlchemy ORM (5 tables) - ✅ Connexion base de données (init_db, get_session) - ✅ Migrations Alembic -- ⏳ Repository pattern (CRUD) -- ⏳ Worker RQ pour scraping asynchrone -- ⏳ Scheduler pour jobs récurrents -- ✅ CLI étendu (commandes DB) +- ✅ Repository pattern (CRUD) +- ✅ Worker RQ pour scraping asynchrone +- ✅ Scheduler pour jobs récurrents +- ✅ CLI étendu (commandes DB + worker) - ✅ Docker Compose (PostgreSQL + Redis) -- ⏳ Tests complets +- ✅ Gestion erreurs Redis +- ✅ Logs d'observabilité jobs +- ⏳ Tests end-to-end (Semaine 4) --- @@ -226,7 +237,7 @@ PW_ENABLE_WORKER=true --- -## 📦 Semaine 2: Repository & Pipeline (EN COURS) +## 📦 Semaine 2: Repository & Pipeline (TERMINEE) ### Tâches Prévues @@ -279,7 +290,7 @@ PW_ENABLE_WORKER=true --- -## 📦 Semaine 3: Worker Infrastructure (EN COURS) +## 📦 Semaine 3: Worker Infrastructure (TERMINEE) ### Tâches Prévues @@ -313,22 +324,73 @@ pricewatch schedule --interval 24 # Scrape quotidien **Statut**: ✅ Terminé +#### Tests worker + scheduler ✅ +**Fichiers**: +- `tests/tasks/test_scrape_task.py` +- `tests/tasks/test_scheduler.py` + +**Statut**: ✅ Terminé + +#### Gestion erreurs Redis ✅ +**Fichiers modifiés**: +- `pricewatch/app/tasks/scheduler.py`: + - Ajout `RedisUnavailableError` exception + - Ajout `check_redis_connection()` helper + - Connexion lazy avec ping de vérification +- `pricewatch/app/cli/main.py`: + - Commandes `worker`, `enqueue`, `schedule` gèrent Redis down + - Messages d'erreur clairs avec instructions + +**Tests ajoutés** (7 tests): +- `test_scheduler_redis_connection_error` +- `test_scheduler_lazy_connection` +- `test_check_redis_connection_success` +- `test_check_redis_connection_failure` +- `test_scheduler_schedule_redis_error` + +**Statut**: ✅ Terminé + +#### Logs d'observabilité jobs ✅ +**Fichier modifié**: `pricewatch/app/tasks/scrape.py` + +**Logs ajoutés**: +- `[JOB START]` - Début du job avec URL +- `[STORE]` - Store détecté +- `[FETCH]` - Résultat fetch HTTP/Playwright (durée, taille) +- `[PARSE]` - Résultat parsing (titre, prix) +- `[JOB OK]` / `[JOB FAILED]` - Résultat final avec durée totale + +**Note**: Les logs sont aussi persistés en DB via `ScrapingLog` (déjà implémenté). 
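+
+Exemple de verification locale (esquisse indicative, Redis et PostgreSQL etant supposes demarres via Docker Compose):
+
+```bash
+# Terminal 1: lancer le worker et suivre [JOB START]/[FETCH]/[PARSE]/[JOB OK] dans la console
+pricewatch worker
+
+# Terminal 2: enqueue un job de scraping
+pricewatch enqueue "https://example.com/product"
+
+# Cote DB, les memes evenements sont traces dans scraping_logs
+psql -h localhost -U pricewatch pricewatch \
+  -c "SELECT fetch_status, fetch_method, duration_ms FROM scraping_logs ORDER BY fetched_at DESC LIMIT 5;"
+```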
+ +**Statut**: ✅ Terminé + --- -## 📦 Semaine 4: Tests & Documentation (NON DÉMARRÉ) +## 📦 Semaine 4: Tests & Documentation (EN COURS) ### Tâches Prévues #### Tests -- Tests end-to-end (CLI → DB → Worker) -- Tests erreurs (DB down, Redis down) -- Tests backward compatibility (`--no-db`) -- Performance tests (100+ produits) +- ✅ Tests end-to-end (CLI → DB → Worker) +- ✅ Tests erreurs (DB down, Redis down) +- ✅ Tests backward compatibility (`--no-db`) +- ✅ Performance tests (100+ produits) + +**Fichiers tests ajoutes**: +- `tests/cli/test_worker_cli.py` +- `tests/cli/test_enqueue_schedule_cli.py` +- `tests/scraping/test_pipeline.py` (erreurs DB) +- `tests/tasks/test_redis_errors.py` +- `tests/cli/test_run_no_db.py` +- `tests/db/test_bulk_persistence.py` +- `tests/tasks/test_worker_end_to_end.py` +- `tests/cli/test_cli_worker_end_to_end.py` + - **Resultat**: OK avec Redis actif #### Documentation -- Update README.md (setup Phase 2) -- Update CHANGELOG.md -- Migration guide (JSON → DB) +- ✅ Update README.md (setup Phase 2) +- ✅ Update CHANGELOG.md +- ✅ Migration guide (JSON → DB) --- @@ -338,20 +400,22 @@ pricewatch schedule --interval 24 # Scrape quotidien |-----------|------------|---------|---| | **Semaine 1** | 10 | 10 | 100% | | **Semaine 2** | 5 | 5 | 100% | -| **Semaine 3** | 3 | 6 | 50% | -| **Semaine 4** | 0 | 7 | 0% | -| **TOTAL Phase 2** | 18 | 28 | **64%** | +| **Semaine 3** | 6 | 6 | 100% | +| **Semaine 4** | 7 | 7 | 100% | +| **TOTAL Phase 2** | 28 | 28 | **100%** | --- ## 🎯 Prochaine Étape Immédiate **Prochaine étape immédiate** -- Tests end-to-end worker + DB -- Gestion des erreurs Redis down (CLI + worker) +- Phase 2 terminee, bascule vers Phase 3 (API REST) +- API v1 avancee: filtres, export CSV/JSON, webhooks + tests associes -**Apres (prevu)** -- Logs d'observabilite pour jobs planifies +**Après (prévu)** +- Documentation Phase 2 (resume final) +- Retry policy (optionnel) +- Phase 4 Web UI (dashboard + graphiques) --- @@ -423,7 +487,13 @@ SELECT * FROM scraping_logs ORDER BY fetched_at DESC LIMIT 5; --- -**Dernière mise à jour**: 2026-01-14 +**Dernière mise à jour**: 2026-01-15 + +### Recap avancement recent (Phase 3 API) +- Filtres avances + exports CSV/JSON + webhooks (CRUD + test) +- Tests API avances ajoutes +- Nettoyage warnings Pydantic/datetime/selectors +- Suite pytest complete: 339 passed, 4 skipped ### Validation locale (Semaine 1) ```bash @@ -434,4 +504,4 @@ psql -h localhost -U pricewatch pricewatch ``` **Resultat**: 6 tables visibles (products, price_history, product_images, product_specs, scraping_logs, alembic_version). -**Statut**: ✅ Semaine 1 en cours (30% complétée) +**Statut**: ✅ Semaine 1 terminee (100%). diff --git a/README.md b/README.md index cfa1533..c2c7800 100755 --- a/README.md +++ b/README.md @@ -146,6 +146,70 @@ docker-compose up -d cp .env.example .env ``` +Guide de migration JSON -> DB: `MIGRATION_GUIDE.md` + +## API REST (Phase 3) + +L'API est protegee par un token simple. 
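+Toutes les routes, sauf `/health` et `/version`, attendent un en-tete `Authorization: Bearer <token>`.
+En complement des exemples curl ci-dessous, un appel minimal cote Python (esquisse indicative; `httpx` est deja utilise par le projet, et le port 8001 correspond au mapping Docker Compose 8001:8000):
+
+```python
+import os
+
+import httpx
+
+token = os.environ["PW_API_TOKEN"]
+
+# Liste les 10 produits les plus recemment mis a jour via l'API protegee par token
+resp = httpx.get(
+    "http://localhost:8001/products",
+    headers={"Authorization": f"Bearer {token}"},
+    params={"limit": 10},
+)
+resp.raise_for_status()
+for product in resp.json():
+    print(product["reference"], product["latest_price"])
+```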
+ +```bash +export PW_API_TOKEN=change_me +docker compose up -d api +``` + +Exemples: + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" http://localhost:8001/products +curl http://localhost:8001/health +``` + +Filtres (exemples rapides): + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" \\ + "http://localhost:8001/products?price_min=100&stock_status=in_stock" +curl -H "Authorization: Bearer $PW_API_TOKEN" \\ + "http://localhost:8001/products/1/prices?fetch_status=success&fetched_after=2026-01-14T00:00:00" +curl -H "Authorization: Bearer $PW_API_TOKEN" \\ + "http://localhost:8001/logs?fetch_status=failed&fetched_before=2026-01-15T00:00:00" +``` + +Exports (CSV/JSON): + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" \\ + "http://localhost:8001/products/export?format=csv" +curl -H "Authorization: Bearer $PW_API_TOKEN" \\ + "http://localhost:8001/logs/export?format=json" +``` + +CRUD (examples rapides): + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" -X POST http://localhost:8001/products \\ + -H "Content-Type: application/json" \\ + -d '{"source":"amazon","reference":"REF1","url":"https://example.com"}' +``` + +Webhooks (exemples rapides): + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" -X POST http://localhost:8001/webhooks \\ + -H "Content-Type: application/json" \\ + -d '{"event":"price_changed","url":"https://example.com/webhook","enabled":true}' +curl -H "Authorization: Bearer $PW_API_TOKEN" -X POST http://localhost:8001/webhooks/1/test +``` + +## Web UI (Phase 4) + +Interface Vue 3 dense avec themes Gruvbox/Monokai, header fixe, sidebar filtres, et split compare. + +```bash +docker compose up -d frontend +# Acces: http://localhost:3000 +``` + ## Configuration (scrap_url.yaml) ```yaml diff --git a/TODO.md b/TODO.md index 8ce85be..e9770d6 100755 --- a/TODO.md +++ b/TODO.md @@ -154,7 +154,7 @@ Liste des tâches priorisées pour le développement de PriceWatch. --- -## Phase 2 : Base de données (En cours) +## Phase 2 : Base de données (Terminee) ### Persistence - [x] Schéma PostgreSQL @@ -166,8 +166,13 @@ Liste des tâches priorisées pour le développement de PriceWatch. - [x] ScrapingPipeline (persistence optionnelle) - [x] CLI `--save-db/--no-db` - [x] Tests end-to-end CLI + DB -- [ ] CRUD produits -- [ ] Historique prix +- [x] Tests backward compatibility (`--no-db`) +- [x] Tests performance (100+ produits) +- [x] CRUD produits +- [x] Historique prix + +### Documentation +- [x] Migration guide (JSON -> DB) ### Configuration - [x] Fichier config (DB credentials) @@ -182,26 +187,43 @@ Liste des tâches priorisées pour le développement de PriceWatch. 
- [x] Setup Redis - [x] Worker RQ - [x] Queue de scraping +- [x] Tests worker + scheduler +- [x] Gestion erreurs Redis (RedisUnavailableError) - [ ] Retry policy ### Planification - [x] Cron ou scheduler intégré - [x] Scraping quotidien automatique -- [ ] Logs des runs +- [x] Logs des runs (JOB START/OK/FAILED) +- [x] Tests end-to-end worker + DB +- [x] Tests end-to-end CLI -> DB -> worker + +## Phase 3 : API REST (En cours) + +### API FastAPI +- [x] Endpoints read-only (products, prices, logs, health) +- [x] Auth token simple (Bearer) +- [x] Endpoints enqueue/schedule +- [x] CRUD products + prices + logs +- [x] Docker + uvicorn + config env +- [x] Tests API de base +- [x] Filtres avances (prix, dates, stock, status) +- [x] Exports CSV/JSON (products, prices, logs) +- [x] Webhooks (CRUD + test) --- ## Phase 4 : Web UI (Future) ### Backend API -- [ ] FastAPI endpoints -- [ ] Authentification +- [x] FastAPI endpoints +- [x] Authentification - [ ] CORS ### Frontend -- [ ] Framework (React/Vue?) -- [ ] Design responsive -- [ ] Dark theme Gruvbox +- [x] Framework (Vue 3 + Vite) +- [x] Design responsive (layout dense + compact) +- [x] Dark theme Gruvbox (defaut) + Monokai - [ ] Graphiques historique prix - [ ] Gestion alertes @@ -236,4 +258,4 @@ Liste des tâches priorisées pour le développement de PriceWatch. --- -**Dernière mise à jour**: 2026-01-14 +**Dernière mise à jour**: 2026-01-15 diff --git a/alembic.ini b/alembic.ini old mode 100755 new mode 100644 diff --git a/docker-compose.yml b/docker-compose.yml old mode 100755 new mode 100644 index 8a4c487..362e5af --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,7 @@ services: POSTGRES_DB: pricewatch POSTGRES_USER: pricewatch POSTGRES_PASSWORD: pricewatch + TZ: Europe/Paris ports: - "5432:5432" volumes: @@ -12,11 +13,36 @@ services: redis: image: redis:7 + environment: + TZ: Europe/Paris ports: - "6379:6379" volumes: - pricewatch_redisdata:/data + api: + build: . 
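+    # Uvicorn ecoute sur 8000 dans le conteneur (cf. Dockerfile), expose en 8001 sur l'hote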
+ ports: + - "8001:8000" + env_file: + - .env + environment: + PW_DB_HOST: postgres + PW_REDIS_HOST: redis + TZ: Europe/Paris + depends_on: + - postgres + - redis + + frontend: + build: ./webui + ports: + - "3000:80" + environment: + TZ: Europe/Paris + depends_on: + - api + volumes: pricewatch_pgdata: pricewatch_redisdata: diff --git a/pricewatch.egg-info/PKG-INFO b/pricewatch.egg-info/PKG-INFO old mode 100755 new mode 100644 index b784bc1..ac434d9 --- a/pricewatch.egg-info/PKG-INFO +++ b/pricewatch.egg-info/PKG-INFO @@ -28,6 +28,8 @@ Requires-Dist: python-dotenv>=1.0.0 Requires-Dist: redis>=5.0.0 Requires-Dist: rq>=1.15.0 Requires-Dist: rq-scheduler>=0.13.0 +Requires-Dist: fastapi>=0.110.0 +Requires-Dist: uvicorn>=0.27.0 Provides-Extra: dev Requires-Dist: pytest>=8.0.0; extra == "dev" Requires-Dist: pytest-cov>=4.1.0; extra == "dev" @@ -100,6 +102,13 @@ pricewatch/ │ │ ├── store.py │ │ ├── selectors.yml │ │ └── fixtures/ +│ ├── db/ # Persistence SQLAlchemy (Phase 2) +│ │ ├── models.py +│ │ ├── connection.py +│ │ └── migrations/ +│ ├── tasks/ # Jobs RQ (Phase 3) +│ │ ├── scrape.py +│ │ └── scheduler.py │ └── cli/ │ └── main.py # CLI Typer ├── tests/ # Tests pytest @@ -118,6 +127,9 @@ pricewatch run --yaml scrap_url.yaml --out scraped_store.json # Avec debug pricewatch run --yaml scrap_url.yaml --out scraped_store.json --debug + +# Avec persistence DB +pricewatch run --yaml scrap_url.yaml --out scraped_store.json --save-db ``` ### Commandes utilitaires @@ -139,6 +151,63 @@ pricewatch parse amazon --in scraped/page.html pricewatch doctor ``` +### Commandes base de donnees + +```bash +# Initialiser les tables +pricewatch init-db + +# Generer une migration +pricewatch migrate "Initial schema" + +# Appliquer les migrations +pricewatch upgrade + +# Revenir en arriere +pricewatch downgrade -1 +``` + +### Commandes worker + +```bash +# Lancer un worker RQ +pricewatch worker + +# Enqueue un job immediat +pricewatch enqueue "https://example.com/product" + +# Planifier un job recurrent +pricewatch schedule "https://example.com/product" --interval 24 +``` + +## Base de donnees (Phase 2) + +```bash +# Lancer PostgreSQL + Redis en local +docker-compose up -d + +# Exemple de configuration +cp .env.example .env +``` + +Guide de migration JSON -> DB: `MIGRATION_GUIDE.md` + +## API REST (Phase 3) + +L'API est protegee par un token simple. 
+ +```bash +export PW_API_TOKEN=change_me +docker compose up -d api +``` + +Exemples: + +```bash +curl -H "Authorization: Bearer $PW_API_TOKEN" http://localhost:8000/products +curl http://localhost:8000/health +``` + ## Configuration (scrap_url.yaml) ```yaml @@ -238,8 +307,8 @@ Aucune erreur ne doit crasher silencieusement : toutes sont loggées et tracées - ✅ Tests pytest ### Phase 2 : Persistence -- [ ] Base de données PostgreSQL -- [ ] Migrations Alembic +- [x] Base de données PostgreSQL +- [x] Migrations Alembic - [ ] Historique des prix ### Phase 3 : Automation diff --git a/pricewatch.egg-info/SOURCES.txt b/pricewatch.egg-info/SOURCES.txt index a519fb9..48f824a 100755 --- a/pricewatch.egg-info/SOURCES.txt +++ b/pricewatch.egg-info/SOURCES.txt @@ -18,6 +18,7 @@ pricewatch/app/core/registry.py pricewatch/app/core/schema.py pricewatch/app/scraping/__init__.py pricewatch/app/scraping/http_fetch.py +pricewatch/app/scraping/pipeline.py pricewatch/app/scraping/pw_fetch.py pricewatch/app/stores/__init__.py pricewatch/app/stores/base.py diff --git a/pricewatch.egg-info/requires.txt b/pricewatch.egg-info/requires.txt index f366fd7..003a098 100755 --- a/pricewatch.egg-info/requires.txt +++ b/pricewatch.egg-info/requires.txt @@ -16,6 +16,8 @@ python-dotenv>=1.0.0 redis>=5.0.0 rq>=1.15.0 rq-scheduler>=0.13.0 +fastapi>=0.110.0 +uvicorn>=0.27.0 [dev] pytest>=8.0.0 diff --git a/pricewatch/app/api/__init__.py b/pricewatch/app/api/__init__.py new file mode 100644 index 0000000..595bb5f --- /dev/null +++ b/pricewatch/app/api/__init__.py @@ -0,0 +1,5 @@ +"""Module API FastAPI.""" + +from pricewatch.app.api.main import app + +__all__ = ["app"] diff --git a/pricewatch/app/api/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/api/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..8701b61 Binary files /dev/null and b/pricewatch/app/api/__pycache__/__init__.cpython-313.pyc differ diff --git a/pricewatch/app/api/__pycache__/main.cpython-313.pyc b/pricewatch/app/api/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..66e5ecf Binary files /dev/null and b/pricewatch/app/api/__pycache__/main.cpython-313.pyc differ diff --git a/pricewatch/app/api/__pycache__/schemas.cpython-313.pyc b/pricewatch/app/api/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000..0e52c07 Binary files /dev/null and b/pricewatch/app/api/__pycache__/schemas.cpython-313.pyc differ diff --git a/pricewatch/app/api/main.py b/pricewatch/app/api/main.py new file mode 100644 index 0000000..d32e95b --- /dev/null +++ b/pricewatch/app/api/main.py @@ -0,0 +1,876 @@ +""" +API REST FastAPI pour PriceWatch (Phase 3). 
+""" + +from __future__ import annotations + +import csv +from collections import deque +from datetime import datetime, timezone +import os +from pathlib import Path +from io import StringIO +from typing import Generator, Optional + +import httpx +from fastapi import Depends, FastAPI, Header, HTTPException, Response +from fastapi.encoders import jsonable_encoder +from fastapi.responses import JSONResponse +from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy import and_, desc, func +from sqlalchemy.orm import Session + +from pricewatch.app.api.schemas import ( + EnqueueRequest, + EnqueueResponse, + HealthStatus, + PriceHistoryOut, + PriceHistoryCreate, + PriceHistoryUpdate, + ProductOut, + ProductCreate, + ProductUpdate, + ScheduleRequest, + ScheduleResponse, + ScrapingLogOut, + ScrapingLogCreate, + ScrapingLogUpdate, + ScrapePreviewRequest, + ScrapePreviewResponse, + ScrapeCommitRequest, + ScrapeCommitResponse, + VersionResponse, + BackendLogEntry, + UvicornLogEntry, + WebhookOut, + WebhookCreate, + WebhookUpdate, + WebhookTestResponse, +) +from pricewatch.app.core.config import get_config +from pricewatch.app.core.logging import get_logger +from pricewatch.app.core.schema import ProductSnapshot +from pricewatch.app.db.connection import check_db_connection, get_session +from pricewatch.app.db.models import PriceHistory, Product, ScrapingLog, Webhook +from pricewatch.app.scraping.pipeline import ScrapingPipeline +from pricewatch.app.tasks.scrape import scrape_product +from pricewatch.app.tasks.scheduler import RedisUnavailableError, check_redis_connection, ScrapingScheduler + +logger = get_logger("api") + +app = FastAPI(title="PriceWatch API", version="0.4.0") + +# Buffer de logs backend en memoire pour debug UI. +BACKEND_LOGS = deque(maxlen=200) + +UVICORN_LOG_PATH = Path( + os.environ.get("PW_UVICORN_LOG_PATH", "/app/logs/uvicorn.log") +) + + +def get_db_session() -> Generator[Session, None, None]: + """Dependency: session SQLAlchemy.""" + with get_session(get_config()) as session: + yield session + + +def require_token(authorization: Optional[str] = Header(default=None)) -> None: + """Auth simple via token Bearer.""" + config = get_config() + token = config.api_token + if not token: + raise HTTPException(status_code=500, detail="API token non configure") + + if not authorization or not authorization.startswith("Bearer "): + raise HTTPException(status_code=401, detail="Token manquant") + + provided = authorization.split("Bearer ")[-1].strip() + if provided != token: + raise HTTPException(status_code=403, detail="Token invalide") + + +@app.get("/health", response_model=HealthStatus) +def health_check() -> HealthStatus: + """Health check DB + Redis.""" + config = get_config() + return HealthStatus( + db=check_db_connection(config), + redis=check_redis_connection(config.redis.url), + ) + + +@app.get("/version", response_model=VersionResponse) +def version_info() -> VersionResponse: + """Expose la version API.""" + return VersionResponse(api_version=app.version) + + +@app.get("/logs/backend", response_model=list[BackendLogEntry], dependencies=[Depends(require_token)]) +def list_backend_logs() -> list[BackendLogEntry]: + """Expose un buffer de logs backend.""" + return list(BACKEND_LOGS) + + +@app.get("/logs/uvicorn", response_model=list[UvicornLogEntry], dependencies=[Depends(require_token)]) +def list_uvicorn_logs(limit: int = 200) -> list[UvicornLogEntry]: + """Expose les dernieres lignes du log Uvicorn.""" + lines = _read_uvicorn_lines(limit=limit) + return 
[UvicornLogEntry(line=line) for line in lines] + + +@app.get("/products", response_model=list[ProductOut], dependencies=[Depends(require_token)]) +def list_products( + source: Optional[str] = None, + reference: Optional[str] = None, + updated_after: Optional[datetime] = None, + price_min: Optional[float] = None, + price_max: Optional[float] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + stock_status: Optional[str] = None, + limit: int = 50, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> list[ProductOut]: + """Liste des produits avec filtres optionnels.""" + latest_price_subquery = ( + session.query( + PriceHistory.product_id.label("product_id"), + func.max(PriceHistory.fetched_at).label("latest_fetched_at"), + ) + .group_by(PriceHistory.product_id) + .subquery() + ) + latest_price = ( + session.query(PriceHistory) + .join( + latest_price_subquery, + and_( + PriceHistory.product_id == latest_price_subquery.c.product_id, + PriceHistory.fetched_at == latest_price_subquery.c.latest_fetched_at, + ), + ) + .subquery() + ) + + query = session.query(Product).outerjoin(latest_price, Product.id == latest_price.c.product_id) + if source: + query = query.filter(Product.source == source) + if reference: + query = query.filter(Product.reference == reference) + if updated_after: + query = query.filter(Product.last_updated_at >= updated_after) + if price_min is not None: + query = query.filter(latest_price.c.price >= price_min) + if price_max is not None: + query = query.filter(latest_price.c.price <= price_max) + if fetched_after: + query = query.filter(latest_price.c.fetched_at >= fetched_after) + if fetched_before: + query = query.filter(latest_price.c.fetched_at <= fetched_before) + if stock_status: + query = query.filter(latest_price.c.stock_status == stock_status) + + products = query.order_by(desc(Product.last_updated_at)).offset(offset).limit(limit).all() + return [_product_to_out(session, product) for product in products] + + +@app.post("/products", response_model=ProductOut, dependencies=[Depends(require_token)]) +def create_product( + payload: ProductCreate, + session: Session = Depends(get_db_session), +) -> ProductOut: + """Cree un produit.""" + product = Product( + source=payload.source, + reference=payload.reference, + url=payload.url, + title=payload.title, + category=payload.category, + description=payload.description, + currency=payload.currency, + msrp=payload.msrp, + ) + session.add(product) + try: + session.commit() + session.refresh(product) + except IntegrityError as exc: + session.rollback() + raise HTTPException(status_code=409, detail="Produit deja existant") from exc + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _product_to_out(session, product) + + +@app.get("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)]) +def get_product( + product_id: int, + session: Session = Depends(get_db_session), +) -> ProductOut: + """Detail produit + dernier prix.""" + product = session.query(Product).filter(Product.id == product_id).one_or_none() + if not product: + raise HTTPException(status_code=404, detail="Produit non trouve") + return _product_to_out(session, product) + + +@app.patch("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)]) +def update_product( + product_id: int, + payload: ProductUpdate, + session: Session = Depends(get_db_session), +) -> 
ProductOut: + """Met a jour un produit (partial).""" + product = session.query(Product).filter(Product.id == product_id).one_or_none() + if not product: + raise HTTPException(status_code=404, detail="Produit non trouve") + + updates = payload.model_dump(exclude_unset=True) + for key, value in updates.items(): + setattr(product, key, value) + + try: + session.commit() + session.refresh(product) + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _product_to_out(session, product) + + +@app.delete("/products/{product_id}", dependencies=[Depends(require_token)]) +def delete_product( + product_id: int, + session: Session = Depends(get_db_session), +) -> dict[str, str]: + """Supprime un produit (cascade).""" + product = session.query(Product).filter(Product.id == product_id).one_or_none() + if not product: + raise HTTPException(status_code=404, detail="Produit non trouve") + + session.delete(product) + try: + session.commit() + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return {"status": "deleted"} + + +@app.get( + "/products/{product_id}/prices", + response_model=list[PriceHistoryOut], + dependencies=[Depends(require_token)], +) +def list_prices( + product_id: int, + price_min: Optional[float] = None, + price_max: Optional[float] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + fetch_status: Optional[str] = None, + limit: int = 50, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> list[PriceHistoryOut]: + """Historique de prix pour un produit.""" + query = session.query(PriceHistory).filter(PriceHistory.product_id == product_id) + if price_min is not None: + query = query.filter(PriceHistory.price >= price_min) + if price_max is not None: + query = query.filter(PriceHistory.price <= price_max) + if fetched_after: + query = query.filter(PriceHistory.fetched_at >= fetched_after) + if fetched_before: + query = query.filter(PriceHistory.fetched_at <= fetched_before) + if fetch_status: + query = query.filter(PriceHistory.fetch_status == fetch_status) + + prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all() + return [_price_to_out(price) for price in prices] + + +@app.post("/prices", response_model=PriceHistoryOut, dependencies=[Depends(require_token)]) +def create_price( + payload: PriceHistoryCreate, + session: Session = Depends(get_db_session), +) -> PriceHistoryOut: + """Ajoute une entree d'historique de prix.""" + price = PriceHistory( + product_id=payload.product_id, + price=payload.price, + shipping_cost=payload.shipping_cost, + stock_status=payload.stock_status, + fetch_method=payload.fetch_method, + fetch_status=payload.fetch_status, + fetched_at=payload.fetched_at, + ) + session.add(price) + try: + session.commit() + session.refresh(price) + except IntegrityError as exc: + session.rollback() + raise HTTPException(status_code=409, detail="Entree prix deja existante") from exc + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _price_to_out(price) + + +@app.patch("/prices/{price_id}", response_model=PriceHistoryOut, dependencies=[Depends(require_token)]) +def update_price( + price_id: int, + payload: PriceHistoryUpdate, + session: Session = Depends(get_db_session), +) -> PriceHistoryOut: + """Met a jour une entree de prix.""" + price = 
session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none() + if not price: + raise HTTPException(status_code=404, detail="Entree prix non trouvee") + + updates = payload.model_dump(exclude_unset=True) + for key, value in updates.items(): + setattr(price, key, value) + + try: + session.commit() + session.refresh(price) + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _price_to_out(price) + + +@app.delete("/prices/{price_id}", dependencies=[Depends(require_token)]) +def delete_price( + price_id: int, + session: Session = Depends(get_db_session), +) -> dict[str, str]: + """Supprime une entree de prix.""" + price = session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none() + if not price: + raise HTTPException(status_code=404, detail="Entree prix non trouvee") + + session.delete(price) + try: + session.commit() + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return {"status": "deleted"} + + +@app.get("/logs", response_model=list[ScrapingLogOut], dependencies=[Depends(require_token)]) +def list_logs( + source: Optional[str] = None, + fetch_status: Optional[str] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + limit: int = 50, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> list[ScrapingLogOut]: + """Liste des logs de scraping.""" + query = session.query(ScrapingLog) + if source: + query = query.filter(ScrapingLog.source == source) + if fetch_status: + query = query.filter(ScrapingLog.fetch_status == fetch_status) + if fetched_after: + query = query.filter(ScrapingLog.fetched_at >= fetched_after) + if fetched_before: + query = query.filter(ScrapingLog.fetched_at <= fetched_before) + + logs = query.order_by(desc(ScrapingLog.fetched_at)).offset(offset).limit(limit).all() + return [_log_to_out(log) for log in logs] + + +@app.post("/logs", response_model=ScrapingLogOut, dependencies=[Depends(require_token)]) +def create_log( + payload: ScrapingLogCreate, + session: Session = Depends(get_db_session), +) -> ScrapingLogOut: + """Cree un log de scraping.""" + log_entry = ScrapingLog( + product_id=payload.product_id, + url=payload.url, + source=payload.source, + reference=payload.reference, + fetch_method=payload.fetch_method, + fetch_status=payload.fetch_status, + fetched_at=payload.fetched_at, + duration_ms=payload.duration_ms, + html_size_bytes=payload.html_size_bytes, + errors=payload.errors, + notes=payload.notes, + ) + session.add(log_entry) + try: + session.commit() + session.refresh(log_entry) + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _log_to_out(log_entry) + + +@app.patch("/logs/{log_id}", response_model=ScrapingLogOut, dependencies=[Depends(require_token)]) +def update_log( + log_id: int, + payload: ScrapingLogUpdate, + session: Session = Depends(get_db_session), +) -> ScrapingLogOut: + """Met a jour un log.""" + log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none() + if not log_entry: + raise HTTPException(status_code=404, detail="Log non trouve") + + updates = payload.model_dump(exclude_unset=True) + for key, value in updates.items(): + setattr(log_entry, key, value) + + try: + session.commit() + session.refresh(log_entry) + except SQLAlchemyError as exc: + session.rollback() + raise 
HTTPException(status_code=500, detail="Erreur DB") from exc + return _log_to_out(log_entry) + + +@app.delete("/logs/{log_id}", dependencies=[Depends(require_token)]) +def delete_log( + log_id: int, + session: Session = Depends(get_db_session), +) -> dict[str, str]: + """Supprime un log.""" + log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none() + if not log_entry: + raise HTTPException(status_code=404, detail="Log non trouve") + + session.delete(log_entry) + try: + session.commit() + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return {"status": "deleted"} + + +@app.get("/products/export", dependencies=[Depends(require_token)]) +def export_products( + source: Optional[str] = None, + reference: Optional[str] = None, + updated_after: Optional[datetime] = None, + price_min: Optional[float] = None, + price_max: Optional[float] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + stock_status: Optional[str] = None, + format: str = "csv", + limit: int = 500, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> Response: + """Export produits en CSV/JSON.""" + products = list_products( + source=source, + reference=reference, + updated_after=updated_after, + price_min=price_min, + price_max=price_max, + fetched_after=fetched_after, + fetched_before=fetched_before, + stock_status=stock_status, + limit=limit, + offset=offset, + session=session, + ) + rows = [product.model_dump() for product in products] + fieldnames = list(ProductOut.model_fields.keys()) + return _export_response(rows, fieldnames, "products", format) + + +@app.get("/prices/export", dependencies=[Depends(require_token)]) +def export_prices( + product_id: Optional[int] = None, + price_min: Optional[float] = None, + price_max: Optional[float] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + fetch_status: Optional[str] = None, + format: str = "csv", + limit: int = 500, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> Response: + """Export historique de prix en CSV/JSON.""" + query = session.query(PriceHistory) + if product_id is not None: + query = query.filter(PriceHistory.product_id == product_id) + if price_min is not None: + query = query.filter(PriceHistory.price >= price_min) + if price_max is not None: + query = query.filter(PriceHistory.price <= price_max) + if fetched_after: + query = query.filter(PriceHistory.fetched_at >= fetched_after) + if fetched_before: + query = query.filter(PriceHistory.fetched_at <= fetched_before) + if fetch_status: + query = query.filter(PriceHistory.fetch_status == fetch_status) + + prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all() + rows = [_price_to_out(price).model_dump() for price in prices] + fieldnames = list(PriceHistoryOut.model_fields.keys()) + return _export_response(rows, fieldnames, "prices", format) + + +@app.get("/logs/export", dependencies=[Depends(require_token)]) +def export_logs( + source: Optional[str] = None, + fetch_status: Optional[str] = None, + fetched_after: Optional[datetime] = None, + fetched_before: Optional[datetime] = None, + format: str = "csv", + limit: int = 500, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> Response: + """Export logs de scraping en CSV/JSON.""" + logs = list_logs( + source=source, + fetch_status=fetch_status, + fetched_after=fetched_after, + 
fetched_before=fetched_before, + limit=limit, + offset=offset, + session=session, + ) + rows = [log.model_dump() for log in logs] + fieldnames = list(ScrapingLogOut.model_fields.keys()) + return _export_response(rows, fieldnames, "logs", format) + + +@app.get("/webhooks", response_model=list[WebhookOut], dependencies=[Depends(require_token)]) +def list_webhooks( + event: Optional[str] = None, + enabled: Optional[bool] = None, + limit: int = 50, + offset: int = 0, + session: Session = Depends(get_db_session), +) -> list[WebhookOut]: + """Liste des webhooks.""" + query = session.query(Webhook) + if event: + query = query.filter(Webhook.event == event) + if enabled is not None: + query = query.filter(Webhook.enabled == enabled) + + webhooks = query.order_by(desc(Webhook.created_at)).offset(offset).limit(limit).all() + return [_webhook_to_out(webhook) for webhook in webhooks] + + +@app.post("/webhooks", response_model=WebhookOut, dependencies=[Depends(require_token)]) +def create_webhook( + payload: WebhookCreate, + session: Session = Depends(get_db_session), +) -> WebhookOut: + """Cree un webhook.""" + webhook = Webhook( + event=payload.event, + url=payload.url, + enabled=payload.enabled, + secret=payload.secret, + ) + session.add(webhook) + try: + session.commit() + session.refresh(webhook) + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _webhook_to_out(webhook) + + +@app.patch("/webhooks/{webhook_id}", response_model=WebhookOut, dependencies=[Depends(require_token)]) +def update_webhook( + webhook_id: int, + payload: WebhookUpdate, + session: Session = Depends(get_db_session), +) -> WebhookOut: + """Met a jour un webhook.""" + webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none() + if not webhook: + raise HTTPException(status_code=404, detail="Webhook non trouve") + + updates = payload.model_dump(exclude_unset=True) + for key, value in updates.items(): + setattr(webhook, key, value) + + try: + session.commit() + session.refresh(webhook) + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return _webhook_to_out(webhook) + + +@app.delete("/webhooks/{webhook_id}", dependencies=[Depends(require_token)]) +def delete_webhook( + webhook_id: int, + session: Session = Depends(get_db_session), +) -> dict[str, str]: + """Supprime un webhook.""" + webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none() + if not webhook: + raise HTTPException(status_code=404, detail="Webhook non trouve") + + session.delete(webhook) + try: + session.commit() + except SQLAlchemyError as exc: + session.rollback() + raise HTTPException(status_code=500, detail="Erreur DB") from exc + return {"status": "deleted"} + + +@app.post( + "/webhooks/{webhook_id}/test", + response_model=WebhookTestResponse, + dependencies=[Depends(require_token)], +) +def send_webhook_test( + webhook_id: int, + session: Session = Depends(get_db_session), +) -> WebhookTestResponse: + """Envoie un evenement de test.""" + webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none() + if not webhook: + raise HTTPException(status_code=404, detail="Webhook non trouve") + if not webhook.enabled: + raise HTTPException(status_code=409, detail="Webhook desactive") + + payload = {"message": "test webhook", "webhook_id": webhook.id} + _send_webhook(webhook, "test", payload) + return WebhookTestResponse(status="sent") + +@app.post("/enqueue", 
response_model=EnqueueResponse, dependencies=[Depends(require_token)]) +def enqueue_job(payload: EnqueueRequest) -> EnqueueResponse: + """Enqueue un job immediat.""" + try: + scheduler = ScrapingScheduler(get_config()) + job = scheduler.enqueue_immediate( + payload.url, + use_playwright=payload.use_playwright, + save_db=payload.save_db, + ) + return EnqueueResponse(job_id=job.id) + except RedisUnavailableError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + + +@app.post("/schedule", response_model=ScheduleResponse, dependencies=[Depends(require_token)]) +def schedule_job(payload: ScheduleRequest) -> ScheduleResponse: + """Planifie un job recurrent.""" + try: + scheduler = ScrapingScheduler(get_config()) + job_info = scheduler.schedule_product( + payload.url, + interval_hours=payload.interval_hours, + use_playwright=payload.use_playwright, + save_db=payload.save_db, + ) + return ScheduleResponse(job_id=job_info.job_id, next_run=job_info.next_run) + except RedisUnavailableError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + + +@app.post("/scrape/preview", response_model=ScrapePreviewResponse, dependencies=[Depends(require_token)]) +def preview_scrape(payload: ScrapePreviewRequest) -> ScrapePreviewResponse: + """Scrape un produit sans persistence pour previsualisation.""" + _add_backend_log("INFO", f"Preview scraping: {payload.url}") + result = scrape_product( + payload.url, + use_playwright=payload.use_playwright, + save_db=False, + ) + snapshot = result.get("snapshot") + if snapshot is None: + _add_backend_log("ERROR", f"Preview scraping KO: {payload.url}") + return ScrapePreviewResponse(success=False, snapshot=None, error=result.get("error")) + return ScrapePreviewResponse( + success=bool(result.get("success")), + snapshot=snapshot.model_dump(mode="json"), + error=result.get("error"), + ) + + +@app.post("/scrape/commit", response_model=ScrapeCommitResponse, dependencies=[Depends(require_token)]) +def commit_scrape(payload: ScrapeCommitRequest) -> ScrapeCommitResponse: + """Persiste un snapshot previsualise.""" + try: + snapshot = ProductSnapshot.model_validate(payload.snapshot) + except Exception as exc: + _add_backend_log("ERROR", "Commit scraping KO: snapshot invalide") + raise HTTPException(status_code=400, detail="Snapshot invalide") from exc + + product_id = ScrapingPipeline(config=get_config()).process_snapshot(snapshot, save_to_db=True) + _add_backend_log("INFO", f"Commit scraping OK: product_id={product_id}") + return ScrapeCommitResponse(success=True, product_id=product_id) + + +def _export_response( + rows: list[dict[str, object]], + fieldnames: list[str], + filename_prefix: str, + format: str, +) -> Response: + """Expose une reponse CSV/JSON avec un nom de fichier stable.""" + if format not in {"csv", "json"}: + raise HTTPException(status_code=400, detail="Format invalide (csv ou json)") + + headers = {"Content-Disposition": f'attachment; filename="{filename_prefix}.{format}"'} + if format == "json": + return JSONResponse(content=jsonable_encoder(rows), headers=headers) + return _to_csv_response(rows, fieldnames, headers) + + +def _to_csv_response( + rows: list[dict[str, object]], + fieldnames: list[str], + headers: dict[str, str], +) -> Response: + buffer = StringIO() + writer = csv.DictWriter(buffer, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + return Response(content=buffer.getvalue(), media_type="text/csv", headers=headers) + + +def _send_webhook(webhook: Webhook, event: str, payload: 
dict[str, object]) -> None: + """Envoie un webhook avec gestion d'erreur explicite.""" + headers = {"Content-Type": "application/json"} + if webhook.secret: + headers["X-Webhook-Secret"] = webhook.secret + + try: + response = httpx.post( + webhook.url, + json={"event": event, "payload": payload}, + headers=headers, + timeout=5.0, + ) + response.raise_for_status() + except httpx.HTTPError as exc: + logger.error("Erreur webhook", extra={"url": webhook.url, "event": event, "error": str(exc)}) + raise HTTPException(status_code=502, detail="Echec webhook") from exc + + +def _add_backend_log(level: str, message: str) -> None: + BACKEND_LOGS.append( + BackendLogEntry( + time=datetime.now(timezone.utc), + level=level, + message=message, + ) + ) + + +def _read_uvicorn_lines(limit: int = 200) -> list[str]: + """Lit les dernieres lignes du log Uvicorn si disponible.""" + if limit <= 0: + return [] + try: + if not UVICORN_LOG_PATH.exists(): + return [] + with UVICORN_LOG_PATH.open("r", encoding="utf-8", errors="ignore") as handle: + lines = handle.readlines() + return [line.rstrip("\n") for line in lines[-limit:]] + except Exception: + return [] + + +def _product_to_out(session: Session, product: Product) -> ProductOut: + """Helper pour mapper Product + dernier prix.""" + latest = ( + session.query(PriceHistory) + .filter(PriceHistory.product_id == product.id) + .order_by(desc(PriceHistory.fetched_at)) + .first() + ) + images = [image.image_url for image in product.images] + specs = {spec.spec_key: spec.spec_value for spec in product.specs} + discount_amount = None + discount_percent = None + if latest and latest.price is not None and product.msrp: + discount_amount = float(product.msrp) - float(latest.price) + if product.msrp > 0: + discount_percent = (discount_amount / float(product.msrp)) * 100 + return ProductOut( + id=product.id, + source=product.source, + reference=product.reference, + url=product.url, + title=product.title, + category=product.category, + description=product.description, + currency=product.currency, + msrp=float(product.msrp) if product.msrp is not None else None, + first_seen_at=product.first_seen_at, + last_updated_at=product.last_updated_at, + latest_price=float(latest.price) if latest and latest.price is not None else None, + latest_shipping_cost=( + float(latest.shipping_cost) if latest and latest.shipping_cost is not None else None + ), + latest_stock_status=latest.stock_status if latest else None, + latest_fetched_at=latest.fetched_at if latest else None, + images=images, + specs=specs, + discount_amount=discount_amount, + discount_percent=discount_percent, + ) + + +def _price_to_out(price: PriceHistory) -> PriceHistoryOut: + return PriceHistoryOut( + id=price.id, + product_id=price.product_id, + price=float(price.price) if price.price is not None else None, + shipping_cost=float(price.shipping_cost) if price.shipping_cost is not None else None, + stock_status=price.stock_status, + fetch_method=price.fetch_method, + fetch_status=price.fetch_status, + fetched_at=price.fetched_at, + ) + + +def _log_to_out(log: ScrapingLog) -> ScrapingLogOut: + return ScrapingLogOut( + id=log.id, + product_id=log.product_id, + url=log.url, + source=log.source, + reference=log.reference, + fetch_method=log.fetch_method, + fetch_status=log.fetch_status, + fetched_at=log.fetched_at, + duration_ms=log.duration_ms, + html_size_bytes=log.html_size_bytes, + errors=log.errors, + notes=log.notes, + ) + + +def _webhook_to_out(webhook: Webhook) -> WebhookOut: + return WebhookOut( + id=webhook.id, + 
event=webhook.event, + url=webhook.url, + enabled=webhook.enabled, + secret=webhook.secret, + created_at=webhook.created_at, + ) diff --git a/pricewatch/app/api/schemas.py b/pricewatch/app/api/schemas.py new file mode 100644 index 0000000..a591eb9 --- /dev/null +++ b/pricewatch/app/api/schemas.py @@ -0,0 +1,212 @@ +""" +Schemas API FastAPI pour Phase 3. +""" + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + + +class HealthStatus(BaseModel): + db: bool + redis: bool + + +class ProductOut(BaseModel): + id: int + source: str + reference: str + url: str + title: Optional[str] = None + category: Optional[str] = None + description: Optional[str] = None + currency: Optional[str] = None + msrp: Optional[float] = None + first_seen_at: datetime + last_updated_at: datetime + latest_price: Optional[float] = None + latest_shipping_cost: Optional[float] = None + latest_stock_status: Optional[str] = None + latest_fetched_at: Optional[datetime] = None + images: list[str] = [] + specs: dict[str, str] = {} + discount_amount: Optional[float] = None + discount_percent: Optional[float] = None + + +class ProductCreate(BaseModel): + source: str + reference: str + url: str + title: Optional[str] = None + category: Optional[str] = None + description: Optional[str] = None + currency: Optional[str] = None + msrp: Optional[float] = None + + +class ProductUpdate(BaseModel): + url: Optional[str] = None + title: Optional[str] = None + category: Optional[str] = None + description: Optional[str] = None + currency: Optional[str] = None + msrp: Optional[float] = None + + +class PriceHistoryOut(BaseModel): + id: int + product_id: int + price: Optional[float] = None + shipping_cost: Optional[float] = None + stock_status: Optional[str] = None + fetch_method: str + fetch_status: str + fetched_at: datetime + + +class PriceHistoryCreate(BaseModel): + product_id: int + price: Optional[float] = None + shipping_cost: Optional[float] = None + stock_status: Optional[str] = None + fetch_method: str + fetch_status: str + fetched_at: datetime + + +class PriceHistoryUpdate(BaseModel): + price: Optional[float] = None + shipping_cost: Optional[float] = None + stock_status: Optional[str] = None + fetch_method: Optional[str] = None + fetch_status: Optional[str] = None + fetched_at: Optional[datetime] = None + + +class ScrapingLogOut(BaseModel): + id: int + product_id: Optional[int] = None + url: str + source: str + reference: Optional[str] = None + fetch_method: str + fetch_status: str + fetched_at: datetime + duration_ms: Optional[int] = None + html_size_bytes: Optional[int] = None + errors: Optional[list[str]] = None + notes: Optional[list[str]] = None + + +class WebhookOut(BaseModel): + id: int + event: str + url: str + enabled: bool + secret: Optional[str] = None + created_at: datetime + + +class WebhookCreate(BaseModel): + event: str + url: str + enabled: bool = True + secret: Optional[str] = None + + +class WebhookUpdate(BaseModel): + event: Optional[str] = None + url: Optional[str] = None + enabled: Optional[bool] = None + secret: Optional[str] = None + + +class WebhookTestResponse(BaseModel): + status: str + + +class ScrapingLogCreate(BaseModel): + product_id: Optional[int] = None + url: str + source: str + reference: Optional[str] = None + fetch_method: str + fetch_status: str + fetched_at: datetime + duration_ms: Optional[int] = None + html_size_bytes: Optional[int] = None + errors: Optional[list[str]] = None + notes: Optional[list[str]] = None + + +class 
ScrapingLogUpdate(BaseModel): + product_id: Optional[int] = None + url: Optional[str] = None + source: Optional[str] = None + reference: Optional[str] = None + fetch_method: Optional[str] = None + fetch_status: Optional[str] = None + fetched_at: Optional[datetime] = None + duration_ms: Optional[int] = None + html_size_bytes: Optional[int] = None + errors: Optional[list[str]] = None + notes: Optional[list[str]] = None + + +class EnqueueRequest(BaseModel): + url: str = Field(..., description="URL du produit") + use_playwright: Optional[bool] = None + save_db: bool = True + + +class EnqueueResponse(BaseModel): + job_id: str + + +class ScheduleRequest(BaseModel): + url: str = Field(..., description="URL du produit") + interval_hours: int = Field(default=24, ge=1) + use_playwright: Optional[bool] = None + save_db: bool = True + + +class ScheduleResponse(BaseModel): + job_id: str + next_run: datetime + + +class ScrapePreviewRequest(BaseModel): + url: str + use_playwright: Optional[bool] = None + + +class ScrapePreviewResponse(BaseModel): + success: bool + snapshot: Optional[dict[str, object]] = None + error: Optional[str] = None + + +class ScrapeCommitRequest(BaseModel): + snapshot: dict[str, object] + + +class ScrapeCommitResponse(BaseModel): + success: bool + product_id: Optional[int] = None + error: Optional[str] = None + + +class VersionResponse(BaseModel): + api_version: str + + +class BackendLogEntry(BaseModel): + time: datetime + level: str + message: str + + +class UvicornLogEntry(BaseModel): + line: str diff --git a/pricewatch/app/cli/__pycache__/main.cpython-313.pyc b/pricewatch/app/cli/__pycache__/main.cpython-313.pyc old mode 100755 new mode 100644 index 0b4bad6..4955cb5 Binary files a/pricewatch/app/cli/__pycache__/main.cpython-313.pyc and b/pricewatch/app/cli/__pycache__/main.cpython-313.pyc differ diff --git a/pricewatch/app/cli/main.py b/pricewatch/app/cli/main.py index 3ad3d1f..f01c817 100755 --- a/pricewatch/app/cli/main.py +++ b/pricewatch/app/cli/main.py @@ -15,7 +15,7 @@ from typing import Optional import redis import typer -from rq import Connection, Worker +from rq import Worker from alembic import command as alembic_command from alembic.config import Config as AlembicConfig from rich import print as rprint @@ -34,7 +34,7 @@ from pricewatch.app.scraping.pipeline import ScrapingPipeline from pricewatch.app.scraping.pw_fetch import fetch_playwright from pricewatch.app.stores.amazon.store import AmazonStore from pricewatch.app.stores.cdiscount.store import CdiscountStore -from pricewatch.app.tasks.scheduler import ScrapingScheduler +from pricewatch.app.tasks.scheduler import RedisUnavailableError, ScrapingScheduler # Créer l'application Typer app = typer.Typer( @@ -197,18 +197,21 @@ def run( html = None fetch_method = FetchMethod.HTTP fetch_error = None + http_result = None - # Tenter HTTP d'abord - logger.info("Tentative HTTP...") - http_result = fetch_http(canonical_url) + if config.options.force_playwright: + logger.info("Playwright force, skip HTTP") + else: + logger.info("Tentative HTTP...") + http_result = fetch_http(canonical_url) - if http_result.success: + if http_result and http_result.success: html = http_result.html fetch_method = FetchMethod.HTTP logger.info("✓ HTTP réussi") elif config.options.use_playwright: - # Fallback Playwright - logger.warning(f"HTTP échoué: {http_result.error}, fallback Playwright") + fallback_reason = http_result.error if http_result else "force_playwright" + logger.warning(f"HTTP échoué: {fallback_reason}, fallback Playwright") 
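+            # Fallback Playwright: HTTP a echoue ou a ete saute (force_playwright)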
pw_result = fetch_playwright( canonical_url, headless=not config.options.headful, @@ -231,7 +234,7 @@ def run( fetch_error = pw_result.error logger.error(f"✗ Playwright échoué: {fetch_error}") else: - fetch_error = http_result.error + fetch_error = http_result.error if http_result else "skip_http" logger.error(f"✗ HTTP échoué: {fetch_error}") # Parser si on a du HTML @@ -467,11 +470,25 @@ def worker( Lance un worker RQ. """ config = get_config() - connection = redis.from_url(config.redis.url) + try: + connection = redis.from_url(config.redis.url) + # Verification connexion avant de lancer le worker + connection.ping() + except redis.exceptions.ConnectionError as e: + rprint(f"[red]✗ Impossible de se connecter a Redis ({config.redis.url})[/red]") + rprint(f"[red] Erreur: {e}[/red]") + rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]") + rprint(" docker compose up -d redis") + rprint(" # ou") + rprint(" redis-server") + raise typer.Exit(code=1) + except redis.exceptions.RedisError as e: + rprint(f"[red]✗ Erreur Redis: {e}[/red]") + raise typer.Exit(code=1) - with Connection(connection): - worker_instance = Worker([queue]) - worker_instance.work(with_scheduler=with_scheduler) + # RQ 2.x: connexion passee directement au Worker + worker_instance = Worker([queue], connection=connection) + worker_instance.work(with_scheduler=with_scheduler) @app.command() @@ -486,9 +503,15 @@ def enqueue( """ Enqueue un scraping immediat. """ - scheduler = ScrapingScheduler(get_config(), queue_name=queue) - job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db) - rprint(f"[green]✓ Job enqueued: {job.id}[/green]") + try: + scheduler = ScrapingScheduler(get_config(), queue_name=queue) + job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db) + rprint(f"[green]✓ Job enqueued: {job.id}[/green]") + except RedisUnavailableError as e: + rprint(f"[red]✗ {e.message}[/red]") + rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]") + rprint(" docker compose up -d redis") + raise typer.Exit(code=1) @app.command() @@ -504,16 +527,22 @@ def schedule( """ Planifie un scraping recurrent. 
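The Redis guard used by the CLI above (ping before starting the worker, `RedisUnavailableError` around enqueue/schedule) can be reused outside Typer. A minimal sketch, assuming a local Redis and the package installed with `pip install -e .`; the product URL is purely illustrative:

```python
from pricewatch.app.core.config import get_config
from pricewatch.app.tasks.scheduler import (
    RedisUnavailableError,
    ScrapingScheduler,
    check_redis_connection,
)

config = get_config()

# Same pre-flight check the CLI performs before launching the RQ worker.
if not check_redis_connection(config.redis.url):
    raise SystemExit(f"Redis unreachable at {config.redis.url} (try: docker compose up -d redis)")

try:
    scheduler = ScrapingScheduler(config, queue_name="default")
    job = scheduler.enqueue_immediate(
        "https://www.amazon.fr/dp/B0DQ8M74KL", use_playwright=True, save_db=True
    )
    print(f"Job enqueued: {job.id}")
except RedisUnavailableError as exc:
    # The lazy connection can still fail between the ping and the actual enqueue.
    raise SystemExit(exc.message)
```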
""" - scheduler = ScrapingScheduler(get_config(), queue_name=queue) - job_info = scheduler.schedule_product( - url, - interval_hours=interval, - use_playwright=use_playwright, - save_db=save_db, - ) - rprint( - f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]" - ) + try: + scheduler = ScrapingScheduler(get_config(), queue_name=queue) + job_info = scheduler.schedule_product( + url, + interval_hours=interval, + use_playwright=use_playwright, + save_db=save_db, + ) + rprint( + f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]" + ) + except RedisUnavailableError as e: + rprint(f"[red]✗ {e.message}[/red]") + rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]") + rprint(" docker compose up -d redis") + raise typer.Exit(code=1) if __name__ == "__main__": diff --git a/pricewatch/app/core/__pycache__/config.cpython-313.pyc b/pricewatch/app/core/__pycache__/config.cpython-313.pyc old mode 100755 new mode 100644 index 9347fe5..9fd576c Binary files a/pricewatch/app/core/__pycache__/config.cpython-313.pyc and b/pricewatch/app/core/__pycache__/config.cpython-313.pyc differ diff --git a/pricewatch/app/core/__pycache__/schema.cpython-313.pyc b/pricewatch/app/core/__pycache__/schema.cpython-313.pyc old mode 100755 new mode 100644 index 5513cdf..8319162 Binary files a/pricewatch/app/core/__pycache__/schema.cpython-313.pyc and b/pricewatch/app/core/__pycache__/schema.cpython-313.pyc differ diff --git a/pricewatch/app/core/config.py b/pricewatch/app/core/config.py old mode 100755 new mode 100644 index 66e4e7e..84bd36f --- a/pricewatch/app/core/config.py +++ b/pricewatch/app/core/config.py @@ -108,6 +108,11 @@ class AppConfig(BaseSettings): default=True, description="Enable background worker functionality" ) + # API auth + api_token: Optional[str] = Field( + default=None, description="API token simple (Bearer)" + ) + # Scraping defaults default_playwright_timeout: int = Field( default=60000, description="Default Playwright timeout in milliseconds" @@ -138,6 +143,7 @@ class AppConfig(BaseSettings): logger.info(f"Worker enabled: {self.enable_worker}") logger.info(f"Worker timeout: {self.worker_timeout}s") logger.info(f"Worker concurrency: {self.worker_concurrency}") + logger.info(f"API token configured: {bool(self.api_token)}") logger.info("================================") diff --git a/pricewatch/app/core/io.py b/pricewatch/app/core/io.py index 2e99c46..7a6bef7 100755 --- a/pricewatch/app/core/io.py +++ b/pricewatch/app/core/io.py @@ -23,6 +23,9 @@ class ScrapingOptions(BaseModel): use_playwright: bool = Field( default=True, description="Utiliser Playwright en fallback" ) + force_playwright: bool = Field( + default=False, description="Forcer Playwright même si HTTP réussi" + ) headful: bool = Field(default=False, description="Mode headful (voir le navigateur)") save_html: bool = Field( default=True, description="Sauvegarder HTML pour debug" @@ -94,7 +97,8 @@ def read_yaml_config(yaml_path: str | Path) -> ScrapingConfig: config = ScrapingConfig.model_validate(data) logger.info( f"Configuration chargée: {len(config.urls)} URL(s), " - f"playwright={config.options.use_playwright}" + f"playwright={config.options.use_playwright}, " + f"force_playwright={config.options.force_playwright}" ) return config diff --git a/pricewatch/app/core/schema.py b/pricewatch/app/core/schema.py index dde2503..9c45228 100755 --- a/pricewatch/app/core/schema.py +++ b/pricewatch/app/core/schema.py @@ -9,7 +9,7 @@ from datetime import datetime from 
enum import Enum from typing import Optional -from pydantic import BaseModel, Field, HttpUrl, field_validator +from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator class StockStatus(str, Enum): @@ -38,6 +38,8 @@ class DebugStatus(str, Enum): class DebugInfo(BaseModel): """Informations de debug pour tracer les problèmes de scraping.""" + model_config = ConfigDict(use_enum_values=True) + method: FetchMethod = Field( description="Méthode utilisée pour la récupération (http ou playwright)" ) @@ -55,9 +57,6 @@ class DebugInfo(BaseModel): default=None, description="Taille du HTML récupéré en octets" ) - class Config: - use_enum_values = True - class ProductSnapshot(BaseModel): """ @@ -81,6 +80,7 @@ class ProductSnapshot(BaseModel): # Données produit principales title: Optional[str] = Field(default=None, description="Nom du produit") price: Optional[float] = Field(default=None, description="Prix du produit", ge=0) + msrp: Optional[float] = Field(default=None, description="Prix conseille", ge=0) currency: str = Field(default="EUR", description="Devise (EUR, USD, etc.)") shipping_cost: Optional[float] = Field( default=None, description="Frais de port", ge=0 @@ -94,6 +94,7 @@ class ProductSnapshot(BaseModel): default=None, description="Référence produit (ASIN, SKU, etc.)" ) category: Optional[str] = Field(default=None, description="Catégorie du produit") + description: Optional[str] = Field(default=None, description="Description produit") # Médias images: list[str] = Field( @@ -133,20 +134,22 @@ class ProductSnapshot(BaseModel): """Filtre les URLs d'images vides.""" return [url.strip() for url in v if url and url.strip()] - class Config: - use_enum_values = True - json_schema_extra = { + model_config = ConfigDict( + use_enum_values=True, + json_schema_extra={ "example": { "source": "amazon", "url": "https://www.amazon.fr/dp/B08N5WRWNW", "fetched_at": "2026-01-13T10:30:00Z", "title": "Exemple de produit", "price": 299.99, + "msrp": 349.99, "currency": "EUR", "shipping_cost": 0.0, "stock_status": "in_stock", "reference": "B08N5WRWNW", "category": "Electronics", + "description": "Chargeur USB-C multi-ports.", "images": [ "https://example.com/image1.jpg", "https://example.com/image2.jpg", @@ -165,7 +168,8 @@ class ProductSnapshot(BaseModel): "html_size_bytes": 145000, }, } - } + }, + ) def to_dict(self) -> dict: """Serialize vers un dictionnaire Python natif.""" diff --git a/pricewatch/app/db/__init__.py b/pricewatch/app/db/__init__.py old mode 100755 new mode 100644 index c466e97..0bceaa0 --- a/pricewatch/app/db/__init__.py +++ b/pricewatch/app/db/__init__.py @@ -20,6 +20,7 @@ from pricewatch.app.db.models import ( ProductImage, ProductSpec, ScrapingLog, + Webhook, ) __all__ = [ @@ -30,6 +31,7 @@ __all__ = [ "ProductImage", "ProductSpec", "ScrapingLog", + "Webhook", "ProductRepository", # Connection "get_engine", diff --git a/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc old mode 100755 new mode 100644 index 6d900a4..81bdad1 Binary files a/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc and b/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc differ diff --git a/pricewatch/app/db/__pycache__/connection.cpython-313.pyc b/pricewatch/app/db/__pycache__/connection.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/__pycache__/models.cpython-313.pyc b/pricewatch/app/db/__pycache__/models.cpython-313.pyc old mode 100755 new mode 100644 index 7e34247..ca8fa98 Binary files 
a/pricewatch/app/db/__pycache__/models.cpython-313.pyc and b/pricewatch/app/db/__pycache__/models.cpython-313.pyc differ diff --git a/pricewatch/app/db/__pycache__/repository.cpython-313.pyc b/pricewatch/app/db/__pycache__/repository.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/connection.py b/pricewatch/app/db/connection.py old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/migrations/__pycache__/env.cpython-313.pyc b/pricewatch/app/db/migrations/__pycache__/env.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/migrations/env.py b/pricewatch/app/db/migrations/env.py old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/migrations/script.py.mako b/pricewatch/app/db/migrations/script.py.mako old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py b/pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/migrations/versions/20260114_02_webhooks.py b/pricewatch/app/db/migrations/versions/20260114_02_webhooks.py new file mode 100644 index 0000000..7e0ee83 --- /dev/null +++ b/pricewatch/app/db/migrations/versions/20260114_02_webhooks.py @@ -0,0 +1,35 @@ +"""Add webhooks table + +Revision ID: 20260114_02 +Revises: 20260114_01 +Create Date: 2026-01-14 00:00:00 +""" + +from alembic import op +import sqlalchemy as sa + +# Revision identifiers, used by Alembic. +revision = "20260114_02" +down_revision = "20260114_01" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "webhooks", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("event", sa.String(length=50), nullable=False), + sa.Column("url", sa.Text(), nullable=False), + sa.Column("enabled", sa.Boolean(), nullable=False, server_default=sa.text("true")), + sa.Column("secret", sa.String(length=200), nullable=True), + sa.Column("created_at", sa.TIMESTAMP(), nullable=False), + ) + op.create_index("ix_webhook_event", "webhooks", ["event"], unique=False) + op.create_index("ix_webhook_enabled", "webhooks", ["enabled"], unique=False) + + +def downgrade() -> None: + op.drop_index("ix_webhook_enabled", table_name="webhooks") + op.drop_index("ix_webhook_event", table_name="webhooks") + op.drop_table("webhooks") diff --git a/pricewatch/app/db/migrations/versions/20260115_02_product_details.py b/pricewatch/app/db/migrations/versions/20260115_02_product_details.py new file mode 100644 index 0000000..7c6d053 --- /dev/null +++ b/pricewatch/app/db/migrations/versions/20260115_02_product_details.py @@ -0,0 +1,26 @@ +"""Ajout description et msrp sur products. + +Revision ID: 20260115_02_product_details +Revises: 20260114_02 +Create Date: 2026-01-15 10:00:00.000000 +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "20260115_02_product_details" +down_revision = "20260114_02" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column("products", sa.Column("description", sa.Text(), nullable=True)) + op.add_column("products", sa.Column("msrp", sa.Numeric(10, 2), nullable=True)) + + +def downgrade() -> None: + op.drop_column("products", "msrp") + op.drop_column("products", "description") diff --git a/pricewatch/app/db/migrations/versions/__pycache__/20260114_01_initial_schema.cpython-313.pyc b/pricewatch/app/db/migrations/versions/__pycache__/20260114_01_initial_schema.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/db/models.py b/pricewatch/app/db/models.py old mode 100755 new mode 100644 index 096a0cf..20693b1 --- a/pricewatch/app/db/models.py +++ b/pricewatch/app/db/models.py @@ -15,7 +15,7 @@ Justification technique: - JSONB uniquement pour données variables: errors, notes dans logs """ -from datetime import datetime +from datetime import datetime, timezone from decimal import Decimal from typing import List, Optional @@ -28,6 +28,7 @@ from sqlalchemy import ( Integer, JSON, Numeric, + Boolean, String, Text, UniqueConstraint, @@ -42,6 +43,10 @@ class Base(DeclarativeBase): pass +def utcnow() -> datetime: + return datetime.now(timezone.utc) + + class Product(Base): """ Catalogue produits (1 ligne par produit unique). @@ -70,19 +75,25 @@ class Product(Base): category: Mapped[Optional[str]] = mapped_column( Text, nullable=True, comment="Product category (breadcrumb)" ) + description: Mapped[Optional[str]] = mapped_column( + Text, nullable=True, comment="Product description" + ) currency: Mapped[Optional[str]] = mapped_column( String(3), nullable=True, comment="Currency code (EUR, USD, GBP)" ) + msrp: Mapped[Optional[Decimal]] = mapped_column( + Numeric(10, 2), nullable=True, comment="Recommended price" + ) # Timestamps first_seen_at: Mapped[datetime] = mapped_column( - TIMESTAMP, nullable=False, default=datetime.utcnow, comment="First scraping timestamp" + TIMESTAMP, nullable=False, default=utcnow, comment="First scraping timestamp" ) last_updated_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, - default=datetime.utcnow, - onupdate=datetime.utcnow, + default=utcnow, + onupdate=utcnow, comment="Last metadata update", ) @@ -280,7 +291,7 @@ class ScrapingLog(Base): String(20), nullable=False, comment="Fetch status (success, partial, failed)" ) fetched_at: Mapped[datetime] = mapped_column( - TIMESTAMP, nullable=False, default=datetime.utcnow, comment="Scraping timestamp" + TIMESTAMP, nullable=False, default=utcnow, comment="Scraping timestamp" ) # Performance metrics @@ -318,3 +329,30 @@ class ScrapingLog(Base): def __repr__(self) -> str: return f"" + + +class Webhook(Base): + """ + Webhooks pour notifications externes. 
+ """ + + __tablename__ = "webhooks" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + event: Mapped[str] = mapped_column(String(50), nullable=False, comment="Event name") + url: Mapped[str] = mapped_column(Text, nullable=False, comment="Webhook URL") + enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + secret: Mapped[Optional[str]] = mapped_column( + String(200), nullable=True, comment="Secret optionnel" + ) + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, default=utcnow, comment="Creation timestamp" + ) + + __table_args__ = ( + Index("ix_webhook_event", "event"), + Index("ix_webhook_enabled", "enabled"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/pricewatch/app/db/repository.py b/pricewatch/app/db/repository.py old mode 100755 new mode 100644 index 5474b98..d0b451c --- a/pricewatch/app/db/repository.py +++ b/pricewatch/app/db/repository.py @@ -49,8 +49,12 @@ class ProductRepository: product.title = snapshot.title if snapshot.category: product.category = snapshot.category + if snapshot.description: + product.description = snapshot.description if snapshot.currency: product.currency = snapshot.currency + if snapshot.msrp is not None: + product.msrp = snapshot.msrp def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]: """Ajoute une entree d'historique de prix si inexistante.""" diff --git a/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/scraping/__pycache__/pipeline.cpython-313.pyc b/pricewatch/app/scraping/__pycache__/pipeline.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/scraping/pipeline.py b/pricewatch/app/scraping/pipeline.py old mode 100755 new mode 100644 diff --git a/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc b/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc new file mode 100644 index 0000000..0725aba Binary files /dev/null and b/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc differ diff --git a/pricewatch/app/stores/aliexpress/store.py b/pricewatch/app/stores/aliexpress/store.py index eaa90a0..5459d3d 100755 --- a/pricewatch/app/stores/aliexpress/store.py +++ b/pricewatch/app/stores/aliexpress/store.py @@ -23,6 +23,7 @@ from pricewatch.app.core.schema import ( StockStatus, ) from pricewatch.app.stores.base import BaseStore +from pricewatch.app.stores.price_parser import parse_price_text logger = get_logger("stores.aliexpress") @@ -126,6 +127,8 @@ class AliexpressStore(BaseStore): images = self._extract_images(html, soup, debug_info) category = self._extract_category(soup, debug_info) specs = self._extract_specs(soup, debug_info) + description = self._extract_description(soup, debug_info) + msrp = self._extract_msrp(html, debug_info) reference = self.extract_reference(url) # Note sur le rendu client-side @@ -150,8 +153,10 @@ class AliexpressStore(BaseStore): stock_status=stock_status, reference=reference, category=category, + description=description, images=images, specs=specs, + msrp=msrp, debug=debug_info, ) @@ -183,6 +188,17 @@ class AliexpressStore(BaseStore): debug.errors.append("Titre non trouvé") return None + def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: + """Extrait la description (meta tags).""" + meta = soup.find("meta", property="og:description") or soup.find( + 
"meta", attrs={"name": "description"} + ) + if meta: + description = meta.get("content", "").strip() + if description: + return description + return None + def _extract_price( self, html: str, soup: BeautifulSoup, debug: DebugInfo ) -> Optional[float]: @@ -193,35 +209,39 @@ class AliexpressStore(BaseStore): On utilise regex sur le HTML brut. """ # Pattern 1: Prix avant € (ex: "136,69 €") - match = re.search(r"([0-9]+[.,][0-9]{2})\s*€", html) + match = re.search(r"([0-9][0-9\\s.,\\u00a0\\u202f\\u2009]*)\\s*€", html) if match: - price_str = match.group(1).replace(",", ".") - try: - return float(price_str) - except ValueError: - pass + price = parse_price_text(match.group(1)) + if price is not None: + return price # Pattern 2: € avant prix (ex: "€ 136.69") - match = re.search(r"€\s*([0-9]+[.,][0-9]{2})", html) + match = re.search(r"€\\s*([0-9][0-9\\s.,\\u00a0\\u202f\\u2009]*)", html) if match: - price_str = match.group(1).replace(",", ".") - try: - return float(price_str) - except ValueError: - pass + price = parse_price_text(match.group(1)) + if price is not None: + return price # Pattern 3: Chercher dans meta tags (moins fiable) og_price = soup.find("meta", property="og:price:amount") if og_price: price_str = og_price.get("content", "") - try: - return float(price_str) - except ValueError: - pass + price = parse_price_text(price_str) + if price is not None: + return price debug.errors.append("Prix non trouvé") return None + def _extract_msrp(self, html: str, debug: DebugInfo) -> Optional[float]: + """Extrait le prix conseille si present.""" + match = re.search(r"originalPrice\"\\s*:\\s*\"([0-9\\s.,]+)\"", html) + if match: + price = parse_price_text(match.group(1)) + if price is not None: + return price + return None + def _extract_currency( self, url: str, soup: BeautifulSoup, debug: DebugInfo ) -> str: diff --git a/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc b/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/pricewatch/app/stores/amazon/selectors.yml b/pricewatch/app/stores/amazon/selectors.yml index 2bd8ad3..652ab5e 100755 --- a/pricewatch/app/stores/amazon/selectors.yml +++ b/pricewatch/app/stores/amazon/selectors.yml @@ -54,12 +54,12 @@ specs_table: # ASIN (parfois dans les métadonnées) asin: - "input[name='ASIN']" - - "th:contains('ASIN') + td" + - "th:-soup-contains('ASIN') + td" # Messages captcha / robot check captcha_indicators: - "form[action*='validateCaptcha']" - - "p.a-last:contains('Sorry')" + - "p.a-last:-soup-contains('Sorry')" - "img[alt*='captcha']" # Notes pour le parsing: diff --git a/pricewatch/app/stores/amazon/store.py b/pricewatch/app/stores/amazon/store.py index a2bdaca..7426a9d 100755 --- a/pricewatch/app/stores/amazon/store.py +++ b/pricewatch/app/stores/amazon/store.py @@ -4,7 +4,9 @@ Store Amazon - Parsing de produits Amazon.fr et Amazon.com. Supporte l'extraction de: titre, prix, ASIN, images, specs, etc. 
""" +import json import re +from html import unescape from datetime import datetime from pathlib import Path from typing import Optional @@ -21,6 +23,7 @@ from pricewatch.app.core.schema import ( StockStatus, ) from pricewatch.app.stores.base import BaseStore +from pricewatch.app.stores.price_parser import parse_price_text logger = get_logger("stores.amazon") @@ -131,6 +134,8 @@ class AmazonStore(BaseStore): images = self._extract_images(soup, debug_info) category = self._extract_category(soup, debug_info) specs = self._extract_specs(soup, debug_info) + description = self._extract_description(soup, debug_info) + msrp = self._extract_msrp(soup, debug_info) reference = self.extract_reference(url) or self._extract_asin_from_html(soup) # Déterminer le statut final (ne pas écraser FAILED) @@ -150,8 +155,10 @@ class AmazonStore(BaseStore): stock_status=stock_status, reference=reference, category=category, + description=description, images=images, specs=specs, + msrp=msrp, debug=debug_info, ) @@ -195,6 +202,17 @@ class AmazonStore(BaseStore): debug.errors.append("Titre non trouvé") return None + def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: + """Extrait la description (meta tags).""" + meta = soup.find("meta", property="og:description") or soup.find( + "meta", attrs={"name": "description"} + ) + if meta: + description = meta.get("content", "").strip() + if description: + return description + return None + def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: """Extrait le prix.""" selectors = self.get_selector("price", []) @@ -205,14 +223,9 @@ class AmazonStore(BaseStore): elements = soup.select(selector) for element in elements: text = element.get_text(strip=True) - # Extraire nombre (format: "299,99" ou "299.99") - match = re.search(r"(\d+)[.,](\d+)", text) - if match: - price_str = f"{match.group(1)}.{match.group(2)}" - try: - return float(price_str) - except ValueError: - continue + price = parse_price_text(text) + if price is not None: + return price # Fallback: chercher les spans séparés a-price-whole et a-price-fraction whole = soup.select_one("span.a-price-whole") @@ -220,15 +233,24 @@ class AmazonStore(BaseStore): if whole and fraction: whole_text = whole.get_text(strip=True) fraction_text = fraction.get_text(strip=True) - try: - price_str = f"{whole_text}.{fraction_text}" - return float(price_str) - except ValueError: - pass + price = parse_price_text(f"{whole_text}.{fraction_text}") + if price is not None: + return price debug.errors.append("Prix non trouvé") return None + def _extract_msrp(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: + """Extrait le prix conseille.""" + strike = soup.select_one("span.priceBlockStrikePriceString") or soup.select_one( + "span.a-text-price span.a-offscreen" + ) + if strike: + price = parse_price_text(strike.get_text(strip=True)) + if price is not None: + return price + return None + def _extract_currency(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: """Extrait la devise.""" selectors = self.get_selector("currency", []) @@ -270,6 +292,7 @@ class AmazonStore(BaseStore): def _extract_images(self, soup: BeautifulSoup, debug: DebugInfo) -> list[str]: """Extrait les URLs d'images.""" images = [] + seen = set() selectors = self.get_selector("images", []) if isinstance(selectors, str): selectors = [selectors] @@ -278,19 +301,57 @@ class AmazonStore(BaseStore): elements = soup.select(selector) for element in elements: # Attribut src ou data-src - url = 
element.get("src") or element.get("data-src") + url = element.get("src") or element.get("data-src") or element.get("data-old-hires") if url and url.startswith("http"): - images.append(url) + if self._is_product_image(url) and url not in seen: + images.append(url) + seen.add(url) + dynamic = element.get("data-a-dynamic-image") + if dynamic: + urls = self._extract_dynamic_images(dynamic) + for dyn_url in urls: + if self._is_product_image(dyn_url) and dyn_url not in seen: + images.append(dyn_url) + seen.add(dyn_url) # Fallback: chercher tous les img tags si aucune image trouvée if not images: all_imgs = soup.find_all("img") for img in all_imgs: url = img.get("src") or img.get("data-src") - if url and url.startswith("http"): - images.append(url) + if url and url.startswith("http") and self._is_product_image(url): + if url not in seen: + images.append(url) + seen.add(url) - return list(set(images)) # Dédupliquer + return images + + def _extract_dynamic_images(self, raw: str) -> list[str]: + """Extrait les URLs du JSON data-a-dynamic-image.""" + try: + data = json.loads(unescape(raw)) + except (TypeError, json.JSONDecodeError): + return [] + + urls = [] + if isinstance(data, dict): + candidates = [] + for url, dims in data.items(): + if not isinstance(url, str) or not url.startswith("http"): + continue + size = dims[0] if isinstance(dims, list) and dims else 0 + candidates.append((size, url)) + candidates.sort(key=lambda item: item[0], reverse=True) + for _, url in candidates: + urls.append(url) + return urls + + def _is_product_image(self, url: str) -> bool: + """Filtre basique pour eviter les logos et sprites.""" + lowered = url.lower() + if "prime_logo" in lowered or "sprite" in lowered: + return False + return True def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: """Extrait la catégorie depuis les breadcrumbs.""" diff --git a/pricewatch/app/stores/backmarket/store.py b/pricewatch/app/stores/backmarket/store.py index a06a8bf..d48d6e6 100755 --- a/pricewatch/app/stores/backmarket/store.py +++ b/pricewatch/app/stores/backmarket/store.py @@ -23,6 +23,7 @@ from pricewatch.app.core.schema import ( StockStatus, ) from pricewatch.app.stores.base import BaseStore +from pricewatch.app.stores.price_parser import parse_price_text logger = get_logger("stores.backmarket") @@ -116,6 +117,8 @@ class BackmarketStore(BaseStore): images = json_ld_data.get("images") or self._extract_images(soup, debug_info) category = self._extract_category(soup, debug_info) specs = self._extract_specs(soup, debug_info) + description = self._extract_description(soup, debug_info) + msrp = self._extract_msrp(soup, debug_info) reference = self.extract_reference(url) # Spécifique Backmarket: condition (état du reconditionné) @@ -140,8 +143,10 @@ class BackmarketStore(BaseStore): stock_status=stock_status, reference=reference, category=category, + description=description, images=images, specs=specs, + msrp=msrp, debug=debug_info, ) @@ -213,6 +218,17 @@ class BackmarketStore(BaseStore): debug.errors.append("Titre non trouvé") return None + def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: + """Extrait la description (meta tags).""" + meta = soup.find("meta", property="og:description") or soup.find( + "meta", attrs={"name": "description"} + ) + if meta: + description = meta.get("content", "").strip() + if description: + return description + return None + def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: """Extrait le prix.""" 
selectors = self.get_selector("price", []) @@ -225,20 +241,29 @@ class BackmarketStore(BaseStore): # Attribut content (schema.org) ou texte price_text = element.get("content") or element.get_text(strip=True) - # Extraire nombre (format: "299,99" ou "299.99" ou "299") - match = re.search(r"(\d+)[.,]?(\d*)", price_text) - if match: - integer_part = match.group(1) - decimal_part = match.group(2) or "00" - price_str = f"{integer_part}.{decimal_part}" - try: - return float(price_str) - except ValueError: - continue + price = parse_price_text(price_text) + if price is not None: + return price debug.errors.append("Prix non trouvé") return None + def _extract_msrp(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: + """Extrait le prix conseille.""" + selectors = [ + ".price--old", + ".price--striked", + ".price__old", + "del", + ] + for selector in selectors: + element = soup.select_one(selector) + if element: + price = parse_price_text(element.get_text(strip=True)) + if price is not None: + return price + return None + def _extract_currency(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: """Extrait la devise.""" selectors = self.get_selector("currency", []) diff --git a/pricewatch/app/stores/cdiscount/store.py b/pricewatch/app/stores/cdiscount/store.py index 0de9f5e..be8bdc9 100755 --- a/pricewatch/app/stores/cdiscount/store.py +++ b/pricewatch/app/stores/cdiscount/store.py @@ -4,6 +4,7 @@ Store Cdiscount - Parsing de produits Cdiscount.com. Supporte l'extraction de: titre, prix, SKU, images, specs, etc. """ +import json import re from datetime import datetime from pathlib import Path @@ -21,6 +22,7 @@ from pricewatch.app.core.schema import ( StockStatus, ) from pricewatch.app.stores.base import BaseStore +from pricewatch.app.stores.price_parser import parse_price_text logger = get_logger("stores.cdiscount") @@ -112,6 +114,8 @@ class CdiscountStore(BaseStore): images = self._extract_images(soup, debug_info) category = self._extract_category(soup, debug_info) specs = self._extract_specs(soup, debug_info) + description = self._extract_description(soup, debug_info) + msrp = self._extract_msrp(soup, debug_info) reference = self.extract_reference(url) or self._extract_sku_from_html(soup) # Déterminer le statut final @@ -130,8 +134,10 @@ class CdiscountStore(BaseStore): stock_status=stock_status, reference=reference, category=category, + description=description, images=images, specs=specs, + msrp=msrp, debug=debug_info, ) @@ -158,6 +164,21 @@ class CdiscountStore(BaseStore): debug.errors.append("Titre non trouvé") return None + def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: + """Extrait la description (meta tags).""" + meta = soup.find("meta", property="og:description") or soup.find( + "meta", attrs={"name": "description"} + ) + if meta: + description = meta.get("content", "").strip() + if description: + return description + product_ld = self._find_product_ld(soup) + desc_ld = product_ld.get("description") if product_ld else None + if isinstance(desc_ld, str) and desc_ld.strip(): + return desc_ld.strip() + return None + def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: """Extrait le prix.""" selectors = self.get_selector("price", []) @@ -170,20 +191,29 @@ class CdiscountStore(BaseStore): # Attribut content (schema.org) ou texte price_text = element.get("content") or element.get_text(strip=True) - # Extraire nombre (format: "299,99" ou "299.99") - match = re.search(r"(\d+)[.,]?(\d*)", price_text) - if 
match: - integer_part = match.group(1) - decimal_part = match.group(2) or "00" - price_str = f"{integer_part}.{decimal_part}" - try: - return float(price_str) - except ValueError: - continue + price = parse_price_text(price_text) + if price is not None: + return price debug.errors.append("Prix non trouvé") return None + def _extract_msrp(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]: + """Extrait le prix conseille.""" + selectors = [ + ".jsStrikePrice", + ".price__old", + ".c-price__strike", + ".price-strike", + ] + for selector in selectors: + element = soup.select_one(selector) + if element: + price = parse_price_text(element.get_text(strip=True)) + if price is not None: + return price + return None + def _extract_currency(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: """Extrait la devise.""" selectors = self.get_selector("currency", []) @@ -249,7 +279,14 @@ class CdiscountStore(BaseStore): url = f"https:{url}" images.append(url) - return list(set(images)) # Dédupliquer + ld_images = self._extract_ld_images(self._find_product_ld(soup)) + for url in ld_images: + if url and url not in images: + if url.startswith("//"): + url = f"https:{url}" + images.append(url) + + return list(dict.fromkeys(images)) # Préserver l’ordre def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]: """Extrait la catégorie depuis les breadcrumbs.""" @@ -275,6 +312,53 @@ class CdiscountStore(BaseStore): return None + def _extract_json_ld_entries(self, soup: BeautifulSoup) -> list[dict]: + """Parse les scripts JSON-LD et retourne les objets.""" + entries = [] + scripts = soup.find_all("script", type="application/ld+json") + for script in scripts: + raw = script.string or script.text + if not raw: + continue + try: + payload = json.loads(raw.strip()) + except (json.JSONDecodeError, TypeError): + continue + if isinstance(payload, list): + entries.extend(payload) + else: + entries.append(payload) + return entries + + def _find_product_ld(self, soup: BeautifulSoup) -> dict: + """Retourne l’objet Product JSON-LD si présent.""" + for entry in self._extract_json_ld_entries(soup): + if not isinstance(entry, dict): + continue + type_field = entry.get("@type") or entry.get("type") + if isinstance(type_field, str) and "product" in type_field.lower(): + return entry + return {} + + def _extract_ld_images(self, product_ld: dict) -> list[str]: + """Récupère les images listées dans le JSON-LD.""" + if not product_ld: + return [] + images = product_ld.get("image") or product_ld.get("images") + if not images: + return [] + if isinstance(images, str): + images = [images] + extracted = [] + for item in images: + if isinstance(item, str): + extracted.append(item) + elif isinstance(item, dict): + url = item.get("url") + if isinstance(url, str): + extracted.append(url) + return extracted + def _extract_specs(self, soup: BeautifulSoup, debug: DebugInfo) -> dict[str, str]: """Extrait les caractéristiques techniques.""" specs = {} @@ -298,6 +382,19 @@ class CdiscountStore(BaseStore): if key and value: specs[key] = value + product_ld = self._find_product_ld(soup) + additional = product_ld.get("additionalProperty") if product_ld else None + if isinstance(additional, dict): + additional = [additional] + if isinstance(additional, list): + for item in additional: + if not isinstance(item, dict): + continue + key = item.get("name") or item.get("propertyID") + value = item.get("value") or item.get("valueReference") + if key and value: + specs[key] = value + return specs def 
_extract_sku_from_html(self, soup: BeautifulSoup) -> Optional[str]: diff --git a/pricewatch/app/stores/price_parser.py b/pricewatch/app/stores/price_parser.py new file mode 100644 index 0000000..2947944 --- /dev/null +++ b/pricewatch/app/stores/price_parser.py @@ -0,0 +1,48 @@ +""" +Helpers pour parser des prix avec separateurs de milliers. +""" + +from __future__ import annotations + +import re +from typing import Optional + + +def parse_price_text(text: str) -> Optional[float]: + """ + Parse un texte de prix en float. + + Gere les separateurs espace, point, virgule et espaces insécables. + """ + if not text: + return None + + text = re.sub(r"(\d)\s*€\s*(\d)", r"\1,\2", text) + cleaned = text.replace("\u00a0", " ").replace("\u202f", " ").replace("\u2009", " ") + cleaned = "".join(ch for ch in cleaned if ch.isdigit() or ch in ".,") + if not cleaned: + return None + + if "," in cleaned and "." in cleaned: + if cleaned.rfind(",") > cleaned.rfind("."): + cleaned = cleaned.replace(".", "") + cleaned = cleaned.replace(",", ".") + else: + cleaned = cleaned.replace(",", "") + elif "," in cleaned: + parts = cleaned.split(",") + if len(parts) > 1: + decimal = parts[-1] + integer = "".join(parts[:-1]) + cleaned = f"{integer}.{decimal}" if decimal else integer + elif "." in cleaned: + parts = cleaned.split(".") + if len(parts) > 1: + decimal = parts[-1] + integer = "".join(parts[:-1]) + cleaned = f"{integer}.{decimal}" if decimal else integer + + try: + return float(cleaned) + except ValueError: + return None diff --git a/pricewatch/app/tasks/__init__.py b/pricewatch/app/tasks/__init__.py old mode 100755 new mode 100644 index 7ffa53d..9651355 --- a/pricewatch/app/tasks/__init__.py +++ b/pricewatch/app/tasks/__init__.py @@ -3,6 +3,15 @@ Module tasks pour les jobs RQ. 
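A few sanity checks for `parse_price_text` covering the separator formats mentioned in the changelog (regular space, NBSP, dot or comma as thousands separator); the values are illustrative:

```python
from pricewatch.app.stores.price_parser import parse_price_text

assert parse_price_text("1 299,99 €") == 1299.99    # space as thousands separator
assert parse_price_text("1\u00a0299,99") == 1299.99  # non-breaking space
assert parse_price_text("1.299,99") == 1299.99       # dot thousands, comma decimal
assert parse_price_text("1,299.99") == 1299.99       # comma thousands, dot decimal
assert parse_price_text("") is None
```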
""" from pricewatch.app.tasks.scrape import scrape_product -from pricewatch.app.tasks.scheduler import ScrapingScheduler +from pricewatch.app.tasks.scheduler import ( + RedisUnavailableError, + ScrapingScheduler, + check_redis_connection, +) -__all__ = ["scrape_product", "ScrapingScheduler"] +__all__ = [ + "scrape_product", + "ScrapingScheduler", + "RedisUnavailableError", + "check_redis_connection", +] diff --git a/pricewatch/app/tasks/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/tasks/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..9d48526 Binary files /dev/null and b/pricewatch/app/tasks/__pycache__/__init__.cpython-313.pyc differ diff --git a/pricewatch/app/tasks/__pycache__/scheduler.cpython-313.pyc b/pricewatch/app/tasks/__pycache__/scheduler.cpython-313.pyc new file mode 100644 index 0000000..f0898ec Binary files /dev/null and b/pricewatch/app/tasks/__pycache__/scheduler.cpython-313.pyc differ diff --git a/pricewatch/app/tasks/__pycache__/scrape.cpython-313.pyc b/pricewatch/app/tasks/__pycache__/scrape.cpython-313.pyc new file mode 100644 index 0000000..0656cdf Binary files /dev/null and b/pricewatch/app/tasks/__pycache__/scrape.cpython-313.pyc differ diff --git a/pricewatch/app/tasks/scheduler.py b/pricewatch/app/tasks/scheduler.py old mode 100755 new mode 100644 index 628594c..cb11883 --- a/pricewatch/app/tasks/scheduler.py +++ b/pricewatch/app/tasks/scheduler.py @@ -9,6 +9,8 @@ from datetime import datetime, timedelta, timezone from typing import Optional import redis +from redis.exceptions import ConnectionError as RedisConnectionError +from redis.exceptions import RedisError, TimeoutError as RedisTimeoutError from rq import Queue from rq_scheduler import Scheduler @@ -19,6 +21,15 @@ from pricewatch.app.tasks.scrape import scrape_product logger = get_logger("tasks.scheduler") +class RedisUnavailableError(Exception): + """Exception levee quand Redis n'est pas disponible.""" + + def __init__(self, message: str = "Redis non disponible", cause: Optional[Exception] = None): + self.message = message + self.cause = cause + super().__init__(self.message) + + @dataclass class ScheduledJobInfo: """Infos de retour pour un job planifie.""" @@ -27,14 +38,72 @@ class ScheduledJobInfo: next_run: datetime +def check_redis_connection(redis_url: str) -> bool: + """ + Verifie si Redis est accessible. + + Returns: + True si Redis repond, False sinon. 
+ """ + try: + conn = redis.from_url(redis_url) + conn.ping() + return True + except (RedisConnectionError, RedisTimeoutError, RedisError) as e: + logger.debug(f"Redis ping echoue: {e}") + return False + + class ScrapingScheduler: """Scheduler pour les jobs de scraping avec RQ.""" def __init__(self, config: Optional[AppConfig] = None, queue_name: str = "default") -> None: self.config = config or get_config() - self.redis = redis.from_url(self.config.redis.url) - self.queue = Queue(queue_name, connection=self.redis) - self.scheduler = Scheduler(queue=self.queue, connection=self.redis) + self._queue_name = queue_name + self._redis: Optional[redis.Redis] = None + self._queue: Optional[Queue] = None + self._scheduler: Optional[Scheduler] = None + + def _ensure_connected(self) -> None: + """Etablit la connexion Redis si necessaire, leve RedisUnavailableError si echec.""" + if self._redis is not None: + return + + try: + self._redis = redis.from_url(self.config.redis.url) + # Ping pour verifier la connexion + self._redis.ping() + self._queue = Queue(self._queue_name, connection=self._redis) + self._scheduler = Scheduler(queue=self._queue, connection=self._redis) + logger.debug(f"Connexion Redis etablie: {self.config.redis.url}") + except (RedisConnectionError, RedisTimeoutError) as e: + self._redis = None + msg = f"Impossible de se connecter a Redis ({self.config.redis.url}): {e}" + logger.error(msg) + raise RedisUnavailableError(msg, cause=e) from e + except RedisError as e: + self._redis = None + msg = f"Erreur Redis: {e}" + logger.error(msg) + raise RedisUnavailableError(msg, cause=e) from e + + @property + def redis(self) -> redis.Redis: + """Acces a la connexion Redis (lazy).""" + self._ensure_connected() + return self._redis # type: ignore + + @property + def queue(self) -> Queue: + """Acces a la queue RQ (lazy).""" + self._ensure_connected() + return self._queue # type: ignore + + @property + def scheduler(self) -> Scheduler: + """Acces au scheduler RQ (lazy).""" + self._ensure_connected() + return self._scheduler # type: ignore def enqueue_immediate( self, diff --git a/pricewatch/app/tasks/scrape.py b/pricewatch/app/tasks/scrape.py old mode 100755 new mode 100644 index 3db721a..44486d6 --- a/pricewatch/app/tasks/scrape.py +++ b/pricewatch/app/tasks/scrape.py @@ -4,6 +4,7 @@ Tache de scraping asynchrone pour RQ. from __future__ import annotations +import time from typing import Any, Optional from pricewatch.app.core.config import AppConfig, get_config @@ -46,6 +47,9 @@ def scrape_product( Retourne un dict avec success, product_id, snapshot, error. 
""" + job_start_time = time.time() + logger.info(f"[JOB START] Scraping: {url}") + config: AppConfig = get_config() setup_stores() @@ -58,6 +62,8 @@ def scrape_product( registry = get_registry() store = registry.detect_store(url) if not store: + elapsed_ms = int((time.time() - job_start_time) * 1000) + logger.error(f"[JOB FAILED] Aucun store detecte pour: {url} (duree={elapsed_ms}ms)") snapshot = ProductSnapshot( source="unknown", url=url, @@ -70,6 +76,8 @@ def scrape_product( ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db) return {"success": False, "product_id": None, "snapshot": snapshot, "error": "store"} + logger.info(f"[STORE] Detecte: {store.store_id}") + canonical_url = store.canonicalize(url) html = None @@ -79,13 +87,16 @@ def scrape_product( html_size_bytes = None pw_result = None + logger.debug(f"[FETCH] Tentative HTTP: {canonical_url}") http_result = fetch_http(canonical_url) duration_ms = http_result.duration_ms if http_result.success: html = http_result.html fetch_method = FetchMethod.HTTP + logger.info(f"[FETCH] HTTP OK (duree={duration_ms}ms, taille={len(html)})") elif use_playwright: + logger.debug(f"[FETCH] HTTP echoue ({http_result.error}), fallback Playwright") pw_result = fetch_playwright( canonical_url, headless=not headful, @@ -97,10 +108,13 @@ def scrape_product( if pw_result.success: html = pw_result.html fetch_method = FetchMethod.PLAYWRIGHT + logger.info(f"[FETCH] Playwright OK (duree={duration_ms}ms, taille={len(html)})") else: fetch_error = pw_result.error + logger.warning(f"[FETCH] Playwright echoue: {fetch_error}") else: fetch_error = http_result.error + logger.warning(f"[FETCH] HTTP echoue: {fetch_error}") if html: html_size_bytes = len(html.encode("utf-8")) @@ -118,12 +132,18 @@ def scrape_product( save_debug_screenshot(pw_result.screenshot, f"{store.store_id}_{ref}") try: + logger.debug(f"[PARSE] Parsing avec {store.store_id}...") snapshot = store.parse(html, canonical_url) snapshot.debug.method = fetch_method snapshot.debug.duration_ms = duration_ms snapshot.debug.html_size_bytes = html_size_bytes success = snapshot.debug.status != DebugStatus.FAILED + if success: + logger.info(f"[PARSE] OK - titre={bool(snapshot.title)}, prix={snapshot.price}") + else: + logger.warning(f"[PARSE] Partiel - status={snapshot.debug.status}") except Exception as exc: + logger.error(f"[PARSE] Exception: {exc}") snapshot = ProductSnapshot( source=store.store_id, url=canonical_url, @@ -152,6 +172,19 @@ def scrape_product( product_id = ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db) + # Log final du job + elapsed_ms = int((time.time() - job_start_time) * 1000) + if success: + logger.info( + f"[JOB OK] {store.store_id}/{snapshot.reference} " + f"product_id={product_id} prix={snapshot.price} duree={elapsed_ms}ms" + ) + else: + logger.warning( + f"[JOB FAILED] {store.store_id}/{snapshot.reference or 'unknown'} " + f"erreur={fetch_error} duree={elapsed_ms}ms" + ) + return { "success": success, "product_id": product_id, diff --git a/pyproject.toml b/pyproject.toml index 4697124..39f9c59 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,10 @@ dependencies = [ "redis>=5.0.0", "rq>=1.15.0", "rq-scheduler>=0.13.0", + + # API (Phase 3) + "fastapi>=0.110.0", + "uvicorn>=0.27.0", ] [project.optional-dependencies] diff --git a/scrap_url.yaml b/scrap_url.yaml index a73e808..de96eb9 100755 --- a/scrap_url.yaml +++ b/scrap_url.yaml @@ -4,7 +4,8 @@ # Liste des URLs à scraper # Note: Ces URLs sont des exemples, remplacez-les 
par de vraies URLs produit urls: - - "https://www.amazon.fr/NINJA-Essential-Cappuccino-préréglages-ES501EU/dp/B0DFWRHZ7L" + - "https://www.amazon.fr/ASUS-A16-TUF608UH-RV054W-Portable-Processeur-Windows/dp/B0DQ8M74KL" + - "https://www.cdiscount.com/informatique/ordinateurs-pc-portables/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo/f-10709-tuf608umrv004.html" # Options de scraping options: @@ -23,3 +24,4 @@ options: # Timeout par page en millisecondes timeout_ms: 60000 + force_playwright: true diff --git a/scraped_store.json b/scraped_store.json old mode 100755 new mode 100644 index e416b34..c7c4692 --- a/scraped_store.json +++ b/scraped_store.json @@ -1,28 +1,121 @@ [ { "source": "amazon", - "url": "https://www.amazon.fr/dp/B0DFWRHZ7L", - "fetched_at": "2026-01-13T13:24:21.615894", - "title": null, - "price": null, + "url": "https://www.amazon.fr/dp/B0DQ8M74KL", + "fetched_at": "2026-01-14T21:33:15.838503", + "title": "ASUS TUF Gaming A16-TUF608UH-RV054W 16 Pouces FHD Plus 165Hz Pc Portable (Processeur AMD Ryzen 7 260, 16GB DDR5, 512GB SSD, NVIDIA RTX 5050) Windows 11 Home – Clavier AZERTY", + "price": 1259.0, + "msrp": 1699.99, + "currency": "EUR", + "shipping_cost": null, + "stock_status": "in_stock", + "reference": "B0DQ8M74KL", + "category": "Ordinateurs portables classiques", + "description": "ASUS TUF Gaming A16-TUF608UH-RV054W 16 Pouces FHD Plus 165Hz Pc Portable (Processeur AMD Ryzen 7 260, 16GB DDR5, 512GB SSD, NVIDIA RTX 5050) Windows 11 Home – Clavier AZERTY : Amazon.fr: Informatique", + "images": [ + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SY300_SX300_QL70_ML2_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SX679_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SX569_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SX522_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SX466_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SY450_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SX425_.jpg", + "https://m.media-amazon.com/images/I/713fTyxvEWL._AC_SY355_.jpg" + ], + "specs": { + "Marque": "‎ASUS", + "Numéro du modèle de l'article": "‎90NR0KS1-M00480", + "séries": "‎ASUS TUF Gaming", + "Couleur": "‎GRAY", + "Garantie constructeur": "‎3 ans contructeur", + "Système d'exploitation": "‎Windows 11 Home", + "Description du clavier": "‎Jeu", + "Marque du processeur": "‎AMD", + "Type de processeur": "‎Ryzen 7", + "Vitesse du processeur": "‎3,8 GHz", + "Nombre de coeurs": "‎8", + "Mémoire maximale": "‎32 Go", + "Taille du disque dur": "‎512 GB", + "Technologie du disque dur": "‎SSD", + "Interface du disque dur": "‎PCIE x 4", + "Type d'écran": "‎LED", + "Taille de l'écran": "‎16 Pouces", + "Résolution de l'écran": "‎1920 x 1200 pixels", + "Resolution": "‎1920x1200 Pixels", + "Marque chipset graphique": "‎NVIDIA", + "Description de la carte graphique": "‎NVIDIA GeForce RTX 5050 Laptop GPU - 8GB GDDR7", + "GPU": "‎NVIDIA GeForce RTX 5050 Laptop GPU - 8GB GDDR7", + "Mémoire vive de la carte graphique": "‎8 GB", + "Type de mémoire vive (carte graphique)": "‎GDDR7", + "Type de connectivité": "‎Bluetooth, Wi-Fi", + "Type de technologie sans fil": "‎802.11ax, Bluetooth", + "Bluetooth": "‎Oui", + "Nombre de ports HDMI": "‎1", + "Nombre de ports USB 2.0": "‎1", + "Nombre de ports USB 3.0": "‎3", + "Nombre de ports Ethernet": "‎1", + "Type de connecteur": "‎Bluetooth, HDMI, USB, Wi-Fi", + "Compatibilité du périphérique": "‎Casque audio, Clavier, Souris, Ecran externe, Disque dur externe, Imprimante, etc., 
Haut-parleur", + "Poids du produit": "‎2,1 Kilogrammes", + "Divers": "‎Clavier rétroéclairé", + "Disponibilité des pièces détachées": "‎5 Ans", + "Mises à jour logicielles garanties jusqu’à": "‎Information non disponible", + "ASIN": "B0DQ8M74KL", + "Moyenne des commentaires client": "4,74,7 sur 5 étoiles(7)4,7 sur 5 étoiles", + "Classement des meilleures ventes d'Amazon": "5 025 en Informatique (Voir les 100 premiers en Informatique)124 enOrdinateurs portables classiques", + "Date de mise en ligne sur Amazon.fr": "1 juillet 2025" + }, + "debug": { + "method": "playwright", + "status": "success", + "errors": [], + "notes": [], + "duration_ms": null, + "html_size_bytes": null + } + }, + { + "source": "cdiscount", + "url": "https://www.cdiscount.com/informatique/ordinateurs-pc-portables/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo/f-10709-tuf608umrv004.html", + "fetched_at": "2026-01-14T21:33:20.309754", + "title": "PC Portable Gamer ASUS TUF Gaming A16 | Sans Windows - 16\" WUXGA 165Hz - RTX 5060 8Go - AMD Ryzen 7 260 - RAM 16Go - 1To SSD", + "price": 119999.0, + "msrp": null, "currency": "EUR", "shipping_cost": null, "stock_status": "unknown", - "reference": "B0DFWRHZ7L", + "reference": "10709-tuf608umrv004", "category": null, - "images": [], + "description": "Cdiscount : Meuble, Déco, High Tech, Bricolage, Jardin, Sport | Livraison gratuite à partir de 10€ | Paiement sécurisé | 4x possible | Retour simple et rapide | E-commerçant français, des produits et services au meilleur prix.", + "images": [ + "https://www.cdiscount.com/pdt2/0/0/4/1/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/2/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/3/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/4/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/5/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/6/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/7/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/8/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/9/700x700/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/1/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/2/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/3/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/4/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/5/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/6/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/7/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + 
"https://www.cdiscount.com/pdt2/0/0/4/8/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/pdt2/0/0/4/9/115x115/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg", + "https://www.cdiscount.com/ac/085x085/TUF608UMRV004_177763282_1.png", + "https://www.cdiscount.com/ac/085x085/TUF608UMRV004_177763282_2.png", + "https://www.cdiscount.com/pdt2/0/0/4/9/550x550/tuf608umrv004/rw/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo.jpg" + ], "specs": {}, "debug": { - "method": "http", - "status": "partial", - "errors": [ - "Captcha ou robot check détecté", - "Titre non trouvé", - "Prix non trouvé" - ], - "notes": [ - "Parsing incomplet: titre ou prix manquant" - ], + "method": "playwright", + "status": "success", + "errors": [], + "notes": [], "duration_ms": null, "html_size_bytes": null } diff --git a/tests/api/__pycache__/test_auth.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_auth.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..ad1e110 Binary files /dev/null and b/tests/api/__pycache__/test_auth.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_backend_logs.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_backend_logs.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..003a091 Binary files /dev/null and b/tests/api/__pycache__/test_backend_logs.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_filters_exports.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_filters_exports.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..9a4ff02 Binary files /dev/null and b/tests/api/__pycache__/test_filters_exports.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_health.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_health.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..df49b52 Binary files /dev/null and b/tests/api/__pycache__/test_health.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_http_integration.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_http_integration.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..303a9d1 Binary files /dev/null and b/tests/api/__pycache__/test_http_integration.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_products.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_products.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..f5618b7 Binary files /dev/null and b/tests/api/__pycache__/test_products.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_scrape_endpoints.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_scrape_endpoints.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..7fe7022 Binary files /dev/null and b/tests/api/__pycache__/test_scrape_endpoints.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_uvicorn_logs.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_uvicorn_logs.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..504285d Binary files /dev/null and b/tests/api/__pycache__/test_uvicorn_logs.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/__pycache__/test_version.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_version.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..4d42ca7 Binary files /dev/null and b/tests/api/__pycache__/test_version.cpython-313-pytest-9.0.2.pyc differ diff 
--git a/tests/api/__pycache__/test_webhooks.cpython-313-pytest-9.0.2.pyc b/tests/api/__pycache__/test_webhooks.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..e4e3f55 Binary files /dev/null and b/tests/api/__pycache__/test_webhooks.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py new file mode 100644 index 0000000..e2a4f8b --- /dev/null +++ b/tests/api/test_auth.py @@ -0,0 +1,56 @@ +""" +Tests auth API. +""" + +from dataclasses import dataclass +import pytest +from fastapi import HTTPException + +from pricewatch.app.api.main import require_token + + +@dataclass +class FakeRedisConfig: + url: str + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + redis: FakeRedisConfig + api_token: str + + +def test_missing_token_returns_401(monkeypatch): + """Sans token, retourne 401.""" + config = FakeAppConfig( + db=FakeDbConfig(url="sqlite:///:memory:"), + redis=FakeRedisConfig(url="redis://localhost:6379/0"), + api_token="secret", + ) + monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config) + + with pytest.raises(HTTPException) as excinfo: + require_token(None) + + assert excinfo.value.status_code == 401 + + +def test_bad_token_returns_403(monkeypatch): + """Token invalide retourne 403.""" + config = FakeAppConfig( + db=FakeDbConfig(url="sqlite:///:memory:"), + redis=FakeRedisConfig(url="redis://localhost:6379/0"), + api_token="secret", + ) + monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config) + + with pytest.raises(HTTPException) as excinfo: + require_token("Bearer nope") + + assert excinfo.value.status_code == 403 diff --git a/tests/api/test_backend_logs.py b/tests/api/test_backend_logs.py new file mode 100644 index 0000000..7d07d05 --- /dev/null +++ b/tests/api/test_backend_logs.py @@ -0,0 +1,30 @@ +""" +Tests API logs backend. +""" + +from pricewatch.app.api.main import BACKEND_LOGS, list_backend_logs, preview_scrape +from pricewatch.app.api.schemas import ScrapePreviewRequest +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot + + +def test_backend_logs_capture_preview(monkeypatch): + BACKEND_LOGS.clear() + + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com", + title="Produit", + price=9.99, + currency="EUR", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + def fake_scrape(url, use_playwright=None, save_db=False): + return {"success": True, "snapshot": snapshot, "error": None} + + monkeypatch.setattr("pricewatch.app.api.main.scrape_product", fake_scrape) + + preview_scrape(ScrapePreviewRequest(url="https://example.com")) + logs = list_backend_logs() + assert logs + assert logs[-1].message.startswith("Preview scraping") diff --git a/tests/api/test_filters_exports.py b/tests/api/test_filters_exports.py new file mode 100644 index 0000000..303cb49 --- /dev/null +++ b/tests/api/test_filters_exports.py @@ -0,0 +1,239 @@ +""" +Tests filtres avances et exports API. 
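On the client side, the token checked by `require_token` travels as a Bearer header. A stdlib-only sketch; the port (8001 per the compose setup) and the `/products` path are assumptions, and `change_me` stands for the `PW_API_TOKEN` value:

```python
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8001/products",
    headers={"Authorization": "Bearer change_me"},
)
with urllib.request.urlopen(req, timeout=10) as resp:
    print(json.loads(resp.read().decode("utf-8")))
```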
+""" + +from datetime import datetime, timedelta +import json + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from pricewatch.app.api.main import ( + export_logs, + export_prices, + export_products, + list_logs, + list_prices, + list_products, +) +from pricewatch.app.db.models import Base, PriceHistory, Product, ScrapingLog + + +def _make_session(): + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + session = sessionmaker(bind=engine)() + return engine, session + + +def test_list_products_filters_latest_price_and_stock(): + engine, session = _make_session() + try: + product_a = Product( + source="amazon", + reference="REF-A", + url="https://example.com/a", + title="A", + category="Test", + currency="EUR", + first_seen_at=datetime(2026, 1, 14, 10, 0, 0), + last_updated_at=datetime(2026, 1, 15, 9, 0, 0), + ) + product_b = Product( + source="amazon", + reference="REF-B", + url="https://example.com/b", + title="B", + category="Test", + currency="EUR", + first_seen_at=datetime(2026, 1, 14, 10, 0, 0), + last_updated_at=datetime(2026, 1, 15, 9, 5, 0), + ) + session.add_all([product_a, product_b]) + session.commit() + + history = [ + PriceHistory( + product_id=product_a.id, + price=80, + shipping_cost=0, + stock_status="out_of_stock", + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 15, 8, 0, 0), + ), + PriceHistory( + product_id=product_a.id, + price=100, + shipping_cost=0, + stock_status="in_stock", + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 15, 9, 0, 0), + ), + PriceHistory( + product_id=product_b.id, + price=200, + shipping_cost=10, + stock_status="in_stock", + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 15, 9, 5, 0), + ), + ] + session.add_all(history) + session.commit() + + filtered = list_products(price_min=150, session=session) + assert len(filtered) == 1 + assert filtered[0].reference == "REF-B" + + filtered_stock = list_products(stock_status="in_stock", session=session) + assert {item.reference for item in filtered_stock} == {"REF-A", "REF-B"} + finally: + session.close() + engine.dispose() + + +def test_list_prices_filters(): + engine, session = _make_session() + try: + product = Product( + source="amazon", + reference="REF-1", + url="https://example.com/1", + title="Produit", + category="Test", + currency="EUR", + first_seen_at=datetime(2026, 1, 14, 10, 0, 0), + last_updated_at=datetime(2026, 1, 14, 11, 0, 0), + ) + session.add(product) + session.commit() + + history = [ + PriceHistory( + product_id=product.id, + price=50, + shipping_cost=0, + stock_status="in_stock", + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 14, 12, 0, 0), + ), + PriceHistory( + product_id=product.id, + price=120, + shipping_cost=0, + stock_status="in_stock", + fetch_method="http", + fetch_status="failed", + fetched_at=datetime(2026, 1, 15, 12, 0, 0), + ), + ] + session.add_all(history) + session.commit() + + results = list_prices( + product_id=product.id, + price_min=100, + fetch_status="failed", + session=session, + ) + assert len(results) == 1 + assert results[0].price == 120 + finally: + session.close() + engine.dispose() + + +def test_list_logs_filters(): + engine, session = _make_session() + try: + now = datetime(2026, 1, 15, 10, 0, 0) + logs = [ + ScrapingLog( + product_id=None, + url="https://example.com/a", + source="amazon", + reference="REF-A", + fetch_method="http", + fetch_status="success", + 
fetched_at=now, + ), + ScrapingLog( + product_id=None, + url="https://example.com/b", + source="amazon", + reference="REF-B", + fetch_method="http", + fetch_status="failed", + fetched_at=now - timedelta(hours=2), + ), + ] + session.add_all(logs) + session.commit() + + filtered = list_logs( + fetch_status="success", + fetched_after=now - timedelta(hours=1), + session=session, + ) + assert len(filtered) == 1 + assert filtered[0].reference == "REF-A" + finally: + session.close() + engine.dispose() + + +def test_exports_csv_and_json(): + engine, session = _make_session() + try: + product = Product( + source="amazon", + reference="REF-EXPORT", + url="https://example.com/export", + title="Export", + category="Test", + currency="EUR", + first_seen_at=datetime(2026, 1, 14, 10, 0, 0), + last_updated_at=datetime(2026, 1, 14, 11, 0, 0), + ) + session.add(product) + session.commit() + + session.add( + PriceHistory( + product_id=product.id, + price=99, + shipping_cost=0, + stock_status="in_stock", + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 14, 12, 0, 0), + ) + ) + session.add( + ScrapingLog( + product_id=product.id, + url=product.url, + source=product.source, + reference=product.reference, + fetch_method="http", + fetch_status="success", + fetched_at=datetime(2026, 1, 14, 12, 0, 0), + ) + ) + session.commit() + + csv_response = export_products(format="csv", session=session) + assert csv_response.media_type == "text/csv" + assert "products.csv" in csv_response.headers.get("Content-Disposition", "") + assert "REF-EXPORT" in csv_response.body.decode("utf-8") + + json_response = export_logs(format="json", session=session) + payload = json.loads(json_response.body.decode("utf-8")) + assert payload[0]["reference"] == "REF-EXPORT" + finally: + session.close() + engine.dispose() diff --git a/tests/api/test_health.py b/tests/api/test_health.py new file mode 100644 index 0000000..56cd1c9 --- /dev/null +++ b/tests/api/test_health.py @@ -0,0 +1,40 @@ +""" +Tests endpoint /health. +""" + +from dataclasses import dataclass + +from pricewatch.app.api.main import health_check + + +@dataclass +class FakeRedisConfig: + url: str + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + redis: FakeRedisConfig + api_token: str + + +def test_health_ok(monkeypatch): + """Health retourne db/redis true.""" + config = FakeAppConfig( + db=FakeDbConfig(url="sqlite:///:memory:"), + redis=FakeRedisConfig(url="redis://localhost:6379/0"), + api_token="secret", + ) + monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config) + monkeypatch.setattr("pricewatch.app.api.main.check_db_connection", lambda cfg: True) + monkeypatch.setattr("pricewatch.app.api.main.check_redis_connection", lambda url: True) + + result = health_check() + assert result.db is True + assert result.redis is True diff --git a/tests/api/test_http_integration.py b/tests/api/test_http_integration.py new file mode 100644 index 0000000..ee51c74 --- /dev/null +++ b/tests/api/test_http_integration.py @@ -0,0 +1,47 @@ +""" +Tests HTTP d'integration contre l'API Docker. 
+""" + +import os + +import pytest +import httpx + + +API_BASE = os.getenv("PW_API_BASE", "http://localhost:8001") +API_TOKEN = os.getenv("PW_API_TOKEN", "change_me") + + +def _client() -> httpx.Client: + return httpx.Client(base_url=API_BASE, timeout=2.0) + + +def _is_api_up() -> bool: + try: + with _client() as client: + resp = client.get("/health") + return resp.status_code == 200 + except Exception: + return False + + +@pytest.mark.skipif(not _is_api_up(), reason="API Docker indisponible") +def test_health_endpoint(): + """/health repond avec db/redis.""" + with _client() as client: + resp = client.get("/health") + assert resp.status_code == 200 + payload = resp.json() + assert "db" in payload and "redis" in payload + + +@pytest.mark.skipif(not _is_api_up(), reason="API Docker indisponible") +def test_products_requires_token(): + """/products demande un token valide.""" + with _client() as client: + resp = client.get("/products") + assert resp.status_code == 401 + + resp = client.get("/products", headers={"Authorization": f"Bearer {API_TOKEN}"}) + assert resp.status_code == 200 + assert isinstance(resp.json(), list) diff --git a/tests/api/test_products.py b/tests/api/test_products.py new file mode 100644 index 0000000..55e4e74 --- /dev/null +++ b/tests/api/test_products.py @@ -0,0 +1,37 @@ +""" +Tests API produits en lecture seule. +""" + +from datetime import datetime + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from pricewatch.app.api.main import list_products +from pricewatch.app.db.models import Base, Product + + +def test_list_products(): + """Liste des produits.""" + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + session = sessionmaker(bind=engine)() + + product = Product( + source="amazon", + reference="REF1", + url="https://example.com", + title="Produit", + category="Test", + currency="EUR", + first_seen_at=datetime(2026, 1, 14, 16, 0, 0), + last_updated_at=datetime(2026, 1, 14, 16, 0, 0), + ) + session.add(product) + session.commit() + + data = list_products(session=session, limit=50, offset=0) + assert len(data) == 1 + assert data[0].reference == "REF1" + session.close() + engine.dispose() diff --git a/tests/api/test_scrape_endpoints.py b/tests/api/test_scrape_endpoints.py new file mode 100644 index 0000000..25e1ebb --- /dev/null +++ b/tests/api/test_scrape_endpoints.py @@ -0,0 +1,55 @@ +""" +Tests API preview/commit scraping. 
+""" + +from datetime import datetime + +from pricewatch.app.api.main import commit_scrape, preview_scrape +from pricewatch.app.api.schemas import ScrapeCommitRequest, ScrapePreviewRequest +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot + + +def test_preview_scrape_returns_snapshot(monkeypatch): + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com", + title="Produit", + price=9.99, + currency="EUR", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + def fake_scrape(url, use_playwright=None, save_db=False): + return {"success": True, "snapshot": snapshot, "error": None} + + monkeypatch.setattr("pricewatch.app.api.main.scrape_product", fake_scrape) + + response = preview_scrape(ScrapePreviewRequest(url="https://example.com")) + assert response.success is True + assert response.snapshot["source"] == "amazon" + assert response.snapshot["price"] == 9.99 + + +def test_commit_scrape_persists_snapshot(monkeypatch): + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com", + title="Produit", + price=19.99, + currency="EUR", + fetched_at=datetime(2026, 1, 15, 10, 0, 0), + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + class FakePipeline: + def __init__(self, config=None): + self.config = config + + def process_snapshot(self, snapshot, save_to_db=True): + return 42 + + monkeypatch.setattr("pricewatch.app.api.main.ScrapingPipeline", FakePipeline) + + response = commit_scrape(ScrapeCommitRequest(snapshot=snapshot.model_dump(mode="json"))) + assert response.success is True + assert response.product_id == 42 diff --git a/tests/api/test_uvicorn_logs.py b/tests/api/test_uvicorn_logs.py new file mode 100644 index 0000000..7bb823f --- /dev/null +++ b/tests/api/test_uvicorn_logs.py @@ -0,0 +1,16 @@ +""" +Tests API logs Uvicorn. +""" + +from pricewatch.app.api.main import list_uvicorn_logs + + +def test_list_uvicorn_logs_reads_file(monkeypatch, tmp_path): + log_file = tmp_path / "uvicorn.log" + log_file.write_text("ligne-1\nligne-2\n", encoding="utf-8") + + monkeypatch.setattr("pricewatch.app.api.main.UVICORN_LOG_PATH", log_file) + + response = list_uvicorn_logs(limit=1) + assert len(response) == 1 + assert response[0].line == "ligne-2" diff --git a/tests/api/test_version.py b/tests/api/test_version.py new file mode 100644 index 0000000..e6af5cd --- /dev/null +++ b/tests/api/test_version.py @@ -0,0 +1,11 @@ +""" +Tests API version. +""" + +from pricewatch.app.api.main import version_info + + +def test_version_info(): + """Retourne la version API.""" + response = version_info() + assert response.api_version diff --git a/tests/api/test_webhooks.py b/tests/api/test_webhooks.py new file mode 100644 index 0000000..de2e193 --- /dev/null +++ b/tests/api/test_webhooks.py @@ -0,0 +1,72 @@ +""" +Tests API webhooks. 
+""" + +import pytest +from fastapi import HTTPException +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from pricewatch.app.api.main import ( + create_webhook, + delete_webhook, + list_webhooks, + send_webhook_test, + update_webhook, +) +from pricewatch.app.api.schemas import WebhookCreate, WebhookUpdate +from pricewatch.app.db.models import Base + + +def _make_session(): + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + session = sessionmaker(bind=engine)() + return engine, session + + +def test_webhook_crud_and_test(monkeypatch): + engine, session = _make_session() + try: + payload = WebhookCreate(event="price_changed", url="https://example.com/webhook") + created = create_webhook(payload, session=session) + assert created.id > 0 + + items = list_webhooks(session=session) + assert len(items) == 1 + + updated = update_webhook(created.id, WebhookUpdate(enabled=False), session=session) + assert updated.enabled is False + + with pytest.raises(HTTPException) as excinfo: + send_webhook_test(created.id, session=session) + assert excinfo.value.status_code == 409 + + update_webhook(created.id, WebhookUpdate(enabled=True), session=session) + + called = {} + + def fake_post(url, json, headers, timeout): + called["url"] = url + called["json"] = json + called["headers"] = headers + called["timeout"] = timeout + + class FakeResponse: + status_code = 200 + + def raise_for_status(self): + return None + + return FakeResponse() + + monkeypatch.setattr("pricewatch.app.api.main.httpx.post", fake_post) + response = send_webhook_test(created.id, session=session) + assert response.status == "sent" + assert called["json"]["event"] == "test" + + delete_webhook(created.id, session=session) + assert list_webhooks(session=session) == [] + finally: + session.close() + engine.dispose() diff --git a/tests/cli/__pycache__/test_cli_worker_end_to_end.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_cli_worker_end_to_end.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..660fcd4 Binary files /dev/null and b/tests/cli/__pycache__/test_cli_worker_end_to_end.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/cli/__pycache__/test_enqueue_schedule_cli.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_enqueue_schedule_cli.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..14d439e Binary files /dev/null and b/tests/cli/__pycache__/test_enqueue_schedule_cli.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/cli/__pycache__/test_run_no_db.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_run_no_db.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..2081d2a Binary files /dev/null and b/tests/cli/__pycache__/test_run_no_db.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/cli/__pycache__/test_worker_cli.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_worker_cli.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..791c65f Binary files /dev/null and b/tests/cli/__pycache__/test_worker_cli.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/cli/test_cli_worker_end_to_end.py b/tests/cli/test_cli_worker_end_to_end.py new file mode 100644 index 0000000..eb5de48 --- /dev/null +++ b/tests/cli/test_cli_worker_end_to_end.py @@ -0,0 +1,130 @@ +""" +Test end-to-end: CLI enqueue -> worker -> DB via Redis. 
+""" + +from dataclasses import dataclass +from datetime import datetime + +import pytest +import redis +from rq import Queue +from rq.worker import SimpleWorker +from typer.testing import CliRunner + +from pricewatch.app.cli import main as cli_main +from pricewatch.app.core.registry import get_registry +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.connection import get_session, init_db, reset_engine +from pricewatch.app.db.models import Product, ScrapingLog +from pricewatch.app.stores.base import BaseStore +from pricewatch.app.tasks import scrape as scrape_task + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeRedisConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + redis: FakeRedisConfig + debug: bool = False + enable_db: bool = True + default_use_playwright: bool = False + default_playwright_timeout: int = 1000 + + +class DummyStore(BaseStore): + def __init__(self) -> None: + super().__init__(store_id="dummy") + + def match(self, url: str) -> float: + return 1.0 if "example.com" in url else 0.0 + + def canonicalize(self, url: str) -> str: + return url + + def extract_reference(self, url: str) -> str | None: + return "REF-CLI" + + def parse(self, html: str, url: str) -> ProductSnapshot: + return ProductSnapshot( + source=self.store_id, + url=url, + fetched_at=datetime(2026, 1, 14, 15, 0, 0), + title="Produit cli", + price=49.99, + currency="EUR", + reference="REF-CLI", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + +class DummyFetchResult: + def __init__(self, html: str) -> None: + self.success = True + self.html = html + self.error = None + self.duration_ms = 20 + + +def _redis_available(redis_url: str) -> bool: + try: + conn = redis.from_url(redis_url) + conn.ping() + return True + except Exception: + return False + + +@pytest.mark.skipif(not _redis_available("redis://localhost:6379/0"), reason="Redis indisponible") +def test_cli_enqueue_worker_persists_db(tmp_path, monkeypatch): + """Enqueue via CLI, execution worker, persistence DB.""" + reset_engine() + db_path = tmp_path / "cli-worker.db" + redis_url = "redis://localhost:6379/0" + config = FakeAppConfig( + db=FakeDbConfig(url=f"sqlite:///{db_path}"), + redis=FakeRedisConfig(url=redis_url), + ) + init_db(config) + + registry = get_registry() + previous_stores = list(registry._stores) + registry._stores = [] + registry.register(DummyStore()) + + monkeypatch.setattr(cli_main, "get_config", lambda: config) + monkeypatch.setattr(scrape_task, "get_config", lambda: config) + monkeypatch.setattr(scrape_task, "setup_stores", lambda: None) + monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("")) + + queue_name = "test-cli" + redis_conn = redis.from_url(redis_url) + queue = Queue(queue_name, connection=redis_conn) + queue.empty() + + runner = CliRunner() + try: + result = runner.invoke( + cli_main.app, + ["enqueue", "https://example.com/product", "--queue", queue_name, "--save-db"], + ) + assert result.exit_code == 0 + + worker = SimpleWorker([queue], connection=redis_conn) + worker.work(burst=True) + finally: + queue.empty() + registry._stores = previous_stores + reset_engine() + + with get_session(config) as session: + assert session.query(Product).count() == 1 + assert session.query(ScrapingLog).count() == 1 diff --git a/tests/cli/test_enqueue_schedule_cli.py b/tests/cli/test_enqueue_schedule_cli.py new file mode 100644 index 0000000..9c5e19d --- /dev/null +++ 
b/tests/cli/test_enqueue_schedule_cli.py @@ -0,0 +1,83 @@ +""" +Tests CLI pour enqueue/schedule avec gestion Redis. +""" + +from types import SimpleNamespace + +from typer.testing import CliRunner + +from pricewatch.app.cli import main as cli_main + + +class DummyScheduler: + def __init__(self, *args, **kwargs) -> None: + self.enqueue_calls = [] + self.schedule_calls = [] + + def enqueue_immediate(self, url, use_playwright=None, save_db=True): + self.enqueue_calls.append((url, use_playwright, save_db)) + return SimpleNamespace(id="job-123") + + def schedule_product(self, url, interval_hours=24, use_playwright=None, save_db=True): + self.schedule_calls.append((url, interval_hours, use_playwright, save_db)) + return SimpleNamespace(job_id="job-456", next_run=SimpleNamespace(isoformat=lambda: "2026")) + + +def test_enqueue_cli_success(monkeypatch): + """La commande enqueue retourne un job id.""" + runner = CliRunner() + dummy = DummyScheduler() + + monkeypatch.setattr(cli_main, "ScrapingScheduler", lambda *args, **kwargs: dummy) + + result = runner.invoke(cli_main.app, ["enqueue", "https://example.com/product"]) + + assert result.exit_code == 0 + assert "job-123" in result.output + + +def test_schedule_cli_success(monkeypatch): + """La commande schedule retourne un job id et une date.""" + runner = CliRunner() + dummy = DummyScheduler() + + monkeypatch.setattr(cli_main, "ScrapingScheduler", lambda *args, **kwargs: dummy) + + result = runner.invoke( + cli_main.app, + ["schedule", "https://example.com/product", "--interval", "12"], + ) + + assert result.exit_code == 0 + assert "job-456" in result.output + assert "2026" in result.output + + +def test_enqueue_cli_redis_unavailable(monkeypatch): + """La commande enqueue echoue si Redis est indisponible.""" + runner = CliRunner() + + def raise_redis(*args, **kwargs): + raise cli_main.RedisUnavailableError("Redis non disponible") + + monkeypatch.setattr(cli_main, "ScrapingScheduler", raise_redis) + + result = runner.invoke(cli_main.app, ["enqueue", "https://example.com/product"]) + + assert result.exit_code == 1 + assert "Redis non disponible" in result.output + + +def test_schedule_cli_redis_unavailable(monkeypatch): + """La commande schedule echoue si Redis est indisponible.""" + runner = CliRunner() + + def raise_redis(*args, **kwargs): + raise cli_main.RedisUnavailableError("Redis non disponible") + + monkeypatch.setattr(cli_main, "ScrapingScheduler", raise_redis) + + result = runner.invoke(cli_main.app, ["schedule", "https://example.com/product"]) + + assert result.exit_code == 1 + assert "Redis non disponible" in result.output diff --git a/tests/cli/test_run_db.py b/tests/cli/test_run_db.py old mode 100755 new mode 100644 diff --git a/tests/cli/test_run_no_db.py b/tests/cli/test_run_no_db.py new file mode 100644 index 0000000..246d9cf --- /dev/null +++ b/tests/cli/test_run_no_db.py @@ -0,0 +1,106 @@ +""" +Tests pour la compatibilite --no-db. 
+""" + +from dataclasses import dataclass +from pathlib import Path + +from typer.testing import CliRunner + +from pricewatch.app.cli import main as cli_main +from pricewatch.app.core.registry import get_registry +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.connection import get_session, init_db, reset_engine +from pricewatch.app.db.models import Product +from pricewatch.app.stores.base import BaseStore + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + debug: bool = False + enable_db: bool = True + + +class DummyStore(BaseStore): + def __init__(self) -> None: + super().__init__(store_id="dummy") + + def match(self, url: str) -> float: + return 1.0 if "example.com" in url else 0.0 + + def canonicalize(self, url: str) -> str: + return url + + def extract_reference(self, url: str) -> str | None: + return "REF-NODB" + + def parse(self, html: str, url: str) -> ProductSnapshot: + return ProductSnapshot( + source=self.store_id, + url=url, + title="Produit nodb", + price=9.99, + currency="EUR", + reference="REF-NODB", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + +class DummyFetchResult: + def __init__(self, html: str) -> None: + self.success = True + self.html = html + self.error = None + + +def test_cli_run_no_db(tmp_path, monkeypatch): + """Le flag --no-db evite toute ecriture DB.""" + reset_engine() + db_path = tmp_path / "nodb.db" + config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}")) + init_db(config) + + yaml_path = tmp_path / "config.yaml" + out_path = tmp_path / "out.json" + yaml_path.write_text( + """ +urls: + - "https://example.com/product" +options: + use_playwright: false + save_html: false + save_screenshot: false +""", + encoding="utf-8", + ) + + registry = get_registry() + previous_stores = list(registry._stores) + registry._stores = [] + registry.register(DummyStore()) + + monkeypatch.setattr(cli_main, "get_config", lambda: config) + monkeypatch.setattr(cli_main, "setup_stores", lambda: None) + monkeypatch.setattr(cli_main, "fetch_http", lambda url: DummyFetchResult("")) + + runner = CliRunner() + try: + result = runner.invoke( + cli_main.app, + ["run", "--yaml", str(yaml_path), "--out", str(out_path), "--no-db"], + ) + finally: + registry._stores = previous_stores + reset_engine() + + assert result.exit_code == 0 + assert out_path.exists() + + with get_session(config) as session: + assert session.query(Product).count() == 0 diff --git a/tests/cli/test_worker_cli.py b/tests/cli/test_worker_cli.py new file mode 100644 index 0000000..040b7a3 --- /dev/null +++ b/tests/cli/test_worker_cli.py @@ -0,0 +1,54 @@ +""" +Tests pour les commandes worker RQ via CLI. 
+""" + +from types import SimpleNamespace + +import pytest +from typer.testing import CliRunner + +from pricewatch.app.cli import main as cli_main + + +class DummyRedis: + def ping(self) -> bool: + return True + + +class DummyWorker: + def __init__(self, queues, connection=None) -> None: + self.queues = queues + self.connection = connection + self.work_calls = [] + + def work(self, with_scheduler: bool = True): + self.work_calls.append(with_scheduler) + + +def test_worker_cli_success(monkeypatch): + """Le worker demarre quand Redis est disponible.""" + runner = CliRunner() + dummy_worker = DummyWorker([]) + + monkeypatch.setattr(cli_main, "Worker", lambda queues, connection=None: dummy_worker) + monkeypatch.setattr(cli_main.redis, "from_url", lambda url: DummyRedis()) + + result = runner.invoke(cli_main.app, ["worker", "--no-scheduler"]) + + assert result.exit_code == 0 + assert dummy_worker.work_calls == [False] + + +def test_worker_cli_redis_down(monkeypatch): + """Le worker echoue proprement si Redis est indisponible.""" + runner = CliRunner() + + def raise_connection(url): + raise cli_main.redis.exceptions.ConnectionError("redis down") + + monkeypatch.setattr(cli_main.redis, "from_url", raise_connection) + + result = runner.invoke(cli_main.app, ["worker"]) + + assert result.exit_code == 1 + assert "Impossible de se connecter a Redis" in result.output diff --git a/tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc b/tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc b/tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/core/test_io.py b/tests/core/test_io.py old mode 100755 new mode 100644 diff --git a/tests/core/test_registry_integration.py b/tests/core/test_registry_integration.py old mode 100755 new mode 100644 diff --git a/tests/db/__pycache__/test_bulk_persistence.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_bulk_persistence.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..7e3b502 Binary files /dev/null and b/tests/db/__pycache__/test_bulk_persistence.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 index 4dfdd8d..6d8f8e8 Binary files a/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc and b/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/db/test_bulk_persistence.py b/tests/db/test_bulk_persistence.py new file mode 100644 index 0000000..02afabb --- /dev/null +++ b/tests/db/test_bulk_persistence.py @@ -0,0 +1,40 @@ +""" +Tests de charge legere pour la persistence (100 snapshots). 
+""" + +from datetime import datetime + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.models import Base, Product +from pricewatch.app.db.repository import ProductRepository + + +def test_bulk_save_100_snapshots(): + """Le repository persiste 100 snapshots sans erreur.""" + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + session = sessionmaker(bind=engine)() + + try: + repo = ProductRepository(session) + for idx in range(100): + snapshot = ProductSnapshot( + source="amazon", + url=f"https://example.com/product/{idx}", + fetched_at=datetime(2026, 1, 14, 14, 0, 0), + title=f"Produit {idx}", + price=10.0 + idx, + currency="EUR", + reference=f"REF-{idx}", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + repo.save_snapshot(snapshot) + session.commit() + + assert session.query(Product).count() == 100 + finally: + session.close() + engine.dispose() diff --git a/tests/db/test_connection.py b/tests/db/test_connection.py old mode 100755 new mode 100644 diff --git a/tests/db/test_models.py b/tests/db/test_models.py old mode 100755 new mode 100644 index 34f6e20..bdfeacf --- a/tests/db/test_models.py +++ b/tests/db/test_models.py @@ -2,7 +2,7 @@ Tests pour les modeles SQLAlchemy. """ -from datetime import datetime +from datetime import datetime, timezone import pytest from sqlalchemy import create_engine @@ -30,6 +30,7 @@ def session() -> Session: yield session finally: session.close() + engine.dispose() def test_product_relationships(session: Session): @@ -42,7 +43,7 @@ def test_product_relationships(session: Session): stock_status="in_stock", fetch_method="http", fetch_status="success", - fetched_at=datetime.utcnow(), + fetched_at=datetime.now(timezone.utc), ) image = ProductImage(image_url="https://example.com/image.jpg", position=0) spec = ProductSpec(spec_key="Couleur", spec_value="Noir") @@ -52,7 +53,7 @@ def test_product_relationships(session: Session): reference="B08N5WRWNW", fetch_method="http", fetch_status="success", - fetched_at=datetime.utcnow(), + fetched_at=datetime.now(timezone.utc), duration_ms=1200, html_size_bytes=2048, errors={"items": []}, diff --git a/tests/db/test_repository.py b/tests/db/test_repository.py old mode 100755 new mode 100644 diff --git a/tests/scraping/__init__.py b/tests/scraping/__init__.py old mode 100755 new mode 100644 diff --git a/tests/scraping/__pycache__/__init__.cpython-313.pyc b/tests/scraping/__pycache__/__init__.cpython-313.pyc old mode 100755 new mode 100644 diff --git a/tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc b/tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc b/tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 index 7b80cbd..b0cbecc Binary files a/tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc and b/tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc b/tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/scraping/test_http_fetch.py b/tests/scraping/test_http_fetch.py old mode 100755 new mode 100644 diff --git 
a/tests/scraping/test_pipeline.py b/tests/scraping/test_pipeline.py old mode 100755 new mode 100644 index d0f1407..9acd517 --- a/tests/scraping/test_pipeline.py +++ b/tests/scraping/test_pipeline.py @@ -80,3 +80,33 @@ def test_pipeline_respects_disable_flag(): assert product_id is None with get_session(config) as session: assert session.query(Product).count() == 0 + + +def test_pipeline_db_error_adds_note(monkeypatch): + """Une erreur DB ajoute une note et retourne None.""" + from sqlalchemy.exc import SQLAlchemyError + + class DummyError(SQLAlchemyError): + pass + + def raise_session(*args, **kwargs): + raise DummyError("db down") + + monkeypatch.setattr("pricewatch.app.scraping.pipeline.get_session", raise_session) + + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com/product", + fetched_at=datetime(2026, 1, 14, 13, 0, 0), + title="Produit", + price=10.0, + currency="EUR", + reference="B08PIPE", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + pipeline = ScrapingPipeline(config=FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))) + product_id = pipeline.process_snapshot(snapshot, save_to_db=True) + + assert product_id is None + assert any("Persistence DB echouee" in note for note in snapshot.debug.notes) diff --git a/tests/scraping/test_pw_fetch.py b/tests/scraping/test_pw_fetch.py old mode 100755 new mode 100644 diff --git a/tests/stores/__pycache__/test_amazon.cpython-313-pytest-9.0.2.pyc b/tests/stores/__pycache__/test_amazon.cpython-313-pytest-9.0.2.pyc old mode 100755 new mode 100644 diff --git a/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc b/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..757eca1 Binary files /dev/null and b/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/stores/test_price_parser.py b/tests/stores/test_price_parser.py new file mode 100644 index 0000000..a54d98d --- /dev/null +++ b/tests/stores/test_price_parser.py @@ -0,0 +1,29 @@ +""" +Tests pour le parsing de prix avec separateurs de milliers. 
+""" + +from pricewatch.app.stores.price_parser import parse_price_text + + +def test_parse_price_with_thousands_space(): + assert parse_price_text("1 259,00") == 1259.00 + + +def test_parse_price_with_narrow_nbsp(): + assert parse_price_text("1\u202f259,00") == 1259.00 + + +def test_parse_price_with_dot_thousands(): + assert parse_price_text("1.259,00") == 1259.00 + + +def test_parse_price_with_comma_thousands(): + assert parse_price_text("1,259.00") == 1259.00 + + +def test_parse_price_without_decimal(): + assert parse_price_text("1259") == 1259.00 + + +def test_parse_price_with_currency(): + assert parse_price_text("EUR 1 259,00") == 1259.00 diff --git a/tests/tasks/__pycache__/test_redis_errors.cpython-313-pytest-9.0.2.pyc b/tests/tasks/__pycache__/test_redis_errors.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..19e3af7 Binary files /dev/null and b/tests/tasks/__pycache__/test_redis_errors.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/tasks/__pycache__/test_scheduler.cpython-313-pytest-9.0.2.pyc b/tests/tasks/__pycache__/test_scheduler.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..37821c5 Binary files /dev/null and b/tests/tasks/__pycache__/test_scheduler.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/tasks/__pycache__/test_scrape_task.cpython-313-pytest-9.0.2.pyc b/tests/tasks/__pycache__/test_scrape_task.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..67f2b66 Binary files /dev/null and b/tests/tasks/__pycache__/test_scrape_task.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/tasks/__pycache__/test_worker_end_to_end.cpython-313-pytest-9.0.2.pyc b/tests/tasks/__pycache__/test_worker_end_to_end.cpython-313-pytest-9.0.2.pyc new file mode 100644 index 0000000..b41e1eb Binary files /dev/null and b/tests/tasks/__pycache__/test_worker_end_to_end.cpython-313-pytest-9.0.2.pyc differ diff --git a/tests/tasks/test_redis_errors.py b/tests/tasks/test_redis_errors.py new file mode 100644 index 0000000..d20ba49 --- /dev/null +++ b/tests/tasks/test_redis_errors.py @@ -0,0 +1,127 @@ +""" +Tests pour la gestion des erreurs Redis dans le scheduler. 
+""" + +import pytest +from redis.exceptions import ConnectionError as RedisConnectionError +from redis.exceptions import RedisError, TimeoutError as RedisTimeoutError + +from pricewatch.app.tasks.scheduler import RedisUnavailableError, ScrapingScheduler, check_redis_connection + + +class DummyRedisOk: + def ping(self) -> bool: + return True + + +class DummyRedisError: + def __init__(self, exc: Exception) -> None: + self._exc = exc + + def ping(self) -> None: + raise self._exc + + +class DummyQueue: + def __init__(self, name: str, connection=None) -> None: + self.name = name + self.connection = connection + + +class DummyScheduler: + def __init__(self, queue=None, connection=None) -> None: + self.queue = queue + self.connection = connection + + def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None): + return type("Job", (), {"id": "job-redis"})() + + +class FakeRedisConfig: + def __init__(self, url: str) -> None: + self.url = url + + +class FakeAppConfig: + def __init__(self, redis_url: str) -> None: + self.redis = FakeRedisConfig(redis_url) + + +def test_check_redis_connection_success(monkeypatch): + """Ping OK retourne True.""" + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: DummyRedisOk()) + assert check_redis_connection("redis://localhost:6379/0") is True + + +def test_check_redis_connection_failure_connection(monkeypatch): + """Ping en echec retourne False.""" + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: DummyRedisError(RedisConnectionError("no")), + ) + assert check_redis_connection("redis://localhost:6379/0") is False + + +def test_check_redis_connection_failure_timeout(monkeypatch): + """Timeout Redis retourne False.""" + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: DummyRedisError(RedisTimeoutError("timeout")), + ) + assert check_redis_connection("redis://localhost:6379/0") is False + + +def test_scheduler_lazy_connection(monkeypatch): + """La connexion Redis est lazy.""" + config = FakeAppConfig("redis://localhost:6379/0") + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: DummyRedisOk()) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config) + assert scheduler._redis is None + + _ = scheduler.queue + assert scheduler._redis is not None + + +def test_scheduler_redis_connection_error(monkeypatch): + """Une erreur de connexion leve RedisUnavailableError.""" + config = FakeAppConfig("redis://localhost:6379/0") + + def raise_connection(url): + raise RedisConnectionError("no") + + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", raise_connection) + + scheduler = ScrapingScheduler(config=config) + with pytest.raises(RedisUnavailableError): + _ = scheduler.queue + + +def test_scheduler_schedule_redis_error(monkeypatch): + """Une erreur Redis leve RedisUnavailableError lors du schedule.""" + config = FakeAppConfig("redis://localhost:6379/0") + + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: DummyRedisError(RedisError("boom")), + ) + + scheduler = ScrapingScheduler(config=config) + with pytest.raises(RedisUnavailableError): + scheduler.schedule_product("https://example.com/product", interval_hours=1) + + +def test_scheduler_enqueue_redis_error(monkeypatch): + """Une erreur Redis leve 
RedisUnavailableError lors de l'enqueue.""" + config = FakeAppConfig("redis://localhost:6379/0") + + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: DummyRedisError(RedisError("boom")), + ) + + scheduler = ScrapingScheduler(config=config) + with pytest.raises(RedisUnavailableError): + scheduler.enqueue_immediate("https://example.com/product") diff --git a/tests/tasks/test_scheduler.py b/tests/tasks/test_scheduler.py new file mode 100644 index 0000000..599e84a --- /dev/null +++ b/tests/tasks/test_scheduler.py @@ -0,0 +1,184 @@ +""" +Tests pour ScrapingScheduler avec mocks Redis/RQ. +""" + +from dataclasses import dataclass + +import pytest +from redis.exceptions import ConnectionError as RedisConnectionError + +from pricewatch.app.tasks.scheduler import ( + RedisUnavailableError, + ScheduledJobInfo, + ScrapingScheduler, + check_redis_connection, +) + + +@dataclass +class FakeRedis: + url: str + + def ping(self): + """Simule un ping reussi.""" + return True + + +class FakeRedisConnectionError: + """FakeRedis qui leve une erreur a la connexion.""" + + def __init__(self, url: str): + self.url = url + + def ping(self): + raise RedisConnectionError("Connection refused") + + +class DummyQueue: + def __init__(self, name: str, connection=None) -> None: + self.name = name + self.connection = connection + self.enqueued = [] + + def enqueue(self, func, *args, **kwargs): + job = type("Job", (), {"id": "job-123"})() + self.enqueued.append((func, args, kwargs)) + return job + + +class DummyScheduler: + def __init__(self, queue=None, connection=None) -> None: + self.queue = queue + self.connection = connection + self.scheduled = [] + + def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None): + job = type("Job", (), {"id": "job-456"})() + self.scheduled.append((scheduled_time, func, args, kwargs, interval, repeat)) + return job + + +@dataclass +class FakeRedisConfig: + url: str + + +@dataclass +class FakeAppConfig: + redis: FakeRedisConfig + + +def test_scheduler_enqueue_immediate(monkeypatch): + """Enqueue immediate utilise la queue RQ.""" + config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0")) + + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: FakeRedis(url)) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config, queue_name="default") + job = scheduler.enqueue_immediate("https://example.com/product") + + assert job.id == "job-123" + assert len(scheduler.queue.enqueued) == 1 + + +def test_scheduler_schedule_product(monkeypatch): + """Schedule product cree un job recurrent.""" + config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0")) + + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: FakeRedis(url)) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config, queue_name="default") + info = scheduler.schedule_product("https://example.com/product", interval_hours=1) + + assert isinstance(info, ScheduledJobInfo) + assert info.job_id == "job-456" + assert len(scheduler.scheduler.scheduled) == 1 + + +# ============================================================================ +# Tests gestion erreurs Redis +# 
============================================================================ + + +def test_scheduler_redis_connection_error(monkeypatch): + """Leve RedisUnavailableError quand Redis n'est pas accessible.""" + config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0")) + + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: FakeRedisConnectionError(url), + ) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config, queue_name="default") + + with pytest.raises(RedisUnavailableError) as exc_info: + scheduler.enqueue_immediate("https://example.com/product") + + assert "Redis" in str(exc_info.value.message) + assert exc_info.value.cause is not None + + +def test_scheduler_lazy_connection(monkeypatch): + """La connexion Redis n'est etablie qu'au premier appel.""" + config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0")) + connection_calls = [] + + def track_from_url(url): + connection_calls.append(url) + return FakeRedis(url) + + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", track_from_url) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config, queue_name="default") + + # Pas de connexion a la creation + assert len(connection_calls) == 0 + + # Connexion au premier appel + scheduler.enqueue_immediate("https://example.com/product") + assert len(connection_calls) == 1 + + # Pas de nouvelle connexion au deuxieme appel + scheduler.enqueue_immediate("https://example.com/product2") + assert len(connection_calls) == 1 + + +def test_check_redis_connection_success(monkeypatch): + """check_redis_connection retourne True si Redis repond.""" + monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedis) + + assert check_redis_connection("redis://localhost:6379/0") is True + + +def test_check_redis_connection_failure(monkeypatch): + """check_redis_connection retourne False si Redis ne repond pas.""" + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", FakeRedisConnectionError + ) + + assert check_redis_connection("redis://localhost:6379/0") is False + + +def test_scheduler_schedule_redis_error(monkeypatch): + """schedule_product leve RedisUnavailableError si Redis down.""" + config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0")) + + monkeypatch.setattr( + "pricewatch.app.tasks.scheduler.redis.from_url", + lambda url: FakeRedisConnectionError(url), + ) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue) + monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler) + + scheduler = ScrapingScheduler(config=config, queue_name="default") + + with pytest.raises(RedisUnavailableError): + scheduler.schedule_product("https://example.com/product", interval_hours=24) diff --git a/tests/tasks/test_scrape_task.py b/tests/tasks/test_scrape_task.py new file mode 100644 index 0000000..9df90e7 --- /dev/null +++ b/tests/tasks/test_scrape_task.py @@ -0,0 +1,91 @@ +""" +Tests end-to-end pour la tache RQ de scraping avec persistence DB. 
+""" + +from dataclasses import dataclass +from datetime import datetime + +from pricewatch.app.core.registry import get_registry +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.connection import get_session, init_db, reset_engine +from pricewatch.app.db.models import Product, ScrapingLog +from pricewatch.app.stores.base import BaseStore +from pricewatch.app.tasks import scrape as scrape_task + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + debug: bool = False + enable_db: bool = True + default_use_playwright: bool = False + default_playwright_timeout: int = 1000 + + +class DummyStore(BaseStore): + def __init__(self) -> None: + super().__init__(store_id="dummy") + + def match(self, url: str) -> float: + return 1.0 if "example.com" in url else 0.0 + + def canonicalize(self, url: str) -> str: + return url + + def extract_reference(self, url: str) -> str | None: + return "REF-TEST" + + def parse(self, html: str, url: str) -> ProductSnapshot: + return ProductSnapshot( + source=self.store_id, + url=url, + fetched_at=datetime(2026, 1, 14, 10, 0, 0), + title="Produit test", + price=19.99, + currency="EUR", + reference="REF-TEST", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + +class DummyFetchResult: + def __init__(self, html: str) -> None: + self.success = True + self.html = html + self.error = None + self.duration_ms = 123 + + +def test_scrape_product_persists_db(tmp_path, monkeypatch): + """La tache scrape_product persiste en DB et logge un scraping.""" + reset_engine() + db_path = tmp_path / "scrape.db" + config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}")) + init_db(config) + + registry = get_registry() + previous_stores = list(registry._stores) + registry._stores = [] + registry.register(DummyStore()) + + monkeypatch.setattr(scrape_task, "get_config", lambda: config) + monkeypatch.setattr(scrape_task, "setup_stores", lambda: None) + monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("")) + + try: + result = scrape_task.scrape_product("https://example.com/product", save_db=True) + finally: + registry._stores = previous_stores + reset_engine() + + assert result["success"] is True + assert result["product_id"] is not None + + with get_session(config) as session: + assert session.query(Product).count() == 1 + assert session.query(ScrapingLog).count() == 1 diff --git a/tests/tasks/test_worker_end_to_end.py b/tests/tasks/test_worker_end_to_end.py new file mode 100644 index 0000000..bdc592d --- /dev/null +++ b/tests/tasks/test_worker_end_to_end.py @@ -0,0 +1,110 @@ +""" +Test end-to-end: enqueue -> worker -> DB via Redis. 
+""" + +from dataclasses import dataclass +from datetime import datetime + +import pytest +import redis +from rq import Queue +from rq.worker import SimpleWorker + +from pricewatch.app.core.registry import get_registry +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.connection import get_session, init_db, reset_engine +from pricewatch.app.db.models import Product, ScrapingLog +from pricewatch.app.stores.base import BaseStore +from pricewatch.app.tasks import scrape as scrape_task + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + debug: bool = False + enable_db: bool = True + default_use_playwright: bool = False + default_playwright_timeout: int = 1000 + + +class DummyStore(BaseStore): + def __init__(self) -> None: + super().__init__(store_id="dummy") + + def match(self, url: str) -> float: + return 1.0 if "example.com" in url else 0.0 + + def canonicalize(self, url: str) -> str: + return url + + def extract_reference(self, url: str) -> str | None: + return "REF-WORKER" + + def parse(self, html: str, url: str) -> ProductSnapshot: + return ProductSnapshot( + source=self.store_id, + url=url, + fetched_at=datetime(2026, 1, 14, 11, 0, 0), + title="Produit worker", + price=29.99, + currency="EUR", + reference="REF-WORKER", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + +class DummyFetchResult: + def __init__(self, html: str) -> None: + self.success = True + self.html = html + self.error = None + self.duration_ms = 50 + + +def _redis_available(redis_url: str) -> bool: + try: + conn = redis.from_url(redis_url) + conn.ping() + return True + except Exception: + return False + + +@pytest.mark.skipif(not _redis_available("redis://localhost:6379/0"), reason="Redis indisponible") +def test_enqueue_worker_persists_db(tmp_path, monkeypatch): + """Le job enqueued est traite par le worker et persiste en DB.""" + reset_engine() + db_path = tmp_path / "worker.db" + config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}")) + init_db(config) + + registry = get_registry() + previous_stores = list(registry._stores) + registry._stores = [] + registry.register(DummyStore()) + + monkeypatch.setattr(scrape_task, "get_config", lambda: config) + monkeypatch.setattr(scrape_task, "setup_stores", lambda: None) + monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("")) + + redis_conn = redis.from_url("redis://localhost:6379/0") + queue = Queue("default", connection=redis_conn) + + try: + job = queue.enqueue(scrape_task.scrape_product, "https://example.com/product", save_db=True) + worker = SimpleWorker([queue], connection=redis_conn) + worker.work(burst=True) + finally: + registry._stores = previous_stores + reset_engine() + + assert job.is_finished + + with get_session(config) as session: + assert session.query(Product).count() == 1 + assert session.query(ScrapingLog).count() == 1 diff --git a/webui/Dockerfile b/webui/Dockerfile new file mode 100644 index 0000000..1c0b9a3 --- /dev/null +++ b/webui/Dockerfile @@ -0,0 +1,13 @@ +FROM node:20-alpine AS build +WORKDIR /app +COPY package.json ./ +COPY package-lock.json* ./ +RUN npm install +COPY . . 
+RUN npm run build + +FROM nginx:alpine +COPY --from=build /app/dist /usr/share/nginx/html +COPY nginx.conf /etc/nginx/conf.d/default.conf +EXPOSE 80 +CMD ["nginx", "-g", "daemon off;"] diff --git a/webui/index.html b/webui/index.html new file mode 100644 index 0000000..2b4229a --- /dev/null +++ b/webui/index.html @@ -0,0 +1,13 @@ + + + + + + PriceWatch Web UI + + + +
+ + + diff --git a/webui/nginx.conf b/webui/nginx.conf new file mode 100644 index 0000000..f57fbc1 --- /dev/null +++ b/webui/nginx.conf @@ -0,0 +1,17 @@ +server { + listen 80; + server_name _; + root /usr/share/nginx/html; + + location /api/ { + proxy_pass http://api:8000/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location / { + try_files $uri /index.html; + } +} diff --git a/webui/package.json b/webui/package.json new file mode 100644 index 0000000..6ecd941 --- /dev/null +++ b/webui/package.json @@ -0,0 +1,22 @@ +{ + "name": "pricewatch-webui", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "@fortawesome/fontawesome-free": "^6.5.2", + "vue": "^3.4.27" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.1.2", + "autoprefixer": "^10.4.20", + "postcss": "^8.4.41", + "tailwindcss": "^3.4.10", + "vite": "^5.4.2" + } +} diff --git a/webui/postcss.config.js b/webui/postcss.config.js new file mode 100644 index 0000000..2aa7205 --- /dev/null +++ b/webui/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; diff --git a/webui/public/favicon.svg b/webui/public/favicon.svg new file mode 100644 index 0000000..d4a38d9 --- /dev/null +++ b/webui/public/favicon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/webui/src/App.vue b/webui/src/App.vue new file mode 100644 index 0000000..5d4e047 --- /dev/null +++ b/webui/src/App.vue @@ -0,0 +1,1566 @@ + + + diff --git a/webui/src/index.css b/webui/src/index.css new file mode 100644 index 0000000..5176bba --- /dev/null +++ b/webui/src/index.css @@ -0,0 +1,281 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +:root { + color-scheme: light dark; +} + +.app-root { + --bg: #282828; + --surface: #3c3836; + --surface-2: #504945; + --text: #ebdbb2; + --muted: #a89984; + --accent: #fe8019; + --danger: #fb4934; + --success: #b8bb26; + --warning: #fabd2f; + --shadow: rgba(0, 0, 0, 0.45); + --radius: 14px; + --font-title: "Space Mono", "JetBrains Mono", "Fira Code", monospace; + --font-body: "JetBrains Mono", "Fira Code", "IBM Plex Mono", "SFMono-Regular", Menlo, monospace; + --font-mono: "JetBrains Mono", "Fira Code", "IBM Plex Mono", "SFMono-Regular", Menlo, monospace; + --font-size: 16px; + background: var(--bg); + color: var(--text); + min-height: 100vh; + font-family: var(--font-body); + font-size: var(--font-size); +} + +.app-root.theme-gruvbox-dark { + --bg: #282828; + --surface: #3c3836; + --surface-2: #504945; + --text: #ebdbb2; + --muted: #a89984; + --accent: #fe8019; + --danger: #fb4934; + --success: #b8bb26; + --warning: #fabd2f; + --shadow: rgba(0, 0, 0, 0.45); +} + +.app-root.theme-gruvbox-light { + --bg: #fbf1c7; + --surface: #f2e5bc; + --surface-2: #ebdbb2; + --text: #3c3836; + --muted: #7c6f64; + --accent: #d65d0e; + --danger: #cc241d; + --success: #98971a; + --warning: #d79921; + --shadow: rgba(60, 56, 54, 0.25); +} + +.app-root.theme-monokai-dark { + --bg: #1f1f1b; + --surface: #272822; + --surface-2: #3b3c35; + --text: #f8f8f2; + --muted: #9b9a84; + --accent: #f92672; + --danger: #fd5ff1; + --success: #a6e22e; + --warning: #fd971f; + --shadow: rgba(0, 0, 0, 0.55); +} + +.app-root.theme-monokai-light { + --bg: #f8f8f2; + --surface: #e8e8e3; + --surface-2: #dcdcd2; + --text: #272822; + --muted: #75715e; + --accent: #f92672; + 
--danger: #c0005f; + --success: #2d8f2d; + --warning: #fd971f; + --shadow: rgba(39, 40, 34, 0.2); +} + +.app-header { + position: sticky; + top: 0; + z-index: 40; + background: linear-gradient(90deg, var(--surface), var(--surface-2)); + border-bottom: 1px solid rgba(255, 255, 255, 0.06); + box-shadow: 0 10px 24px var(--shadow); +} + +.vintage-shadow { + box-shadow: 0 14px 28px var(--shadow); +} + +.icon-btn { + width: 42px; + height: 42px; + border-radius: 50%; + background: var(--surface-2); + color: var(--text); + display: inline-flex; + align-items: center; + justify-content: center; + transition: transform 0.15s ease, background 0.15s ease; +} + +.icon-btn:hover { + background: var(--accent); + color: #1b1b1b; + transform: translateY(-1px); +} + +.icon-btn:active { + transform: translateY(1px); +} + +.pill { + border-radius: 999px; + padding: 4px 10px; + font-size: 0.75rem; + background: var(--surface-2); + color: var(--muted); +} + +.panel { + background: var(--surface); + border-radius: var(--radius); + border: 1px solid rgba(255, 255, 255, 0.05); +} + +.card { + background: var(--surface); + border-radius: var(--radius); + border: 1px solid rgba(255, 255, 255, 0.08); + box-shadow: 0 10px 24px var(--shadow); +} + +.card-accent { + border: 1px solid rgba(254, 128, 25, 0.5); + box-shadow: 0 10px 30px rgba(254, 128, 25, 0.2); +} + +.density-dense .card { + padding: 12px; +} + +.density-comfort .card { + padding: 20px; +} + +.section-title { + font-family: var(--font-title); + letter-spacing: 0.5px; +} + +.label { + font-size: 0.8rem; + color: var(--muted); +} + +.input { + width: 100%; + background: var(--surface-2); + border: 1px solid rgba(255, 255, 255, 0.08); + border-radius: 10px; + padding: 8px 10px; + color: var(--text); +} + +.input:focus { + outline: 2px solid rgba(254, 128, 25, 0.4); +} + +.sidebar { + width: 280px; + min-width: 240px; +} + +.detail-panel { + width: 320px; + min-width: 280px; +} + +.image-toggle { + border: 1px solid rgba(255, 255, 255, 0.08); + border-radius: 12px; + padding: 2px; + background: transparent; + cursor: pointer; + transition: transform 0.15s ease, background 0.15s ease, border 0.15s ease; +} + +.image-toggle:hover { + border-color: rgba(254, 128, 25, 0.8); + transform: translateY(-1px); +} + +.image-toggle.selected { + background: rgba(254, 128, 25, 0.15); + border-color: rgba(254, 128, 25, 0.9); + box-shadow: inset 0 0 6px rgba(0, 0, 0, 0.3); +} + +.log-status-panel { + border-color: rgba(255, 255, 255, 0.1); +} + +.log-entry { + transition: background 0.2s ease; +} + +.log-entry-error { + border-color: rgba(251, 73, 52, 0.7); + background: rgba(251, 73, 52, 0.07); + color: var(--danger); +} + +.detail-popup { + border-radius: calc(var(--radius) * 1.2); + border-width: 1px; + max-height: calc(100vh - 60px); + box-shadow: 0 20px 40px rgba(0, 0, 0, 0.6); +} + +.view-mode-btn.active-view { + background: var(--accent); + color: #1b1b1b; +} + +.app-root.layout-compact .sidebar, +.app-root.layout-compact .detail-panel { + display: none; +} + +.app-root.layout-compact .product-grid { + grid-template-columns: 1fr; +} + +.app-root.layout-wide .sidebar { + width: 320px; +} + +.app-root.layout-wide .detail-panel { + width: 360px; +} + +.compare-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 16px; +} + +.product-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); + gap: 16px; +} + +@media (max-width: 1024px) { + .sidebar { + display: none; + } + .detail-panel { + display: 
none; + } +} + +@media (max-width: 640px) { + .app-header .toolbar-text { + display: none; + } + .icon-btn { + width: 36px; + height: 36px; + } + .product-grid { + grid-template-columns: 1fr; + } +} diff --git a/webui/src/main.js b/webui/src/main.js new file mode 100644 index 0000000..0da401f --- /dev/null +++ b/webui/src/main.js @@ -0,0 +1,6 @@ +import { createApp } from "vue"; +import App from "./App.vue"; +import "./index.css"; +import "@fortawesome/fontawesome-free/css/all.min.css"; + +createApp(App).mount("#app"); diff --git a/webui/tailwind.config.js b/webui/tailwind.config.js new file mode 100644 index 0000000..c5c731f --- /dev/null +++ b/webui/tailwind.config.js @@ -0,0 +1,7 @@ +export default { + content: ["./index.html", "./src/**/*.{vue,js,ts}"], + theme: { + extend: {}, + }, + plugins: [], +}; diff --git a/webui/vite.config.js b/webui/vite.config.js new file mode 100644 index 0000000..bdd03aa --- /dev/null +++ b/webui/vite.config.js @@ -0,0 +1,9 @@ +import { defineConfig } from "vite"; +import vue from "@vitejs/plugin-vue"; + +export default defineConfig({ + plugins: [vue()], + server: { + port: 3000, + }, +});
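
Note on the web UI -> API path added above: `webui/nginx.conf` proxies `location /api/` to `http://api:8000/` with a trailing slash, so nginx strips the `/api` prefix before forwarding (e.g. `GET /api/products` reaches the FastAPI app as `GET /products`), while `location /` falls back to `index.html` for the SPA. Below is a minimal sketch of exercising that path from outside the stack; it assumes the compose setup publishes the web UI container locally and reuses `PW_API_TOKEN` from `.env`, and the `PW_WEBUI_BASE` variable is purely illustrative (not taken from the diff).

```python
# Sketch only: calls the API through the nginx proxy defined in webui/nginx.conf.
# Assumes the docker compose stack is running; adjust the base URL to wherever
# the web UI container is actually published (hypothetical default below).
import os

import httpx

WEBUI_BASE = os.getenv("PW_WEBUI_BASE", "http://localhost:3000")  # assumption, not from the diff
API_TOKEN = os.getenv("PW_API_TOKEN", "change_me")

with httpx.Client(base_url=WEBUI_BASE, timeout=5.0) as client:
    # /api/health is forwarded to the API's /health (prefix stripped by proxy_pass).
    health = client.get("/api/health")
    health.raise_for_status()
    print(health.json())

    # Authenticated endpoints still require the bearer token, as in the API tests.
    products = client.get(
        "/api/products",
        headers={"Authorization": f"Bearer {API_TOKEN}"},
    )
    products.raise_for_status()
    print(f"{len(products.json())} products")
```

During `npm run dev` (Vite on port 3000 per `vite.config.js`) no such rewrite exists, so the UI presumably relies on the API base URL and token configured in its settings panel; the `/api/` prefix stripping only applies to the built nginx image.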