From c91c0f1fc9ac8f12f7dcb47a9fc220590f3d2fd9 Mon Sep 17 00:00:00 2001 From: Gilles Soulier Date: Wed, 14 Jan 2026 07:03:38 +0100 Subject: [PATCH] codex --- .coverage | Bin 53248 -> 53248 bytes .env.example | 18 + .gitignore | 0 CHANGELOG.md | 91 +++- PHASE_1_COMPLETE.md | 267 ++++++++++ PHASE_2_PROGRESS.md | 437 +++++++++++++++++ README.md | 53 +- TODO.md | 86 ++-- alembic.ini | 36 ++ docker-compose.yml | 22 + pricewatch.egg-info/PKG-INFO | 7 + pricewatch.egg-info/SOURCES.txt | 1 + pricewatch.egg-info/requires.txt | 7 + .../app/cli/__pycache__/main.cpython-313.pyc | Bin 15708 -> 20375 bytes pricewatch/app/cli/main.py | 157 ++++++ .../core/__pycache__/config.cpython-313.pyc | Bin 0 -> 7958 bytes pricewatch/app/core/config.py | 186 +++++++ pricewatch/app/db/__init__.py | 41 ++ .../db/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 813 bytes .../db/__pycache__/connection.cpython-313.pyc | Bin 0 -> 8087 bytes .../app/db/__pycache__/models.cpython-313.pyc | Bin 0 -> 12364 bytes .../db/__pycache__/repository.cpython-313.pyc | Bin 0 -> 7814 bytes pricewatch/app/db/connection.py | 238 +++++++++ .../__pycache__/env.cpython-313.pyc | Bin 0 -> 2912 bytes pricewatch/app/db/migrations/env.py | 80 +++ pricewatch/app/db/migrations/script.py.mako | 24 + .../versions/20260114_01_initial_schema.py | 124 +++++ ...20260114_01_initial_schema.cpython-313.pyc | Bin 0 -> 8818 bytes pricewatch/app/db/models.py | 320 ++++++++++++ pricewatch/app/db/repository.py | 140 ++++++ pricewatch/app/scraping/__init__.py | 3 + .../__pycache__/__init__.cpython-313.pyc | Bin 160 -> 264 bytes .../__pycache__/pipeline.cpython-313.pyc | Bin 0 -> 2779 bytes pricewatch/app/scraping/pipeline.py | 52 ++ .../amazon/__pycache__/store.cpython-313.pyc | Bin 13238 -> 14223 bytes pricewatch/app/stores/amazon/store.py | 20 + pricewatch/app/tasks/__init__.py | 8 + pricewatch/app/tasks/scheduler.py | 75 +++ pricewatch/app/tasks/scrape.py | 160 ++++++ pyproject.toml | 13 + .../test_run_db.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 8336 bytes tests/cli/test_run_db.py | 106 ++++ .../test_io.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 66626 bytes ...y_integration.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 33442 bytes tests/core/test_io.py | 462 ++++++++++++++++++ tests/core/test_registry_integration.py | 174 +++++++ ...st_connection.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 9521 bytes .../test_models.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 8164 bytes ...st_repository.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 12706 bytes tests/db/test_connection.py | 87 ++++ tests/db/test_models.py | 89 ++++ tests/db/test_repository.py | 82 ++++ tests/scraping/__init__.py | 0 .../__pycache__/__init__.cpython-313.pyc | Bin 0 -> 151 bytes ...st_http_fetch.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 49501 bytes ...test_pipeline.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 7087 bytes ...test_pw_fetch.cpython-313-pytest-9.0.2.pyc | Bin 0 -> 44576 bytes tests/scraping/test_http_fetch.py | 290 +++++++++++ tests/scraping/test_pipeline.py | 82 ++++ tests/scraping/test_pw_fetch.py | 388 +++++++++++++++ .../test_amazon.cpython-313-pytest-9.0.2.pyc | Bin 60925 -> 60794 bytes 61 files changed, 4388 insertions(+), 38 deletions(-) create mode 100755 .env.example mode change 100644 => 100755 .gitignore create mode 100755 PHASE_1_COMPLETE.md create mode 100755 PHASE_2_PROGRESS.md create mode 100755 alembic.ini create mode 100755 docker-compose.yml create mode 100755 pricewatch/app/core/__pycache__/config.cpython-313.pyc create mode 100755 pricewatch/app/core/config.py create mode 100755 
pricewatch/app/db/__init__.py
 create mode 100755 pricewatch/app/db/__pycache__/__init__.cpython-313.pyc
 create mode 100755 pricewatch/app/db/__pycache__/connection.cpython-313.pyc
 create mode 100755 pricewatch/app/db/__pycache__/models.cpython-313.pyc
 create mode 100755 pricewatch/app/db/__pycache__/repository.cpython-313.pyc
 create mode 100755 pricewatch/app/db/connection.py
 create mode 100755 pricewatch/app/db/migrations/__pycache__/env.cpython-313.pyc
 create mode 100755 pricewatch/app/db/migrations/env.py
 create mode 100755 pricewatch/app/db/migrations/script.py.mako
 create mode 100755 pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py
 create mode 100755 pricewatch/app/db/migrations/versions/__pycache__/20260114_01_initial_schema.cpython-313.pyc
 create mode 100755 pricewatch/app/db/models.py
 create mode 100755 pricewatch/app/db/repository.py
 create mode 100755 pricewatch/app/scraping/__pycache__/pipeline.cpython-313.pyc
 create mode 100755 pricewatch/app/scraping/pipeline.py
 create mode 100755 pricewatch/app/tasks/__init__.py
 create mode 100755 pricewatch/app/tasks/scheduler.py
 create mode 100755 pricewatch/app/tasks/scrape.py
 create mode 100755 tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/cli/test_run_db.py
 create mode 100755 tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/core/test_io.py
 create mode 100755 tests/core/test_registry_integration.py
 create mode 100755 tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/db/test_connection.py
 create mode 100755 tests/db/test_models.py
 create mode 100755 tests/db/test_repository.py
 create mode 100755 tests/scraping/__init__.py
 create mode 100755 tests/scraping/__pycache__/__init__.cpython-313.pyc
 create mode 100755 tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc
 create mode 100755 tests/scraping/test_http_fetch.py
 create mode 100755 tests/scraping/test_pipeline.py
 create mode 100755 tests/scraping/test_pw_fetch.py

diff --git a/.coverage b/.coverage
index 5f7baa5d58734da5ed9d6d2597620a44bea44cf4..e2106b9cbfe7735ed707871ea76aa5bbb79d732f 100755
GIT binary patch
(deltas binaires omis — fichier .coverage, non lisible)
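Pour situer les nouveaux modules listés dans le diffstat ci-dessus, voici une esquisse indicative du câblage Phase 2 (config → base → pipeline). Les noms (`get_config`, `init_db`, `check_db_connection`, `ScrapingPipeline.process_snapshot`) sont ceux décrits dans PHASE_2_PROGRESS plus bas ; l'enchaînement exact reste hypothétique, ce n'est pas le code du patch lui-même.

```python
# Esquisse indicative du flux Phase 2 (pas le code réel du patch).
from pricewatch.app.core.config import get_config
from pricewatch.app.db.connection import check_db_connection, init_db
from pricewatch.app.scraping.pipeline import ScrapingPipeline

config = get_config()                      # singleton chargé depuis .env / variables PW_*
if config.enable_db and check_db_connection(config):
    init_db(config)                        # crée les 5 tables si nécessaire

pipeline = ScrapingPipeline(config=config)
# Pour chaque ProductSnapshot produit par le scraping Phase 1 :
# product_id = pipeline.process_snapshot(snapshot, save_to_db=True)  # -> int | None
```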
diff --git a/.env.example b/.env.example
new file mode 100755
index 0000000..a89bb87
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,18 @@
+# Database
+PW_DB_HOST=localhost
+PW_DB_PORT=5432
+PW_DB_DATABASE=pricewatch
+PW_DB_USER=pricewatch
+PW_DB_PASSWORD=pricewatch
+
+# Redis
+PW_REDIS_HOST=localhost
+PW_REDIS_PORT=6379
+PW_REDIS_DB=0
+
+# App
+PW_DEBUG=false
+PW_WORKER_TIMEOUT=300
+PW_WORKER_CONCURRENCY=2
+PW_ENABLE_DB=true
+PW_ENABLE_WORKER=true
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 685850a..90643a3 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,9 +9,94 @@ Le format est basé sur [Keep a Changelog](https://keepachangelog.com/fr/1.0.0/)
 
 ## [Non publié]
 
 ### En cours
-- Ajout de fixtures HTML réalistes pour tests pytest
-- Tests stores/cdiscount/
-- Tests scraping/ avec mocks
+- Phase 2 : Base de données PostgreSQL
+- Phase 2 : Worker Redis/RQ
+- Phase 3 : API REST FastAPI
+- Phase 4 : Web UI
+
+### Ajouté
+- Configuration Alembic (env.py, script.py.mako, alembic.ini)
+- Migration initiale SQLAlchemy (5 tables + indexes)
+- Commandes CLI DB: `init-db`, `migrate`, `upgrade`, `downgrade`
+- `docker-compose.yml` PostgreSQL/Redis
+- `.env.example` avec variables DB/Redis/app
+- Tests DB de base (models + connection)
+- Repository `ProductRepository` + `ScrapingPipeline`
+- Flag CLI `--save-db/--no-db` pour la persistance
+- Tests repository/pipeline (SQLite)
+- Test end-to-end CLI + DB (SQLite)
+- Worker RQ + scheduler (tasks + CLI)
+
+---
+
+## [0.3.0] - 2026-01-14 🎉 PHASE 1 TERMINÉE
+
+### ✅ Phase 1 CLI complétée à 100%
+
+**Résultat final**:
+- **295 tests passent** (100% de réussite)
+- **76% code coverage global**
+- **4 stores opérationnels** (Amazon, Cdiscount, Backmarket, AliExpress)
+
+### Ajouté
+
+#### Corrections et améliorations
+- **Amazon Store**: Correction extraction images avec fallback générique
+- **Amazon Store**: Support prix séparés en 2 spans (a-price-whole + a-price-fraction)
+
+#### Tests complets ajoutés (177 nouveaux tests)
+- **tests/core/test_registry.py**: 40 tests (100% coverage)
+  - 24 tests unitaires avec mocks
+  - 16 tests d'intégration avec les 4 stores réels
+  - Tests de détection automatique multi-stores
+- **tests/core/test_registry_integration.py**: Tests d'intégration stores
+  - Vérification détection correcte pour Amazon, Cdiscount, Backmarket, AliExpress
+  - Tests de priorité et exclusivité des matches
+- **tests/core/test_io.py**: 36 tests (97% coverage)
+  - Tests ScrapingConfig/ScrapingOptions Pydantic
+  - Tests read_yaml_config avec validation erreurs
+  - Tests write_json_results et read_json_results
+  - Tests save_debug_html et save_debug_screenshot
+- **tests/scraping/test_http_fetch.py**: 21 tests (100% coverage)
+  - Tests fetch_http avec mocks requests
+  - Tests codes HTTP (200, 403, 404, 429, 500+)
+  - Tests timeout et exceptions réseau
+  - Tests User-Agent
rotation et headers personnalisés +- **tests/scraping/test_pw_fetch.py**: 21 tests (91% coverage) + - Tests fetch_playwright avec mocks Playwright + - Tests modes headless/headful + - Tests screenshot et wait_for_selector + - Tests fetch_with_fallback (stratégie HTTP → Playwright) + - Tests cleanup des ressources + +### Statistiques détaillées + +**Coverage par module**: +| Module | Coverage | Tests | +|--------|----------|-------| +| core/schema.py | 100% | 29 | +| core/registry.py | 100% | 40 | +| core/io.py | 97% | 36 | +| scraping/http_fetch.py | 100% | 21 | +| scraping/pw_fetch.py | 91% | 21 | +| stores/amazon/store.py | 89% | 33 | +| stores/aliexpress/store.py | 85% | 32 | +| stores/backmarket/store.py | 85% | 25 | +| stores/cdiscount/store.py | 72% | 30 | +| **TOTAL** | **76%** | **295** | + +### Améliorations techniques +- Architecture complètement testée avec mocks et fixtures +- Tests d'intégration validant le fonctionnement end-to-end +- Couverture de code élevée sur tous les modules critiques +- Détection automatique de stores validée avec URLs réelles + +### Prochaines étapes (Phase 2) +Phase 1 CLI est maintenant **production-ready**. La Phase 2 peut démarrer: +1. Base de données PostgreSQL + Alembic +2. Worker Redis/RQ pour scraping planifié +3. API REST FastAPI +4. Web UI responsive avec dark theme Gruvbox --- diff --git a/PHASE_1_COMPLETE.md b/PHASE_1_COMPLETE.md new file mode 100755 index 0000000..30fc48c --- /dev/null +++ b/PHASE_1_COMPLETE.md @@ -0,0 +1,267 @@ +# 🎉 Phase 1 CLI - TERMINÉE À 100% + +**Date de complétion**: 2026-01-14 +**Version**: 0.3.0 + +--- + +## 📊 Résultats Finaux + +### Tests +- ✅ **295/295 tests passent** (100% de réussite) +- 📈 **76% code coverage global** +- ⚡ **Temps d'exécution**: 41.4 secondes + +### Modules testés + +| Module | Coverage | Tests | Statut | +|--------|----------|-------|--------| +| `core/schema.py` | **100%** | 29 | ✅ | +| `core/registry.py` | **100%** | 40 | ✅ | +| `core/io.py` | **97%** | 36 | ✅ | +| `scraping/http_fetch.py` | **100%** | 21 | ✅ | +| `scraping/pw_fetch.py` | **91%** | 21 | ✅ | +| `stores/amazon/` | **89%** | 33 | ✅ | +| `stores/aliexpress/` | **85%** | 32 | ✅ | +| `stores/backmarket/` | **85%** | 25 | ✅ | +| `stores/cdiscount/` | **72%** | 30 | ✅ | +| `base.py` | **87%** | - | ✅ | +| `logging.py` | **71%** | - | ✅ | + +--- + +## 🏗️ Architecture Implémentée + +### 1. Core (`pricewatch/app/core/`) +- ✅ `schema.py` - Modèle ProductSnapshot Pydantic +- ✅ `registry.py` - Détection automatique stores +- ✅ `io.py` - Lecture YAML / Écriture JSON +- ✅ `logging.py` - Système de logs colorés + +### 2. Scraping (`pricewatch/app/scraping/`) +- ✅ `http_fetch.py` - HTTP simple avec rotation User-Agent +- ✅ `pw_fetch.py` - Playwright fallback anti-bot +- ✅ Stratégie automatique: HTTP → Playwright si échec + +### 3. Stores (`pricewatch/app/stores/`) +- ✅ `base.py` - Classe abstraite BaseStore +- ✅ **Amazon** - amazon.fr, amazon.com, amazon.co.uk, amazon.de +- ✅ **Cdiscount** - cdiscount.com +- ✅ **Backmarket** - backmarket.fr, backmarket.com +- ✅ **AliExpress** - fr.aliexpress.com, aliexpress.com + +### 4. CLI (`pricewatch/app/cli/`) +- ✅ `pricewatch run` - Pipeline YAML → JSON +- ✅ `pricewatch detect` - Détection store depuis URL +- ✅ `pricewatch fetch` - Test HTTP/Playwright +- ✅ `pricewatch parse` - Test parsing HTML +- ✅ `pricewatch doctor` - Health check + +--- + +## 🔧 Corrections Apportées + +### Amazon Store +1. **Extraction images** - Ajout fallback générique `soup.find_all("img")` +2. 
**Prix séparés** - Support `a-price-whole` + `a-price-fraction` + +### Tests Ajoutés (177 nouveaux) +1. **Registry** - 40 tests (24 unitaires + 16 intégration) +2. **I/O** - 36 tests (YAML, JSON, debug files) +3. **HTTP Fetch** - 21 tests (mocks requests) +4. **Playwright Fetch** - 21 tests (mocks Playwright) + +--- + +## ✨ Fonctionnalités Validées + +### Scraping +- ✅ Détection automatique du store depuis URL +- ✅ Normalisation URLs vers forme canonique +- ✅ Extraction ASIN/SKU/référence produit +- ✅ Parsing HTML → ProductSnapshot +- ✅ Fallback HTTP → Playwright automatique +- ✅ Gestion anti-bot (User-Agent, headers, timeout) + +### Data Extraction +- ✅ Titre produit +- ✅ Prix (EUR, USD, GBP) +- ✅ Statut stock (in_stock, out_of_stock, unknown) +- ✅ Images (URLs multiples) +- ✅ Catégorie (breadcrumb) +- ✅ Caractéristiques techniques (specs dict) +- ✅ Référence produit (ASIN, SKU) + +### Debug & Observabilité +- ✅ Logs détaillés avec timestamps et couleurs +- ✅ Sauvegarde HTML optionnelle +- ✅ Screenshots Playwright optionnels +- ✅ Métriques (durée, taille HTML, méthode) +- ✅ Gestion erreurs robuste (403, captcha, timeout) + +### Output +- ✅ JSON structuré (ProductSnapshot[]) +- ✅ Validation Pydantic +- ✅ Serialization ISO 8601 (dates) +- ✅ Pretty-print configurable + +--- + +## 📋 Commandes Testées + +```bash +# Pipeline complet +pricewatch run --yaml scrap_url.yaml --out scraped_store.json + +# Détection store +pricewatch detect "https://www.amazon.fr/dp/B08N5WRWNW" + +# Test HTTP +pricewatch fetch "https://example.com" --http + +# Test Playwright +pricewatch fetch "https://example.com" --playwright + +# Parse HTML +pricewatch parse amazon --in page.html + +# Health check +pricewatch doctor + +# Mode debug +pricewatch run --yaml scrap_url.yaml --debug +``` + +--- + +## 🧪 Tests Exécutés + +### Lancer tous les tests +```bash +pytest -v --tb=no --cov=pricewatch +``` + +**Résultat**: `295 passed, 3 warnings in 41.40s` + +### Par module +```bash +pytest tests/core/ # 105 tests +pytest tests/scraping/ # 42 tests +pytest tests/stores/ # 148 tests +``` + +### Coverage détaillé +```bash +pytest --cov=pricewatch --cov-report=html +# Voir: htmlcov/index.html +``` + +--- + +## 📦 Dépendances + +### Production +- `typer[all]` - CLI framework +- `rich` - Terminal UI +- `pydantic` - Data validation +- `requests` - HTTP client +- `playwright` - Browser automation +- `beautifulsoup4` - HTML parsing +- `lxml` - XML/HTML parser +- `pyyaml` - YAML support + +### Développement +- `pytest` - Testing framework +- `pytest-cov` - Coverage reporting +- `pytest-mock` - Mocking utilities +- `pytest-asyncio` - Async test support + +--- + +## 🚀 Prochaines Étapes (Phase 2) + +La Phase 1 CLI est **production-ready**. Vous pouvez démarrer la Phase 2: + +### Infrastructure +1. **PostgreSQL + Alembic** + - Schéma base de données + - Migrations versionnées + - Models SQLAlchemy + - Historique prix + +2. **Worker & Scheduler** + - Redis pour queue + - RQ ou Celery worker + - Scraping planifié (quotidien) + - Retry policy + +3. **API REST** + - FastAPI endpoints + - Authentification JWT + - Documentation OpenAPI + - CORS configuration + +4. 
**Web UI** + - Framework React/Vue + - Design responsive + - Dark theme Gruvbox + - Graphiques historique prix + - Système d'alertes + +### Features +- Alertes baisse prix (email, webhooks) +- Alertes retour en stock +- Comparateur multi-stores +- Export données (CSV, Excel) +- API publique + +--- + +## 📝 Documentation + +- `README.md` - Guide utilisateur complet +- `TODO.md` - Roadmap et phases +- `CHANGELOG.md` - Historique des versions +- `CLAUDE.md` - Guide pour Claude Code +- `PROJECT_SPEC.md` - Spécifications techniques + +--- + +## 🎯 Métriques de Qualité + +| Métrique | Valeur | Objectif | Statut | +|----------|--------|----------|--------| +| Tests passants | 295/295 | 100% | ✅ | +| Code coverage | 76% | >70% | ✅ | +| Stores actifs | 4 | ≥2 | ✅ | +| CLI commands | 5 | ≥4 | ✅ | +| Documentation | Complète | Complète | ✅ | + +--- + +## ✅ Checklist Phase 1 + +- [x] Architecture modulaire +- [x] Modèle de données Pydantic +- [x] Système de logging +- [x] Lecture YAML / Écriture JSON +- [x] Registry stores avec détection automatique +- [x] HTTP fetch avec User-Agent rotation +- [x] Playwright fallback anti-bot +- [x] BaseStore abstrait +- [x] Amazon store complet +- [x] Cdiscount store complet +- [x] Backmarket store complet +- [x] AliExpress store complet +- [x] CLI Typer avec 5 commandes +- [x] Tests pytest (295 tests) +- [x] Code coverage >70% +- [x] Documentation complète +- [x] Pipeline YAML → JSON fonctionnel +- [x] Validation avec URLs réelles + +--- + +**Phase 1 CLI: 100% COMPLÈTE** ✅ + +Prêt pour la Phase 2! 🚀 diff --git a/PHASE_2_PROGRESS.md b/PHASE_2_PROGRESS.md new file mode 100755 index 0000000..1408c7f --- /dev/null +++ b/PHASE_2_PROGRESS.md @@ -0,0 +1,437 @@ +# 🚀 Phase 2 Infrastructure - EN COURS + +**Date de démarrage**: 2026-01-14 +**Version cible**: 0.4.0 +**Objectif**: Ajouter PostgreSQL + Redis/RQ worker pour persistence et scraping asynchrone + +--- + +## 📊 Vue d'Ensemble + +### Objectifs Phase 2 +- ✅ Configuration centralisée (database, Redis, app) +- ✅ Modèles SQLAlchemy ORM (5 tables) +- ✅ Connexion base de données (init_db, get_session) +- ✅ Migrations Alembic +- ⏳ Repository pattern (CRUD) +- ⏳ Worker RQ pour scraping asynchrone +- ⏳ Scheduler pour jobs récurrents +- ✅ CLI étendu (commandes DB) +- ✅ Docker Compose (PostgreSQL + Redis) +- ⏳ Tests complets + +--- + +## ✅ Semaine 1: Database Foundation (TERMINÉE) + +### Tâches Complétées + +#### 1. Configuration Centralisée ✅ +**Fichier**: `pricewatch/app/core/config.py` (187 lignes) + +**Contenu**: +- `DatabaseConfig`: Configuration PostgreSQL + - Host, port, database, user, password + - Propriété `url`: SQLAlchemy connection string + - Propriété `url_async`: AsyncPG connection string (futur) + - Prefix env vars: `PW_DB_*` (PW_DB_HOST, PW_DB_PORT, etc.) 
+ +- `RedisConfig`: Configuration Redis pour RQ + - Host, port, db, password (optional) + - Propriété `url`: Redis connection string + - Prefix env vars: `PW_REDIS_*` + +- `AppConfig`: Configuration globale application + - Debug mode + - Worker timeout (300s par défaut) + - Worker concurrency (2 par défaut) + - Feature flags: `enable_db`, `enable_worker` + - Defaults Playwright: timeout, use_playwright + - Nested configs: `db`, `redis` + - Prefix env vars: `PW_*` + +- **Pattern Singleton**: `get_config()`, `set_config()`, `reset_config()` + +**Justifications**: +- 12-factor app: configuration via env vars +- Pydantic validation garantit config valide au démarrage +- Valeurs par défaut pour développement local +- Support `.env` file pour faciliter le setup +- Feature flags permettent de désactiver DB/worker pour tests + +#### 2. Dépendances Phase 2 ✅ +**Fichier**: `pyproject.toml` (lignes 48-60) + +**Ajouts**: +```toml +# Database (Phase 2) +"sqlalchemy>=2.0.0", +"psycopg2-binary>=2.9.0", +"alembic>=1.13.0", + +# Configuration (Phase 2) +"python-dotenv>=1.0.0", + +# Worker/Queue (Phase 2) +"redis>=5.0.0", +"rq>=1.15.0", +"rq-scheduler>=0.13.0", +``` + +#### 3. Modèles SQLAlchemy ORM ✅ +**Fichier**: `pricewatch/app/db/models.py` (322 lignes) + +**Tables créées**: + +1. **`products`** - Catalogue produits + - PK: `id` (Integer, autoincrement) + - Natural key: `(source, reference)` - Unique constraint + - Colonnes: `url`, `title`, `category`, `currency` + - Timestamps: `first_seen_at`, `last_updated_at` + - Relations: `price_history`, `images`, `specs`, `logs` + - Indexes: source, reference, last_updated_at + +2. **`price_history`** - Historique prix (time-series) + - PK: `id` (Integer, autoincrement) + - FK: `product_id` → products(id) CASCADE + - Unique: `(product_id, fetched_at)` - Évite doublons + - Colonnes: `price` (Numeric 10,2), `shipping_cost`, `stock_status` + - Fetch metadata: `fetch_method`, `fetch_status`, `fetched_at` + - Check constraints: stock_status, fetch_method, fetch_status + - Indexes: product_id, fetched_at + +3. **`product_images`** - Images produit + - PK: `id` (Integer, autoincrement) + - FK: `product_id` → products(id) CASCADE + - Unique: `(product_id, image_url)` - Évite doublons + - Colonnes: `image_url` (Text), `position` (Integer, 0=main) + - Index: product_id + +4. **`product_specs`** - Caractéristiques produit (key-value) + - PK: `id` (Integer, autoincrement) + - FK: `product_id` → products(id) CASCADE + - Unique: `(product_id, spec_key)` - Évite doublons + - Colonnes: `spec_key` (String 200), `spec_value` (Text) + - Indexes: product_id, spec_key + +5. **`scraping_logs`** - Logs observabilité + - PK: `id` (Integer, autoincrement) + - FK optionnelle: `product_id` → products(id) SET NULL + - Colonnes: `url`, `source`, `reference`, `fetched_at` + - Métriques: `duration_ms`, `html_size_bytes` + - Fetch metadata: `fetch_method`, `fetch_status` + - Debug data (JSONB): `errors`, `notes` + - Indexes: product_id, source, fetched_at, fetch_status + +**Justifications schéma**: +- Normalisation: products séparée de price_history (catalogue vs time-series) +- Clé naturelle (source, reference) vs UUID arbitraire +- Tables séparées pour images/specs: évite JSONB non structuré +- JSONB uniquement pour données variables: errors, notes dans logs +- Cascade DELETE: suppression produit → suppression historique +- SET NULL pour logs: garde trace même si produit supprimé + +--- + +### Tâches Complétées (suite) + +#### 4. 
Connexion Base de Données ✅ +**Fichier**: `pricewatch/app/db/connection.py` + +**Contenu**: +- `get_engine(config)`: Engine SQLAlchemy (pooling) +- `get_session_factory(config)`: Session factory +- `get_session(config)`: Context manager +- `init_db(config)`: Création tables +- `check_db_connection(config)`: Health check +- `reset_engine()`: Reset pour tests + +**Justifications**: +- Singleton engine pour éviter les pools multiples +- `pool_pre_ping` pour robustesse +- Context manager pour rollback/close automatiques + +--- + +#### 5. Setup Alembic ✅ +**Fichiers**: +- `alembic.ini` +- `pricewatch/app/db/migrations/env.py` +- `pricewatch/app/db/migrations/script.py.mako` + +**Justifications**: +- URL DB injectée depuis `AppConfig` +- `compare_type=True` pour cohérence des migrations + +#### 6. Migration Initiale ✅ +**Fichier**: `pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py` + +**Contenu**: +- 5 tables + indexes + contraintes +- JSONB pour `errors` et `notes` + +#### 7. Commandes CLI Database ✅ +**Fichier**: `pricewatch/app/cli/main.py` + +**Commandes**: +```bash +pricewatch init-db # Créer tables +pricewatch migrate "message" # Générer migration Alembic +pricewatch upgrade # Appliquer migrations +pricewatch downgrade # Rollback migration +``` + +#### 8. Docker Compose ✅ +**Fichier**: `docker-compose.yml` + +**Services**: +- PostgreSQL 16 (port 5432) +- Redis 7 (port 6379) +- Volumes pour persistence + +#### 9. Fichier .env Exemple ✅ +**Fichier**: `.env.example` + +**Variables**: +```bash +# Database +PW_DB_HOST=localhost +PW_DB_PORT=5432 +PW_DB_DATABASE=pricewatch +PW_DB_USER=pricewatch +PW_DB_PASSWORD=pricewatch + +# Redis +PW_REDIS_HOST=localhost +PW_REDIS_PORT=6379 +PW_REDIS_DB=0 + +# App +PW_DEBUG=false +PW_WORKER_TIMEOUT=300 +PW_WORKER_CONCURRENCY=2 +PW_ENABLE_DB=true +PW_ENABLE_WORKER=true +``` + +#### 10. Tests Database ✅ +**Fichiers**: +- `tests/db/test_models.py`: Tests des modèles SQLAlchemy +- `tests/db/test_connection.py`: Tests connexion et session + +**Stratégie tests**: +- SQLite in-memory pour tests unitaires +- Fixtures pytest pour setup/teardown +- Tests relationships, constraints, indexes + +--- + +## 📦 Semaine 2: Repository & Pipeline (EN COURS) + +### Tâches Prévues + +#### Repository Pattern +**Fichier**: `pricewatch/app/db/repository.py` + +**Classe**: `ProductRepository` +- `get_or_create(source, reference)`: Trouver ou créer produit +- `save_snapshot(snapshot)`: Persist ProductSnapshot to DB +- `update_product_metadata(product, snapshot)`: Update title, url, etc. 
+- `add_price_history(product, snapshot)`: Ajouter entrée prix +- `sync_images(product, images)`: Sync images (add new, keep existing) +- `sync_specs(product, specs)`: Sync specs (upsert) +- `add_scraping_log(snapshot, product_id)`: Log scraping + +**Statut**: ✅ Terminé + +#### Scraping Pipeline +**Fichier**: `pricewatch/app/scraping/pipeline.py` + +**Classe**: `ScrapingPipeline` +- `process_snapshot(snapshot, save_to_db)`: Orchestration +- Non-blocking: échec DB ne crash pas pipeline +- Retour: `product_id` ou `None` + +**Statut**: ✅ Terminé + +#### CLI Modification +**Fichier**: `pricewatch/app/cli/main.py` + +**Modification commande `run`**: +- Ajouter flag `--save-db / --no-db` +- Intégrer `ScrapingPipeline` si `save_db=True` +- Compatibilité backward: JSON output toujours créé + +**Statut**: ✅ Terminé + +#### Tests Repository + Pipeline ✅ +**Fichiers**: +- `tests/db/test_repository.py` +- `tests/scraping/test_pipeline.py` + +**Statut**: ✅ Terminé + +#### Tests end-to-end CLI + DB ✅ +**Fichier**: +- `tests/cli/test_run_db.py` + +**Statut**: ✅ Terminé + +--- + +## 📦 Semaine 3: Worker Infrastructure (EN COURS) + +### Tâches Prévues + +#### RQ Task +**Fichier**: `pricewatch/app/tasks/scrape.py` + +**Fonction**: `scrape_product(url, use_playwright=True)` +- Réutilise 100% code Phase 1 (detect → fetch → parse) +- Save to DB via ScrapingPipeline +- Retour: `{success, product_id, snapshot, error}` + +**Statut**: ✅ Terminé + +#### Scheduler +**Fichier**: `pricewatch/app/tasks/scheduler.py` + +**Classe**: `ScrapingScheduler` +- `schedule_product(url, interval_hours=24)`: Job récurrent +- `enqueue_immediate(url)`: Job unique +- Basé sur `rq-scheduler` + +**Statut**: ✅ Terminé + +#### CLI Worker +**Nouvelles commandes**: +```bash +pricewatch worker # Lancer worker RQ +pricewatch enqueue # Enqueue scrape immédiat +pricewatch schedule --interval 24 # Scrape quotidien +``` + +**Statut**: ✅ Terminé + +--- + +## 📦 Semaine 4: Tests & Documentation (NON DÉMARRÉ) + +### Tâches Prévues + +#### Tests +- Tests end-to-end (CLI → DB → Worker) +- Tests erreurs (DB down, Redis down) +- Tests backward compatibility (`--no-db`) +- Performance tests (100+ produits) + +#### Documentation +- Update README.md (setup Phase 2) +- Update CHANGELOG.md +- Migration guide (JSON → DB) + +--- + +## 📈 Métriques d'Avancement + +| Catégorie | Complétées | Totales | % | +|-----------|------------|---------|---| +| **Semaine 1** | 10 | 10 | 100% | +| **Semaine 2** | 5 | 5 | 100% | +| **Semaine 3** | 3 | 6 | 50% | +| **Semaine 4** | 0 | 7 | 0% | +| **TOTAL Phase 2** | 18 | 28 | **64%** | + +--- + +## 🎯 Prochaine Étape Immédiate + +**Prochaine étape immédiate** +- Tests end-to-end worker + DB +- Gestion des erreurs Redis down (CLI + worker) + +**Apres (prevu)** +- Logs d'observabilite pour jobs planifies + +--- + +## 🔧 Vérifications + +### Vérification Semaine 1 (objectif) +```bash +# Setup infrastructure +docker-compose up -d +pricewatch init-db + +# Vérifier tables créées +psql -h localhost -U pricewatch pricewatch +\dt +# → 5 tables: products, price_history, product_images, product_specs, scraping_logs +``` + +### Vérification Semaine 2 (objectif) +```bash +# Test pipeline avec DB +pricewatch run --yaml scrap_url.yaml --save-db + +# Vérifier données en DB +psql -h localhost -U pricewatch pricewatch +SELECT * FROM products LIMIT 5; +SELECT * FROM price_history ORDER BY fetched_at DESC LIMIT 10; +``` + +### Vérification Semaine 3 (objectif) +```bash +# Enqueue job +pricewatch enqueue "https://www.amazon.fr/dp/B08N5WRWNW" + +# 
Lancer worker
pricewatch worker

# Vérifier job traité
psql -h localhost -U pricewatch pricewatch
SELECT * FROM scraping_logs ORDER BY fetched_at DESC LIMIT 5;
```

---

## 📝 Notes Importantes

### Backward Compatibility
- ✅ CLI Phase 1 fonctionne sans changement
- ✅ Format JSON identique
- ✅ Database optionnelle (`--no-db` flag)
- ✅ ProductSnapshot inchangé
- ✅ Tests Phase 1 continuent à passer (295 tests)

### Architecture Décisions

**Normalisation vs Performance**:
- Choix: Normalisation stricte (5 tables)
- Justification: Catalogue change rarement, prix changent quotidiennement
- Alternative rejetée: Tout dans products + JSONB (moins queryable)

**Clé Naturelle vs UUID**:
- Choix: `(source, reference)` comme unique constraint
- Justification: ASIN Amazon déjà unique globalement
- Alternative rejetée: UUID artificiel (complexifie déduplication)

**Synchrone vs Asynchrone**:
- Choix: RQ synchrone (pas d'async/await)
- Justification: Code Phase 1 réutilisable à 100%, simplicité
- Alternative rejetée: Asyncio + asyncpg (refactoring massif)

---

### Validation locale (Semaine 1)
```bash
docker compose up -d
./venv/bin/alembic -c alembic.ini upgrade head
psql -h localhost -U pricewatch pricewatch
\dt
```

**Résultat**: 6 tables visibles (products, price_history, product_images, product_specs, scraping_logs, alembic_version).
**Statut**: ✅ Semaine 1 terminée (100%)

---

**Dernière mise à jour**: 2026-01-14
diff --git a/README.md b/README.md
index a48a31a..cfa1533 100755
--- a/README.md
+++ b/README.md
@@ -58,6 +58,13 @@ pricewatch/
 │   │   ├── store.py
 │   │   ├── selectors.yml
 │   │   └── fixtures/
+│   ├── db/              # Persistance SQLAlchemy (Phase 2)
+│   │   ├── models.py
+│   │   ├── connection.py
+│   │   └── migrations/
+│   ├── tasks/           # Jobs RQ (Phase 3)
+│   │   ├── scrape.py
+│   │   └── scheduler.py
 │   └── cli/
 │       └── main.py      # CLI Typer
 ├── tests/               # Tests pytest
@@ -76,6 +83,9 @@ pricewatch run --yaml scrap_url.yaml --out scraped_store.json
 
 # Avec debug
 pricewatch run --yaml scrap_url.yaml --out scraped_store.json --debug
+
+# Avec persistance DB
+pricewatch run --yaml scrap_url.yaml --out scraped_store.json --save-db
 ```
 
 ### Commandes utilitaires
@@ -97,6 +107,45 @@ pricewatch parse amazon --in scraped/page.html
 pricewatch doctor
 ```
 
+### Commandes base de données
+
+```bash
+# Initialiser les tables
+pricewatch init-db
+
+# Générer une migration
+pricewatch migrate "Initial schema"
+
+# Appliquer les migrations
+pricewatch upgrade
+
+# Revenir en arrière
+pricewatch downgrade -1
+```
+
+### Commandes worker
+
+```bash
+# Lancer un worker RQ
+pricewatch worker
+
+# Enqueue un job immédiat
+pricewatch enqueue "https://example.com/product"
+
+# Planifier un job récurrent
+pricewatch schedule "https://example.com/product" --interval 24
+```
+
+## Base de données (Phase 2)
+
+```bash
+# Lancer PostgreSQL + Redis en local
+docker-compose up -d
+
+# Exemple de configuration
+cp .env.example .env
+```
+
 ## Configuration (scrap_url.yaml)
 
 ```yaml
@@ -196,8 +245,8 @@ Aucune erreur ne doit crasher silencieusement : toutes sont loggées et tracées
 - ✅ Tests pytest
 
 ### Phase 2 : Persistence
-- [ ] Base de données PostgreSQL
-- [ ] Migrations Alembic
+- [x] Base de données PostgreSQL
+- [x] Migrations Alembic
 - [ ] Historique des prix
 
 ### Phase 3 : Automation
diff --git a/TODO.md b/TODO.md
index 9143b29..8ce85be 100755
--- a/TODO.md
+++ b/TODO.md
@@ -101,72 +101,92 @@ Liste des tâches priorisées pour le développement de PriceWatch.
### Étape 9 : Tests - [x] Configurer pytest dans pyproject.toml -- [x] Tests core/schema.py +- [x] Tests core/schema.py (29 tests - 100% coverage) - [x] Validation ProductSnapshot - [x] Serialization JSON -- [x] Tests core/registry.py +- [x] Tests core/registry.py (40 tests - 100% coverage) - [x] Enregistrement stores - [x] Détection automatique -- [x] Tests stores/amazon/ + - [x] Tests d'intégration avec 4 stores réels +- [x] Tests core/io.py (36 tests - 97% coverage) + - [x] Lecture/écriture YAML/JSON + - [x] Sauvegarde debug HTML/screenshots +- [x] Tests stores/amazon/ (33 tests - 89% coverage) - [x] match() avec différentes URLs - [x] canonicalize() - [x] extract_reference() - - [~] parse() sur fixtures HTML (6 tests nécessitent fixtures réels) -- [ ] Tests stores/cdiscount/ - - [ ] Idem Amazon -- [ ] Tests scraping/ - - [ ] http_fetch avec mock - - [ ] pw_fetch avec mock + - [x] parse() sur fixtures HTML +- [x] Tests stores/cdiscount/ (30 tests - 72% coverage) + - [x] Tests complets avec fixtures réels +- [x] Tests stores/backmarket/ (25 tests - 85% coverage) + - [x] Tests complets avec fixtures réels +- [x] Tests stores/aliexpress/ (32 tests - 85% coverage) + - [x] Tests complets avec fixtures réels +- [x] Tests scraping/ (42 tests) + - [x] http_fetch avec mock (21 tests - 100% coverage) + - [x] pw_fetch avec mock (21 tests - 91% coverage) ### Étape 10 : Intégration et validation - [x] Créer scrap_url.yaml exemple - [x] Tester pipeline complet YAML → JSON - [x] Tester avec vraies URLs Amazon -- [ ] Tester avec vraies URLs Cdiscount +- [x] Tester avec vraies URLs Cdiscount - [x] Vérifier tous les modes de debug - [x] Valider sauvegarde HTML/screenshots - [x] Documentation finale -### Bilan Étape 9 (Tests pytest) -**État**: 80 tests passent / 86 tests totaux (93%) -- ✓ core/schema.py: 29/29 tests -- ✓ core/registry.py: 24/24 tests -- ✓ stores/amazon/: 27/33 tests (6 tests nécessitent fixtures HTML réalistes) +### ✅ PHASE 1 TERMINÉE À 100% +**État final**: 295 tests passent / 295 tests totaux (100%) +**Coverage global**: 76% -**Tests restants**: -- Fixtures HTML Amazon/Cdiscount -- Tests Cdiscount store -- Tests scraping avec mocks +**Détail par module**: +- ✅ core/schema.py: 100% coverage +- ✅ core/registry.py: 100% coverage (40 tests) +- ✅ core/io.py: 97% coverage (36 tests) +- ✅ scraping/http_fetch.py: 100% coverage (21 tests) +- ✅ scraping/pw_fetch.py: 91% coverage (21 tests) +- ✅ stores/amazon/: 89% coverage (33 tests) +- ✅ stores/aliexpress/: 85% coverage (32 tests) +- ✅ stores/backmarket/: 85% coverage (25 tests) +- ✅ stores/cdiscount/: 72% coverage (30 tests) + +**4 stores opérationnels**: Amazon, Cdiscount, Backmarket, AliExpress --- -## Phase 2 : Base de données (Future) +## Phase 2 : Base de données (En cours) ### Persistence -- [ ] Schéma PostgreSQL -- [ ] Migrations Alembic -- [ ] Models SQLAlchemy +- [x] Schéma PostgreSQL +- [x] Migrations Alembic +- [x] Models SQLAlchemy +- [x] Connexion DB (engine, session, init) +- [x] Tests DB de base +- [x] Repository pattern (ProductRepository) +- [x] ScrapingPipeline (persistence optionnelle) +- [x] CLI `--save-db/--no-db` +- [x] Tests end-to-end CLI + DB - [ ] CRUD produits - [ ] Historique prix ### Configuration -- [ ] Fichier config (DB credentials) -- [ ] Variables d'environnement -- [ ] Dockerfile PostgreSQL +- [x] Fichier config (DB credentials) +- [x] Variables d'environnement +- [x] Docker Compose PostgreSQL/Redis --- -## Phase 3 : Worker et automation (Future) +## Phase 3 : Worker et automation (En cours) ### Worker -- [ ] 
Setup Redis -- [ ] Worker RQ ou Celery -- [ ] Queue de scraping +- [x] Setup Redis +- [x] Worker RQ +- [x] Queue de scraping - [ ] Retry policy ### Planification -- [ ] Cron ou scheduler intégré -- [ ] Scraping quotidien automatique +- [x] Cron ou scheduler intégré +- [x] Scraping quotidien automatique - [ ] Logs des runs --- @@ -216,4 +236,4 @@ Liste des tâches priorisées pour le développement de PriceWatch. --- -**Dernière mise à jour**: 2026-01-13 +**Dernière mise à jour**: 2026-01-14 diff --git a/alembic.ini b/alembic.ini new file mode 100755 index 0000000..6b36638 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,36 @@ +[alembic] +script_location = pricewatch/app/db/migrations +sqlalchemy.url = postgresql://pricewatch:pricewatch@localhost:5432/pricewatch + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = console +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = console +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100755 index 0000000..8a4c487 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,22 @@ +services: + postgres: + image: postgres:16 + environment: + POSTGRES_DB: pricewatch + POSTGRES_USER: pricewatch + POSTGRES_PASSWORD: pricewatch + ports: + - "5432:5432" + volumes: + - pricewatch_pgdata:/var/lib/postgresql/data + + redis: + image: redis:7 + ports: + - "6379:6379" + volumes: + - pricewatch_redisdata:/data + +volumes: + pricewatch_pgdata: + pricewatch_redisdata: diff --git a/pricewatch.egg-info/PKG-INFO b/pricewatch.egg-info/PKG-INFO index 7b80e3c..b784bc1 100755 --- a/pricewatch.egg-info/PKG-INFO +++ b/pricewatch.egg-info/PKG-INFO @@ -21,6 +21,13 @@ Requires-Dist: lxml>=5.1.0 Requires-Dist: cssselect>=1.2.0 Requires-Dist: pyyaml>=6.0.1 Requires-Dist: python-dateutil>=2.8.2 +Requires-Dist: sqlalchemy>=2.0.0 +Requires-Dist: psycopg2-binary>=2.9.0 +Requires-Dist: alembic>=1.13.0 +Requires-Dist: python-dotenv>=1.0.0 +Requires-Dist: redis>=5.0.0 +Requires-Dist: rq>=1.15.0 +Requires-Dist: rq-scheduler>=0.13.0 Provides-Extra: dev Requires-Dist: pytest>=8.0.0; extra == "dev" Requires-Dist: pytest-cov>=4.1.0; extra == "dev" diff --git a/pricewatch.egg-info/SOURCES.txt b/pricewatch.egg-info/SOURCES.txt index b616b16..a519fb9 100755 --- a/pricewatch.egg-info/SOURCES.txt +++ b/pricewatch.egg-info/SOURCES.txt @@ -11,6 +11,7 @@ pricewatch/app/__init__.py pricewatch/app/cli/__init__.py pricewatch/app/cli/main.py pricewatch/app/core/__init__.py +pricewatch/app/core/config.py pricewatch/app/core/io.py pricewatch/app/core/logging.py pricewatch/app/core/registry.py diff --git a/pricewatch.egg-info/requires.txt b/pricewatch.egg-info/requires.txt index e19f717..f366fd7 100755 --- a/pricewatch.egg-info/requires.txt +++ b/pricewatch.egg-info/requires.txt @@ -9,6 +9,13 @@ lxml>=5.1.0 cssselect>=1.2.0 pyyaml>=6.0.1 python-dateutil>=2.8.2 +sqlalchemy>=2.0.0 +psycopg2-binary>=2.9.0 +alembic>=1.13.0 +python-dotenv>=1.0.0 +redis>=5.0.0 +rq>=1.15.0 +rq-scheduler>=0.13.0 [dev] pytest>=8.0.0 diff --git a/pricewatch/app/cli/__pycache__/main.cpython-313.pyc b/pricewatch/app/cli/__pycache__/main.cpython-313.pyc index 4eaa2c65d05f7368129135f780a12fcc6a8c8217..0b4bad666c04ab43ad6f7ccc571c15bffeb35cfe 
100755
GIT binary patch
(deltas binaires omis — bytecode compilé, non lisible)

diff --git a/pricewatch/app/cli/main.py b/pricewatch/app/cli/main.py
index 27e9acb..3ad3d1f 100755
--- a/pricewatch/app/cli/main.py
+++ b/pricewatch/app/cli/main.py
@@ -13,20 +13,28 @@ import sys
 from pathlib import Path
 from typing import Optional
 
+import redis
 import typer
+from rq import Connection, Worker
+from alembic import command as alembic_command
+from alembic.config import Config as AlembicConfig
 from rich import print as rprint
from rich.console import Console from rich.table import Table from pricewatch.app.core import logging as app_logging +from pricewatch.app.core.config import get_config from pricewatch.app.core.io import read_yaml_config, write_json_results from pricewatch.app.core.logging import get_logger, set_level from pricewatch.app.core.registry import get_registry, register_store from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod +from pricewatch.app.db.connection import init_db from pricewatch.app.scraping.http_fetch import fetch_http +from pricewatch.app.scraping.pipeline import ScrapingPipeline from pricewatch.app.scraping.pw_fetch import fetch_playwright from pricewatch.app.stores.amazon.store import AmazonStore from pricewatch.app.stores.cdiscount.store import CdiscountStore +from pricewatch.app.tasks.scheduler import ScrapingScheduler # Créer l'application Typer app = typer.Typer( @@ -46,6 +54,75 @@ def setup_stores(): registry.register(CdiscountStore()) +def get_alembic_config() -> AlembicConfig: + """Construit la configuration Alembic à partir du repository.""" + root_path = Path(__file__).resolve().parents[3] + config_path = root_path / "alembic.ini" + migrations_path = root_path / "pricewatch" / "app" / "db" / "migrations" + + if not config_path.exists(): + logger.error(f"alembic.ini introuvable: {config_path}") + raise typer.Exit(code=1) + + alembic_cfg = AlembicConfig(str(config_path)) + alembic_cfg.set_main_option("script_location", str(migrations_path)) + alembic_cfg.set_main_option("sqlalchemy.url", get_config().db.url) + return alembic_cfg + + +@app.command("init-db") +def init_db_command(): + """ + Initialise la base de donnees (creer toutes les tables). + """ + try: + init_db(get_config()) + except Exception as e: + logger.error(f"Init DB echoue: {e}") + raise typer.Exit(code=1) + + +@app.command() +def migrate( + message: str = typer.Argument(..., help="Message de migration"), + autogenerate: bool = typer.Option(True, "--autogenerate/--no-autogenerate"), +): + """ + Genere une migration Alembic. + """ + try: + alembic_cfg = get_alembic_config() + alembic_command.revision(alembic_cfg, message=message, autogenerate=autogenerate) + except Exception as e: + logger.error(f"Migration echouee: {e}") + raise typer.Exit(code=1) + + +@app.command() +def upgrade(revision: str = typer.Argument("head", help="Revision cible")): + """ + Applique les migrations Alembic. + """ + try: + alembic_cfg = get_alembic_config() + alembic_command.upgrade(alembic_cfg, revision) + except Exception as e: + logger.error(f"Upgrade echoue: {e}") + raise typer.Exit(code=1) + + +@app.command() +def downgrade(revision: str = typer.Argument("-1", help="Revision cible")): + """ + Rollback une migration Alembic. + """ + try: + alembic_cfg = get_alembic_config() + alembic_command.downgrade(alembic_cfg, revision) + except Exception as e: + logger.error(f"Downgrade echoue: {e}") + raise typer.Exit(code=1) + @app.command() def run( yaml: Path = typer.Option( @@ -67,6 +144,11 @@ def run( "-d", help="Activer le mode debug", ), + save_db: Optional[bool] = typer.Option( + None, + "--save-db/--no-db", + help="Activer la persistence en base de donnees", + ), ): """ Pipeline complet: scrape toutes les URLs du YAML et génère le JSON. 
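Le diffstat ajoute `tests/cli/test_run_db.py`, dont le contenu n'est pas visible dans ce patch. Un test de câblage de `init-db` ressemblerait typiquement à ceci — esquisse hypothétique avec le `CliRunner` de Typer ; le monkeypatch de `init_db` est une hypothèse, pas le code réel des tests :

```python
# Esquisse hypothétique — les vrais tests vivent dans tests/cli/test_run_db.py.
from typer.testing import CliRunner

from pricewatch.app.cli import main as cli_main

runner = CliRunner()


def test_init_db_invokes_connection_helper(monkeypatch):
    """Vérifie le câblage CLI sans toucher à PostgreSQL."""
    calls = []
    # init_db est importé dans le namespace du module CLI : on le remplace là.
    monkeypatch.setattr(cli_main, "init_db", lambda config: calls.append(config))

    result = runner.invoke(cli_main.app, ["init-db"])

    assert result.exit_code == 0
    assert len(calls) == 1  # init_db appelé une seule fois avec la config active
```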
@@ -88,6 +170,12 @@ def run( logger.error(f"Erreur lecture YAML: {e}") raise typer.Exit(code=1) + app_config = get_config() + if save_db is None: + save_db = app_config.enable_db + + pipeline = ScrapingPipeline(config=app_config) + logger.info(f"{len(config.urls)} URL(s) à scraper") # Scraper chaque URL @@ -158,6 +246,11 @@ def run( snapshot = store.parse(html, canonical_url) snapshot.debug.method = fetch_method + if save_db: + product_id = pipeline.process_snapshot(snapshot, save_to_db=True) + if product_id: + logger.info(f"DB: produit id={product_id}") + snapshots.append(snapshot) status_emoji = "✓" if snapshot.is_complete() else "⚠" @@ -180,6 +273,8 @@ def run( errors=[f"Parsing failed: {str(e)}"], ), ) + if save_db: + pipeline.process_snapshot(snapshot, save_to_db=True) snapshots.append(snapshot) else: # Pas de HTML récupéré @@ -194,6 +289,8 @@ def run( errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"], ), ) + if save_db: + pipeline.process_snapshot(snapshot, save_to_db=True) snapshots.append(snapshot) # Écrire les résultats @@ -359,5 +456,65 @@ def doctor(): rprint("\n[green]✓ PriceWatch est prêt![/green]") +@app.command() +def worker( + queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"), + with_scheduler: bool = typer.Option( + True, "--with-scheduler/--no-scheduler", help="Activer le scheduler RQ" + ), +): + """ + Lance un worker RQ. + """ + config = get_config() + connection = redis.from_url(config.redis.url) + + with Connection(connection): + worker_instance = Worker([queue]) + worker_instance.work(with_scheduler=with_scheduler) + + +@app.command() +def enqueue( + url: str = typer.Argument(..., help="URL du produit a scraper"), + queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"), + save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"), + use_playwright: Optional[bool] = typer.Option( + None, "--playwright/--no-playwright", help="Forcer Playwright" + ), +): + """ + Enqueue un scraping immediat. + """ + scheduler = ScrapingScheduler(get_config(), queue_name=queue) + job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db) + rprint(f"[green]✓ Job enqueued: {job.id}[/green]") + + +@app.command() +def schedule( + url: str = typer.Argument(..., help="URL du produit a planifier"), + interval: int = typer.Option(24, "--interval", help="Intervalle en heures"), + queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"), + save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"), + use_playwright: Optional[bool] = typer.Option( + None, "--playwright/--no-playwright", help="Forcer Playwright" + ), +): + """ + Planifie un scraping recurrent. 
+ """ + scheduler = ScrapingScheduler(get_config(), queue_name=queue) + job_info = scheduler.schedule_product( + url, + interval_hours=interval, + use_playwright=use_playwright, + save_db=save_db, + ) + rprint( + f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]" + ) + + if __name__ == "__main__": app() diff --git a/pricewatch/app/core/__pycache__/config.cpython-313.pyc b/pricewatch/app/core/__pycache__/config.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..9347fe57b27cb7ef0f28062f54e11d65845d0c89 GIT binary patch literal 7958 zcmdTpOK=-Ub~C^bKZ5v>;8$8{MA9N5{w$HQMatA#6eWukm!des|tOIsguaPSh~b?BP@+rJl&Vs$>>iw4q>x)l?J4i| zU_bzde}|+hU4@69_xkg?-|P43-mR%|F;M;?7?c0Hfnoj)Kg?29PE;iRM6b*B#-nv2@o|xll=O~L+zhsYYBZThz%WWTKP^RN_>)vV z-XW$^aXDhl7K83kpzGI zM9-`kQImuR=7!4Fv?z-_jEQq7*vI6sh^LrHkZa(K?glgkb|qd+^U+5e3nC$6OoFaI zg{7tmEJ-9tm=)8iu{`)#l;X)$%2-r984=^qUPz}>Nuu(-IQ^^~&kF=th0OxSdB7-= znohaCLIn!4y5n*Rr6|UsUUfm1;?bZ*caMNP%>C4B&6*+iQ8}VQV^>U4!|`M+CXsA) zBuS)RV<{0F*ev#ZWp^7YcbO1FSU3(AfhATr7#6~Xsz9l&0w+`rT8J%V4><%IswKtS z1qa1DLoUEO3wW1MP4U&Vy}JlsL-B5kr|M0|ae+$X_%#&oqwPH~uV1JY>OjHk2RY&m z`Gkg$9gadHs{6@2P+^Hmdc%{Ch!<{6nvDr$^K z!j*|$-ARQt2bipPAI~FV!=l@x5=c3&>eW$6i4d7eN9T|#pQ?wEi~FFgb&f(;3J={K zJapL_lOsMME=alq&H-hxh>%t!qH_qd{zB`Ns3>=mB&s_KHQUs+@aRbRPaJC0pQ_U8 zY|km(CdU#W@C{CP0XCc>(yaWp?!Y?gobESkVJQ(wM&VHERnpsFBy<}A2Bpjb+=_yrUzP`HW|>|?7);eg_(MCo_6Gr_8?-~w@- zjYsCBg$y3kgcPCn?&|ae*oX^BvLLEMa2ly(mD};5zCPT~A#?9uN0|+waQaLExUw}R z?R9oYXJ6OZzKtqU2dt0)0xtp|f?5C@tj;M?d{)6d7n9>)yZTZjc?;N5 z`ao606nvUG&7I>n_XL1w$#rupJuk^wJXz_aP5h3Gbh~CrbW#=qd8`F zVHD>7g>>I>omDak@EYGFW4@F6va|x0n4e99+VbL}6yZhemWl-{>OfbX`n`0aeqR@g z`Vk(skQ_nKil7ZaI{-rg$Wf^3P7qs|&ZRefU7!Z#IQH2WYsZ>;KXWg$Hvho?VBn+S zhr)MU*lN)on8*|!qS@Yh~9Jfdi#@b*e zg8|(U4x>>ChjmvtypW8h<5+ix!?)97JWuh4!442bjmrrskp!|m9F8U<;V{87O7&Z} zDk{;Tp_oV{RqCe|prh97Rv7{cRQ;kefk#+(!+cUaY-ltXDUwV{M9mO1=%fo<)Q7_g zjur`npCa>X-ZI!U+y`6GN@akGn3io{Cs(b$af~qD2bYA-3e}S`uyma5GD}+ zGo%p#Y7=Qja0oz&6|@6_`nbP?%3TIL5B!%Gw8L0LI0fJ41^ca+A%#e=0(X`5;8?h- zvI0I4{b*&MSOt`dKC#~pp3j!OnBP)3XhlxX-2EcMM=(eSTX&=2^L|$3^k!W9woGTT z4zo42YeBBu*}Vo!2wKQX@TYSS=*}s)E}l2eEGm}nrD8EO13lL`AeA=&P|wxbeg7Bt zf3f0U8Ti}bzZqWXTl43XC_wSBc=jl1u>=Un~y`jPd~563pf)=xhWH@Q#j zAKM>fH1Ao>an?9jXf1+uq!<23AA*+=pgAQc5EP6U>Br9j1k_z)&`59@#?nyW1oxS| zg5XsIxD0X%L77b&LL36re&tsH&?fazoAlr>a=uZ+yo^$l^uV3-4I7Bzr=Ggivs%|! 
z&NIF=vTS|KjXyoqwmzzzc`JA5I#{5&73JN9$J}*lg${j#6|z^2P%DK0)C%oduxjiF za#h*RxF9C^KeRKL9T*k{vxXYMR}|Izac60?k1;()`yQG#1Pb^Xp`PNsGz(~;cpr?@ zC^QMpLW^(!ER`Qe2-OM)2U$`Vs)hM}WcCjQD7HQn0PG>e3atW9Q{x8V2x*KR4z}se zv#C_xid}^k*^W{xmQP+W(=EwrdoN^r&L=P(1=379#-pR-I~OG~lY|r(vn&XryD?bd zAsbBwsiD)WXk)n>q>v3>qc)Mhm7L*Kc|l4-rYI*M*@HtIRrt=6{DPc7cNo+?hWQNV zn|(!%;!Bf;6->@ztQ$!ag2}Sck_S&4krYKz^xAyiLYqkD&Fo2&_k5nggIiW)MTO`e zRuK^sJeUc(x>-n=fd|afgSu0qOAbe8vPbhH%!rZs7)hoRQFH3qbb^N6kRPiVy(Ztq z;6lFn@38V?qejNWPsPQ|9U{l()Z!gjfJhnc3FtQHE&1#!oGP?6w;|hl6>k#?cz#xl z$B`d?CIeD2lClP~KScUPW9U}!2bDYlLY+hdz`M*1 z_%zBiV#|uanm0rIceTV6sG=k2w|wt0Fu|(eGAbuf;Fv16##FDYfTQ{bcQlR8ysUVb z{=$vecdpw8TzJX`fX{E?xl-$I=DAYq3VZZ}c(w!2=u659jPd0T~s$mq2kIKpq;>5i_8-$9UZ1+Sm_VsH-xQmk)!PqEb0&mNVdyHUj zmLDD-=FgSfI2PSb0x|0zEm+(kKI<~GsUhA7URr_otewUkKz5Igz|9_-vM7A{t{FD> z>zKT09pZIX$p-SxifkryM$W#qOa|S$&A4FGIXMBQ^E~ik#22Mk1mrRj#t>jcL@pq> zh@c%nao*^H@yz@wQ1Df~``BM$2R}HJCFb`I&wJJHR%?M*wjHlNGd@=0YXkR(?hUPV zYk|IP$IB-A)>`0x=|2)esJskTOTCfPijBvrCnVQT_1TLdbPgSx7e-r&yIb1?6aOvd$taj zc0^9j+&iPSp3nmQ+m3RUNI-y5hq4C%qsQYfY)oC#j;~&8 zn|@)n)Y+EKFAwZ8mMYr|7juB$@zrTZCV+6jwlukN&LB9L!|gjhzjpWn5Dwcku3^<` zc4<7g;|pl5Zvmyzwlub^nq6=#AdDMh{bZ#uNk=!G{W1_bV5~!H`LP^Kdysa)S=xcp zvN~!G7C5>COqXTLFMLeP;q_nUn$JLx>v_zbdD?tv{pY#n)3Bt|kGa$T#ks0Z86v`M zn1m|_IBsz9W#nRjDijQNtnl0YD*(lAHUi=N4W!V5a>8=E*u7S8qlK&u%w>_wwi~^R zp*do)9wjd z=>nVkyA2J@jy9f^g7>N>yDDia!9tTA8IpWb8KAFIq2R?5|__A=$eC4U| zhC?sPRERjs!dmDyC6NVqe~ES^A($oj#6cV5y9>sFiVO0z=p|<=lt1-Td#XYnLXe zYUnm&h?1xa1y0ogb307ULExw&v`pi@Mp-G2ykeFh+^RwY*JX2kg-tXiWs0y9Zf6q_ zIR#HT^dZXFHQWT0;8_yOi_hS3sDc=I5KG#4_9Cuq)43A2mfzs0l$hMG8hbhojL|&} zvIJGL#1p_Xka}%cEHw+&WshR|PC$n{>>m}Hk{0#UhOQm%;C7Fx62`&B}e&*tA zjJK|6aYw)4It;tC19Rd_XWhzA9y?o?Y^;oFz&%S|i6ob%vgrtP_{j;&i6+hgGGdAF18S;;(S S;BW2CJ^cG)6UTPa4g4PhCxCDO literal 0 HcmV?d00001 diff --git a/pricewatch/app/core/config.py b/pricewatch/app/core/config.py new file mode 100755 index 0000000..66e4e7e --- /dev/null +++ b/pricewatch/app/core/config.py @@ -0,0 +1,186 @@ +""" +Configuration centralisée pour PriceWatch Phase 2. + +Gère la configuration de la base de données, Redis, et l'application globale. +Utilise Pydantic Settings pour validation et chargement depuis variables d'environnement. + +Justification technique: +- Pattern 12-factor app: configuration via env vars +- Pydantic validation garantit config valide au démarrage +- Valeurs par défaut pour développement local +- Support .env file pour faciliter le setup +""" + +from typing import Optional + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + +from pricewatch.app.core.logging import get_logger + +logger = get_logger("core.config") + + +class DatabaseConfig(BaseSettings): + """Configuration PostgreSQL.""" + + host: str = Field(default="localhost", description="PostgreSQL host") + port: int = Field(default=5432, description="PostgreSQL port") + database: str = Field(default="pricewatch", description="Database name") + user: str = Field(default="pricewatch", description="Database user") + password: str = Field(default="pricewatch", description="Database password") + + model_config = SettingsConfigDict( + env_prefix="PW_DB_", # PW_DB_HOST, PW_DB_PORT, etc. + env_file=".env", + env_file_encoding="utf-8", + extra="ignore", + ) + + @property + def url(self) -> str: + """ + SQLAlchemy connection URL. 
+
+        Format: postgresql://user:password@host:port/database
+        """
+        return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+
+    @property
+    def url_async(self) -> str:
+        """
+        Async SQLAlchemy connection URL (for future use with asyncpg).
+
+        Format: postgresql+asyncpg://user:password@host:port/database
+        """
+        return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
+
+
+class RedisConfig(BaseSettings):
+    """Redis configuration for the RQ worker."""
+
+    host: str = Field(default="localhost", description="Redis host")
+    port: int = Field(default=6379, description="Redis port")
+    db: int = Field(default=0, description="Redis database number (0-15)")
+    password: Optional[str] = Field(default=None, description="Redis password (optional)")
+
+    model_config = SettingsConfigDict(
+        env_prefix="PW_REDIS_",  # PW_REDIS_HOST, PW_REDIS_PORT, etc.
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    @property
+    def url(self) -> str:
+        """
+        Redis connection URL for RQ.
+
+        Format: redis://[password@]host:port/db
+        """
+        auth = f":{self.password}@" if self.password else ""
+        return f"redis://{auth}{self.host}:{self.port}/{self.db}"
+
+
+class AppConfig(BaseSettings):
+    """Global application configuration."""
+
+    # Debug mode
+    debug: bool = Field(
+        default=False, description="Enable debug mode (verbose logging, SQL echo)"
+    )
+
+    # Worker configuration
+    worker_timeout: int = Field(
+        default=300, description="Worker job timeout in seconds (5 minutes)"
+    )
+
+    worker_concurrency: int = Field(
+        default=2, description="Number of concurrent worker processes"
+    )
+
+    # Feature flags
+    enable_db: bool = Field(
+        default=True, description="Enable database persistence (can disable for testing)"
+    )
+
+    enable_worker: bool = Field(
+        default=True, description="Enable background worker functionality"
+    )
+
+    # Scraping defaults
+    default_playwright_timeout: int = Field(
+        default=60000, description="Default Playwright timeout in milliseconds"
+    )
+
+    default_use_playwright: bool = Field(
+        default=True, description="Use Playwright fallback by default"
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="PW_",  # PW_DEBUG, PW_WORKER_TIMEOUT, etc.
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    # Nested configs (instances, not classes)
+    db: DatabaseConfig = Field(default_factory=DatabaseConfig)
+    redis: RedisConfig = Field(default_factory=RedisConfig)
+
+    def log_config(self) -> None:
+        """Log the active configuration (without the password)."""
+        logger.info("=== Configuration PriceWatch ===")
+        logger.info(f"Debug mode: {self.debug}")
+        logger.info(f"Database: {self.db.host}:{self.db.port}/{self.db.database}")
+        logger.info(f"Redis: {self.redis.host}:{self.redis.port}/{self.redis.db}")
+        logger.info(f"DB enabled: {self.enable_db}")
+        logger.info(f"Worker enabled: {self.enable_worker}")
+        logger.info(f"Worker timeout: {self.worker_timeout}s")
+        logger.info(f"Worker concurrency: {self.worker_concurrency}")
+        logger.info("================================")
+
+
+# Singleton global config instance
+_config: Optional[AppConfig] = None
+
+
+def get_config() -> AppConfig:
+    """
+    Return the global configuration instance (singleton).
+
+    Returns:
+        AppConfig instance
+
+    Rationale:
+    - Avoids reloading the config on every call
+    - Centralizes configuration for the whole application
+    - Can be overridden for tests
+    """
+    global _config
+
+    if _config is None:
+        _config = AppConfig()
+        if _config.debug:
+            _config.log_config()
+
+    return _config
+
+
+def set_config(config: AppConfig) -> None:
+    """
+    Override the global configuration (mainly for tests).
+
+    Args:
+        config: AppConfig instance to use
+    """
+    global _config
+    _config = config
+    logger.debug("Configuration overridden")
+
+
+def reset_config() -> None:
+    """Reset the global configuration (for tests)."""
+    global _config
+    _config = None
+    logger.debug("Configuration reset")
diff --git a/pricewatch/app/db/__init__.py b/pricewatch/app/db/__init__.py
new file mode 100755
index 0000000..c466e97
--- /dev/null
+++ b/pricewatch/app/db/__init__.py
@@ -0,0 +1,41 @@
+"""
+Database module for PriceWatch Phase 2.
+
+Handles PostgreSQL persistence with the SQLAlchemy ORM.
+"""
+
+from pricewatch.app.db.connection import (
+    check_db_connection,
+    get_engine,
+    get_session,
+    get_session_factory,
+    init_db,
+    reset_engine,
+)
+from pricewatch.app.db.repository import ProductRepository
+from pricewatch.app.db.models import (
+    Base,
+    Product,
+    PriceHistory,
+    ProductImage,
+    ProductSpec,
+    ScrapingLog,
+)
+
+__all__ = [
+    # Models
+    "Base",
+    "Product",
+    "PriceHistory",
+    "ProductImage",
+    "ProductSpec",
+    "ScrapingLog",
+    # Repository
+    "ProductRepository",
+    # Connection
+    "get_engine",
+    "get_session_factory",
+    "get_session",
+    "init_db",
+    "check_db_connection",
+    "reset_engine",
+]
diff --git a/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/db/__pycache__/__init__.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..6d900a4bb8697ef5f2acf106e0f061cbaf4fbbfd
GIT binary patch
literal 813
[... base85 binary payload omitted ...]
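The configuration module above is consumed through get_config(); the override hooks exist mainly for tests. A minimal usage sketch, assuming the PW_-prefixed variables documented in the diff (the values here are illustrative, not part of the patch):

    # Hypothetical usage sketch for pricewatch.app.core.config
    import os

    os.environ["PW_DEBUG"] = "true"           # read via env_prefix="PW_"
    os.environ["PW_DB_HOST"] = "db.internal"  # read via env_prefix="PW_DB_"

    from pricewatch.app.core.config import AppConfig, get_config, reset_config, set_config

    config = get_config()  # builds the singleton lazily; logs itself in debug mode
    print(config.db.url)   # postgresql://pricewatch:pricewatch@db.internal:5432/pricewatch

    # Tests can inject a tailored config, then restore the default behaviour
    set_config(AppConfig(debug=False, enable_db=False))
    reset_config()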
diff --git a/pricewatch/app/db/__pycache__/connection.cpython-313.pyc b/pricewatch/app/db/__pycache__/connection.cpython-313.pyc
new file mode 100755
GIT binary patch
literal 8087
[... base85 binary payload omitted ...]
diff --git a/pricewatch/app/db/__pycache__/models.cpython-313.pyc b/pricewatch/app/db/__pycache__/models.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..7e342474b0ecba9d5ce2636fdebe19e7be1f7093
GIT binary patch
literal 12364
[... base85 binary payload omitted ...]
diff --git a/pricewatch/app/db/__pycache__/repository.cpython-313.pyc b/pricewatch/app/db/__pycache__/repository.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..cbee9174aef56ccb1c2e4c94ab392309fc2fada3
GIT binary patch
literal 7814
[... base85 binary payload omitted ...]
diff --git a/pricewatch/app/db/connection.py b/pricewatch/app/db/connection.py
new file mode 100755
--- /dev/null
+++ b/pricewatch/app/db/connection.py
@@ -0,0 +1,238 @@
+"""
+SQLAlchemy connection management for PriceWatch Phase 2.
+
+Provides the engine and session-factory singletons used by the persistence layer.
+"""
+
+from contextlib import contextmanager
+from typing import Generator, Optional
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import Engine
+from sqlalchemy.engine.url import make_url
+from sqlalchemy.exc import OperationalError, SQLAlchemyError
+from sqlalchemy.orm import Session, sessionmaker
+
+from pricewatch.app.core.config import AppConfig, get_config
+from pricewatch.app.core.logging import get_logger
+from pricewatch.app.db.models import Base
+
+logger = get_logger("db.connection")
+
+# Module-level singletons
+_engine: Optional[Engine] = None
+_session_factory: Optional[sessionmaker] = None
+
+
+def get_engine(config: Optional[AppConfig] = None) -> Engine:
+    """
+    Return or create the SQLAlchemy Engine (singleton).
+
+    Args:
+        config: App configuration (uses get_config() when None)
+
+    Returns:
+        Configured SQLAlchemy Engine
+
+    Rationale:
+    - Singleton: a single connection pool per application
+    - pool_pre_ping: checks the connection before use (avoids "connection closed" errors)
+    - pool_size=5, max_overflow=10: caps connections (15 max)
+    - echo=debug: SQL logging for debugging
+    """
+    global _engine
+
+    if _engine is None:
+        if config is None:
+            config = get_config()
+
+        db_url = config.db.url
+        url = make_url(db_url)
+        is_sqlite = url.get_backend_name() == "sqlite"
+
+        logger.info(f"Creating database engine: {db_url}")
+
+        engine_kwargs = {
+            "pool_pre_ping": True,
+            "pool_recycle": 3600,
+            "echo": config.debug,
+        }
+
+        if not is_sqlite:
+            engine_kwargs.update(
+                {
+                    "pool_size": 5,
+                    "max_overflow": 10,
+                }
+            )
+
+        _engine = create_engine(db_url, **engine_kwargs)
+
+        logger.info("Database engine created successfully")
+
+    return _engine
+
+
+def init_db(config: Optional[AppConfig] = None) -> None:
+    """
+    Initialize the database (creates all tables).
+
+    Args:
+        config: App configuration (uses get_config() when None)
+
+    Raises:
+        OperationalError: If the connection cannot be established
+        SQLAlchemyError: If table creation fails
+
+    Note:
+        Uses Base.metadata.create_all() - idempotent (does not crash if tables exist)
+    """
+    if config is None:
+        config = get_config()
+
+    logger.info("Initializing database...")
+
+    try:
+        engine = get_engine(config)
+
+        # Create all tables defined on Base.metadata
+        Base.metadata.create_all(bind=engine)
+
+        logger.info("Database initialized successfully")
+        logger.info(f"Tables created: {', '.join(Base.metadata.tables.keys())}")
+
+    except OperationalError as e:
+        logger.error(f"Failed to connect to database: {e}")
+        raise
+    except SQLAlchemyError as e:
+        logger.error(f"Failed to create tables: {e}")
+        raise
+
+
+def get_session_factory(config: Optional[AppConfig] = None) -> sessionmaker:
+    """
+    Return or create the session factory (singleton).
+
+    Args:
+        config: App configuration (uses get_config() when None)
+
+    Returns:
+        SQLAlchemy session factory
+
+    Rationale:
+    - expire_on_commit=False: objects stay usable after commit
+    - autocommit=False, autoflush=False: explicit control
+    """
+    global _session_factory
+
+    if _session_factory is None:
+        engine = get_engine(config)
+
+        _session_factory = sessionmaker(
+            bind=engine,
+            expire_on_commit=False,  # Objects stay usable after commit
+            autocommit=False,  # Explicit commit control
+            autoflush=False,  # Explicit flush control
+        )
+
+        logger.debug("Session factory created")
+
+    return _session_factory
+
+
+@contextmanager
+def get_session(config: Optional[AppConfig] = None) -> Generator[Session, None, None]:
+    """
+    Context manager for a SQLAlchemy session.
+
+    Args:
+        config: App configuration (uses get_config() when None)
+
+    Yields:
+        SQLAlchemy Session
+
+    Usage:
+        with get_session() as session:
+            product = session.query(Product).filter_by(reference="B08N5WRWNW").first()
+            session.commit()
+
+    Rationale:
+    - Context manager: guarantees the session is closed (no leaks)
+    - Automatic rollback on exception
+    - Automatic close at the end of the block
+    """
+    factory = get_session_factory(config)
+    session = factory()
+
+    try:
+        logger.debug("Session opened")
+        yield session
+    except Exception as e:
+        logger.error(f"Session error, rolling back: {e}")
+        session.rollback()
+        raise
+    finally:
+        logger.debug("Session closed")
+        session.close()
+
+
+def check_db_connection(config: Optional[AppConfig] = None) -> bool:
+    """
+    Check the database connection (health check).
+
+    Args:
+        config: App configuration (uses get_config() when None)
+
+    Returns:
+        True if the connection is OK, False otherwise
+
+    Note:
+        Executes a trivial query: SELECT 1
+    """
+    if config is None:
+        config = get_config()
+
+    try:
+        engine = get_engine(config)
+
+        with engine.connect() as conn:
+            result = conn.execute(text("SELECT 1"))
+            result.scalar()
+
+        logger.info("Database connection OK")
+        return True
+
+    except OperationalError as e:
+        logger.error(f"Database connection failed: {e}")
+        return False
+    except SQLAlchemyError as e:
+        logger.error(f"Database health check failed: {e}")
+        return False
+
+
+def reset_engine() -> None:
+    """
+    Reset the global engine (for tests).
+
+    Note:
+        Disposes the engine and resets the singletons.
+    """
+    global _engine, _session_factory
+
+    if _engine is not None:
+        logger.debug("Disposing database engine")
+        _engine.dispose()
+        _engine = None
+
+    _session_factory = None
+    logger.debug("Engine reset complete")
diff --git a/pricewatch/app/db/migrations/__pycache__/env.cpython-313.pyc b/pricewatch/app/db/migrations/__pycache__/env.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..b9f270e8db65488e823fa6d566498287c09ae413
GIT binary patch
literal 2912
[... base85 binary payload omitted ...]
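Taken together, the helpers above give a small, test-friendly surface. A usage sketch (assuming a reachable database; Product comes from the models module added later in this patch):

    # Hypothetical usage sketch for pricewatch.app.db.connection
    from pricewatch.app.db.connection import check_db_connection, get_session, init_db
    from pricewatch.app.db.models import Product

    if check_db_connection():   # runs the SELECT 1 health check
        init_db()               # idempotent Base.metadata.create_all()
        with get_session() as session:
            count = session.query(Product).count()
            session.commit()    # commits stay explicit (autocommit=False)
            print(f"{count} products tracked")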
diff --git a/pricewatch/app/db/migrations/env.py b/pricewatch/app/db/migrations/env.py
new file mode 100755
index 0000000..34cc133
--- /dev/null
+++ b/pricewatch/app/db/migrations/env.py
@@ -0,0 +1,80 @@
+"""
+Alembic configuration for PriceWatch.
+
+Pulls the DB URL from AppConfig to guarantee a consistent setup.
+"""
+
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+
+from pricewatch.app.core.config import get_config
+from pricewatch.app.db.models import Base
+
+# Alembic Config object
+config = context.config
+
+# Configure logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# SQLAlchemy metadata for autogenerate
+target_metadata = Base.metadata
+
+
+def _get_database_url() -> str:
+    """Build the DB URL from the application config."""
+    app_config = get_config()
+    return app_config.db.url
+
+
+def run_migrations_offline() -> None:
+    """
+    Run migrations in offline mode.
+
+    Configures the context with the DB URL without creating an engine.
+    """
+    url = _get_database_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+        compare_type=True,
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """
+    Run migrations in online mode.
+
+    Creates a SQLAlchemy engine and establishes the connection.
+    """
+    configuration = config.get_section(config.config_ini_section) or {}
+    configuration["sqlalchemy.url"] = _get_database_url()
+
+    connectable = engine_from_config(
+        configuration,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            compare_type=True,
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/pricewatch/app/db/migrations/script.py.mako b/pricewatch/app/db/migrations/script.py.mako
new file mode 100755
index 0000000..44417d4
--- /dev/null
+++ b/pricewatch/app/db/migrations/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+"""
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# Revision identifiers, used by Alembic.
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py b/pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py new file mode 100755 index 0000000..94afcf5 --- /dev/null +++ b/pricewatch/app/db/migrations/versions/20260114_01_initial_schema.py @@ -0,0 +1,124 @@ +"""Initial schema + +Revision ID: 20260114_01 +Revises: None +Create Date: 2026-01-14 00:00:00 +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# Revision identifiers, used by Alembic. +revision = "20260114_01" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "products", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("source", sa.String(length=50), nullable=False), + sa.Column("reference", sa.String(length=100), nullable=False), + sa.Column("url", sa.Text(), nullable=False), + sa.Column("title", sa.Text(), nullable=True), + sa.Column("category", sa.Text(), nullable=True), + sa.Column("currency", sa.String(length=3), nullable=True), + sa.Column("first_seen_at", sa.TIMESTAMP(), nullable=False), + sa.Column("last_updated_at", sa.TIMESTAMP(), nullable=False), + sa.UniqueConstraint("source", "reference", name="uq_product_source_reference"), + ) + op.create_index("ix_product_source", "products", ["source"], unique=False) + op.create_index("ix_product_reference", "products", ["reference"], unique=False) + op.create_index("ix_product_last_updated", "products", ["last_updated_at"], unique=False) + + op.create_table( + "price_history", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("product_id", sa.Integer(), nullable=False), + sa.Column("price", sa.Numeric(10, 2), nullable=True), + sa.Column("shipping_cost", sa.Numeric(10, 2), nullable=True), + sa.Column("stock_status", sa.String(length=20), nullable=True), + sa.Column("fetch_method", sa.String(length=20), nullable=False), + sa.Column("fetch_status", sa.String(length=20), nullable=False), + sa.Column("fetched_at", sa.TIMESTAMP(), nullable=False), + sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"), + sa.UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"), + sa.CheckConstraint("stock_status IN ('in_stock', 'out_of_stock', 'unknown')"), + sa.CheckConstraint("fetch_method IN ('http', 'playwright')"), + sa.CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"), + ) + op.create_index("ix_price_history_product_id", "price_history", ["product_id"], unique=False) + op.create_index("ix_price_history_fetched_at", "price_history", ["fetched_at"], unique=False) + + op.create_table( + "product_images", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("product_id", sa.Integer(), nullable=False), + sa.Column("image_url", sa.Text(), nullable=False), + sa.Column("position", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"), + sa.UniqueConstraint("product_id", "image_url", name="uq_product_image_url"), + ) + op.create_index("ix_product_image_product_id", "product_images", ["product_id"], unique=False) + + op.create_table( + "product_specs", + 
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("product_id", sa.Integer(), nullable=False),
+        sa.Column("spec_key", sa.String(length=200), nullable=False),
+        sa.Column("spec_value", sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"),
+        sa.UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
+    )
+    op.create_index("ix_product_spec_product_id", "product_specs", ["product_id"], unique=False)
+    op.create_index("ix_product_spec_key", "product_specs", ["spec_key"], unique=False)
+
+    op.create_table(
+        "scraping_logs",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("product_id", sa.Integer(), nullable=True),
+        sa.Column("url", sa.Text(), nullable=False),
+        sa.Column("source", sa.String(length=50), nullable=False),
+        sa.Column("reference", sa.String(length=100), nullable=True),
+        sa.Column("fetch_method", sa.String(length=20), nullable=False),
+        sa.Column("fetch_status", sa.String(length=20), nullable=False),
+        sa.Column("fetched_at", sa.TIMESTAMP(), nullable=False),
+        sa.Column("duration_ms", sa.Integer(), nullable=True),
+        sa.Column("html_size_bytes", sa.Integer(), nullable=True),
+        sa.Column("errors", postgresql.JSONB(), nullable=True),
+        sa.Column("notes", postgresql.JSONB(), nullable=True),
+        sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="SET NULL"),
+        sa.CheckConstraint("fetch_method IN ('http', 'playwright')"),
+        sa.CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"),
+    )
+    op.create_index("ix_scraping_log_product_id", "scraping_logs", ["product_id"], unique=False)
+    op.create_index("ix_scraping_log_source", "scraping_logs", ["source"], unique=False)
+    op.create_index("ix_scraping_log_fetched_at", "scraping_logs", ["fetched_at"], unique=False)
+    op.create_index("ix_scraping_log_fetch_status", "scraping_logs", ["fetch_status"], unique=False)
+
+
+def downgrade() -> None:
+    op.drop_index("ix_scraping_log_fetch_status", table_name="scraping_logs")
+    op.drop_index("ix_scraping_log_fetched_at", table_name="scraping_logs")
+    op.drop_index("ix_scraping_log_source", table_name="scraping_logs")
+    op.drop_index("ix_scraping_log_product_id", table_name="scraping_logs")
+    op.drop_table("scraping_logs")
+
+    op.drop_index("ix_product_spec_key", table_name="product_specs")
+    op.drop_index("ix_product_spec_product_id", table_name="product_specs")
+    op.drop_table("product_specs")
+
+    op.drop_index("ix_product_image_product_id", table_name="product_images")
+    op.drop_table("product_images")
+
+    op.drop_index("ix_price_history_fetched_at", table_name="price_history")
+    op.drop_index("ix_price_history_product_id", table_name="price_history")
+    op.drop_table("price_history")
+
+    op.drop_index("ix_product_last_updated", table_name="products")
+    op.drop_index("ix_product_reference", table_name="products")
+    op.drop_index("ix_product_source", table_name="products")
+    op.drop_table("products")
diff --git a/pricewatch/app/db/migrations/versions/__pycache__/20260114_01_initial_schema.cpython-313.pyc b/pricewatch/app/db/migrations/versions/__pycache__/20260114_01_initial_schema.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..8258ddb5e8dd77fa5386d611f04df7675719efcb
GIT binary patch
literal 8818
[... base85 binary payload omitted ...]
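Besides the usual `alembic upgrade head` CLI flow, the revision above can be driven programmatically; a sketch, assuming the repo-level alembic.ini points at the env.py shown earlier:

    # Hypothetical programmatic migration run
    from alembic import command
    from alembic.config import Config

    cfg = Config("alembic.ini")
    command.upgrade(cfg, "head")    # applies 20260114_01_initial_schema
    command.downgrade(cfg, "base")  # exercises the downgrade() path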
z&Z|yL!JWG5y5h!e=yPEYxbOnsGPS?o+rDY96)bi#dZ83%ADwUgo((K#Igs zlJk0}&?8>rMIkUgHWcXX?mgMv)6>s(_hwL7~AqyL&o&`UBnF zL+XF;1`#@FR~%w03^#8|l;jvj(!+#OXE>ZacILFt6+-QR17qIXKPDcuC69-FNh z6u(319aC<-ws{AH!+@YSDSEBhHc+fGPWVR+QU<&^N0$MiPMOY2yTh1n#xrOKGgk~c z4Onw-UIW6MTV)yCdGl}7RqcZH zYIn>Jynp)-{L006nY~QipmU&7qXvu-QoH>LHA415QgDTcH|`&#Ea2+_?=1s=0PwY@ zc=L!oXt0TSMC^XVHW zd-!j+fryN0fq$4rN}YjNdQ_=5^cu%h{9uI$V&l9d6LG!z;CxhZsiY0^X5Ju+5XCRW z`BaJ*Vr&G?mlPj75_!N%GAE~{4YD~?eD{zXiL*&0$Hl1L(g2EArKxHzfbF%(=z?)P zqZgRb3uBjvbwweH5=chMA=MBD(MlT2d=f33GL9V>4+l=P@&bqg0=0GoTE(=?iuX3# zY2kq&J{DSo3tt!|)Ea*8~ZN^tX!F&~S|fa}(9n(P`*N=G6{l2j;%$vGaW-sAWL zina!oM%Bb-5`&H(5;v`(RF#aLj58ao#%f&pAo3vwSy>Hjhr}uFQ8AHD3W{f3fKWzQaWgUo z-&d$<^sTH^Opi}qVx})lURP>vs*yJ;3X+UD9@2--jR$Z3R`G<>Nd!@*)Lj-a;$y;9 zG;aV`kH%5t!Da_0$E=F$3Ue)d@ej6l(6A|NC@v;ZD8?reND9F`+((efLaHgC2~PKz zgTM=MsZ=OB7fSLm&GJ&{5yIqvSqd4BfmvQVh6YoTW9Y8byy8ix;E^c83=FpyF6mD$ zo9&71N6P)1Yu{aaOV|ELd%v&vZN<}LOOEH>Mej3T=Ga;^PhZvhPb^)0KE61wPZw`v#lKL$uD-wXQZ~;+dJ(t^yS< znj%-%wt6MonDxI#1!}s~*}1xq8=lOa3FmIj7O1cPsoSkym-Sv-vSHgs9y4c{qixaA zmjA5pAKe4KbFAb$=CjeUDz4Glh1}Gw0(HAIN9Nex%(XQkcj;#CZe*Q`;v0~}w|wt| ze6D@(dkc#>E8qJV2jd)cY+3)CpG8rYkuJ+fw+XWJ{Sh=N{>WEH{Y=(^q#ihhwWd>r z3DSIgx{UF?OyvjTn{%x!gR776f56pzrm9Sk=HvUz7{8jSu{i!7%g9<2q`9Ouo07s# zZ*Lf11mD0zP{4-5t1jqRvTec_NdI|KjF4iG6n&&PO$xHr!(F84B*kG;bdcf@Dab)C zCg%W{918@U+N#kT#bOFpQ)HE=)z#tfbpj*-h^L@XDx$FW(3V%Xp&f%udJl!Ryqf%O zGFRK3g)BTtB3x~DWk(9s)l#!9^XmfjWvRJ^`ZV+d)l3q#`e1A3T7ep~f(+$`CyYZj z|3I~r*o3mcW}?JqOLOah?OAVu8Z9ADWM&G~`BL+ErmaAoEjFLF{e?QDngw>>Zj9tK z7Q#Wl;$hjS7-3n(!zEC1j*k?L3xB=9CCCbO9*+?-?%nL@io))TEP=JfGL4IWN1^zM zsbR~k{aJ str: + return f"" + + +class PriceHistory(Base): + """ + Historique prix (time-series). + + Une ligne par scraping réussi avec extraction prix. + Unique constraint sur (product_id, fetched_at) évite doublons. + """ + + __tablename__ = "price_history" + + # Primary key + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # Foreign key + product_id: Mapped[int] = mapped_column( + Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False + ) + + # Price data + price: Mapped[Optional[Decimal]] = mapped_column( + Numeric(10, 2), nullable=True, comment="Product price" + ) + shipping_cost: Mapped[Optional[Decimal]] = mapped_column( + Numeric(10, 2), nullable=True, comment="Shipping cost" + ) + stock_status: Mapped[Optional[str]] = mapped_column( + String(20), nullable=True, comment="Stock status (in_stock, out_of_stock, unknown)" + ) + + # Fetch metadata + fetch_method: Mapped[str] = mapped_column( + String(20), nullable=False, comment="Fetch method (http, playwright)" + ) + fetch_status: Mapped[str] = mapped_column( + String(20), nullable=False, comment="Fetch status (success, partial, failed)" + ) + fetched_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, comment="Scraping timestamp" + ) + + # Relationship + product: Mapped["Product"] = relationship("Product", back_populates="price_history") + + # Constraints + __table_args__ = ( + UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"), + Index("ix_price_history_product_id", "product_id"), + Index("ix_price_history_fetched_at", "fetched_at"), + CheckConstraint("stock_status IN ('in_stock', 'out_of_stock', 'unknown')"), + CheckConstraint("fetch_method IN ('http', 'playwright')"), + CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"), + ) + + def __repr__(self) -> str: + return f"" + + +class ProductImage(Base): + """ + Images produit (N images par produit). 
+
+    The unique constraint on (product_id, image_url) prevents duplicates.
+    Position preserves the ordering of the images.
+    """
+
+    __tablename__ = "product_images"
+
+    # Primary key
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+
+    # Foreign key
+    product_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
+    )
+
+    # Image data
+    image_url: Mapped[str] = mapped_column(Text, nullable=False, comment="Image URL")
+    position: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, comment="Image position (0=main)"
+    )
+
+    # Relationship
+    product: Mapped["Product"] = relationship("Product", back_populates="images")
+
+    # Constraints
+    __table_args__ = (
+        UniqueConstraint("product_id", "image_url", name="uq_product_image_url"),
+        Index("ix_product_image_product_id", "product_id"),
+    )
+
+    def __repr__(self) -> str:
+        return f"<ProductImage(id={self.id}, product_id={self.product_id}, position={self.position})>"
+
+
+class ProductSpec(Base):
+    """
+    Product specifications (key-value).
+
+    The unique constraint on (product_id, spec_key) prevents duplicates.
+    Allows efficient queries by key.
+    """
+
+    __tablename__ = "product_specs"
+
+    # Primary key
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+
+    # Foreign key
+    product_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
+    )
+
+    # Spec data
+    spec_key: Mapped[str] = mapped_column(
+        String(200), nullable=False, comment="Specification key (e.g., 'Brand', 'Color')"
+    )
+    spec_value: Mapped[str] = mapped_column(Text, nullable=False, comment="Specification value")
+
+    # Relationship
+    product: Mapped["Product"] = relationship("Product", back_populates="specs")
+
+    # Constraints
+    __table_args__ = (
+        UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
+        Index("ix_product_spec_product_id", "product_id"),
+        Index("ix_product_spec_key", "spec_key"),
+    )
+
+    def __repr__(self) -> str:
+        return f"<ProductSpec(id={self.id}, product_id={self.product_id}, spec_key={self.spec_key!r})>"
+
+
+class ScrapingLog(Base):
+    """
+    Observability logs for debugging.
+
+    Optional FK to products (allows logging even when no product was created).
+    JSONB for errors/notes because their structure varies.
+    Enables analytics: success rate, average duration, etc.
+ """ + + __tablename__ = "scraping_logs" + + # Primary key + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + + # Foreign key (optional) + product_id: Mapped[Optional[int]] = mapped_column( + Integer, ForeignKey("products.id", ondelete="SET NULL"), nullable=True + ) + + # Scraping metadata + url: Mapped[str] = mapped_column(Text, nullable=False, comment="Scraped URL") + source: Mapped[str] = mapped_column( + String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)" + ) + reference: Mapped[Optional[str]] = mapped_column( + String(100), nullable=True, comment="Product reference (if extracted)" + ) + + # Fetch metadata + fetch_method: Mapped[str] = mapped_column( + String(20), nullable=False, comment="Fetch method (http, playwright)" + ) + fetch_status: Mapped[str] = mapped_column( + String(20), nullable=False, comment="Fetch status (success, partial, failed)" + ) + fetched_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, default=datetime.utcnow, comment="Scraping timestamp" + ) + + # Performance metrics + duration_ms: Mapped[Optional[int]] = mapped_column( + Integer, nullable=True, comment="Fetch duration in milliseconds" + ) + html_size_bytes: Mapped[Optional[int]] = mapped_column( + Integer, nullable=True, comment="HTML response size in bytes" + ) + + # Debug data (JSONB) + errors: Mapped[Optional[list[str]]] = mapped_column( + JSON().with_variant(JSONB, "postgresql"), + nullable=True, + comment="Error messages (list of strings)", + ) + notes: Mapped[Optional[list[str]]] = mapped_column( + JSON().with_variant(JSONB, "postgresql"), + nullable=True, + comment="Debug notes (list of strings)", + ) + + # Relationship + product: Mapped[Optional["Product"]] = relationship("Product", back_populates="logs") + + # Constraints + __table_args__ = ( + Index("ix_scraping_log_product_id", "product_id"), + Index("ix_scraping_log_source", "source"), + Index("ix_scraping_log_fetched_at", "fetched_at"), + Index("ix_scraping_log_fetch_status", "fetch_status"), + CheckConstraint("fetch_method IN ('http', 'playwright')"), + CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/pricewatch/app/db/repository.py b/pricewatch/app/db/repository.py new file mode 100755 index 0000000..5474b98 --- /dev/null +++ b/pricewatch/app/db/repository.py @@ -0,0 +1,140 @@ +""" +Repository pattern pour la persistence SQLAlchemy. + +Centralise les operations CRUD sur les modeles DB a partir d'un ProductSnapshot. +""" + +from __future__ import annotations + +from typing import Optional + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from pricewatch.app.core.logging import get_logger +from pricewatch.app.core.schema import ProductSnapshot +from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog + +logger = get_logger("db.repository") + + +class ProductRepository: + """Repository de persistence pour ProductSnapshot.""" + + def __init__(self, session: Session) -> None: + self.session = session + + def get_or_create(self, source: str, reference: str, url: str) -> Product: + """ + Recuperer ou creer un produit par cle naturelle (source, reference). 
+ """ + product = ( + self.session.query(Product) + .filter(Product.source == source, Product.reference == reference) + .one_or_none() + ) + if product: + return product + + product = Product(source=source, reference=reference, url=url) + self.session.add(product) + self.session.flush() + return product + + def update_product_metadata(self, product: Product, snapshot: ProductSnapshot) -> None: + """Met a jour les metadonnees produit si disponibles.""" + if snapshot.url: + product.url = snapshot.url + if snapshot.title: + product.title = snapshot.title + if snapshot.category: + product.category = snapshot.category + if snapshot.currency: + product.currency = snapshot.currency + + def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]: + """Ajoute une entree d'historique de prix si inexistante.""" + existing = ( + self.session.query(PriceHistory) + .filter( + PriceHistory.product_id == product.id, + PriceHistory.fetched_at == snapshot.fetched_at, + ) + .one_or_none() + ) + if existing: + return existing + + price_entry = PriceHistory( + product_id=product.id, + price=snapshot.price, + shipping_cost=snapshot.shipping_cost, + stock_status=snapshot.stock_status, + fetch_method=snapshot.debug.method, + fetch_status=snapshot.debug.status, + fetched_at=snapshot.fetched_at, + ) + self.session.add(price_entry) + return price_entry + + def sync_images(self, product: Product, images: list[str]) -> None: + """Synchronise les images (ajout des nouvelles).""" + existing_urls = {image.image_url for image in product.images} + for position, url in enumerate(images): + if url in existing_urls: + continue + self.session.add(ProductImage(product_id=product.id, image_url=url, position=position)) + + def sync_specs(self, product: Product, specs: dict[str, str]) -> None: + """Synchronise les specs (upsert par cle).""" + existing_specs = {spec.spec_key: spec for spec in product.specs} + for key, value in specs.items(): + if key in existing_specs: + existing_specs[key].spec_value = value + else: + self.session.add(ProductSpec(product_id=product.id, spec_key=key, spec_value=value)) + + def add_scraping_log(self, snapshot: ProductSnapshot, product_id: Optional[int]) -> ScrapingLog: + """Ajoute un log de scraping pour observabilite.""" + log_entry = ScrapingLog( + product_id=product_id, + url=snapshot.url, + source=snapshot.source, + reference=snapshot.reference, + fetch_method=snapshot.debug.method, + fetch_status=snapshot.debug.status, + fetched_at=snapshot.fetched_at, + duration_ms=snapshot.debug.duration_ms, + html_size_bytes=snapshot.debug.html_size_bytes, + errors=snapshot.debug.errors or None, + notes=snapshot.debug.notes or None, + ) + self.session.add(log_entry) + return log_entry + + def save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]: + """ + Persiste un ProductSnapshot complet dans la base. + + Retourne l'id produit ou None si reference absente. 
+ """ + if not snapshot.reference: + logger.warning("Reference absente: persistence ignoree") + self.add_scraping_log(snapshot, product_id=None) + return None + + product = self.get_or_create(snapshot.source, snapshot.reference, snapshot.url) + self.update_product_metadata(product, snapshot) + self.add_price_history(product, snapshot) + self.sync_images(product, snapshot.images) + self.sync_specs(product, snapshot.specs) + self.add_scraping_log(snapshot, product_id=product.id) + return product.id + + def safe_save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]: + """Sauvegarde avec gestion d'erreur SQLAlchemy.""" + try: + return self.save_snapshot(snapshot) + except SQLAlchemyError as exc: + logger.error(f"Erreur SQLAlchemy: {exc}") + raise diff --git a/pricewatch/app/scraping/__init__.py b/pricewatch/app/scraping/__init__.py index e69de29..7afef5d 100755 --- a/pricewatch/app/scraping/__init__.py +++ b/pricewatch/app/scraping/__init__.py @@ -0,0 +1,3 @@ +from pricewatch.app.scraping.pipeline import ScrapingPipeline + +__all__ = ["ScrapingPipeline"] diff --git a/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc index 1895f3301f1b994a57f22b2773616f377ca7fd88..9cc8384830c8290dd2a1b5111b7e1ca90616e1ac 100755 GIT binary patch delta 192 zcmZ3$*uhl)nU|M~0SNx;rDw(g>Bk@r3@||%pT&TTsSLpk!HnJvUW`SI3Jk$a`iw

diff --git a/pricewatch/app/scraping/__init__.py b/pricewatch/app/scraping/__init__.py
index e69de29..7afef5d 100755
--- a/pricewatch/app/scraping/__init__.py
+++ b/pricewatch/app/scraping/__init__.py
@@ -0,0 +1,3 @@
+from pricewatch.app.scraping.pipeline import ScrapingPipeline
+
+__all__ = ["ScrapingPipeline"]
diff --git a/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc b/pricewatch/app/scraping/__pycache__/__init__.cpython-313.pyc
index 1895f3301f1b994a57f22b2773616f377ca7fd88..9cc8384830c8290dd2a1b5111b7e1ca90616e1ac 100755
GIT binary patch
delta 192
[... base85 binary payload omitted ...]
delta 87
[... base85 binary payload omitted ...]
diff --git a/pricewatch/app/scraping/__pycache__/pipeline.cpython-313.pyc b/pricewatch/app/scraping/__pycache__/pipeline.cpython-313.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..da613bb4ea70b8829ee894fa1a03f46393cfe98c
GIT binary patch
literal 2779
[... base85 binary payload omitted ...]
diff --git a/pricewatch/app/scraping/pipeline.py b/pricewatch/app/scraping/pipeline.py
new file mode 100755
index 0000000..cbf7865
--- /dev/null
+++ b/pricewatch/app/scraping/pipeline.py
@@ -0,0 +1,52 @@
+"""
+Persistence pipeline for scraping snapshots.
+
+Must never block the main scraping pipeline when the DB is unavailable.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+from sqlalchemy.exc import SQLAlchemyError
+
+from pricewatch.app.core.config import AppConfig, get_config
+from pricewatch.app.core.logging import get_logger
+from pricewatch.app.core.schema import ProductSnapshot
+from pricewatch.app.db.connection import get_session
+from pricewatch.app.db.repository import ProductRepository
+
+logger = get_logger("scraping.pipeline")
+
+
+class ScrapingPipeline:
+    """Orchestrates DB persistence for a ProductSnapshot."""
+
+    def __init__(self, config: Optional[AppConfig] = None) -> None:
+        self.config = config
+
+    def process_snapshot(self, snapshot: ProductSnapshot, save_to_db: bool = True) -> Optional[int]:
+        """
+        Persist a snapshot to the database when enabled.
+
+        Returns the product id if saved, otherwise None.
+ """ + app_config = self.config or get_config() + if not save_to_db or not app_config.enable_db: + logger.debug("Persistence DB desactivee") + return None + + try: + with get_session(app_config) as session: + repo = ProductRepository(session) + product_id = repo.safe_save_snapshot(snapshot) + session.commit() + return product_id + except SQLAlchemyError as exc: + snapshot.add_note(f"Persistence DB echouee: {exc}") + logger.error(f"Persistence DB echouee: {exc}") + return None + except Exception as exc: + snapshot.add_note(f"Erreur pipeline DB: {exc}") + logger.error(f"Erreur pipeline DB: {exc}") + return None diff --git a/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc b/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc index fc01085d8ed678f3aebae07d6b701a0fbccbf3ee..89edb9b78e97c6fb4b60c93a5f2e69a47b5874b9 100755 GIT binary patch delta 1417 zcmY*YZ)_7~9Dd*4`tPoPuIt}zU3Ycex`uUIH(;(~Ol8DL*qj@eKn5L z(YdIQWI|ocG$&%Yo)M!G!^IEJazbiueO2U-q@of!LQTRT`iAmSZG+*?69)%RG@tOH z`xNhT0NPrxI?Fm>u`cOP>$CNHHg@IcF(epzRUI|*B?GS_YQ}BowxJRI_?T4}J_Z06 z20%8OEou#qTa|v0TWz)QiIN5w9C^yUNx9#p@=W(8)19Nb?^<2UyU&i?@pa~Xy_>$? z1@b2CEztFOn%Sh8Jni44{n@sOoAf?8V*gA9FUrlXQ!01`U95jc^|lIK4*F43+jjXv zg9Z|C+>W?9t68@VDnAJTfn*abQbPU8z^pRZA4f ziZPXp&L@k;s;fwJ-&%)$ZuDwz;N#L>h(4(ALPMT;-VH4FCG(;=%WTcj+tO+rm!-kQ z!EAFlXYEhx?o#xb{e?QudCyr-CbI0w*7cepTmLj`w|yD8#J5@JN(`yH~a=^BLh-U^1F zuVhcjk_Vk>u|Q>IM=y8i(Tx_t@LzN$t!TKl5o*v(>-OOy!Oz74q!?4CG^>Def5aCQ zNMk6HQj!ys(lI_cTbUxB8!tqrxOlu+6X&I5k&vRv0EKR~1|3Bmwx@7GNnFBbB?<$> zAg#YkW5TyX`V|fMUkm+S3us5d*|2nS@#G4db8blwf9GskAPSDUrPyL@If{h5qd!A@ zq+Qjnk-4^hbl&gP{-OmgrcBCT`|m;Mmp=}?p@J9Cp7!mUOG>X9^lJOluvh-J-K&Bh zqEd$+rje`jARLuboh=Z)h0b+F;ESh2y&;?0wlYxym@@mne0@K0C&K{z_S~N<$ER66*6f)!vnn62NpdP@#>Q zjft=HWyL9VBTF|AucDW_H|k}jXO9AI3`O0JxkMxtkM0!L(V?DU%TsD-TL}CH6t2AX E4}HjyEC2ui delta 743 zcmY+A-Ahw(7{`Cl+0N{2XFA<%n{MvRnRa}E>D-o<7FhHGOVI+u5~Xs^LSbi`S4QB4 zH`2=^W<@X~Ou-;I5EdoYE?RdECD-T=2(M1Lx+tiAXVFE^#rN~P{&;@R&5`Hf{9U6_ zPpCa{4Ma}XtmGRE>{A-*@pgk1{f1UtF`NT88hAGz;0J9MVlt=A)~t>cn(uOVxQ&w5 zbcn z-@DB_QoLiE_oR5wMn(G;-y#1hUSr^q{MvSd0T~aMK7^OpXYZ8{I&@%VIxXt9|68XO z=Z)t2J%0F`7f6_Mlb)h~U3sKmONc;RBs5FN0BO;<$N-3XoG5Fyjhbj%M>H}n437*8 zvFOO?SYqk4$m7qlLxyHri-xGsW@n`x`&<>Qq}~f(xV(7@>PRVqifa){5ZpGABaX^l z0>$o(O;9wHKO}XFx25_vUYp?cZ6$dLJ5ZIw?hgPTt-0ZkLBKgE>7#wdF=Hb2SHx&x*1r& zWJ489qSA04f^x{W4`2qR#!E1S>BeAv5o?KeDTHFN``N{%3t81w_BXM%Igdgp;%zxB zwrABy8N(xfpUXn0S`=9fg~)WGiZZX7zmyyyrKomE%6&s87-rq`2!R_%5eYy diff --git a/pricewatch/app/stores/amazon/store.py b/pricewatch/app/stores/amazon/store.py index 713593b..a2bdaca 100755 --- a/pricewatch/app/stores/amazon/store.py +++ b/pricewatch/app/stores/amazon/store.py @@ -214,6 +214,18 @@ class AmazonStore(BaseStore): except ValueError: continue + # Fallback: chercher les spans séparés a-price-whole et a-price-fraction + whole = soup.select_one("span.a-price-whole") + fraction = soup.select_one("span.a-price-fraction") + if whole and fraction: + whole_text = whole.get_text(strip=True) + fraction_text = fraction.get_text(strip=True) + try: + price_str = f"{whole_text}.{fraction_text}" + return float(price_str) + except ValueError: + pass + debug.errors.append("Prix non trouvé") return None @@ -270,6 +282,14 @@ class AmazonStore(BaseStore): if url and url.startswith("http"): images.append(url) + # Fallback: chercher tous les img tags si aucune image trouvée + if not images: + all_imgs = soup.find_all("img") + for img in all_imgs: + url = img.get("src") or img.get("data-src") + if url and url.startswith("http"): + images.append(url) + return list(set(images)) # 
 
     def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
diff --git a/pricewatch/app/tasks/__init__.py b/pricewatch/app/tasks/__init__.py
new file mode 100755
index 0000000..7ffa53d
--- /dev/null
+++ b/pricewatch/app/tasks/__init__.py
@@ -0,0 +1,8 @@
+"""
+Tasks module for RQ jobs.
+"""
+
+from pricewatch.app.tasks.scrape import scrape_product
+from pricewatch.app.tasks.scheduler import ScrapingScheduler
+
+__all__ = ["scrape_product", "ScrapingScheduler"]
diff --git a/pricewatch/app/tasks/scheduler.py b/pricewatch/app/tasks/scheduler.py
new file mode 100755
index 0000000..628594c
--- /dev/null
+++ b/pricewatch/app/tasks/scheduler.py
@@ -0,0 +1,75 @@
+"""
+Scheduling of scraping jobs via RQ Scheduler.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import redis
+from rq import Queue
+from rq_scheduler import Scheduler
+
+from pricewatch.app.core.config import AppConfig, get_config
+from pricewatch.app.core.logging import get_logger
+from pricewatch.app.tasks.scrape import scrape_product
+
+logger = get_logger("tasks.scheduler")
+
+
+@dataclass
+class ScheduledJobInfo:
+    """Return info for a scheduled job."""
+
+    job_id: str
+    next_run: datetime
+
+
+class ScrapingScheduler:
+    """Scheduler for scraping jobs backed by RQ."""
+
+    def __init__(self, config: Optional[AppConfig] = None, queue_name: str = "default") -> None:
+        self.config = config or get_config()
+        self.redis = redis.from_url(self.config.redis.url)
+        self.queue = Queue(queue_name, connection=self.redis)
+        self.scheduler = Scheduler(queue=self.queue, connection=self.redis)
+
+    def enqueue_immediate(
+        self,
+        url: str,
+        use_playwright: Optional[bool] = None,
+        save_db: bool = True,
+    ):
+        """Enqueue an immediate job."""
+        job = self.queue.enqueue(
+            scrape_product,
+            url,
+            use_playwright=use_playwright,
+            save_db=save_db,
+        )
+        logger.info(f"Job enqueued: {job.id}")
+        return job
+
+    def schedule_product(
+        self,
+        url: str,
+        interval_hours: int = 24,
+        use_playwright: Optional[bool] = None,
+        save_db: bool = True,
+    ) -> ScheduledJobInfo:
+        """Schedule a recurring scrape (interval in hours)."""
+        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
+        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
+
+        job = self.scheduler.schedule(
+            scheduled_time=next_run,
+            func=scrape_product,
+            args=[url],
+            kwargs={"use_playwright": use_playwright, "save_db": save_db},
+            interval=interval_seconds,
+            repeat=None,
+        )
+        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
+        return ScheduledJobInfo(job_id=job.id, next_run=next_run)
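From application code the scheduler is used as follows (a sketch; Redis must be reachable at the configured PW_REDIS_* URL):

    # Hypothetical usage sketch for ScrapingScheduler
    from pricewatch.app.tasks.scheduler import ScrapingScheduler

    scheduler = ScrapingScheduler(queue_name="default")
    job = scheduler.enqueue_immediate("https://www.amazon.fr/dp/B08N5WRWNW")
    info = scheduler.schedule_product(
        "https://www.amazon.fr/dp/B08N5WRWNW",
        interval_hours=12,
    )
    print(job.id, info.job_id, info.next_run.isoformat())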
+""" + +from __future__ import annotations + +from typing import Any, Optional + +from pricewatch.app.core.config import AppConfig, get_config +from pricewatch.app.core.logging import get_logger +from pricewatch.app.core.registry import get_registry +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.scraping.http_fetch import fetch_http +from pricewatch.app.scraping.pipeline import ScrapingPipeline +from pricewatch.app.scraping.pw_fetch import fetch_playwright +from pricewatch.app.stores.aliexpress.store import AliexpressStore +from pricewatch.app.stores.amazon.store import AmazonStore +from pricewatch.app.stores.backmarket.store import BackmarketStore +from pricewatch.app.stores.cdiscount.store import CdiscountStore + +logger = get_logger("tasks.scrape") + + +def setup_stores() -> None: + """Enregistre les stores disponibles si besoin.""" + registry = get_registry() + if registry.list_stores(): + return + registry.register(AmazonStore()) + registry.register(CdiscountStore()) + registry.register(BackmarketStore()) + registry.register(AliexpressStore()) + + +def scrape_product( + url: str, + use_playwright: Optional[bool] = None, + save_db: bool = True, + save_html: bool = False, + save_screenshot: bool = False, + headful: bool = False, + timeout_ms: Optional[int] = None, +) -> dict[str, Any]: + """ + Scrape un produit et persiste en base via ScrapingPipeline. + + Retourne un dict avec success, product_id, snapshot, error. + """ + config: AppConfig = get_config() + setup_stores() + + if use_playwright is None: + use_playwright = config.default_use_playwright + + if timeout_ms is None: + timeout_ms = config.default_playwright_timeout + + registry = get_registry() + store = registry.detect_store(url) + if not store: + snapshot = ProductSnapshot( + source="unknown", + url=url, + debug=DebugInfo( + method=FetchMethod.HTTP, + status=DebugStatus.FAILED, + errors=["Aucun store detecte"], + ), + ) + ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db) + return {"success": False, "product_id": None, "snapshot": snapshot, "error": "store"} + + canonical_url = store.canonicalize(url) + + html = None + fetch_method = FetchMethod.HTTP + fetch_error = None + duration_ms = None + html_size_bytes = None + pw_result = None + + http_result = fetch_http(canonical_url) + duration_ms = http_result.duration_ms + + if http_result.success: + html = http_result.html + fetch_method = FetchMethod.HTTP + elif use_playwright: + pw_result = fetch_playwright( + canonical_url, + headless=not headful, + timeout_ms=timeout_ms, + save_screenshot=save_screenshot, + ) + duration_ms = pw_result.duration_ms + + if pw_result.success: + html = pw_result.html + fetch_method = FetchMethod.PLAYWRIGHT + else: + fetch_error = pw_result.error + else: + fetch_error = http_result.error + + if html: + html_size_bytes = len(html.encode("utf-8")) + if save_html: + from pricewatch.app.core.io import save_debug_html + + ref = store.extract_reference(canonical_url) or "unknown" + save_debug_html(html, f"{store.store_id}_{ref}") + + if save_screenshot and fetch_method == FetchMethod.PLAYWRIGHT and pw_result: + from pricewatch.app.core.io import save_debug_screenshot + + if pw_result and pw_result.screenshot: + ref = store.extract_reference(canonical_url) or "unknown" + save_debug_screenshot(pw_result.screenshot, f"{store.store_id}_{ref}") + + try: + snapshot = store.parse(html, canonical_url) + snapshot.debug.method = fetch_method + 
snapshot.debug.duration_ms = duration_ms + snapshot.debug.html_size_bytes = html_size_bytes + success = snapshot.debug.status != DebugStatus.FAILED + except Exception as exc: + snapshot = ProductSnapshot( + source=store.store_id, + url=canonical_url, + debug=DebugInfo( + method=fetch_method, + status=DebugStatus.FAILED, + errors=[f"Parsing failed: {exc}"], + duration_ms=duration_ms, + html_size_bytes=html_size_bytes, + ), + ) + success = False + fetch_error = str(exc) + else: + snapshot = ProductSnapshot( + source=store.store_id, + url=canonical_url, + debug=DebugInfo( + method=fetch_method, + status=DebugStatus.FAILED, + errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"], + duration_ms=duration_ms, + ), + ) + success = False + + product_id = ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db) + + return { + "success": success, + "product_id": product_id, + "snapshot": snapshot, + "error": fetch_error, + } diff --git a/pyproject.toml b/pyproject.toml index 9e92604..4697124 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,19 @@ dependencies = [ # Date/time utilities "python-dateutil>=2.8.2", + + # Database (Phase 2) + "sqlalchemy>=2.0.0", + "psycopg2-binary>=2.9.0", + "alembic>=1.13.0", + + # Configuration (Phase 2) + "python-dotenv>=1.0.0", + + # Worker/Queue (Phase 2) + "redis>=5.0.0", + "rq>=1.15.0", + "rq-scheduler>=0.13.0", ] [project.optional-dependencies] diff --git a/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc new file mode 100755 index 0000000000000000000000000000000000000000..10efd145df471f1746ae06231f1ea9625407316c GIT binary patch literal 8336 zcmb_BTW}jkasD@?jxmC?!&p{BUSlH<%=+-5Va-OITiaue{|?7 zxm@{3_be6wDcZ6sHza0ydU|?#dU|?hdN%_Bkw98LR4B%Mg#0&F?BuEgn{6B+H;7Cq zkvZ8h#Zd<H|eK-2D_&MlWjD}V9!+h zWC!hFurL*x4AbyrC+(c zC3L9Zh;=PKCtgx?LzfgSA2&*ID5P?!LM1gTD*Q`XV-At0)nc-uX$nQ;e72}T=`ScohAIVM zLl3u<&sT5w+W=p1zfwGY`D!S=As~EYtpD4y$DQ^Z}qNRK#XUJN%tk0EHxt1GfmqDro=Q&!YMRrnmfseUN|Jm$S<%OBkC2h7?co2dn(^;YFZBT>96fzkt zJFjFirkKghgUBkv{!HfeN>;761T&f0BGnBQB-BdK>;z4ifX!-J3FZ#-(E;0)$>>Nx?;->;s_9#9s~l;xrVtGQXtL%jtd<~0MT zy64N~y6(Dxqv^=coHbo}w1G>eSJARFptAf7o9A>)q!E+~DQOh59?aMr`Vbm|%;aZE zC6x|iy&p4Fku{qEgd{sdE;>@cjD^lMzOxqUUOKZpw#IkYBE3s5E*t;J_gWpX&lWYf z0fA}rZOC5RrU==eB4eD)_mV5Z6`u043$5m|dyJRefa9sBj&T9T!!Ux4k)3f^1i%Xn z@0GpK*Xl2hxn+@IeX@_j7!w=~43k*E6kn*!&o3}ret>f{-Bd9uRGZF$mun;*AhhmH z39^&i4U&e^N|(44c@=yk`oLqHL<%J4oQ7pUwy;$M9k)rQn2$M3mtKKgkCx5$SO!qM zuBfx7H82(uZW6@d5VQrxouWdScchG9>FHu&j9pYmpQzN{(i0yE4Y5+d~ysYQlX^tH(TWqnl`z2k(8iMvHo)H{TvW{NmZ4i4_F^}%4U`}lS{A=&P+aL7 zq`<`-)22Il=Ir4kqu()fhw`RDvpMv@vkFB&ZuYl~|48dT6ao)X6r!7nVDfXuyqd$= z+9?14oBdw`0Cv`}!W2p3#J8msJ9}lP%%|=1nWXIp9_y2`W&3KDV^imF;Ly$!7~&lB@dxNi`85HpmTqL?Os{?etFs24$xoH~8_ zj4a2Tmie7TB4&fAF3C0sS2;^{Ws4Cq%mEd@%FOs$`|ScU)4OJ4L4_OqE-F09rcR1!xbWQ~MQp_kHN!@HmuxtkuHKula!{;XdWeO!EJ(LS zWoT1OW&aD?=e0##4r5-3aN@M5ny(n!lv({ad-l%tJ|I%MZDID(IH^N}<^W)Z8N^ z?-2 zIoL{{#w!?0w#1refTTRhJ{#XGo3bX+XKYJyyWEk+TLDWcr#*8y)Pm(3*zkzke#B_P zD@s$d&Ev+F2(%z1r8?u5Li5UlK&$dFoQN(a@dBNB|OTyQa;di zPs=F#TL|mmAV_V`)@GN5)o$s9o@HBV z?RAj@nVK`%nc8cQ@mNaHrj(-zXB-Y_%`f-Jy(#}8j@1Hb^i3?a@WOLoTLKvaHZQwW zIJs|v)4UU0`Hv|+d1PA`sFwb=slk@j`VCzE`sH1JcHK8D?TK{X=momp)uM&}ukL-e zp5%eL?%SR~+iiAlyR@`j{%zVeNz(L<4x$B9!Q|6BuRt1)5|(UUrQauy+qh<_wWr#f z>!o$byR}fNBNf8EriD{sTdU`|{7<&n^mpyIFzh!8IkfD!J%9F958AV`<@-T9uF(rt zea{bHb$RfK$NteybE}LFS6Tv&R!>kx*E=Vv4c??~{BD_&C)iyjR2B=R$hqW-#CG?e2bBO|vt_RN55IUWa#z8nizB^I*t 
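For reference, this is how the two entry points above are meant to be driven; a minimal sketch, assuming Redis is reachable at the URL carried by AppConfig.redis.url and that worker processes are started separately:

    from pricewatch.app.tasks.scheduler import ScrapingScheduler

    scheduler = ScrapingScheduler()

    # One-off job, picked up by any worker listening on the "default" queue.
    job = scheduler.enqueue_immediate("https://www.amazon.fr/dp/B08N5WRWNW")

    # Recurring job every 12 hours, materialized by the rq-scheduler daemon.
    info = scheduler.schedule_product("https://www.amazon.fr/dp/B08N5WRWNW", interval_hours=12)
    print(info.job_id, info.next_run.isoformat())

Immediate jobs need an `rq worker` process, and recurring jobs additionally need an `rqscheduler` process, both pointed at the same Redis instance.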
diff --git a/pyproject.toml b/pyproject.toml
index 9e92604..4697124 100755
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,19 @@ dependencies = [
     # Date/time utilities
     "python-dateutil>=2.8.2",
+
+    # Database (Phase 2)
+    "sqlalchemy>=2.0.0",
+    "psycopg2-binary>=2.9.0",
+    "alembic>=1.13.0",
+
+    # Configuration (Phase 2)
+    "python-dotenv>=1.0.0",
+
+    # Worker/Queue (Phase 2)
+    "redis>=5.0.0",
+    "rq>=1.15.0",
+    "rq-scheduler>=0.13.0",
 ]

 [project.optional-dependencies]
diff --git a/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc b/tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..10efd145df471f1746ae06231f1ea9625407316c
GIT binary patch
literal 8336
[base85 data omitted]

literal 0
HcmV?d00001

diff --git a/tests/cli/test_run_db.py b/tests/cli/test_run_db.py
new file mode 100755
index 0000000..b22274c
--- /dev/null
+++ b/tests/cli/test_run_db.py
@@ -0,0 +1,106 @@
+"""
+End-to-end tests for the CLI run command with DB persistence.
+"""
+
+from dataclasses import dataclass
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from pricewatch.app.cli import main as cli_main
+from pricewatch.app.core.registry import get_registry
+from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
+from pricewatch.app.db.connection import get_session, init_db, reset_engine
+from pricewatch.app.db.models import Product
+from pricewatch.app.stores.base import BaseStore
+
+
+@dataclass
+class FakeDbConfig:
+    url: str
+
+
+@dataclass
+class FakeAppConfig:
+    db: FakeDbConfig
+    debug: bool = False
+    enable_db: bool = True
+
+
+class DummyStore(BaseStore):
+    def __init__(self) -> None:
+        super().__init__(store_id="dummy")
+
+    def match(self, url: str) -> float:
+        return 1.0 if "example.com" in url else 0.0
+
+    def canonicalize(self, url: str) -> str:
+        return url
+
+    def extract_reference(self, url: str) -> str | None:
+        return "REF123"
+
+    def parse(self, html: str, url: str) -> ProductSnapshot:
+        return ProductSnapshot(
+            source=self.store_id,
+            url=url,
+            title="Produit dummy",
+            price=9.99,
+            currency="EUR",
+            reference="REF123",
+            debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
+        )
+
+
+class DummyFetchResult:
+    def __init__(self, html: str) -> None:
+        self.success = True
+        self.html = html
+        self.error = None
+
+
+def test_cli_run_persists_db(tmp_path, monkeypatch):
+    """The CLI run command persists to the DB when --save-db is enabled."""
+    reset_engine()
+    db_path = tmp_path / "test.db"
+    config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
+    init_db(config)
+
+    yaml_path = tmp_path / "config.yaml"
+    out_path = tmp_path / "out.json"
+    yaml_path.write_text(
+        """
+urls:
+  - "https://example.com/product"
+options:
+  use_playwright: false
+  save_html: false
+  save_screenshot: false
+""",
+        encoding="utf-8",
+    )
+
+    registry = get_registry()
+    previous_stores = list(registry._stores)
+    registry._stores = []
+    registry.register(DummyStore())
+
+    monkeypatch.setattr(cli_main, "get_config", lambda: config)
+    monkeypatch.setattr(cli_main, "setup_stores", lambda: None)
+    monkeypatch.setattr(cli_main, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
+
+    runner = CliRunner()
+    try:
+        result = runner.invoke(
+            cli_main.app,
+            ["run", "--yaml", str(yaml_path), "--out", str(out_path), "--save-db"],
+        )
+    finally:
+        registry._stores = previous_stores
+        reset_engine()
+
+    assert result.exit_code == 0
+    assert out_path.exists()
+
+    with get_session(config) as session:
+        assert session.query(Product).count() == 1
diff --git a/tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc b/tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..88a79f91f7b9b1fd3af055cc2c60226dcad519fc
GIT binary patch
literal 66626
[base85 data omitted]

literal 0
HcmV?d00001

diff --git a/tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc b/tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..0f51b6bee61d2b98f3167a540745cd550352aee7
GIT binary patch
literal 33442
[base85 data omitted]

literal 0
HcmV?d00001
diff --git a/tests/core/test_io.py b/tests/core/test_io.py
new file mode 100755
index 0000000..db2edcb
--- /dev/null
+++ b/tests/core/test_io.py
@@ -0,0 +1,462 @@
+"""
+Tests for pricewatch.app.core.io
+
+Covers YAML/JSON reading/writing and the debug save helpers.
+"""
+
+import json
+import tempfile
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+import yaml
+
+from pricewatch.app.core.io import (
+    ScrapingConfig,
+    ScrapingOptions,
+    read_json_results,
+    read_yaml_config,
+    save_debug_html,
+    save_debug_screenshot,
+    write_json_results,
+)
+from pricewatch.app.core.schema import (
+    DebugInfo,
+    DebugStatus,
+    FetchMethod,
+    ProductSnapshot,
+    StockStatus,
+)
+
+
+class TestScrapingOptions:
+    """Tests for the ScrapingOptions model."""
+
+    def test_default_values(self):
+        """The default values are correct."""
+        options = ScrapingOptions()
+        assert options.use_playwright is True
+        assert options.headful is False
+        assert options.save_html is True
+        assert options.save_screenshot is True
+        assert options.timeout_ms == 60000
+
+    def test_custom_values(self):
+        """Custom values are accepted."""
+        options = ScrapingOptions(
+            use_playwright=False,
+            headful=True,
+            save_html=False,
+            save_screenshot=False,
+            timeout_ms=30000,
+        )
+        assert options.use_playwright is False
+        assert options.headful is True
+        assert options.save_html is False
+        assert options.save_screenshot is False
+        assert options.timeout_ms == 30000
+
+    def test_timeout_validation_min(self):
+        """A timeout below 1000 ms is rejected."""
+        with pytest.raises(ValueError):
+            ScrapingOptions(timeout_ms=500)
+
+    def test_timeout_validation_valid(self):
+        """A timeout >= 1000 ms is accepted."""
+        options = ScrapingOptions(timeout_ms=1000)
+        assert options.timeout_ms == 1000
+
+
+class TestScrapingConfig:
+    """Tests for the ScrapingConfig model."""
+
+    def test_minimal_config(self):
+        """Minimal config with URLs only."""
+        config = ScrapingConfig(urls=["https://example.com"])
+        assert len(config.urls) == 1
+        assert config.urls[0] == "https://example.com"
+        assert isinstance(config.options, ScrapingOptions)
+
+    def test_config_with_options(self):
+        """Config with URLs and options."""
+        options = ScrapingOptions(use_playwright=False, timeout_ms=10000)
+        config = ScrapingConfig(
+            urls=["https://example.com", "https://test.com"], options=options
+        )
+        assert len(config.urls) == 2
+        assert config.options.use_playwright is False
+        assert config.options.timeout_ms == 10000
+
+    def test_validate_urls_empty_list(self):
+        """An empty URL list is rejected."""
+        with pytest.raises(ValueError, match="Au moins une URL"):
+            ScrapingConfig(urls=[])
+
+    def test_validate_urls_strips_whitespace(self):
+        """Whitespace is stripped."""
+        config = ScrapingConfig(urls=["  https://example.com  ", "https://test.com"])
+        assert config.urls == ["https://example.com", "https://test.com"]
+
+    def test_validate_urls_removes_empty(self):
+        """Empty URLs are removed."""
+        config = ScrapingConfig(
+            urls=["https://example.com", "", "   ", "https://test.com"]
+        )
+        assert len(config.urls) == 2
+        assert config.urls == ["https://example.com", "https://test.com"]
+
+    def test_validate_urls_all_empty(self):
+        """If all URLs are empty, an error is raised."""
+        with pytest.raises(ValueError, match="Aucune URL valide"):
+            ScrapingConfig(urls=["", "   ", "\t"])
+
+
+class TestReadYamlConfig:
+    """Tests for read_yaml_config()."""
+
+    def test_read_valid_yaml(self, tmp_path):
+        """Reads a valid YAML file."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {
+            "urls": ["https://example.com", "https://test.com"],
+            "options": {"use_playwright": False, "timeout_ms": 30000},
+        }
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(yaml_path)
+        assert len(config.urls) == 2
+        assert config.urls[0] == "https://example.com"
+        assert config.options.use_playwright is False
+        assert config.options.timeout_ms == 30000
+
+    def test_read_yaml_minimal(self, tmp_path):
+        """Reads a minimal YAML (URLs only)."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"urls": ["https://example.com"]}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(yaml_path)
+        assert len(config.urls) == 1
+        # Default options
+        assert config.options.use_playwright is True
+        assert config.options.timeout_ms == 60000
+
+    def test_read_yaml_file_not_found(self, tmp_path):
+        """A missing file raises FileNotFoundError."""
+        yaml_path = tmp_path / "nonexistent.yaml"
+        with pytest.raises(FileNotFoundError):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_empty_file(self, tmp_path):
+        """An empty YAML file raises ValueError."""
+        yaml_path = tmp_path / "empty.yaml"
+        yaml_path.write_text("")
+
+        with pytest.raises(ValueError, match="Fichier YAML vide"):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_invalid_syntax(self, tmp_path):
+        """YAML with invalid syntax raises ValueError."""
+        yaml_path = tmp_path / "invalid.yaml"
+        yaml_path.write_text("urls: [invalid yaml syntax")
+
+        with pytest.raises(ValueError, match="YAML invalide"):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_missing_urls(self, tmp_path):
+        """YAML without a 'urls' field raises a validation error."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"options": {"use_playwright": False}}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        with pytest.raises(Exception):  # Pydantic validation error
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_accepts_path_string(self, tmp_path):
+        """Accepts a string as the path."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"urls": ["https://example.com"]}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(str(yaml_path))
+        assert len(config.urls) == 1
+
+
+class TestWriteJsonResults:
+    """Tests for write_json_results()."""
+
+    @pytest.fixture
+    def sample_snapshot(self) -> ProductSnapshot:
+        """Fixture: sample ProductSnapshot."""
+        return ProductSnapshot(
+            source="test",
+            url="https://example.com/product",
+            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
+            title="Test Product",
+            price=99.99,
+            currency="EUR",
+            stock_status=StockStatus.IN_STOCK,
+            reference="TEST123",
+            images=["https://example.com/img1.jpg"],
+            category="Test Category",
+            specs={"Brand": "TestBrand"},
+            debug=DebugInfo(
+                method=FetchMethod.HTTP,
+                status=DebugStatus.SUCCESS,
+                errors=[],
+                notes=[],
+            ),
+        )
+
+    def test_write_single_snapshot(self, tmp_path, sample_snapshot):
+        """Writes a single snapshot."""
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot], json_path)
+
+        assert json_path.exists()
+
+        # Check the contents
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert isinstance(data, list)
+        assert len(data) == 1
+        assert data[0]["source"] == "test"
+        assert data[0]["title"] == "Test Product"
+
+    def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
+        """Writes several snapshots."""
+        snapshot2 = ProductSnapshot(
+            source="test2",
+            url="https://example.com/product2",
+            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
+            title="Test Product 2",
+            price=49.99,
+            currency="EUR",
+            stock_status=StockStatus.OUT_OF_STOCK,
+            debug=DebugInfo(
+                method=FetchMethod.PLAYWRIGHT,
+                status=DebugStatus.PARTIAL,
+                errors=["Test error"],
+                notes=[],
+            ),
+        )
+
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot, snapshot2], json_path)
+
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert len(data) == 2
+        assert data[0]["source"] == "test"
+        assert data[1]["source"] == "test2"
+
+    def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
+        """Creates parent directories when needed."""
+        json_path = tmp_path / "sub" / "dir" / "results.json"
+        write_json_results([sample_snapshot], json_path)
+
+        assert json_path.exists()
+        assert json_path.parent.exists()
+
+    def test_write_empty_list(self, tmp_path):
+        """Writes an empty list."""
+        json_path = tmp_path / "empty.json"
+        write_json_results([], json_path)
+
+        assert json_path.exists()
+
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert data == []
+
+    def test_write_indent_control(self, tmp_path, sample_snapshot):
+        """Controls indentation."""
+        # With indent
+        json_path1 = tmp_path / "pretty.json"
+        write_json_results([sample_snapshot], json_path1, indent=2)
+        content1 = json_path1.read_text()
+        assert "\n" in content1  # Pretty-printed
+
+        # Without indent (compact)
+        json_path2 = tmp_path / "compact.json"
+        write_json_results([sample_snapshot], json_path2, indent=None)
+        content2 = json_path2.read_text()
+        assert len(content2) < len(content1)  # More compact
+
+    def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
+        """Accepts a string as the path."""
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot], str(json_path))
+        assert json_path.exists()
+
+
+class TestReadJsonResults:
+    """Tests for read_json_results()."""
+
+    @pytest.fixture
+    def json_file_with_snapshot(self, tmp_path) -> Path:
+        """Fixture: JSON file with one snapshot."""
+        json_path = tmp_path / "results.json"
+        snapshot_data = {
+            "source": "test",
+            "url": "https://example.com/product",
+            "fetched_at": "2024-01-01T12:00:00",
+            "title": "Test Product",
+            "price": 99.99,
+            "currency": "EUR",
+            "shipping_cost": None,
+            "stock_status": "in_stock",
+            "reference": "TEST123",
+            "images": ["https://example.com/img.jpg"],
+            "category": "Test",
+            "specs": {"Brand": "Test"},
+            "debug": {
+                "method": "http",
+                "status": "success",
+                "errors": [],
+                "notes": [],
+                "duration_ms": None,
+                "html_size_bytes": None,
+            },
+        }
+
+        with open(json_path, "w") as f:
+            json.dump([snapshot_data], f)
+
+        return json_path
+
+    def test_read_single_snapshot(self, json_file_with_snapshot):
+        """Reads a file with one snapshot."""
+        snapshots = read_json_results(json_file_with_snapshot)
+
+        assert len(snapshots) == 1
+        assert isinstance(snapshots[0], ProductSnapshot)
+        assert snapshots[0].source == "test"
+        assert snapshots[0].title == "Test Product"
+        assert snapshots[0].price == 99.99
+
+    def test_read_file_not_found(self, tmp_path):
+        """A missing file raises FileNotFoundError."""
+        json_path = tmp_path / "nonexistent.json"
+        with pytest.raises(FileNotFoundError):
+            read_json_results(json_path)
+
+    def test_read_invalid_json(self, tmp_path):
+        """Invalid JSON raises ValueError."""
+        json_path = tmp_path / "invalid.json"
+        json_path.write_text("{invalid json")
+
+        with pytest.raises(ValueError, match="JSON invalide"):
+            read_json_results(json_path)
+
+    def test_read_not_a_list(self, tmp_path):
+        """JSON that is not a list raises ValueError."""
+        json_path = tmp_path / "notlist.json"
+        with open(json_path, "w") as f:
+            json.dump({"key": "value"}, f)
+
+        with pytest.raises(ValueError, match="doit contenir une liste"):
+            read_json_results(json_path)
+
+    def test_read_empty_list(self, tmp_path):
+        """An empty list is accepted."""
+        json_path = tmp_path / "empty.json"
+        with open(json_path, "w") as f:
+            json.dump([], f)
+
+        snapshots = read_json_results(json_path)
+        assert snapshots == []
+
+    def test_read_accepts_path_string(self, json_file_with_snapshot):
+        """Accepts a string as the path."""
+        snapshots = read_json_results(str(json_file_with_snapshot))
+        assert len(snapshots) == 1
+
+
+class TestSaveDebugHtml:
+    """Tests for save_debug_html()."""
+
+    def test_save_html_default_dir(self, tmp_path, monkeypatch):
+        """Saves HTML to the default directory."""
+        # Change the working directory for the test
+        monkeypatch.chdir(tmp_path)
+
+        html = "<html><body>Test</body></html>"
+        result_path = save_debug_html(html, "test_page")
+
+        assert result_path.exists()
+        assert result_path.name == "test_page.html"
+        assert result_path.read_text(encoding="utf-8") == html
+
+    def test_save_html_custom_dir(self, tmp_path):
+        """Saves HTML to a custom directory."""
+        output_dir = tmp_path / "debug_html"
+        html = "<html><body>Test</body></html>"
+
+        result_path = save_debug_html(html, "test_page", output_dir)
+
+        assert result_path.parent == output_dir
+        assert result_path.name == "test_page.html"
+        assert result_path.read_text(encoding="utf-8") == html
+
+    def test_save_html_creates_dir(self, tmp_path):
+        """Creates the output directory if it does not exist."""
+        output_dir = tmp_path / "sub" / "dir" / "html"
+        html = "<html><body>Test</body></html>"
+
+        result_path = save_debug_html(html, "test_page", output_dir)
+
+        assert output_dir.exists()
+        assert result_path.exists()
+
+    def test_save_html_large_content(self, tmp_path):
+        """Saves large HTML content."""
+        html = "<html>" + ("x" * 100000) + "</html>"
+        result_path = save_debug_html(html, "large_page", tmp_path)
+
+        assert result_path.exists()
+        assert len(result_path.read_text(encoding="utf-8")) == len(html)
+
+
+class TestSaveDebugScreenshot:
+    """Tests for save_debug_screenshot()."""
+
+    def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
+        """Saves a screenshot to the default directory."""
+        monkeypatch.chdir(tmp_path)
+
+        screenshot_bytes = b"\x89PNG fake image data"
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot")
+
+        assert result_path.exists()
+        assert result_path.name == "test_screenshot.png"
+        assert result_path.read_bytes() == screenshot_bytes
+
+    def test_save_screenshot_custom_dir(self, tmp_path):
+        """Saves a screenshot to a custom directory."""
+        output_dir = tmp_path / "screenshots"
+        screenshot_bytes = b"\x89PNG fake image data"
+
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
+
+        assert result_path.parent == output_dir
+        assert result_path.name == "test_screenshot.png"
+        assert result_path.read_bytes() == screenshot_bytes
+
+    def test_save_screenshot_creates_dir(self, tmp_path):
+        """Creates the output directory if it does not exist."""
+        output_dir = tmp_path / "sub" / "dir" / "screenshots"
+        screenshot_bytes = b"\x89PNG fake image data"
+
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
+
+        assert output_dir.exists()
+        assert result_path.exists()
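Taken together, the helpers tested above form a small round trip; a minimal sketch under exactly the behaviors asserted here (file paths are illustrative):

    from pricewatch.app.core.io import read_json_results, read_yaml_config, write_json_results

    config = read_yaml_config("config.yaml")         # ScrapingConfig with a cleaned URL list
    snapshots = []                                    # would normally come from a scraping run
    write_json_results(snapshots, "out/results.json", indent=2)   # parent dirs created on demand
    loaded = read_json_results("out/results.json")    # back to ProductSnapshot objects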
"https://www.cdiscount.com/informatique/clavier-souris-webcam/example/f-1070123-example.html" + store = registry_with_all_stores.detect_store(url) + assert store is not None + assert store.store_id == "cdiscount" + + def test_detect_backmarket(self, registry_with_all_stores): + """Détecte Backmarket correctement.""" + url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro" + store = registry_with_all_stores.detect_store(url) + assert store is not None + assert store.store_id == "backmarket" + + def test_detect_backmarket_locale_en(self, registry_with_all_stores): + """Détecte Backmarket avec locale anglais.""" + url = "https://www.backmarket.fr/en-fr/p/macbook-air-15-2024" + store = registry_with_all_stores.detect_store(url) + assert store is not None + assert store.store_id == "backmarket" + + def test_detect_aliexpress_fr(self, registry_with_all_stores): + """Détecte AliExpress.fr correctement.""" + url = "https://fr.aliexpress.com/item/1005007187023722.html" + store = registry_with_all_stores.detect_store(url) + assert store is not None + assert store.store_id == "aliexpress" + + def test_detect_aliexpress_com(self, registry_with_all_stores): + """Détecte AliExpress.com correctement.""" + url = "https://www.aliexpress.com/item/1005007187023722.html" + store = registry_with_all_stores.detect_store(url) + assert store is not None + assert store.store_id == "aliexpress" + + def test_detect_unknown_store(self, registry_with_all_stores): + """URL inconnue retourne None.""" + url = "https://www.ebay.com/itm/123456789" + store = registry_with_all_stores.detect_store(url) + assert store is None + + def test_detect_invalid_url(self, registry_with_all_stores): + """URL invalide retourne None.""" + url = "not-a-valid-url" + store = registry_with_all_stores.detect_store(url) + assert store is None + + def test_detect_priority_amazon_over_others(self, registry_with_all_stores): + """Amazon.fr doit avoir le meilleur score pour ses URLs.""" + url = "https://www.amazon.fr/dp/B08N5WRWNW" + store = registry_with_all_stores.detect_store(url) + # Amazon.fr devrait avoir score 0.9, les autres 0.0 + assert store.store_id == "amazon" + + def test_each_store_matches_only_own_urls(self, registry_with_all_stores): + """Chaque store ne matche que ses propres URLs.""" + test_cases = [ + ("https://www.amazon.fr/dp/B08N5WRWNW", "amazon"), + ("https://www.cdiscount.com/product", "cdiscount"), + ("https://www.backmarket.fr/fr-fr/p/product", "backmarket"), + ("https://fr.aliexpress.com/item/12345.html", "aliexpress"), + ] + + for url, expected_store_id in test_cases: + store = registry_with_all_stores.detect_store(url) + assert store is not None, f"Aucun store détecté pour {url}" + assert store.store_id == expected_store_id, ( + f"Mauvais store pour {url}: " + f"attendu {expected_store_id}, obtenu {store.store_id}" + ) + + def test_get_store_by_id(self, registry_with_all_stores): + """Récupère chaque store par son ID.""" + amazon = registry_with_all_stores.get_store("amazon") + assert amazon is not None + assert isinstance(amazon, AmazonStore) + + cdiscount = registry_with_all_stores.get_store("cdiscount") + assert cdiscount is not None + assert isinstance(cdiscount, CdiscountStore) + + backmarket = registry_with_all_stores.get_store("backmarket") + assert backmarket is not None + assert isinstance(backmarket, BackmarketStore) + + aliexpress = registry_with_all_stores.get_store("aliexpress") + assert aliexpress is not None + assert isinstance(aliexpress, AliexpressStore) + + def test_unregister_store(self, 
registry_with_all_stores): + """Désenregistre un store et vérifie qu'il n'est plus détecté.""" + assert len(registry_with_all_stores) == 4 + + # Désenregistrer Amazon + removed = registry_with_all_stores.unregister("amazon") + assert removed is True + assert len(registry_with_all_stores) == 3 + + # Amazon ne doit plus être détecté + store = registry_with_all_stores.detect_store("https://www.amazon.fr/dp/B08N5WRWNW") + assert store is None + + # Les autres stores doivent toujours fonctionner + store = registry_with_all_stores.detect_store("https://www.cdiscount.com/product") + assert store is not None + assert store.store_id == "cdiscount" + + def test_repr_includes_all_stores(self, registry_with_all_stores): + """La représentation string inclut tous les stores.""" + repr_str = repr(registry_with_all_stores) + assert "StoreRegistry" in repr_str + assert "amazon" in repr_str + assert "cdiscount" in repr_str + assert "backmarket" in repr_str + assert "aliexpress" in repr_str diff --git a/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc new file mode 100755 index 0000000000000000000000000000000000000000..e883ddc6b189a8ea889201b22cdf234898b11acc GIT binary patch literal 9521 zcmeGiO>f&)lBB3lT9&^OJ5Fk|Nt{MaEPupy5^vo$&4*Jb_3l~`Qot?q8H{ERf#v7xq+OPgd;21~&_I(Ov@V!G%)-Nx| z@@ziNyqWhkk8ft)JncrK5f0M-PG)nTggEYNM4aTef!z_F<38n*9O06DQnf!lCe(iZPDfJNH)>ZW-KKKY^;T1oi^4=v4b|&MzKR#A-=EF7rCaH zMN=*qi$u<=a@tr-&uel<14!4k_i~0VC;xc$TpmgnmJ*RiIP%FjZ-p~zQBCJn(*z`# z)6IgGF2?;5B%AedZH zOIW@|lD7ipyLrGYao#YC1iuYSEEuF%3XLvk3kF#lwL&PfS=H3O01-A=Aw|*E1x-<` zh@vdObn^%|D9XEwYTl;A6lE?)%wiryryD>HDoVyk1K@*REFi18ZWL7%v#BVg3Hl|? z*tsvK7fB03`xM1Qj)TS9q%S35@#Qj(7`zqYfQc?WSggK6qoh zt!w4-YWjiLMV@nN)?eU3ApeX74#AQ-3mnb{;~^`Gi+HY3uvf7Q+9`G0s|dyQmN8)o znb}L0KcmeqW~o#r;}L>0BKt7gk6Alr9hlKMcOrBIGE1B_j66AnI0}?a47(+=lObke zQz)GHSE05Iv2DAhedXfn&;zlZ)j;NJ_}#0EMPt#_s>FgH6Lz-y5(Q)s%`_-ImK;TZ zis}aUsb8*3%krmvb9`JV9h<_9Cg)#Z+fmNujafA>Yw)pXGAzGL*E<1QoP?o+nn_tP zwohSuv-$)V{TMtZ3hWWL!aWXf{^k!S|1E|eicNo;{N5CS@P6kZ-U<+{xJdM>sRe-O zgJ<_JWZr2_V<}Cj%|So<&hkgOxL7)3uO{1ka!&7M+sm^t6Qvfm8Atp2`kdVd6KaI^ z;Dueq1-3qPHLDlSin7Z1t%D{YF9Lf49=e2dehmjcySmnq-_h?Ai!jO!CIHlz;Rx#~E|SZO+U$ z95;7^Dr_Xr*ici|ByX+XgLU4MTX6c=lhgOJImMsN>32Aj{)E35q^<{(U}93iex72X zY5cHg@{AAnNlk)zhn%*Yx5g(!W4tcF|!oo z1V6+I&x?c@#A;RM46>ktxAI;g zujX}5QpF~zkqCo`zG4H7Tt&*Te-9?HGe#MYjj;4+LDfyrDFiBOUE{(;{e>bOtZ2y zv;}1Pe-FvWU6cX$#i7*^x2#;jkTA*!8?;>yY&QbscP_hiy#Vx0HD#5O48riK0ayX3 zzLErs<{AU9>YR3t03Kzd3)9#lO(|gt1zM9Fd+BU6-3N;FLdu7V%}6=QD)SFeR(q&K zP^5mUNY7Uksi#hTwdkrBP;agZoI*dXCe=~7mGRV9BCe8iW2*s6)q;j&*Y5a`qkwec_!!IM9$JPIsYbd zZg?i=-$KsdUPept*C8T<0gp7gnQ#^b|AUkdL548jZpe#_m^ip<$L=6??CkiL z0@7BYfQ{88Zvs1x$ow7-is0Re(&`(VV&_BG%X`?=z83f-w(|PJXw&TjTg@+)n_t{$ zKDiO?MGtV_y4bz8zbticiT3|Ju^SyX#5OCSONO$w{VrCn!rbBwv3pzUTo;e7&6TC2 zTcZ7cPdvK%3d1%lpi73bwK*3nS7C1PhIn*aI=C+OtlcO}JzJvve^2aLmku&?(*wH< zP`Gx(#mZHfTf8Cm{Ho*rbwmPkd)ix;;G3Y{vJBt&FY%TIPUkEBGKN%=zNC28rwb5( za6M*d-A*a@Xf1vQxx1e3lAk&vPd^N7fmz0CR%Ob#pTOjq{Ut$9&z~ z9RK4ySwG{5HHl$ z*C(%+TIY+!f(h|c?LC!pL7lh{E;-I!#U?gaN3*>bMjH#AxwQJjT2VRFJea%co{CR)7i6^3y?C zemW43YfP*0=cT1d>bP_Xl$?bDE#5%a{BZ1;irNh9POl4+RB4m}N2VbyV@Q&mx= zVH1sp<9ooH#g*FKxRNa`2zTRGL3H3AhsQh%Yg%zVz6rP8erq#)`wa@mAwzsN+eHiyVC%;UfF&THCSxA1~ipUJnhfE`N4? 
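For context, the score-based routing these tests pin down is used like this in application code; a sketch (setup_stores() in pricewatch.app.tasks.scrape performs the same registration against the global registry):

    from pricewatch.app.core.registry import StoreRegistry
    from pricewatch.app.stores.amazon.store import AmazonStore

    registry = StoreRegistry()
    registry.register(AmazonStore())

    store = registry.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
    if store is not None:          # None for unknown hosts, as asserted above
        print(store.store_id)      # "amazon"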
diff --git a/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..e883ddc6b189a8ea889201b22cdf234898b11acc
GIT binary patch
literal 9521
[base85 data omitted]

literal 0
HcmV?d00001

diff --git a/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc
new file mode 100755
index 0000000000000000000000000000000000000000..4dfdd8da99e318e6ef62bba1ab6e19d46bb88555
GIT binary patch
literal 8164
[base85 data omitted]

literal 0
HcmV?d00001

diff --git a/tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc b/tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc
new file mode 100755
GIT binary patch
literal 12706
[base85 data omitted]

literal 0
HcmV?d00001

diff --git a/tests/db/test_connection.py b/tests/db/test_connection.py
new file mode 100755
--- /dev/null
+++ b/tests/db/test_connection.py
@@ -0,0 +1,87 @@
+"""
+Tests for the SQLAlchemy connection helpers.
+"""
+
+from dataclasses import dataclass
+
+import pytest
+from sqlalchemy import inspect
+
+from pricewatch.app.db.connection import (
+    check_db_connection,
+    get_engine,
+    get_session,
+    init_db,
+)
+from pricewatch.app.db.models import Product
+
+
+@dataclass
+class FakeDbConfig:
+    url: str
+
+
+@dataclass
+class FakeAppConfig:
+    db: FakeDbConfig
+    debug: bool = False
+    enable_db: bool = True
+
+
+@pytest.fixture
+def sqlite_config() -> FakeAppConfig:
+    """In-memory SQLite config for tests."""
+    return FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
+
+
+def test_get_engine_sqlite(sqlite_config: FakeAppConfig):
+    """Creates a working SQLite engine."""
+    engine = get_engine(sqlite_config)
+    assert engine.url.get_backend_name() == "sqlite"
+
+
+def test_init_db_creates_tables(sqlite_config: FakeAppConfig):
+    """init_db creates all the expected tables."""
+    init_db(sqlite_config)
+    engine = get_engine(sqlite_config)
+    inspector = inspect(engine)
+    tables = set(inspector.get_table_names())
+    assert "products" in tables
+    assert "price_history" in tables
+    assert "product_images" in tables
+    assert "product_specs" in tables
+    assert "scraping_logs" in tables
+
+
+def test_get_session_commit(sqlite_config: FakeAppConfig):
+    """The session supports a simple commit."""
+    init_db(sqlite_config)
+
+    with get_session(sqlite_config) as session:
+        product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")
+        session.add(product)
+        session.commit()
+
+    with get_session(sqlite_config) as session:
diff --git a/tests/db/test_models.py b/tests/db/test_models.py
new file mode 100755
index 0000000..34f6e20
--- /dev/null
+++ b/tests/db/test_models.py
@@ -0,0 +1,89 @@
+"""
+Tests for the SQLAlchemy models.
+"""
+
+from datetime import datetime
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm import Session, sessionmaker
+
+from pricewatch.app.db.models import (
+    Base,
+    PriceHistory,
+    Product,
+    ProductImage,
+    ProductSpec,
+    ScrapingLog,
+)
+
+
+@pytest.fixture
+def session() -> Session:
+    """In-memory SQLite session for model tests."""
+    engine = create_engine("sqlite:///:memory:")
+    Base.metadata.create_all(engine)
+    SessionLocal = sessionmaker(bind=engine)
+    session = SessionLocal()
+    try:
+        yield session
+    finally:
+        session.close()
+
+
+def test_product_relationships(session: Session):
+    """The main relationships work (prices, images, specs, logs)."""
+    product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")
+
+    price = PriceHistory(
+        price=199.99,
+        shipping_cost=0,
+        stock_status="in_stock",
+        fetch_method="http",
+        fetch_status="success",
+        fetched_at=datetime.utcnow(),
+    )
+    image = ProductImage(image_url="https://example.com/image.jpg", position=0)
+    spec = ProductSpec(spec_key="Couleur", spec_value="Noir")
+    log = ScrapingLog(
+        url="https://example.com",
+        source="amazon",
+        reference="B08N5WRWNW",
+        fetch_method="http",
+        fetch_status="success",
+        fetched_at=datetime.utcnow(),
+        duration_ms=1200,
+        html_size_bytes=2048,
+        errors={"items": []},
+        notes={"items": ["OK"]},
+    )
+
+    product.price_history.append(price)
+    product.images.append(image)
+    product.specs.append(spec)
+    product.logs.append(log)
+
+    session.add(product)
+    session.commit()
+
+    loaded = session.query(Product).first()
+    assert loaded is not None
+    assert len(loaded.price_history) == 1
+    assert len(loaded.images) == 1
+    assert len(loaded.specs) == 1
+    assert len(loaded.logs) == 1
+
+
+def test_unique_product_constraint(session: Session):
+    """The unique source+reference constraint is enforced."""
+    product_a = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com/a")
+    product_b = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com/b")
+
+    session.add(product_a)
+    session.commit()
+
+    session.add(product_b)
+    with pytest.raises(IntegrityError):
+        session.commit()
+    session.rollback()
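test_unique_product_constraint expects the duplicate to fail inside the database at commit() rather than in application code. A hypothetical sketch of how such a constraint is typically declared in SQLAlchemy; the actual models ship in pricewatch/app/db/models.py but are not visible in this excerpt, so the names and columns below are illustrative:

# Hypothetical excerpt: a source+reference unique constraint declared
# at the table level; the real models.py may differ in detail.
from sqlalchemy import Column, Integer, String, UniqueConstraint
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Product(Base):
    __tablename__ = "products"
    __table_args__ = (
        # Enforced by the database, so the second add() in the test
        # fails at commit() with IntegrityError.
        UniqueConstraint("source", "reference", name="uq_products_source_reference"),
    )

    id = Column(Integer, primary_key=True)
    source = Column(String, nullable=False)
    reference = Column(String, nullable=True)
    url = Column(String, nullable=False)

The session.rollback() at the end of the test matters: a failed commit leaves the session in a state that rejects further work until it is rolled back.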
+""" + +from datetime import datetime + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.models import Base, Product, ScrapingLog +from pricewatch.app.db.repository import ProductRepository + + +@pytest.fixture +def session() -> Session: + """Session SQLite in-memory pour tests repository.""" + engine = create_engine("sqlite:///:memory:") + Base.metadata.create_all(engine) + SessionLocal = sessionmaker(bind=engine) + session = SessionLocal() + try: + yield session + finally: + session.close() + engine.dispose() + + +def _make_snapshot(reference: str | None) -> ProductSnapshot: + return ProductSnapshot( + source="amazon", + url="https://example.com/product", + fetched_at=datetime(2026, 1, 14, 12, 0, 0), + title="Produit test", + price=199.99, + currency="EUR", + shipping_cost=0.0, + reference=reference, + images=["https://example.com/img1.jpg"], + specs={"Couleur": "Noir"}, + debug=DebugInfo( + method=FetchMethod.HTTP, + status=DebugStatus.SUCCESS, + errors=["Avertissement"], + notes=["OK"], + ), + ) + + +def test_save_snapshot_creates_product(session: Session): + """Le repository persiste produit + log.""" + repo = ProductRepository(session) + snapshot = _make_snapshot(reference="B08N5WRWNW") + + product_id = repo.save_snapshot(snapshot) + session.commit() + + product = session.query(Product).one() + assert product.id == product_id + assert product.reference == "B08N5WRWNW" + assert len(product.images) == 1 + assert len(product.specs) == 1 + assert len(product.price_history) == 1 + + log = session.query(ScrapingLog).one() + assert log.product_id == product_id + assert log.errors == ["Avertissement"] + assert log.notes == ["OK"] + + +def test_save_snapshot_without_reference(session: Session): + """Sans reference, le produit n'est pas cree mais le log existe.""" + repo = ProductRepository(session) + snapshot = _make_snapshot(reference=None) + + product_id = repo.save_snapshot(snapshot) + session.commit() + + assert product_id is None + assert session.query(Product).count() == 0 + assert session.query(ScrapingLog).count() == 1 diff --git a/tests/scraping/__init__.py b/tests/scraping/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/tests/scraping/__pycache__/__init__.cpython-313.pyc b/tests/scraping/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..7701a564e1ffc273086305ce65e86c0f0382e35b GIT binary patch literal 151 zcmey&%ge<81glijGePuY5CH>>P{wB#AY&>+I)f&o-%5reCLr%KNa~iEenx(7s(yNA zPEKmEenC-wR%&udv3_xKQDT99Nop~Sotc-eA0MBYmst`YuUAlci^C>2KczG$)vkyY UXbi}jVi4maGb1Bo5i^hl0KxqvDgXcg literal 0 HcmV?d00001 diff --git a/tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc b/tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc new file mode 100755 index 0000000000000000000000000000000000000000..6cd0c6aca3d3b782aa20fc0a1a82eac22fea0758 GIT binary patch literal 49501 zcmeHQYit|Wl_n)Vh7=|0VLP^CJGA@~V~et7D~=r{b>hZ#?ZinuX6jUFSy~#&Y$%es zL;9gSn(b!0N|8lUq-m9XAj%>|tfB>iMX|_civ@NA?Doh0ASJnSXNzrs7Q4-l1rjGk z0sq?X+?lyElt-duiHehQAf37Q&Y3&Ixij~E=bm%!`M9w$=)(5qwxQJPJ6x{cQpVgo zM&je|BJrwAb}^Uimg|nYSsmZkAM>bn5!W#<^F`=L{c-;>iAl!-EWq=1$Ac`$IU865 z_MYRRV~wowSQBgF$GykH$C_F5u@=@6ajkW=yJX*bm+bGU)33`~d6|T=fGBGls*45- zPk|$NN>TG_B$q9)NRFlYmGjAb|JhhFmy4GckU#nBMk@@&`Bwlqr?qY5{c06uYFV``TT)*DMys`(>CwoEtvX4na{-~sd>2cUD 
z@j)|c7};!lNcidf>7=SE5gS{q-@GJU*^mEv{D1tcM3?Uq$EX$Mdf6j;4|p=}xcj8h zMs$+7WpJC>?s)wJuIv{(jKYC;U9LX!J#wA&nY`AXT0QRB=a&7ax9qFyF?eOE4|kw% zC;1y{=Jb8LtH&s~3;Tgv%dQeC=cQb`EAH+wevNr=$u(tXHO+t4LvGxY^-G+&>&$CZ z2_I)@AM$w;a0LovFLqojZ)S zT4iyYDYR7`wbj9+uEn-iBX6yyT5DCQweVKX-q2d1C45=oT3c1At=7(NDSXH1CzteP zHP>3JO0Bitv9&x)YOR)9YgMDQhCtwF`v1vSy19ZMC9%WrFKWvm{C{onMg|Ig*{PK^Dz$UChE0%a1rSy(sQke z&=$*c#{FPK;r%#4O^mQ33xjCEL%EB<$@xT*X)THVWICN-N{;nshjU4$XdxU4O@24NEwIv3#Y&e-u zC>L_+WQM;ejDWA^4ACj6R5RnmiuhF7hDZb5IHJ@(8#duFem(XViaJ#(i@ zKQ^Lo+TC4ev%^Z)P$~^+p)1F--&FeZY8Mwmy68C=X$T}d%Td2BmBexvwKo2!j28>P zp34VY=xw&cbO)w3+=KcsfcSH+-?ePGvTE&>maSLT-ip7E0^Zi(sPDrjSM!>eo_*oj zqO@&%l}^IjW+dZpQrb59tjOt!%P2c1%1Ty_4j9Z+RGZ)t2^3!muYT#N7oI9gdy1R( z=(H5xGb0&)lhU5ir$kOqTt*o#XmFIQ933#2r+9sWNhDBCSA6<~r;E~-vAj;gTV^EV zZ&KPa`n1UDiOVR%^$d=Zm7@a&^Ay!4ctirlxZF&5b1A&JD0Pg>oRq>HGm`N)DRmUX zn?+VnT$VcMiUv!`%He?Eo}&5$lSrU=r7t-xb$pB$%T<#L=_$BeC!HldAx(bnaA6i3#-*y%PSY^tyPl?tI}FrE^NoJ zJf~bZumi|nDX`d)YmNJM?~NSEvNNfH0VVUnE13WNU;yVvJzP#;ng$QhxRLC~stmwj|HcQN7B=a)*fouzrP7u4W!S9cG{7p*RNdmA6kiLvEU|XZCWaa39 z!8}D32_BI^u@D#9#|DZLk#PHrWc*D^?S%iW0;Dga43{&?N>+{z7|c^tk>C*t6bo4) zHukNe1X2peW+dZpQi>7$w+fKHj51u#C@WbxI$$tQQAL7BBv8E4!2Q#a71(~fR?e~l zg9c}|4(Wh<*|kUY9Lsphkf--1)E;?ldK_n&;zqPbKA}DG%hIxGj{+67M}ZsCN9b)8 zthH5@+R9l8~YB-tJO?-@L_DCUlr!zY&rrW$$^zr@Qp?U!-}{(L=gNZ5H~L?3O+j@0 zR8b-lj?PHN-=q}fbN^Ne4qcX_BySjHB`b#mf_sXp5=lA~BpDD5MpnLy5$bg=Cuac*unliNO?8^XXJZ$z)OBPb3Di{fPu4Yc6&- zkwYNb>O_J*qf~z)na{J-nF0h}aNS1?Byeysn2}u=C#3OH$;#vvp=EzVlUY9n^;ao1c4mnW+CYz~-S({uR zuai9o>LRYbfO$yvng(Z?`gpyQnO?>t`?9CvUYG<9nr3(0Bv6>>IjdJ>f7}O?KdFoA>NoG`*O?~R)TT4y=HY5Q4F{RH|daeK^XRFP9W zO*v=?b$+=)4#oY3NuboX&iwthx*VVh?ol$YYVKhY*a)G{1Cu}nZADXBvo^F<9kmrV z30z!jHSyL8*IKJettGxqtI5z>?j^NWbFHi0bT1pj zW>blDbno6BdFXgF#u~vivPHMx>^+kmxXA4|BM&Esl-@3$H*7kug~$PiuyAZh$-`I^ z`Fw$866ccXf^w~mTxryUU0uqB?I2fuF8gFZiR%#d8t}c%+DuWI9~UC5 z#pEQGXFPF_EP25MUo~G=iTk*yc-|DA2eR;|t01Dwfw&)neem?7mSAr?$5~7lf^kJo z;Rz=<#Ok;oog|0k#<*mN(1AX(>foELE(c8vMCgDiI-7fl&`l7by%3?#4K+o>1?qyu z0LMK@^SZZZB&RSno5>{8DfPVxeUQ5r)(5$VjvE6Sdl09y6GR>+avTK1n%}5{C8&^$ zNHB!7(m=E)s+tY~``{2zNA3iBi0mtKG`K@|G`Qo$M58r^u&+>UVhGV%b>nw#oUaTd zFsTGPdv2+pWJ55@f8sptZG%kts4@}c!w5S~wLS)-Nqiu|$gDN+YV->1=JdPyaAGg% zw%gSA*faZ8ny(XMjWO74vJEBYQ~9%r{(_p%4kwH!rtO+@;BnxlXRiJT1lIFwH@AZy1z4Nt~dy8$yN2T9Mt;M#Ex7#PBo)3MlKnqO6VL?6(!}m3tX4Z6+ z)^yCQ=_;-18f~}|UO#rA6y8>pc3x`bq!ivcBN=~_(#|3o;!}3omYyPR!6g!4nWroH zHGsnMcO^1D@!gm&vUMqb`uvx>30;Qz z29no;aYzuI_YOg+$_7yoZJ0GkuqRNeH5t9OdBrpe*)S0?w|P}+JB`{LGv zrSQQS$@rT@NjRcsr)}ve&MDYL0_9?e*}lINzOyJDedhsAO5vk3lJPew9W93M)U(sJ z^b~OmE|Ea-`{7%QQqQFWrQ3S2FW%Nu3ir%N#vjH#vU{Z5w3VFVe1b_Nz%tLU^)5u& zg$!FzM^TSBYa^AFT+h`}LRlpXRSQ=~q5jz$ruMv!)3Ag#_;%6(3-c8!NFC)bn=F$0 zgcaXOfqKn)XvGN=V+(bZbrnbT)g0<5>n!IXN+_|oCsHMD@r?Q^aSGHeR;cjv6)G%s zl!c|Ox}34JlRAo4s4&%04oWPvRcHxq)mUpQcp_D!t?crw@w%g}=2AvkZ-(=i<)k{S zeOb%&Wy#IiSDaM6*~j8u4z+(Qp!U0v-7B?KOFW2Pu=Vt-HmGf#z5DH^Nid=~pl zs~2n&N_p~ZSG!tY%ax4v@uTz19u};Na ztW&`qe}d0U)Uz`o4vC2&-4b#uU66#1h3Xb2u7mpX zY7%s~Pe|Vx->>`{?}*`=9-+2z>d;($n)c8|_RcV{52)Z`j`+Qw$@#Z9!q z&Fm^6htGptv%FZjk((?u)g+r`#$Q|tmBYYa4sz%XV&!Izm0SKqv2ts5u`=*i+sJ0J z5djR!vuxp9@=TgzuYHjdFvB>KEo26wvDEi5tG8I}C1BSG+{bOzaRA1iL9fQ11(|4C zI(*til|2Wt5EQg80Rs~tA^fs=QIL&~?o)lrac1*oLP4KjSOOR`&kSf2W-e<;0q>}> z)sTYuB=r3fX4Ps)fmT$m(tLFdDU`pbOPZ;vGE$J+YN)Qfwd}p@f;2|ftXh@Z3aDN1(zJChv9p==!4@6F7vylgrnTrvcb0afg;JcqL1>b4)sCE{r8wpyM zXV2ro%>&QY*(a&G4s=zjm$89K=+@UmuZ6H1 z3zfoKXC&iqQrbFN&~wvPQXEFvm`~(NR_-!YBo2tgl(h8S{`6~DM0Dra)3eO9 zNZoB%X_-m8S=JZ~QVpA~63#Yqm#HFgKqTfj%vM(#Qqyq~Y-7=dBoIjrdz>F9yq*q1Yi%mmx|h)ks{0-h`b8vJBw 
zXb@G;MuQ>y6)NGuTWxp{btU*cvjo?~YSB3n;cV6))-a*DR=R+ND~|~`!YApi4R36~ zZhS+j+%<_7wj?j4X}BO6N#SUOvhg({2PY*n2PdWR4dQ@EfPbDD!kR9zyuY%`t~E?p z!-RbNx(O{>Ycqu9-|bC~GulROrGXwHB%w|mNRcmeit7`5cJBqQ_~TM>#ok%Cq7ss5 zFGCW0Ig-H5B?_73xnwF$x=wBPJb=RSj5pCFAJ>$DMi>?yTC6Cpz}!@AsK-lKbBAzT zoy+bHQ8OEIHt?+t^|QxSGLruMnAlfe!PGX=eHelNBKYI$wB|A5_rp`a5a~c{x`=Wi z7TLGw&d8T{3`dfYypqhpP&$_S`=ucO)VN3lVE1${WS+=m&u938m}cCAH>;e^%`bKUvwNF5t2p}V z#l`P3?~U82T4y_3xD5g^Vizy zBAfeJ8@dPK!ftarr*$mq;dOEfZROyxhqh{3LR*DvZG|;Qmc6au&Gl_Z3)Q1>5BKJJ z&3tzk^j)>Y1KgYIdF#8fy-Ut-u<+yjjDCUEcV&8WE&q2V;k)`izAL9CR?Y9qy~sXr z74HLA*S@Q2d{y)v2Pdba06@U=1+%hxPuZwXr;*nK{ffW$l6#-}?wCc=2 zBwL`h=o-}2fRa!K2XTzMczrbFWs|HFD?2t)+!A8(rf7hkJ8LbX$-e%;YgogfW9)%hL1jlT>hl$d&1xovZUkjx zM??v*M` z5PV7%hwd8zp8)|r$>n*u01eG_aPWyWk-zN(KeIOmXs{|108RMxjRBg^W~*9&rnzcu zWda%p=O_pYA^u%07SOct@2a)-T~*_|BKN4mCXBB4U+7nI9IRmFT~$dIB17*@WZs)# z6#T}3G>s>e{P`?%U{je8W`lunLNR~6cKe*llIwW?gRbzj9h6)Nt^f7fj!U;qtv&Fe*A-a( zk*hus{3PgV-EiH4v?~QoZYxS%Z}q;>i`{r{Dcm(98Gn;fS228>o}IR(1p^Al?-03? zmAgz;i31`rC3V>rPOYH@ydAX7F;!JV3uI{N!bz zS#_|&Z8oe>5I@ZZ7dq#J3r_6Qc&F679WlK8^`EUT_MVv9^AJsAJnXb|{YoK)_M+7N zR{D)JcH`+%xO+x2{wAev9&MYl)3&r=K;ihH$d#_m?dyyw!ulqUWW)+-SK*;sJ>V>l}@v7d~;H4K}A&yK~3YnSFTD3YB`vF@}z@3 zKz4t;o~u1Z7u8k`e0}85Y1O9ou-eyAnP{odAYL7q1cjcgj zM7+VJgvxEIg*C25SYw3B4UcS$pL`&44$2E?I`UbB(@_u_SD|4Z?iMkv;w->5$*f@Z zxZaBnGa9V!6OpWQoC-6;y7&6vO4xvQEJ0B2AxU+l(0jo$Y~7)zN!rcbr=c`*M@D57;OZ$ zI0AygUZ`@bT5VP3VVk0+kL;$NoJX-iG_Yj9qS}I_JB|(`^-u{{nkHtK@C0U!4G0WC zvmuMqV)IGh37qB>hRnVe!}+r*^!oG*cxONNS`6ik|3g*(9t2Bs) zXt+StL&F6{XgG?lzFeWfMa42snE}L zO+p1IBA_@;XU^%{Zk^>YwPrsHGDW~5c)kjvrd^7L6!^kJ`J8fq>KI`kxMhi8ejfDE~~1p##0VgYMZzS#wa((yxgazp>N^ubA$Oln0x(w&iyr_?K-{79=YWP!_>F( zo~NzW^Hihf8S+I}@hHAFaKg?05tf~Txey!~R#cvGjIGGw$O+3*U!SX{=uPZaBGiA3 zJgUdq=m{L<2crjA! z-g>N)8TB&q<>C=v*taOvPlVtQ4@l2WQR-2UXtQnc@~=>-hX{?-+(*C`_m||zxr599 z6CHgQ1T+0HN}ne37!m3c*<7LISq4Y#p@4hzXpc13S$+m)ka@epUObMJniy7+Xd7if zn_FS;#2!R1qy9U{-1y9NM{o>2Z?0FqBGzyexqN+OX8o?xdPGs_esQF@e%DX>r^4OH z{L-e+*Y)sF)4zyFCx(u{Jf|1`QtBQs>KmK0V9B)^ougoXXc3ex~s#c zz-9vQ=|x!=i1Xj@S}zQ7%Js6R6Vh5IUO?GPG+vh$CIV1viYSvfWNfkzF(`SAbsGj1 zNM{}l8gPtjXyH6ZCA1vmBSAyiNDx}oSd1>oHZD3Mo;hMrTKDFNbgZ*5_J(ZNaOS=d z3~J7R`+=as%Z5RlZ5UKwJ+pmb(h%qae$h9}%^rphp6rj}!Gq6Zs^gzctO69;UiN&Q zTd^hHz`ZWNYMCF%ZHsEfPX|%8pySf)r-BImN{!f-01U@k(3 zC?me!v-#olJ(~MM>d<{**NDHjixeEW*dXz#UBSomYb0lm-N_$Yh=E+^eejcedEA;-de+7v)Ee);g zw?mgZ^bBIu4JY#lw9uj4MG9Dl%$_|ebpm!kV?r9c!U}U#Lkgz5Y27JRnP>6va>iQ- z#ysW(W8UAm?NZyN^Aj6O8~48xEp0qJ+Bhw(zcMeVdFj~~o-In-#!fy2z<)R*tl57?_ldVGiA7YS6u z=d##+Xu=cGwd^pxJzpepKanFuzC`4!M4luvLgdeg`~{I0i2N0i?-BU{k-sA{PUKA@ zKPK{1BL6_-ABp^&$S;T-B|_KN?_?jPj+vwv3H2Kw-*NrQ6T9jOdv{!I4|#W8-O%j4 z)qb_~)uM7BVTA8RMkkZ2uFwI0Cfh(tLVdnq;`w{i4$)eBj$v`M z^T~Yw*;q1{i>duAnM-AcVrTRDTw+i`IV1F@i#H^l>(8+KxoT4HaCV@OR_M6&f$e}0#^pJ}b#gVYJ(Ete$C?}^%5mcA<|7UkeN){}u zFwz1YlK=j%nVtFnncZ)OTaieRKpE}N7ryZm@+AsR@z~7PHZboKkx(LXqASHw7rVMA zdEF(DNe}f3XmO`}lYZ)FJf8|o255lsu2hf)!R|?gCc`v58KIHMI$AdwrBP=0rs^kS zG&b2l8zviRBh&d(O_R;EneqNqe6oeM2xP|JO8STxXeVND#AWqv#O@?*wXGqrh99uD z%><{0ZP%I$v=$NTMmaIsPAV;Ri1qD+c8W2eU3picVX-?nt>}g>l(aGx#4MFd1vP)A zP*REoRY?XnvBk>?&J5&aL(UduT?fGrf?^crl>~1F&MUL!{6%$6Gee9O4cRE`W@th& zve#ZzjB8rXj9#HyuADVQRW9k*G$Y|QLwUuJbVb(-nriwAYQd0lvu2noI_MQO54VRq z`Rt0o*6;3Jwq~@%Z3|lhFz=HYLOIAPWRtoNlNo--Bf6dfi|D4_JfHBG;R*TY%K6!I znmSj=Z`Om!beCyy83yY!{gR~0^NJ*yK}nj2o)?i1OVX=lxoBIWk~CMKx`A<0H88t% zgPNd~RaJu!z_4`CHcOI@QdW`;gBE7Xh5~BZ05{x%A2FZlzoyMAFiDF=Mei?B?Pp5X z(ED{vML#$@Jfww*Qprh9X@N$e1x9cHx(~>^4Zp2Fz;mv4I2qKkf=@Yk zHn>MQm1LZ!AxV=Fd+0DwoK2Q5FM2Y8isTi2qF)S%K`}JqwLQZ4tP!Ib92F~>u)|GZ 
zZ+4XoPFGf@Ful8qSU>6#W21hR%V3gONfWUF`szx%rdk|b^_u3yMzLwsrMkst?Od9_ z#A)dxwmJ7ZLNYkLSee3gv8%w7GM@Gfb7ITY;bGSZh7^j}n!(VsGS%&H)oWknsrn2k zb1*q;?jltE9IlU~-A;6!zR9ovYHIa4zSV1Ot(Q3HQ*|VJd-Kjc#I~gC1ms8citS=Y z+Ix~?k}rdkij|tSo^*88E8N-VwB=kCPHZ0IO4FHy<5!(AE~5I={;8vmrFzxUeI#?- zk)5`*-%ceRUG=K{z8Y6GB>J>Z(%~}~2+{62F?)f_UD^?~y))XGjzC7cu6|UL(eB5d zQNJ@9r+1H((Mm1IXm?E(_Rgrsxrf+up#Jylh>}o^vBT|^mC86Es{8fq!JNW)diJZ8 z4mji4*E9IYo}owf3_G5pko2(?Q786_ed#)TeMAqiKB7)r&ed8Us%MN-1MmyNF9hr5 zuGowp2e}C(-`q=%6DQU=c;?Qz1kwX9AGD3RdkDNv4lSO%qzKO2UMMM4FX)CMlvP2u zVHCVsgjrcv1o0=A3Wk#0ZqQ#X0&}LnzyHjg0~3OnUU^<#)YR>c zYlczM!BKfbo-Y-ZWLBF;^noaC^BZ{fZ{nL^6Cd82_$=SVMrhBKQCgSvcBv$y*dERtit+a2ImkXDmlPL zrpG84MaA@#Xd$bZ{%o02Ma?dl0jkU?$SMpUEOVwe%Rt*Sut8mZO_2-@Fb2F-x4i{j z0PvOI7UPGHlon1VbivXHC=MlbGiVP{D&(MhZty`ev|R_lR-p#-8v#F1jx2NAuCcLY zZt)jbdo-a3-?SNFj5(fiRKVCOsB3uvW2*)o6)?66I+lF3tk8wv^&@uA62a?`g0gCW z3V316y2SugGJQ5orkIFjA03Aa_Hd9+U{WsF;wjXPntW+tU~oHHGoXQCXav>6%dHlG zVwelJN7Ro5UwIT!0L7QS$zPbBzC!WcZu-RZxpU*9NFylKL-eu$*(GNJn!&TB1q3yc zOwE{-m5W7*DkTb&sU%ay41-ykEfZXqw_NC4c;gfWt9PJim1f&uu!t#m>w3x<$C%fwD>Fi^gHW_ zoCT(uj_@ zNZ3^<;$E|dMcEmJWIqk6nV==`dIEm>$FOjh$XAi}Tg8?BbF1CwS0m#~6L7+Y%`+NcvY>KZp<@r{O#yLBzM-dykLU+wAt zD71b$wF=+5OW(!<_1>im-^EG2aQn5@XmaVoMyzc;c62p%^n=rX;#XovS7PH!lV5c8 zuJFA#r&mI~>%9H_l<&Q9rXpF)ogKQ>ssx6c)4QBO1dp!*O}}~hvq0x<-$pe4-t}K! zU-1v#xPI%Ewdi5s)}zN(qsM?*i5^>vo&e^<7$d9E!F4dR?;4sE=9z${*$q6L5+umdxqIh}|$pDf;BtuB>7@3|z z@*I$9dKM@i{<;cq>buy0P}RoNHfm*Pik`9Ad%6B2~sVacKvqQI9mB4VbX_qsI;Boi=4fs3_z$fhfBpgh&`x4!zPm*$477odiDY4y?*}2Oqh+}i8!0J-nxIPP1Qi{rod6OMbE+#$X9NY|Gn@-@kmHs*nKZ? zg}!%jT;x8uMz-7_-e;np8QfgQeFDTbpcp1D|GV99b>HMyxvp#E$d{7BYL$f*Wn;=NZ||u z*d9Q#Xy3-kU2f^5a=TZ1dy#LGTBTCSNn4l8CaI+IsHFBzo0OAWst8dGcvE+kt-4Dp z&R=@$maANKNq%3?OwRxs0(^+F?BW#pp{M8To=4C0_x}1}TU*3~?XUXAv%?2Gp1+_S zYxA3hhrf%$_dSY-dlawI@`jhUh+p3+zt$4>oC@$@oR0Y32%Tb_oeJ}?*l&3w!Xv2n zztMUs%G*R;;EncEF&;bB!8=ZM^3J$ty=SXO32yW#p#v?(`FNM8W2g(ux;3IMg1S~& z*FD~nh?f60T1C#QT1kr+3uPWJ@@z)Enl5E7C)34ZQp@mkF`FMx7O$qpRMbWUvKr5& z+S~*wJJH$1nit4+dE3Li7%B`A z_bDyhulP24ctG(329*F{NC|Q_9!fBM13fm^>-c01`kH*!@mTS=GP$&-sgB3wxbS4g zx(vFS5dMAmfA}p%mvm7cqxVMfDSjm|v|l0by;=xFRn3Y^=194UM2dXH{bH2w|E?5)oo*2C5%-IjnxaUc@{fb zZDO?AS4V4wMhkCc{f5!Z|vD`zsQrs;laphs*xy>+x~@Rg|v?Uq+> z$pTIe_VpL9?MrBp_sl~4#WA{Pxk?fz&-*8K5Hjl zjF$xBAt0ji(3xA_&stu55$D<6;5>AtILV8~nQ;=}?`wc>-&1|7fd;Kq;&W^61*dtg zK?BF*sy$CTCw7k$r=&-Y73uMpQfaPtrZQlMQ(P_bOktv!=BggW;nbyaHdo5#@$7O1 zU}*aK6b7!G8x^Rj@m%3jI+x0)CsaM0($Zrp^`N)EES@Li0KuW_Yf@tcK9Me^)OW#* z=f%6jV{{=MbRhw8f1H@BTOr-Askt#uZ;Reamtc&|Ugyq!=gxD+j&X*A&YkBq;+^Bh zrXL=>T$oS?$FsSdstp!-;aAm6NgEV0%^*Dovkdu0?4~RsCt19vuMrQ*e5-KpLcSO{ zKOf7VbU`N`LQ}w_f8hDc&K+~z>*qSR&#fDrTfgakIM5ZD3f^n?bo6}ZTOWRFn)Ti2 zHbAVe%FMqztZ(XDa?dEtqV9^UtJr&Vz(k&*-XxAJpn5L0_B-!=_}(M<2mX zioJ&e5_^XFlSr~~mn9zJ-SVtTjr0^eo?&-6P|BKLI;n9voNe1Gue1`p5+rVj7yd9jnP__8dqwx9lfM6? 
z9x@gC8p#DhDsV`P*?-+ia-tAk__II|(t{8iI4NN{=~RW_z-jK~^pbFBp*;}g79zyi zk|N)PRtfLqLS!;q^y6Ql6Ow?OaEMua8`W+n&_`egft>_M=gkuY$k<@WLv^>c8wZ?1 z41W$4dZ&;sjK^b&BEq`hYJD(75hpTThIZB#L^jNIJu|m%m#ZMMZ61I$$EtP)8C+7Emn(k*(K9rx}6R)+#gq?y#*SS=bdQ&!P^e zGwUk$9vv`|XQ(5IBMYdO@<9LfYtsy%66>!r^Y0GpCl=qXKzSB*IGtHnvG?eJi9ACc zNgP=~wU7r4q36)__CwQ*U?p~_%FMqzs3D=pu0eU09inrZbrpLL2PF24=u2YB0_r~p zDTE)yo8>Nra9ED&ZC69y$8NRq2`*`po&{(-kAIll5=;~Ol&{(<49?Kc4S{=+Py;`1^ zg``*G;Y53x1cPB|pqj|$vlHoD@?^qmh!KtD20nz=i?Qf9M2I+=LWEejmB!JQ#3k;| z`x}M`^F9`8h^R&1kKVPHEdKvyxzc%HD)=+#$c5?Q%gXUvc8C5OuY%_XsO9+uOTA#G zUt)v3zwxSM_GQ7~`afr*!}JOp9R$073;uawouz~gFqP6HsnkSaw475>j;2!IET?m1 z9mHNNl^V-(t(42=)qDXJp;T(LkV&ODy^#C`0!IP#wW*W_YjQS|N|#DJd#Ma@7g)y# zfg}te1aopg6M($yG!p%P)AOhP0}uRdfn5)J0e;cT0=tX`gj?h0hnHV$55J3_N8SA1 z4h*~atC91c<>3a$KXL+O!U-@;K9}&$3<0((@U`SoB@Ec6z{ireE3JSrB?{P~v;lT1 z?SNfM3~-Io0obi{06L4 zc@#EmkKN+{{y?n$u8fs@&HOJ%OU5GS|SI2q(~YRT}IjpNfmo&8!QKT8Mz0;U55(zBIWj0}O&^Sh^qJKAV2q|q=}$h!0XWtM>BB2)lY5#u{wB3+`HaJ} z7ET#auHvHesVnJRSxxx$NYVDY4PD}et8lT4eLTU6HFx<)UD>0}T$6m)Cr8;e2H6E0tM z(iQa;bgBIbzz3e6Gtb%$-#a-Ko=cpc3Rl^N3fpiODzn|kX>U`7ZG!#x&vxwk@wUp2 zgHz$}w13!smu>rb(6fHi_pVGu=LQZ|+2#t{e3$LAEB73!vUr8X@3H|@ZrJ>T;8fe( z?&oEHsQk*dAB;`2XXbY9zst5`Kfe73SEkvfxqT;~N`uDjE*tzAYyVF4!{}V}=-h^Z zdjU^)?R}3g9Jv?uJd>PTw~hX~H^aH1BXZ9Z2uBR{kepa>cha@_S1?06Cn!Z4Yx6Yy zINP8ZZF%@(7mknk;n+OVZ&Y~}PPaN}}bBRr~5#tfFD^={cvL#3^YiFI?-(si2>B;p$`#V1>L`T1 z$klYVlq%;-*<7N76HjjFM(JrS*l~dzHVmf>(%$m70Q7Z+>$Y$$S4UF?#CoPE^zxP= zq45l1l4#761O^H0A+VRgJ^&Zls;dOC)`&h$Spfq(+ytvf@!p{x{~S+}Jg~P+Z#^{A z`Mg8(`GlRWU!U%MeVP%h#9ps5^Y0FOog5hK3Y2G22Uh^Iu43=e0TX#f^dymF0rd|5 zgD=1GbKoXC=Ly6Hs?7Ym!v=&8wOxZlvuuDKs##aD_i#XB&rnwqNfuD;aIe^L;~e?O zoU7FO$cRoB>?9+x$xG&(bdHe)RNo7F!oAnW;N@RsyDDr~mAz77ugpcaOec@s^LfI@ zy$?Js4aMv~@YRdi6hS=H^4NuJzaWBv8X_2c?0y(hTwbz}9c(H(FGa`>nSK~Sg(>03 zCS*sN3fZko6tbgYv}y&n)zMnG71B*5Vvm+rX%{Sui(+JW9Vk-WX6co|vP2TGGBGuE zEQ`peU*^Fa3jdljdB0Lv~K}bZGzyJ{K*Sj7i_B@XRE+l-T zgEWg&4ED|F$tI+{4*xwmLmSK+7D@lOyV7~)^C0S$eev-0_QTVRU?q0A%FMqzs38|W zy9VW1c9=+zSy!?5a6n?uh`uD2ETG;=5cX8qo+>+DVaI3Li3Xb1(&9;ge@WV)fS6Q* zH6$SP6q10@ijV*%NkCYMJT?;0DoFrzqVrBFE;_y84fsy#HhZEOS!{1E+6^~}EYC)m z(ni;r4?|o3^^pLy^*^$R^=P3c)rX6T5ot&QBN6C-+b?|I*48&2udt<=Zi*SOFqK%c zMWpy>RXUW;kycYL9$kc9JZkl2{X#EJ;|1M#@gI%1Cb}lK{!#%cZ!W9p(Xnza-ft(m ziC@WT*-JS!VNB(4PSF{mAx>3mDt4a32hyEvx1)YCp09_K{WGT0{bVO zan1*gIOl_j*b-*^Z_r7`-&hb8>+H%6?SfV_-nK5LW7U`|JHdB z8M((AY7SOE>My}Zt+XlaLVex}S(G@pmrEri>Qmz)^9#P_9Fk}tb}xQhQQ{iX|7Egc z*>9Fr<0GGJPa@C8XgO0FC}m4I72kWOP&Ol3M+#{^(6+!A^x0>B<^K?eqle6|cS&;U zZ_yXu=(FEa|JgrsvHF39eEE+w`-ndNTh_-lx4C35w9n$=--jH3lH^$;jd1d@Te)a_oUP#G z6Ta9C+txoe!?an0h!j3sm8e7blw{j+Ru`uNT%(&tt4#`@EyFGXAjYaqj8(mP+rDlW zpIVGnyBMq3>R7GBSdF(OI>fwfv0}Lb*MukOPB>Q^GkG~~&*RC-4#k|8;})YtY?sh6 zPPQfUh5Uf}ZdRkF{O{mE)7g6*NDxi;G%$RcNwjJ$o{4t7({^SxFQ3yCuD5F0Q8lHG zje&kFL3kC3r{+;jZg)}uGK)GXm1O^(Z$<>>Yx$3H`C7LSC*e~0RqqAwT(tM0ujQ-W z$TZt*+IDDCzRQmPoO!yof>iuCWl@UA6Q&ffaT)occHG!b7Ow5}QQ+@M^OR9&7zJ)3 zlYMJ@T|4Z;rPW|RO{NJC%Hbc{3#w8pQ(7daK?t!(wX*2Mx2qw<(8M9}?TUmD!w>YE zZXTkALnA&)8eEvNNa=!nd0R)A@D9}$p&gW@4IZ|VfR}u+OghDxQc_JCvHj(Ml%(nR)vTcg& zB9ULa<>S}zageV0IZhS_5kI*kQBCAQAwqpCY$%c&x(V^WMko6KKnnv%Ww4s-F{8bYxi+l;Dk(tUAK zeDl9Y;O_$G;wA0hY9a}ylhZN$DX)E^Tv-uHoBsNE1 zE?yw>DE7RTVozx;{EcB3g<#>e5x_tby7^}7mzu}P1 z>^AQA$}G7aPw zze4#G&j_D?p}K1_OP5KZJAY60(|4Qj?>C~gmo|~#iY(MW#KGIsYbJSnGt_#kW8z0v z38X_5k0CXNz;KwD@F2K;unDU_PrIMublU#|_`vhC7mnKcIr6=}%MQ7P5IiYq;8}PW z{?qqwA*K1@o9{p3X9y<|@+rJaUW8^@x(%Z8W{uCGwFjTWn@D9y4V{0$Z$Kg5npZC; zX~wE42yc*^&g;uy3R2oZF}?1NtoO@$ zQZ#)%(vtn~^hc*DBk}3_jKpSU;k(E;T%TkZb=ObI%wtGSZ0#XA@%7X4fGk)UiH#LX 
zo=ChL(#$kqA}%~eCc+XQgy1QK4ZHy6Hm~4mfBI7Fu-XV=#$E7y`aDWP4&M%Ug1MCG zZa%?tc^)M)oB?a|Jx(^FO|TK|OJpNLPobYs^HH@)Q%tfEwSwpJ$Eqrpn95kI7e*3AX2Sws zuma)CWT$x!n^|eVNa*1c?`G5@trWe|?8G*Ac4FMP#p(C{tcavLT46`;vSV}byv#yws^ zw#MfKd)Pj?z2PJU_J}M=lM!N&7RwWId%o)6{{)w|EMD)QQ3o`6@qbQW9tZAc9PFNB zA~M*#!6s~8eS>>Ak@jZ*n74*r`N@%)p|ka7po5>E8EEAvwEs1)|CM~vU0A5qNFTg0 zFo4@`1Yf#HpZ0k&4K>rExuvpPH5x2WGh@ePS#COuCLu64be4DFrByOqRr6Tky%f)+ zi%92w`&NA7_KzmuMVv!C4Y^#db!2$IBjc$vVj6Zl&MzDwZy1a1)chXj6~ zz>f(0A%TBE;9n8=Hv~=+I7Z+gf#(RkW`UPb(8$U~3yAdpEYSMxU;gXAe7|K!Ao9@X z33lJ3(u1?!cEC6bob*2E4hQx;=m`bB?tReJirS5Bft?R}gMn=iIs#aHq&*Zk@}Mgc zNIqCEK$zO}v;zKOV+7E+20DEP|8g^hZqLI#sCslw5?XzP2gIr)L9yxxR*ygpQ$jpE z&aei{TFKme4LLSXrgOPVSScbKLBEq*=vd9FAtI~6n2A)sa0tnkKKwuYV~Ea92W*R9 zH5LxBeg9~Y5S#2PSj-|v?Zt*T5^~C~+Y~<*ya? z7a*W`3+hM^abH*a&1S+|O`3JweFTxMHUw|A@P)T*Z#5-yvBNs^aF z#==6PkpgWc^q1yBLZAFD`ul8PzEy%KeYTOJIccWULIr=5+U%XLP4Q|?3)_uD@R!Zh zqk^XtdAGOVEMP2&{1|DhDT>T+IiV?t79#d6bWLu((@biKxAjJ`Zl`JJH+BW z;%?1#xMOw8`7}vyP4ZH`N9$LJ3rdMj&HUCWIH=ms!;yl-N$8CXX7e|rB$?Vz0I;M` z(6cH2gM-s-{oJ;lfOlDZE{>#?C~ThFxl0tb%f)>T&uvK5F7C7Chr2)8Jr{im#;#`~ zQ!#R!>6`9*VW#u2BUIwaD0OA5+R$|G&@>}hi6NckgGLNr-9}6tRKdoD`$O$jJs;h-U3-ZxLH@CdB|*OK=;n6`+; z66E1U=) zccx9UVofn63#?cn24N+pbc}>erY*7vrcI8FH9}z8I>EFB!L)rFV@1nhsFHk|EsZGJnpcKRB#NdL`*_`H}-YM&8>qD;cx`T|5&DXS_ zNcp8lNO`|W%D3zhh2D86_|K!Bt%v5i*UxqCn_Iu>e#pNSzOcQtXrD#b=Qa$2;6JnT zp2sKXd*msg?=MaFzBJ7UR$?zznfZ5zy+kD2u0VMfb)fHNUB%v`119o}=t&~U0_sg- zOHRSFY z(@ic5LW*`Hp2X>j%{{xxJjAmJ$+adFiU%CUr|O6}l)4i0wD^HuK$5{4;%Q%-t6s%! zC5UG;Y@2vC!)#7G>2nvG6?dm+E-3OLEQ#c$DMetj+XhPmN{B2 z!$fxKHtSu7b>81Z#s+bnd`{%|c2LC8EJud2c)ym;YjFtA>1!%CMDWp_=}QGZiiF>P zgma#s?0tlqzAD<9;cl$h-$e$5aU#q!zFF(JXS&Z#g&`c9B zk6n1zA*FUab}3a7LHbZ>Taal%wgRE;Sh(!j6w?mNw89!AN|&-`Bw|Xbt&5OSTP^9- z`h}F*4Jnn>{OtV6NQ*=HpfGK`&&V?nFJD+oly?jp`=j@WRo6959o6?Y5OwBlHA<7T zPfXHIh2^59oUIF+#vuU3`+j`letfmk`L!oLrJGagAXa5SYW9!U z&9HNZ$@mx^{uwBsU&gPS37!8ZI`Ah1Dg^#Jfs+J|5jaRdn7;o5l?ae=SR>AdOyAEw z@NWq0esHiiF!11*cVl4S7thB7!=|~KuoG(yOFg?(&gM$lye3!#eM8Ey+$RlXIR&J0 z^(sda9z86p^pII6bnkjwIiE%TS*?^L(c6&lAy0YuzXQhWea1cRp`rKX`GAdcFw_lr9}M!WK9=KJ?Xvg5kH^46`b@! MG<@I@KjLivAK{1UXaE2J literal 0 HcmV?d00001 diff --git a/tests/scraping/test_http_fetch.py b/tests/scraping/test_http_fetch.py new file mode 100755 index 0000000..b54cfa4 --- /dev/null +++ b/tests/scraping/test_http_fetch.py @@ -0,0 +1,290 @@ +""" +Tests pour pricewatch.app.scraping.http_fetch + +Teste la récupération HTTP avec mocks pour éviter les vraies requêtes. 
+""" + +from unittest.mock import Mock, patch + +import pytest +import requests +from requests.exceptions import RequestException, Timeout + +from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http + + +class TestFetchResult: + """Tests pour la classe FetchResult.""" + + def test_success_result(self): + """Création d'un résultat réussi.""" + result = FetchResult( + success=True, + html="Test", + status_code=200, + duration_ms=150, + ) + + assert result.success is True + assert result.html == "Test" + assert result.error is None + assert result.status_code == 200 + assert result.duration_ms == 150 + + def test_error_result(self): + """Création d'un résultat d'erreur.""" + result = FetchResult( + success=False, + error="403 Forbidden", + status_code=403, + duration_ms=100, + ) + + assert result.success is False + assert result.html is None + assert result.error == "403 Forbidden" + assert result.status_code == 403 + assert result.duration_ms == 100 + + def test_minimal_result(self): + """Résultat minimal avec success uniquement.""" + result = FetchResult(success=False) + + assert result.success is False + assert result.html is None + assert result.error is None + assert result.status_code is None + assert result.duration_ms is None + + +class TestFetchHttp: + """Tests pour la fonction fetch_http().""" + + def test_fetch_success(self, mocker): + """Requête HTTP réussie (200 OK).""" + # Mock de requests.get + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "Test Page" + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is True + assert result.html == "Test Page" + assert result.status_code == 200 + assert result.error is None + assert result.duration_ms is not None + assert result.duration_ms >= 0 + + def test_fetch_with_custom_timeout(self, mocker): + """Requête avec timeout personnalisé.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "OK" + mock_get = mocker.patch("requests.get", return_value=mock_response) + + fetch_http("https://example.com", timeout=60) + + # Vérifier que timeout est passé à requests.get + mock_get.assert_called_once() + call_kwargs = mock_get.call_args.kwargs + assert call_kwargs["timeout"] == 60 + + def test_fetch_with_custom_headers(self, mocker): + """Requête avec headers personnalisés.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "OK" + mock_get = mocker.patch("requests.get", return_value=mock_response) + + custom_headers = {"X-Custom-Header": "test-value"} + fetch_http("https://example.com", headers=custom_headers) + + # Vérifier que les headers personnalisés sont inclus + mock_get.assert_called_once() + call_kwargs = mock_get.call_args.kwargs + assert "X-Custom-Header" in call_kwargs["headers"] + assert call_kwargs["headers"]["X-Custom-Header"] == "test-value" + # Headers par défaut doivent aussi être présents + assert "User-Agent" in call_kwargs["headers"] + + def test_fetch_403_forbidden(self, mocker): + """Requête bloquée (403 Forbidden).""" + mock_response = Mock() + mock_response.status_code = 403 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.html is None + assert result.status_code == 403 + assert "403 Forbidden" in result.error + assert "Anti-bot" in result.error + + def test_fetch_404_not_found(self, mocker): + """Page introuvable (404 Not 
Found).""" + mock_response = Mock() + mock_response.status_code = 404 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.status_code == 404 + assert "404 Not Found" in result.error + + def test_fetch_429_rate_limit(self, mocker): + """Rate limit atteint (429 Too Many Requests).""" + mock_response = Mock() + mock_response.status_code = 429 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.status_code == 429 + assert "429" in result.error + assert "Rate limit" in result.error + + def test_fetch_500_server_error(self, mocker): + """Erreur serveur (500 Internal Server Error).""" + mock_response = Mock() + mock_response.status_code = 500 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.status_code == 500 + assert "500" in result.error + assert "Server Error" in result.error + + def test_fetch_503_service_unavailable(self, mocker): + """Service indisponible (503).""" + mock_response = Mock() + mock_response.status_code = 503 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.status_code == 503 + assert "503" in result.error + + def test_fetch_unknown_status_code(self, mocker): + """Code de statut inconnu (par ex. 418 I'm a teapot).""" + mock_response = Mock() + mock_response.status_code = 418 + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is False + assert result.status_code == 418 + assert "418" in result.error + + def test_fetch_timeout_error(self, mocker): + """Timeout lors de la requête.""" + mocker.patch("requests.get", side_effect=Timeout("Connection timed out")) + + result = fetch_http("https://example.com", timeout=10) + + assert result.success is False + assert result.html is None + assert "Timeout" in result.error + assert result.duration_ms is not None + + def test_fetch_request_exception(self, mocker): + """Exception réseau générique.""" + mocker.patch( + "requests.get", + side_effect=RequestException("Network error"), + ) + + result = fetch_http("https://example.com") + + assert result.success is False + assert "Erreur réseau" in result.error + assert result.duration_ms is not None + + def test_fetch_unexpected_exception(self, mocker): + """Exception inattendue.""" + mocker.patch("requests.get", side_effect=ValueError("Unexpected error")) + + result = fetch_http("https://example.com") + + assert result.success is False + assert "Erreur inattendue" in result.error + assert result.duration_ms is not None + + def test_fetch_empty_url(self): + """URL vide retourne une erreur.""" + result = fetch_http("") + + assert result.success is False + assert "URL vide" in result.error + assert result.html is None + + def test_fetch_whitespace_url(self): + """URL avec espaces uniquement retourne une erreur.""" + result = fetch_http(" ") + + assert result.success is False + assert "URL vide" in result.error + + def test_fetch_no_redirects(self, mocker): + """Requête sans suivre les redirections.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "OK" + mock_get = mocker.patch("requests.get", return_value=mock_response) + + fetch_http("https://example.com", 
follow_redirects=False) + + mock_get.assert_called_once() + call_kwargs = mock_get.call_args.kwargs + assert call_kwargs["allow_redirects"] is False + + def test_fetch_uses_random_user_agent(self, mocker): + """Vérifie qu'un User-Agent aléatoire est utilisé.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "OK" + mock_get = mocker.patch("requests.get", return_value=mock_response) + + fetch_http("https://example.com") + + # Vérifier qu'un User-Agent est présent + mock_get.assert_called_once() + call_kwargs = mock_get.call_args.kwargs + assert "User-Agent" in call_kwargs["headers"] + # User-Agent doit contenir "Mozilla" (présent dans tous les UA) + assert "Mozilla" in call_kwargs["headers"]["User-Agent"] + + def test_fetch_duration_is_measured(self, mocker): + """Vérifie que la durée est mesurée.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = "OK" + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.duration_ms is not None + assert isinstance(result.duration_ms, int) + assert result.duration_ms >= 0 + + def test_fetch_large_response(self, mocker): + """Requête avec réponse volumineuse.""" + mock_response = Mock() + mock_response.status_code = 200 + # Simuler une grosse page HTML (1 MB) + mock_response.text = "" + ("x" * 1000000) + "" + mocker.patch("requests.get", return_value=mock_response) + + result = fetch_http("https://example.com") + + assert result.success is True + assert len(result.html) > 1000000 diff --git a/tests/scraping/test_pipeline.py b/tests/scraping/test_pipeline.py new file mode 100755 index 0000000..d0f1407 --- /dev/null +++ b/tests/scraping/test_pipeline.py @@ -0,0 +1,82 @@ +""" +Tests pour ScrapingPipeline. 
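Taken together, the assertions above imply the rough control flow of fetch_http(): reject blank URLs, time the request, and translate exceptions and non-200 status codes into error strings. A sketch under those assumptions; only the strings the tests match on are known, and the rest of the wording plus the User-Agent handling are placeholders:

# Rough, assumed shape of fetch_http; illustrative only, the real
# implementation lives in pricewatch/app/scraping/http_fetch.py.
import time

import requests
from requests.exceptions import RequestException, Timeout

from pricewatch.app.scraping.http_fetch import FetchResult


def fetch_http_sketch(url: str, timeout: int = 30) -> FetchResult:
    if not url.strip():
        return FetchResult(success=False, error="URL vide")
    start = time.monotonic()
    try:
        response = requests.get(
            url,
            timeout=timeout,
            headers={"User-Agent": "Mozilla/5.0"},  # the real code rotates User-Agents
            allow_redirects=True,
        )
    except Timeout:
        return FetchResult(success=False, error="Timeout",
                           duration_ms=int((time.monotonic() - start) * 1000))
    except RequestException as exc:
        return FetchResult(success=False, error=f"Erreur réseau: {exc}",
                           duration_ms=int((time.monotonic() - start) * 1000))
    duration_ms = int((time.monotonic() - start) * 1000)
    if response.status_code == 200:
        return FetchResult(success=True, html=response.text,
                           status_code=200, duration_ms=duration_ms)
    # Non-200 codes map to explanatory messages the tests match on,
    # e.g. 403 -> "... Anti-bot ...", 429 -> "... Rate limit ...".
    return FetchResult(success=False, status_code=response.status_code,
                       error=str(response.status_code), duration_ms=duration_ms)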
+""" + +from dataclasses import dataclass +from datetime import datetime + +import pytest + +from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot +from pricewatch.app.db.connection import get_session, init_db, reset_engine +from pricewatch.app.db.models import Product +from pricewatch.app.scraping.pipeline import ScrapingPipeline + + +@dataclass +class FakeDbConfig: + url: str + + +@dataclass +class FakeAppConfig: + db: FakeDbConfig + debug: bool = False + enable_db: bool = True + + +@pytest.fixture(autouse=True) +def reset_db_engine(): + """Reset l'engine global entre les tests.""" + reset_engine() + yield + reset_engine() + + +def test_pipeline_persists_snapshot(): + """Le pipeline persiste un snapshot en base SQLite.""" + config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:")) + init_db(config) + + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com/product", + fetched_at=datetime(2026, 1, 14, 12, 30, 0), + title="Produit pipeline", + price=99.99, + currency="EUR", + reference="B08PIPE", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + pipeline = ScrapingPipeline(config=config) + product_id = pipeline.process_snapshot(snapshot, save_to_db=True) + + assert product_id is not None + + with get_session(config) as session: + assert session.query(Product).count() == 1 + + +def test_pipeline_respects_disable_flag(): + """Le pipeline ignore la persistence si enable_db=False.""" + config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"), enable_db=False) + init_db(config) + + snapshot = ProductSnapshot( + source="amazon", + url="https://example.com/product", + fetched_at=datetime(2026, 1, 14, 12, 45, 0), + title="Produit pipeline", + price=99.99, + currency="EUR", + reference="B08PIPE", + debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS), + ) + + pipeline = ScrapingPipeline(config=config) + product_id = pipeline.process_snapshot(snapshot, save_to_db=True) + + assert product_id is None + with get_session(config) as session: + assert session.query(Product).count() == 0 diff --git a/tests/scraping/test_pw_fetch.py b/tests/scraping/test_pw_fetch.py new file mode 100755 index 0000000..27f5c25 --- /dev/null +++ b/tests/scraping/test_pw_fetch.py @@ -0,0 +1,388 @@ +""" +Tests pour pricewatch.app.scraping.pw_fetch + +Teste la récupération Playwright avec mocks pour éviter de lancer vraiment un navigateur. 
+""" + +from unittest.mock import Mock, patch + +import pytest +from playwright.sync_api import TimeoutError as PlaywrightTimeout + +from pricewatch.app.scraping.pw_fetch import ( + PlaywrightFetchResult, + fetch_playwright, + fetch_with_fallback, +) + + +class TestPlaywrightFetchResult: + """Tests pour la classe PlaywrightFetchResult.""" + + def test_success_result(self): + """Création d'un résultat réussi.""" + result = PlaywrightFetchResult( + success=True, + html="Test", + screenshot=b"fake_screenshot_bytes", + duration_ms=2500, + ) + + assert result.success is True + assert result.html == "Test" + assert result.screenshot == b"fake_screenshot_bytes" + assert result.error is None + assert result.duration_ms == 2500 + + def test_error_result(self): + """Création d'un résultat d'erreur.""" + result = PlaywrightFetchResult( + success=False, + error="Timeout", + screenshot=b"error_screenshot", + duration_ms=3000, + ) + + assert result.success is False + assert result.html is None + assert result.error == "Timeout" + assert result.screenshot == b"error_screenshot" + assert result.duration_ms == 3000 + + def test_minimal_result(self): + """Résultat minimal.""" + result = PlaywrightFetchResult(success=False) + + assert result.success is False + assert result.html is None + assert result.screenshot is None + assert result.error is None + assert result.duration_ms is None + + +class TestFetchPlaywright: + """Tests pour fetch_playwright().""" + + @pytest.fixture + def mock_playwright_stack(self, mocker): + """Fixture: Mock complet de la stack Playwright.""" + # Mock de la page + mock_page = Mock() + mock_page.content.return_value = "Playwright Test" + mock_page.screenshot.return_value = b"fake_screenshot_data" + mock_page.goto.return_value = Mock(status=200) + + # Mock du context + mock_context = Mock() + mock_context.new_page.return_value = mock_page + + # Mock du browser + mock_browser = Mock() + mock_browser.new_context.return_value = mock_context + + # Mock playwright chromium + mock_chromium = Mock() + mock_chromium.launch.return_value = mock_browser + + # Mock playwright + mock_playwright_obj = Mock() + mock_playwright_obj.chromium = mock_chromium + + # Mock sync_playwright().start() + mock_sync_playwright = Mock() + mock_sync_playwright.start.return_value = mock_playwright_obj + + mocker.patch( + "pricewatch.app.scraping.pw_fetch.sync_playwright", + return_value=mock_sync_playwright, + ) + + return { + "playwright": mock_playwright_obj, + "browser": mock_browser, + "context": mock_context, + "page": mock_page, + } + + def test_fetch_success(self, mock_playwright_stack): + """Récupération Playwright réussie.""" + result = fetch_playwright("https://example.com") + + assert result.success is True + assert result.html == "Playwright Test" + assert result.screenshot is None # Par défaut pas de screenshot + assert result.error is None + assert result.duration_ms is not None + assert result.duration_ms >= 0 + + # Vérifier que la page a été visitée + mock_playwright_stack["page"].goto.assert_called_once_with( + "https://example.com", wait_until="domcontentloaded" + ) + + def test_fetch_with_screenshot(self, mock_playwright_stack): + """Récupération avec screenshot.""" + result = fetch_playwright("https://example.com", save_screenshot=True) + + assert result.success is True + assert result.screenshot == b"fake_screenshot_data" + + # Vérifier que screenshot() a été appelé + mock_playwright_stack["page"].screenshot.assert_called_once() + + def test_fetch_headful_mode(self, mock_playwright_stack): + 
"""Mode headful (navigateur visible).""" + result = fetch_playwright("https://example.com", headless=False) + + assert result.success is True + + # Vérifier que headless=False a été passé + mock_playwright_stack["playwright"].chromium.launch.assert_called_once() + call_kwargs = mock_playwright_stack["playwright"].chromium.launch.call_args.kwargs + assert call_kwargs["headless"] is False + + def test_fetch_with_custom_timeout(self, mock_playwright_stack): + """Timeout personnalisé.""" + result = fetch_playwright("https://example.com", timeout_ms=30000) + + assert result.success is True + + # Vérifier que set_default_timeout a été appelé + mock_playwright_stack["page"].set_default_timeout.assert_called_once_with(30000) + + def test_fetch_with_wait_for_selector(self, mock_playwright_stack): + """Attente d'un sélecteur CSS spécifique.""" + result = fetch_playwright( + "https://example.com", wait_for_selector=".product-title" + ) + + assert result.success is True + + # Vérifier que wait_for_selector a été appelé + mock_playwright_stack["page"].wait_for_selector.assert_called_once_with( + ".product-title", timeout=60000 + ) + + def test_fetch_wait_for_selector_timeout(self, mock_playwright_stack): + """Timeout lors de l'attente du sélecteur.""" + # Le sélecteur timeout mais la page continue + mock_playwright_stack["page"].wait_for_selector.side_effect = PlaywrightTimeout( + "Selector timeout" + ) + + result = fetch_playwright( + "https://example.com", wait_for_selector=".non-existent" + ) + + # Doit quand même réussir (le wait_for_selector est non-bloquant) + assert result.success is True + assert result.html is not None + + def test_fetch_empty_url(self): + """URL vide retourne une erreur.""" + result = fetch_playwright("") + + assert result.success is False + assert "URL vide" in result.error + assert result.html is None + + def test_fetch_whitespace_url(self): + """URL avec espaces retourne une erreur.""" + result = fetch_playwright(" ") + + assert result.success is False + assert "URL vide" in result.error + + def test_fetch_no_response_from_server(self, mock_playwright_stack): + """Pas de réponse du serveur.""" + mock_playwright_stack["page"].goto.return_value = None + + result = fetch_playwright("https://example.com") + + assert result.success is False + assert "Pas de réponse du serveur" in result.error + + def test_fetch_playwright_timeout(self, mock_playwright_stack): + """Timeout Playwright lors de la navigation.""" + mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout( + "Navigation timeout" + ) + + result = fetch_playwright("https://example.com", timeout_ms=10000) + + assert result.success is False + assert "Timeout" in result.error + assert result.duration_ms is not None + + def test_fetch_playwright_generic_error(self, mock_playwright_stack): + """Erreur générique Playwright.""" + mock_playwright_stack["page"].goto.side_effect = Exception( + "Generic Playwright error" + ) + + result = fetch_playwright("https://example.com") + + assert result.success is False + assert "Erreur Playwright" in result.error + assert result.duration_ms is not None + + def test_fetch_cleanup_on_success(self, mock_playwright_stack): + """Nettoyage des ressources sur succès.""" + result = fetch_playwright("https://example.com") + + assert result.success is True + + # Vérifier que les ressources sont nettoyées + mock_playwright_stack["page"].close.assert_called_once() + mock_playwright_stack["browser"].close.assert_called_once() + mock_playwright_stack["playwright"].stop.assert_called_once() + 
+ def test_fetch_cleanup_on_error(self, mock_playwright_stack): + """Nettoyage des ressources sur erreur.""" + mock_playwright_stack["page"].goto.side_effect = Exception("Test error") + + result = fetch_playwright("https://example.com") + + assert result.success is False + + # Vérifier que les ressources sont nettoyées même en cas d'erreur + mock_playwright_stack["page"].close.assert_called_once() + mock_playwright_stack["browser"].close.assert_called_once() + mock_playwright_stack["playwright"].stop.assert_called_once() + + def test_fetch_screenshot_on_error(self, mock_playwright_stack): + """Screenshot capturé même en cas d'erreur.""" + mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout("Timeout") + + result = fetch_playwright("https://example.com", save_screenshot=True) + + assert result.success is False + assert result.screenshot == b"fake_screenshot_data" + + # Screenshot doit avoir été tenté + mock_playwright_stack["page"].screenshot.assert_called_once() + + +class TestFetchWithFallback: + """Tests pour fetch_with_fallback().""" + + def test_http_success_no_playwright(self, mocker): + """Si HTTP réussit, Playwright n'est pas appelé.""" + # Mock fetch_http qui réussit + mock_http_result = Mock() + mock_http_result.success = True + mock_http_result.html = "HTTP Success" + mock_http_result.duration_ms = 150 + + mocker.patch( + "pricewatch.app.scraping.http_fetch.fetch_http", + return_value=mock_http_result, + ) + + # Mock fetch_playwright (ne devrait pas être appelé) + mock_playwright = mocker.patch( + "pricewatch.app.scraping.pw_fetch.fetch_playwright" + ) + + result = fetch_with_fallback("https://example.com") + + assert result.success is True + assert result.html == "HTTP Success" + assert result.duration_ms == 150 + + # Playwright ne doit pas être appelé + mock_playwright.assert_not_called() + + def test_http_fails_playwright_fallback(self, mocker): + """Si HTTP échoue, fallback vers Playwright.""" + # Mock fetch_http qui échoue + mock_http_result = Mock() + mock_http_result.success = False + mock_http_result.error = "403 Forbidden" + + mocker.patch( + "pricewatch.app.scraping.http_fetch.fetch_http", + return_value=mock_http_result, + ) + + # Mock fetch_playwright qui réussit + mock_playwright_result = PlaywrightFetchResult( + success=True, + html="Playwright Success", + duration_ms=2500, + ) + + mock_playwright = mocker.patch( + "pricewatch.app.scraping.pw_fetch.fetch_playwright", + return_value=mock_playwright_result, + ) + + result = fetch_with_fallback("https://example.com") + + assert result.success is True + assert result.html == "Playwright Success" + + # Playwright doit avoir été appelé + mock_playwright.assert_called_once() + + def test_skip_http_direct_playwright(self, mocker): + """Mode Playwright direct (sans essayer HTTP d'abord).""" + # Mock fetch_http (ne devrait pas être appelé) + mock_http = mocker.patch("pricewatch.app.scraping.http_fetch.fetch_http") + + # Mock fetch_playwright + mock_playwright_result = PlaywrightFetchResult( + success=True, + html="Playwright Direct", + duration_ms=2500, + ) + + mock_playwright = mocker.patch( + "pricewatch.app.scraping.pw_fetch.fetch_playwright", + return_value=mock_playwright_result, + ) + + result = fetch_with_fallback("https://example.com", try_http_first=False) + + assert result.success is True + assert result.html == "Playwright Direct" + + # HTTP ne doit pas être appelé + mock_http.assert_not_called() + + # Playwright doit avoir été appelé + mock_playwright.assert_called_once() + + def 
+    def test_playwright_options_passed(self, mocker):
+        """Playwright options are passed through correctly."""
+        # Mock fetch_http to fail
+        mock_http_result = Mock()
+        mock_http_result.success = False
+        mock_http_result.error = "403 Forbidden"
+
+        mocker.patch(
+            "pricewatch.app.scraping.http_fetch.fetch_http",
+            return_value=mock_http_result,
+        )
+
+        # Mock fetch_playwright
+        mock_playwright_result = PlaywrightFetchResult(
+            success=True,
+            html="OK",
+            duration_ms=2500,
+        )
+
+        mock_playwright = mocker.patch(
+            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
+            return_value=mock_playwright_result,
+        )
+
+        # Custom options
+        options = {"headless": False, "timeout_ms": 30000, "save_screenshot": True}
+
+        result = fetch_with_fallback("https://example.com", playwright_options=options)
+
+        assert result.success is True
+
+        # Check that the options are passed to fetch_playwright
+        mock_playwright.assert_called_once_with("https://example.com", **options)
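For reference, a short usage sketch of fetch_with_fallback() assembled from the call patterns exercised above; the try_http_first and playwright_options parameters appear in the tests, while the option values themselves are arbitrary:

# Usage sketch; the fallback tries a cheap HTTP GET first and only starts
# a Playwright browser when that attempt fails.
from pricewatch.app.scraping.pw_fetch import fetch_with_fallback

result = fetch_with_fallback(
    "https://example.com/product",
    try_http_first=True,
    playwright_options={"headless": True, "timeout_ms": 30000},
)
if result.success:
    print(f"fetched {len(result.html or '')} bytes of HTML")
else:
    print(f"fetch failed: {result.error}")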