chore: sync project files

This commit is contained in:
Gilles Soulier
2026-01-13 19:49:04 +01:00
parent 53f8227941
commit ecda149a4b
149 changed files with 65272 additions and 1 deletions

137
test_selectors.py Executable file
View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Script de test pour valider les sélecteurs avec les fichiers JSON de test.
Usage:
python test_selectors.py test_amazon.json
python test_selectors.py test_cdiscount.json
"""
import json
import sys
from pathlib import Path
from pricewatch.app.core.registry import get_registry, register_store
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
def load_test_config(json_path: str) -> dict:
    """Read a JSON test-configuration file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        The deserialized JSON document.
    """
    with open(json_path, encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def _print_comparison(label: str, actual, expected) -> None:
    """Print a ✓ line when *actual* equals *expected*, otherwise a ✗ report."""
    if actual == expected:
        print(f" ✓ {label}: {actual}")
    else:
        print(f" ✗ {label} incorrecte:")
        print(f" Obtenu : {actual}")
        print(f" Attendu : {expected}")


def test_url_detection(config: dict):
    """Check store detection, URL canonicalization and reference extraction.

    For every URL in ``config['test_data']['valid_urls']`` the registry is
    asked to detect a store. When one is found, the canonical URL and the
    extracted reference are compared with the expected values from the
    configuration, and one ✓/✗ report is printed per check.

    Args:
        config: Parsed test configuration. Reads ``test_config.store`` and
            ``test_data`` (``valid_urls``, ``expected_canonical`` and either
            ``expected_asin`` or ``expected_sku``).

    Returns:
        None. Results are only printed to stdout.
    """
    print("\n" + "="*60)
    print("TEST 1: Détection d'URL et extraction référence")
    print("="*60)

    # Setup
    setup_stores()
    registry = get_registry()
    store_id = config['test_config']['store']
    test_data = config['test_data']

    # The expected reference lives under 'expected_asin' when the configured
    # store is 'amazon', and under 'expected_sku' for any other store.
    expected_ref_key = 'expected_asin' if store_id == 'amazon' else 'expected_sku'

    for url in test_data['valid_urls']:
        print(f"\n📍 Test URL: {url}")
        store = registry.detect_store(url)
        if not store:
            print(" ✗ Aucun store détecté")
            continue
        print(f" ✓ Store détecté: {store.store_id}")

        # Canonicalization
        canonical = store.canonicalize(url)
        _print_comparison("URL canonique", canonical, test_data['expected_canonical'])

        # Reference extraction
        ref = store.extract_reference(url)
        _print_comparison("Référence", ref, test_data.get(expected_ref_key))


# This is a CLI helper, not a pytest test: it needs an explicit ``config``
# argument, so pytest collection would fail with "fixture 'config' not found".
test_url_detection.__test__ = False
def test_selectors(config: dict):
    """Print the selectors declared in the test configuration.

    Nothing is evaluated against a real page: each entry of
    ``config['selectors']`` is only displayed (type, selector or pattern,
    optional attribute, expected value).

    Args:
        config: Parsed test configuration. ``config['selectors']`` maps a
            field name to a dict with at least ``type`` and ``expected``;
            ``css`` entries also provide ``selector`` (and optionally
            ``attribute``), ``regex`` entries provide ``pattern``.

    Returns:
        None. Output goes to stdout.

    Raises:
        KeyError: If a required key is missing from the configuration.
    """
    print("\n" + "="*60)
    print("TEST 2: Vérification des sélecteurs")
    print("="*60)

    for field, selector_config in config['selectors'].items():
        print(f"\n🔍 Champ: {field}")
        selector_type = selector_config['type']
        print(f" Type: {selector_type}")

        if selector_type == 'css':
            print(f" Sélecteur: {selector_config['selector']}")
            if 'attribute' in selector_config:
                print(f" Attribut: {selector_config['attribute']}")
        elif selector_type == 'regex':
            print(f" Pattern: {selector_config['pattern']}")

        print(f" Attendu: {selector_config['expected']}")


# This is a CLI helper, not a pytest test: it needs an explicit ``config``
# argument, so pytest collection would fail with "fixture 'config' not found".
test_selectors.__test__ = False
def setup_stores():
    """Register one instance of each supported store in the registry.

    Fetches the registry through ``get_registry()`` and registers an
    ``AmazonStore`` followed by a ``CdiscountStore``.
    """
    store_registry = get_registry()
    for store_cls in (AmazonStore, CdiscountStore):
        store_registry.register(store_cls())
def main():
    """Command-line entry point: load a JSON test file and run both checks.

    The path of the test configuration is read from the first CLI argument.
    The process exits with status 1 when the argument is missing, when the
    path does not exist or is a directory, or when the file is not valid
    JSON. Otherwise the configuration summary is printed, both test
    routines are run and follow-up hints are displayed.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_selectors.py <test_file.json>")
        print("\nExemples:")
        print(" python test_selectors.py test_amazon.json")
        print(" python test_selectors.py test_cdiscount.json")
        sys.exit(1)

    json_path = sys.argv[1]
    candidate = Path(json_path)
    # A directory passes exists() but cannot be opened as a JSON file, so it
    # is rejected here instead of crashing later with a traceback.
    if not candidate.exists() or candidate.is_dir():
        print(f"❌ Fichier introuvable: {json_path}")
        sys.exit(1)

    print(f"\n📄 Chargement: {json_path}")
    try:
        config = load_test_config(json_path)
    except json.JSONDecodeError as err:
        # Malformed JSON is a user error: report it and exit cleanly.
        print(f"❌ JSON invalide: {err}")
        sys.exit(1)

    test_config = config['test_config']
    store_name = test_config['store']
    url = test_config['url']
    description = test_config['description']
    print(f" Store: {store_name}")
    print(f" URL: {url}")
    print(f" Description: {description}")

    # Run the tests
    test_url_detection(config)
    test_selectors(config)

    print("\n" + "="*60)
    print("✅ Tests terminés")
    print("="*60)
    print("\n💡 Pour tester avec une vraie page HTML:")
    print(f" 1. Récupérer la page: pricewatch fetch '{url}' --http")
    print(f" 2. Parser: pricewatch parse {store_name} --in scraped/page.html")


# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()