138 lines
4.2 KiB
Python
Executable File
138 lines
4.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Test script that validates the selectors against the JSON test files.

Usage:
    python test_selectors.py test_amazon.json
    python test_selectors.py test_cdiscount.json
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from pricewatch.app.core.registry import get_registry, register_store
|
|
from pricewatch.app.stores.amazon.store import AmazonStore
|
|
from pricewatch.app.stores.cdiscount.store import CdiscountStore
|
|
|
|
|
|
def load_test_config(json_path: str) -> dict:
    """Load a JSON test-configuration file and return its top-level object.

    Args:
        json_path: Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        The decoded top-level JSON object.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    with open(json_path, encoding='utf-8') as handle:
        config = json.load(handle)

    # The declared return type is ``dict`` and the result is indexed by string
    # keys; reject arrays/scalars here instead of failing later with an
    # unrelated-looking TypeError.
    if not isinstance(config, dict):
        raise ValueError(
            f"Le fichier {json_path} doit contenir un objet JSON "
            f"(obtenu: {type(config).__name__})"
        )
    return config
|
|
|
|
|
|
def test_url_detection(config: dict):
    """Check store detection, URL canonicalisation and reference extraction.

    For every URL in ``config['test_data']['valid_urls']`` the registry is
    asked to detect a store. When one is found, its ``store_id`` is compared
    with ``config['test_config']['store']``, ``store.canonicalize(url)`` with
    ``test_data['expected_canonical']`` and ``store.extract_reference(url)``
    with ``test_data['expected_asin']`` (store ``'amazon'``) or
    ``test_data['expected_sku']`` (any other store). Each outcome is printed
    with a ✓ or ✗ marker; nothing is returned.

    Args:
        config: Decoded test configuration containing the ``test_config`` and
            ``test_data`` sections described above.
    """
    print("\n" + "=" * 60)
    print("TEST 1: Détection d'URL et extraction référence")
    print("=" * 60)

    # Setup
    setup_stores()
    registry = get_registry()
    store_id = config['test_config']['store']
    test_data = config['test_data']

    # The expected reference does not depend on the URL: look it up once.
    expected_ref_key = 'expected_asin' if store_id == 'amazon' else 'expected_sku'
    expected_ref = test_data.get(expected_ref_key)

    for url in test_data['valid_urls']:
        print(f"\n📍 Test URL: {url}")

        store = registry.detect_store(url)
        if not store:
            print(" ✗ Aucun store détecté")
            continue

        # Detecting *a* store is not enough: it must be the configured one,
        # otherwise a wrong detection would be reported as a success.
        if store.store_id == store_id:
            print(f" ✓ Store détecté: {store.store_id}")
        else:
            print(" ✗ Store détecté incorrect:")
            print(f" Obtenu : {store.store_id}")
            print(f" Attendu : {store_id}")

        # Canonicalisation
        canonical = store.canonicalize(url)
        expected_canonical = test_data['expected_canonical']
        if canonical == expected_canonical:
            print(f" ✓ URL canonique: {canonical}")
        else:
            print(" ✗ URL canonique incorrecte:")
            print(f" Obtenu : {canonical}")
            print(f" Attendu : {expected_canonical}")

        # Reference extraction
        ref = store.extract_reference(url)
        if ref == expected_ref:
            print(f" ✓ Référence: {ref}")
        else:
            print(" ✗ Référence incorrecte:")
            print(f" Obtenu : {ref}")
            print(f" Attendu : {expected_ref}")


# The name matches pytest's ``test_*`` pattern but ``config`` is a plain
# argument, not a fixture: opt out of collection so pytest does not error.
test_url_detection.__test__ = False
|
|
|
|
|
|
def test_selectors(config: dict):
    """Print every selector declared in the test configuration.

    Iterates over ``config['selectors']`` and prints, for each field, its
    ``type``, the type-specific details (``selector`` and optional
    ``attribute`` for ``'css'``, ``pattern`` for ``'regex'``; nothing extra
    for any other type) and its ``expected`` value. Nothing is returned.

    Args:
        config: Decoded test configuration with a ``selectors`` mapping of
            field name to selector description.

    Raises:
        KeyError: If a selector description lacks ``type``, ``expected`` or
            the key required by its type.
    """
    print("\n" + "=" * 60)
    print("TEST 2: Vérification des sélecteurs")
    print("=" * 60)

    for field, selector_config in config['selectors'].items():
        print(f"\n🔍 Champ: {field}")
        selector_type = selector_config['type']
        print(f" Type: {selector_type}")

        if selector_type == 'css':
            print(f" Sélecteur: {selector_config['selector']}")
            if 'attribute' in selector_config:
                print(f" Attribut: {selector_config['attribute']}")
        elif selector_type == 'regex':
            print(f" Pattern: {selector_config['pattern']}")

        print(f" Attendu: {selector_config['expected']}")


# The name matches pytest's ``test_*`` pattern but ``config`` is a plain
# argument, not a fixture: opt out of collection so pytest does not error.
test_selectors.__test__ = False
|
|
|
|
|
|
def setup_stores():
    """Register an Amazon and a Cdiscount store instance with the registry.

    The registry is the object returned by ``get_registry()``; the stores are
    instantiated and registered in that order.
    """
    store_registry = get_registry()
    for store_class in (AmazonStore, CdiscountStore):
        store_registry.register(store_class())
|
|
|
|
|
|
def main():
    """Command-line entry point: load a JSON test file and run both checks.

    Reads the JSON path from ``sys.argv[1]``, prints a summary of its
    ``test_config`` section (``store``, ``url``, ``description``), runs
    ``test_url_detection`` and ``test_selectors`` on it, then prints hints
    for testing against a real HTML page.

    Exits with status 1 (after printing a message) when no path is given,
    when the path is not an existing regular file, or when the file content
    cannot be decoded as a JSON configuration.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_selectors.py <test_file.json>")
        print("\nExemples:")
        print(" python test_selectors.py test_amazon.json")
        print(" python test_selectors.py test_cdiscount.json")
        sys.exit(1)

    json_path = sys.argv[1]

    # is_file() rather than exists(): a directory "exists" but opening it as
    # a JSON file would crash with an OSError instead of this message.
    if not Path(json_path).is_file():
        print(f"❌ Fichier introuvable: {json_path}")
        sys.exit(1)

    print(f"\n📄 Chargement: {json_path}")
    try:
        config = load_test_config(json_path)
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses: report unreadable content without a traceback.
        print(f"❌ JSON invalide: {json_path} ({err})")
        sys.exit(1)

    test_config = config['test_config']
    store_name = test_config['store']
    url = test_config['url']
    description = test_config['description']

    print(f" Store: {store_name}")
    print(f" URL: {url}")
    print(f" Description: {description}")

    # Run the checks
    test_url_detection(config)
    test_selectors(config)

    print("\n" + "=" * 60)
    print("✅ Tests terminés")
    print("=" * 60)
    print("\n💡 Pour tester avec une vraie page HTML:")
    print(f" 1. Récupérer la page: pricewatch fetch '{url}' --http")
    print(f" 2. Parser: pricewatch parse {store_name} --in scraped/page.html")
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|