"""Tests for pricewatch.app.core.io.

Covers YAML/JSON reading and writing, and the debug save helpers.
"""

import json
import tempfile
from datetime import datetime
from pathlib import Path

import pytest
import yaml

from pricewatch.app.core.io import (
    ScrapingConfig,
    ScrapingOptions,
    read_json_results,
    read_yaml_config,
    save_debug_html,
    save_debug_screenshot,
    write_json_results,
)
from pricewatch.app.core.schema import (
    DebugInfo,
    DebugStatus,
    FetchMethod,
    ProductSnapshot,
    StockStatus,
)


def _dump_yaml(target: Path, payload: dict) -> None:
    """Serialize *payload* to YAML and write it to *target*."""
    target.write_text(yaml.dump(payload))


def _load_json(source: Path):
    """Return the decoded JSON content of *source*."""
    return json.loads(source.read_text())


class TestScrapingOptions:
    """Tests for the ScrapingOptions model."""

    def test_default_values(self) -> None:
        """A bare ScrapingOptions() exposes the expected defaults."""
        opts = ScrapingOptions()

        assert opts.use_playwright is True
        assert opts.headful is False
        assert opts.save_html is True
        assert opts.save_screenshot is True
        assert opts.timeout_ms == 60000

    def test_custom_values(self) -> None:
        """Explicitly supplied values are stored unchanged."""
        overrides = {
            "use_playwright": False,
            "headful": True,
            "save_html": False,
            "save_screenshot": False,
            "timeout_ms": 30000,
        }
        opts = ScrapingOptions(**overrides)

        assert opts.use_playwright is False
        assert opts.headful is True
        assert opts.save_html is False
        assert opts.save_screenshot is False
        assert opts.timeout_ms == 30000

    def test_timeout_validation_min(self) -> None:
        """A timeout of 500 ms raises ValueError."""
        with pytest.raises(ValueError):
            ScrapingOptions(timeout_ms=500)

    def test_timeout_validation_valid(self) -> None:
        """A timeout of exactly 1000 ms is accepted."""
        opts = ScrapingOptions(timeout_ms=1000)

        assert opts.timeout_ms == 1000


class TestScrapingConfig:
    """Tests for the ScrapingConfig model."""

    def test_minimal_config(self) -> None:
        """A config built from URLs only gets a ScrapingOptions instance."""
        cfg = ScrapingConfig(urls=["https://example.com"])

        assert len(cfg.urls) == 1
        assert cfg.urls[0] == "https://example.com"
        assert isinstance(cfg.options, ScrapingOptions)

    def test_config_with_options(self) -> None:
        """A config built from URLs plus explicit options keeps both."""
        opts = ScrapingOptions(use_playwright=False, timeout_ms=10000)
        cfg = ScrapingConfig(
            urls=["https://example.com", "https://test.com"],
            options=opts,
        )

        assert len(cfg.urls) == 2
        assert cfg.options.use_playwright is False
        assert cfg.options.timeout_ms == 10000

    def test_validate_urls_empty_list(self) -> None:
        """An empty URL list raises ValueError."""
        with pytest.raises(ValueError, match="Au moins une URL"):
            ScrapingConfig(urls=[])

    def test_validate_urls_strips_whitespace(self) -> None:
        """Surrounding whitespace is stripped from URLs."""
        cfg = ScrapingConfig(urls=[" https://example.com ", "https://test.com"])

        assert cfg.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_removes_empty(self) -> None:
        """Empty and whitespace-only entries are dropped."""
        raw_urls = ["https://example.com", "", " ", "https://test.com"]
        cfg = ScrapingConfig(urls=raw_urls)

        assert len(cfg.urls) == 2
        assert cfg.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_all_empty(self) -> None:
        """A list made only of blank entries raises ValueError."""
        with pytest.raises(ValueError, match="Aucune URL valide"):
            ScrapingConfig(urls=["", " ", "\t"])


class TestReadYamlConfig:
    """Tests for read_yaml_config()."""

    def test_read_valid_yaml(self, tmp_path: Path) -> None:
        """A YAML file with URLs and options is loaded into a config."""
        cfg_file = tmp_path / "config.yaml"
        _dump_yaml(
            cfg_file,
            {
                "urls": ["https://example.com", "https://test.com"],
                "options": {"use_playwright": False, "timeout_ms": 30000},
            },
        )

        cfg = read_yaml_config(cfg_file)

        assert len(cfg.urls) == 2
        assert cfg.urls[0] == "https://example.com"
        assert cfg.options.use_playwright is False
        assert cfg.options.timeout_ms == 30000

    def test_read_yaml_minimal(self, tmp_path: Path) -> None:
        """A YAML file with URLs only falls back to default options."""
        cfg_file = tmp_path / "config.yaml"
        _dump_yaml(cfg_file, {"urls": ["https://example.com"]})

        cfg = read_yaml_config(cfg_file)

        assert len(cfg.urls) == 1
        # Default options
        assert cfg.options.use_playwright is True
        assert cfg.options.timeout_ms == 60000

    def test_read_yaml_file_not_found(self, tmp_path: Path) -> None:
        """A missing file raises FileNotFoundError."""
        missing = tmp_path / "nonexistent.yaml"

        with pytest.raises(FileNotFoundError):
            read_yaml_config(missing)

    def test_read_yaml_empty_file(self, tmp_path: Path) -> None:
        """An empty YAML file raises ValueError."""
        empty_file = tmp_path / "empty.yaml"
        empty_file.write_text("")

        with pytest.raises(ValueError, match="Fichier YAML vide"):
            read_yaml_config(empty_file)

    def test_read_yaml_invalid_syntax(self, tmp_path: Path) -> None:
        """Syntactically invalid YAML raises ValueError."""
        broken_file = tmp_path / "invalid.yaml"
        broken_file.write_text("urls: [invalid yaml syntax")

        with pytest.raises(ValueError, match="YAML invalide"):
            read_yaml_config(broken_file)

    def test_read_yaml_missing_urls(self, tmp_path: Path) -> None:
        """A YAML file without a 'urls' field raises an exception."""
        cfg_file = tmp_path / "config.yaml"
        _dump_yaml(cfg_file, {"options": {"use_playwright": False}})

        with pytest.raises(Exception):  # Pydantic validation error
            read_yaml_config(cfg_file)

    def test_read_yaml_accepts_path_string(self, tmp_path: Path) -> None:
        """The path may be given as a plain string."""
        cfg_file = tmp_path / "config.yaml"
        _dump_yaml(cfg_file, {"urls": ["https://example.com"]})

        cfg = read_yaml_config(str(cfg_file))

        assert len(cfg.urls) == 1


class TestWriteJsonResults:
    """Tests for write_json_results()."""

    @pytest.fixture
    def sample_snapshot(self) -> ProductSnapshot:
        """Fixture: a fully populated example ProductSnapshot."""
        debug_info = DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            errors=[],
            notes=[],
        )
        return ProductSnapshot(
            source="test",
            url="https://example.com/product",
            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
            title="Test Product",
            price=99.99,
            currency="EUR",
            stock_status=StockStatus.IN_STOCK,
            reference="TEST123",
            images=["https://example.com/img1.jpg"],
            category="Test Category",
            specs={"Brand": "TestBrand"},
            debug=debug_info,
        )

    def test_write_single_snapshot(
        self, tmp_path: Path, sample_snapshot: ProductSnapshot
    ) -> None:
        """A single snapshot is written as a one-element JSON list."""
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot], out_file)

        assert out_file.exists()
        # Check the content
        payload = _load_json(out_file)
        assert isinstance(payload, list)
        assert len(payload) == 1
        first = payload[0]
        assert first["source"] == "test"
        assert first["title"] == "Test Product"

    def test_write_multiple_snapshots(
        self, tmp_path: Path, sample_snapshot: ProductSnapshot
    ) -> None:
        """Several snapshots are written in the order given."""
        second_snapshot = ProductSnapshot(
            source="test2",
            url="https://example.com/product2",
            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
            title="Test Product 2",
            price=49.99,
            currency="EUR",
            stock_status=StockStatus.OUT_OF_STOCK,
            debug=DebugInfo(
                method=FetchMethod.PLAYWRIGHT,
                status=DebugStatus.PARTIAL,
                errors=["Test error"],
                notes=[],
            ),
        )
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot, second_snapshot], out_file)

        payload = _load_json(out_file)
        assert len(payload) == 2
        assert payload[0]["source"] == "test"
        assert payload[1]["source"] == "test2"

    def test_write_creates_parent_dirs(
        self, tmp_path: Path, sample_snapshot: ProductSnapshot
    ) -> None:
        """Missing parent directories are created."""
        out_file = tmp_path / "sub" / "dir" / "results.json"

        write_json_results([sample_snapshot], out_file)

        assert out_file.exists()
        assert out_file.parent.exists()

    def test_write_empty_list(self, tmp_path: Path) -> None:
        """An empty list is written as an empty JSON list."""
        out_file = tmp_path / "empty.json"

        write_json_results([], out_file)

        assert out_file.exists()
        assert _load_json(out_file) == []

    def test_write_indent_control(
        self, tmp_path: Path, sample_snapshot: ProductSnapshot
    ) -> None:
        """The indent argument controls how compact the output is."""
        # With indent
        pretty_file = tmp_path / "pretty.json"
        write_json_results([sample_snapshot], pretty_file, indent=2)
        pretty_text = pretty_file.read_text()
        assert "\n" in pretty_text  # Pretty-printed

        # Without indent (compact)
        compact_file = tmp_path / "compact.json"
        write_json_results([sample_snapshot], compact_file, indent=None)
        compact_text = compact_file.read_text()
        assert len(compact_text) < len(pretty_text)  # More compact

    def test_write_accepts_path_string(
        self, tmp_path: Path, sample_snapshot: ProductSnapshot
    ) -> None:
        """The path may be given as a plain string."""
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot], str(out_file))

        assert out_file.exists()


class TestReadJsonResults:
    """Tests for read_json_results()."""

    @pytest.fixture
    def json_file_with_snapshot(self, tmp_path: Path) -> Path:
        """Fixture: a JSON file containing one serialized snapshot."""
        debug_record = {
            "method": "http",
            "status": "success",
            "errors": [],
            "notes": [],
            "duration_ms": None,
            "html_size_bytes": None,
        }
        snapshot_record = {
            "source": "test",
            "url": "https://example.com/product",
            "fetched_at": "2024-01-01T12:00:00",
            "title": "Test Product",
            "price": 99.99,
            "currency": "EUR",
            "shipping_cost": None,
            "stock_status": "in_stock",
            "reference": "TEST123",
            "images": ["https://example.com/img.jpg"],
            "category": "Test",
            "specs": {"Brand": "Test"},
            "debug": debug_record,
        }
        results_file = tmp_path / "results.json"
        results_file.write_text(json.dumps([snapshot_record]))
        return results_file

    def test_read_single_snapshot(self, json_file_with_snapshot: Path) -> None:
        """A file with one entry yields one ProductSnapshot."""
        loaded = read_json_results(json_file_with_snapshot)

        assert len(loaded) == 1
        only = loaded[0]
        assert isinstance(only, ProductSnapshot)
        assert only.source == "test"
        assert only.title == "Test Product"
        assert only.price == 99.99

    def test_read_file_not_found(self, tmp_path: Path) -> None:
        """A missing file raises FileNotFoundError."""
        missing = tmp_path / "nonexistent.json"

        with pytest.raises(FileNotFoundError):
            read_json_results(missing)

    def test_read_invalid_json(self, tmp_path: Path) -> None:
        """Invalid JSON raises ValueError."""
        broken_file = tmp_path / "invalid.json"
        broken_file.write_text("{invalid json")

        with pytest.raises(ValueError, match="JSON invalide"):
            read_json_results(broken_file)

    def test_read_not_a_list(self, tmp_path: Path) -> None:
        """JSON whose top level is not a list raises ValueError."""
        object_file = tmp_path / "notlist.json"
        object_file.write_text(json.dumps({"key": "value"}))

        with pytest.raises(ValueError, match="doit contenir une liste"):
            read_json_results(object_file)

    def test_read_empty_list(self, tmp_path: Path) -> None:
        """An empty JSON list is accepted and yields an empty list."""
        empty_file = tmp_path / "empty.json"
        empty_file.write_text(json.dumps([]))

        assert read_json_results(empty_file) == []

    def test_read_accepts_path_string(self, json_file_with_snapshot: Path) -> None:
        """The path may be given as a plain string."""
        loaded = read_json_results(str(json_file_with_snapshot))

        assert len(loaded) == 1


class TestSaveDebugHtml:
    """Tests for save_debug_html()."""

    def test_save_html_default_dir(self, tmp_path: Path, monkeypatch) -> None:
        """Without an output directory, the file is still written and named."""
        # Switch the working directory so the default location lands in tmp_path
        monkeypatch.chdir(tmp_path)
        markup = "Test"

        saved = save_debug_html(markup, "test_page")

        assert saved.exists()
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_custom_dir(self, tmp_path: Path) -> None:
        """The file is written into the directory passed explicitly."""
        target_dir = tmp_path / "debug_html"
        markup = "Test"

        saved = save_debug_html(markup, "test_page", target_dir)

        assert saved.parent == target_dir
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_creates_dir(self, tmp_path: Path) -> None:
        """A missing output directory is created."""
        target_dir = tmp_path / "sub" / "dir" / "html"
        markup = "Test"

        saved = save_debug_html(markup, "test_page", target_dir)

        assert target_dir.exists()
        assert saved.exists()

    def test_save_html_large_content(self, tmp_path: Path) -> None:
        """A 100000-character document round-trips with the same length."""
        # NOTE(review): the empty prefix/suffix look like placeholders for
        # markup that was lost somewhere upstream - confirm intended content.
        markup = "" + ("x" * 100000) + ""

        saved = save_debug_html(markup, "large_page", tmp_path)

        assert saved.exists()
        assert len(saved.read_text(encoding="utf-8")) == len(markup)


class TestSaveDebugScreenshot:
    """Tests for save_debug_screenshot()."""

    # Fake image payload shared by every test in this class.
    FAKE_PNG = b"\x89PNG fake image data"

    def test_save_screenshot_default_dir(self, tmp_path: Path, monkeypatch) -> None:
        """Without an output directory, the file is still written and named."""
        monkeypatch.chdir(tmp_path)

        saved = save_debug_screenshot(self.FAKE_PNG, "test_screenshot")

        assert saved.exists()
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == self.FAKE_PNG

    def test_save_screenshot_custom_dir(self, tmp_path: Path) -> None:
        """The file is written into the directory passed explicitly."""
        target_dir = tmp_path / "screenshots"

        saved = save_debug_screenshot(self.FAKE_PNG, "test_screenshot", target_dir)

        assert saved.parent == target_dir
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == self.FAKE_PNG

    def test_save_screenshot_creates_dir(self, tmp_path: Path) -> None:
        """A missing output directory is created."""
        target_dir = tmp_path / "sub" / "dir" / "screenshots"

        saved = save_debug_screenshot(self.FAKE_PNG, "test_screenshot", target_dir)

        assert target_dir.exists()
        assert saved.exists()