codex

2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions
--- a/tests/core/test_io.py
+++ b/tests/core/test_io.py
@@ -0,0 +1,462 @@
+"""
+Tests pour pricewatch.app.core.io
+
+Teste la lecture/écriture YAML/JSON et les fonctions de sauvegarde debug.
+"""
+
+import json
+import tempfile
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+import yaml
+
+from pricewatch.app.core.io import (
+    ScrapingConfig,
+    ScrapingOptions,
+    read_json_results,
+    read_yaml_config,
+    save_debug_html,
+    save_debug_screenshot,
+    write_json_results,
+)
+from pricewatch.app.core.schema import (
+    DebugInfo,
+    DebugStatus,
+    FetchMethod,
+    ProductSnapshot,
+    StockStatus,
+)
+
+
+class TestScrapingOptions:
+    """Tests for the ScrapingOptions model."""
+
+    def test_default_values(self):
+        """A ScrapingOptions built with no arguments exposes the expected defaults."""
+        options = ScrapingOptions()
+        assert options.use_playwright is True
+        assert options.headful is False
+        assert options.save_html is True
+        assert options.save_screenshot is True
+        assert options.timeout_ms == 60000
+
+    def test_custom_values(self):
+        """Explicitly passed values are stored unchanged on the model."""
+        options = ScrapingOptions(
+            use_playwright=False,
+            headful=True,
+            save_html=False,
+            save_screenshot=False,
+            timeout_ms=30000,
+        )
+        assert options.use_playwright is False
+        assert options.headful is True
+        assert options.save_html is False
+        assert options.save_screenshot is False
+        assert options.timeout_ms == 30000
+
+    def test_timeout_validation_min(self):
+        """A timeout_ms of 500 (below 1000 ms) is rejected with ValueError."""
+        with pytest.raises(ValueError):  # NOTE(review): presumably a validation error subclass — confirm
+            ScrapingOptions(timeout_ms=500)
+
+    def test_timeout_validation_valid(self):
+        """A timeout_ms of exactly 1000 is accepted and stored as-is."""
+        options = ScrapingOptions(timeout_ms=1000)
+        assert options.timeout_ms == 1000
+
+
+class TestScrapingConfig:
+    """Tests for the ScrapingConfig model."""
+
+    def test_minimal_config(self):
+        """A config built from URLs only gets a ScrapingOptions instance as options."""
+        config = ScrapingConfig(urls=["https://example.com"])
+        assert len(config.urls) == 1
+        assert config.urls[0] == "https://example.com"
+        assert isinstance(config.options, ScrapingOptions)
+
+    def test_config_with_options(self):
+        """A config built from URLs plus explicit options keeps both."""
+        options = ScrapingOptions(use_playwright=False, timeout_ms=10000)
+        config = ScrapingConfig(
+            urls=["https://example.com", "https://test.com"], options=options
+        )
+        assert len(config.urls) == 2
+        assert config.options.use_playwright is False
+        assert config.options.timeout_ms == 10000
+
+    def test_validate_urls_empty_list(self):
+        """An empty URL list is rejected with ValueError ("Au moins une URL")."""
+        with pytest.raises(ValueError, match="Au moins une URL"):
+            ScrapingConfig(urls=[])
+
+    def test_validate_urls_strips_whitespace(self):
+        """Leading and trailing whitespace is stripped from each URL."""
+        config = ScrapingConfig(urls=["  https://example.com  ", "https://test.com"])
+        assert config.urls == ["https://example.com", "https://test.com"]
+
+    def test_validate_urls_removes_empty(self):
+        """Empty and whitespace-only URLs are dropped; the order of the rest is kept."""
+        config = ScrapingConfig(
+            urls=["https://example.com", "", "  ", "https://test.com"]
+        )
+        assert len(config.urls) == 2
+        assert config.urls == ["https://example.com", "https://test.com"]
+
+    def test_validate_urls_all_empty(self):
+        """If every URL is empty or whitespace, ValueError ("Aucune URL valide") is raised."""
+        with pytest.raises(ValueError, match="Aucune URL valide"):
+            ScrapingConfig(urls=["", "  ", "\t"])
+
+
+class TestReadYamlConfig:
+    """Tests for read_yaml_config()."""
+
+    def test_read_valid_yaml(self, tmp_path):
+        """A YAML file with urls and options is loaded into a matching config."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {
+            "urls": ["https://example.com", "https://test.com"],
+            "options": {"use_playwright": False, "timeout_ms": 30000},
+        }
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(yaml_path)
+        assert len(config.urls) == 2
+        assert config.urls[0] == "https://example.com"
+        assert config.options.use_playwright is False
+        assert config.options.timeout_ms == 30000
+
+    def test_read_yaml_minimal(self, tmp_path):
+        """A YAML file containing only urls is loaded with default options."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"urls": ["https://example.com"]}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(yaml_path)
+        assert len(config.urls) == 1
+        # Default options
+        assert config.options.use_playwright is True
+        assert config.options.timeout_ms == 60000
+
+    def test_read_yaml_file_not_found(self, tmp_path):
+        """A path that does not exist raises FileNotFoundError."""
+        yaml_path = tmp_path / "nonexistent.yaml"
+        with pytest.raises(FileNotFoundError):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_empty_file(self, tmp_path):
+        """An empty YAML file raises ValueError ("Fichier YAML vide")."""
+        yaml_path = tmp_path / "empty.yaml"
+        yaml_path.write_text("")
+
+        with pytest.raises(ValueError, match="Fichier YAML vide"):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_invalid_syntax(self, tmp_path):
+        """YAML with invalid syntax (unclosed list) raises ValueError ("YAML invalide")."""
+        yaml_path = tmp_path / "invalid.yaml"
+        yaml_path.write_text("urls: [invalid yaml syntax")
+
+        with pytest.raises(ValueError, match="YAML invalide"):
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_missing_urls(self, tmp_path):
+        """YAML without a 'urls' field makes read_yaml_config raise."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"options": {"use_playwright": False}}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        with pytest.raises(Exception):  # Pydantic validation error; NOTE(review): could be narrowed — confirm type
+            read_yaml_config(yaml_path)
+
+    def test_read_yaml_accepts_path_string(self, tmp_path):
+        """The path may be passed as a plain str instead of a Path."""
+        yaml_path = tmp_path / "config.yaml"
+        yaml_content = {"urls": ["https://example.com"]}
+        with open(yaml_path, "w") as f:
+            yaml.dump(yaml_content, f)
+
+        config = read_yaml_config(str(yaml_path))
+        assert len(config.urls) == 1
+
+
+class TestWriteJsonResults:
+    """Tests for write_json_results()."""
+
+    @pytest.fixture
+    def sample_snapshot(self) -> ProductSnapshot:
+        """Fixture: an example ProductSnapshot with an HTTP / SUCCESS DebugInfo."""
+        return ProductSnapshot(
+            source="test",
+            url="https://example.com/product",
+            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
+            title="Test Product",
+            price=99.99,
+            currency="EUR",
+            stock_status=StockStatus.IN_STOCK,
+            reference="TEST123",
+            images=["https://example.com/img1.jpg"],
+            category="Test Category",
+            specs={"Brand": "TestBrand"},
+            debug=DebugInfo(
+                method=FetchMethod.HTTP,
+                status=DebugStatus.SUCCESS,
+                errors=[],
+                notes=[],
+            ),
+        )
+
+    def test_write_single_snapshot(self, tmp_path, sample_snapshot):
+        """Writing one snapshot produces a JSON list holding one matching object."""
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot], json_path)
+
+        assert json_path.exists()
+
+        # Check the content
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert isinstance(data, list)
+        assert len(data) == 1
+        assert data[0]["source"] == "test"
+        assert data[0]["title"] == "Test Product"
+
+    def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
+        """Writing two snapshots produces two JSON entries in the same order."""
+        snapshot2 = ProductSnapshot(
+            source="test2",
+            url="https://example.com/product2",
+            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
+            title="Test Product 2",
+            price=49.99,
+            currency="EUR",
+            stock_status=StockStatus.OUT_OF_STOCK,
+            debug=DebugInfo(
+                method=FetchMethod.PLAYWRIGHT,
+                status=DebugStatus.PARTIAL,
+                errors=["Test error"],
+                notes=[],
+            ),
+        )
+
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot, snapshot2], json_path)
+
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert len(data) == 2
+        assert data[0]["source"] == "test"
+        assert data[1]["source"] == "test2"
+
+    def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
+        """Missing parent directories of the target path are created."""
+        json_path = tmp_path / "sub" / "dir" / "results.json"
+        write_json_results([sample_snapshot], json_path)
+
+        assert json_path.exists()
+        assert json_path.parent.exists()
+
+    def test_write_empty_list(self, tmp_path):
+        """Writing an empty list produces a file containing an empty JSON list."""
+        json_path = tmp_path / "empty.json"
+        write_json_results([], json_path)
+
+        assert json_path.exists()
+
+        with open(json_path) as f:
+            data = json.load(f)
+
+        assert data == []
+
+    def test_write_indent_control(self, tmp_path, sample_snapshot):
+        """indent=2 gives multi-line output; indent=None gives shorter output."""
+        # With indent
+        json_path1 = tmp_path / "pretty.json"
+        write_json_results([sample_snapshot], json_path1, indent=2)
+        content1 = json_path1.read_text()
+        assert "\n" in content1  # Pretty-printed
+
+        # Without indent (compact)
+        json_path2 = tmp_path / "compact.json"
+        write_json_results([sample_snapshot], json_path2, indent=None)
+        content2 = json_path2.read_text()
+        assert len(content2) < len(content1)  # More compact
+
+    def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
+        """The path may be passed as a plain str instead of a Path."""
+        json_path = tmp_path / "results.json"
+        write_json_results([sample_snapshot], str(json_path))
+        assert json_path.exists()
+
+
+class TestReadJsonResults:
+    """Tests for read_json_results()."""
+
+    @pytest.fixture
+    def json_file_with_snapshot(self, tmp_path) -> Path:
+        """Fixture: path to a JSON file holding a one-element list with one snapshot dict."""
+        json_path = tmp_path / "results.json"
+        snapshot_data = {
+            "source": "test",
+            "url": "https://example.com/product",
+            "fetched_at": "2024-01-01T12:00:00",
+            "title": "Test Product",
+            "price": 99.99,
+            "currency": "EUR",
+            "shipping_cost": None,
+            "stock_status": "in_stock",
+            "reference": "TEST123",
+            "images": ["https://example.com/img.jpg"],
+            "category": "Test",
+            "specs": {"Brand": "Test"},
+            "debug": {
+                "method": "http",
+                "status": "success",
+                "errors": [],
+                "notes": [],
+                "duration_ms": None,
+                "html_size_bytes": None,
+            },
+        }
+
+        with open(json_path, "w") as f:
+            json.dump([snapshot_data], f)
+
+        return json_path
+
+    def test_read_single_snapshot(self, json_file_with_snapshot):
+        """A file with one snapshot is read back as a single ProductSnapshot."""
+        snapshots = read_json_results(json_file_with_snapshot)
+
+        assert len(snapshots) == 1
+        assert isinstance(snapshots[0], ProductSnapshot)
+        assert snapshots[0].source == "test"
+        assert snapshots[0].title == "Test Product"
+        assert snapshots[0].price == 99.99
+
+    def test_read_file_not_found(self, tmp_path):
+        """A path that does not exist raises FileNotFoundError."""
+        json_path = tmp_path / "nonexistent.json"
+        with pytest.raises(FileNotFoundError):
+            read_json_results(json_path)
+
+    def test_read_invalid_json(self, tmp_path):
+        """Malformed JSON raises ValueError ("JSON invalide")."""
+        json_path = tmp_path / "invalid.json"
+        json_path.write_text("{invalid json")
+
+        with pytest.raises(ValueError, match="JSON invalide"):
+            read_json_results(json_path)
+
+    def test_read_not_a_list(self, tmp_path):
+        """A top-level JSON object (not a list) raises ValueError ("doit contenir une liste")."""
+        json_path = tmp_path / "notlist.json"
+        with open(json_path, "w") as f:
+            json.dump({"key": "value"}, f)
+
+        with pytest.raises(ValueError, match="doit contenir une liste"):
+            read_json_results(json_path)
+
+    def test_read_empty_list(self, tmp_path):
+        """An empty JSON list is accepted and returns an empty list."""
+        json_path = tmp_path / "empty.json"
+        with open(json_path, "w") as f:
+            json.dump([], f)
+
+        snapshots = read_json_results(json_path)
+        assert snapshots == []
+
+    def test_read_accepts_path_string(self, json_file_with_snapshot):
+        """The path may be passed as a plain str instead of a Path."""
+        snapshots = read_json_results(str(json_file_with_snapshot))
+        assert len(snapshots) == 1
+
+
+class TestSaveDebugHtml:
+    """Tests for save_debug_html()."""
+
+    def test_save_html_default_dir(self, tmp_path, monkeypatch):
+        """Without an output directory, the HTML is saved as test_page.html with equal content."""
+        # Switch the working directory to tmp_path for the duration of the test
+        monkeypatch.chdir(tmp_path)
+
+        html = "<html><body>Test</body></html>"
+        result_path = save_debug_html(html, "test_page")
+
+        assert result_path.exists()
+        assert result_path.name == "test_page.html"
+        assert result_path.read_text(encoding="utf-8") == html
+
+    def test_save_html_custom_dir(self, tmp_path):
+        """With an explicit output directory, the file is written directly inside it."""
+        output_dir = tmp_path / "debug_html"
+        html = "<html><body>Test</body></html>"
+
+        result_path = save_debug_html(html, "test_page", output_dir)
+
+        assert result_path.parent == output_dir
+        assert result_path.name == "test_page.html"
+        assert result_path.read_text(encoding="utf-8") == html
+
+    def test_save_html_creates_dir(self, tmp_path):
+        """A nested output directory that does not exist yet is created."""
+        output_dir = tmp_path / "sub" / "dir" / "html"
+        html = "<html><body>Test</body></html>"
+
+        result_path = save_debug_html(html, "test_page", output_dir)
+
+        assert output_dir.exists()
+        assert result_path.exists()
+
+    def test_save_html_large_content(self, tmp_path):
+        """A large HTML document (100000 filler characters) is saved at full length."""
+        html = "<html><body>" + ("x" * 100000) + "</body></html>"
+        result_path = save_debug_html(html, "large_page", tmp_path)
+
+        assert result_path.exists()
+        assert len(result_path.read_text(encoding="utf-8")) == len(html)
+
+
+class TestSaveDebugScreenshot:
+    """Tests for save_debug_screenshot()."""
+
+    def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
+        """Without an output directory, the bytes are saved as test_screenshot.png unchanged."""
+        monkeypatch.chdir(tmp_path)
+
+        screenshot_bytes = b"\x89PNG fake image data"
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot")
+
+        assert result_path.exists()
+        assert result_path.name == "test_screenshot.png"
+        assert result_path.read_bytes() == screenshot_bytes
+
+    def test_save_screenshot_custom_dir(self, tmp_path):
+        """With an explicit output directory, the file is written directly inside it."""
+        output_dir = tmp_path / "screenshots"
+        screenshot_bytes = b"\x89PNG fake image data"
+
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
+
+        assert result_path.parent == output_dir
+        assert result_path.name == "test_screenshot.png"
+        assert result_path.read_bytes() == screenshot_bytes
+
+    def test_save_screenshot_creates_dir(self, tmp_path):
+        """A nested output directory that does not exist yet is created."""
+        output_dir = tmp_path / "sub" / "dir" / "screenshots"
+        screenshot_bytes = b"\x89PNG fake image data"
+
+        result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
+
+        assert output_dir.exists()
+        assert result_path.exists()