feat: improve SPA scraping and increase test coverage

- Add SPA support for Playwright with wait_for_network_idle and extra_wait_ms
- Add BaseStore.get_spa_config() and requires_playwright() methods
- Implement AliExpress SPA config with JSON price extraction patterns
- Fix Amazon price parsing to prioritize whole+fraction combination
- Fix AliExpress regex patterns (remove double backslashes)
- Add CLI tests: detect, doctor, fetch, parse, run commands
- Add API tests: auth, logs, products, scraping_logs, webhooks

Tests: 417 passed, 85% coverage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Gilles Soulier
2026-01-17 14:46:55 +01:00
parent cf7c415e22
commit 152c2724fc
14 changed files with 1307 additions and 22 deletions

42
tests/cli/test_detect.py Normal file
View File

@@ -0,0 +1,42 @@
"""Tests pour la commande CLI detect."""
import pytest
from typer.testing import CliRunner
from pricewatch.app.cli.main import app
# Shared Typer test runner; every test in this module invokes the CLI through it.
runner = CliRunner()
class TestDetectCommand:
    """Exercise the ``detect`` CLI command with supported and unsupported URLs."""

    @staticmethod
    def _detect(url):
        """Invoke ``detect`` on *url* and hand back the CLI result object."""
        return runner.invoke(app, ["detect", url])

    def test_detect_amazon_url(self):
        """An Amazon URL exits 0 and the output names the store and the id."""
        outcome = self._detect("https://www.amazon.fr/dp/B08N5WRWNW")

        assert outcome.exit_code == 0
        printed = outcome.stdout
        assert "amazon" in printed.lower()
        assert "B08N5WRWNW" in printed

    def test_detect_cdiscount_url(self):
        """A Cdiscount URL exits 0 and the output names the store."""
        cdiscount_url = (
            "https://www.cdiscount.com/informatique/f-10709-tuf608umrv004.html"
        )
        outcome = self._detect(cdiscount_url)

        assert outcome.exit_code == 0
        assert "cdiscount" in outcome.stdout.lower()

    def test_detect_unknown_url(self):
        """A URL matching no store exits 1 with a "no store" message."""
        outcome = self._detect("https://www.unknown-store.com/product")

        assert outcome.exit_code == 1
        assert "aucun store" in outcome.stdout.lower()

    def test_detect_invalid_url(self):
        """A string that is not a URL at all exits 1."""
        outcome = self._detect("not-a-valid-url")

        assert outcome.exit_code == 1

36
tests/cli/test_doctor.py Normal file
View File

@@ -0,0 +1,36 @@
"""Tests pour la commande CLI doctor."""
import pytest
from typer.testing import CliRunner
from pricewatch.app.cli.main import app
# Shared Typer test runner; every test in this module invokes the CLI through it.
runner = CliRunner()
class TestDoctorCommand:
    """Checks on the report printed by the ``doctor`` CLI command."""

    @staticmethod
    def _doctor_output():
        """Run ``doctor``, require exit code 0, and return its stdout."""
        outcome = runner.invoke(app, ["doctor"])
        assert outcome.exit_code == 0
        return outcome.stdout

    def test_doctor_success(self):
        """The report shows its banner, the Python line and a readiness word."""
        report = self._doctor_output()

        assert "PriceWatch Doctor" in report
        assert "Python" in report
        # The readiness word is matched in its accented French spelling,
        # with the English word accepted as an alternative.
        lowered = report.lower()
        assert any(word in lowered for word in ("prêt", "ready"))

    def test_doctor_shows_dependencies(self):
        """The report mentions each of the checked dependencies."""
        lowered = self._doctor_output().lower()

        for dependency in ("typer", "pydantic", "playwright"):
            assert dependency in lowered

    def test_doctor_shows_stores(self):
        """The report mentions the available stores."""
        lowered = self._doctor_output().lower()

        for store in ("amazon", "cdiscount"):
            assert store in lowered

99
tests/cli/test_fetch.py Normal file
View File

@@ -0,0 +1,99 @@
"""Tests pour la commande CLI fetch."""
import pytest
from unittest.mock import patch, MagicMock
from typer.testing import CliRunner
from pricewatch.app.cli.main import app
# Shared Typer test runner; every test in this module invokes the CLI through it.
runner = CliRunner()
class TestFetchCommand:
    """Tests for the ``fetch`` CLI command.

    No real fetch happens: ``fetch_http`` and ``fetch_playwright`` are patched
    on ``pricewatch.app.cli.main`` and return a ``MagicMock`` that stands in
    for the fetch result.
    """

    @staticmethod
    def _reports_success(stdout: str) -> bool:
        """Return True when *stdout* contains a success marker.

        This check used to read ``"Succes" in stdout or "" in stdout``. The
        empty string is a substring of every string, so that assertion could
        never fail. Matching the lowercase prefix ``succ`` accepts "Succes",
        "Succès" and "Success" alike.
        """
        # NOTE(review): the check-mark alternative is a guess at the glyph the
        # empty literal originally held -- confirm against the real CLI output.
        return "succ" in stdout.lower() or "✓" in stdout

    def test_fetch_conflicting_options(self):
        """Passing both --http and --playwright exits 1 with an error."""
        result = runner.invoke(
            app, ["fetch", "https://example.com", "--http", "--playwright"]
        )

        assert result.exit_code == 1
        assert "impossible" in result.stdout.lower()

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_fetch_http_success(self, mock_fetch: MagicMock):
        """A successful HTTP fetch exits 0 and prints status and duration."""
        mock_fetch.return_value = MagicMock(
            success=True,
            html="<html>test</html>",
            status_code=200,
            duration_ms=150,
        )

        result = runner.invoke(app, ["fetch", "https://example.com", "--http"])

        assert result.exit_code == 0
        assert self._reports_success(result.stdout)
        assert "150" in result.stdout

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_fetch_http_failure(self, mock_fetch: MagicMock):
        """A failed HTTP fetch exits 1 and prints the fetch error."""
        mock_fetch.return_value = MagicMock(
            success=False, error="Connection refused"
        )

        result = runner.invoke(app, ["fetch", "https://example.com", "--http"])

        assert result.exit_code == 1
        assert "Connection refused" in result.stdout

    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_fetch_playwright_success(self, mock_fetch: MagicMock):
        """A successful Playwright fetch exits 0 and prints the duration."""
        mock_fetch.return_value = MagicMock(
            success=True,
            html="<html>test playwright</html>",
            duration_ms=2500,
        )

        result = runner.invoke(
            app, ["fetch", "https://example.com", "--playwright"]
        )

        assert result.exit_code == 0
        assert self._reports_success(result.stdout)
        assert "2500" in result.stdout

    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_fetch_playwright_failure(self, mock_fetch: MagicMock):
        """A failed Playwright fetch exits 1 and prints the fetch error."""
        mock_fetch.return_value = MagicMock(
            success=False, error="Timeout waiting for page"
        )

        result = runner.invoke(
            app, ["fetch", "https://example.com", "--playwright"]
        )

        assert result.exit_code == 1
        assert "Timeout" in result.stdout

    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_fetch_default_is_playwright(self, mock_fetch: MagicMock):
        """With no method flag, the Playwright fetcher is called exactly once."""
        mock_fetch.return_value = MagicMock(
            success=True, html="<html>test</html>", duration_ms=1000
        )

        result = runner.invoke(app, ["fetch", "https://example.com"])

        assert result.exit_code == 0
        mock_fetch.assert_called_once()

    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_fetch_with_debug(self, mock_fetch: MagicMock):
        """The --debug flag is accepted and the command still exits 0."""
        mock_fetch.return_value = MagicMock(
            success=True, html="<html>test</html>", duration_ms=1000
        )

        result = runner.invoke(app, ["fetch", "https://example.com", "--debug"])

        assert result.exit_code == 0

99
tests/cli/test_parse.py Normal file
View File

@@ -0,0 +1,99 @@
"""Tests pour la commande CLI parse."""
import tempfile
from pathlib import Path
import pytest
from typer.testing import CliRunner
from pricewatch.app.cli.main import app
# Shared Typer test runner; every test in this module invokes the CLI through it.
runner = CliRunner()
class TestParseCommand:
    """Exercise the ``parse`` CLI command on small HTML files written to disk."""

    @staticmethod
    def _parse(store, html_path, *extra_args):
        """Invoke ``parse <store> --in <html_path>`` plus any extra flags."""
        argv = ["parse", store, "--in", str(html_path), *extra_args]
        return runner.invoke(app, argv)

    @pytest.fixture
    def amazon_html_file(self, tmp_path: Path) -> Path:
        """Write a minimal Amazon-style product page and return its path."""
        markup = "\n".join(
            [
                "",
                "<html>",
                "<body>",
                '<span id="productTitle">Test Product</span>',
                '<span class="a-price-whole">299,99 €</span>',
                '<div id="availability">',
                "<span>En stock</span>",
                "</div>",
                "</body>",
                "</html>",
                "",
            ]
        )
        target = tmp_path / "amazon_test.html"
        target.write_text(markup, encoding="utf-8")
        return target

    @pytest.fixture
    def cdiscount_html_file(self, tmp_path: Path) -> Path:
        """Write a minimal Cdiscount-style page (JSON-LD product) and return its path."""
        markup = "\n".join(
            [
                "",
                "<html>",
                "<head>",
                '<script type="application/ld+json">',
                "{",
                '"@type": "Product",',
                '"name": "Produit Cdiscount",',
                '"offers": {"price": "199.99", "priceCurrency": "EUR"}',
                "}",
                "</script>",
                "</head>",
                "<body>",
                '<h1 data-e2e="title">Produit Cdiscount</h1>',
                "</body>",
                "</html>",
                "",
            ]
        )
        target = tmp_path / "cdiscount_test.html"
        target.write_text(markup, encoding="utf-8")
        return target

    def test_parse_amazon_success(self, amazon_html_file: Path):
        """Parsing the Amazon page exits 0 and prints the title and price."""
        outcome = self._parse("amazon", amazon_html_file)

        assert outcome.exit_code == 0
        for expected in ("Test Product", "299"):
            assert expected in outcome.stdout

    def test_parse_cdiscount_success(self, cdiscount_html_file: Path):
        """Parsing the Cdiscount page exits 0 and prints the title and price."""
        outcome = self._parse("cdiscount", cdiscount_html_file)

        assert outcome.exit_code == 0
        for expected in ("Produit Cdiscount", "199"):
            assert expected in outcome.stdout

    def test_parse_unknown_store(self, amazon_html_file: Path):
        """An unrecognised store name exits 1 with an "unknown" message."""
        outcome = self._parse("unknown_store", amazon_html_file)

        assert outcome.exit_code == 1
        assert "inconnu" in outcome.stdout.lower()

    def test_parse_with_debug(self, amazon_html_file: Path):
        """The --debug flag is accepted and the command still exits 0."""
        outcome = self._parse("amazon", amazon_html_file, "--debug")

        assert outcome.exit_code == 0

    def test_parse_shows_fields(self, amazon_html_file: Path):
        """The output carries the title, price and stock field labels."""
        outcome = self._parse("amazon", amazon_html_file)

        assert outcome.exit_code == 0
        for label in ("Titre", "Prix", "Stock"):
            assert label in outcome.stdout

View File

@@ -0,0 +1,258 @@
"""Tests pour la commande CLI run."""
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from typer.testing import CliRunner
from pricewatch.app.cli.main import app
from pricewatch.app.core.schema import ProductSnapshot, DebugInfo, DebugStatus, FetchMethod
# Shared Typer test runner; every test in this module invokes the CLI through it.
runner = CliRunner()
@pytest.fixture
def yaml_config(tmp_path: Path) -> Path:
    """Write a default ``run`` configuration to a temporary YAML file.

    The config lists one Amazon URL and turns every Playwright and debug
    option off.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        Path of the written ``test_config.yaml`` file.
    """
    # Indentation is spelled out inside the literals: the settings must be
    # nested under ``options:``. At the same level as ``options:`` they would
    # parse as unrelated top-level keys and leave ``options`` null.
    yaml_content = (
        "urls:\n"
        '  - "https://www.amazon.fr/dp/B08N5WRWNW"\n'
        "options:\n"
        "  use_playwright: false\n"
        "  force_playwright: false\n"
        "  headful: false\n"
        "  save_html: false\n"
        "  save_screenshot: false\n"
        "  timeout_ms: 30000\n"
    )
    file_path = tmp_path / "test_config.yaml"
    file_path.write_text(yaml_content, encoding="utf-8")
    return file_path
@pytest.fixture
def output_json(tmp_path: Path) -> Path:
    """Return the location inside *tmp_path* used as the JSON output file."""
    return tmp_path.joinpath("output.json")
class TestRunCommand:
    """Tests for the ``run`` CLI command (YAML config in, JSON file out).

    ``fetch_http`` and ``fetch_playwright`` are patched on
    ``pricewatch.app.cli.main`` and return ``MagicMock`` fetch results.
    Every invocation except the invalid-YAML one passes ``--no-db``.
    """

    _AMAZON_URL = "https://www.amazon.fr/dp/B08N5WRWNW"

    @staticmethod
    def _write_config(path: Path, url: str, **options) -> Path:
        """Write a ``run`` config for *url* to *path* and return *path*.

        Each keyword argument becomes one entry nested under ``options:``.
        Booleans are rendered as YAML ``true``/``false``. The two-space
        indentation is generated here so the options cannot end up at the
        top level of the document, where they would leave ``options`` null.
        """
        lines = ["urls:", f'  - "{url}"', "options:"]
        for key, value in options.items():
            rendered = str(value).lower() if isinstance(value, bool) else str(value)
            lines.append(f"  {key}: {rendered}")
        path.write_text("\n".join(lines) + "\n", encoding="utf-8")
        return path

    @staticmethod
    def _amazon_html(title: str, price: str) -> str:
        """Return a minimal Amazon-style page showing *title* and *price*."""
        return "\n".join(
            [
                "",
                "<html><body>",
                f'<span id="productTitle">{title}</span>',
                f'<span class="a-price-whole">{price} €</span>',
                "</body></html>",
                "",
            ]
        )

    @staticmethod
    def _invoke_run(config: Path, out: Path, *extra_args):
        """Invoke ``run --yaml <config> --out <out>`` plus any extra flags."""
        argv = ["run", "--yaml", str(config), "--out", str(out), *extra_args]
        return runner.invoke(app, argv)

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_run_http_success(self, mock_fetch, yaml_config, output_json):
        """A successful HTTP fetch exits 0 and writes the JSON output file."""
        mock_fetch.return_value = MagicMock(
            success=True,
            html=self._amazon_html("Test Product", "299,99"),
            error=None,
        )

        result = self._invoke_run(yaml_config, output_json, "--no-db")

        assert result.exit_code == 0
        assert output_json.exists()

    @patch("pricewatch.app.cli.main.fetch_http")
    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_run_http_fail_playwright_fallback(
        self, mock_pw, mock_http, yaml_config, output_json
    ):
        """When HTTP fails and Playwright is enabled, Playwright is called."""
        mock_http.return_value = MagicMock(success=False, error="403 Forbidden")
        mock_pw.return_value = MagicMock(
            success=True,
            html=self._amazon_html("Playwright Product", "199,99"),
            screenshot=None,
        )
        # Overwrite the default config so that Playwright is enabled.
        self._write_config(
            yaml_config,
            self._AMAZON_URL,
            use_playwright=True,
            force_playwright=False,
            headful=False,
            save_html=False,
            save_screenshot=False,
            timeout_ms=30000,
        )

        result = self._invoke_run(yaml_config, output_json, "--no-db")

        assert result.exit_code == 0
        mock_pw.assert_called()

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_run_http_fail_no_playwright(self, mock_http, yaml_config, output_json):
        """A failed HTTP fetch with Playwright disabled still writes the JSON."""
        mock_http.return_value = MagicMock(
            success=False, error="Connection refused"
        )

        result = self._invoke_run(yaml_config, output_json, "--no-db")

        # The command is expected to exit 0 and still create the output file.
        assert result.exit_code == 0
        assert output_json.exists()

    def test_run_invalid_yaml(self, tmp_path, output_json):
        """A malformed YAML file makes the command exit 1."""
        yaml_file = tmp_path / "invalid.yaml"
        yaml_file.write_text("invalid: [yaml: content", encoding="utf-8")

        result = self._invoke_run(yaml_file, output_json)

        assert result.exit_code == 1

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_run_with_debug(self, mock_fetch, yaml_config, output_json):
        """The --debug flag is accepted and the command still exits 0."""
        mock_fetch.return_value = MagicMock(
            success=True, html="<html><body>Test</body></html>"
        )

        result = self._invoke_run(yaml_config, output_json, "--debug", "--no-db")

        assert result.exit_code == 0

    @patch("pricewatch.app.cli.main.fetch_http")
    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_run_force_playwright(self, mock_pw, mock_http, tmp_path, output_json):
        """With force_playwright on, the HTTP fetcher is never called."""
        yaml_file = self._write_config(
            tmp_path / "force_pw.yaml",
            self._AMAZON_URL,
            use_playwright=True,
            force_playwright=True,
            headful=False,
            save_html=False,
            save_screenshot=False,
            timeout_ms=30000,
        )
        mock_pw.return_value = MagicMock(
            success=True,
            html="<html><body>PW content</body></html>",
            screenshot=None,
        )

        result = self._invoke_run(yaml_file, output_json, "--no-db")

        mock_http.assert_not_called()
        mock_pw.assert_called()
        assert result.exit_code == 0

    @patch("pricewatch.app.cli.main.fetch_http")
    def test_run_unknown_store(self, mock_fetch, tmp_path, output_json):
        """A URL matching no store is skipped: exit 0 and no fetch attempted."""
        yaml_file = self._write_config(
            tmp_path / "unknown.yaml",
            "https://www.unknown-store.com/product/123",
            use_playwright=False,
        )

        result = self._invoke_run(yaml_file, output_json, "--no-db")

        assert result.exit_code == 0
        mock_fetch.assert_not_called()

    @patch("pricewatch.app.cli.main.fetch_http")
    @patch("pricewatch.app.cli.main.fetch_playwright")
    def test_run_with_save_screenshot(self, mock_pw, mock_http, tmp_path, output_json):
        """With save_screenshot on, a returned screenshot is passed to the saver."""
        yaml_file = self._write_config(
            tmp_path / "screenshot.yaml",
            self._AMAZON_URL,
            use_playwright=True,
            force_playwright=False,
            save_screenshot=True,
            timeout_ms=30000,
        )
        # HTTP fails, so the Playwright result (with screenshot bytes) is used.
        mock_http.return_value = MagicMock(success=False, error="blocked")
        mock_pw.return_value = MagicMock(
            success=True,
            html="<html><body>content</body></html>",
            screenshot=b"fake_png_data",
        )

        # NOTE(review): this patches the defining module, unlike the fetcher
        # patches above which target ``pricewatch.app.cli.main``. That only
        # intercepts the call if the CLI resolves the function through
        # ``pricewatch.app.core.io`` at call time -- confirm against main.py.
        with patch("pricewatch.app.core.io.save_debug_screenshot") as mock_save:
            result = self._invoke_run(yaml_file, output_json, "--no-db")

        assert result.exit_code == 0
        mock_save.assert_called()