This commit is contained in:
Gilles Soulier
2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions

BIN
pricewatch/app/core/__pycache__/config.cpython-313.pyc Executable file → Normal file

Binary file not shown.

BIN
pricewatch/app/core/__pycache__/schema.cpython-313.pyc Executable file → Normal file

Binary file not shown.

6
pricewatch/app/core/config.py Executable file → Normal file
View File

@@ -108,6 +108,11 @@ class AppConfig(BaseSettings):
default=True, description="Enable background worker functionality"
)
# API auth
api_token: Optional[str] = Field(
default=None, description="API token simple (Bearer)"
)
# Scraping defaults
default_playwright_timeout: int = Field(
default=60000, description="Default Playwright timeout in milliseconds"
@@ -138,6 +143,7 @@ class AppConfig(BaseSettings):
logger.info(f"Worker enabled: {self.enable_worker}")
logger.info(f"Worker timeout: {self.worker_timeout}s")
logger.info(f"Worker concurrency: {self.worker_concurrency}")
logger.info(f"API token configured: {bool(self.api_token)}")
logger.info("================================")

View File

@@ -23,6 +23,9 @@ class ScrapingOptions(BaseModel):
use_playwright: bool = Field(
default=True, description="Utiliser Playwright en fallback"
)
force_playwright: bool = Field(
default=False, description="Forcer Playwright même si HTTP réussi"
)
headful: bool = Field(default=False, description="Mode headful (voir le navigateur)")
save_html: bool = Field(
default=True, description="Sauvegarder HTML pour debug"
@@ -94,7 +97,8 @@ def read_yaml_config(yaml_path: str | Path) -> ScrapingConfig:
config = ScrapingConfig.model_validate(data)
logger.info(
f"Configuration chargée: {len(config.urls)} URL(s), "
f"playwright={config.options.use_playwright}"
f"playwright={config.options.use_playwright}, "
f"force_playwright={config.options.force_playwright}"
)
return config

View File

@@ -9,7 +9,7 @@ from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field, HttpUrl, field_validator
from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
class StockStatus(str, Enum):
@@ -38,6 +38,8 @@ class DebugStatus(str, Enum):
class DebugInfo(BaseModel):
"""Informations de debug pour tracer les problèmes de scraping."""
model_config = ConfigDict(use_enum_values=True)
method: FetchMethod = Field(
description="Méthode utilisée pour la récupération (http ou playwright)"
)
@@ -55,9 +57,6 @@ class DebugInfo(BaseModel):
default=None, description="Taille du HTML récupéré en octets"
)
class Config:
use_enum_values = True
class ProductSnapshot(BaseModel):
"""
@@ -81,6 +80,7 @@ class ProductSnapshot(BaseModel):
# Données produit principales
title: Optional[str] = Field(default=None, description="Nom du produit")
price: Optional[float] = Field(default=None, description="Prix du produit", ge=0)
msrp: Optional[float] = Field(default=None, description="Prix conseille", ge=0)
currency: str = Field(default="EUR", description="Devise (EUR, USD, etc.)")
shipping_cost: Optional[float] = Field(
default=None, description="Frais de port", ge=0
@@ -94,6 +94,7 @@ class ProductSnapshot(BaseModel):
default=None, description="Référence produit (ASIN, SKU, etc.)"
)
category: Optional[str] = Field(default=None, description="Catégorie du produit")
description: Optional[str] = Field(default=None, description="Description produit")
# Médias
images: list[str] = Field(
@@ -133,20 +134,22 @@ class ProductSnapshot(BaseModel):
"""Filtre les URLs d'images vides."""
return [url.strip() for url in v if url and url.strip()]
class Config:
use_enum_values = True
json_schema_extra = {
model_config = ConfigDict(
use_enum_values=True,
json_schema_extra={
"example": {
"source": "amazon",
"url": "https://www.amazon.fr/dp/B08N5WRWNW",
"fetched_at": "2026-01-13T10:30:00Z",
"title": "Exemple de produit",
"price": 299.99,
"msrp": 349.99,
"currency": "EUR",
"shipping_cost": 0.0,
"stock_status": "in_stock",
"reference": "B08N5WRWNW",
"category": "Electronics",
"description": "Chargeur USB-C multi-ports.",
"images": [
"https://example.com/image1.jpg",
"https://example.com/image2.jpg",
@@ -165,7 +168,8 @@ class ProductSnapshot(BaseModel):
"html_size_bytes": 145000,
},
}
}
},
)
def to_dict(self) -> dict:
"""Serialize vers un dictionnaire Python natif."""