codex2
This commit is contained in:
BIN
pricewatch/app/core/__pycache__/config.cpython-313.pyc
Executable file → Normal file
BIN
pricewatch/app/core/__pycache__/config.cpython-313.pyc
Executable file → Normal file
Binary file not shown.
BIN
pricewatch/app/core/__pycache__/schema.cpython-313.pyc
Executable file → Normal file
BIN
pricewatch/app/core/__pycache__/schema.cpython-313.pyc
Executable file → Normal file
Binary file not shown.
6
pricewatch/app/core/config.py
Executable file → Normal file
6
pricewatch/app/core/config.py
Executable file → Normal file
@@ -108,6 +108,11 @@ class AppConfig(BaseSettings):
|
||||
default=True, description="Enable background worker functionality"
|
||||
)
|
||||
|
||||
# API auth
|
||||
api_token: Optional[str] = Field(
|
||||
default=None, description="API token simple (Bearer)"
|
||||
)
|
||||
|
||||
# Scraping defaults
|
||||
default_playwright_timeout: int = Field(
|
||||
default=60000, description="Default Playwright timeout in milliseconds"
|
||||
@@ -138,6 +143,7 @@ class AppConfig(BaseSettings):
|
||||
logger.info(f"Worker enabled: {self.enable_worker}")
|
||||
logger.info(f"Worker timeout: {self.worker_timeout}s")
|
||||
logger.info(f"Worker concurrency: {self.worker_concurrency}")
|
||||
logger.info(f"API token configured: {bool(self.api_token)}")
|
||||
logger.info("================================")
|
||||
|
||||
|
||||
|
||||
@@ -23,6 +23,9 @@ class ScrapingOptions(BaseModel):
|
||||
use_playwright: bool = Field(
|
||||
default=True, description="Utiliser Playwright en fallback"
|
||||
)
|
||||
force_playwright: bool = Field(
|
||||
default=False, description="Forcer Playwright même si HTTP réussi"
|
||||
)
|
||||
headful: bool = Field(default=False, description="Mode headful (voir le navigateur)")
|
||||
save_html: bool = Field(
|
||||
default=True, description="Sauvegarder HTML pour debug"
|
||||
@@ -94,7 +97,8 @@ def read_yaml_config(yaml_path: str | Path) -> ScrapingConfig:
|
||||
config = ScrapingConfig.model_validate(data)
|
||||
logger.info(
|
||||
f"Configuration chargée: {len(config.urls)} URL(s), "
|
||||
f"playwright={config.options.use_playwright}"
|
||||
f"playwright={config.options.use_playwright}, "
|
||||
f"force_playwright={config.options.force_playwright}"
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl, field_validator
|
||||
from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
|
||||
|
||||
|
||||
class StockStatus(str, Enum):
|
||||
@@ -38,6 +38,8 @@ class DebugStatus(str, Enum):
|
||||
class DebugInfo(BaseModel):
|
||||
"""Informations de debug pour tracer les problèmes de scraping."""
|
||||
|
||||
model_config = ConfigDict(use_enum_values=True)
|
||||
|
||||
method: FetchMethod = Field(
|
||||
description="Méthode utilisée pour la récupération (http ou playwright)"
|
||||
)
|
||||
@@ -55,9 +57,6 @@ class DebugInfo(BaseModel):
|
||||
default=None, description="Taille du HTML récupéré en octets"
|
||||
)
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
|
||||
|
||||
class ProductSnapshot(BaseModel):
|
||||
"""
|
||||
@@ -81,6 +80,7 @@ class ProductSnapshot(BaseModel):
|
||||
# Données produit principales
|
||||
title: Optional[str] = Field(default=None, description="Nom du produit")
|
||||
price: Optional[float] = Field(default=None, description="Prix du produit", ge=0)
|
||||
msrp: Optional[float] = Field(default=None, description="Prix conseille", ge=0)
|
||||
currency: str = Field(default="EUR", description="Devise (EUR, USD, etc.)")
|
||||
shipping_cost: Optional[float] = Field(
|
||||
default=None, description="Frais de port", ge=0
|
||||
@@ -94,6 +94,7 @@ class ProductSnapshot(BaseModel):
|
||||
default=None, description="Référence produit (ASIN, SKU, etc.)"
|
||||
)
|
||||
category: Optional[str] = Field(default=None, description="Catégorie du produit")
|
||||
description: Optional[str] = Field(default=None, description="Description produit")
|
||||
|
||||
# Médias
|
||||
images: list[str] = Field(
|
||||
@@ -133,20 +134,22 @@ class ProductSnapshot(BaseModel):
|
||||
"""Filtre les URLs d'images vides."""
|
||||
return [url.strip() for url in v if url and url.strip()]
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
json_schema_extra = {
|
||||
model_config = ConfigDict(
|
||||
use_enum_values=True,
|
||||
json_schema_extra={
|
||||
"example": {
|
||||
"source": "amazon",
|
||||
"url": "https://www.amazon.fr/dp/B08N5WRWNW",
|
||||
"fetched_at": "2026-01-13T10:30:00Z",
|
||||
"title": "Exemple de produit",
|
||||
"price": 299.99,
|
||||
"msrp": 349.99,
|
||||
"currency": "EUR",
|
||||
"shipping_cost": 0.0,
|
||||
"stock_status": "in_stock",
|
||||
"reference": "B08N5WRWNW",
|
||||
"category": "Electronics",
|
||||
"description": "Chargeur USB-C multi-ports.",
|
||||
"images": [
|
||||
"https://example.com/image1.jpg",
|
||||
"https://example.com/image2.jpg",
|
||||
@@ -165,7 +168,8 @@ class ProductSnapshot(BaseModel):
|
||||
"html_size_bytes": 145000,
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Serialize vers un dictionnaire Python natif."""
|
||||
|
||||
Reference in New Issue
Block a user