chore: sync project files
This commit is contained in:
43
analyze_price_philips.py
Executable file
43
analyze_price_philips.py
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Analyse du prix sur la page Philips."""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
# Read the saved page snapshot as UTF-8 text, then parse it with the lxml
# backend. `html` (raw markup) and `soup` (parsed tree) are both reused by the
# search steps further down the script.
with open(
    "scraped/cdiscount_phi1721524349346_pw.html",
    mode="r",
    encoding="utf-8",
) as page_file:
    html = page_file.read()

soup = BeautifulSoup(html, "lxml")
|
||||
|
||||
# Report header: the title framed by two 80-character rules, one item per line.
print("=" * 80, "RECHERCHE DU PRIX", "=" * 80, sep="\n")
|
||||
|
||||
# 1. Find every <div> with "price" in its class (compared in lower case).
price_divs = soup.find_all(
    "div",
    class_=lambda css_class: bool(css_class) and "price" in css_class.lower(),
)
print(f"\n1. Divs avec 'price' dans la classe: {len(price_divs)}")
# Preview at most the first ten hits; each text snippet is cut to 100 chars.
for rank, div_tag in enumerate(price_divs[:10], start=1):
    snippet = div_tag.get_text().strip()[:100]
    print(f" [{rank}] {div_tag.get('class')} → {snippet}")
|
||||
|
||||
# 2. Find every <span> with "price" in its class (compared in lower case).
price_spans = soup.find_all(
    "span",
    class_=lambda css_class: bool(css_class) and "price" in css_class.lower(),
)
print(f"\n2. Spans avec 'price' dans la classe: {len(price_spans)}")
# Preview at most the first ten hits; each text snippet is cut to 100 chars.
for rank, span_tag in enumerate(price_spans[:10], start=1):
    snippet = span_tag.get_text().strip()[:100]
    print(f" [{rank}] {span_tag.get('class')} → {snippet}")
|
||||
|
||||
# 3. Regex scan for decimal amounts followed by a euro sign.
# NOTE: the pattern is applied to the raw markup in `html`, not to the text
# extracted from `soup`, so hits may come from anywhere in the document.
print("\n3. Regex sur le texte complet:")
euro_amount = re.compile(r'(\d+[,\.]\d+)\s*€')
matches = euro_amount.findall(html)
print(f" Trouvé {len(matches)} matches avec pattern \\d+[,\\.]\\d+\\s*€")
# Show at most the first ten captured amounts.
for rank, amount in enumerate(matches[:10], start=1):
    print(f" [{rank}] {amount} €")
|
||||
|
||||
# 4. Find every element that carries a data-price attribute, whatever its tag.
price_data = soup.find_all(attrs={"data-price": True})
print(f"\n4. Éléments avec data-price: {len(price_data)}")
# Show at most the first five: attribute value, tag name and class list.
for tag in price_data[:5]:
    attr_value = tag.get("data-price")
    css_classes = tag.get("class")
    print(f" - data-price={attr_value} {tag.name} {css_classes}")

# Closing rule, preceded by a blank line.
print(f"\n{'=' * 80}")
|
||||
Reference in New Issue
Block a user