addon

2026-01-05 16:08:01 +01:00
parent dcba044cd6
commit c67befc549
2215 changed files with 26743 additions and 329 deletions
--- a/backend/app/utils/md_parser.py
+++ b/backend/app/utils/md_parser.py
@@ -0,0 +1,322 @@
+"""
+Markdown specification file parser for peripherals.
+Parses .md files containing USB device specifications.
+"""
+import re
+from typing import Dict, Any, Optional
+
+
+def parse_md_specification(md_content: str) -> Dict[str, Any]:
+    """
+    Parse a markdown specification file and extract peripheral information.
+
+    Supports two formats:
+    1. Simple format: Title + Description
+    2. Detailed format: Full USB specification with vendor/product IDs, characteristics, etc.
+
+    The text is scanned line by line. The most recent H2 heading (with any
+    leading "1. "-style numbering removed) selects which fields are looked
+    for. Fields are expected as ``**Label**: value``; whitespace before the
+    colon is accepted and labels are matched case-insensitively.
+
+    Args:
+        md_content: Raw markdown content
+
+    Returns:
+        Dictionary with peripheral data ready for database insertion.
+        Keys whose value is None are left out, and
+        "caracteristiques_specifiques" is left out when nothing was
+        extracted. "nom", "type_principal" and "synthese" are always present.
+        USB vendor/product IDs are stored as lowercase "0x...." strings.
+    """
+    specifics: Dict[str, Any] = {}
+    result: Dict[str, Any] = {
+        "nom": None,
+        "type_principal": "USB",
+        "sous_type": None,
+        "marque": None,
+        "modele": None,
+        "numero_serie": None,
+        "description": None,
+        "synthese": md_content,  # Store complete markdown content
+        "caracteristiques_specifiques": specifics,
+        "notes": None,
+    }
+
+    # (keyword pattern, type_principal override or None, sous_type).
+    # Tried in order; the first pattern found in a description line wins.
+    device_hints = (
+        (r'souris|mouse', None, "Souris"),
+        (r'clavier|keyboard', None, "Clavier"),
+        (r'wi[-\u2011]?fi|wireless', "WiFi", "Adaptateur WiFi"),
+        (r'bluetooth', "Bluetooth", "Adaptateur Bluetooth"),
+        (r'usb\s+flash|clé\s+usb|flash\s+drive', None, "Clé USB"),
+        (r'dongle', None, "Dongle"),
+    )
+
+    # (label pattern, key in "caracteristiques_specifiques") for plain fields.
+    usb_fields = (
+        (r'USB\s+version|Version\s+USB', "usb_version"),
+        (r'Negotiated\s+speed|Vitesse\s+négociée', "usb_speed"),
+        (r'bcdUSB', "bcdUSB"),
+        (r'Max\s+power\s+draw|Consommation\s+maximale', "max_power"),
+    )
+    summary_fields = (
+        (r'Category', "category"),
+        (r'Subcategory', "subcategory"),
+    )
+    wifi_fields = (
+        (r'Norme\s+Wi[-\u2011]Fi', "wifi_standard"),
+        (r'Bande\s+de\s+fréquence', "wifi_frequency"),
+        (r'Débit\s+théorique\s+maximal', "wifi_max_speed"),
+    )
+    # (English label, French label, key prefix) for "code + name" fields.
+    class_fields = (
+        (r'Interface\s+class', r'Classe\s+USB', "interface_class"),
+        (r'Subclass', r'Sous-classe', "interface_subclass"),
+        (r'Protocol', r'Protocole', "interface_protocol"),
+    )
+
+    identification_sections = ("Identification", "Identification USB", "Identification générale")
+    class_sections = ("Device Class", "Classe et interface USB")
+    notes_sections = (
+        "Performance Notes", "Power & Stability Considerations",
+        "Recommended USB Port Placement", "Typical Use Cases",
+        "Operating System Support", "Pilotes et compatibilité système",
+        "Contraintes et limitations", "Placement USB recommandé",
+        "Cas d'usage typiques", "Fonction réseau", "Résumé synthétique",
+    )
+
+    # Extract title (first H1). "[ \t]+" rather than "\s+" so that a lone "#"
+    # cannot swallow the following line.
+    title = None
+    title_match = re.search(r'^#[ \t]+(.+?)$', md_content, re.MULTILINE)
+    if title_match:
+        title = title_match.group(1).strip()
+
+        # Extract USB IDs from title if present
+        id_match = re.search(r'(?:ID\s+)?([0-9a-fA-F]{4})[_:]([0-9a-fA-F]{4})', title)
+        if id_match:
+            specifics["vendor_id"] = f"0x{id_match.group(1).lower()}"
+            specifics["product_id"] = f"0x{id_match.group(2).lower()}"
+
+    # Parse content
+    current_section = None  # heading text as written (used as note prefix)
+    section_key = None      # same, with U+2011 hyphens replaced by "-"
+    description_lines = []
+    notes_lines = []
+
+    for raw_line in md_content.strip().split('\n'):
+        line = raw_line.strip()
+
+        # Section headers (H2)
+        if line.startswith('## '):
+            # Remove numbering (e.g., "1. ", "2. ", "10. ")
+            current_section = re.sub(r'^\d+\.\s*', '', line[3:].strip())
+            section_key = current_section.replace('\u2011', '-')
+            continue
+
+        # Description section
+        if section_key == "Description":
+            if line and not line.startswith('#'):
+                description_lines.append(line)
+
+                # Try to extract device type from description
+                if not result["sous_type"]:
+                    for pattern, main_type, sub_type in device_hints:
+                        if re.search(pattern, line, re.IGNORECASE):
+                            if main_type:
+                                result["type_principal"] = main_type
+                            result["sous_type"] = sub_type
+                            break
+
+        # Identification section
+        elif section_key in identification_sections:
+            # Vendor ID, optionally followed by the vendor name in parentheses
+            vendor_match = re.search(
+                r'\*\*Vendor\s+ID\*\*\s*:\s*0x([0-9a-f]{4})\s*(?:\((.+?)\))?',
+                line, re.IGNORECASE)
+            if vendor_match:
+                # Lowercase so the value agrees with IDs taken from the title
+                specifics["vendor_id"] = f"0x{vendor_match.group(1).lower()}"
+                vendor_name = vendor_match.group(2)
+                if vendor_name:
+                    result["marque"] = vendor_name.strip()
+
+            product_match = re.search(
+                r'\*\*Product\s+ID\*\*\s*:\s*0x([0-9a-f]{4})', line, re.IGNORECASE)
+            if product_match:
+                specifics["product_id"] = f"0x{product_match.group(1).lower()}"
+
+            # Commercial name or Désignation USB always wins for "nom"
+            commercial_name = _bold_value(r'Commercial\s+name|Désignation\s+USB', line)
+            if commercial_name:
+                result["nom"] = commercial_name
+
+            # Manufacturer string only fills "marque" when nothing set it yet
+            manufacturer = _bold_value(r'Manufacturer\s+string', line)
+            if manufacturer and not result["marque"]:
+                result["marque"] = manufacturer
+
+            # Product string only fills "nom" when nothing set it yet
+            product_string = _bold_value(r'Product\s+string', line)
+            if product_string and not result["nom"]:
+                result["nom"] = product_string
+
+            serial_number = _bold_value(r'Serial\s+number', line)
+            if serial_number:
+                result["numero_serie"] = serial_number
+
+            # Catégorie (format FR): only a network category changes the type
+            category = _bold_value(r'Catégorie', line)
+            if category and 'réseau' in category.lower():
+                result["type_principal"] = "Réseau"
+
+            # Sous-catégorie (format FR)
+            subcategory = _bold_value(r'Sous-catégorie', line)
+            if subcategory:
+                result["sous_type"] = subcategory
+
+            # Nom courant (format FR)
+            common_name = _bold_value(r'Nom\s+courant', line)
+            if common_name and not result["modele"]:
+                result["modele"] = common_name
+
+            # USB version / speed / power may also be listed in this section
+            _copy_bold_fields(usb_fields, line, specifics)
+
+        # USB Characteristics
+        elif section_key == "USB Characteristics":
+            _copy_bold_fields(usb_fields, line, specifics)
+
+        # Device Class (support both formats)
+        elif section_key in class_sections:
+            for en_label, fr_label, key in class_fields:
+                coded = _coded_value(en_label, fr_label, line)
+                if coded:
+                    specifics[key], specifics[f"{key}_name"] = coded
+
+        # Functional Role: bullet items go to the notes without a prefix
+        elif section_key == "Functional Role":
+            if line.startswith('- '):
+                notes_lines.append(line[2:])
+
+        # Classification Summary
+        elif section_key == "Classification Summary":
+            _copy_bold_fields(summary_fields, line, specifics)
+
+        # Wi-Fi characteristics (heading written with "-" or U+2011)
+        elif section_key == "Caractéristiques Wi-Fi":
+            _copy_bold_fields(wifi_fields, line, specifics)
+
+        # Collect other sections for notes
+        elif section_key in notes_sections:
+            if line and not line.startswith('#'):
+                if line.startswith('- '):
+                    notes_lines.append(f"{current_section}: {line[2:]}")
+                elif line.startswith('**'):
+                    notes_lines.append(f"{current_section}: {line}")
+                elif line.startswith('>'):
+                    notes_lines.append(f"{current_section}: {line[1:].strip()}")
+                elif section_key == "Résumé synthétique":
+                    # Free text is kept only for the summary section
+                    notes_lines.append(line)
+
+    # Build description
+    if description_lines:
+        result["description"] = " ".join(description_lines)
+
+    # Build notes
+    if notes_lines:
+        result["notes"] = "\n".join(notes_lines)
+
+    # Fallback for nom if not found
+    if not result["nom"]:
+        if description_lines:
+            # First description line, capped at 100 characters. The joined
+            # description has no newlines left, so the line list is used.
+            result["nom"] = description_lines[0][:100]
+        elif title:
+            result["nom"] = title
+        else:
+            result["nom"] = "Périphérique importé"
+
+    # Extract brand from description if not found
+    if not result["marque"] and result["description"]:
+        known_brands = ("Logitech", "SanDisk", "Ralink", "Broadcom", "ASUS", "Realtek",
+                        "TP-Link", "Intel", "Samsung", "Kingston", "Corsair")
+        for brand in known_brands:
+            if re.search(rf'\b{re.escape(brand)}\b', result["description"], re.IGNORECASE):
+                result["marque"] = brand
+                break
+
+    # Clean up None values and empty dicts
+    cleaned = {key: value for key, value in result.items() if value is not None}
+    if not cleaned.get("caracteristiques_specifiques"):
+        cleaned.pop("caracteristiques_specifiques", None)
+
+    return cleaned
+
+
+def _bold_value(label_pattern: str, line: str) -> Optional[str]:
+    """Return the value of a ``**Label**: value`` field found in *line*, or None."""
+    match = re.search(rf'\*\*(?:{label_pattern})\*\*\s*:\s*(.+?)$', line, re.IGNORECASE)
+    return match.group(1).strip() if match else None
+
+
+def _copy_bold_fields(fields, line: str, target: Dict[str, Any]) -> None:
+    """Store in *target* every (label pattern, key) field of *fields* present in *line*."""
+    for label_pattern, key in fields:
+        value = _bold_value(label_pattern, line)
+        if value is not None:
+            target[key] = value
+
+
+def _coded_value(en_label: str, fr_label: str, line: str) -> Optional[tuple]:
+    """Return (code, name) from ``**EN**: 3 — Name`` or ``**FR** : Name (3)``, or None."""
+    fr_match = re.search(
+        rf'\*\*(?:{fr_label})\*\*\s*:\s*(.+?)\s*\((\d+)\)', line, re.IGNORECASE)
+    if fr_match:
+        return fr_match.group(2), fr_match.group(1).strip()
+
+    # Code is decimal or two hex digits; separator is an em dash, en dash or "-".
+    en_match = re.search(
+        rf'\*\*(?:{en_label})\*\*\s*:\s*(\d+|[0-9a-f]{{2}})\s*[\u2014\u2013-]\s*(.+?)$',
+        line, re.IGNORECASE)
+    if en_match:
+        return en_match.group(1), en_match.group(2).strip()
+    return None
+
+
+def extract_usb_ids_from_filename(filename: str) -> Optional[Dict[str, str]]:
+    """
+    Extract vendor_id and product_id from filename.
+
+    The filename must contain "ID" (any letter case), one or more "_" or
+    whitespace characters, four hex digits, a "_" or ":" separator and four
+    more hex digits. Both IDs are returned lowercased with a "0x" prefix.
+
+    Examples:
+        ID_0781_55ab.md -> {"vendor_id": "0x0781", "product_id": "0x55ab"}
+        id_0b05_17cb.md -> {"vendor_id": "0x0b05", "product_id": "0x17cb"}
+        ID_0781_55abc.md -> None (product ID is not exactly four hex digits)
+
+    Args:
+        filename: Name of the file
+
+    Returns:
+        Dict with vendor_id and product_id, or None if not found
+    """
+    # The negative lookahead rejects a product ID that runs on into further
+    # hex digits instead of silently truncating it to its first four.
+    match = re.search(
+        r'id[_\s]+([0-9a-f]{4})[_:]([0-9a-f]{4})(?![0-9a-f])',
+        filename,
+        re.IGNORECASE,
+    )
+    if match is None:
+        return None
+
+    vendor_id, product_id = (part.lower() for part in match.groups())
+    return {
+        "vendor_id": f"0x{vendor_id}",
+        "product_id": f"0x{product_id}",
+    }