""" Device classifier - Intelligent detection of peripheral type and subtype Analyzes CLI output and markdown content to automatically determine device category """ import re from typing import Dict, Optional, Tuple class DeviceClassifier: """ Intelligent classifier for USB/Bluetooth/Network devices Analyzes content to determine type_principal and sous_type """ # Keywords mapping for type detection TYPE_KEYWORDS = { # WiFi adapters ("USB", "Adaptateur WiFi"): [ r"wi[‑-]?fi", r"wireless", r"802\.11[a-z]", r"rtl81\d+", # Realtek WiFi chips r"mt76\d+", # MediaTek WiFi chips r"atheros", r"qualcomm.*wireless", r"broadcom.*wireless", r"wlan", r"wireless\s+adapter", ], # Bluetooth ("Bluetooth", "Autre"): [ r"bluetooth", r"bcm20702", # Broadcom BT chips r"bt\s+adapter", ], # USB Flash Drive / Clé USB ("Stockage", "Clé USB"): [ r"flash\s+drive", r"usb\s+stick", r"cruzer", # SanDisk Cruzer series r"datatraveler", # Kingston DataTraveler r"usb.*flash", r"clé\s+usb", r"pendrive", ], # External HDD/SSD ("Stockage", "Disque dur externe"): [ r"external\s+hdd", r"external\s+ssd", r"portable\s+ssd", r"portable\s+drive", r"disk\s+drive", r"disque\s+dur\s+externe", r"my\s+passport", # WD My Passport r"expansion", # Seagate Expansion r"backup\s+plus", # Seagate Backup Plus r"elements", # WD Elements r"touro", # Hitachi Touro r"adata.*hd\d+", # ADATA external drives ], # Card Reader ("Stockage", "Lecteur de carte"): [ r"card\s+reader", r"lecteur.*carte", r"sd.*reader", r"microsd.*reader", r"multi.*card", r"cf.*reader", ], # USB Hub ("USB", "Hub"): [ r"usb\s+hub", r"hub\s+controller", r"multi[‑-]?port", ], # USB Keyboard ("USB", "Clavier"): [ r"keyboard", r"clavier", r"hid.*keyboard", ], # USB Mouse ("USB", "Souris"): [ r"mouse", r"souris", r"hid.*mouse", r"optical\s+mouse", ], # Logitech Unifying (can be keyboard or mouse) ("USB", "Autre"): [ r"unifying\s+receiver", r"logitech.*receiver", ], # ZigBee dongle ("USB", "ZigBee"): [ r"zigbee", r"conbee", r"cc2531", # Texas Instruments ZigBee chip r"cc2652", # TI newer ZigBee chip r"dresden\s+elektronik", r"zigbee.*gateway", r"zigbee.*coordinator", r"thread.*border", ], # Fingerprint reader ("USB", "Lecteur biométrique"): [ r"fingerprint", r"fingprint", # Common typo (CS9711Fingprint) r"empreinte", r"biometric", r"biométrique", r"validity.*sensor", r"synaptics.*fingerprint", r"goodix.*fingerprint", r"elan.*fingerprint", ], # USB Webcam ("Video", "Webcam"): [ r"webcam", r"camera", r"video\s+capture", r"uvc", # USB Video Class ], # Ethernet ("Réseau", "Ethernet"): [ r"ethernet", r"gigabit", r"network\s+adapter", r"lan\s+adapter", r"rtl81\d+.*ethernet", ], # Network WiFi (non-USB) ("Réseau", "Wi-Fi"): [ r"wireless.*network", r"wi[‑-]?fi.*card", r"wlan.*card", ], } # INTERFACE class codes (from USB spec) # CRITICAL: Mass Storage is determined by bInterfaceClass, not bDeviceClass USB_INTERFACE_CLASS_MAPPING = { 8: ("Stockage", "Clé USB"), # Mass Storage (refined by keywords to distinguish flash/HDD/card reader) 3: ("USB", "Clavier"), # HID (could be keyboard/mouse, refined by keywords) 14: ("Video", "Webcam"), # Video (0x0e) 9: ("USB", "Hub"), # Hub 224: ("Bluetooth", "Autre"), # Wireless Controller (0xe0) 255: ("USB", "Autre"), # Vendor Specific - requires firmware } # Device class codes (less reliable than interface class for Mass Storage) USB_DEVICE_CLASS_MAPPING = { "08": ("Stockage", "Clé USB"), # Mass Storage (fallback only) "03": ("USB", "Clavier"), # HID (could be keyboard/mouse, refined by keywords) "0e": ("Video", "Webcam"), # Video "09": ("USB", "Hub"), # Hub "e0": ("Bluetooth", "Autre"), # Wireless Controller } @staticmethod def normalize_text(text: str) -> str: """Normalize text for matching (lowercase, remove accents)""" if not text: return "" return text.lower().strip() @staticmethod def detect_from_keywords(content: str) -> Optional[Tuple[str, str]]: """ Detect device type from keywords in content Args: content: Text content to analyze (CLI output or markdown) Returns: Tuple of (type_principal, sous_type) or None """ normalized = DeviceClassifier.normalize_text(content) # Score each type based on keyword matches scores = {} for (type_principal, sous_type), patterns in DeviceClassifier.TYPE_KEYWORDS.items(): score = 0 for pattern in patterns: matches = re.findall(pattern, normalized, re.IGNORECASE) score += len(matches) if score > 0: scores[(type_principal, sous_type)] = score if not scores: return None # Return the type with highest score best_match = max(scores.items(), key=lambda x: x[1]) return best_match[0] @staticmethod def detect_from_usb_interface_class(interface_classes: Optional[list]) -> Optional[Tuple[str, str]]: """ Detect device type from USB interface class codes CRITICAL: This is the normative way to detect Mass Storage (class 08) Args: interface_classes: List of interface class info dicts with 'code' and 'name' e.g., [{"code": 8, "name": "Mass Storage"}] Returns: Tuple of (type_principal, sous_type) or None """ if not interface_classes: return None # Check all interfaces for known types # Priority: Mass Storage (8) > others for interface in interface_classes: class_code = interface.get("code") if class_code in DeviceClassifier.USB_INTERFACE_CLASS_MAPPING: return DeviceClassifier.USB_INTERFACE_CLASS_MAPPING[class_code] return None @staticmethod def detect_from_usb_device_class(device_class: Optional[str]) -> Optional[Tuple[str, str]]: """ Detect device type from USB device class code (FALLBACK ONLY) NOTE: For Mass Storage, bInterfaceClass is normative, not bDeviceClass Args: device_class: USB bDeviceClass (e.g., "08", "03") Returns: Tuple of (type_principal, sous_type) or None """ if not device_class: return None # Normalize class code device_class = device_class.strip().lower().lstrip("0x") return DeviceClassifier.USB_DEVICE_CLASS_MAPPING.get(device_class) @staticmethod def detect_from_vendor_product(vendor_id: Optional[str], product_id: Optional[str], manufacturer: Optional[str], product: Optional[str]) -> Optional[Tuple[str, str]]: """ Detect device type from vendor/product IDs and strings Args: vendor_id: USB vendor ID (e.g., "0x0781") product_id: USB product ID manufacturer: Manufacturer string product: Product string Returns: Tuple of (type_principal, sous_type) or None """ # Build a searchable string from all identifiers search_text = " ".join(filter(None, [ manufacturer or "", product or "", vendor_id or "", product_id or "", ])) return DeviceClassifier.detect_from_keywords(search_text) @staticmethod def classify_device(cli_content: Optional[str] = None, synthese_content: Optional[str] = None, device_info: Optional[Dict] = None) -> Tuple[str, str]: """ Classify a device using all available information Args: cli_content: Raw CLI output (lsusb -v, lshw, etc.) synthese_content: Markdown synthesis content device_info: Parsed device info dict (vendor_id, product_id, interface_classes, etc.) Returns: Tuple of (type_principal, sous_type) - defaults to ("USB", "Autre") if unknown """ device_info = device_info or {} # Strategy 1: CRITICAL - Check USB INTERFACE class (normative for Mass Storage) if device_info.get("interface_classes"): result = DeviceClassifier.detect_from_usb_interface_class(device_info["interface_classes"]) if result: # Refine HID devices (class 03) using keywords if result == ("USB", "Clavier"): content = " ".join(filter(None, [cli_content, synthese_content])) if re.search(r"mouse|souris", content, re.IGNORECASE): return ("USB", "Souris") return result # Strategy 2: Fallback to device class (less reliable) if device_info.get("device_class"): result = DeviceClassifier.detect_from_usb_device_class(device_info["device_class"]) if result: # Refine HID devices (class 03) using keywords if result == ("USB", "Clavier"): content = " ".join(filter(None, [cli_content, synthese_content])) if re.search(r"mouse|souris", content, re.IGNORECASE): return ("USB", "Souris") return result # Strategy 3: Analyze vendor/product info result = DeviceClassifier.detect_from_vendor_product( device_info.get("vendor_id"), device_info.get("product_id"), device_info.get("manufacturer"), device_info.get("product"), ) if result: return result # Strategy 4: Analyze full CLI content if cli_content: result = DeviceClassifier.detect_from_keywords(cli_content) if result: return result # Strategy 5: Analyze markdown synthesis if synthese_content: result = DeviceClassifier.detect_from_keywords(synthese_content) if result: return result # Default fallback return ("USB", "Autre") @staticmethod def refine_bluetooth_subtype(content: str) -> str: """ Refine Bluetooth subtype based on content Args: content: Combined content to analyze Returns: Refined sous_type (Clavier, Souris, Audio, or Autre) """ normalized = DeviceClassifier.normalize_text(content) if re.search(r"keyboard|clavier", normalized): return "Clavier" if re.search(r"mouse|souris", normalized): return "Souris" if re.search(r"headset|audio|speaker|écouteur|casque", normalized): return "Audio" return "Autre" @staticmethod def refine_storage_subtype(content: str) -> str: """ Refine Storage subtype based on content Distinguishes between USB flash drives, external HDD/SSD, and card readers Args: content: Combined content to analyze Returns: Refined sous_type (Clé USB, Disque dur externe, Lecteur de carte) """ normalized = DeviceClassifier.normalize_text(content) # Check for card reader first (most specific) if re.search(r"card\s+reader|lecteur.*carte|sd.*reader|multi.*card", normalized): return "Lecteur de carte" # Check for external HDD/SSD if re.search(r"external\s+(hdd|ssd|disk)|portable\s+(ssd|drive)|disque\s+dur|" r"my\s+passport|expansion|backup\s+plus|elements|touro", normalized): return "Disque dur externe" # Check for USB flash drive indicators if re.search(r"flash\s+drive|usb\s+stick|cruzer|datatraveler|pendrive|clé\s+usb", normalized): return "Clé USB" # Default to USB flash drive for mass storage devices return "Clé USB"