This commit is contained in:
Gilles Soulier
2026-01-05 16:08:01 +01:00
parent dcba044cd6
commit c67befc549
2215 changed files with 26743 additions and 329 deletions

View File

@@ -0,0 +1,395 @@
"""
Device classifier - Intelligent detection of peripheral type and subtype
Analyzes CLI output and markdown content to automatically determine device category
"""
import re
from typing import Dict, Optional, Tuple
class DeviceClassifier:
    """
    Classifier for USB/Bluetooth/network peripherals.

    The class is a namespace of static methods and lookup tables. Every
    detection method returns a ``(type_principal, sous_type)`` tuple whose
    values are the (French) labels used as keys/values in the tables below,
    or ``None`` when nothing matched.
    """

    # Keyword mapping for type detection.
    # Keys are (type_principal, sous_type); values are regex patterns that are
    # searched in lowercased text. Each match adds one point to the key's score
    # (see detect_from_keywords).
    TYPE_KEYWORDS: Dict[Tuple[str, str], list] = {
        # WiFi adapters
        ("USB", "Adaptateur WiFi"): [
            r"wi[-]?fi",
            r"wireless",
            r"802\.11[a-z]",
            r"rtl81\d+",  # Realtek WiFi chips
            r"mt76\d+",  # MediaTek WiFi chips
            r"atheros",
            r"qualcomm.*wireless",
            r"broadcom.*wireless",
            r"wlan",
            r"wireless\s+adapter",
        ],
        # Bluetooth
        ("Bluetooth", "Autre"): [
            r"bluetooth",
            r"bcm20702",  # Broadcom BT chips
            r"bt\s+adapter",
        ],
        # USB flash drive
        ("Stockage", "Clé USB"): [
            r"flash\s+drive",
            r"usb\s+stick",
            r"cruzer",  # SanDisk Cruzer series
            r"datatraveler",  # Kingston DataTraveler
            r"usb.*flash",
            r"clé\s+usb",
            r"pendrive",
        ],
        # External HDD/SSD
        ("Stockage", "Disque dur externe"): [
            r"external\s+hdd",
            r"external\s+ssd",
            r"portable\s+ssd",
            r"portable\s+drive",
            r"disk\s+drive",
            r"disque\s+dur\s+externe",
            r"my\s+passport",  # WD My Passport
            r"expansion",  # Seagate Expansion
            r"backup\s+plus",  # Seagate Backup Plus
            r"elements",  # WD Elements
            r"touro",  # Hitachi Touro
            r"adata.*hd\d+",  # ADATA external drives
        ],
        # Card reader
        ("Stockage", "Lecteur de carte"): [
            r"card\s+reader",
            r"lecteur.*carte",
            r"sd.*reader",
            r"microsd.*reader",
            r"multi.*card",
            r"cf.*reader",
        ],
        # USB hub
        ("USB", "Hub"): [
            r"usb\s+hub",
            r"hub\s+controller",
            r"multi[-]?port",
        ],
        # USB keyboard
        ("USB", "Clavier"): [
            r"keyboard",
            r"clavier",
            r"hid.*keyboard",
        ],
        # USB mouse
        ("USB", "Souris"): [
            r"mouse",
            r"souris",
            r"hid.*mouse",
            r"optical\s+mouse",
        ],
        # Logitech Unifying receiver (can be keyboard or mouse)
        ("USB", "Autre"): [
            r"unifying\s+receiver",
            r"logitech.*receiver",
        ],
        # ZigBee dongle
        ("USB", "ZigBee"): [
            r"zigbee",
            r"conbee",
            r"cc2531",  # Texas Instruments ZigBee chip
            r"cc2652",  # TI newer ZigBee chip
            r"dresden\s+elektronik",
            r"zigbee.*gateway",
            r"zigbee.*coordinator",
            r"thread.*border",
        ],
        # Fingerprint reader
        ("USB", "Lecteur biométrique"): [
            r"fingerprint",
            r"fingprint",  # Common typo (CS9711Fingprint)
            r"empreinte",
            r"biometric",
            r"biométrique",
            r"validity.*sensor",
            r"synaptics.*fingerprint",
            r"goodix.*fingerprint",
            r"elan.*fingerprint",
        ],
        # USB webcam
        ("Video", "Webcam"): [
            r"webcam",
            r"camera",
            r"video\s+capture",
            r"uvc",  # USB Video Class
        ],
        # Ethernet
        ("Réseau", "Ethernet"): [
            r"ethernet",
            r"gigabit",
            r"network\s+adapter",
            r"lan\s+adapter",
            r"rtl81\d+.*ethernet",
        ],
        # Network WiFi (non-USB)
        ("Réseau", "Wi-Fi"): [
            r"wireless.*network",
            r"wi[-]?fi.*card",
            r"wlan.*card",
        ],
    }

    # INTERFACE class codes (from the USB spec), keyed by integer code.
    # CRITICAL: Mass Storage is determined by bInterfaceClass, not bDeviceClass.
    # NOTE(review): a vendor-specific interface (255) maps to the generic
    # ("USB", "Autre") and classify_device returns that result immediately, so
    # the keyword strategies never run for such devices - confirm this is
    # intended.
    USB_INTERFACE_CLASS_MAPPING: Dict[int, Tuple[str, str]] = {
        8: ("Stockage", "Clé USB"),  # Mass Storage (flash/HDD/card reader: see refine_storage_subtype)
        3: ("USB", "Clavier"),  # HID (could be keyboard/mouse, refined by keywords)
        14: ("Video", "Webcam"),  # Video (0x0e)
        9: ("USB", "Hub"),  # Hub
        224: ("Bluetooth", "Autre"),  # Wireless Controller (0xe0)
        255: ("USB", "Autre"),  # Vendor Specific - requires firmware
    }

    # Device class codes (less reliable than interface class for Mass Storage),
    # keyed by two-digit lowercase hex string.
    USB_DEVICE_CLASS_MAPPING: Dict[str, Tuple[str, str]] = {
        "08": ("Stockage", "Clé USB"),  # Mass Storage (fallback only)
        "03": ("USB", "Clavier"),  # HID (could be keyboard/mouse, refined by keywords)
        "0e": ("Video", "Webcam"),  # Video
        "09": ("USB", "Hub"),  # Hub
        "e0": ("Bluetooth", "Autre"),  # Wireless Controller
    }

    @staticmethod
    def normalize_text(text: str) -> str:
        """
        Normalize text for matching: lowercase and strip surrounding whitespace.

        Accents are deliberately kept, because several keyword patterns
        (e.g. ``clé\\s+usb``, ``biométrique``) contain accented characters.

        Args:
            text: Text to normalize; ``None`` or empty is accepted.

        Returns:
            The normalized text, or "" for empty/None input.
        """
        if not text:
            return ""
        return text.lower().strip()

    @staticmethod
    def detect_from_keywords(content: str) -> Optional[Tuple[str, str]]:
        """
        Detect device type from keywords in content.

        Every pattern of TYPE_KEYWORDS is searched in the normalized content;
        each occurrence counts for one point and the category with the highest
        total wins. On a tie, the category declared first in TYPE_KEYWORDS wins.

        Args:
            content: Text content to analyze (CLI output or markdown)

        Returns:
            Tuple of (type_principal, sous_type) or None when nothing matches
        """
        normalized = DeviceClassifier.normalize_text(content)
        if not normalized:
            return None

        # Score each type based on keyword matches (insertion order preserved
        # so that max() breaks ties in favour of the first declared category).
        scores = {}
        for category, patterns in DeviceClassifier.TYPE_KEYWORDS.items():
            score = sum(
                len(re.findall(pattern, normalized, re.IGNORECASE))
                for pattern in patterns
            )
            if score > 0:
                scores[category] = score

        if not scores:
            return None

        # Return the type with highest score
        return max(scores.items(), key=lambda item: item[1])[0]

    @staticmethod
    def detect_from_usb_interface_class(interface_classes: Optional[list]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from USB interface class codes.

        CRITICAL: This is the normative way to detect Mass Storage (class 08).
        Mass Storage takes priority over every other interface, whatever its
        position in the list; otherwise the first known interface wins.

        Args:
            interface_classes: List of interface class info dicts with 'code' and 'name'
                e.g., [{"code": 8, "name": "Mass Storage"}]

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        if not interface_classes:
            return None

        mapping = DeviceClassifier.USB_INTERFACE_CLASS_MAPPING
        # Entries that are not dicts carry no usable class code: skip them.
        codes = [
            interface.get("code")
            for interface in interface_classes
            if isinstance(interface, dict)
        ]

        # Priority: Mass Storage (8) > others
        if 8 in codes:
            return mapping[8]

        for class_code in codes:
            if class_code in mapping:
                return mapping[class_code]
        return None

    @staticmethod
    def detect_from_usb_device_class(device_class: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from USB device class code (FALLBACK ONLY).

        NOTE: For Mass Storage, bInterfaceClass is normative, not bDeviceClass.

        Args:
            device_class: USB bDeviceClass as a hex string, with or without a
                "0x" prefix or leading zero (e.g., "08", "0x03", "e0"). An
                int is also accepted and interpreted as the numeric class code.

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        if not device_class:
            return None

        if isinstance(device_class, int):
            code = format(device_class, "02x")
        else:
            code = str(device_class).strip().lower()
            # Drop a literal "0x" prefix only. str.lstrip("0x") would strip
            # every leading '0'/'x' character and turn "08" into "8".
            if code.startswith("0x"):
                code = code[2:]
            # Mapping keys are two-digit hex strings.
            code = code.zfill(2)

        return DeviceClassifier.USB_DEVICE_CLASS_MAPPING.get(code)

    @staticmethod
    def detect_from_vendor_product(vendor_id: Optional[str], product_id: Optional[str],
                                   manufacturer: Optional[str], product: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from vendor/product IDs and strings.

        Args:
            vendor_id: USB vendor ID (e.g., "0x0781")
            product_id: USB product ID
            manufacturer: Manufacturer string
            product: Product string

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        # Build a searchable string from all identifiers that are present
        search_text = " ".join(
            part for part in (manufacturer, product, vendor_id, product_id) if part
        )
        return DeviceClassifier.detect_from_keywords(search_text)

    @staticmethod
    def _refine_hid_result(result: Tuple[str, str], texts) -> Tuple[str, str]:
        """
        Turn the generic HID result ("USB", "Clavier") into ("USB", "Souris")
        when any of the given texts mentions a mouse; other results are
        returned unchanged.

        Args:
            result: Result obtained from a class-code lookup
            texts: Iterable of optional strings to search (None/empty ignored)

        Returns:
            The refined (type_principal, sous_type) tuple
        """
        if result != ("USB", "Clavier"):
            return result
        content = " ".join(text for text in texts if text)
        if re.search(r"mouse|souris", content, re.IGNORECASE):
            return ("USB", "Souris")
        return result

    @staticmethod
    def classify_device(cli_content: Optional[str] = None,
                        synthese_content: Optional[str] = None,
                        device_info: Optional[Dict] = None) -> Tuple[str, str]:
        """
        Classify a device using all available information.

        Strategies are tried in order and the first one that yields a result
        wins: interface class, device class, vendor/product strings, CLI
        content, markdown synthesis.

        Args:
            cli_content: Raw CLI output (lsusb -v, lshw, etc.)
            synthese_content: Markdown synthesis content
            device_info: Parsed device info dict (vendor_id, product_id, interface_classes, etc.)

        Returns:
            Tuple of (type_principal, sous_type) - defaults to ("USB", "Autre") if unknown
        """
        device_info = device_info or {}

        # Texts used to tell a HID mouse from a HID keyboard (class 03). The
        # product/manufacturer strings are included so the refinement also
        # works when no CLI/markdown content is supplied.
        hid_texts = (
            cli_content,
            synthese_content,
            device_info.get("manufacturer"),
            device_info.get("product"),
        )

        # Strategy 1: CRITICAL - Check USB INTERFACE class (normative for Mass Storage)
        interface_classes = device_info.get("interface_classes")
        if interface_classes:
            result = DeviceClassifier.detect_from_usb_interface_class(interface_classes)
            if result:
                return DeviceClassifier._refine_hid_result(result, hid_texts)

        # Strategy 2: Fallback to device class (less reliable)
        device_class = device_info.get("device_class")
        if device_class:
            result = DeviceClassifier.detect_from_usb_device_class(device_class)
            if result:
                return DeviceClassifier._refine_hid_result(result, hid_texts)

        # Strategy 3: Analyze vendor/product info
        result = DeviceClassifier.detect_from_vendor_product(
            device_info.get("vendor_id"),
            device_info.get("product_id"),
            device_info.get("manufacturer"),
            device_info.get("product"),
        )
        if result:
            return result

        # Strategy 4: Analyze full CLI content, then
        # Strategy 5: Analyze markdown synthesis
        for content in (cli_content, synthese_content):
            if content:
                result = DeviceClassifier.detect_from_keywords(content)
                if result:
                    return result

        # Default fallback
        return ("USB", "Autre")

    @staticmethod
    def refine_bluetooth_subtype(content: str) -> str:
        """
        Refine Bluetooth subtype based on content.

        Args:
            content: Combined content to analyze

        Returns:
            Refined sous_type (Clavier, Souris, Audio, or Autre)
        """
        normalized = DeviceClassifier.normalize_text(content)
        if re.search(r"keyboard|clavier", normalized):
            return "Clavier"
        if re.search(r"mouse|souris", normalized):
            return "Souris"
        if re.search(r"headset|audio|speaker|écouteur|casque", normalized):
            return "Audio"
        return "Autre"

    @staticmethod
    def refine_storage_subtype(content: str) -> str:
        """
        Refine Storage subtype based on content.

        Distinguishes between USB flash drives, external HDD/SSD, and card readers.

        Args:
            content: Combined content to analyze

        Returns:
            Refined sous_type (Clé USB, Disque dur externe, Lecteur de carte)
        """
        normalized = DeviceClassifier.normalize_text(content)

        # Check for card reader first (most specific)
        if re.search(r"card\s+reader|lecteur.*carte|sd.*reader|multi.*card", normalized):
            return "Lecteur de carte"

        # Check for external HDD/SSD
        if re.search(r"external\s+(hdd|ssd|disk)|portable\s+(ssd|drive)|disque\s+dur|"
                     r"my\s+passport|expansion|backup\s+plus|elements|touro", normalized):
            return "Disque dur externe"

        # Flash drive is the default for mass storage devices, so flash-drive
        # keywords need no explicit check: the outcome is the same either way.
        return "Clé USB"