Files
serv_benchmark/backend/app/utils/device_classifier.py
Gilles Soulier c67befc549 addon
2026-01-05 16:08:01 +01:00

396 lines
13 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Device classifier - Intelligent detection of peripheral type and subtype
Analyzes CLI output and markdown content to automatically determine device category
"""
import re
from typing import Dict, Optional, Tuple
class DeviceClassifier:
    """
    Intelligent classifier for USB/Bluetooth/Network devices.

    Analyzes structured device info and free text (CLI output, markdown
    synthesis) to determine a ``(type_principal, sous_type)`` pair.
    All methods are static; the class only serves as a namespace.
    """

    # Keywords mapping for type detection.
    # Keys are (type_principal, sous_type); values are regex patterns that are
    # matched against lowercased text. Insertion order matters: when two
    # categories reach the same score, the one declared first wins.
    TYPE_KEYWORDS = {
        # WiFi adapters
        ("USB", "Adaptateur WiFi"): [
            r"wi[-]?fi",
            r"wireless",
            r"802\.11[a-z]",
            r"rtl81\d+",  # Realtek WiFi chips
            r"mt76\d+",  # MediaTek WiFi chips
            r"atheros",
            r"qualcomm.*wireless",
            r"broadcom.*wireless",
            r"wlan",
            r"wireless\s+adapter",
        ],
        # Bluetooth
        ("Bluetooth", "Autre"): [
            r"bluetooth",
            r"bcm20702",  # Broadcom BT chips
            r"bt\s+adapter",
        ],
        # USB flash drive
        ("Stockage", "Clé USB"): [
            r"flash\s+drive",
            r"usb\s+stick",
            r"cruzer",  # SanDisk Cruzer series
            r"datatraveler",  # Kingston DataTraveler
            r"usb.*flash",
            r"clé\s+usb",
            r"pendrive",
        ],
        # External HDD/SSD
        ("Stockage", "Disque dur externe"): [
            r"external\s+hdd",
            r"external\s+ssd",
            r"portable\s+ssd",
            r"portable\s+drive",
            r"disk\s+drive",
            r"disque\s+dur\s+externe",
            r"my\s+passport",  # WD My Passport
            r"expansion",  # Seagate Expansion
            r"backup\s+plus",  # Seagate Backup Plus
            r"elements",  # WD Elements
            r"touro",  # Hitachi Touro
            r"adata.*hd\d+",  # ADATA external drives
        ],
        # Card reader
        ("Stockage", "Lecteur de carte"): [
            r"card\s+reader",
            r"lecteur.*carte",
            r"sd.*reader",
            r"microsd.*reader",
            r"multi.*card",
            r"cf.*reader",
        ],
        # USB hub
        ("USB", "Hub"): [
            r"usb\s+hub",
            r"hub\s+controller",
            r"multi[-]?port",
        ],
        # USB keyboard
        ("USB", "Clavier"): [
            r"keyboard",
            r"clavier",
            r"hid.*keyboard",
        ],
        # USB mouse
        ("USB", "Souris"): [
            r"mouse",
            r"souris",
            r"hid.*mouse",
            r"optical\s+mouse",
        ],
        # Logitech Unifying (can be keyboard or mouse)
        ("USB", "Autre"): [
            r"unifying\s+receiver",
            r"logitech.*receiver",
        ],
        # ZigBee dongle
        ("USB", "ZigBee"): [
            r"zigbee",
            r"conbee",
            r"cc2531",  # Texas Instruments ZigBee chip
            r"cc2652",  # TI newer ZigBee chip
            r"dresden\s+elektronik",
            r"zigbee.*gateway",
            r"zigbee.*coordinator",
            r"thread.*border",
        ],
        # Fingerprint reader
        ("USB", "Lecteur biométrique"): [
            r"fingerprint",
            r"fingprint",  # Common typo (CS9711Fingprint)
            r"empreinte",
            r"biometric",
            r"biométrique",
            r"validity.*sensor",
            r"synaptics.*fingerprint",
            r"goodix.*fingerprint",
            r"elan.*fingerprint",
        ],
        # USB webcam
        ("Video", "Webcam"): [
            r"webcam",
            r"camera",
            r"video\s+capture",
            r"uvc",  # USB Video Class
        ],
        # Ethernet
        ("Réseau", "Ethernet"): [
            r"ethernet",
            r"gigabit",
            r"network\s+adapter",
            r"lan\s+adapter",
            r"rtl81\d+.*ethernet",
        ],
        # Network WiFi (non-USB)
        ("Réseau", "Wi-Fi"): [
            r"wireless.*network",
            r"wi[-]?fi.*card",
            r"wlan.*card",
        ],
    }

    # INTERFACE class codes (from USB spec), keyed by integer bInterfaceClass.
    # CRITICAL: Mass Storage is determined by bInterfaceClass, not bDeviceClass.
    USB_INTERFACE_CLASS_MAPPING = {
        8: ("Stockage", "Clé USB"),  # Mass Storage (default subtype; see refine_storage_subtype)
        3: ("USB", "Clavier"),  # HID (could be keyboard/mouse, refined by keywords)
        14: ("Video", "Webcam"),  # Video (0x0e)
        9: ("USB", "Hub"),  # Hub
        224: ("Bluetooth", "Autre"),  # Wireless Controller (0xe0)
        255: ("USB", "Autre"),  # Vendor Specific - requires firmware
    }

    # Device class codes (less reliable than interface class for Mass Storage),
    # keyed by two-digit lowercase hex strings.
    USB_DEVICE_CLASS_MAPPING = {
        "08": ("Stockage", "Clé USB"),  # Mass Storage (fallback only)
        "03": ("USB", "Clavier"),  # HID (could be keyboard/mouse, refined by keywords)
        "0e": ("Video", "Webcam"),  # Video
        "09": ("USB", "Hub"),  # Hub
        "e0": ("Bluetooth", "Autre"),  # Wireless Controller
    }

    # Interface class codes that get special treatment during detection.
    _MASS_STORAGE_INTERFACE_CLASS = 8
    _VENDOR_SPECIFIC_INTERFACE_CLASS = 255

    # Catch-all result; also the final fallback of classify_device().
    _GENERIC_RESULT = ("USB", "Autre")

    @staticmethod
    def normalize_text(text: str) -> str:
        """
        Normalize text for matching (lowercase, surrounding whitespace removed).

        Accents are preserved: several keyword patterns (e.g. ``clé\\s+usb``,
        ``biométrique``) contain accented characters and would stop matching
        if accents were stripped here.

        Args:
            text: Text to normalize; a falsy value yields an empty string

        Returns:
            The normalized text
        """
        if not text:
            return ""
        return text.lower().strip()

    @staticmethod
    def detect_from_keywords(content: str) -> Optional[Tuple[str, str]]:
        """
        Detect device type from keywords in content

        Each category of TYPE_KEYWORDS is scored by the total number of
        matches of its patterns; the highest score wins. On a tie, the
        category declared first in TYPE_KEYWORDS is returned.

        Args:
            content: Text content to analyze (CLI output or markdown)

        Returns:
            Tuple of (type_principal, sous_type) or None when nothing matches
        """
        normalized = DeviceClassifier.normalize_text(content)

        # Score each type based on keyword matches. The text is already
        # lowercased, so no case-insensitive flag is needed.
        scores = {}
        for category, patterns in DeviceClassifier.TYPE_KEYWORDS.items():
            score = sum(len(re.findall(pattern, normalized)) for pattern in patterns)
            if score > 0:
                scores[category] = score

        if not scores:
            return None

        # max() keeps the first maximal key, i.e. declaration order on ties.
        return max(scores, key=scores.get)

    @staticmethod
    def detect_from_usb_interface_class(interface_classes: Optional[list]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from USB interface class codes

        CRITICAL: This is the normative way to detect Mass Storage (class 08)

        Priority across the interfaces of a (possibly composite) device:
        Mass Storage (8) > first other specific class in list order >
        Vendor Specific (255).

        Args:
            interface_classes: List of interface class info dicts with 'code' and 'name'
                e.g., [{"code": 8, "name": "Mass Storage"}].
                Entries that are not dicts are ignored.

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        if not interface_classes:
            return None

        mapping = DeviceClassifier.USB_INTERFACE_CLASS_MAPPING
        mass_storage = DeviceClassifier._MASS_STORAGE_INTERFACE_CLASS
        vendor_specific = DeviceClassifier._VENDOR_SPECIFIC_INTERFACE_CLASS

        codes = [
            interface.get("code")
            for interface in interface_classes
            if isinstance(interface, dict)
        ]

        # Mass Storage wins regardless of where it appears in the list.
        if mass_storage in codes:
            return mapping[mass_storage]

        # Then the first interface carrying a specific (non vendor) class.
        for code in codes:
            if code != vendor_specific and code in mapping:
                return mapping[code]

        # Vendor Specific says nothing about the function, so it comes last.
        if vendor_specific in codes:
            return mapping[vendor_specific]
        return None

    @staticmethod
    def detect_from_usb_device_class(device_class: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from USB device class code (FALLBACK ONLY)

        NOTE: For Mass Storage, bInterfaceClass is normative, not bDeviceClass

        Args:
            device_class: USB bDeviceClass as a hex string; an optional "0x"
                prefix, upper case and missing/extra leading zeros are
                tolerated (e.g., "08", "0x08", "8", "E0"). An int is
                accepted as the numeric class code.

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        if not device_class:
            return None

        if isinstance(device_class, int):
            code = format(device_class, "x")
        else:
            code = str(device_class).strip().lower()
            # Remove the prefix explicitly: str.lstrip("0x") strips any run of
            # the characters '0' and 'x', which turns "08" into "8".
            if code.startswith("0x"):
                code = code[2:]

        # Canonical form of the mapping keys: two hex digits.
        code = code.lstrip("0").zfill(2)
        return DeviceClassifier.USB_DEVICE_CLASS_MAPPING.get(code)

    @staticmethod
    def detect_from_vendor_product(vendor_id: Optional[str], product_id: Optional[str],
                                   manufacturer: Optional[str], product: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect device type from vendor/product IDs and strings

        Args:
            vendor_id: USB vendor ID (e.g., "0x0781")
            product_id: USB product ID
            manufacturer: Manufacturer string
            product: Product string

        Returns:
            Tuple of (type_principal, sous_type) or None
        """
        # Build a searchable string from all identifiers, skipping empty ones
        search_text = " ".join(
            part for part in (manufacturer, product, vendor_id, product_id) if part
        )
        return DeviceClassifier.detect_from_keywords(search_text)

    @staticmethod
    def _refine_hid(result: Tuple[str, str], *texts: Optional[str]) -> Tuple[str, str]:
        """Turn the default HID result (keyboard) into a mouse when any text mentions one."""
        if result != ("USB", "Clavier"):
            return result
        content = " ".join(text for text in texts if isinstance(text, str) and text)
        if re.search(r"mouse|souris", content, re.IGNORECASE):
            return ("USB", "Souris")
        return result

    @staticmethod
    def classify_device(cli_content: Optional[str] = None,
                        synthese_content: Optional[str] = None,
                        device_info: Optional[Dict] = None) -> Tuple[str, str]:
        """
        Classify a device using all available information

        Strategies are tried in order and the first conclusive one wins:
        interface class, device class, vendor/product strings, CLI content,
        markdown synthesis.

        Args:
            cli_content: Raw CLI output (lsusb -v, lshw, etc.)
            synthese_content: Markdown synthesis content
            device_info: Parsed device info dict (vendor_id, product_id, interface_classes, etc.)

        Returns:
            Tuple of (type_principal, sous_type) - defaults to ("USB", "Autre") if unknown
        """
        device_info = device_info or {}
        generic = DeviceClassifier._GENERIC_RESULT
        product = device_info.get("product")

        # Strategy 1: CRITICAL - Check USB INTERFACE class (normative for Mass Storage)
        result = DeviceClassifier.detect_from_usb_interface_class(
            device_info.get("interface_classes")
        )
        # A Vendor Specific interface only yields the generic result, which is
        # also the final fallback; treat it as inconclusive so the remaining
        # strategies still get a chance to find something more specific.
        if result and result != generic:
            # Refine HID devices (class 03) using keywords
            return DeviceClassifier._refine_hid(result, cli_content, synthese_content, product)

        # Strategy 2: Fallback to device class (less reliable)
        result = DeviceClassifier.detect_from_usb_device_class(device_info.get("device_class"))
        if result:
            # Refine HID devices (class 03) using keywords
            return DeviceClassifier._refine_hid(result, cli_content, synthese_content, product)

        # Strategy 3: Analyze vendor/product info
        result = DeviceClassifier.detect_from_vendor_product(
            device_info.get("vendor_id"),
            device_info.get("product_id"),
            device_info.get("manufacturer"),
            product,
        )
        if result:
            return result

        # Strategy 4: Analyze full CLI content
        # Strategy 5: Analyze markdown synthesis
        for text in (cli_content, synthese_content):
            if text:
                result = DeviceClassifier.detect_from_keywords(text)
                if result:
                    return result

        # Default fallback
        return generic

    @staticmethod
    def refine_bluetooth_subtype(content: str) -> str:
        """
        Refine Bluetooth subtype based on content

        Checks are ordered: keyboard, then mouse, then audio.

        Args:
            content: Combined content to analyze

        Returns:
            Refined sous_type (Clavier, Souris, Audio, or Autre)
        """
        normalized = DeviceClassifier.normalize_text(content)

        if re.search(r"keyboard|clavier", normalized):
            return "Clavier"
        if re.search(r"mouse|souris", normalized):
            return "Souris"
        if re.search(r"headset|audio|speaker|écouteur|casque", normalized):
            return "Audio"
        return "Autre"

    @staticmethod
    def refine_storage_subtype(content: str) -> str:
        """
        Refine Storage subtype based on content

        Distinguishes between USB flash drives, external HDD/SSD, and card readers

        Args:
            content: Combined content to analyze

        Returns:
            Refined sous_type (Clé USB, Disque dur externe, Lecteur de carte)
        """
        normalized = DeviceClassifier.normalize_text(content)

        # Check for card reader first (most specific)
        if re.search(r"card\s+reader|lecteur.*carte|sd.*reader|multi.*card", normalized):
            return "Lecteur de carte"

        # Check for external HDD/SSD
        if re.search(r"external\s+(hdd|ssd|disk)|portable\s+(ssd|drive)|disque\s+dur|"
                     r"my\s+passport|expansion|backup\s+plus|elements|touro", normalized):
            return "Disque dur externe"

        # Everything else, with or without explicit flash-drive keywords, is
        # treated as a USB flash drive: the default for mass storage devices.
        return "Clé USB"