396 lines
13 KiB
Python
Executable File
396 lines
13 KiB
Python
Executable File
"""
|
||
Device classifier - Intelligent detection of peripheral type and subtype
|
||
Analyzes CLI output and markdown content to automatically determine device category
|
||
"""
|
||
import re
|
||
from typing import Dict, Optional, Tuple
|
||
|
||
|
||
class DeviceClassifier:
    """
    Keyword- and class-code-based classifier for USB/Bluetooth/network devices.

    Every public method is a static method returning either a
    ``(type_principal, sous_type)`` tuple or a bare ``sous_type`` string.
    The category labels are runtime values (French) and must not be altered.
    """

    # Regex patterns per (type_principal, sous_type). Patterns are matched
    # against lower-cased text; dict order matters because ties in
    # detect_from_keywords() are resolved in favour of the earliest entry.
    TYPE_KEYWORDS = {
        # WiFi adapters
        ("USB", "Adaptateur WiFi"): [
            r"wi[‑-]?fi",
            r"wireless",
            r"802\.11[a-z]",
            r"rtl81\d+",  # Realtek WiFi chips
            r"mt76\d+",  # MediaTek WiFi chips
            r"atheros",
            r"qualcomm.*wireless",
            r"broadcom.*wireless",
            r"wlan",
            r"wireless\s+adapter",
        ],

        # Bluetooth
        ("Bluetooth", "Autre"): [
            r"bluetooth",
            r"bcm20702",  # Broadcom BT chips
            r"bt\s+adapter",
        ],

        # USB flash drive
        ("Stockage", "Clé USB"): [
            r"flash\s+drive",
            r"usb\s+stick",
            r"cruzer",  # SanDisk Cruzer series
            r"datatraveler",  # Kingston DataTraveler
            r"usb.*flash",
            r"clé\s+usb",
            r"pendrive",
        ],

        # External HDD/SSD
        ("Stockage", "Disque dur externe"): [
            r"external\s+hdd",
            r"external\s+ssd",
            r"portable\s+ssd",
            r"portable\s+drive",
            r"disk\s+drive",
            r"disque\s+dur\s+externe",
            r"my\s+passport",  # WD My Passport
            r"expansion",  # Seagate Expansion
            r"backup\s+plus",  # Seagate Backup Plus
            r"elements",  # WD Elements
            r"touro",  # Hitachi Touro
            r"adata.*hd\d+",  # ADATA external drives
        ],

        # Card reader
        ("Stockage", "Lecteur de carte"): [
            r"card\s+reader",
            r"lecteur.*carte",
            r"sd.*reader",
            r"microsd.*reader",
            r"multi.*card",
            r"cf.*reader",
        ],

        # USB hub
        ("USB", "Hub"): [
            r"usb\s+hub",
            r"hub\s+controller",
            r"multi[‑-]?port",
        ],

        # USB keyboard
        ("USB", "Clavier"): [
            r"keyboard",
            r"clavier",
            r"hid.*keyboard",
        ],

        # USB mouse
        ("USB", "Souris"): [
            r"mouse",
            r"souris",
            r"hid.*mouse",
            r"optical\s+mouse",
        ],

        # Logitech Unifying (can be keyboard or mouse)
        ("USB", "Autre"): [
            r"unifying\s+receiver",
            r"logitech.*receiver",
        ],

        # ZigBee dongle
        ("USB", "ZigBee"): [
            r"zigbee",
            r"conbee",
            r"cc2531",  # Texas Instruments ZigBee chip
            r"cc2652",  # TI newer ZigBee chip
            r"dresden\s+elektronik",
            r"zigbee.*gateway",
            r"zigbee.*coordinator",
            r"thread.*border",
        ],

        # Fingerprint reader
        ("USB", "Lecteur biométrique"): [
            r"fingerprint",
            r"fingprint",  # Common typo (CS9711Fingprint)
            r"empreinte",
            r"biometric",
            r"biométrique",
            r"validity.*sensor",
            r"synaptics.*fingerprint",
            r"goodix.*fingerprint",
            r"elan.*fingerprint",
        ],

        # USB webcam
        ("Video", "Webcam"): [
            r"webcam",
            r"camera",
            r"video\s+capture",
            r"uvc",  # USB Video Class
        ],

        # Ethernet
        ("Réseau", "Ethernet"): [
            r"ethernet",
            r"gigabit",
            r"network\s+adapter",
            r"lan\s+adapter",
            r"rtl81\d+.*ethernet",
        ],

        # Network WiFi (non-USB)
        ("Réseau", "Wi-Fi"): [
            r"wireless.*network",
            r"wi[‑-]?fi.*card",
            r"wlan.*card",
        ],
    }

    # bInterfaceClass codes (integers) -> category.
    # Mass Storage is determined by bInterfaceClass, not bDeviceClass.
    USB_INTERFACE_CLASS_MAPPING = {
        8: ("Stockage", "Clé USB"),  # Mass Storage
        3: ("USB", "Clavier"),  # HID (keyboard/mouse refined by keywords)
        14: ("Video", "Webcam"),  # Video (0x0e)
        9: ("USB", "Hub"),  # Hub
        224: ("Bluetooth", "Autre"),  # Wireless Controller (0xe0)
        255: ("USB", "Autre"),  # Vendor Specific - requires firmware
    }

    # bDeviceClass codes (two lower-case hex digits) -> category.
    # Less reliable than the interface class; used as a fallback only.
    USB_DEVICE_CLASS_MAPPING = {
        "08": ("Stockage", "Clé USB"),  # Mass Storage (fallback only)
        "03": ("USB", "Clavier"),  # HID (keyboard/mouse refined by keywords)
        "0e": ("Video", "Webcam"),  # Video
        "09": ("USB", "Hub"),  # Hub
        "e0": ("Bluetooth", "Autre"),  # Wireless Controller
    }

    @staticmethod
    def normalize_text(text: str) -> str:
        """
        Lower-case *text* and strip surrounding whitespace.

        Accents are deliberately preserved: several patterns in this class
        (e.g. ``clé\\s+usb``, ``biométrique``) contain accented characters
        and would stop matching if accents were removed.

        Args:
            text: Text to normalize; any falsy value yields ``""``.

        Returns:
            The normalized string.
        """
        if not text:
            return ""
        return text.lower().strip()

    @staticmethod
    def detect_from_keywords(content: str) -> Optional[Tuple[str, str]]:
        """
        Detect the device category from keyword patterns found in *content*.

        Each category scores one point per regex match of any of its
        patterns; the category with the highest score wins. Ties go to the
        category declared first in ``TYPE_KEYWORDS``.

        Args:
            content: Text content to analyze (CLI output or markdown).

        Returns:
            Tuple of (type_principal, sous_type), or None when nothing matches.
        """
        normalized = DeviceClassifier.normalize_text(content)

        scores = {}
        for category, patterns in DeviceClassifier.TYPE_KEYWORDS.items():
            hits = sum(
                len(re.findall(pattern, normalized, re.IGNORECASE))
                for pattern in patterns
            )
            if hits > 0:
                scores[category] = hits

        if not scores:
            return None

        # max() returns the first maximal key, i.e. declaration order on ties.
        return max(scores, key=scores.get)

    @staticmethod
    def detect_from_usb_interface_class(interface_classes: Optional[list]) -> Optional[Tuple[str, str]]:
        """
        Detect the device category from USB interface class codes.

        Mass Storage (class 8) takes priority over every other interface, so
        a composite device exposing e.g. HID + Mass Storage is reported as
        storage. Otherwise the first interface with a known class wins.

        Args:
            interface_classes: List of interface dicts carrying an integer
                ``"code"`` (e.g. ``[{"code": 8, "name": "Mass Storage"}]``).
                Bare codes (``[8, 3]``) are accepted as well.

        Returns:
            Tuple of (type_principal, sous_type), or None when no interface
            has a known class.
        """
        if not interface_classes:
            return None

        mapping = DeviceClassifier.USB_INTERFACE_CLASS_MAPPING
        known_codes = []
        for interface in interface_classes:
            code = interface.get("code") if isinstance(interface, dict) else interface
            if code in mapping:
                known_codes.append(code)

        if not known_codes:
            return None

        # Documented priority: Mass Storage first, then list order.
        mass_storage = 8
        if mass_storage in known_codes:
            return mapping[mass_storage]
        return mapping[known_codes[0]]

    @staticmethod
    def detect_from_usb_device_class(device_class: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect the device category from the USB device class (fallback only).

        For Mass Storage, bInterfaceClass is normative, not bDeviceClass.

        Args:
            device_class: USB bDeviceClass as a hexadecimal string, with or
                without a ``0x`` prefix and in any case (``"08"``, ``"0x03"``,
                ``"E0"``, ``"9"``). An integer value is accepted too.

        Returns:
            Tuple of (type_principal, sous_type), or None when the code is
            missing, unparsable or unknown.
        """
        if not device_class:
            return None

        if isinstance(device_class, int):
            value = device_class
        elif isinstance(device_class, str):
            # Parse as hex instead of str.lstrip("0x"): lstrip removes a *set*
            # of characters, which turned "08" into "8" and broke the lookup.
            try:
                value = int(device_class.strip(), 16)
            except ValueError:
                return None
        else:
            return None

        # Mapping keys are two lower-case hex digits.
        return DeviceClassifier.USB_DEVICE_CLASS_MAPPING.get(format(value, "02x"))

    @staticmethod
    def detect_from_vendor_product(vendor_id: Optional[str], product_id: Optional[str],
                                   manufacturer: Optional[str], product: Optional[str]) -> Optional[Tuple[str, str]]:
        """
        Detect the device category from vendor/product IDs and strings.

        The non-empty identifiers are joined into one text which is then
        handed to ``detect_from_keywords``.

        Args:
            vendor_id: USB vendor ID (e.g., "0x0781").
            product_id: USB product ID.
            manufacturer: Manufacturer string.
            product: Product string.

        Returns:
            Tuple of (type_principal, sous_type) or None.
        """
        identifiers = [manufacturer, product, vendor_id, product_id]
        search_text = " ".join(part for part in identifiers if part)
        return DeviceClassifier.detect_from_keywords(search_text)

    @staticmethod
    def _refine_hid_subtype(result: Tuple[str, str], *texts: Optional[str]) -> Tuple[str, str]:
        """Turn the generic HID result (keyboard) into mouse when *texts* mention one."""
        if result != ("USB", "Clavier"):
            return result
        content = " ".join(text for text in texts if text)
        if re.search(r"mouse|souris", content, re.IGNORECASE):
            return ("USB", "Souris")
        return result

    @staticmethod
    def classify_device(cli_content: Optional[str] = None,
                        synthese_content: Optional[str] = None,
                        device_info: Optional[Dict] = None) -> Tuple[str, str]:
        """
        Classify a device using all available information.

        Strategies, in order; the first one that yields a result wins:
          1. USB interface classes (normative for Mass Storage)
          2. USB device class (less reliable fallback)
          3. Keywords in the vendor/product identifiers
          4. Keywords in the raw CLI content
          5. Keywords in the markdown synthesis

        A HID result from strategy 1 or 2 is refined to mouse when the CLI
        content, the synthesis or the parsed product string mentions one.

        Args:
            cli_content: Raw CLI output (lsusb -v, lshw, etc.).
            synthese_content: Markdown synthesis content.
            device_info: Parsed device info dict (vendor_id, product_id,
                manufacturer, product, interface_classes, device_class).

        Returns:
            Tuple of (type_principal, sous_type); ("USB", "Autre") if unknown.
        """
        device_info = device_info or {}

        # Strategies 1 and 2: USB class codes, interface class first.
        class_result = DeviceClassifier.detect_from_usb_interface_class(
            device_info.get("interface_classes")
        )
        if class_result is None:
            class_result = DeviceClassifier.detect_from_usb_device_class(
                device_info.get("device_class")
            )
        if class_result:
            return DeviceClassifier._refine_hid_subtype(
                class_result,
                cli_content,
                synthese_content,
                device_info.get("product"),
            )

        # Strategy 3: vendor/product identifiers.
        result = DeviceClassifier.detect_from_vendor_product(
            device_info.get("vendor_id"),
            device_info.get("product_id"),
            device_info.get("manufacturer"),
            device_info.get("product"),
        )
        if result:
            return result

        # Strategies 4 and 5: free-text keyword analysis, CLI output first.
        for text in (cli_content, synthese_content):
            if text:
                result = DeviceClassifier.detect_from_keywords(text)
                if result:
                    return result

        # Default fallback
        return ("USB", "Autre")

    @staticmethod
    def refine_bluetooth_subtype(content: str) -> str:
        """
        Refine the Bluetooth subtype based on *content*.

        Keyboard keywords are checked first, then mouse, then audio.

        Args:
            content: Combined content to analyze.

        Returns:
            Refined sous_type ("Clavier", "Souris", "Audio" or "Autre").
        """
        normalized = DeviceClassifier.normalize_text(content)

        if re.search(r"keyboard|clavier", normalized):
            return "Clavier"
        if re.search(r"mouse|souris", normalized):
            return "Souris"
        if re.search(r"headset|audio|speaker|écouteur|casque", normalized):
            return "Audio"

        return "Autre"

    @staticmethod
    def refine_storage_subtype(content: str) -> str:
        """
        Refine the storage subtype based on *content*.

        Distinguishes card readers and external HDD/SSD from USB flash
        drives; flash drive is the default when nothing else matches.

        Args:
            content: Combined content to analyze.

        Returns:
            Refined sous_type ("Clé USB", "Disque dur externe" or
            "Lecteur de carte").
        """
        normalized = DeviceClassifier.normalize_text(content)

        # Card reader first (most specific)
        if re.search(r"card\s+reader|lecteur.*carte|sd.*reader|multi.*card", normalized):
            return "Lecteur de carte"

        # External HDD/SSD
        if re.search(r"external\s+(hdd|ssd|disk)|portable\s+(ssd|drive)|disque\s+dur|"
                     r"my\s+passport|expansion|backup\s+plus|elements|touro", normalized):
            return "Disque dur externe"

        # Everything else, explicit flash-drive wording included, is treated
        # as a USB flash drive.
        return "Clé USB"
|