""" lspci output parser for PCI device detection and extraction. Parses output from 'lspci -v' and extracts individual device information. """ import re from typing import List, Dict, Any, Optional, Tuple def extract_brand_model(vendor_name: str, device_name: str, device_class: str) -> Tuple[str, str]: """ Extract brand (marque) and model (modele) from vendor and device names. Args: vendor_name: Vendor name (e.g., "NVIDIA Corporation", "Micron/Crucial Technology") device_name: Device name (e.g., "GA106 [GeForce RTX 3060]") device_class: Device class for context (e.g., "VGA compatible controller") Returns: Tuple of (brand, model) Examples: ("NVIDIA Corporation", "GA106 [GeForce RTX 3060 Lite Hash Rate]", "VGA") -> ("NVIDIA", "GeForce RTX 3060 Lite Hash Rate") ("Micron/Crucial Technology", "P2 [Nick P2] / P3 Plus NVMe", "Non-Volatile") -> ("Micron", "P2/P3 Plus NVMe PCIe SSD") """ # Extract brand from vendor name brand = vendor_name.split()[0] if vendor_name else "" # Handle cases like "Micron/Crucial" - take the first one if '/' in brand: brand = brand.split('/')[0] # Extract model from device name model = device_name # Extract content from brackets [...] as it often contains the commercial name bracket_match = re.search(r'\[([^\]]+)\]', device_name) if bracket_match: bracket_content = bracket_match.group(1) # For GPUs, prefer the bracket content (e.g., "GeForce RTX 3060") if any(kw in device_class.lower() for kw in ['vga', 'graphics', '3d', 'display']): model = bracket_content # For storage, extract the commercial model name elif any(kw in device_class.lower() for kw in ['nvme', 'non-volatile', 'sata', 'storage']): # Pattern: "P2 [Nick P2] / P3 / P3 Plus NVMe PCIe SSD (DRAM-less)" # We want: "P2/P3/P3 Plus NVMe PCIe SSD" # Remove content in brackets like [Nick P2] cleaned = re.sub(r'\[[^\]]*\]', '', device_name) # Clean up extra slashes and spaces cleaned = re.sub(r'\s*/\s*', '/', cleaned) cleaned = re.sub(r'\s+', ' ', cleaned) cleaned = re.sub(r'/+', '/', cleaned) # Remove leading/trailing slashes cleaned = cleaned.strip('/ ') model = cleaned return brand, model.strip() def _split_vendor_device(description: str) -> Tuple[str, str]: """ Split description into vendor name and device name. Args: description: Full device description from lspci Returns: Tuple of (vendor_name, device_name) Examples: "NVIDIA Corporation GA106 [GeForce RTX 3060]" -> ("NVIDIA Corporation", "GA106 [GeForce RTX 3060]") "Micron/Crucial Technology P2 NVMe PCIe SSD" -> ("Micron/Crucial Technology", "P2 NVMe PCIe SSD") "Realtek Semiconductor Co., Ltd. RTL8111/8168" -> ("Realtek Semiconductor Co., Ltd.", "RTL8111/8168") """ # Vendor suffix patterns (ordered by priority) vendor_suffixes = [ # Multi-word patterns (must come first) r'\bCo\.,?\s*Ltd\.?', r'\bCo\.,?\s*Inc\.?', r'\bInc\.,?\s*Ltd\.?', r'\bTechnology\s+Co\.,?\s*Ltd\.?', r'\bSemiconductor\s+Co\.,?\s*Ltd\.?', # Single word patterns r'\bCorporation\b', r'\bTechnology\b', r'\bSemiconductor\b', r'\bInc\.?\b', r'\bLtd\.?\b', r'\bGmbH\b', r'\bAG\b', ] # Try each pattern for pattern in vendor_suffixes: match = re.search(pattern, description, re.IGNORECASE) if match: # Split at the end of the vendor suffix split_pos = match.end() vendor_name = description[:split_pos].strip() device_name = description[split_pos:].strip() return vendor_name, device_name # No suffix found - fallback to first word parts = description.split(' ', 1) if len(parts) >= 2: return parts[0], parts[1] return description, "" def detect_pci_devices(lspci_output: str, exclude_system_devices: bool = True) -> List[Dict[str, str]]: """ Detect all PCI devices from lspci -v output. Returns a list of devices with their slot and basic info. Args: lspci_output: Raw output from 'lspci -v' command exclude_system_devices: If True (default), exclude system infrastructure devices like PCI bridges, Host bridges, ISA bridges, SMBus, etc. Returns: List of dicts with keys: slot, device_class, vendor_device_id, description Example: [ { "slot": "04:00.0", "device_class": "Ethernet controller", "vendor_device_id": "10ec:8168", "description": "Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..." }, ... ] """ # System infrastructure device classes to exclude by default SYSTEM_DEVICE_CLASSES = [ "Host bridge", "PCI bridge", "ISA bridge", "SMBus", "IOMMU", "Signal processing controller", "System peripheral", "RAM memory", "Non-Essential Instrumentation", ] devices = [] lines = lspci_output.strip().split('\n') for line in lines: line_stripped = line.strip() # Match lines starting with slot format "XX:XX.X" # Format: "04:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. ..." match = re.match(r'^([0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])\s+([^:]+):\s+(.+)$', line_stripped) if match: slot = match.group(1) device_class = match.group(2).strip() description = match.group(3).strip() # Filter out system devices if requested if exclude_system_devices: # Check if device class matches any system device pattern is_system_device = any( sys_class.lower() in device_class.lower() for sys_class in SYSTEM_DEVICE_CLASSES ) if is_system_device: continue # Skip this device devices.append({ "slot": slot, "device_class": device_class, "description": description }) return devices def extract_device_section(lspci_output: str, slot: str) -> Optional[str]: """ Extract the complete section for a specific device from lspci -v output. Args: lspci_output: Raw output from 'lspci -v' command slot: PCI slot (e.g., "04:00.0") Returns: Complete section for the device, from its slot line to the next slot line (or end) """ lines = lspci_output.strip().split('\n') # Build the pattern to match the target device's slot line target_pattern = re.compile(rf'^{re.escape(slot)}\s+') section_lines = [] in_section = False for line in lines: # Check if this is the start of our target device if target_pattern.match(line): in_section = True section_lines.append(line) continue # If we're in the section if in_section: # Check if we've hit the next device (new slot line - starts with hex:hex.hex) if re.match(r'^[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]\s+', line): # End of our section break # Add the line to our section section_lines.append(line) if section_lines: return '\n'.join(section_lines) return None def parse_device_info(device_section: str) -> Dict[str, Any]: """ Parse detailed information from a PCI device section. Args: device_section: The complete lspci output for a single device Returns: Dictionary with parsed device information """ result = { "slot": None, "device_class": None, "vendor_name": None, "device_name": None, "subsystem": None, "subsystem_vendor": None, "subsystem_device": None, "driver": None, "modules": [], "vendor_device_id": None, # Will be extracted from other sources or databases "revision": None, "prog_if": None, "flags": [], "irq": None, "iommu_group": None, "memory_addresses": [], "io_ports": [], "capabilities": [] } lines = device_section.split('\n') # Parse the first line (slot line) # Format: "04:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..." first_line = lines[0] if lines else "" slot_match = re.match(r'^([0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])\s+([^:]+):\s+(.+)$', first_line) if slot_match: result["slot"] = slot_match.group(1) result["device_class"] = slot_match.group(2).strip() description = slot_match.group(3).strip() # Try to extract vendor and device name from description # Common formats: # "NVIDIA Corporation GA106 [GeForce RTX 3060 Lite Hash Rate]" # "Micron/Crucial Technology P2 [Nick P2] / P3 / P3 Plus NVMe PCIe SSD" # "Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411" # "Intel Corporation Device 1234" # Strategy: Find vendor suffix markers (Corporation, Technology, Co., Ltd., etc.) # Then everything after is the device name vendor_name, device_name = _split_vendor_device(description) result["vendor_name"] = vendor_name result["device_name"] = device_name # Extract revision if present rev_match = re.search(r'\(rev\s+([0-9a-fA-F]+)\)', description) if rev_match: result["revision"] = rev_match.group(1) # Clean revision from device_name result["device_name"] = re.sub(r'\s*\(rev\s+[0-9a-fA-F]+\)', '', result["device_name"]) # Extract prog-if if present progif_match = re.search(r'\(prog-if\s+([0-9a-fA-F]+)\s*\[([^\]]+)\]\)', description) if progif_match: result["prog_if"] = progif_match.group(1) # Clean prog-if from device_name result["device_name"] = re.sub(r'\s*\(prog-if\s+[0-9a-fA-F]+\s*\[[^\]]+\]\)', '', result["device_name"]) # Parse detailed fields for line in lines[1:]: line_stripped = line.strip() # Subsystem subsystem_match = re.match(r'^Subsystem:\s+(.+)$', line_stripped) if subsystem_match: result["subsystem"] = subsystem_match.group(1).strip() # DeviceName (sometimes present) devicename_match = re.match(r'^DeviceName:\s+(.+)$', line_stripped) if devicename_match: if not result["device_name"]: result["device_name"] = devicename_match.group(1).strip() # Flags flags_match = re.match(r'^Flags:\s+(.+)$', line_stripped) if flags_match: flags_str = flags_match.group(1).strip() # Extract IOMMU group iommu_match = re.search(r'IOMMU group\s+(\d+)', flags_str) if iommu_match: result["iommu_group"] = iommu_match.group(1) # Extract IRQ irq_match = re.search(r'IRQ\s+(\d+)', flags_str) if irq_match: result["irq"] = irq_match.group(1) # Parse flags result["flags"] = [f.strip() for f in flags_str.split(',')] # Memory addresses memory_match = re.match(r'^Memory at\s+([0-9a-fA-F]+)\s+\((.+?)\)\s+\[(.+?)\]', line_stripped) if memory_match: result["memory_addresses"].append({ "address": memory_match.group(1), "type": memory_match.group(2), "info": memory_match.group(3) }) # I/O ports io_match = re.match(r'^I/O ports at\s+([0-9a-fA-F]+)\s+\[size=(\d+)\]', line_stripped) if io_match: result["io_ports"].append({ "address": io_match.group(1), "size": io_match.group(2) }) # Kernel driver in use driver_match = re.match(r'^Kernel driver in use:\s+(.+)$', line_stripped) if driver_match: result["driver"] = driver_match.group(1).strip() # Kernel modules modules_match = re.match(r'^Kernel modules:\s+(.+)$', line_stripped) if modules_match: modules_str = modules_match.group(1).strip() result["modules"] = [m.strip() for m in modules_str.split(',')] # Capabilities (just capture the type for classification) cap_match = re.match(r'^Capabilities:\s+\[([0-9a-fA-F]+)\]\s+(.+)$', line_stripped) if cap_match: result["capabilities"].append({ "offset": cap_match.group(1), "type": cap_match.group(2).strip() }) return result def get_pci_vendor_device_id(slot: str) -> Optional[str]: """ Get vendor:device ID for a PCI slot using lspci -n. This is a helper that would need to be called with subprocess. Args: slot: PCI slot (e.g., "04:00.0") Returns: Vendor:Device ID string (e.g., "10ec:8168") or None """ # This function would call: lspci -n -s {slot} # Output format: "04:00.0 0200: 10ec:8168 (rev 16)" # For now, this is a placeholder - implementation would use subprocess pass