Files
serv_benchmark/backend/app/utils/lspci_parser.py
2026-01-11 23:41:30 +01:00

382 lines
13 KiB
Python

"""
lspci output parser for PCI device detection and extraction.
Parses output from 'lspci -v' and extracts individual device information.
"""
import re
from typing import List, Dict, Any, Optional, Tuple
def extract_brand_model(vendor_name: str, device_name: str, device_class: str) -> Tuple[str, str]:
    """
    Extract brand (marque) and model (modele) from vendor and device names.

    The brand is the first whitespace-separated word of the vendor name,
    truncated at the first '/' (so "Micron/Crucial" becomes "Micron").

    The model defaults to the device name. When the device name contains a
    bracketed part ``[...]``:
      * for display-class devices the content of the first bracket is used;
      * for storage-class devices every bracketed part is removed and the
        spacing around slashes is normalised;
      * for any other class the device name is kept unchanged.

    Args:
        vendor_name: Vendor name (e.g., "NVIDIA Corporation", "Micron/Crucial Technology")
        device_name: Device name (e.g., "GA106 [GeForce RTX 3060]")
        device_class: Device class for context (e.g., "VGA compatible controller")

    Returns:
        Tuple of (brand, model). Either element may be an empty string.

    Examples:
        ("NVIDIA Corporation", "GA106 [GeForce RTX 3060 Lite Hash Rate]", "VGA compatible controller")
            -> ("NVIDIA", "GeForce RTX 3060 Lite Hash Rate")
        ("Micron/Crucial Technology", "P2 [Nick P2] / P3 Plus NVMe", "Non-Volatile memory controller")
            -> ("Micron", "P2/P3 Plus NVMe")
    """
    # Tolerate None so callers can pass through fields that were never parsed.
    device_name = device_name or ""
    class_lower = (device_class or "").lower()

    # str.split() yields an empty list for empty *and* whitespace-only input,
    # so guard on the resulting list instead of on the raw string.
    vendor_words = vendor_name.split() if vendor_name else []
    # Handle cases like "Micron/Crucial" - take the first one
    brand = vendor_words[0].split('/')[0] if vendor_words else ""

    model = device_name

    # Brackets [...] often contain the commercial name
    bracket_match = re.search(r'\[([^\]]+)\]', device_name)
    if bracket_match:
        if any(kw in class_lower for kw in ('vga', 'graphics', '3d', 'display')):
            # For GPUs, prefer the bracket content (e.g., "GeForce RTX 3060")
            model = bracket_match.group(1)
        elif any(kw in class_lower for kw in ('nvme', 'non-volatile', 'sata', 'storage')):
            # "P2 [Nick P2] / P3 / P3 Plus NVMe PCIe SSD (DRAM-less)"
            #   -> "P2/P3/P3 Plus NVMe PCIe SSD (DRAM-less)"
            # Only bracketed parts are dropped; parenthesised text is kept.
            cleaned = re.sub(r'\[[^\]]*\]', '', device_name)
            # Clean up extra slashes and spaces
            cleaned = re.sub(r'\s*/\s*', '/', cleaned)
            cleaned = re.sub(r'\s+', ' ', cleaned)
            cleaned = re.sub(r'/+', '/', cleaned)
            # Remove leading/trailing slashes and spaces
            model = cleaned.strip('/ ')

    return brand, model.strip()
def _split_vendor_device(description: str) -> Tuple[str, str]:
"""
Split description into vendor name and device name.
Args:
description: Full device description from lspci
Returns:
Tuple of (vendor_name, device_name)
Examples:
"NVIDIA Corporation GA106 [GeForce RTX 3060]"
-> ("NVIDIA Corporation", "GA106 [GeForce RTX 3060]")
"Micron/Crucial Technology P2 NVMe PCIe SSD"
-> ("Micron/Crucial Technology", "P2 NVMe PCIe SSD")
"Realtek Semiconductor Co., Ltd. RTL8111/8168"
-> ("Realtek Semiconductor Co., Ltd.", "RTL8111/8168")
"""
# Vendor suffix patterns (ordered by priority)
vendor_suffixes = [
# Multi-word patterns (must come first)
r'\bCo\.,?\s*Ltd\.?',
r'\bCo\.,?\s*Inc\.?',
r'\bInc\.,?\s*Ltd\.?',
r'\bTechnology\s+Co\.,?\s*Ltd\.?',
r'\bSemiconductor\s+Co\.,?\s*Ltd\.?',
# Single word patterns
r'\bCorporation\b',
r'\bTechnology\b',
r'\bSemiconductor\b',
r'\bInc\.?\b',
r'\bLtd\.?\b',
r'\bGmbH\b',
r'\bAG\b',
]
# Try each pattern
for pattern in vendor_suffixes:
match = re.search(pattern, description, re.IGNORECASE)
if match:
# Split at the end of the vendor suffix
split_pos = match.end()
vendor_name = description[:split_pos].strip()
device_name = description[split_pos:].strip()
return vendor_name, device_name
# No suffix found - fallback to first word
parts = description.split(' ', 1)
if len(parts) >= 2:
return parts[0], parts[1]
return description, ""
def detect_pci_devices(lspci_output: str, exclude_system_devices: bool = True) -> List[Dict[str, str]]:
    """
    Detect all PCI devices from lspci -v output.

    Every line that starts with a PCI slot address followed by
    "<class>: <description>" is reported as one device. The slot may be the
    short form ("04:00.0") or carry a PCI domain prefix ("0000:04:00.0");
    it is returned exactly as it appears in the output.

    Args:
        lspci_output: Raw output from 'lspci -v' command
        exclude_system_devices: If True (default), exclude system infrastructure devices
            like PCI bridges, Host bridges, ISA bridges, SMBus, etc. A device is
            excluded when its class contains one of the system class names
            (case-insensitive substring match).

    Returns:
        List of dicts with keys: slot, device_class, description

    Example:
        [
            {
                "slot": "04:00.0",
                "device_class": "Ethernet controller",
                "description": "Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..."
            },
            ...
        ]
    """
    if not lspci_output:
        return []

    # System infrastructure device classes to exclude by default
    # (stored lowercase so the comparison below lowercases only the device class)
    system_device_classes = (
        "host bridge",
        "pci bridge",
        "isa bridge",
        "smbus",
        "iommu",
        "signal processing controller",
        "system peripheral",
        "ram memory",
        "non-essential instrumentation",
    )

    # Format: "[dddd:]bb:dd.f Ethernet controller: Realtek Semiconductor Co., Ltd. ..."
    slot_line = re.compile(
        r'^((?:[0-9a-fA-F]{4,}:)?[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])\s+([^:]+):\s+(.+)$'
    )

    devices = []
    for line in lspci_output.strip().splitlines():
        match = slot_line.match(line.strip())
        if not match:
            continue

        device_class = match.group(2).strip()

        # Filter out system devices if requested
        if exclude_system_devices:
            class_lower = device_class.lower()
            if any(sys_class in class_lower for sys_class in system_device_classes):
                continue

        devices.append({
            "slot": match.group(1),
            "device_class": device_class,
            "description": match.group(3).strip()
        })

    return devices
def extract_device_section(lspci_output: str, slot: str) -> Optional[str]:
    """
    Extract the complete section for a specific device from lspci -v output.

    The section starts at the first line beginning with the requested slot
    and ends just before the next line that begins with any slot address.
    Slot lines may carry a PCI domain prefix ("0000:04:00.0"); a slot given
    without a domain also matches a domain-prefixed line for the same
    bus:device.function.

    Args:
        lspci_output: Raw output from 'lspci -v' command
        slot: PCI slot (e.g., "04:00.0" or "0000:04:00.0")

    Returns:
        Complete section for the device, from its slot line to the next slot line (or end),
        or None when the slot is not found (or either argument is empty).
    """
    slot = slot.strip() if slot else ""
    if not lspci_output or not slot:
        # An empty slot would otherwise build the pattern '^\s+', which
        # matches any indented detail line.
        return None

    domain_prefix = r'(?:[0-9a-fA-F]{4,}:)?'
    # Slot line of the requested device
    target_pattern = re.compile(r'^' + domain_prefix + re.escape(slot) + r'\s+')
    # Slot line of any device - marks the end of the current section
    any_slot_pattern = re.compile(
        r'^' + domain_prefix + r'[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]\s+'
    )

    section_lines = []
    for line in lspci_output.strip().split('\n'):
        if not section_lines:
            # Still looking for the start of the target device
            if target_pattern.match(line):
                section_lines.append(line)
            continue

        # Inside the section: stop at the next device's slot line
        if any_slot_pattern.match(line):
            break
        section_lines.append(line)

    return '\n'.join(section_lines) if section_lines else None
def parse_device_info(device_section: str) -> Dict[str, Any]:
    """
    Parse detailed information from a PCI device section.

    The first line is expected to be the slot line
    ("[dddd:]bb:dd.f <class>: <description>"); the remaining lines are the
    detail lines of 'lspci -v' (Subsystem, Flags, Memory at, ...). Fields
    that are not found keep their default (None or an empty list).

    Args:
        device_section: The complete lspci output for a single device

    Returns:
        Dictionary with parsed device information. All extracted values are
        strings (IRQ, IOMMU group, addresses and sizes are not converted to
        int). "subsystem_vendor", "subsystem_device" and "vendor_device_id"
        are present in the result but never filled in by this function.
    """
    result: Dict[str, Any] = {
        "slot": None,
        "device_class": None,
        "vendor_name": None,
        "device_name": None,
        "subsystem": None,
        "subsystem_vendor": None,
        "subsystem_device": None,
        "driver": None,
        "modules": [],
        "vendor_device_id": None,  # Will be extracted from other sources or databases
        "revision": None,
        "prog_if": None,
        "flags": [],
        "irq": None,
        "iommu_group": None,
        "memory_addresses": [],
        "io_ports": [],
        "capabilities": []
    }

    # str.split always returns at least one element, so lines[0] is safe.
    lines = (device_section or "").split('\n')

    # Parse the first line (slot line), with or without a PCI domain prefix
    # Format: "04:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..."
    slot_match = re.match(
        r'^((?:[0-9a-fA-F]{4,}:)?[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])\s+([^:]+):\s+(.+)$',
        lines[0]
    )
    if slot_match:
        result["slot"] = slot_match.group(1)
        result["device_class"] = slot_match.group(2).strip()
        description = slot_match.group(3).strip()

        # Strategy: find vendor suffix markers (Corporation, Technology,
        # Co., Ltd., etc.); everything after is the device name.
        vendor_name, device_name = _split_vendor_device(description)
        result["vendor_name"] = vendor_name

        # Extract revision if present, and clean it from the device name
        rev_match = re.search(r'\(rev\s+([0-9a-fA-F]+)\)', description)
        if rev_match:
            result["revision"] = rev_match.group(1)
            device_name = re.sub(r'\s*\(rev\s+[0-9a-fA-F]+\)', '', device_name)

        # Extract prog-if if present. The bracketed name is optional: lspci
        # prints "(prog-if 02)" when it has no name for the interface.
        progif_match = re.search(r'\(prog-if\s+([0-9a-fA-F]+)(?:\s*\[[^\]]+\])?\)', description)
        if progif_match:
            result["prog_if"] = progif_match.group(1)
            device_name = re.sub(r'\s*\(prog-if\s+[0-9a-fA-F]+(?:\s*\[[^\]]+\])?\)', '', device_name)

        result["device_name"] = device_name.strip()

    # Parse detailed fields
    for line in lines[1:]:
        line_stripped = line.strip()

        # Subsystem
        subsystem_match = re.match(r'^Subsystem:\s+(.+)$', line_stripped)
        if subsystem_match:
            result["subsystem"] = subsystem_match.group(1).strip()
            continue

        # DeviceName (sometimes present) - only used as a fallback
        devicename_match = re.match(r'^DeviceName:\s+(.+)$', line_stripped)
        if devicename_match:
            if not result["device_name"]:
                result["device_name"] = devicename_match.group(1).strip()
            continue

        # Flags
        flags_match = re.match(r'^Flags:\s+(.+)$', line_stripped)
        if flags_match:
            flags_str = flags_match.group(1).strip()

            iommu_match = re.search(r'IOMMU group\s+(\d+)', flags_str)
            if iommu_match:
                result["iommu_group"] = iommu_match.group(1)

            irq_match = re.search(r'IRQ\s+(\d+)', flags_str)
            if irq_match:
                result["irq"] = irq_match.group(1)

            # The IRQ / IOMMU entries are also kept in the raw flag list
            result["flags"] = [f.strip() for f in flags_str.split(',')]
            continue

        # Memory addresses
        memory_match = re.match(r'^Memory at\s+([0-9a-fA-F]+)\s+\((.+?)\)\s+\[(.+?)\]', line_stripped)
        if memory_match:
            result["memory_addresses"].append({
                "address": memory_match.group(1),
                "type": memory_match.group(2),
                "info": memory_match.group(3)
            })
            continue

        # I/O ports
        io_match = re.match(r'^I/O ports at\s+([0-9a-fA-F]+)\s+\[size=(\d+)\]', line_stripped)
        if io_match:
            result["io_ports"].append({
                "address": io_match.group(1),
                "size": io_match.group(2)
            })
            continue

        # Kernel driver in use
        driver_match = re.match(r'^Kernel driver in use:\s+(.+)$', line_stripped)
        if driver_match:
            result["driver"] = driver_match.group(1).strip()
            continue

        # Kernel modules
        modules_match = re.match(r'^Kernel modules:\s+(.+)$', line_stripped)
        if modules_match:
            result["modules"] = [m.strip() for m in modules_match.group(1).strip().split(',')]
            continue

        # Capabilities (just capture the type for classification)
        cap_match = re.match(r'^Capabilities:\s+\[([0-9a-fA-F]+)\]\s+(.+)$', line_stripped)
        if cap_match:
            result["capabilities"].append({
                "offset": cap_match.group(1),
                "type": cap_match.group(2).strip()
            })

    return result
def get_pci_vendor_device_id(slot: str) -> Optional[str]:
    """
    Placeholder for looking up the vendor:device ID of a PCI slot.

    No lookup is performed yet: the argument is ignored and the function
    always returns None.

    Intended implementation (not done here): run ``lspci -n -s {slot}``
    through subprocess and parse its output, which looks like
    "04:00.0 0200: 10ec:8168 (rev 16)".

    Args:
        slot: PCI slot (e.g., "04:00.0")

    Returns:
        Vendor:Device ID string (e.g., "10ec:8168") once implemented;
        currently always None.
    """
    return None