"""
lspci output parser for PCI device detection and extraction.

Parses output from 'lspci -v' and extracts individual device information.
"""
|
|
import re
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
|
|
|
|
def extract_brand_model(vendor_name: str, device_name: str, device_class: str) -> Tuple[str, str]:
    """
    Extract brand (marque) and model (modele) from vendor and device names.

    The brand is the first whitespace-separated word of ``vendor_name``,
    truncated at the first '/' (so "Micron/Crucial Technology" -> "Micron").

    The model defaults to ``device_name``. When ``device_name`` contains a
    bracketed part ``[...]``:
      * for GPU-like classes (class contains 'vga', 'graphics', '3d' or
        'display') the model is the content of the first bracket pair;
      * for storage-like classes (class contains 'nvme', 'non-volatile',
        'sata' or 'storage') every bracketed part is removed and the
        spacing around '/' separators is collapsed;
      * for any other class the device name is returned unchanged.

    Args:
        vendor_name: Vendor name (e.g., "NVIDIA Corporation", "Micron/Crucial Technology").
            ``None``, empty or whitespace-only values yield an empty brand.
        device_name: Device name (e.g., "GA106 [GeForce RTX 3060]").
            ``None`` is treated as an empty string.
        device_class: Device class for context (e.g., "VGA compatible controller").
            ``None`` is treated as an empty string.

    Returns:
        Tuple of (brand, model)

    Examples:
        ("NVIDIA Corporation", "GA106 [GeForce RTX 3060 Lite Hash Rate]", "VGA")
        -> ("NVIDIA", "GeForce RTX 3060 Lite Hash Rate")

        ("Micron/Crucial Technology", "P2 [Nick P2] / P3 Plus NVMe", "Non-Volatile")
        -> ("Micron", "P2/P3 Plus NVMe")
    """
    # Brand: first word of the vendor name, keeping only the part before any
    # '/'. Going through the token list (instead of indexing split()[0]
    # directly) keeps whitespace-only vendor names from raising IndexError.
    vendor_tokens = (vendor_name or "").split()
    brand = vendor_tokens[0].split('/')[0] if vendor_tokens else ""

    device_name = device_name or ""
    class_lower = (device_class or "").lower()

    # Model: the raw device name unless a bracketed commercial name helps.
    model = device_name

    bracket_match = re.search(r'\[([^\]]+)\]', device_name)
    if bracket_match:
        if any(kw in class_lower for kw in ('vga', 'graphics', '3d', 'display')):
            # For GPUs the bracket holds the commercial name
            # (e.g., "GeForce RTX 3060").
            model = bracket_match.group(1)
        elif any(kw in class_lower for kw in ('nvme', 'non-volatile', 'sata', 'storage')):
            # For storage the bracket is usually a code name ("[Nick P2]"):
            # drop it and tidy the remaining "A / B / C" list into "A/B/C".
            # Parenthesised qualifiers such as "(DRAM-less)" are kept as-is.
            cleaned = re.sub(r'\[[^\]]*\]', '', device_name)
            cleaned = re.sub(r'\s*/\s*', '/', cleaned)
            cleaned = re.sub(r'\s+', ' ', cleaned)
            cleaned = re.sub(r'/+', '/', cleaned)
            # Remove leading/trailing slashes and spaces left by the removal.
            model = cleaned.strip('/ ')

    return brand, model.strip()
|
|
|
|
|
|
def _split_vendor_device(description: str) -> Tuple[str, str]:
|
|
"""
|
|
Split description into vendor name and device name.
|
|
|
|
Args:
|
|
description: Full device description from lspci
|
|
|
|
Returns:
|
|
Tuple of (vendor_name, device_name)
|
|
|
|
Examples:
|
|
"NVIDIA Corporation GA106 [GeForce RTX 3060]"
|
|
-> ("NVIDIA Corporation", "GA106 [GeForce RTX 3060]")
|
|
|
|
"Micron/Crucial Technology P2 NVMe PCIe SSD"
|
|
-> ("Micron/Crucial Technology", "P2 NVMe PCIe SSD")
|
|
|
|
"Realtek Semiconductor Co., Ltd. RTL8111/8168"
|
|
-> ("Realtek Semiconductor Co., Ltd.", "RTL8111/8168")
|
|
"""
|
|
# Vendor suffix patterns (ordered by priority)
|
|
vendor_suffixes = [
|
|
# Multi-word patterns (must come first)
|
|
r'\bCo\.,?\s*Ltd\.?',
|
|
r'\bCo\.,?\s*Inc\.?',
|
|
r'\bInc\.,?\s*Ltd\.?',
|
|
r'\bTechnology\s+Co\.,?\s*Ltd\.?',
|
|
r'\bSemiconductor\s+Co\.,?\s*Ltd\.?',
|
|
# Single word patterns
|
|
r'\bCorporation\b',
|
|
r'\bTechnology\b',
|
|
r'\bSemiconductor\b',
|
|
r'\bInc\.?\b',
|
|
r'\bLtd\.?\b',
|
|
r'\bGmbH\b',
|
|
r'\bAG\b',
|
|
]
|
|
|
|
# Try each pattern
|
|
for pattern in vendor_suffixes:
|
|
match = re.search(pattern, description, re.IGNORECASE)
|
|
if match:
|
|
# Split at the end of the vendor suffix
|
|
split_pos = match.end()
|
|
vendor_name = description[:split_pos].strip()
|
|
device_name = description[split_pos:].strip()
|
|
return vendor_name, device_name
|
|
|
|
# No suffix found - fallback to first word
|
|
parts = description.split(' ', 1)
|
|
if len(parts) >= 2:
|
|
return parts[0], parts[1]
|
|
return description, ""
|
|
|
|
|
|
def detect_pci_devices(lspci_output: str, exclude_system_devices: bool = True) -> List[Dict[str, str]]:
    """
    Detect all PCI devices from lspci -v output.
    Returns a list of devices with their slot and basic info.

    Only the device header lines are used, i.e. lines of the form
    "<slot> <device class>: <description>". The slot may optionally carry a
    PCI domain prefix ("0000:04:00.0"). Every other line is ignored.

    Args:
        lspci_output: Raw output from 'lspci -v' command
        exclude_system_devices: If True (default), exclude system infrastructure devices
            like PCI bridges, Host bridges, ISA bridges, SMBus, etc.
            A device is excluded when its class contains one of the
            system class names (case-insensitive substring match).

    Returns:
        List of dicts with keys: slot, device_class, description

    Example:
        [
            {
                "slot": "04:00.0",
                "device_class": "Ethernet controller",
                "description": "Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..."
            },
            ...
        ]
    """
    if not lspci_output:
        return []

    # System infrastructure device classes to exclude by default
    SYSTEM_DEVICE_CLASSES = (
        "Host bridge",
        "PCI bridge",
        "ISA bridge",
        "SMBus",
        "IOMMU",
        "Signal processing controller",
        "System peripheral",
        "RAM memory",
        "Non-Essential Instrumentation",
    )
    # Lower-cased once here instead of on every device line.
    system_classes_lower = tuple(name.lower() for name in SYSTEM_DEVICE_CLASSES)

    # Match lines starting with slot format "XX:XX.X" (optionally prefixed by
    # a PCI domain such as "0000:").
    # Format: "04:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. ..."
    slot_line = re.compile(
        r'^((?:[0-9a-fA-F]{4,8}:)?[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])'
        r'\s+([^:]+):\s+(.+)$'
    )

    devices = []
    for line in lspci_output.strip().split('\n'):
        match = slot_line.match(line.strip())
        if not match:
            continue

        slot = match.group(1)
        device_class = match.group(2).strip()
        description = match.group(3).strip()

        # Filter out system devices if requested
        if exclude_system_devices:
            class_lower = device_class.lower()
            if any(sys_class in class_lower for sys_class in system_classes_lower):
                continue  # Skip this device

        devices.append({
            "slot": slot,
            "device_class": device_class,
            "description": description
        })

    return devices
|
|
|
|
|
|
def extract_device_section(lspci_output: str, slot: str) -> Optional[str]:
    """
    Extract the complete section for a specific device from lspci -v output.

    The section starts at the first line beginning with ``slot`` (an
    optional PCI domain prefix such as "0000:" is accepted in front of it)
    and ends just before the next device header line, or at the end of the
    output. Blank separator lines at the end of the section are dropped.

    Args:
        lspci_output: Raw output from 'lspci -v' command
        slot: PCI slot (e.g., "04:00.0")

    Returns:
        Complete section for the device, from its slot line to the next slot line (or end),
        or None when the slot is not present (or an argument is empty).
    """
    if not lspci_output or not slot:
        return None

    domain_prefix = r'(?:[0-9a-fA-F]{4,8}:)?'

    # Pattern matching the target device's slot line
    target_pattern = re.compile(r'^' + domain_prefix + re.escape(slot) + r'\s+')
    # Pattern matching ANY device header line (hex:hex.hex, optional domain);
    # it marks the end of the section being collected.
    any_slot_pattern = re.compile(
        r'^' + domain_prefix + r'[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]\s+'
    )

    section_lines = []
    for line in lspci_output.strip().split('\n'):
        if not section_lines:
            # Still looking for the start of our target device
            if target_pattern.match(line):
                section_lines.append(line)
            continue

        # Inside the section: stop at the next device header
        if any_slot_pattern.match(line):
            break
        section_lines.append(line)

    # lspci separates devices with a blank line; it belongs to no device.
    # The first collected line is the (non-blank) header, so this loop
    # never empties a non-empty section.
    while section_lines and not section_lines[-1].strip():
        section_lines.pop()

    if section_lines:
        return '\n'.join(section_lines)

    return None
|
|
|
|
|
|
def parse_device_info(device_section: str) -> Dict[str, Any]:
    """
    Parse detailed information from a PCI device section.

    The first line is expected to be the device header
    ("<slot> <class>: <vendor and device> (rev NN) (prog-if NN [name])");
    the slot may carry a PCI domain prefix. The remaining lines are scanned
    for the Subsystem, DeviceName, Flags, Memory, I/O ports, Kernel driver,
    Kernel modules and Capabilities fields. Unrecognised lines are ignored.

    Args:
        device_section: The complete lspci output for a single device

    Returns:
        Dictionary with parsed device information. Fields that are not found
        keep their default (None or an empty list). All extracted values are
        strings. "subsystem_vendor", "subsystem_device" and
        "vendor_device_id" are never filled in by this function.
    """
    result: Dict[str, Any] = {
        "slot": None,
        "device_class": None,
        "vendor_name": None,
        "device_name": None,
        "subsystem": None,
        "subsystem_vendor": None,
        "subsystem_device": None,
        "driver": None,
        "modules": [],
        "vendor_device_id": None,  # Will be extracted from other sources or databases
        "revision": None,
        "prog_if": None,
        "flags": [],
        "irq": None,
        "iommu_group": None,
        "memory_addresses": [],
        "io_ports": [],
        "capabilities": []
    }

    # str.split always returns at least one element, so lines[0] is safe.
    lines = (device_section or "").split('\n')

    # Parse the first line (slot line)
    # Format: "04:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411..."
    slot_match = re.match(
        r'^((?:[0-9a-fA-F]{4,8}:)?[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])'
        r'\s+([^:]+):\s+(.+)$',
        lines[0],
    )
    if slot_match:
        result["slot"] = slot_match.group(1)
        result["device_class"] = slot_match.group(2).strip()
        description = slot_match.group(3).strip()

        # Try to extract vendor and device name from description
        # Common formats:
        # "NVIDIA Corporation GA106 [GeForce RTX 3060 Lite Hash Rate]"
        # "Micron/Crucial Technology P2 [Nick P2] / P3 / P3 Plus NVMe PCIe SSD"
        # "Realtek Semiconductor Co., Ltd. RTL8111/8168/8211/8411"
        # "Intel Corporation Device 1234"
        #
        # Strategy: Find vendor suffix markers (Corporation, Technology, Co., Ltd., etc.)
        # Then everything after is the device name
        vendor_name, device_name = _split_vendor_device(description)
        result["vendor_name"] = vendor_name
        result["device_name"] = device_name

        # Extract revision if present, and clean it from device_name
        rev_match = re.search(r'\(rev\s+([0-9a-fA-F]+)\)', description)
        if rev_match:
            result["revision"] = rev_match.group(1)
            result["device_name"] = re.sub(
                r'\s*\(rev\s+[0-9a-fA-F]+\)', '', result["device_name"]
            )

        # Extract prog-if if present, and clean it from device_name.
        # The bracketed interface name is optional: lspci prints a bare
        # "(prog-if 00)" when it has no name for the interface.
        progif_match = re.search(
            r'\(prog-if\s+([0-9a-fA-F]+)(?:\s*\[[^\]]+\])?\)', description
        )
        if progif_match:
            result["prog_if"] = progif_match.group(1)
            result["device_name"] = re.sub(
                r'\s*\(prog-if\s+[0-9a-fA-F]+(?:\s*\[[^\]]+\])?\)', '', result["device_name"]
            )

    # Parse detailed fields. Each pattern is anchored on a distinct field
    # prefix, so a line can match at most one of them; move on to the next
    # line as soon as one matches.
    for line in lines[1:]:
        line_stripped = line.strip()

        # Subsystem
        subsystem_match = re.match(r'^Subsystem:\s+(.+)$', line_stripped)
        if subsystem_match:
            result["subsystem"] = subsystem_match.group(1).strip()
            continue

        # DeviceName (sometimes present) - only a fallback when the header
        # line did not provide a device name
        devicename_match = re.match(r'^DeviceName:\s+(.+)$', line_stripped)
        if devicename_match:
            if not result["device_name"]:
                result["device_name"] = devicename_match.group(1).strip()
            continue

        # Flags
        flags_match = re.match(r'^Flags:\s+(.+)$', line_stripped)
        if flags_match:
            flags_str = flags_match.group(1).strip()
            # Extract IOMMU group
            iommu_match = re.search(r'IOMMU group\s+(\d+)', flags_str)
            if iommu_match:
                result["iommu_group"] = iommu_match.group(1)
            # Extract IRQ
            irq_match = re.search(r'IRQ\s+(\d+)', flags_str)
            if irq_match:
                result["irq"] = irq_match.group(1)
            # Parse flags (the IRQ / IOMMU entries stay in the list as well)
            result["flags"] = [f.strip() for f in flags_str.split(',')]
            continue

        # Memory addresses
        memory_match = re.match(
            r'^Memory at\s+([0-9a-fA-F]+)\s+\((.+?)\)\s+\[(.+?)\]', line_stripped
        )
        if memory_match:
            result["memory_addresses"].append({
                "address": memory_match.group(1),
                "type": memory_match.group(2),
                "info": memory_match.group(3)
            })
            continue

        # I/O ports. The size may carry a unit suffix ("[size=4K]"), and
        # other bracketed markers may sit between the address and the size.
        io_match = re.match(
            r'^I/O ports at\s+([0-9a-fA-F]+)\b.*?\[size=(\d+[KMGT]?)\]', line_stripped
        )
        if io_match:
            result["io_ports"].append({
                "address": io_match.group(1),
                "size": io_match.group(2)
            })
            continue

        # Kernel driver in use
        driver_match = re.match(r'^Kernel driver in use:\s+(.+)$', line_stripped)
        if driver_match:
            result["driver"] = driver_match.group(1).strip()
            continue

        # Kernel modules
        modules_match = re.match(r'^Kernel modules:\s+(.+)$', line_stripped)
        if modules_match:
            modules_str = modules_match.group(1).strip()
            result["modules"] = [m.strip() for m in modules_str.split(',')]
            continue

        # Capabilities (just capture the type for classification)
        cap_match = re.match(r'^Capabilities:\s+\[([0-9a-fA-F]+)\]\s+(.+)$', line_stripped)
        if cap_match:
            result["capabilities"].append({
                "offset": cap_match.group(1),
                "type": cap_match.group(2).strip()
            })

    return result
|
|
|
|
|
|
def get_pci_vendor_device_id(slot: str) -> Optional[str]:
    """
    Get vendor:device ID for a PCI slot using lspci -n.

    Placeholder: this function performs no lookup yet and always returns
    None. A real implementation would need to run lspci through subprocess.

    Args:
        slot: PCI slot (e.g., "04:00.0"). Currently unused.

    Returns:
        Vendor:Device ID string (e.g., "10ec:8168") or None.
        At present the result is always None.
    """
    # This function would call: lspci -n -s {slot}
    # Output format: "04:00.0 0200: 10ec:8168 (rev 16)"
    # For now, this is a placeholder - implementation would use subprocess
    return None
|