# StrixCamDB/scripts/generate_presets.py

#!/usr/bin/env python3
"""Generate preset files from converted brand data.

Reads all brands/*.json, counts URL pattern popularity (by number of brands
that use each pattern), and outputs top-N preset files.
"""

import json
import os
import sys
from collections import defaultdict

# Input and output directories, resolved relative to this script's location.
_SCRIPT_DIR = os.path.dirname(__file__)
BRANDS_DIR = os.path.join(_SCRIPT_DIR, "..", "brands")
PRESETS_DIR = os.path.join(_SCRIPT_DIR, "..", "presets")

# (limit, description) for each preset; the id and display name are derived
# from the limit below so the three can never drift apart.
_PRESET_BLURBS = (
    (
        150,
        "150 most common stream URL patterns across all brands. "
        "Good for quick scanning.",
    ),
    (
        1000,
        "1000 most common stream URL patterns. Covers most IP cameras.",
    ),
    (
        5000,
        "5000 most common stream URL patterns. Comprehensive coverage.",
    ),
)

# Preset configurations as (preset_id, name, description, limit) tuples.
PRESETS = [
    (f"top-{limit}", f"Top {limit} Stream Patterns", blurb, limit)
    for limit, blurb in _PRESET_BLURBS
]


def _collect_pattern_brands(brands_dir):
    """Map every stream pattern to the set of brands that use it.

    Args:
        brands_dir: Directory containing one ``*.json`` file per brand.

    Returns:
        A mapping keyed by ``(url, type, protocol, port)`` whose values are
        sets of brand ids. Files are visited in sorted order so the mapping's
        insertion order (and therefore tie ordering later on) is reproducible.
    """
    pattern_brands = defaultdict(set)

    for filename in sorted(f for f in os.listdir(brands_dir) if f.endswith(".json")):
        filepath = os.path.join(brands_dir, filename)
        try:
            # Explicit UTF-8: the platform default encoding is not reliable.
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            # Skipping stays best-effort, but it is no longer silent.
            print(f"Warning: skipping {filename}: {exc}", file=sys.stderr)
            continue

        if not isinstance(data, dict):
            print(
                f"Warning: skipping {filename}: top-level JSON value is not an object",
                file=sys.stderr,
            )
            continue

        # Without this fallback every brand lacking "brand_id" would share the
        # id "" and be counted as a single brand.
        brand_id = data.get("brand_id") or os.path.splitext(filename)[0]

        streams = data.get("streams")
        if not isinstance(streams, list):
            # Missing, null or malformed "streams": nothing to count.
            continue

        for stream in streams:
            if not isinstance(stream, dict):
                continue
            key = (
                stream.get("url", ""),
                stream.get("type", ""),
                stream.get("protocol", ""),
                stream.get("port", 0),
            )
            pattern_brands[key].add(brand_id)

    return pattern_brands


def _build_preset(preset_id, name, description, ranked_patterns):
    """Assemble the JSON-serialisable preset document for ranked patterns."""
    streams = [
        {
            "url": url,
            "type": stype,
            "protocol": protocol,
            "port": port,
            "brand_count": len(brands),
        }
        for (url, stype, protocol, port), brands in ranked_patterns
    ]
    return {
        "version": 1,
        "name": name,
        "preset_id": preset_id,
        "description": description,
        "streams": streams,
    }


def main(brands_dir=None, presets_dir=None, presets=None):
    """Generate the top-N preset files from the per-brand JSON files.

    Every ``*.json`` file in the brands directory is read, each distinct
    ``(url, type, protocol, port)`` pattern is ranked by the number of brands
    using it, and one ``<preset_id>.json`` file is written per preset.

    Args:
        brands_dir: Directory with the brand files. Defaults to ``BRANDS_DIR``.
        presets_dir: Output directory, created if missing. Defaults to
            ``PRESETS_DIR``.
        presets: Iterable of ``(preset_id, name, description, limit)`` tuples.
            Defaults to ``PRESETS``.

    Raises:
        SystemExit: With status 1 when the brands directory does not exist.
    """
    brands_dir = os.path.abspath(BRANDS_DIR if brands_dir is None else brands_dir)
    presets_dir = os.path.abspath(PRESETS_DIR if presets_dir is None else presets_dir)
    if presets is None:
        presets = PRESETS

    if not os.path.isdir(brands_dir):
        print(f"Error: brands directory not found: {brands_dir}", file=sys.stderr)
        sys.exit(1)

    os.makedirs(presets_dir, exist_ok=True)

    pattern_brands = _collect_pattern_brands(brands_dir)

    # Sort by brand count descending, then by URL alphabetically. str() keeps
    # the tie-break comparable even if a brand file carries a null URL.
    sorted_patterns = sorted(
        pattern_brands.items(),
        key=lambda item: (-len(item[1]), str(item[0][0])),
    )

    print(f"Total unique patterns: {len(sorted_patterns)}")

    for preset_id, name, description, limit in presets:
        preset = _build_preset(preset_id, name, description, sorted_patterns[:limit])
        streams = preset["streams"]

        output_path = os.path.join(presets_dir, f"{preset_id}.json")
        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 rather than with the platform default encoding.
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(preset, f, indent=2, ensure_ascii=False)
            f.write("\n")

        actual = len(streams)
        top_count = streams[0]["brand_count"] if streams else 0
        bottom_count = streams[-1]["brand_count"] if streams else 0
        print(
            f"  {preset_id}.json: {actual} patterns "
            f"(brand_count {top_count} -> {bottom_count})"
        )


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()