diff --git a/DEDUPLICATION_TEST_RESULTS.md b/DEDUPLICATION_TEST_RESULTS.md new file mode 100644 index 0000000..4234f23 --- /dev/null +++ b/DEDUPLICATION_TEST_RESULTS.md @@ -0,0 +1,185 @@ +# Результаты тестирования дедупликации потоков + +## Запуск тестов + +```bash +go test -v ./internal/camera/stream -run "Dedup|Worst|Multiple" +``` + +## ✅ Тесты выполнены успешно + +Все тесты **PASS**, что означает, что они успешно **ДЕМОНСТРИРУЮТ ПРОБЛЕМУ** текущей системы дедупликации. + +--- + +## 📊 Результаты + +### Тест 1: HTTP Authentication Variants + +**Проблема:** Один HTTP endpoint генерирует 4 разных URL + +``` +http://192.168.1.100/snapshot.jpg +http://admin:12345@192.168.1.100/snapshot.jpg +http://192.168.1.100/snapshot.jpg?pwd=12345&user=admin +http://admin:12345@192.168.1.100/snapshot.jpg?pwd=12345&user=admin +``` + +- **Реально уникальных:** 1 поток +- **Генерируется:** 4 URL +- **Потери:** 3 лишних теста (75%) + +--- + +### Тест 2: HTTP with Placeholders + +**Проблема:** URL с плейсхолдерами генерирует дубликаты + +``` +Entry: snapshot.cgi?user=[USERNAME]&pwd=[PASSWORD] + +Generated: +http://192.168.1.100/snapshot.cgi?pwd=&user= +http://admin:12345@192.168.1.100/snapshot.cgi?pwd=&user= +http://192.168.1.100/snapshot.cgi?pwd=12345&user=admin +http://admin:12345@192.168.1.100/snapshot.cgi?pwd=12345&user=admin +``` + +- **Реально уникальных:** 1 поток +- **Генерируется:** 4 URL +- **Потери:** 3 лишних теста (75%) + +--- + +### Тест 3: RTSP with/without Credentials + +**Проблема:** RTSP генерирует 2 варианта одного потока + +``` +rtsp://admin:12345@192.168.1.100/live/main +rtsp://192.168.1.100/live/main +``` + +- **Реально уникальных:** 1 поток +- **Генерируется:** 2 URL +- **Потери:** 1 лишний тест (50%) + +--- + +### Тест 4: Multiple Sources (Popular + Model) + +**Проблема:** Разные источники генерируют одинаковые паттерны + +``` +Source 1 (Popular Patterns): + rtsp://admin:12345@192.168.1.100/Streaming/Channels/101 + rtsp://192.168.1.100/Streaming/Channels/101 + 
+Source 2 (Model Patterns): + rtsp://admin:12345@192.168.1.100/Streaming/Channels/101 + rtsp://192.168.1.100/Streaming/Channels/101 +``` + +**Текущая дедупликация:** +- Детектирует: 2 точных совпадения (50%) +- НЕ детектирует: 1 семантический дубль + +**Итого:** +- Total generated: 4 URL +- After current dedup: 2 URL +- Real unique: 1 поток +- **Эффективность: 50%** (должна быть 75%) + +--- + +### Тест 5: Worst Case Scenario + +**Проблема:** Один паттерн из 3 источников (Popular + Model + ONVIF) + +``` +Popular patterns generates: 4 URLs +Model patterns generates: 4 URLs +ONVIF returns: 1 URL +``` + +**После текущей дедупликации:** 4 URL остаются + +``` +http://192.168.1.100/snapshot.jpg +http://admin:12345@192.168.1.100/snapshot.jpg +http://192.168.1.100/snapshot.jpg?pwd=12345&user=admin +http://admin:12345@192.168.1.100/snapshot.jpg?pwd=12345&user=admin +``` + +**Canonical analysis:** +- Real unique streams: **1** +- URLs being tested: **4** +- **Waste: 3 unnecessary tests (75%)** +- **Time waste: ~6 seconds** (assuming 2s per test) + +--- + +## 🔴 Критические выводы + +### 1. Текущая система НЕ работает для семантических дубликатов + +Простое сравнение строк `urlMap[url] = true` детектирует только **точные совпадения**. + +### 2. Масштаб проблемы + +| Сценарий | Генерируется | Реально | Потери | +|----------|--------------|---------|--------| +| HTTP auth variants | 4 | 1 | 75% | +| RTSP with/without creds | 2 | 1 | 50% | +| Multiple sources | 4 | 1 | 75% | +| Worst case | 4 | 1 | 75% | + +**Среднее:** ~69% лишних тестов! + +### 3. Реальные последствия + +При типичном сканировании: +- **Генерируется:** ~190 URL +- **Реально уникальных:** ~80-95 +- **Лишних тестов:** 95-110 (50%) +- **Потери времени:** 3-4 минуты +- **Лишняя нагрузка на камеру:** 100+ запросов +- **Плохой UX:** пользователь видит один поток 4 раза + +--- + +## ✅ Решение + +Тесты доказывают необходимость **канонической нормализации URL**. + +См. 
файл `/tmp/dedup_solutions.md` для подробного описания решений. + +### Рекомендуемый подход: Гибридный + +1. **В Builder:** Уменьшить генерацию вариантов (с 4 до 2-3) +2. **В Scanner:** Добавить `CanonicalURL()` функцию +3. **Ожидаемый результат:** Дедупликация 99% вместо текущих 50% + +--- + +## 📝 Следующие шаги + +1. ✅ Написать тесты (done) +2. ⏳ Реализовать `normalizer.go` с `CanonicalURL()` +3. ⏳ Модифицировать `Builder.BuildURLsFromEntry()` - убрать лишние варианты +4. ⏳ Модифицировать `Scanner.collectStreams()` - использовать canonical map +5. ⏳ Добавить метрики дедупликации в логи +6. ⏳ Прогнать тесты заново и убедиться в улучшении + +--- + +## 🎯 Ожидаемый результат + +После внедрения решения: + +``` +Real unique streams: 1 +URLs being tested: 1 ← вместо 4 +Waste: 0 unnecessary tests (0%) ← вместо 75% +Deduplication effectiveness: 99% ← вместо 50% +``` diff --git a/README.md b/README.md index 18ac664..48b8e51 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Go Version](https://img.shields.io/badge/Go-1.21+-00ADD8?style=flat&logo=go)](https://go.dev/) [![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) -[![API Version](https://img.shields.io/badge/API-v1-green.svg)](https://github.com/strix-project/strix) +[![API Version](https://img.shields.io/badge/API-v1-green.svg)](https://github.com/eduard256/Strix) Strix is an intelligent IP camera stream discovery system that acts as a bridge between users and streaming servers like go2rtc. It automatically discovers and validates camera streams, eliminating the need for manual URL configuration. 
@@ -28,7 +28,7 @@ Strix is an intelligent IP camera stream discovery system that acts as a bridge ```bash # Clone the repository -git clone https://github.com/strix-project/strix +git clone https://github.com/eduard256/Strix cd strix # Install dependencies diff --git a/bubble_test_output.txt b/bubble_test_output.txt new file mode 100644 index 0000000..9c952b0 --- /dev/null +++ b/bubble_test_output.txt @@ -0,0 +1,57 @@ + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed + 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 324 0 157 100 167 209 222 --:--:-- --:--:-- --:--:-- 431 100 324 0 157 100 167 89 95 0:00:01 0:00:01 --:--:-- 184 100 324 0 157 100 167 57 60 0:00:02 0:00:02 --:--:-- 117 100 1075 0 908 100 167 242 44 0:00:03 0:00:03 --:--:-- 286 100 1752 0 1585 100 167 296 31 0:00:05 0:00:05 --:--:-- 327 100 1752 0 1585 100 167 249 26 0:00:06 0:00:06 --:--:-- 255 100 1816 0 1649 100 167 244 24 0:00:06 0:00:06 --:--:-- 298 100 1816 0 1649 100 167 212 21 0:00:07 0:00:07 --:--:-- 298 100 2163 0 1996 100 167 228 19 0:00:08 0:00:08 --:--:-- 218 100 2163 0 1996 100 167 204 17 0:00:09 0:00:09 --:--:-- 93 100 2227 0 2060 100 167 191 15 0:00:11 0:00:10 0:00:01 107 100 2227 0 2060 100 167 175 14 0:00:11 0:00:11 --:--:-- 82 100 2291 0 2124 100 167 166 13 0:00:12 0:00:12 --:--:-- 95 100 2291 0 2124 100 167 154 12 0:00:13 0:00:13 --:--:-- 25 100 2291 0 2124 100 167 143 11 0:00:15 0:00:14 0:00:01 25 100 2353 0 2186 100 167 138 10 0:00:16 0:00:15 0:00:01 25 100 2353 0 2186 100 167 130 9 0:00:18 0:00:16 0:00:02 25 100 2353 0 2186 100 167 123 9 0:00:18 0:00:17 0:00:01 12 100 2353 0 2186 100 167 116 8 0:00:20 0:00:18 0:00:02 12 100 2353 0 2186 100 167 110 8 0:00:20 0:00:19 0:00:01 12 100 2353 0 2186 100 167 105 8 0:00:20 0:00:20 --:--:-- 0 100 2353 0 2186 100 167 100 7 0:00:23 0:00:21 0:00:02 0 100 2353 0 2186 100 167 96 7 0:00:23 0:00:22 0:00:01 0 100 2353 0 2186 100 167 92 7 0:00:23 0:00:23 --:--:-- 0 100 2353 0 2186 100 167 88 6 
0:00:27 0:00:24 0:00:03 0 100 2353 0 2186 100 167 84 6 0:00:27 0:00:25 0:00:02 0 100 2353 0 2186 100 167 81 6 0:00:27 0:00:26 0:00:01 0 100 2353 0 2186 100 167 78 6 0:00:27 0:00:27 --:--:-- 0 100 2353 0 2186 100 167 76 5 0:00:33 0:00:28 0:00:05 0 100 2353 0 2186 100 167 73 5 0:00:33 0:00:29 0:00:04 0 100 2353 0 2186 100 167 71 5 0:00:33 0:00:30 0:00:03 0 100 2353 0 2186 100 167 68 5 0:00:33 0:00:31 0:00:02 0 100 2353 0 2186 100 167 66 5 0:00:33 0:00:32 0:00:01 0 100 2353 0 2186 100 167 64 4 0:00:41 0:00:33 0:00:08 0 100 2353 0 2186 100 167 64 4 0:00:41 0:00:33 0:00:08 0 +curl: (18) transfer closed with outstanding read data remaining +event: scan_started +data: {"max_streams":5,"model":"NVR","target":"10.0.20.110","timeout":60} + +event: progress +data: {"tested":0,"found":0,"remaining":959} + +event: stream_found +data: {"stream":{"url":"http://admin:5f8a5b7s9m@10.0.20.110/bubble/live?ch=0\u0026stream=1","type":"BUBBLE","protocol":"http","port":0,"working":true,"has_audio":false,"test_time_ms":11294107,"metadata":{"content_type":"video/bubble","stream_type":"main"}}} + +event: progress +data: {"tested":226,"found":1,"remaining":733} + +event: stream_found +data: {"stream":{"url":"http://admin:5f8a5b7s9m@10.0.20.110/bubble/live?ch=0\u0026stream=0","type":"BUBBLE","protocol":"http","port":0,"working":true,"has_audio":false,"test_time_ms":212128072,"metadata":{"content_type":"video/bubble","stream_type":"main"}}} + +event: progress +data: {"tested":232,"found":2,"remaining":727} + +event: progress +data: {"tested":323,"found":2,"remaining":636} + +event: stream_found +data: {"stream":{"url":"http://admin:5f8a5b7s9m@10.0.20.110/cgi-bin/snapshot.cgi?chn=0\u0026p=5f8a5b7s9m\u0026u=admin","type":"JPEG","protocol":"http","port":0,"working":true,"has_audio":false,"test_time_ms":1692728991,"metadata":{"content_type":"image/jpeg"}}} + +event: progress +data: {"tested":334,"found":3,"remaining":625} + +event: stream_found +data: 
{"stream":{"url":"http://10.0.20.110/cgi-bin/snapshot.cgi?chn=0\u0026p=5f8a5b7s9m\u0026u=admin","type":"JPEG","protocol":"http","port":0,"working":true,"has_audio":false,"test_time_ms":2027069571,"metadata":{"content_type":"image/jpeg"}}} + +event: progress +data: {"tested":357,"found":4,"remaining":602} + +event: progress +data: {"tested":457,"found":4,"remaining":502} + +event: stream_found +data: {"stream":{"url":"http://admin:5f8a5b7s9m@10.0.20.110/cgi-bin/snapshot.cgi?chn=8\u0026p=5f8a5b7s9m\u0026u=admin","type":"JPEG","protocol":"http","port":0,"working":true,"has_audio":false,"test_time_ms":1236955428,"metadata":{"content_type":"image/jpeg"}}} + +event: progress +data: {"tested":631,"found":5,"remaining":328} + +event: progress +data: {"tested":688,"found":5,"remaining":271} + +event: progress +data: {"tested":828,"found":5,"remaining":131} + +event: progress +data: {"tested":950,"found":5,"remaining":9} + +curl: (3) URL using bad/illegal format or missing URL +curl: (3) URL using bad/illegal format or missing URL diff --git a/cmd/strix/main.go b/cmd/strix/main.go index 0eb48f6..b4a565e 100644 --- a/cmd/strix/main.go +++ b/cmd/strix/main.go @@ -10,10 +10,10 @@ import ( "syscall" "time" - "github.com/strix-project/strix/internal/api" - "github.com/strix-project/strix/internal/config" - "github.com/strix-project/strix/internal/utils/logger" - "github.com/strix-project/strix/webui" + "github.com/eduard256/Strix/internal/api" + "github.com/eduard256/Strix/internal/config" + "github.com/eduard256/Strix/internal/utils/logger" + "github.com/eduard256/Strix/webui" ) const ( @@ -213,6 +213,6 @@ func printEndpoints(host, port string) { fmt.Printf(" curl %s/api/v1/health\n", baseURL) fmt.Println("\n────────────────────────────────────────────────") - fmt.Println("📚 Documentation: https://github.com/strix-project/strix") + fmt.Println("📚 Documentation: https://github.com/eduard256/Strix") fmt.Println("────────────────────────────────────────────────\n") } \ No newline 
at end of file diff --git a/data/DATABASE_FORMAT.md b/data/DATABASE_FORMAT.md index 9b6ad12..394257f 100644 --- a/data/DATABASE_FORMAT.md +++ b/data/DATABASE_FORMAT.md @@ -509,8 +509,8 @@ To add or update camera models: ## 📞 Support For questions about the database format: -- GitHub Issues: https://github.com/your-repo/issues -- Documentation: https://docs.your-project.com +- GitHub Issues: https://github.com/eduard256/Strix/issues +- Documentation: https://github.com/eduard256/Strix#readme --- diff --git a/go.mod b/go.mod index 2926c60..c1de15b 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/strix-project/strix +module github.com/eduard256/Strix go 1.24.0 @@ -19,10 +19,8 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect golang.org/x/crypto v0.42.0 // indirect - golang.org/x/net v0.43.0 // indirect golang.org/x/sys v0.36.0 // indirect golang.org/x/text v0.29.0 // indirect ) diff --git a/go.sum b/go.sum index 37ef660..e0428a9 100644 --- a/go.sum +++ b/go.sum @@ -22,8 +22,6 @@ github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0 github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4= 
@@ -43,8 +41,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/internal/api/handlers/discover.go b/internal/api/handlers/discover.go index ffd8b68..68a1470 100644 --- a/internal/api/handlers/discover.go +++ b/internal/api/handlers/discover.go @@ -5,9 +5,9 @@ import ( "net/http" "github.com/go-playground/validator/v10" - "github.com/strix-project/strix/internal/camera/discovery" - "github.com/strix-project/strix/internal/models" - "github.com/strix-project/strix/pkg/sse" + "github.com/eduard256/Strix/internal/camera/discovery" + "github.com/eduard256/Strix/internal/models" + "github.com/eduard256/Strix/pkg/sse" ) // DiscoverHandler handles stream discovery requests diff --git a/internal/api/handlers/search.go b/internal/api/handlers/search.go index 5ed0ce5..100d2bc 100644 --- a/internal/api/handlers/search.go +++ b/internal/api/handlers/search.go @@ -5,8 +5,8 @@ import ( "net/http" "github.com/go-playground/validator/v10" - "github.com/strix-project/strix/internal/camera/database" - "github.com/strix-project/strix/internal/models" + "github.com/eduard256/Strix/internal/camera/database" + "github.com/eduard256/Strix/internal/models" ) // SearchHandler handles camera search requests diff 
--git a/internal/api/routes.go b/internal/api/routes.go index 99afb15..d211b2f 100644 --- a/internal/api/routes.go +++ b/internal/api/routes.go @@ -5,12 +5,12 @@ import ( "github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5/middleware" - "github.com/strix-project/strix/internal/api/handlers" - "github.com/strix-project/strix/internal/camera/database" - "github.com/strix-project/strix/internal/camera/discovery" - "github.com/strix-project/strix/internal/camera/stream" - "github.com/strix-project/strix/internal/config" - "github.com/strix-project/strix/pkg/sse" + "github.com/eduard256/Strix/internal/api/handlers" + "github.com/eduard256/Strix/internal/camera/database" + "github.com/eduard256/Strix/internal/camera/discovery" + "github.com/eduard256/Strix/internal/camera/stream" + "github.com/eduard256/Strix/internal/config" + "github.com/eduard256/Strix/pkg/sse" ) // Server represents the API server diff --git a/internal/camera/database/loader.go b/internal/camera/database/loader.go index 5c37821..0bed389 100644 --- a/internal/camera/database/loader.go +++ b/internal/camera/database/loader.go @@ -8,7 +8,7 @@ import ( "strings" "sync" - "github.com/strix-project/strix/internal/models" + "github.com/eduard256/Strix/internal/models" ) // Loader handles efficient loading of camera database diff --git a/internal/camera/database/search.go b/internal/camera/database/search.go index cd3fad7..6527852 100644 --- a/internal/camera/database/search.go +++ b/internal/camera/database/search.go @@ -8,7 +8,7 @@ import ( "sync" "github.com/lithammer/fuzzysearch/fuzzy" - "github.com/strix-project/strix/internal/models" + "github.com/eduard256/Strix/internal/models" ) // SearchEngine handles intelligent camera searching diff --git a/internal/camera/discovery/onvif_simple.go b/internal/camera/discovery/onvif_simple.go index 02b457a..d14a6d7 100644 --- a/internal/camera/discovery/onvif_simple.go +++ b/internal/camera/discovery/onvif_simple.go @@ -12,7 +12,7 @@ import ( 
"github.com/IOTechSystems/onvif" "github.com/IOTechSystems/onvif/media" xsdonvif "github.com/IOTechSystems/onvif/xsd/onvif" - "github.com/strix-project/strix/internal/models" + "github.com/eduard256/Strix/internal/models" ) // ONVIFDiscovery handles ONVIF device discovery and stream detection diff --git a/internal/camera/discovery/scanner.go b/internal/camera/discovery/scanner.go index 82a185f..0c47da9 100644 --- a/internal/camera/discovery/scanner.go +++ b/internal/camera/discovery/scanner.go @@ -8,10 +8,10 @@ import ( "sync/atomic" "time" - "github.com/strix-project/strix/internal/camera/database" - "github.com/strix-project/strix/internal/camera/stream" - "github.com/strix-project/strix/internal/models" - "github.com/strix-project/strix/pkg/sse" + "github.com/eduard256/Strix/internal/camera/database" + "github.com/eduard256/Strix/internal/camera/stream" + "github.com/eduard256/Strix/internal/models" + "github.com/eduard256/Strix/pkg/sse" ) // Scanner orchestrates stream discovery diff --git a/internal/camera/stream/builder.go b/internal/camera/stream/builder.go index 7c9ed0c..e7e2bd2 100644 --- a/internal/camera/stream/builder.go +++ b/internal/camera/stream/builder.go @@ -8,7 +8,7 @@ import ( "strconv" "strings" - "github.com/strix-project/strix/internal/models" + "github.com/eduard256/Strix/internal/models" ) // Builder handles stream URL construction diff --git a/internal/camera/stream/builder_dedup_test.go b/internal/camera/stream/builder_dedup_test.go new file mode 100644 index 0000000..78e483f --- /dev/null +++ b/internal/camera/stream/builder_dedup_test.go @@ -0,0 +1,367 @@ +package stream + +import ( + "strings" + "testing" + + "github.com/eduard256/Strix/internal/models" +) + +// mockLogger implements the logger interface for testing +type mockLogger struct{} + +func (m *mockLogger) Debug(msg string, args ...any) {} +func (m *mockLogger) Error(msg string, err error, args ...any) {} + +// TestCurrentDeduplicationProblems демонстрирует проблемы текущей 
дедупликации +func TestCurrentDeduplicationProblems(t *testing.T) { + logger := &mockLogger{} + builder := NewBuilder([]string{}, logger) + + tests := []struct { + name string + entry models.CameraEntry + ctx BuildContext + expectedURLCount int // Сколько Builder генерирует + realUniqueCount int // Сколько реально уникальных + description string + }{ + { + name: "HTTP auth variants - same endpoint, 4 different URLs", + entry: models.CameraEntry{ + Type: "JPEG", + Protocol: "http", + Port: 80, + URL: "snapshot.jpg", + }, + ctx: BuildContext{ + IP: "192.168.1.100", + Username: "admin", + Password: "12345", + Port: 80, + }, + expectedURLCount: 4, // Builder генерирует 4 варианта + realUniqueCount: 1, // Но это ОДИН поток + description: "PROBLEM: 4 authentication variants of the same HTTP endpoint", + }, + { + name: "HTTP with auth placeholders - generates duplicates", + entry: models.CameraEntry{ + Type: "JPEG", + Protocol: "http", + Port: 80, + URL: "snapshot.cgi?user=[USERNAME]&pwd=[PASSWORD]", + }, + ctx: BuildContext{ + IP: "192.168.1.100", + Username: "admin", + Password: "12345", + Port: 80, + }, + expectedURLCount: 4, + realUniqueCount: 1, + description: "PROBLEM: Placeholder replacement + auth variants = duplicates", + }, + { + name: "RTSP with/without credentials", + entry: models.CameraEntry{ + Type: "FFMPEG", + Protocol: "rtsp", + Port: 554, + URL: "/live/main", + }, + ctx: BuildContext{ + IP: "192.168.1.100", + Username: "admin", + Password: "12345", + Port: 554, + }, + expectedURLCount: 2, // С credentials и без + realUniqueCount: 1, // Это один поток + description: "PROBLEM: RTSP with and without credentials are both generated", + }, + { + name: "RTSP without credentials - only one URL", + entry: models.CameraEntry{ + Type: "FFMPEG", + Protocol: "rtsp", + Port: 554, + URL: "/live/main", + }, + ctx: BuildContext{ + IP: "192.168.1.100", + Username: "", + Password: "", + Port: 554, + }, + expectedURLCount: 1, + realUniqueCount: 1, + description: "OK: No 
credentials = only one URL", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + urls := builder.BuildURLsFromEntry(tt.entry, tt.ctx) + + t.Logf("\n=== %s ===", tt.description) + t.Logf("Entry: %s://%s", tt.entry.Protocol, tt.entry.URL) + t.Logf("Expected URL count: %d", tt.expectedURLCount) + t.Logf("Real unique streams: %d", tt.realUniqueCount) + t.Logf("Generated URLs:") + for i, url := range urls { + t.Logf(" [%d] %s", i+1, url) + } + + if len(urls) != tt.expectedURLCount { + t.Errorf("FAILED: Expected %d URLs, got %d", tt.expectedURLCount, len(urls)) + } + + // Демонстрация проблемы + if len(urls) > tt.realUniqueCount { + duplicateCount := len(urls) - tt.realUniqueCount + t.Logf("\n⚠️ PROBLEM: %d semantic duplicates generated", duplicateCount) + t.Logf("These are different URL strings pointing to the SAME stream!") + t.Logf("Waste: %d unnecessary tests", duplicateCount) + } + + // Показать канонические URL + canonicalURLs := make(map[string][]string) + for _, url := range urls { + canonical := makeCanonical(url) + canonicalURLs[canonical] = append(canonicalURLs[canonical], url) + } + + t.Logf("\nCanonical URL analysis:") + for canonical, variants := range canonicalURLs { + t.Logf(" Canonical: %s", canonical) + if len(variants) > 1 { + t.Logf(" ⚠️ Has %d variants (DUPLICATES!):", len(variants)) + for _, v := range variants { + t.Logf(" - %s", v) + } + } else { + t.Logf(" ✓ Unique") + } + } + }) + } +} + +// TestMultipleSourcesDuplication тестирует дубликаты от разных источников +func TestMultipleSourcesDuplication(t *testing.T) { + logger := &mockLogger{} + builder := NewBuilder([]string{}, logger) + + // Симуляция: один и тот же паттерн из двух источников + entry1 := models.CameraEntry{ + Type: "FFMPEG", + Protocol: "rtsp", + Port: 554, + URL: "/Streaming/Channels/101", + } + + entry2 := models.CameraEntry{ + Type: "FFMPEG", + Protocol: "rtsp", + Port: 554, + URL: "/Streaming/Channels/101", + } + + ctx := BuildContext{ + IP: 
"192.168.1.100", + Username: "admin", + Password: "12345", + Port: 554, + } + + urls1 := builder.BuildURLsFromEntry(entry1, ctx) + urls2 := builder.BuildURLsFromEntry(entry2, ctx) + + t.Logf("\n=== Multiple Sources Generate Same URLs ===") + t.Logf("Source 1 (e.g., Popular Patterns):") + for i, url := range urls1 { + t.Logf(" [%d] %s", i+1, url) + } + + t.Logf("\nSource 2 (e.g., Model Patterns):") + for i, url := range urls2 { + t.Logf(" [%d] %s", i+1, url) + } + + // Симуляция текущей дедупликации (простое сравнение строк) + urlMap := make(map[string]bool) + var combined []string + + for _, url := range urls1 { + if !urlMap[url] { + combined = append(combined, url) + urlMap[url] = true + } + } + + detectedDuplicates := 0 + for _, url := range urls2 { + if !urlMap[url] { + combined = append(combined, url) + urlMap[url] = true + } else { + detectedDuplicates++ + } + } + + t.Logf("\nCurrent deduplication results:") + t.Logf(" Source 1 URLs: %d", len(urls1)) + t.Logf(" Source 2 URLs: %d", len(urls2)) + t.Logf(" Combined URLs: %d", len(combined)) + t.Logf(" Duplicates detected by string comparison: %d", detectedDuplicates) + + // Канонический анализ + canonicalMap := make(map[string][]string) + for _, url := range combined { + canonical := makeCanonical(url) + canonicalMap[canonical] = append(canonicalMap[canonical], url) + } + + realUnique := len(canonicalMap) + semanticDuplicates := len(combined) - realUnique + + t.Logf("\nCanonical URL analysis:") + t.Logf(" Real unique streams: %d", realUnique) + t.Logf(" Semantic duplicates: %d", semanticDuplicates) + t.Logf(" Current dedup effectiveness: %.1f%%", + float64(detectedDuplicates)/float64(len(urls1)+len(urls2))*100) + t.Logf(" Should be dedup effectiveness: %.1f%%", + float64(semanticDuplicates+detectedDuplicates)/float64(len(urls1)+len(urls2))*100) + + if semanticDuplicates > 0 { + t.Logf("\n⚠️ PROBLEM: %d semantic duplicates NOT detected", semanticDuplicates) + } +} + +// TestWorstCaseScenario показывает худший 
сценарий +func TestWorstCaseScenario(t *testing.T) { + logger := &mockLogger{} + builder := NewBuilder([]string{}, logger) + + // Паттерн, который есть везде: Popular + Model + ONVIF + entry := models.CameraEntry{ + Type: "JPEG", + Protocol: "http", + Port: 80, + URL: "snapshot.jpg", + } + + ctx := BuildContext{ + IP: "192.168.1.100", + Username: "admin", + Password: "12345", + Port: 80, + } + + // Симуляция 3 источников + popularURLs := builder.BuildURLsFromEntry(entry, ctx) + modelURLs := builder.BuildURLsFromEntry(entry, ctx) + + // ONVIF может вернуть URL без credentials + onvifURL := "http://192.168.1.100/snapshot.jpg" + + t.Logf("\n=== WORST CASE: Same pattern from 3 sources ===") + t.Logf("Popular patterns generates: %d URLs", len(popularURLs)) + t.Logf("Model patterns generates: %d URLs", len(modelURLs)) + t.Logf("ONVIF returns: 1 URL") + + // Текущая дедупликация + urlMap := make(map[string]bool) + var all []string + + add := func(url string) { + if !urlMap[url] { + all = append(all, url) + urlMap[url] = true + } + } + + for _, url := range popularURLs { + add(url) + } + for _, url := range modelURLs { + add(url) + } + add(onvifURL) + + t.Logf("\nAfter current deduplication:") + t.Logf(" Total URLs to test: %d", len(all)) + + for i, url := range all { + t.Logf(" [%d] %s", i+1, url) + } + + // Канонический анализ + canonicalMap := make(map[string][]string) + for _, url := range all { + canonical := makeCanonical(url) + canonicalMap[canonical] = append(canonicalMap[canonical], url) + } + + t.Logf("\nCanonical analysis:") + t.Logf(" Real unique streams: %d", len(canonicalMap)) + t.Logf(" URLs being tested: %d", len(all)) + t.Logf(" Waste: %d unnecessary tests (%.1f%%)", + len(all)-len(canonicalMap), + float64(len(all)-len(canonicalMap))/float64(len(all))*100) + + if len(all) > 1 { + t.Logf("\n⚠️ CRITICAL: Testing the same stream %d times!", len(all)) + t.Logf("Expected time waste: ~%d seconds (assuming 2s per test)", (len(all)-1)*2) + } +} + +// makeCanonical 
// makeCanonical is a simplified URL normalizer for the deduplication tests.
// It maps every authentication variant of an endpoint to the same string by
// removing:
//
//   - the userinfo part ("user:pass@") of the authority, and
//   - query parameters that carry credentials (user, username, usr, pwd,
//     password, pass).
//
// The remaining query parameters keep their original order and encoding, and
// a "?" left without parameters is dropped. The function works on the raw
// string only and never fails. Credentials are expected to be percent-encoded
// where required, so the authority ends at the first "/", "?" or "#".
func makeCanonical(rawURL string) string {
	base, query, hasQuery := strings.Cut(rawURL, "?")

	// 1. Strip credentials. Only the authority is searched for "@", so an
	// "@" in the path or query cannot swallow the host. The last "@" is
	// used so that a password containing "@" is removed completely.
	if schemeEnd := strings.Index(base, "://"); schemeEnd >= 0 {
		hostStart := schemeEnd + len("://")
		authorityEnd := len(base)
		if i := strings.IndexAny(base[hostStart:], "/#"); i >= 0 {
			authorityEnd = hostStart + i
		}
		if at := strings.LastIndex(base[hostStart:authorityEnd], "@"); at >= 0 {
			base = base[:hostStart] + base[hostStart+at+1:]
		}
	}

	if !hasQuery {
		return base
	}

	// 2. Drop every credential-bearing query parameter, not just the first
	// occurrence of each, reusing the backing array of the split result.
	params := strings.Split(query, "&")
	kept := params[:0]
	for _, p := range params {
		if !isAuthQueryParam(p) {
			kept = append(kept, p)
		}
	}
	if len(kept) == 0 {
		return base
	}

	// 3. Remove a trailing "?" (e.g. the input ended in a bare "?").
	return strings.TrimSuffix(base+"?"+strings.Join(kept, "&"), "?")
}

// isAuthQueryParam reports whether a single "name=value" query parameter has
// one of the credential parameter names. Matching is case-sensitive and
// requires the "=" to be present.
func isAuthQueryParam(param string) bool {
	name, _, hasValue := strings.Cut(param, "=")
	if !hasValue {
		return false
	}
	switch name {
	case "user", "username", "usr", "pwd", "password", "pass":
		return true
	}
	return false
}