diff --git a/internal/ffmpeg/ffmpeg.go b/internal/ffmpeg/ffmpeg.go index 5c5bfcb5..c6607e85 100644 --- a/internal/ffmpeg/ffmpeg.go +++ b/internal/ffmpeg/ffmpeg.go @@ -79,6 +79,8 @@ var defaults = map[string]string{ "pcm": "-c:a pcm_s16be -ar:a 8000 -ac:a 1", "pcm/16000": "-c:a pcm_s16be -ar:a 16000 -ac:a 1", "pcm/48000": "-c:a pcm_s16be -ar:a 48000 -ac:a 1", + "pcml": "-c:a pcm_s16le -ar:a 8000 -ac:a 1", + "pcml/44100": "-c:a pcm_s16le -ar:a 44100 -ac:a 1", // hardware Intel and AMD on Linux // better not to set `-async_depth:v 1` like for QSV, because framedrops diff --git a/pkg/core/README.md b/pkg/core/README.md new file mode 100644 index 00000000..7f6faca1 --- /dev/null +++ b/pkg/core/README.md @@ -0,0 +1,40 @@ +## PCM + +**RTSP** + +- PayloadType=10 - L16/44100/2 - Linear PCM 16-bit big endian +- PayloadType=11 - L16/44100/1 - Linear PCM 16-bit big endian + +https://en.wikipedia.org/wiki/RTP_payload_formats + +**Apple QuickTime** + +- `raw` - 16-bit data is stored in little endian format +- `twos` - 16-bit data is stored in big endian format +- `sowt` - 16-bit data is stored in little endian format +- `in24` - denotes 24-bit, big endian +- `in32` - denotes 32-bit, big endian +- `fl32` - denotes 32-bit floating point PCM +- `fl64` - denotes 64-bit floating point PCM +- `alaw` - denotes A-law logarithmic PCM +- `ulaw` - denotes mu-law logarithmic PCM + +https://wiki.multimedia.cx/index.php/PCM + +**FFmpeg RTSP** + +``` +pcm_s16be, 44100 Hz, stereo => 10 +pcm_s16be, 48000 Hz, stereo => 96 L16/48000/2 +pcm_s16be, 44100 Hz, mono => 11 + +pcm_s16le, 48000 Hz, stereo => 96 (b=AS:1536) +pcm_s16le, 44100 Hz, stereo => 96 (b=AS:1411) +pcm_s16le, 16000 Hz, stereo => 96 (b=AS:512) +pcm_s16le, 8000 Hz, stereo => 96 (b=AS:256) + +pcm_s16le, 48000 Hz, mono => 96 (b=AS:768) +pcm_s16le, 44100 Hz, mono => 96 (b=AS:705) +pcm_s16le, 16000 Hz, mono => 96 (b=AS:256) +pcm_s16le, 8000 Hz, mono => 96 (b=AS:128) +``` \ No newline at end of file diff --git a/pkg/core/codec.go 
b/pkg/core/codec.go index 5f346739..4709abc1 100644 --- a/pkg/core/codec.go +++ b/pkg/core/codec.go @@ -3,10 +3,11 @@ package core import ( "encoding/base64" "fmt" - "github.com/pion/sdp/v3" "strconv" "strings" "unicode" + + "github.com/pion/sdp/v3" ) type Codec struct { @@ -112,6 +113,42 @@ func UnmarshalCodec(md *sdp.MediaDescription, payloadType string) *Codec { case "26": c.Name = CodecJPEG c.ClockRate = 90000 + case "96", "97", "98": + if len(md.Bandwidth) == 0 { + c.Name = payloadType + break + } + + // FFmpeg + RTSP + pcm_s16le = doesn't pass info about codec name and params + // so try to guess the codec based on bitrate + // https://github.com/AlexxIT/go2rtc/issues/523 + // assume PCML up front: a break inside the nested switch only exits that switch + c.Name = CodecPCML + switch md.Bandwidth[0].Bandwidth { + case 128: + c.ClockRate = 8000 + case 256: + c.ClockRate = 16000 + case 384: + c.ClockRate = 24000 + case 512: + c.ClockRate = 32000 + case 705: + c.ClockRate = 44100 + case 768: + c.ClockRate = 48000 + case 1411: + // default Windows DShow + c.ClockRate = 44100 + c.Channels = 2 + case 1536: + // default Linux ALSA + c.ClockRate = 48000 + c.Channels = 2 + default: + // unknown bitrate: keep the raw payload type instead of guessing PCML + c.Name = payloadType + } default: c.Name = payloadType } diff --git a/pkg/core/core.go b/pkg/core/core.go index 72d32b78..f123588f 100644 --- a/pkg/core/core.go +++ b/pkg/core/core.go @@ -25,7 +25,9 @@ const ( CodecOpus = "OPUS" // payloadType: 111 CodecG722 = "G722" CodecMP3 = "MPA" // payload: 14, aka MPEG-1 Layer III - CodecPCM = "L16" // Linear PCM + CodecPCM = "L16" // Linear PCM (big endian) + + CodecPCML = "PCML" // Linear PCM (little endian) CodecELD = "ELD" // AAC-ELD CodecFLAC = "FLAC" diff --git a/pkg/core/media.go b/pkg/core/media.go index 5d73dc6b..fe58cfd6 100644 --- a/pkg/core/media.go +++ b/pkg/core/media.go @@ -3,8 +3,9 @@ package core import ( "encoding/json" "fmt" - "github.com/pion/sdp/v3" "strings" + + "github.com/pion/sdp/v3" ) // Media take best from: @@ -93,7 +94,7 @@ func GetKind(name string) string { switch name { case CodecH264,
CodecH265, CodecVP8, CodecVP9, CodecAV1, CodecJPEG: return KindVideo - case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecELD, CodecFLAC: + case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecPCML, CodecELD, CodecFLAC: return KindAudio } return "" diff --git a/pkg/mp4/consumer.go b/pkg/mp4/consumer.go index 4a754500..3060a532 100644 --- a/pkg/mp4/consumer.go +++ b/pkg/mp4/consumer.go @@ -2,13 +2,14 @@ package mp4 import ( "encoding/json" + "sync" + "github.com/AlexxIT/go2rtc/pkg/aac" "github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/h264" "github.com/AlexxIT/go2rtc/pkg/h265" "github.com/AlexxIT/go2rtc/pkg/pcm" "github.com/pion/rtp" - "sync" ) type Consumer struct { @@ -131,7 +132,7 @@ func (c *Consumer) AddTrack(media *core.Media, _ *core.Codec, track *core.Receiv handler.Handler = aac.RTPDepay(handler.Handler) } case core.CodecOpus, core.CodecMP3: // no changes - case core.CodecPCMA, core.CodecPCMU, core.CodecPCM: + case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML: handler.Handler = pcm.FLACEncoder(track.Codec, handler.Handler) codec.Name = core.CodecFLAC diff --git a/pkg/mp4/helpers.go b/pkg/mp4/helpers.go index ff737d29..05d97c8a 100644 --- a/pkg/mp4/helpers.go +++ b/pkg/mp4/helpers.go @@ -37,6 +37,7 @@ func ParseQuery(query map[string][]string) []*core.Media { &core.Codec{Name: core.CodecPCMA}, &core.Codec{Name: core.CodecPCMU}, &core.Codec{Name: core.CodecPCM}, + &core.Codec{Name: core.CodecPCML}, ) if v[0] == "flac" { @@ -74,6 +75,7 @@ func ParseCodecs(codecs string, parseAudio bool) (medias []*core.Media) { &core.Codec{Name: core.CodecPCMA}, &core.Codec{Name: core.CodecPCMU}, &core.Codec{Name: core.CodecPCM}, + &core.Codec{Name: core.CodecPCML}, ) case MimeOpus: codec := &core.Codec{Name: core.CodecOpus} diff --git a/pkg/mp4/muxer.go b/pkg/mp4/muxer.go index f01758ec..f6a2fb79 100644 --- a/pkg/mp4/muxer.go +++ b/pkg/mp4/muxer.go @@ -119,7 +119,7 @@ func (m 
*Muxer) GetInit(codecs []*core.Codec) ([]byte, error) { uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, b, ) - case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecFLAC: + case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecFLAC: mv.WriteAudioTrack( uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, nil, ) diff --git a/pkg/pcm/flac.go b/pkg/pcm/flac.go index 054746d1..fdbe5190 100644 --- a/pkg/pcm/flac.go +++ b/pkg/pcm/flac.go @@ -6,11 +6,12 @@ package pcm import ( "encoding/binary" + "unicode/utf8" + "github.com/AlexxIT/go2rtc/pkg/core" "github.com/pion/rtp" "github.com/sigurn/crc16" "github.com/sigurn/crc8" - "unicode/utf8" ) func FLACHeader(magic bool, sampleRate uint32) []byte { @@ -86,7 +87,7 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc { return func(packet *rtp.Packet) { samples := uint16(len(packet.Payload)) - if codec.Name == core.CodecPCM { + if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML { samples /= 2 } @@ -131,6 +132,14 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc { } case core.CodecPCM: n += copy(buf[n:], packet.Payload) + case core.CodecPCML: + // reverse endian from little to big; a trailing odd byte is ignored + size := len(packet.Payload) &^ 1 + for i := 0; i < size; i += 2 { + buf[n] = packet.Payload[i+1] + buf[n+1] = packet.Payload[i] + n += 2 + } } // 4.
Frame footer diff --git a/pkg/pcm/pcm.go b/pkg/pcm/pcm.go index d2c08717..2a9d26e1 100644 --- a/pkg/pcm/pcm.go +++ b/pkg/pcm/pcm.go @@ -1,29 +1,39 @@ package pcm import ( + "sync" + "github.com/AlexxIT/go2rtc/pkg/core" "github.com/pion/rtp" - "sync" ) -func Resample(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc { +// ResampleToPCMA - convert PCMA/PCM/PCML to PCMA (PCMU stays PCMU) with decreasing sample rate +func ResampleToPCMA(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc { n := float32(codec.ClockRate) / float32(sampleRate) switch codec.Name { case core.CodecPCMA: return DownsampleByte(PCMAtoPCM, PCMtoPCMA, n, handler) case core.CodecPCMU: return DownsampleByte(PCMUtoPCM, PCMtoPCMU, n, handler) - case core.CodecPCM: + case core.CodecPCM, core.CodecPCML: if n == 1 { - return ResamplePCM(PCMtoPCMA, handler) + handler = ResamplePCM(PCMtoPCMA, handler) + } else { + handler = DownsamplePCM(PCMtoPCMA, n, handler) } - return DownsamplePCM(PCMtoPCMA, n, handler) + + if codec.Name == core.CodecPCML { + return LittleToBig(handler) + } + + return handler } panic(core.Caller()) } +// DownsampleByte - convert PCMA/PCMU to PCMA/PCMU with decreasing sample rate (N times) func DownsampleByte( toPCM func(byte) int16, fromPCM func(int16) byte, n float32, handler core.HandlerFunc, ) core.HandlerFunc { @@ -58,6 +68,23 @@ func DownsampleByte( } } +// LittleToBig - convert PCM little endian to PCM big endian (a trailing odd byte is dropped) +func LittleToBig(handler core.HandlerFunc) core.HandlerFunc { + return func(packet *rtp.Packet) { + size := len(packet.Payload) &^ 1 + b := make([]byte, size) + for i := 0; i < size; i += 2 { + b[i] = packet.Payload[i+1] + b[i+1] = packet.Payload[i] + } + + clone := *packet + clone.Payload = b + handler(&clone) + } +} + +// ResamplePCM - convert PCM to PCMA/PCMU with same sample rate func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.HandlerFunc {
var ts uint32 @@ -84,6 +111,7 @@ func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.Handle } } +// DownsamplePCM - convert PCM to PCMA/PCMU with decreasing sample rate (N times) func DownsamplePCM(fromPCM func(int16) byte, n float32, handler core.HandlerFunc) core.HandlerFunc { var sampleN, sampleSum float32 var ts uint32 diff --git a/pkg/webrtc/consumer.go b/pkg/webrtc/consumer.go index dc2e29c4..405468fe 100644 --- a/pkg/webrtc/consumer.go +++ b/pkg/webrtc/consumer.go @@ -3,6 +3,7 @@ package webrtc import ( "encoding/json" "errors" + "github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/h264" "github.com/AlexxIT/go2rtc/pkg/h265" @@ -63,13 +64,13 @@ func (c *Conn) AddTrack(media *core.Media, codec *core.Codec, track *core.Receiv sender.Handler = h265.RTPDepay(track.Codec, sender.Handler) } - case core.CodecPCMA, core.CodecPCMU, core.CodecPCM: + case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML: if codec.ClockRate == 0 { - if codec.Name == core.CodecPCM { + if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML { codec.Name = core.CodecPCMA } codec.ClockRate = 8000 - sender.Handler = pcm.Resample(track.Codec, 8000, sender.Handler) + sender.Handler = pcm.ResampleToPCMA(track.Codec, 8000, sender.Handler) } // Fix audio quality https://github.com/AlexxIT/WebRTC/issues/500 diff --git a/pkg/webrtc/helpers.go b/pkg/webrtc/helpers.go index b92e72ee..ab3f83b3 100644 --- a/pkg/webrtc/helpers.go +++ b/pkg/webrtc/helpers.go @@ -3,16 +3,17 @@ package webrtc import ( "errors" "fmt" - "github.com/AlexxIT/go2rtc/pkg/core" - "github.com/pion/ice/v2" - "github.com/pion/sdp/v3" - "github.com/pion/stun" - "github.com/pion/webrtc/v3" "hash/crc32" "net" "strconv" "strings" "time" + + "github.com/AlexxIT/go2rtc/pkg/core" + "github.com/pion/ice/v2" + "github.com/pion/sdp/v3" + "github.com/pion/stun" + "github.com/pion/webrtc/v3" ) func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media) { @@ -52,13 
+53,15 @@ func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media return } +// WithResampling - will add for consumer: PCMA/0, PCMU/0, PCM/0, PCML/0 +// so it can add resampling for PCMA/PCMU and repack for PCM/PCML func WithResampling(medias []*core.Media) []*core.Media { for _, media := range medias { if media.Kind != core.KindAudio || media.Direction != core.DirectionSendonly { continue } - var pcma, pcmu, pcm *core.Codec + var pcma, pcmu, pcm, pcml *core.Codec for _, codec := range media.Codecs { switch codec.Name { @@ -76,6 +79,8 @@ func WithResampling(medias []*core.Media) []*core.Media { } case core.CodecPCM: pcm = codec + case core.CodecPCML: + pcml = codec } } @@ -94,6 +99,11 @@ func WithResampling(medias []*core.Media) []*core.Media { pcm.Name = core.CodecPCM media.Codecs = append(media.Codecs, pcm) } + if pcma != nil && pcml == nil { + pcml = pcma.Clone() + pcml.Name = core.CodecPCML + media.Codecs = append(media.Codecs, pcml) + } } return medias