Add support for pcm_s16le audio

This commit is contained in:
Alexey Khit
2023-07-15 15:05:26 +03:00
parent e48459f49d
commit 13ca991c37
12 changed files with 157 additions and 24 deletions
+2
View File
@@ -79,6 +79,8 @@ var defaults = map[string]string{
"pcm": "-c:a pcm_s16be -ar:a 8000 -ac:a 1", "pcm": "-c:a pcm_s16be -ar:a 8000 -ac:a 1",
"pcm/16000": "-c:a pcm_s16be -ar:a 16000 -ac:a 1", "pcm/16000": "-c:a pcm_s16be -ar:a 16000 -ac:a 1",
"pcm/48000": "-c:a pcm_s16be -ar:a 48000 -ac:a 1", "pcm/48000": "-c:a pcm_s16be -ar:a 48000 -ac:a 1",
"pcml": "-c:a pcm_s16le -ar:a 8000 -ac:a 1",
"pcml/44100": "-c:a pcm_s16le -ar:a 44100 -ac:a 1",
// hardware Intel and AMD on Linux // hardware Intel and AMD on Linux
// better not to set `-async_depth:v 1` like for QSV, because framedrops // better not to set `-async_depth:v 1` like for QSV, because framedrops
+40
View File
@@ -0,0 +1,40 @@
## PCM
**RTSP**
- PayloadType=10 - L16/44100/2 - Linear PCM 16-bit big endian
- PayloadType=11 - L16/44100/1 - Linear PCM 16-bit big endian
https://en.wikipedia.org/wiki/RTP_payload_formats
**Apple QuickTime**
- `raw` - 16-bit data is stored in little endian format
- `twos` - 16-bit data is stored in big endian format
- `sowt` - 16-bit data is stored in little endian format
- `in24` - denotes 24-bit, big endian
- `in32` - denotes 32-bit, big endian
- `fl32` - denotes 32-bit floating point PCM
- `fl64` - denotes 64-bit floating point PCM
- `alaw` - denotes A-law logarithmic PCM
- `ulaw` - denotes mu-law logarithmic PCM
https://wiki.multimedia.cx/index.php/PCM
**FFmpeg RTSP**
```
pcm_s16be, 44100 Hz, stereo => 10
pcm_s16be, 48000 Hz, stereo => 96 L16/48000/2
pcm_s16be, 44100 Hz, mono => 11
pcm_s16le, 48000 Hz, stereo => 96 (b=AS:1536)
pcm_s16le, 44100 Hz, stereo => 96 (b=AS:1411)
pcm_s16le, 16000 Hz, stereo => 96 (b=AS:512)
pcm_s16le, 8000 Hz, stereo => 96 (b=AS:256)
pcm_s16le, 48000 Hz, mono => 96 (b=AS:768)
pcm_s16le, 44100 Hz, mono => 96 (b=AS:705)
pcm_s16le, 16000 Hz, mono => 96 (b=AS:256)
pcm_s16le, 8000 Hz, mono => 96 (b=AS:128)
```
+38 -1
View File
@@ -3,10 +3,11 @@ package core
import ( import (
"encoding/base64" "encoding/base64"
"fmt" "fmt"
"github.com/pion/sdp/v3"
"strconv" "strconv"
"strings" "strings"
"unicode" "unicode"
"github.com/pion/sdp/v3"
) )
type Codec struct { type Codec struct {
@@ -112,6 +113,42 @@ func UnmarshalCodec(md *sdp.MediaDescription, payloadType string) *Codec {
case "26": case "26":
c.Name = CodecJPEG c.Name = CodecJPEG
c.ClockRate = 90000 c.ClockRate = 90000
case "96", "97", "98":
if len(md.Bandwidth) == 0 {
c.Name = payloadType
break
}
// FFmpeg + RTSP + pcm_s16le = doesn't pass info about codec name and params
// so try to guess the codec based on bitrate
// https://github.com/AlexxIT/go2rtc/issues/523
switch md.Bandwidth[0].Bandwidth {
case 128:
c.ClockRate = 8000
case 256:
c.ClockRate = 16000
case 384:
c.ClockRate = 24000
case 512:
c.ClockRate = 32000
case 705:
c.ClockRate = 44100
case 768:
c.ClockRate = 48000
case 1411:
// default Windows DShow
c.ClockRate = 44100
c.Channels = 2
case 1536:
// default Linux ALSA
c.ClockRate = 48000
c.Channels = 2
default:
c.Name = payloadType
break
}
c.Name = CodecPCML
default: default:
c.Name = payloadType c.Name = payloadType
} }
+3 -1
View File
@@ -25,7 +25,9 @@ const (
CodecOpus = "OPUS" // payloadType: 111 CodecOpus = "OPUS" // payloadType: 111
CodecG722 = "G722" CodecG722 = "G722"
CodecMP3 = "MPA" // payload: 14, aka MPEG-1 Layer III CodecMP3 = "MPA" // payload: 14, aka MPEG-1 Layer III
CodecPCM = "L16" // Linear PCM CodecPCM = "L16" // Linear PCM (big endian)
CodecPCML = "PCML" // Linear PCM (little endian)
CodecELD = "ELD" // AAC-ELD CodecELD = "ELD" // AAC-ELD
CodecFLAC = "FLAC" CodecFLAC = "FLAC"
+3 -2
View File
@@ -3,8 +3,9 @@ package core
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/pion/sdp/v3"
"strings" "strings"
"github.com/pion/sdp/v3"
) )
// Media take best from: // Media take best from:
@@ -93,7 +94,7 @@ func GetKind(name string) string {
switch name { switch name {
case CodecH264, CodecH265, CodecVP8, CodecVP9, CodecAV1, CodecJPEG: case CodecH264, CodecH265, CodecVP8, CodecVP9, CodecAV1, CodecJPEG:
return KindVideo return KindVideo
case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecELD, CodecFLAC: case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecPCML, CodecELD, CodecFLAC:
return KindAudio return KindAudio
} }
return "" return ""
+3 -2
View File
@@ -2,13 +2,14 @@ package mp4
import ( import (
"encoding/json" "encoding/json"
"sync"
"github.com/AlexxIT/go2rtc/pkg/aac" "github.com/AlexxIT/go2rtc/pkg/aac"
"github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/core"
"github.com/AlexxIT/go2rtc/pkg/h264" "github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265" "github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/pcm" "github.com/AlexxIT/go2rtc/pkg/pcm"
"github.com/pion/rtp" "github.com/pion/rtp"
"sync"
) )
type Consumer struct { type Consumer struct {
@@ -131,7 +132,7 @@ func (c *Consumer) AddTrack(media *core.Media, _ *core.Codec, track *core.Receiv
handler.Handler = aac.RTPDepay(handler.Handler) handler.Handler = aac.RTPDepay(handler.Handler)
} }
case core.CodecOpus, core.CodecMP3: // no changes case core.CodecOpus, core.CodecMP3: // no changes
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM: case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML:
handler.Handler = pcm.FLACEncoder(track.Codec, handler.Handler) handler.Handler = pcm.FLACEncoder(track.Codec, handler.Handler)
codec.Name = core.CodecFLAC codec.Name = core.CodecFLAC
+2
View File
@@ -37,6 +37,7 @@ func ParseQuery(query map[string][]string) []*core.Media {
&core.Codec{Name: core.CodecPCMA}, &core.Codec{Name: core.CodecPCMA},
&core.Codec{Name: core.CodecPCMU}, &core.Codec{Name: core.CodecPCMU},
&core.Codec{Name: core.CodecPCM}, &core.Codec{Name: core.CodecPCM},
&core.Codec{Name: core.CodecPCML},
) )
if v[0] == "flac" { if v[0] == "flac" {
@@ -74,6 +75,7 @@ func ParseCodecs(codecs string, parseAudio bool) (medias []*core.Media) {
&core.Codec{Name: core.CodecPCMA}, &core.Codec{Name: core.CodecPCMA},
&core.Codec{Name: core.CodecPCMU}, &core.Codec{Name: core.CodecPCMU},
&core.Codec{Name: core.CodecPCM}, &core.Codec{Name: core.CodecPCM},
&core.Codec{Name: core.CodecPCML},
) )
case MimeOpus: case MimeOpus:
codec := &core.Codec{Name: core.CodecOpus} codec := &core.Codec{Name: core.CodecOpus}
+1 -1
View File
@@ -119,7 +119,7 @@ func (m *Muxer) GetInit(codecs []*core.Codec) ([]byte, error) {
uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, b, uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, b,
) )
case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecFLAC: case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecFLAC:
mv.WriteAudioTrack( mv.WriteAudioTrack(
uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, nil, uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, nil,
) )
+11 -2
View File
@@ -6,11 +6,12 @@ package pcm
import ( import (
"encoding/binary" "encoding/binary"
"unicode/utf8"
"github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/rtp" "github.com/pion/rtp"
"github.com/sigurn/crc16" "github.com/sigurn/crc16"
"github.com/sigurn/crc8" "github.com/sigurn/crc8"
"unicode/utf8"
) )
func FLACHeader(magic bool, sampleRate uint32) []byte { func FLACHeader(magic bool, sampleRate uint32) []byte {
@@ -86,7 +87,7 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
return func(packet *rtp.Packet) { return func(packet *rtp.Packet) {
samples := uint16(len(packet.Payload)) samples := uint16(len(packet.Payload))
if codec.Name == core.CodecPCM { if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML {
samples /= 2 samples /= 2
} }
@@ -131,6 +132,14 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
} }
case core.CodecPCM: case core.CodecPCM:
n += copy(buf[n:], packet.Payload) n += copy(buf[n:], packet.Payload)
case core.CodecPCML:
// reverse endian from little to big
size := len(packet.Payload)
for i := 0; i < size; i += 2 {
buf[n] = packet.Payload[i+1]
buf[n+1] = packet.Payload[i]
n += 2
}
} }
// 4. Frame footer // 4. Frame footer
+34 -6
View File
@@ -1,29 +1,39 @@
package pcm package pcm
import ( import (
"sync"
"github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/rtp" "github.com/pion/rtp"
"sync"
) )
func Resample(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc { // ResampleToPCMA - convert PCMA/PCMU/PCM/PCML to PCMA with decreasing sample rate
func ResampleToPCMA(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc {
n := float32(codec.ClockRate) / float32(sampleRate) n := float32(codec.ClockRate) / float32(sampleRate)
switch codec.Name { switch codec.Name {
case core.CodecPCMA: case core.CodecPCMA:
return DownsampleByte(PCMAtoPCM, PCMtoPCMA, n, handler) return DownsampleByte(PCMAtoPCM, PCMtoPCMA, n, handler)
case core.CodecPCMU: case core.CodecPCMU:
return DownsampleByte(PCMUtoPCM, PCMtoPCMU, n, handler) return DownsampleByte(PCMUtoPCM, PCMtoPCMA, n, handler)
case core.CodecPCM: case core.CodecPCM, core.CodecPCML:
if n == 1 { if n == 1 {
return ResamplePCM(PCMtoPCMA, handler) handler = ResamplePCM(PCMtoPCMA, handler)
} else {
handler = DownsamplePCM(PCMtoPCMA, n, handler)
} }
return DownsamplePCM(PCMtoPCMA, n, handler)
if codec.Name == core.CodecPCML {
return LittleToBig(handler)
}
return handler
} }
panic(core.Caller()) panic(core.Caller())
} }
// DownsampleByte - convert PCMA/PCMU to PCMA/PCMU with decreasing sample rate (N times)
func DownsampleByte( func DownsampleByte(
toPCM func(byte) int16, fromPCM func(int16) byte, n float32, handler core.HandlerFunc, toPCM func(byte) int16, fromPCM func(int16) byte, n float32, handler core.HandlerFunc,
) core.HandlerFunc { ) core.HandlerFunc {
@@ -58,6 +68,23 @@ func DownsampleByte(
} }
} }
// LittleToBig - convert PCM little endian to PCM big endian
//
// Returns a handler that swaps the two bytes of every 16-bit sample in the
// packet payload and passes the result to the wrapped handler. The incoming
// packet is not modified: the wrapped handler receives a shallow copy of the
// packet with a newly allocated payload.
//
// A trailing odd byte (incomplete sample in a malformed packet) is dropped,
// because reading Payload[i+1] for it would panic with index out of range.
func LittleToBig(handler core.HandlerFunc) core.HandlerFunc {
	return func(packet *rtp.Packet) {
		// round down to a whole number of 16-bit samples
		size := len(packet.Payload) &^ 1
		b := make([]byte, size)
		for i := 0; i < size; i += 2 {
			b[i] = packet.Payload[i+1]
			b[i+1] = packet.Payload[i]
		}

		// copy the packet struct so the caller's payload stays untouched
		clone := *packet
		clone.Payload = b
		handler(&clone)
	}
}
// ResamplePCM - convert PCM to PCMA/PCMU with same sample rate
func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.HandlerFunc { func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.HandlerFunc {
var ts uint32 var ts uint32
@@ -84,6 +111,7 @@ func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.Handle
} }
} }
// DownsamplePCM - convert PCM to PCMA/PCMU with decreasing sample rate (N times)
func DownsamplePCM(fromPCM func(int16) byte, n float32, handler core.HandlerFunc) core.HandlerFunc { func DownsamplePCM(fromPCM func(int16) byte, n float32, handler core.HandlerFunc) core.HandlerFunc {
var sampleN, sampleSum float32 var sampleN, sampleSum float32
var ts uint32 var ts uint32
+4 -3
View File
@@ -3,6 +3,7 @@ package webrtc
import ( import (
"encoding/json" "encoding/json"
"errors" "errors"
"github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/core"
"github.com/AlexxIT/go2rtc/pkg/h264" "github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265" "github.com/AlexxIT/go2rtc/pkg/h265"
@@ -63,13 +64,13 @@ func (c *Conn) AddTrack(media *core.Media, codec *core.Codec, track *core.Receiv
sender.Handler = h265.RTPDepay(track.Codec, sender.Handler) sender.Handler = h265.RTPDepay(track.Codec, sender.Handler)
} }
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM: case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML:
if codec.ClockRate == 0 { if codec.ClockRate == 0 {
if codec.Name == core.CodecPCM { if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML {
codec.Name = core.CodecPCMA codec.Name = core.CodecPCMA
} }
codec.ClockRate = 8000 codec.ClockRate = 8000
sender.Handler = pcm.Resample(track.Codec, 8000, sender.Handler) sender.Handler = pcm.ResampleToPCMA(track.Codec, 8000, sender.Handler)
} }
// Fix audio quality https://github.com/AlexxIT/WebRTC/issues/500 // Fix audio quality https://github.com/AlexxIT/WebRTC/issues/500
+16 -6
View File
@@ -3,16 +3,17 @@ package webrtc
import ( import (
"errors" "errors"
"fmt" "fmt"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/ice/v2"
"github.com/pion/sdp/v3"
"github.com/pion/stun"
"github.com/pion/webrtc/v3"
"hash/crc32" "hash/crc32"
"net" "net"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/ice/v2"
"github.com/pion/sdp/v3"
"github.com/pion/stun"
"github.com/pion/webrtc/v3"
) )
func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media) { func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media) {
@@ -52,13 +53,15 @@ func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media
return return
} }
// WithResampling - will add for consumer: PCMA/0, PCMU/0, PCM/0, PCML/0
// so it can add resampling for PCMA/PCMU and repack for PCM/PCML
func WithResampling(medias []*core.Media) []*core.Media { func WithResampling(medias []*core.Media) []*core.Media {
for _, media := range medias { for _, media := range medias {
if media.Kind != core.KindAudio || media.Direction != core.DirectionSendonly { if media.Kind != core.KindAudio || media.Direction != core.DirectionSendonly {
continue continue
} }
var pcma, pcmu, pcm *core.Codec var pcma, pcmu, pcm, pcml *core.Codec
for _, codec := range media.Codecs { for _, codec := range media.Codecs {
switch codec.Name { switch codec.Name {
@@ -76,6 +79,8 @@ func WithResampling(medias []*core.Media) []*core.Media {
} }
case core.CodecPCM: case core.CodecPCM:
pcm = codec pcm = codec
case core.CodecPCML:
pcml = codec
} }
} }
@@ -94,6 +99,11 @@ func WithResampling(medias []*core.Media) []*core.Media {
pcm.Name = core.CodecPCM pcm.Name = core.CodecPCM
media.Codecs = append(media.Codecs, pcm) media.Codecs = append(media.Codecs, pcm)
} }
if pcma != nil && pcml == nil {
pcml = pcma.Clone()
pcml.Name = core.CodecPCML
media.Codecs = append(media.Codecs, pcml)
}
} }
return medias return medias