Add support pcm_s16le audio

This commit is contained in:
Alexey Khit
2023-07-15 15:05:26 +03:00
parent e48459f49d
commit 13ca991c37
12 changed files with 157 additions and 24 deletions
+2
View File
@@ -79,6 +79,8 @@ var defaults = map[string]string{
"pcm": "-c:a pcm_s16be -ar:a 8000 -ac:a 1",
"pcm/16000": "-c:a pcm_s16be -ar:a 16000 -ac:a 1",
"pcm/48000": "-c:a pcm_s16be -ar:a 48000 -ac:a 1",
"pcml": "-c:a pcm_s16le -ar:a 8000 -ac:a 1",
"pcml/44100": "-c:a pcm_s16le -ar:a 44100 -ac:a 1",
// hardware Intel and AMD on Linux
// better not to set `-async_depth:v 1` like for QSV, because framedrops
+40
View File
@@ -0,0 +1,40 @@
## PCM
**RTSP**
- PayloadType=10 - L16/44100/2 - Linear PCM 16-bit big endian
- PayloadType=11 - L16/44100/1 - Linear PCM 16-bit big endian
https://en.wikipedia.org/wiki/RTP_payload_formats
**Apple QuickTime**
- `raw` - 16-bit data is stored in little endian format
- `twos` - 16-bit data is stored in big endian format
- `sowt` - 16-bit data is stored in little endian format
- `in24` - denotes 24-bit, big endian
- `in32` - denotes 32-bit, big endian
- `fl32` - denotes 32-bit floating point PCM
- `fl64` - denotes 64-bit floating point PCM
- `alaw` - denotes A-law logarithmic PCM
- `ulaw` - denotes mu-law logarithmic PCM
https://wiki.multimedia.cx/index.php/PCM
**FFmpeg RTSP**
```
pcm_s16be, 44100 Hz, stereo => 10
pcm_s16be, 48000 Hz, stereo => 96 L16/48000/2
pcm_s16be, 44100 Hz, mono => 11
pcm_s16le, 48000 Hz, stereo => 96 (b=AS:1536)
pcm_s16le, 44100 Hz, stereo => 96 (b=AS:1411)
pcm_s16le, 16000 Hz, stereo => 96 (b=AS:512)
pcm_s16le, 8000 Hz, stereo => 96 (b=AS:256)
pcm_s16le, 48000 Hz, mono => 96 (b=AS:768)
pcm_s16le, 44100 Hz, mono => 96 (b=AS:705)
pcm_s16le, 16000 Hz, mono => 96 (b=AS:256)
pcm_s16le, 8000 Hz, mono => 96 (b=AS:128)
```
+38 -1
View File
@@ -3,10 +3,11 @@ package core
import (
"encoding/base64"
"fmt"
"github.com/pion/sdp/v3"
"strconv"
"strings"
"unicode"
"github.com/pion/sdp/v3"
)
type Codec struct {
@@ -112,6 +113,42 @@ func UnmarshalCodec(md *sdp.MediaDescription, payloadType string) *Codec {
case "26":
c.Name = CodecJPEG
c.ClockRate = 90000
case "96", "97", "98":
if len(md.Bandwidth) == 0 {
c.Name = payloadType
break
}
// FFmpeg + RTSP + pcm_s16le = doesn't pass info about codec name and params
// so try to guess the codec based on bitrate
// https://github.com/AlexxIT/go2rtc/issues/523
switch md.Bandwidth[0].Bandwidth {
case 128:
c.ClockRate = 8000
case 256:
c.ClockRate = 16000
case 384:
c.ClockRate = 24000
case 512:
c.ClockRate = 32000
case 705:
c.ClockRate = 44100
case 768:
c.ClockRate = 48000
case 1411:
// default Windows DShow
c.ClockRate = 44100
c.Channels = 2
case 1536:
// default Linux ALSA
c.ClockRate = 48000
c.Channels = 2
default:
c.Name = payloadType
break
}
c.Name = CodecPCML
default:
c.Name = payloadType
}
+3 -1
View File
@@ -25,7 +25,9 @@ const (
CodecOpus = "OPUS" // payloadType: 111
CodecG722 = "G722"
CodecMP3 = "MPA" // payload: 14, aka MPEG-1 Layer III
CodecPCM = "L16" // Linear PCM
CodecPCM = "L16" // Linear PCM (big endian)
CodecPCML = "PCML" // Linear PCM (little endian)
CodecELD = "ELD" // AAC-ELD
CodecFLAC = "FLAC"
+3 -2
View File
@@ -3,8 +3,9 @@ package core
import (
"encoding/json"
"fmt"
"github.com/pion/sdp/v3"
"strings"
"github.com/pion/sdp/v3"
)
// Media take best from:
@@ -93,7 +94,7 @@ func GetKind(name string) string {
switch name {
case CodecH264, CodecH265, CodecVP8, CodecVP9, CodecAV1, CodecJPEG:
return KindVideo
case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecELD, CodecFLAC:
case CodecPCMU, CodecPCMA, CodecAAC, CodecOpus, CodecG722, CodecMP3, CodecPCM, CodecPCML, CodecELD, CodecFLAC:
return KindAudio
}
return ""
+3 -2
View File
@@ -2,13 +2,14 @@ package mp4
import (
"encoding/json"
"sync"
"github.com/AlexxIT/go2rtc/pkg/aac"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/pcm"
"github.com/pion/rtp"
"sync"
)
type Consumer struct {
@@ -131,7 +132,7 @@ func (c *Consumer) AddTrack(media *core.Media, _ *core.Codec, track *core.Receiv
handler.Handler = aac.RTPDepay(handler.Handler)
}
case core.CodecOpus, core.CodecMP3: // no changes
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM:
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML:
handler.Handler = pcm.FLACEncoder(track.Codec, handler.Handler)
codec.Name = core.CodecFLAC
+2
View File
@@ -37,6 +37,7 @@ func ParseQuery(query map[string][]string) []*core.Media {
&core.Codec{Name: core.CodecPCMA},
&core.Codec{Name: core.CodecPCMU},
&core.Codec{Name: core.CodecPCM},
&core.Codec{Name: core.CodecPCML},
)
if v[0] == "flac" {
@@ -74,6 +75,7 @@ func ParseCodecs(codecs string, parseAudio bool) (medias []*core.Media) {
&core.Codec{Name: core.CodecPCMA},
&core.Codec{Name: core.CodecPCMU},
&core.Codec{Name: core.CodecPCM},
&core.Codec{Name: core.CodecPCML},
)
case MimeOpus:
codec := &core.Codec{Name: core.CodecOpus}
+1 -1
View File
@@ -119,7 +119,7 @@ func (m *Muxer) GetInit(codecs []*core.Codec) ([]byte, error) {
uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, b,
)
case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecFLAC:
case core.CodecOpus, core.CodecMP3, core.CodecPCMA, core.CodecPCMU, core.CodecFLAC:
mv.WriteAudioTrack(
uint32(i+1), codec.Name, codec.ClockRate, codec.Channels, nil,
)
+11 -2
View File
@@ -6,11 +6,12 @@ package pcm
import (
"encoding/binary"
"unicode/utf8"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/rtp"
"github.com/sigurn/crc16"
"github.com/sigurn/crc8"
"unicode/utf8"
)
func FLACHeader(magic bool, sampleRate uint32) []byte {
@@ -86,7 +87,7 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
return func(packet *rtp.Packet) {
samples := uint16(len(packet.Payload))
if codec.Name == core.CodecPCM {
if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML {
samples /= 2
}
@@ -131,6 +132,14 @@ func FLACEncoder(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
}
case core.CodecPCM:
n += copy(buf[n:], packet.Payload)
case core.CodecPCML:
// reverse endian from little to big
size := len(packet.Payload)
for i := 0; i < size; i += 2 {
buf[n] = packet.Payload[i+1]
buf[n+1] = packet.Payload[i]
n += 2
}
}
// 4. Frame footer
+34 -6
View File
@@ -1,29 +1,39 @@
package pcm
import (
"sync"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/rtp"
"sync"
)
func Resample(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc {
// ResampleToPCMA - convert PCMA/PCMU/PCM/PCML to PCMA with decreasing sample rate
func ResampleToPCMA(codec *core.Codec, sampleRate uint32, handler core.HandlerFunc) core.HandlerFunc {
n := float32(codec.ClockRate) / float32(sampleRate)
switch codec.Name {
case core.CodecPCMA:
return DownsampleByte(PCMAtoPCM, PCMtoPCMA, n, handler)
case core.CodecPCMU:
return DownsampleByte(PCMUtoPCM, PCMtoPCMU, n, handler)
case core.CodecPCM:
return DownsampleByte(PCMUtoPCM, PCMtoPCMA, n, handler)
case core.CodecPCM, core.CodecPCML:
if n == 1 {
return ResamplePCM(PCMtoPCMA, handler)
handler = ResamplePCM(PCMtoPCMA, handler)
} else {
handler = DownsamplePCM(PCMtoPCMA, n, handler)
}
return DownsamplePCM(PCMtoPCMA, n, handler)
if codec.Name == core.CodecPCML {
return LittleToBig(handler)
}
return handler
}
panic(core.Caller())
}
// DownsampleByte - convert PCMA/PCMU to PCMA/PCMU with decreasing sample rate (N times)
func DownsampleByte(
toPCM func(byte) int16, fromPCM func(int16) byte, n float32, handler core.HandlerFunc,
) core.HandlerFunc {
@@ -58,6 +68,23 @@ func DownsampleByte(
}
}
// LittleToBig - conver PCM little endian to PCM big endian
func LittleToBig(handler core.HandlerFunc) core.HandlerFunc {
return func(packet *rtp.Packet) {
size := len(packet.Payload)
b := make([]byte, size)
for i := 0; i < size; i += 2 {
b[i] = packet.Payload[i+1]
b[i+1] = packet.Payload[i]
}
clone := *packet
clone.Payload = b
handler(&clone)
}
}
// ResamplePCM - convert PCM to PCMA/PCMU with same sample rate
func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.HandlerFunc {
var ts uint32
@@ -84,6 +111,7 @@ func ResamplePCM(fromPCM func(int16) byte, handler core.HandlerFunc) core.Handle
}
}
// DownsamplePCM - convert PCM to PCMA/PCMU with decreasing sample rate (N times)
func DownsamplePCM(fromPCM func(int16) byte, n float32, handler core.HandlerFunc) core.HandlerFunc {
var sampleN, sampleSum float32
var ts uint32
+4 -3
View File
@@ -3,6 +3,7 @@ package webrtc
import (
"encoding/json"
"errors"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
@@ -63,13 +64,13 @@ func (c *Conn) AddTrack(media *core.Media, codec *core.Codec, track *core.Receiv
sender.Handler = h265.RTPDepay(track.Codec, sender.Handler)
}
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM:
case core.CodecPCMA, core.CodecPCMU, core.CodecPCM, core.CodecPCML:
if codec.ClockRate == 0 {
if codec.Name == core.CodecPCM {
if codec.Name == core.CodecPCM || codec.Name == core.CodecPCML {
codec.Name = core.CodecPCMA
}
codec.ClockRate = 8000
sender.Handler = pcm.Resample(track.Codec, 8000, sender.Handler)
sender.Handler = pcm.ResampleToPCMA(track.Codec, 8000, sender.Handler)
}
// Fix audio quality https://github.com/AlexxIT/WebRTC/issues/500
+16 -6
View File
@@ -3,16 +3,17 @@ package webrtc
import (
"errors"
"fmt"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/ice/v2"
"github.com/pion/sdp/v3"
"github.com/pion/stun"
"github.com/pion/webrtc/v3"
"hash/crc32"
"net"
"strconv"
"strings"
"time"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/ice/v2"
"github.com/pion/sdp/v3"
"github.com/pion/stun"
"github.com/pion/webrtc/v3"
)
func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media) {
@@ -52,13 +53,15 @@ func UnmarshalMedias(descriptions []*sdp.MediaDescription) (medias []*core.Media
return
}
// WithResampling - will add for consumer: PCMA/0, PCMU/0, PCM/0, PCML/0
// so it can add resampling for PCMA/PCMU and repack for PCM/PCML
func WithResampling(medias []*core.Media) []*core.Media {
for _, media := range medias {
if media.Kind != core.KindAudio || media.Direction != core.DirectionSendonly {
continue
}
var pcma, pcmu, pcm *core.Codec
var pcma, pcmu, pcm, pcml *core.Codec
for _, codec := range media.Codecs {
switch codec.Name {
@@ -76,6 +79,8 @@ func WithResampling(medias []*core.Media) []*core.Media {
}
case core.CodecPCM:
pcm = codec
case core.CodecPCML:
pcml = codec
}
}
@@ -94,6 +99,11 @@ func WithResampling(medias []*core.Media) []*core.Media {
pcm.Name = core.CodecPCM
media.Codecs = append(media.Codecs, pcm)
}
if pcma != nil && pcml == nil {
pcml = pcma.Clone()
pcml.Name = core.CodecPCML
media.Codecs = append(media.Codecs, pcml)
}
}
return medias