Total rewrite mov/mp4 encoder

This commit is contained in:
Alexey Khit
2023-01-26 22:29:12 +03:00
parent e295bc4eaf
commit d05ab79f88
10 changed files with 1157 additions and 164 deletions
+318
View File
@@ -0,0 +1,318 @@
package mov
// Four-character type codes of the atoms written by this package. Each
// constant name spells out the chain of enclosing atoms, e.g. MoovTrakTkhd
// is the "tkhd" code used when writing a track header.
const (
Ftyp = "ftyp"
// Atoms of the movie (initialization) part.
Moov = "moov"
MoovMvhd = "mvhd"
MoovTrak = "trak"
MoovTrakTkhd = "tkhd"
MoovTrakMdia = "mdia"
MoovTrakMdiaMdhd = "mdhd"
MoovTrakMdiaHdlr = "hdlr"
MoovTrakMdiaMinf = "minf"
MoovTrakMdiaMinfVmhd = "vmhd"
MoovTrakMdiaMinfSmhd = "smhd"
MoovTrakMdiaMinfDinf = "dinf"
MoovTrakMdiaMinfDinfDref = "dref"
MoovTrakMdiaMinfDinfDrefUrl = "url " // trailing space keeps the code four bytes long
MoovTrakMdiaMinfStbl = "stbl"
MoovTrakMdiaMinfStblStsd = "stsd"
MoovTrakMdiaMinfStblStts = "stts"
MoovTrakMdiaMinfStblStsc = "stsc"
MoovTrakMdiaMinfStblStsz = "stsz"
MoovTrakMdiaMinfStblStco = "stco"
MoovMvex = "mvex"
MoovMvexTrex = "trex"
// Atoms of a movie fragment.
Moof = "moof"
MoofMfhd = "mfhd"
MoofTraf = "traf"
MoofTrafTfhd = "tfhd"
MoofTrafTfdt = "tfdt"
MoofTrafTrun = "trun"
// Media data atom.
Mdat = "mdat"
)
// WriteFileType appends the "ftyp" atom: the brand "iso5", the value 512,
// and then the brands "iso5", "iso6" and "mp41".
func (m *Movie) WriteFileType() {
	const minorVersion = 512
	compatible := [...]string{"iso5", "iso6", "mp41"}

	m.StartAtom(Ftyp)
	m.WriteString("iso5") // major brand
	m.WriteUint32(minorVersion)
	for _, brand := range compatible {
		m.WriteString(brand)
	}
	m.EndAtom()
}
// WriteMovieHeader appends the "mvhd" atom. Every field is left zero except
// the time scale (1000), the preferred rate and volume (both 1), the matrix
// written by WriteMatrix and the next track ID (0xFFFFFFFF).
func (m *Movie) WriteMovieHeader() {
	const (
		timeScale   = 1000
		nextTrackID = 0xFFFFFFFF
	)

	m.StartAtom(MoovMvhd)
	m.Skip(1 + 3 + 4 + 4) // version, flags, create time, modify time
	m.WriteUint32(timeScale)
	m.Skip(4)         // duration
	m.WriteFloat32(1) // preferred rate
	m.WriteFloat16(1) // preferred volume
	m.Skip(10)        // reserved
	m.WriteMatrix()
	m.Skip(24) // six zeroed 32-bit fields (predefined?)
	m.WriteUint32(nextTrackID)
	m.EndAtom()
}
// WriteTrackHeader appends the "tkhd" atom for track id.
//
// A positive width selects the video layout: alternate group and volume are
// zero and width/height are stored as fixed-point numbers. Otherwise the
// audio layout is used: alternate group 1, volume 1 and zero dimensions.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-32963
func (m *Movie) WriteTrackHeader(id uint32, width, height uint16) {
	const (
		flagEnabled = 0x0001
		flagInMovie = 0x0002
	)

	// Decide all layout-dependent values up front; a zero value serializes
	// to the same zero bytes that skipping the field would produce.
	var (
		alternateGroup uint16
		volume, w, h   float64
	)
	if width > 0 {
		w, h = float64(width), float64(height)
	} else {
		alternateGroup, volume = 1, 1
	}

	m.StartAtom(MoovTrakTkhd)
	m.Skip(1) // version
	m.WriteUint24(flagEnabled | flagInMovie)
	m.Skip(4 + 4)             // create time, modify time
	m.WriteUint32(id)         // trackID
	m.Skip(4 + 4 + 8 + 2)     // reserved, duration, reserved, layer
	m.WriteUint16(alternateGroup)
	m.WriteFloat16(volume)
	m.Skip(2) // reserved
	m.WriteMatrix()
	m.WriteFloat32(w)
	m.WriteFloat32(h)
	m.EndAtom()
}
// WriteMediaHeader appends the "mdhd" atom carrying the given timescale and
// the language code 0x55C4; all other fields are zero.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-32999
func (m *Movie) WriteMediaHeader(timescale uint32) {
	const languageUnspecified = 0x55C4

	m.StartAtom(MoovTrakMdiaMdhd)
	m.Skip(1 + 3 + 4 + 4) // version, flags, creation time, modification time
	m.WriteUint32(timescale)
	m.Skip(4) // duration
	m.WriteUint16(languageUnspecified)
	m.Skip(2) // quality
	m.EndAtom()
}
// WriteMediaHandler appends the "hdlr" atom. s is the handler type and must
// be exactly four bytes long; name is a free-form handler name which is
// written followed by a single zero byte.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33004
func (m *Movie) WriteMediaHandler(s, name string) {
	m.StartAtom(MoovTrakMdiaHdlr)
	m.Skip(1 + 3 + 4)   // version, flags and one zeroed 32-bit field
	m.WriteString(s)    // handler type (4 byte!)
	m.Skip(12)          // reserved (3 x 4 bytes)
	m.WriteString(name) // handler name (any len)
	m.Skip(1)           // end string
	m.EndAtom()
}
// WriteVideoMediaInfo appends the "vmhd" atom with version 0, flags 1 and
// zeroed graphics mode and op color.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33012
func (m *Movie) WriteVideoMediaInfo() {
	m.StartAtom(MoovTrakMdiaMinfVmhd)
	// One big-endian word covers the zero version byte and the 24-bit
	// flags (You should always set this flag to 1).
	m.WriteUint32(1)
	m.Skip(2 + 3*2) // graphics mode, op color
	m.EndAtom()
}
// WriteAudioMediaInfo appends the "smhd" atom with every field zeroed.
func (m *Movie) WriteAudioMediaInfo() {
	const payload = 1 + 3 + 4 // version, flags, balance

	m.StartAtom(MoovTrakMdiaMinfSmhd)
	m.Skip(payload)
	m.EndAtom()
}
// WriteDataInfo appends the "dinf" atom holding a "dref" atom with exactly
// one child: a "url " entry whose flags are 1 (self reference).
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-25680
func (m *Movie) WriteDataInfo() {
	m.StartAtom(MoovTrakMdiaMinfDinf)
	m.StartAtom(MoovTrakMdiaMinfDinfDref)
	m.Skip(4)        // version + flags
	m.WriteUint32(1) // number of children

	m.StartAtom(MoovTrakMdiaMinfDinfDrefUrl)
	m.WriteUint32(1) // zero version byte followed by 24-bit flags = 1
	m.EndAtom()      // URL

	m.EndAtom() // DREF
	m.EndAtom() // DINF
}
// WriteSampleTable appends the "stbl" atom. Its "stsd" child announces one
// entry and delegates the entry itself to writeSampleDesc; the "stts",
// "stsc", "stsz" and "stco" children are written with all fields zero.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33040
func (m *Movie) WriteSampleTable(writeSampleDesc func()) {
	// zeroAtom writes an atom whose payload is n zero bytes.
	zeroAtom := func(name string, n int) {
		m.StartAtom(name)
		m.Skip(n)
		m.EndAtom()
	}

	m.StartAtom(MoovTrakMdiaMinfStbl)

	m.StartAtom(MoovTrakMdiaMinfStblStsd)
	m.Skip(1 + 3)    // version, flags
	m.WriteUint32(1) // entry count
	writeSampleDesc()
	m.EndAtom()

	zeroAtom(MoovTrakMdiaMinfStblStts, 1+3+4)   // version, flags, entry count
	zeroAtom(MoovTrakMdiaMinfStblStsc, 1+3+4)   // version, flags, entry count
	zeroAtom(MoovTrakMdiaMinfStblStsz, 1+3+4+4) // version, flags, sample size, entry count
	zeroAtom(MoovTrakMdiaMinfStblStco, 1+3+4)   // version, flags, entry count

	m.EndAtom()
}
// WriteTrackExtend appends the "trex" atom for track id with sample
// description index 1 and zero default duration, size and flags.
func (m *Movie) WriteTrackExtend(id uint32) {
	const defaultSampleDescIndex = 1

	m.StartAtom(MoovMvexTrex)
	m.Skip(1 + 3) // version, flags
	m.WriteUint32(id)
	m.WriteUint32(defaultSampleDescIndex)
	m.Skip(3 * 4) // default sample duration, size and flags
	m.EndAtom()
}
// WriteVideoTrack appends a complete "trak" atom for a video track: track
// header, media header, "vide" handler, video media info, data info and a
// sample table whose description is produced by WriteH26X (h264 selects
// between the two sample entry kinds WriteH26X supports).
func (m *Movie) WriteVideoTrack(id, timescale uint32, width, height uint16, conf []byte, h264 bool) {
	sampleDesc := func() {
		m.WriteH26X(width, height, conf, h264)
	}

	m.StartAtom(MoovTrak)
	m.WriteTrackHeader(id, width, height)

	m.StartAtom(MoovTrakMdia)
	m.WriteMediaHeader(timescale)
	m.WriteMediaHandler("vide", "VideoHandler")

	m.StartAtom(MoovTrakMdiaMinf)
	m.WriteVideoMediaInfo()
	m.WriteDataInfo()
	m.WriteSampleTable(sampleDesc)
	m.EndAtom() // MINF

	m.EndAtom() // MDIA
	m.EndAtom() // TRAK
}
// WriteAudioTrack appends a complete "trak" atom for an audio track: track
// header (written with zero dimensions), media header, "soun" handler,
// audio media info, data info and a sample table whose description is
// produced by WriteMP4A. timescale is also passed to WriteMP4A as the
// sample rate.
func (m *Movie) WriteAudioTrack(id uint32, timescale uint32, channels, sampleSize uint16, conf []byte) {
	sampleDesc := func() {
		m.WriteMP4A(channels, sampleSize, timescale, conf)
	}

	m.StartAtom(MoovTrak)
	m.WriteTrackHeader(id, 0, 0)

	m.StartAtom(MoovTrakMdia)
	m.WriteMediaHeader(timescale)
	m.WriteMediaHandler("soun", "SoundHandler")

	m.StartAtom(MoovTrakMdiaMinf)
	m.WriteAudioMediaInfo()
	m.WriteDataInfo()
	m.WriteSampleTable(sampleDesc)
	m.EndAtom() // MINF

	m.EndAtom() // MDIA
	m.EndAtom() // TRAK
}
// WriteMovieFragment appends a "moof" atom describing exactly one sample
// that is expected to follow immediately in an "mdat" atom (see WriteData).
//
//   - seq is the fragment sequence number written to "mfhd".
//   - tid is the track ID.
//   - duration, size and flags are written to "tfhd" as the default sample
//     duration, size and flags.
//   - time is the base media decode time written to "tfdt" (version 1,
//     64-bit).
//
// The "tfhd" atom sets the default-base-is-moof flag, so the "trun" data
// offset is computed relative to the first byte of this moof atom. That
// keeps the offset correct even when the Movie buffer already holds other
// atoms in front of the fragment.
func (m *Movie) WriteMovieFragment(seq, tid, duration, size, flags uint32, time uint64) {
	const (
		TfhdDefaultSampleDuration = 0x000008
		TfhdDefaultSampleSize     = 0x000010
		TfhdDefaultSampleFlags    = 0x000020
		TfhdDefaultBaseIsMoof     = 0x020000
	)
	const (
		TrunDataOffset       = 0x000001
		TrunFirstSampleFlags = 0x000004
		TrunSampleDuration   = 0x0000100
		TrunSampleSize       = 0x0000200
		TrunSampleFlags      = 0x0000400
		TrunSampleCTS        = 0x0000800
	)

	// Remember where the moof atom begins: the data offset is relative to it.
	moofStart := len(m.b)

	m.StartAtom(Moof)

	m.StartAtom(MoofMfhd)
	m.Skip(1)          // version
	m.Skip(3)          // flags
	m.WriteUint32(seq) // sequence number
	m.EndAtom()

	m.StartAtom(MoofTraf)

	m.StartAtom(MoofTrafTfhd)
	m.Skip(1) // version
	m.WriteUint24(
		TfhdDefaultSampleDuration |
			TfhdDefaultSampleSize |
			TfhdDefaultSampleFlags |
			TfhdDefaultBaseIsMoof,
	)
	m.WriteUint32(tid)      // track id
	m.WriteUint32(duration) // default sample duration
	m.WriteUint32(size)     // default sample size
	m.WriteUint32(flags)    // default sample flags
	m.EndAtom()

	m.StartAtom(MoofTrafTfdt)
	m.WriteBytes(1)     // version
	m.Skip(3)           // flags
	m.WriteUint64(time) // base media decode time
	m.EndAtom()

	m.StartAtom(MoofTrafTrun)
	m.Skip(1)                     // version
	m.WriteUint24(TrunDataOffset) // flags
	m.WriteUint32(1)              // sample count
	// data offset: bytes of moof written so far + this uint32 + MDAT header
	m.WriteUint32(uint32(len(m.b)-moofStart) + 4 + 8)
	m.EndAtom() // TRUN

	m.EndAtom() // TRAF
	m.EndAtom() // MOOF
}
// WriteData appends an "mdat" atom whose payload is a copy of b.
func (m *Movie) WriteData(b []byte) {
	m.StartAtom(Mdat)
	defer m.EndAtom() // patch the size once the payload is in place
	m.Write(b)
}
+97
View File
@@ -0,0 +1,97 @@
package mov
// Four-character codes of the sample entries written inside "stsd" and of
// the codec configuration atoms nested in them.
const (
MoovTrakMdiaMinfStblStsdAvc1 = "avc1"
MoovTrakMdiaMinfStblStsdAvc1AvcC = "avcC"
MoovTrakMdiaMinfStblStsdHev1 = "hev1"
MoovTrakMdiaMinfStblStsdHev1HvcC = "hvcC"
MoovTrakMdiaMinfStblStsdMp4a = "mp4a"
)
// WriteH26X appends a video sample entry followed by its codec configuration
// atom. With h264 set the pair is "avc1"/"avcC", otherwise "hev1"/"hvcC".
// conf is copied verbatim as the payload of the configuration atom.
//
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html
func (m *Movie) WriteH26X(width, height uint16, conf []byte, h264 bool) {
	entry, config := MoovTrakMdiaMinfStblStsdHev1, MoovTrakMdiaMinfStblStsdHev1HvcC
	if h264 {
		entry, config = MoovTrakMdiaMinfStblStsdAvc1, MoovTrakMdiaMinfStblStsdAvc1AvcC
	}

	m.StartAtom(entry)
	m.Skip(6)
	m.WriteUint16(1)              // data_reference_index
	m.Skip(2 + 2 + 4 + 4 + 4)     // version, revision, vendor, temporal and spatial quality
	m.WriteUint16(width)          // width
	m.WriteUint16(height)         // height
	m.WriteFloat32(72)            // horizontal resolution
	m.WriteFloat32(72)            // vertical resolution
	m.Skip(4)                     // reserved
	m.WriteUint16(1)              // frame count
	m.Skip(32)                    // compressor name
	m.WriteUint16(24)             // depth
	m.WriteUint16(0xFFFF)         // color table id (-1)

	m.StartAtom(config)
	m.Write(conf)
	m.EndAtom() // configuration atom

	m.EndAtom() // sample entry
}
// WriteMP4A appends the "mp4a" audio sample entry with the given channel
// count, sample size and sample rate, followed by the "esds" atom that
// WriteESDS builds from conf.
func (m *Movie) WriteMP4A(channels, sampleSize uint16, sampleRate uint32, conf []byte) {
	rate := float64(sampleRate)

	m.StartAtom(MoovTrakMdiaMinfStblStsdMp4a)
	m.Skip(6)
	m.WriteUint16(1)          // data_reference_index
	m.Skip(2 + 2 + 4)         // version, revision, vendor
	m.WriteUint16(channels)   // channel_count
	m.WriteUint16(sampleSize) // sample_size
	m.Skip(2 + 2)             // compression id, reserved
	m.WriteFloat32(rate)      // sample_rate
	m.WriteESDS(conf)
	m.EndAtom() // MP4A
}
// WriteESDS appends the "esds" atom wrapping conf in the nested descriptors:
//
//	MP4ESDescrTag[3]:
//	  - MP4DecConfigDescrTag[4]:
//	      - MP4DecSpecificDescrTag[5]: conf
//	  - Other[6]
//
// Every descriptor header is a tag byte followed by a 4-byte size in which
// each byte carries 7 bits of the value and all but the last byte have the
// high bit set. Sizes are computed as int and split across those four
// bytes, so a conf of 128 bytes or more no longer wraps or yields an invalid
// size byte. For short conf the output is 0x80 0x80 0x80 <size>.
func (m *Movie) WriteESDS(conf []byte) {
	const (
		descrHeader   = 5  // tag byte + 4 size bytes
		esBody        = 3  // es id (2) + es flags (1)
		decConfigBody = 13 // object id, stream type, buffer size, max/avg bitrate
		slConfigBody  = 1
	)

	// writeDescr writes a descriptor header: tag plus 4-byte expandable size.
	writeDescr := func(tag byte, size int) {
		m.WriteBytes(
			tag,
			0x80|byte((size>>21)&0x7F),
			0x80|byte((size>>14)&0x7F),
			0x80|byte((size>>7)&0x7F),
			byte(size&0x7F),
		)
	}

	confSize := len(conf)
	decConfigSize := decConfigBody + descrHeader + confSize
	esSize := esBody + descrHeader + decConfigSize + descrHeader + slConfigBody

	m.StartAtom("esds")
	m.Skip(1) // version
	m.Skip(3) // flags

	writeDescr(3, esSize)
	m.Skip(2) // es id
	m.Skip(1) // es flags

	writeDescr(4, decConfigSize)
	m.WriteBytes(0x40) // object id
	m.WriteBytes(0x15) // stream type
	m.Skip(3)          // buffer size db
	m.Skip(4)          // max bitrate
	m.Skip(4)          // avg bitrate

	writeDescr(5, confSize)
	m.Write(conf)

	writeDescr(6, slConfigBody)
	m.WriteBytes(2) // single-byte body of descriptor 6

	m.EndAtom() // ESDS
}
+91
View File
@@ -0,0 +1,91 @@
package mov
import (
"encoding/binary"
"math"
)
// Movie is an append-only byte buffer with helpers for writing nested
// MOV/MP4 atoms. Atoms are opened with StartAtom and closed with EndAtom,
// which back-patches the atom size.
type Movie struct {
	b     []byte // serialized output
	start []int  // offsets in b of the atoms that are currently open
}

// NewMovie returns an empty Movie whose buffer has an initial capacity of
// size bytes.
func NewMovie(size int) *Movie {
	return &Movie{b: make([]byte, 0, size)}
}

// Bytes returns the internal buffer itself, not a copy.
func (m *Movie) Bytes() []byte {
	return m.b
}

// StartAtom opens a new atom: it reserves four bytes for the size, writes
// name and remembers the atom's offset so EndAtom can fill in the size.
func (m *Movie) StartAtom(name string) {
	m.start = append(m.start, len(m.b))
	m.b = append(m.b, 0, 0, 0, 0)
	m.b = append(m.b, name...)
}

// EndAtom closes the most recently opened atom by storing its total length
// (header included) as a big-endian uint32 in the reserved size field.
// It panics when no atom is open.
func (m *Movie) EndAtom() {
	n := len(m.start) - 1
	i := m.start[n]
	size := uint32(len(m.b) - i)
	binary.BigEndian.PutUint32(m.b[i:], size)
	m.start = m.start[:n]
}

// Write appends b.
func (m *Movie) Write(b []byte) {
	m.b = append(m.b, b...)
}

// WriteBytes appends the given bytes.
func (m *Movie) WriteBytes(b ...byte) {
	m.b = append(m.b, b...)
}

// WriteString appends the bytes of s without any terminator.
func (m *Movie) WriteString(s string) {
	m.b = append(m.b, s...)
}

// Skip appends n zero bytes.
func (m *Movie) Skip(n int) {
	m.b = append(m.b, make([]byte, n)...)
}

// WriteUint16 appends v in big-endian order.
func (m *Movie) WriteUint16(v uint16) {
	m.b = append(m.b, byte(v>>8), byte(v))
}

// WriteUint24 appends the low 24 bits of v in big-endian order.
func (m *Movie) WriteUint24(v uint32) {
	m.b = append(m.b, byte(v>>16), byte(v>>8), byte(v))
}

// WriteUint32 appends v in big-endian order.
func (m *Movie) WriteUint32(v uint32) {
	m.b = append(m.b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// WriteUint64 appends v in big-endian order.
func (m *Movie) WriteUint64(v uint64) {
	m.b = append(m.b, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// WriteFloat16 appends f as an 8.8 fixed-point number: one byte for the
// integer part and one byte for the fraction. f is expected to be in
// [0, 256).
func (m *Movie) WriteFloat16(f float64) {
	i, frac := math.Modf(f)
	whole := uint8(i)
	part := uint8(frac * 256)
	m.b = append(m.b, whole, part)
}

// WriteFloat32 appends f as a 16.16 fixed-point number in big-endian order.
// f is expected to be in [0, 65536).
//
// Each part is converted to uint16 before it is split into bytes. Converting
// a float64 such as 1920.0 straight to byte is out of range for the target
// type, and the Go specification leaves that result implementation-dependent.
func (m *Movie) WriteFloat32(f float64) {
	i, frac := math.Modf(f)
	whole := uint16(i)
	part := uint16(frac * 65536)
	m.b = append(m.b, byte(whole>>8), byte(whole), byte(part>>8), byte(part))
}

// WriteMatrix appends a 3x3 matrix of nine 32-bit values that is zero
// everywhere except the diagonal: 0x00010000, 0x00010000 and 0x40000000.
func (m *Movie) WriteMatrix() {
	m.WriteUint32(0x00010000)
	m.Skip(4)
	m.Skip(4)
	m.Skip(4)
	m.WriteUint32(0x00010000)
	m.Skip(4)
	m.Skip(4)
	m.Skip(4)
	m.WriteUint32(0x40000000)
}
+25 -14
View File
@@ -1,19 +1,30 @@
## Fragmented MP4
```
ffmpeg -i "rtsp://..." -movflags +frag_keyframe+separate_moof+default_base_moof+empty_moov -frag_duration 1 -c copy -t 5 sample.mp4
```
- movflags frag_keyframe
Start a new fragment at each video keyframe.
- frag_duration duration
Create fragments that are duration microseconds long.
- movflags separate_moof
Write a separate moof (movie fragment) atom for each track.
- movflags default_base_moof
Similarly to the omit_tfhd_offset, this flag avoids writing the absolute base_data_offset field in tfhd atoms, but does so by using the new default-base-is-moof flag instead.
https://ffmpeg.org/ffmpeg-formats.html#Options-13
## HEVC
Browser | avc1 | hvc1 | hev1
------------|------|------|---
Mac Chrome | + | - | +
Mac Safari | + | + | -
iOS 15? | + | + | -
Mac Firefox | + | - | -
iOS 12 | + | - | -
Android 13 | + | - | -
```
ffmpeg -i input-hev1.mp4 -c:v copy -tag:v hvc1 -c:a copy output-hvc1.mp4
Stream #0:0(eng): Video: hevc (Main) (hev1 / 0x31766568), yuv420p(tv, progressive), 720x404, 164 kb/s, 29.97 fps,
Stream #0:0(eng): Video: hevc (Main) (hvc1 / 0x31637668), yuv420p(tv, progressive), 720x404, 164 kb/s, 29.97 fps,
```
| Browser | avc1 | hvc1 | hev1 |
|-------------|------|------|------|
| Mac Chrome | + | - | + |
| Mac Safari | + | + | - |
| iOS 15? | + | + | - |
| Mac Firefox | + | - | - |
| iOS 12 | + | - | - |
| Android 13 | + | - | - |
## Useful links
+52 -149
View File
@@ -1,22 +1,19 @@
package mp4
import (
"encoding/binary"
"encoding/hex"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/mov"
"github.com/AlexxIT/go2rtc/pkg/streamer"
"github.com/deepch/vdk/av"
"github.com/deepch/vdk/codec/h264parser"
"github.com/deepch/vdk/codec/h265parser"
"github.com/deepch/vdk/format/fmp4/fmp4io"
"github.com/deepch/vdk/format/mp4/mp4io"
"github.com/deepch/vdk/format/mp4f/mp4fio"
"github.com/pion/rtp"
)
type Muxer struct {
fragIndex uint32
flags []uint32
dts []uint64
pts []uint32
}
@@ -45,7 +42,11 @@ func (m *Muxer) MimeType(codecs []*streamer.Codec) string {
}
func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) {
moov := MOOV()
mv := mov.NewMovie(1024)
mv.WriteFileType()
mv.StartAtom(mov.Moov)
mv.WriteMovieHeader()
for i, codec := range codecs {
switch codec.Name {
@@ -62,35 +63,13 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) {
return nil, err
}
width := codecData.Width()
height := codecData.Height()
mv.WriteVideoTrack(
uint32(i+1), codec.ClockRate,
uint16(codecData.Width()), uint16(codecData.Height()),
codecData.AVCDecoderConfRecordBytes(), true,
)
trak := TRAK(i + 1)
trak.Header.TrackWidth = float64(width)
trak.Header.TrackHeight = float64(height)
trak.Media.Header.TimeScale = int32(codec.ClockRate)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'v', 'i', 'd', 'e'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Video = &mp4io.VideoMediaInfo{
Flags: 0x000001,
}
trak.Media.Info.Sample.SampleDesc.AVC1Desc = &mp4io.AVC1Desc{
DataRefIdx: 1,
HorizontalResolution: 72,
VorizontalResolution: 72,
Width: int16(width),
Height: int16(height),
FrameCount: 1,
Depth: 24,
ColorTableId: -1,
Conf: &mp4io.AVC1Conf{
Data: codecData.AVCDecoderConfRecordBytes(),
},
}
moov.Tracks = append(moov.Tracks, trak)
m.flags = append(m.flags, 0x1010000)
case streamer.CodecH265:
vps, sps, pps := h265.GetParameterSet(codec.FmtpLine)
@@ -106,35 +85,13 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) {
return nil, err
}
width := codecData.Width()
height := codecData.Height()
mv.WriteVideoTrack(
uint32(i+1), codec.ClockRate,
uint16(codecData.Width()), uint16(codecData.Height()),
codecData.AVCDecoderConfRecordBytes(), false,
)
trak := TRAK(i + 1)
trak.Header.TrackWidth = float64(width)
trak.Header.TrackHeight = float64(height)
trak.Media.Header.TimeScale = int32(codec.ClockRate)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'v', 'i', 'd', 'e'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Video = &mp4io.VideoMediaInfo{
Flags: 0x000001,
}
trak.Media.Info.Sample.SampleDesc.HV1Desc = &mp4io.HV1Desc{
DataRefIdx: 1,
HorizontalResolution: 72,
VorizontalResolution: 72,
Width: int16(width),
Height: int16(height),
FrameCount: 1,
Depth: 24,
ColorTableId: -1,
Conf: &mp4io.HV1Conf{
Data: codecData.AVCDecoderConfRecordBytes(),
},
}
moov.Tracks = append(moov.Tracks, trak)
m.flags = append(m.flags, 0x1010000)
case streamer.CodecAAC:
s := streamer.Between(codec.FmtpLine, "config=", ";")
@@ -143,44 +100,26 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) {
return nil, err
}
trak := TRAK(i + 1)
trak.Header.AlternateGroup = 1
trak.Header.Duration = 0
trak.Header.Volume = 1
trak.Media.Header.TimeScale = int32(codec.ClockRate)
mv.WriteAudioTrack(
uint32(i+1), codec.ClockRate, codec.Channels, 16, b,
)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'s', 'o', 'u', 'n'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Sound = &mp4io.SoundMediaInfo{}
trak.Media.Info.Sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{
DataRefIdx: 1,
NumberOfChannels: int16(codec.Channels),
SampleSize: int16(av.FLTP.BytesPerSample() * 4),
SampleRate: float64(codec.ClockRate),
Unknowns: []mp4io.Atom{ESDS(b)},
}
moov.Tracks = append(moov.Tracks, trak)
m.flags = append(m.flags, 0x2000000)
}
trex := &mp4io.TrackExtend{
TrackId: uint32(i + 1),
DefaultSampleDescIdx: 1,
DefaultSampleDuration: 0,
}
moov.MovieExtend.Tracks = append(moov.MovieExtend.Tracks, trex)
m.pts = append(m.pts, 0)
m.dts = append(m.dts, 0)
}
data := make([]byte, moov.Len())
moov.Marshal(data)
mv.StartAtom(mov.MoovMvex)
for i := range codecs {
mv.WriteTrackExtend(uint32(i + 1))
}
mv.EndAtom() // MVEX
return append(FTYP(), data...), nil
mv.EndAtom() // MOOV
return mv.Bytes(), nil
}
func (m *Muxer) Reset() {
@@ -192,65 +131,29 @@ func (m *Muxer) Reset() {
}
func (m *Muxer) Marshal(trackID byte, packet *rtp.Packet) []byte {
run := &mp4fio.TrackFragRun{
Flags: 0x000b05,
FirstSampleFlags: uint32(fmp4io.SampleNoDependencies),
DataOffset: 0,
Entries: []mp4io.TrackFragRunEntry{},
}
moof := &mp4fio.MovieFrag{
Header: &mp4fio.MovieFragHeader{
Seqnum: m.fragIndex + 1,
},
Tracks: []*mp4fio.TrackFrag{
{
Header: &mp4fio.TrackFragHeader{
Data: []byte{0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, trackID + 1, 0x01, 0x01, 0x00, 0x00},
},
DecodeTime: &mp4fio.TrackFragDecodeTime{
Version: 1,
Flags: 0,
Time: m.dts[trackID],
},
Run: run,
},
},
}
entry := mp4io.TrackFragRunEntry{
Size: uint32(len(packet.Payload)),
}
newTime := packet.Timestamp
if m.pts[trackID] > 0 {
entry.Duration = newTime - m.pts[trackID]
m.dts[trackID] += uint64(entry.Duration)
} else {
// important, or Safari will fail with first frame
entry.Duration = 1
}
m.pts[trackID] = newTime
// important before moof.Len()
run.Entries = append(run.Entries, entry)
moofLen := moof.Len()
mdatLen := 8 + len(packet.Payload)
// important after moof.Len()
run.DataOffset = uint32(moofLen + 8)
buf := make([]byte, moofLen+mdatLen)
moof.Marshal(buf)
binary.BigEndian.PutUint32(buf[moofLen:], uint32(mdatLen))
copy(buf[moofLen+4:], "mdat")
copy(buf[moofLen+8:], packet.Payload)
// important before increment
time := m.dts[trackID]
m.fragIndex++
//m.total += moofLen + mdatLen
var duration uint32
newTime := packet.Timestamp
if m.pts[trackID] > 0 {
duration = newTime - m.pts[trackID]
m.dts[trackID] += uint64(duration)
} else {
// important, or Safari will fail with first frame
duration = 1
}
m.pts[trackID] = newTime
return buf
mv := mov.NewMovie(1024 + len(packet.Payload))
mv.WriteMovieFragment(
m.fragIndex, uint32(trackID+1), duration,
uint32(len(packet.Payload)),
m.flags[trackID], time,
)
mv.WriteData(packet.Payload)
return mv.Bytes()
}
@@ -1,4 +1,4 @@
package mp4f
package mp4
import (
"github.com/AlexxIT/go2rtc/pkg/h264"
+174
View File
@@ -0,0 +1,174 @@
package mp4
import (
"encoding/json"
"github.com/AlexxIT/go2rtc/pkg/aac"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/streamer"
"github.com/pion/rtp"
"sync/atomic"
)
// Consumer receives tracks, muxes their packets with a Muxer and emits the
// resulting byte slices through Fire.
type Consumer struct {
streamer.Element
// Medias, when non-nil, is returned by GetMedias instead of the defaults.
Medias []*streamer.Media
// UserAgent and RemoteAddr are reported by MarshalJSON.
UserAgent string
RemoteAddr string
// muxer is created by Init.
muxer *Muxer
// codecs holds one codec per added track; the index is the track ID.
codecs []*streamer.Codec
// wait is one of waitNone, waitKeyframe or waitInit.
wait byte
// send counts the bytes passed to Fire; it is accessed atomically.
send uint32
}
// Values of Consumer.wait.
const (
// waitNone: packets are muxed and sent.
waitNone byte = iota
// waitKeyframe: packets are dropped until a video keyframe arrives.
waitKeyframe
// waitInit: packets are dropped until Start is called.
waitInit
)
// GetMedias returns c.Medias when it is set. Otherwise it returns the
// default receive-only medias: video (H264 or H265) and audio (AAC).
func (c *Consumer) GetMedias() []*streamer.Media {
	if medias := c.Medias; medias != nil {
		return medias
	}

	// default medias
	video := &streamer.Media{
		Kind:      streamer.KindVideo,
		Direction: streamer.DirectionRecvonly,
		Codecs: []*streamer.Codec{
			{Name: streamer.CodecH264},
			{Name: streamer.CodecH265},
		},
	}
	audio := &streamer.Media{
		Kind:      streamer.KindAudio,
		Direction: streamer.DirectionRecvonly,
		Codecs: []*streamer.Codec{
			{Name: streamer.CodecAAC},
		},
	}
	return []*streamer.Media{video, audio}
}
// AddTrack registers the track's codec (its index in c.codecs becomes the
// track ID) and binds a writer to the track that muxes every accepted
// packet with c.muxer, adds the result's length to c.send and passes it to
// Fire.
//
// Adding an H264 or H265 track sets c.wait to waitInit, so nothing is sent
// until Start has been called and a keyframe has arrived. AAC packets are
// dropped while c.wait is not waitNone.
//
// It panics for any other codec. The media argument is not used.
func (c *Consumer) AddTrack(media *streamer.Media, track *streamer.Track) *streamer.Track {
trackID := byte(len(c.codecs))
c.codecs = append(c.codecs, track.Codec)
codec := track.Codec
switch codec.Name {
case streamer.CodecH264:
c.wait = waitInit
push := func(packet *rtp.Packet) error {
// only packets marked with the AVC version value are muxed
if packet.Version != h264.RTPPacketVersionAVC {
return nil
}
if c.wait != waitNone {
// still waiting for Start, or for the first keyframe after it
if c.wait == waitInit || !h264.IsKeyframe(packet.Payload) {
return nil
}
c.wait = waitNone
}
buf := c.muxer.Marshal(trackID, packet)
atomic.AddUint32(&c.send, uint32(len(buf)))
c.Fire(buf)
return nil
}
// pick the wrapper that is applied to push before binding
var wrapper streamer.WrapperFunc
if codec.IsRTP() {
wrapper = h264.RTPDepay(track)
} else {
wrapper = h264.RepairAVC(track)
}
push = wrapper(push)
return track.Bind(push)
case streamer.CodecH265:
c.wait = waitInit
push := func(packet *rtp.Packet) error {
// the same version constant from the h264 package is used for H265
if packet.Version != h264.RTPPacketVersionAVC {
return nil
}
if c.wait != waitNone {
if c.wait == waitInit || !h265.IsKeyframe(packet.Payload) {
return nil
}
c.wait = waitNone
}
buf := c.muxer.Marshal(trackID, packet)
atomic.AddUint32(&c.send, uint32(len(buf)))
c.Fire(buf)
return nil
}
if codec.IsRTP() {
wrapper := h265.RTPDepay(track)
push = wrapper(push)
}
return track.Bind(push)
case streamer.CodecAAC:
push := func(packet *rtp.Packet) error {
// audio is dropped while c.wait is not waitNone
if c.wait != waitNone {
return nil
}
buf := c.muxer.Marshal(trackID, packet)
atomic.AddUint32(&c.send, uint32(len(buf)))
c.Fire(buf)
return nil
}
if codec.IsRTP() {
wrapper := aac.RTPDepay(track)
push = wrapper(push)
}
return track.Bind(push)
}
panic("unsupported codec")
}
// MimeType returns the MIME type string the muxer builds for the codecs of
// the added tracks. c.muxer must have been set (see Init).
func (c *Consumer) MimeType() string {
	codecs := c.codecs
	return c.muxer.MimeType(codecs)
}
// Init replaces c.muxer with a fresh Muxer and returns the result of its
// GetInit for the codecs of the added tracks.
func (c *Consumer) Init() ([]byte, error) {
	muxer := new(Muxer)
	c.muxer = muxer
	return muxer.GetInit(c.codecs)
}
// Start moves the consumer from waitInit to waitKeyframe; in any other
// state it does nothing.
func (c *Consumer) Start() {
	switch c.wait {
	case waitInit:
		c.wait = waitKeyframe
	}
}
//
// MarshalJSON encodes a streamer.Info describing this consumer: its type
// label, remote address, user agent and the number of bytes sent so far.
func (c *Consumer) MarshalJSON() ([]byte, error) {
	sent := atomic.LoadUint32(&c.send)
	return json.Marshal(&streamer.Info{
		Type:       "MP4 client",
		RemoteAddr: c.RemoteAddr,
		UserAgent:  c.UserAgent,
		Send:       sent,
	})
}
+256
View File
@@ -0,0 +1,256 @@
package mp4
import (
"encoding/binary"
"encoding/hex"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/streamer"
"github.com/deepch/vdk/av"
"github.com/deepch/vdk/codec/h264parser"
"github.com/deepch/vdk/codec/h265parser"
"github.com/deepch/vdk/format/fmp4/fmp4io"
"github.com/deepch/vdk/format/mp4/mp4io"
"github.com/deepch/vdk/format/mp4f/mp4fio"
"github.com/pion/rtp"
)
// Muxer builds the init segment (GetInit) and per-packet fragments
// (Marshal). dts and pts are indexed by track ID and get one entry per
// codec passed to GetInit.
type Muxer struct {
// fragIndex is incremented by every Marshal call and cleared by Reset.
fragIndex uint32
// dts accumulates the sample durations of each track.
dts []uint64
// pts holds the timestamp of the last packet muxed for each track.
pts []uint32
}
// MimeType builds a `video/mp4; codecs="..."` string with one comma
// separated entry per codec. Codecs other than H264, H265 and AAC
// contribute an empty entry.
func (m *Muxer) MimeType(codecs []*streamer.Codec) string {
	mime := `video/mp4; codecs="`

	for i := range codecs {
		var entry string
		switch codec := codecs[i]; codec.Name {
		case streamer.CodecH264:
			entry = "avc1." + h264.GetProfileLevelID(codec.FmtpLine)
		case streamer.CodecH265:
			// H.265 profile=main level=5.1
			// hvc1 - supported in Safari, hev1 - doesn't, both supported in Chrome
			entry = "hvc1.1.6.L153.B0"
		case streamer.CodecAAC:
			entry = "mp4a.40.2"
		}

		if i != 0 {
			mime += ","
		}
		mime += entry
	}

	mime += `"`
	return mime
}
// GetInit builds the initialization data for the given codecs: the bytes
// returned by FTYP() followed by a marshalled movie atom with one track per
// H264, H265 or AAC codec and one track-extend entry per codec. Track IDs
// are the codec index plus one.
//
// For every codec a zero entry is appended to m.pts and m.dts.
//
// An error is returned when the H264/H265 parameter sets cannot be parsed
// or the AAC "config=" value is not valid hex.
func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) {
moov := MOOV()
for i, codec := range codecs {
switch codec.Name {
case streamer.CodecH264:
sps, pps := h264.GetParameterSet(codec.FmtpLine)
if sps == nil {
// some dummy SPS and PPS not a problem
sps = []byte{0x67, 0x42, 0x00, 0x0a, 0xf8, 0x41, 0xa2}
pps = []byte{0x68, 0xce, 0x38, 0x80}
}
codecData, err := h264parser.NewCodecDataFromSPSAndPPS(sps, pps)
if err != nil {
return nil, err
}
width := codecData.Width()
height := codecData.Height()
trak := TRAK(i + 1)
trak.Header.TrackWidth = float64(width)
trak.Header.TrackHeight = float64(height)
trak.Media.Header.TimeScale = int32(codec.ClockRate)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'v', 'i', 'd', 'e'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Video = &mp4io.VideoMediaInfo{
Flags: 0x000001,
}
trak.Media.Info.Sample.SampleDesc.AVC1Desc = &mp4io.AVC1Desc{
DataRefIdx: 1,
HorizontalResolution: 72,
VorizontalResolution: 72,
Width: int16(width),
Height: int16(height),
FrameCount: 1,
Depth: 24,
ColorTableId: -1,
Conf: &mp4io.AVC1Conf{
Data: codecData.AVCDecoderConfRecordBytes(),
},
}
moov.Tracks = append(moov.Tracks, trak)
case streamer.CodecH265:
vps, sps, pps := h265.GetParameterSet(codec.FmtpLine)
if sps == nil {
// some dummy SPS and PPS not a problem
vps = []byte{0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0xac, 0x09}
sps = []byte{0x42, 0x01, 0x01, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0xa0, 0x01, 0x40, 0x20, 0x05, 0xa1, 0xfe, 0x5a, 0xee, 0x46, 0xc1, 0xae, 0x55, 0x04}
pps = []byte{0x44, 0x01, 0xc0, 0x73, 0xc0, 0x4c, 0x90}
}
codecData, err := h265parser.NewCodecDataFromVPSAndSPSAndPPS(vps, sps, pps)
if err != nil {
return nil, err
}
width := codecData.Width()
height := codecData.Height()
trak := TRAK(i + 1)
trak.Header.TrackWidth = float64(width)
trak.Header.TrackHeight = float64(height)
trak.Media.Header.TimeScale = int32(codec.ClockRate)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'v', 'i', 'd', 'e'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Video = &mp4io.VideoMediaInfo{
Flags: 0x000001,
}
trak.Media.Info.Sample.SampleDesc.HV1Desc = &mp4io.HV1Desc{
DataRefIdx: 1,
HorizontalResolution: 72,
VorizontalResolution: 72,
Width: int16(width),
Height: int16(height),
FrameCount: 1,
Depth: 24,
ColorTableId: -1,
Conf: &mp4io.HV1Conf{
Data: codecData.AVCDecoderConfRecordBytes(),
},
}
moov.Tracks = append(moov.Tracks, trak)
case streamer.CodecAAC:
// the codec configuration is the hex value of "config=" in the fmtp line
s := streamer.Between(codec.FmtpLine, "config=", ";")
b, err := hex.DecodeString(s)
if err != nil {
return nil, err
}
trak := TRAK(i + 1)
trak.Header.AlternateGroup = 1
trak.Header.Duration = 0
trak.Header.Volume = 1
trak.Media.Header.TimeScale = int32(codec.ClockRate)
trak.Media.Handler = &mp4io.HandlerRefer{
SubType: [4]byte{'s', 'o', 'u', 'n'},
Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0},
}
trak.Media.Info.Sound = &mp4io.SoundMediaInfo{}
trak.Media.Info.Sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{
DataRefIdx: 1,
NumberOfChannels: int16(codec.Channels),
SampleSize: int16(av.FLTP.BytesPerSample() * 4),
SampleRate: float64(codec.ClockRate),
Unknowns: []mp4io.Atom{ESDS(b)},
}
moov.Tracks = append(moov.Tracks, trak)
}
// every codec, including unsupported ones, gets a track-extend entry
// and per-track timing state
trex := &mp4io.TrackExtend{
TrackId: uint32(i + 1),
DefaultSampleDescIdx: 1,
DefaultSampleDuration: 0,
}
moov.MovieExtend.Tracks = append(moov.MovieExtend.Tracks, trex)
m.pts = append(m.pts, 0)
m.dts = append(m.dts, 0)
}
data := make([]byte, moov.Len())
moov.Marshal(data)
return append(FTYP(), data...), nil
}
// Reset clears the fragment counter and zeroes the per-track dts and pts
// values while keeping the slices' lengths.
func (m *Muxer) Reset() {
	m.fragIndex = 0
	for track := 0; track < len(m.dts); track++ {
		m.dts[track], m.pts[track] = 0, 0
	}
}
// Marshal wraps packet.Payload as a single sample of track trackID in a
// movie fragment immediately followed by an "mdat" atom, and returns both in
// one newly allocated buffer.
//
// The sample duration is the difference between packet.Timestamp and the
// previous timestamp of the track, or 1 while no previous non-zero timestamp
// is stored. The decode time written into the fragment is the value of
// m.dts[trackID] before this packet's duration is added. m.fragIndex is
// incremented on every call.
func (m *Muxer) Marshal(trackID byte, packet *rtp.Packet) []byte {
run := &mp4fio.TrackFragRun{
Flags: 0x000b05,
FirstSampleFlags: uint32(fmp4io.SampleNoDependencies),
DataOffset: 0,
Entries: []mp4io.TrackFragRunEntry{},
}
moof := &mp4fio.MovieFrag{
Header: &mp4fio.MovieFragHeader{
Seqnum: m.fragIndex + 1,
},
Tracks: []*mp4fio.TrackFrag{
{
Header: &mp4fio.TrackFragHeader{
// raw header bytes; the track ID (trackID + 1) is embedded in them
Data: []byte{0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, trackID + 1, 0x01, 0x01, 0x00, 0x00},
},
DecodeTime: &mp4fio.TrackFragDecodeTime{
Version: 1,
Flags: 0,
Time: m.dts[trackID],
},
Run: run,
},
},
}
entry := mp4io.TrackFragRunEntry{
Size: uint32(len(packet.Payload)),
}
newTime := packet.Timestamp
if m.pts[trackID] > 0 {
entry.Duration = newTime - m.pts[trackID]
m.dts[trackID] += uint64(entry.Duration)
} else {
// important, or Safari will fail with first frame
entry.Duration = 1
}
m.pts[trackID] = newTime
// important before moof.Len()
run.Entries = append(run.Entries, entry)
moofLen := moof.Len()
mdatLen := 8 + len(packet.Payload)
// important after moof.Len()
// the payload starts right after the moof and the 8-byte mdat header
run.DataOffset = uint32(moofLen + 8)
buf := make([]byte, moofLen+mdatLen)
moof.Marshal(buf)
// write the mdat atom by hand: size, type, payload
binary.BigEndian.PutUint32(buf[moofLen:], uint32(mdatLen))
copy(buf[moofLen+4:], "mdat")
copy(buf[moofLen+8:], packet.Payload)
m.fragIndex++
//m.total += moofLen + mdatLen
return buf
}
+143
View File
@@ -0,0 +1,143 @@
package mp4
import (
"encoding/json"
"github.com/AlexxIT/go2rtc/pkg/h264"
"github.com/AlexxIT/go2rtc/pkg/h265"
"github.com/AlexxIT/go2rtc/pkg/streamer"
"github.com/pion/rtp"
"sync/atomic"
)
// Segment receives a single video track and emits, through Fire, buffers
// that each start with the init data returned by Muxer.GetInit.
type Segment struct {
streamer.Element
// Medias, when non-nil, is returned by GetMedias instead of the defaults.
Medias []*streamer.Media
// UserAgent and RemoteAddr are reported by MarshalJSON.
UserAgent string
RemoteAddr string
// MimeType is set by AddTrack from the track's codec.
MimeType string
// OnlyKeyframe makes an H264 track emit one buffer per keyframe and drop
// all other frames.
OnlyKeyframe bool
// send counts emitted bytes; it is accessed atomically.
send uint32
}
// GetMedias returns c.Medias when it is set. Otherwise it returns the
// default: a single receive-only video media accepting H264 or H265.
func (c *Segment) GetMedias() []*streamer.Media {
	if medias := c.Medias; medias != nil {
		return medias
	}

	// default medias
	video := &streamer.Media{
		Kind:      streamer.KindVideo,
		Direction: streamer.DirectionRecvonly,
		Codecs: []*streamer.Codec{
			{Name: streamer.CodecH264},
			{Name: streamer.CodecH265},
		},
	}
	return []*streamer.Media{video}
}
// AddTrack creates a dedicated Muxer for the track, stores the resulting
// MIME type in c.MimeType and binds a writer that emits buffers through
// Fire. Every emitted buffer begins with the init data of the track.
//
//   - H264 with OnlyKeyframe, and H265: each keyframe is emitted as its own
//     buffer; other frames are dropped.
//   - H264 without OnlyKeyframe: the first keyframe is emitted alone; after
//     that, every keyframe flushes the buffer collected since the previous
//     keyframe and starts a new one.
//
// It returns nil when the init data cannot be built and panics for codecs
// other than H264 and H265. The media argument is not used.
func (c *Segment) AddTrack(media *streamer.Media, track *streamer.Track) *streamer.Track {
	muxer := &Muxer{}
	codecs := []*streamer.Codec{track.Codec}

	init, err := muxer.GetInit(codecs)
	if err != nil {
		return nil
	}
	// Clip the capacity so that append(init, ...) always copies into a new
	// array. Otherwise two emitted buffers could share (and overwrite) the
	// spare capacity behind init.
	init = init[:len(init):len(init)]

	c.MimeType = muxer.MimeType(codecs)

	switch track.Codec.Name {
	case streamer.CodecH264:
		var push streamer.WriterFunc
		if c.OnlyKeyframe {
			push = func(packet *rtp.Packet) error {
				if !h264.IsKeyframe(packet.Payload) {
					return nil
				}
				buf := muxer.Marshal(0, packet)
				atomic.AddUint32(&c.send, uint32(len(buf)))
				c.Fire(append(init, buf...))
				return nil
			}
		} else {
			var buf []byte
			push = func(packet *rtp.Packet) error {
				if h264.IsKeyframe(packet.Payload) {
					// first frame - send only IFrame
					// other frames - send IFrame and all PFrames
					if buf == nil {
						buf = append(buf, init...)
						b := muxer.Marshal(0, packet)
						buf = append(buf, b...)
					}
					atomic.AddUint32(&c.send, uint32(len(buf)))
					c.Fire(buf)
					// NOTE(review): the backing array of the buffer that was
					// just fired is reused below; this presumably relies on
					// Fire not retaining the slice - confirm.
					buf = buf[:0]
					buf = append(buf, init...)
					muxer.Reset()
				}
				// nothing is collected before the first keyframe
				if buf != nil {
					b := muxer.Marshal(0, packet)
					buf = append(buf, b...)
				}
				return nil
			}
		}

		var wrapper streamer.WrapperFunc
		if track.Codec.IsRTP() {
			wrapper = h264.RTPDepay(track)
		} else {
			wrapper = h264.RepairAVC(track)
		}
		push = wrapper(push)

		return track.Bind(push)

	case streamer.CodecH265:
		push := func(packet *rtp.Packet) error {
			if !h265.IsKeyframe(packet.Payload) {
				return nil
			}
			buf := muxer.Marshal(0, packet)
			atomic.AddUint32(&c.send, uint32(len(buf)))
			c.Fire(append(init, buf...))
			return nil
		}
		if track.Codec.IsRTP() {
			wrapper := h265.RTPDepay(track)
			push = wrapper(push)
		}
		return track.Bind(push)
	}

	panic("unsupported codec")
}
// MarshalJSON encodes a streamer.Info describing this segment consumer: its
// type label, remote address, user agent and the number of bytes sent.
func (c *Segment) MarshalJSON() ([]byte, error) {
	sent := atomic.LoadUint32(&c.send)
	return json.Marshal(&streamer.Info{
		Type:       "WS/MP4 client",
		RemoteAddr: c.RemoteAddr,
		UserAgent:  c.UserAgent,
		Send:       sent,
	})
}