From d05ab79f8844697ee44b9ae2881f301efa3fe477 Mon Sep 17 00:00:00 2001 From: Alexey Khit Date: Thu, 26 Jan 2023 22:29:12 +0300 Subject: [PATCH] Total rewrite mov/mp4 encoder --- pkg/mov/atoms.go | 318 +++++++++++++++++++++++++++++++ pkg/mov/codecs.go | 97 ++++++++++ pkg/mov/mov.go | 91 +++++++++ pkg/mp4/README.md | 39 ++-- pkg/mp4/muxer.go | 201 +++++-------------- pkg/{mp4f => mp4/v1}/consumer.go | 2 +- pkg/mp4/{ => v2}/const.go | 0 pkg/mp4/v2/consumer.go | 174 +++++++++++++++++ pkg/mp4/v2/muxer.go | 256 +++++++++++++++++++++++++ pkg/mp4/v2/segment.go | 143 ++++++++++++++ 10 files changed, 1157 insertions(+), 164 deletions(-) create mode 100644 pkg/mov/atoms.go create mode 100644 pkg/mov/codecs.go create mode 100644 pkg/mov/mov.go rename pkg/{mp4f => mp4/v1}/consumer.go (99%) rename pkg/mp4/{ => v2}/const.go (100%) create mode 100644 pkg/mp4/v2/consumer.go create mode 100644 pkg/mp4/v2/muxer.go create mode 100644 pkg/mp4/v2/segment.go diff --git a/pkg/mov/atoms.go b/pkg/mov/atoms.go new file mode 100644 index 00000000..d476ebec --- /dev/null +++ b/pkg/mov/atoms.go @@ -0,0 +1,318 @@ +package mov + +const ( + Ftyp = "ftyp" + Moov = "moov" + MoovMvhd = "mvhd" + MoovTrak = "trak" + MoovTrakTkhd = "tkhd" + MoovTrakMdia = "mdia" + MoovTrakMdiaMdhd = "mdhd" + MoovTrakMdiaHdlr = "hdlr" + MoovTrakMdiaMinf = "minf" + MoovTrakMdiaMinfVmhd = "vmhd" + MoovTrakMdiaMinfSmhd = "smhd" + MoovTrakMdiaMinfDinf = "dinf" + MoovTrakMdiaMinfDinfDref = "dref" + MoovTrakMdiaMinfDinfDrefUrl = "url " + MoovTrakMdiaMinfStbl = "stbl" + MoovTrakMdiaMinfStblStsd = "stsd" + MoovTrakMdiaMinfStblStts = "stts" + MoovTrakMdiaMinfStblStsc = "stsc" + MoovTrakMdiaMinfStblStsz = "stsz" + MoovTrakMdiaMinfStblStco = "stco" + MoovMvex = "mvex" + MoovMvexTrex = "trex" + Moof = "moof" + MoofMfhd = "mfhd" + MoofTraf = "traf" + MoofTrafTfhd = "tfhd" + MoofTrafTfdt = "tfdt" + MoofTrafTrun = "trun" + Mdat = "mdat" +) + +func (m *Movie) WriteFileType() { + m.StartAtom(Ftyp) + m.WriteString("iso5") + 
m.WriteUint32(512) + m.WriteString("iso5") + m.WriteString("iso6") + m.WriteString("mp41") + m.EndAtom() +} + +func (m *Movie) WriteMovieHeader() { + m.StartAtom(MoovMvhd) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // create time + m.Skip(4) // modify time + m.WriteUint32(1000) // time scale + m.Skip(4) // duration + m.WriteFloat32(1) // preferred rate + m.WriteFloat16(1) // preferred volume + m.Skip(10) // reserved + m.WriteMatrix() + m.Skip(6 * 4) // predefined? + m.WriteUint32(0xFFFFFFFF) // next track ID + m.EndAtom() +} + +func (m *Movie) WriteTrackHeader(id uint32, width, height uint16) { + const ( + TkhdTrackEnabled = 0x0001 + TkhdTrackInMovie = 0x0002 + TkhdTrackInPreview = 0x0004 + TkhdTrackInPoster = 0x0008 + ) + + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-32963 + m.StartAtom(MoovTrakTkhd) + m.Skip(1) // version + m.WriteUint24(TkhdTrackEnabled | TkhdTrackInMovie) + m.Skip(4) // create time + m.Skip(4) // modify time + m.WriteUint32(id) // trackID + m.Skip(4) // reserved + m.Skip(4) // duration + m.Skip(8) // reserved + m.Skip(2) // layer + if width > 0 { + m.Skip(2) + m.Skip(2) + } else { + m.WriteUint16(1) // alternate group + m.WriteFloat16(1) // volume + } + m.Skip(2) // reserved + m.WriteMatrix() + if width > 0 { + m.WriteFloat32(float64(width)) + m.WriteFloat32(float64(height)) + } else { + m.Skip(4) + m.Skip(4) + } + m.EndAtom() +} + +func (m *Movie) WriteMediaHeader(timescale uint32) { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-32999 + m.StartAtom(MoovTrakMdiaMdhd) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // creation time + m.Skip(4) // modification time + m.WriteUint32(timescale) // timescale + m.Skip(4) // duration + m.WriteUint16(0x55C4) // language (Unspecified) + m.Skip(2) // quality + m.EndAtom() +} + +func (m *Movie) 
WriteMediaHandler(s, name string) { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33004 + m.StartAtom(MoovTrakMdiaHdlr) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) + m.WriteString(s) // handler type (4 bytes!) + m.Skip(3 * 4) // reserved + m.WriteString(name) // handler name (any len) + m.Skip(1) // end string + m.EndAtom() +} + +func (m *Movie) WriteVideoMediaInfo() { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33012 + m.StartAtom(MoovTrakMdiaMinfVmhd) + m.Skip(1) // version + m.WriteUint24(1) // flags (You should always set this flag to 1) + m.Skip(2) // graphics mode + m.Skip(3 * 2) // op color + m.EndAtom() +} + +func (m *Movie) WriteAudioMediaInfo() { + m.StartAtom(MoovTrakMdiaMinfSmhd) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // balance + m.EndAtom() +} + +func (m *Movie) WriteDataInfo() { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-25680 + m.StartAtom(MoovTrakMdiaMinfDinf) + m.StartAtom(MoovTrakMdiaMinfDinfDref) + m.Skip(1) // version + m.Skip(3) // flags + m.WriteUint32(1) // children count + + m.StartAtom(MoovTrakMdiaMinfDinfDrefUrl) + m.Skip(1) // version + m.WriteUint24(1) // flags (self reference) + m.EndAtom() + + m.EndAtom() // DREF + m.EndAtom() // DINF +} + +func (m *Movie) WriteSampleTable(writeSampleDesc func()) { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-33040 + m.StartAtom(MoovTrakMdiaMinfStbl) + + m.StartAtom(MoovTrakMdiaMinfStblStsd) + m.Skip(1) // version + m.Skip(3) // flags + m.WriteUint32(1) // entry count + writeSampleDesc() + m.EndAtom() + + m.StartAtom(MoovTrakMdiaMinfStblStts) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // entry count +
m.EndAtom() + + m.StartAtom(MoovTrakMdiaMinfStblStsc) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // entry count + m.EndAtom() + + m.StartAtom(MoovTrakMdiaMinfStblStsz) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // sample size + m.Skip(4) // entry count + m.EndAtom() + + m.StartAtom(MoovTrakMdiaMinfStblStco) + m.Skip(1) // version + m.Skip(3) // flags + m.Skip(4) // entry count + m.EndAtom() + + m.EndAtom() +} + +func (m *Movie) WriteTrackExtend(id uint32) { + m.StartAtom(MoovMvexTrex) + m.Skip(1) // version + m.Skip(3) // flags + m.WriteUint32(id) // trackID + m.WriteUint32(1) // default sample description index + m.Skip(4) // default sample duration + m.Skip(4) // default sample size + m.Skip(4) // default sample flags + m.EndAtom() +} + +func (m *Movie) WriteVideoTrack(id, timescale uint32, width, height uint16, conf []byte, h264 bool) { + m.StartAtom(MoovTrak) + m.WriteTrackHeader(id, width, height) + + m.StartAtom(MoovTrakMdia) + m.WriteMediaHeader(timescale) + m.WriteMediaHandler("vide", "VideoHandler") + + m.StartAtom(MoovTrakMdiaMinf) + m.WriteVideoMediaInfo() + m.WriteDataInfo() + m.WriteSampleTable(func() { + m.WriteH26X(width, height, conf, h264) + }) + m.EndAtom() // MINF + + m.EndAtom() // MDIA + m.EndAtom() // TRAK +} + +func (m *Movie) WriteAudioTrack(id uint32, timescale uint32, channels, sampleSize uint16, conf []byte) { + m.StartAtom(MoovTrak) + m.WriteTrackHeader(id, 0, 0) + + m.StartAtom(MoovTrakMdia) + m.WriteMediaHeader(timescale) + m.WriteMediaHandler("soun", "SoundHandler") + + m.StartAtom(MoovTrakMdiaMinf) + m.WriteAudioMediaInfo() + m.WriteDataInfo() + m.WriteSampleTable(func() { + m.WriteMP4A(channels, sampleSize, timescale, conf) + }) + m.EndAtom() // MINF + + m.EndAtom() // MDIA + m.EndAtom() // TRAK +} + +func (m *Movie) WriteMovieFragment(seq, tid, duration, size, flags uint32, time uint64) { + m.StartAtom(Moof) + + m.StartAtom(MoofMfhd) + m.Skip(1) // version + m.Skip(3) // flags + m.WriteUint32(seq) // 
sequence number + m.EndAtom() + + m.StartAtom(MoofTraf) + + const ( + TfhdDefaultSampleDuration = 0x000008 + TfhdDefaultSampleSize = 0x000010 + TfhdDefaultSampleFlags = 0x000020 + TfhdDefaultBaseIsMoof = 0x020000 + ) + + m.StartAtom(MoofTrafTfhd) + m.Skip(1) // version + m.WriteUint24( + TfhdDefaultSampleDuration | + TfhdDefaultSampleSize | + TfhdDefaultSampleFlags | + TfhdDefaultBaseIsMoof, + ) + m.WriteUint32(tid) // track id + m.WriteUint32(duration) // default sample duration + m.WriteUint32(size) // default sample size + m.WriteUint32(flags) // default sample flags + m.EndAtom() + + m.StartAtom(MoofTrafTfdt) + m.WriteBytes(1) // version + m.Skip(3) // flags + m.WriteUint64(time) // base media decode time + m.EndAtom() + + const ( + TrunDataOffset = 0x000001 + TrunFirstSampleFlags = 0x000004 + TrunSampleDuration = 0x0000100 + TrunSampleSize = 0x0000200 + TrunSampleFlags = 0x0000400 + TrunSampleCTS = 0x0000800 + ) + + m.StartAtom(MoofTrafTrun) + m.Skip(1) // version + m.WriteUint24(TrunDataOffset) // flags + m.WriteUint32(1) // sample count + // data offset: current pos + uint32 len + MDAT header len + m.WriteUint32(uint32(len(m.b)) + 4 + 8) + m.EndAtom() // TRUN + + m.EndAtom() // TRAF + + m.EndAtom() // MOOF +} + +func (m *Movie) WriteData(b []byte) { + m.StartAtom(Mdat) + m.Write(b) + m.EndAtom() + +} diff --git a/pkg/mov/codecs.go b/pkg/mov/codecs.go new file mode 100644 index 00000000..1ccec54f --- /dev/null +++ b/pkg/mov/codecs.go @@ -0,0 +1,97 @@ +package mov + +const ( + MoovTrakMdiaMinfStblStsdAvc1 = "avc1" + MoovTrakMdiaMinfStblStsdAvc1AvcC = "avcC" + MoovTrakMdiaMinfStblStsdHev1 = "hev1" + MoovTrakMdiaMinfStblStsdHev1HvcC = "hvcC" + MoovTrakMdiaMinfStblStsdMp4a = "mp4a" +) + +func (m *Movie) WriteH26X(width, height uint16, conf []byte, h264 bool) { + // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html + if h264 { + m.StartAtom(MoovTrakMdiaMinfStblStsdAvc1) + } else { + 
m.StartAtom(MoovTrakMdiaMinfStblStsdHev1) + } + m.Skip(6) + m.WriteUint16(1) // data_reference_index + m.Skip(2) // version + m.Skip(2) // revision + m.Skip(4) // vendor + m.Skip(4) // temporal quality + m.Skip(4) // spatial quality + m.WriteUint16(width) // width + m.WriteUint16(height) // height + m.WriteFloat32(72) // horizontal resolution + m.WriteFloat32(72) // vertical resolution + m.Skip(4) // reserved + m.WriteUint16(1) // frame count + m.Skip(32) // compressor name + m.WriteUint16(24) // depth + m.WriteUint16(0xFFFF) // color table id (-1) + + if h264 { + m.StartAtom(MoovTrakMdiaMinfStblStsdAvc1AvcC) + } else { + m.StartAtom(MoovTrakMdiaMinfStblStsdHev1HvcC) + } + m.Write(conf) + m.EndAtom() // AVCC + + m.EndAtom() // AVC1 +} + +func (m *Movie) WriteMP4A(channels, sampleSize uint16, sampleRate uint32, conf []byte) { + m.StartAtom(MoovTrakMdiaMinfStblStsdMp4a) + m.Skip(6) + m.WriteUint16(1) // data_reference_index + m.Skip(2) // version + m.Skip(2) // revision + m.Skip(4) // vendor + m.WriteUint16(channels) // channel_count + m.WriteUint16(sampleSize) // sample_size + m.Skip(2) // compression id + m.Skip(2) // reserved + m.WriteFloat32(float64(sampleRate)) // sample_rate + + m.WriteESDS(conf) + + m.EndAtom() // MP4A +} + +func (m *Movie) WriteESDS(conf []byte) { + m.StartAtom("esds") + m.Skip(1) // version + m.Skip(3) // flags + + // MP4ESDescrTag[3]: + // - MP4DecConfigDescrTag[4]: + // - MP4DecSpecificDescrTag[5]: conf + // - Other[6] + const header = 5 + const size3 = 3 + const size4 = 13 + size5 := byte(len(conf)) + const size6 = 1 + + m.WriteBytes(3, 0x80, 0x80, 0x80, size3+header+size4+header+size5+header+size6) + m.Skip(2) // es id + m.Skip(1) // es flags + + m.WriteBytes(4, 0x80, 0x80, 0x80, size4+header+size5) + m.WriteBytes(0x40) // object id + m.WriteBytes(0x15) // stream type + m.Skip(3) // buffer size db + m.Skip(4) // max bitrate + m.Skip(4) // avg bitrate + + m.WriteBytes(5, 0x80, 0x80, 0x80, size5) + m.Write(conf) + + m.WriteBytes(6, 0x80,
0x80, 0x80, 1) + m.WriteBytes(2) // ? + + m.EndAtom() // ESDS +} diff --git a/pkg/mov/mov.go b/pkg/mov/mov.go new file mode 100644 index 00000000..a8390628 --- /dev/null +++ b/pkg/mov/mov.go @@ -0,0 +1,91 @@ +package mov + +import ( + "encoding/binary" + "math" +) + +type Movie struct { + b []byte + start []int +} + +func NewMovie(size int) *Movie { + return &Movie{b: make([]byte, 0, size)} +} + +func (m *Movie) Bytes() []byte { + return m.b +} + +func (m *Movie) StartAtom(name string) { + m.start = append(m.start, len(m.b)) + m.b = append(m.b, 0, 0, 0, 0) + m.b = append(m.b, name...) +} + +func (m *Movie) EndAtom() { + n := len(m.start) - 1 + + i := m.start[n] + size := uint32(len(m.b) - i) + binary.BigEndian.PutUint32(m.b[i:], size) + + m.start = m.start[:n] +} + +func (m *Movie) Write(b []byte) { + m.b = append(m.b, b...) +} + +func (m *Movie) WriteBytes(b ...byte) { + m.b = append(m.b, b...) +} + +func (m *Movie) WriteString(s string) { + m.b = append(m.b, s...) +} + +func (m *Movie) Skip(n int) { + m.b = append(m.b, make([]byte, n)...) 
+} + +func (m *Movie) WriteUint16(v uint16) { + m.b = append(m.b, byte(v>>8), byte(v)) +} + +func (m *Movie) WriteUint24(v uint32) { + m.b = append(m.b, byte(v>>16), byte(v>>8), byte(v)) +} + +func (m *Movie) WriteUint32(v uint32) { + m.b = append(m.b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) +} + +func (m *Movie) WriteUint64(v uint64) { + m.b = append(m.b, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) +} + +func (m *Movie) WriteFloat16(f float64) { + i, f := math.Modf(f) + f *= 256 + m.b = append(m.b, byte(i), byte(f)) +} + +func (m *Movie) WriteFloat32(f float64) { + i, f := math.Modf(f) + f *= 65536 + m.b = append(m.b, byte(uint16(i)>>8), byte(i), byte(uint16(f)>>8), byte(f)) +} + +func (m *Movie) WriteMatrix() { + m.WriteUint32(0x00010000) + m.Skip(4) + m.Skip(4) + m.Skip(4) + m.WriteUint32(0x00010000) + m.Skip(4) + m.Skip(4) + m.Skip(4) + m.WriteUint32(0x40000000) +} diff --git a/pkg/mp4/README.md b/pkg/mp4/README.md index 2fcb64f5..b89cda1b 100644 --- a/pkg/mp4/README.md +++ b/pkg/mp4/README.md @@ -1,19 +1,30 @@ +## Fragmented MP4 + +``` +ffmpeg -i "rtsp://..." -movflags +frag_keyframe+separate_moof+default_base_moof+empty_moov -frag_duration 1 -c copy -t 5 sample.mp4 +``` + +- movflags frag_keyframe + Start a new fragment at each video keyframe. +- frag_duration duration + Create fragments that are duration microseconds long. +- movflags separate_moof + Write a separate moof (movie fragment) atom for each track. +- movflags default_base_moof + Similarly to the omit_tfhd_offset, this flag avoids writing the absolute base_data_offset field in tfhd atoms, but does so by using the new default-base-is-moof flag instead. + +https://ffmpeg.org/ffmpeg-formats.html#Options-13 + ## HEVC -Browser | avc1 | hvc1 | hev1 -------------|------|------|--- -Mac Chrome | + | - | + -Mac Safari | + | + | - -iOS 15? 
| + | + | - -Mac Firefox | + | - | - -iOS 12 | + | - | - -Android 13 | + | - | - - -``` -ffmpeg -i input-hev1.mp4 -c:v copy -tag:v hvc1 -c:a copy output-hvc1.mp4 -Stream #0:0(eng): Video: hevc (Main) (hev1 / 0x31766568), yuv420p(tv, progressive), 720x404, 164 kb/s, 29.97 fps, -Stream #0:0(eng): Video: hevc (Main) (hvc1 / 0x31637668), yuv420p(tv, progressive), 720x404, 164 kb/s, 29.97 fps, -``` +| Browser | avc1 | hvc1 | hev1 | +|-------------|------|------|------| + | Mac Chrome | + | - | + | + | Mac Safari | + | + | - | + | iOS 15? | + | + | - | + | Mac Firefox | + | - | - | + | iOS 12 | + | - | - | + | Android 13 | + | - | - | ## Useful links diff --git a/pkg/mp4/muxer.go b/pkg/mp4/muxer.go index efd4b912..249723f5 100644 --- a/pkg/mp4/muxer.go +++ b/pkg/mp4/muxer.go @@ -1,22 +1,19 @@ package mp4 import ( - "encoding/binary" "encoding/hex" "github.com/AlexxIT/go2rtc/pkg/h264" "github.com/AlexxIT/go2rtc/pkg/h265" + "github.com/AlexxIT/go2rtc/pkg/mov" "github.com/AlexxIT/go2rtc/pkg/streamer" - "github.com/deepch/vdk/av" "github.com/deepch/vdk/codec/h264parser" "github.com/deepch/vdk/codec/h265parser" - "github.com/deepch/vdk/format/fmp4/fmp4io" - "github.com/deepch/vdk/format/mp4/mp4io" - "github.com/deepch/vdk/format/mp4f/mp4fio" "github.com/pion/rtp" ) type Muxer struct { fragIndex uint32 + flags []uint32 dts []uint64 pts []uint32 } @@ -45,7 +42,11 @@ func (m *Muxer) MimeType(codecs []*streamer.Codec) string { } func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) { - moov := MOOV() + mv := mov.NewMovie(1024) + mv.WriteFileType() + + mv.StartAtom(mov.Moov) + mv.WriteMovieHeader() for i, codec := range codecs { switch codec.Name { @@ -62,35 +63,13 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) { return nil, err } - width := codecData.Width() - height := codecData.Height() + mv.WriteVideoTrack( + uint32(i+1), codec.ClockRate, + uint16(codecData.Width()), uint16(codecData.Height()), + codecData.AVCDecoderConfRecordBytes(), true, + 
) - trak := TRAK(i + 1) - trak.Header.TrackWidth = float64(width) - trak.Header.TrackHeight = float64(height) - trak.Media.Header.TimeScale = int32(codec.ClockRate) - trak.Media.Handler = &mp4io.HandlerRefer{ - SubType: [4]byte{'v', 'i', 'd', 'e'}, - Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, - } - trak.Media.Info.Video = &mp4io.VideoMediaInfo{ - Flags: 0x000001, - } - trak.Media.Info.Sample.SampleDesc.AVC1Desc = &mp4io.AVC1Desc{ - DataRefIdx: 1, - HorizontalResolution: 72, - VorizontalResolution: 72, - Width: int16(width), - Height: int16(height), - FrameCount: 1, - Depth: 24, - ColorTableId: -1, - Conf: &mp4io.AVC1Conf{ - Data: codecData.AVCDecoderConfRecordBytes(), - }, - } - - moov.Tracks = append(moov.Tracks, trak) + m.flags = append(m.flags, 0x1010000) case streamer.CodecH265: vps, sps, pps := h265.GetParameterSet(codec.FmtpLine) @@ -106,35 +85,13 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) { return nil, err } - width := codecData.Width() - height := codecData.Height() + mv.WriteVideoTrack( + uint32(i+1), codec.ClockRate, + uint16(codecData.Width()), uint16(codecData.Height()), + codecData.AVCDecoderConfRecordBytes(), false, + ) - trak := TRAK(i + 1) - trak.Header.TrackWidth = float64(width) - trak.Header.TrackHeight = float64(height) - trak.Media.Header.TimeScale = int32(codec.ClockRate) - trak.Media.Handler = &mp4io.HandlerRefer{ - SubType: [4]byte{'v', 'i', 'd', 'e'}, - Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, - } - trak.Media.Info.Video = &mp4io.VideoMediaInfo{ - Flags: 0x000001, - } - trak.Media.Info.Sample.SampleDesc.HV1Desc = &mp4io.HV1Desc{ - DataRefIdx: 1, - HorizontalResolution: 72, - VorizontalResolution: 72, - Width: int16(width), - Height: int16(height), - FrameCount: 1, - Depth: 24, - ColorTableId: -1, - Conf: &mp4io.HV1Conf{ - Data: codecData.AVCDecoderConfRecordBytes(), - }, - } - - moov.Tracks = append(moov.Tracks, trak) + m.flags = append(m.flags, 
0x1010000) case streamer.CodecAAC: s := streamer.Between(codec.FmtpLine, "config=", ";") @@ -143,44 +100,26 @@ func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) { return nil, err } - trak := TRAK(i + 1) - trak.Header.AlternateGroup = 1 - trak.Header.Duration = 0 - trak.Header.Volume = 1 - trak.Media.Header.TimeScale = int32(codec.ClockRate) + mv.WriteAudioTrack( + uint32(i+1), codec.ClockRate, codec.Channels, 16, b, + ) - trak.Media.Handler = &mp4io.HandlerRefer{ - SubType: [4]byte{'s', 'o', 'u', 'n'}, - Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, - } - trak.Media.Info.Sound = &mp4io.SoundMediaInfo{} - - trak.Media.Info.Sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{ - DataRefIdx: 1, - NumberOfChannels: int16(codec.Channels), - SampleSize: int16(av.FLTP.BytesPerSample() * 4), - SampleRate: float64(codec.ClockRate), - Unknowns: []mp4io.Atom{ESDS(b)}, - } - - moov.Tracks = append(moov.Tracks, trak) + m.flags = append(m.flags, 0x2000000) } - trex := &mp4io.TrackExtend{ - TrackId: uint32(i + 1), - DefaultSampleDescIdx: 1, - DefaultSampleDuration: 0, - } - moov.MovieExtend.Tracks = append(moov.MovieExtend.Tracks, trex) - m.pts = append(m.pts, 0) m.dts = append(m.dts, 0) } - data := make([]byte, moov.Len()) - moov.Marshal(data) + mv.StartAtom(mov.MoovMvex) + for i := range codecs { + mv.WriteTrackExtend(uint32(i + 1)) + } + mv.EndAtom() // MVEX - return append(FTYP(), data...), nil + mv.EndAtom() // MOOV + + return mv.Bytes(), nil } func (m *Muxer) Reset() { @@ -192,65 +131,29 @@ func (m *Muxer) Reset() { } func (m *Muxer) Marshal(trackID byte, packet *rtp.Packet) []byte { - run := &mp4fio.TrackFragRun{ - Flags: 0x000b05, - FirstSampleFlags: uint32(fmp4io.SampleNoDependencies), - DataOffset: 0, - Entries: []mp4io.TrackFragRunEntry{}, - } - - moof := &mp4fio.MovieFrag{ - Header: &mp4fio.MovieFragHeader{ - Seqnum: m.fragIndex + 1, - }, - Tracks: []*mp4fio.TrackFrag{ - { - Header: &mp4fio.TrackFragHeader{ - Data: []byte{0x00, 0x02, 
0x00, 0x20, 0x00, 0x00, 0x00, trackID + 1, 0x01, 0x01, 0x00, 0x00}, - }, - DecodeTime: &mp4fio.TrackFragDecodeTime{ - Version: 1, - Flags: 0, - Time: m.dts[trackID], - }, - Run: run, - }, - }, - } - - entry := mp4io.TrackFragRunEntry{ - Size: uint32(len(packet.Payload)), - } - - newTime := packet.Timestamp - if m.pts[trackID] > 0 { - entry.Duration = newTime - m.pts[trackID] - m.dts[trackID] += uint64(entry.Duration) - } else { - // important, or Safari will fail with first frame - entry.Duration = 1 - } - m.pts[trackID] = newTime - - // important before moof.Len() - run.Entries = append(run.Entries, entry) - - moofLen := moof.Len() - mdatLen := 8 + len(packet.Payload) - - // important after moof.Len() - run.DataOffset = uint32(moofLen + 8) - - buf := make([]byte, moofLen+mdatLen) - moof.Marshal(buf) - - binary.BigEndian.PutUint32(buf[moofLen:], uint32(mdatLen)) - copy(buf[moofLen+4:], "mdat") - copy(buf[moofLen+8:], packet.Payload) + // important before increment + time := m.dts[trackID] m.fragIndex++ - //m.total += moofLen + mdatLen + var duration uint32 + newTime := packet.Timestamp + if m.pts[trackID] > 0 { + duration = newTime - m.pts[trackID] + m.dts[trackID] += uint64(duration) + } else { + // important, or Safari will fail with first frame + duration = 1 + } + m.pts[trackID] = newTime - return buf + mv := mov.NewMovie(1024 + len(packet.Payload)) + mv.WriteMovieFragment( + m.fragIndex, uint32(trackID+1), duration, + uint32(len(packet.Payload)), + m.flags[trackID], time, + ) + mv.WriteData(packet.Payload) + + return mv.Bytes() } diff --git a/pkg/mp4f/consumer.go b/pkg/mp4/v1/consumer.go similarity index 99% rename from pkg/mp4f/consumer.go rename to pkg/mp4/v1/consumer.go index 56e8a523..75dbd59e 100644 --- a/pkg/mp4f/consumer.go +++ b/pkg/mp4/v1/consumer.go @@ -1,4 +1,4 @@ -package mp4f +package mp4 import ( "github.com/AlexxIT/go2rtc/pkg/h264" diff --git a/pkg/mp4/const.go b/pkg/mp4/v2/const.go similarity index 100% rename from pkg/mp4/const.go rename to 
pkg/mp4/v2/const.go diff --git a/pkg/mp4/v2/consumer.go b/pkg/mp4/v2/consumer.go new file mode 100644 index 00000000..3df6e355 --- /dev/null +++ b/pkg/mp4/v2/consumer.go @@ -0,0 +1,174 @@ +package mp4 + +import ( + "encoding/json" + "github.com/AlexxIT/go2rtc/pkg/aac" + "github.com/AlexxIT/go2rtc/pkg/h264" + "github.com/AlexxIT/go2rtc/pkg/h265" + "github.com/AlexxIT/go2rtc/pkg/streamer" + "github.com/pion/rtp" + "sync/atomic" +) + +type Consumer struct { + streamer.Element + + Medias []*streamer.Media + UserAgent string + RemoteAddr string + + muxer *Muxer + codecs []*streamer.Codec + wait byte + + send uint32 +} + +const ( + waitNone byte = iota + waitKeyframe + waitInit +) + +func (c *Consumer) GetMedias() []*streamer.Media { + if c.Medias != nil { + return c.Medias + } + + // default medias + return []*streamer.Media{ + { + Kind: streamer.KindVideo, + Direction: streamer.DirectionRecvonly, + Codecs: []*streamer.Codec{ + {Name: streamer.CodecH264}, + {Name: streamer.CodecH265}, + }, + }, + { + Kind: streamer.KindAudio, + Direction: streamer.DirectionRecvonly, + Codecs: []*streamer.Codec{ + {Name: streamer.CodecAAC}, + }, + }, + } +} + +func (c *Consumer) AddTrack(media *streamer.Media, track *streamer.Track) *streamer.Track { + trackID := byte(len(c.codecs)) + c.codecs = append(c.codecs, track.Codec) + + codec := track.Codec + switch codec.Name { + case streamer.CodecH264: + c.wait = waitInit + + push := func(packet *rtp.Packet) error { + if packet.Version != h264.RTPPacketVersionAVC { + return nil + } + + if c.wait != waitNone { + if c.wait == waitInit || !h264.IsKeyframe(packet.Payload) { + return nil + } + c.wait = waitNone + } + + buf := c.muxer.Marshal(trackID, packet) + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(buf) + + return nil + } + + var wrapper streamer.WrapperFunc + if codec.IsRTP() { + wrapper = h264.RTPDepay(track) + } else { + wrapper = h264.RepairAVC(track) + } + push = wrapper(push) + + return track.Bind(push) + + case 
streamer.CodecH265: + c.wait = waitInit + + push := func(packet *rtp.Packet) error { + if packet.Version != h264.RTPPacketVersionAVC { + return nil + } + + if c.wait != waitNone { + if c.wait == waitInit || !h265.IsKeyframe(packet.Payload) { + return nil + } + c.wait = waitNone + } + + buf := c.muxer.Marshal(trackID, packet) + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(buf) + + return nil + } + + if codec.IsRTP() { + wrapper := h265.RTPDepay(track) + push = wrapper(push) + } + + return track.Bind(push) + + case streamer.CodecAAC: + push := func(packet *rtp.Packet) error { + if c.wait != waitNone { + return nil + } + + buf := c.muxer.Marshal(trackID, packet) + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(buf) + + return nil + } + + if codec.IsRTP() { + wrapper := aac.RTPDepay(track) + push = wrapper(push) + } + + return track.Bind(push) + } + + panic("unsupported codec") +} + +func (c *Consumer) MimeType() string { + return c.muxer.MimeType(c.codecs) +} + +func (c *Consumer) Init() ([]byte, error) { + c.muxer = &Muxer{} + return c.muxer.GetInit(c.codecs) +} + +func (c *Consumer) Start() { + if c.wait == waitInit { + c.wait = waitKeyframe + } +} + +// + +func (c *Consumer) MarshalJSON() ([]byte, error) { + info := &streamer.Info{ + Type: "MP4 client", + RemoteAddr: c.RemoteAddr, + UserAgent: c.UserAgent, + Send: atomic.LoadUint32(&c.send), + } + return json.Marshal(info) +} diff --git a/pkg/mp4/v2/muxer.go b/pkg/mp4/v2/muxer.go new file mode 100644 index 00000000..efd4b912 --- /dev/null +++ b/pkg/mp4/v2/muxer.go @@ -0,0 +1,256 @@ +package mp4 + +import ( + "encoding/binary" + "encoding/hex" + "github.com/AlexxIT/go2rtc/pkg/h264" + "github.com/AlexxIT/go2rtc/pkg/h265" + "github.com/AlexxIT/go2rtc/pkg/streamer" + "github.com/deepch/vdk/av" + "github.com/deepch/vdk/codec/h264parser" + "github.com/deepch/vdk/codec/h265parser" + "github.com/deepch/vdk/format/fmp4/fmp4io" + "github.com/deepch/vdk/format/mp4/mp4io" + 
"github.com/deepch/vdk/format/mp4f/mp4fio" + "github.com/pion/rtp" +) + +type Muxer struct { + fragIndex uint32 + dts []uint64 + pts []uint32 +} + +func (m *Muxer) MimeType(codecs []*streamer.Codec) string { + s := `video/mp4; codecs="` + + for i, codec := range codecs { + if i > 0 { + s += "," + } + + switch codec.Name { + case streamer.CodecH264: + s += "avc1." + h264.GetProfileLevelID(codec.FmtpLine) + case streamer.CodecH265: + // H.265 profile=main level=5.1 + // hvc1 - supported in Safari, hev1 - doesn't, both supported in Chrome + s += "hvc1.1.6.L153.B0" + case streamer.CodecAAC: + s += "mp4a.40.2" + } + } + + return s + `"` +} + +func (m *Muxer) GetInit(codecs []*streamer.Codec) ([]byte, error) { + moov := MOOV() + + for i, codec := range codecs { + switch codec.Name { + case streamer.CodecH264: + sps, pps := h264.GetParameterSet(codec.FmtpLine) + if sps == nil { + // some dummy SPS and PPS not a problem + sps = []byte{0x67, 0x42, 0x00, 0x0a, 0xf8, 0x41, 0xa2} + pps = []byte{0x68, 0xce, 0x38, 0x80} + } + + codecData, err := h264parser.NewCodecDataFromSPSAndPPS(sps, pps) + if err != nil { + return nil, err + } + + width := codecData.Width() + height := codecData.Height() + + trak := TRAK(i + 1) + trak.Header.TrackWidth = float64(width) + trak.Header.TrackHeight = float64(height) + trak.Media.Header.TimeScale = int32(codec.ClockRate) + trak.Media.Handler = &mp4io.HandlerRefer{ + SubType: [4]byte{'v', 'i', 'd', 'e'}, + Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, + } + trak.Media.Info.Video = &mp4io.VideoMediaInfo{ + Flags: 0x000001, + } + trak.Media.Info.Sample.SampleDesc.AVC1Desc = &mp4io.AVC1Desc{ + DataRefIdx: 1, + HorizontalResolution: 72, + VorizontalResolution: 72, + Width: int16(width), + Height: int16(height), + FrameCount: 1, + Depth: 24, + ColorTableId: -1, + Conf: &mp4io.AVC1Conf{ + Data: codecData.AVCDecoderConfRecordBytes(), + }, + } + + moov.Tracks = append(moov.Tracks, trak) + + case streamer.CodecH265: + vps, sps, 
pps := h265.GetParameterSet(codec.FmtpLine) + if sps == nil { + // some dummy SPS and PPS not a problem + vps = []byte{0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0xac, 0x09} + sps = []byte{0x42, 0x01, 0x01, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0xa0, 0x01, 0x40, 0x20, 0x05, 0xa1, 0xfe, 0x5a, 0xee, 0x46, 0xc1, 0xae, 0x55, 0x04} + pps = []byte{0x44, 0x01, 0xc0, 0x73, 0xc0, 0x4c, 0x90} + } + + codecData, err := h265parser.NewCodecDataFromVPSAndSPSAndPPS(vps, sps, pps) + if err != nil { + return nil, err + } + + width := codecData.Width() + height := codecData.Height() + + trak := TRAK(i + 1) + trak.Header.TrackWidth = float64(width) + trak.Header.TrackHeight = float64(height) + trak.Media.Header.TimeScale = int32(codec.ClockRate) + trak.Media.Handler = &mp4io.HandlerRefer{ + SubType: [4]byte{'v', 'i', 'd', 'e'}, + Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, + } + trak.Media.Info.Video = &mp4io.VideoMediaInfo{ + Flags: 0x000001, + } + trak.Media.Info.Sample.SampleDesc.HV1Desc = &mp4io.HV1Desc{ + DataRefIdx: 1, + HorizontalResolution: 72, + VorizontalResolution: 72, + Width: int16(width), + Height: int16(height), + FrameCount: 1, + Depth: 24, + ColorTableId: -1, + Conf: &mp4io.HV1Conf{ + Data: codecData.AVCDecoderConfRecordBytes(), + }, + } + + moov.Tracks = append(moov.Tracks, trak) + + case streamer.CodecAAC: + s := streamer.Between(codec.FmtpLine, "config=", ";") + b, err := hex.DecodeString(s) + if err != nil { + return nil, err + } + + trak := TRAK(i + 1) + trak.Header.AlternateGroup = 1 + trak.Header.Duration = 0 + trak.Header.Volume = 1 + trak.Media.Header.TimeScale = int32(codec.ClockRate) + + trak.Media.Handler = &mp4io.HandlerRefer{ + SubType: [4]byte{'s', 'o', 'u', 'n'}, + Name: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'm', 'a', 'i', 'n', 0}, + } + trak.Media.Info.Sound = 
&mp4io.SoundMediaInfo{} + + trak.Media.Info.Sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{ + DataRefIdx: 1, + NumberOfChannels: int16(codec.Channels), + SampleSize: int16(av.FLTP.BytesPerSample() * 4), + SampleRate: float64(codec.ClockRate), + Unknowns: []mp4io.Atom{ESDS(b)}, + } + + moov.Tracks = append(moov.Tracks, trak) + } + + trex := &mp4io.TrackExtend{ + TrackId: uint32(i + 1), + DefaultSampleDescIdx: 1, + DefaultSampleDuration: 0, + } + moov.MovieExtend.Tracks = append(moov.MovieExtend.Tracks, trex) + + m.pts = append(m.pts, 0) + m.dts = append(m.dts, 0) + } + + data := make([]byte, moov.Len()) + moov.Marshal(data) + + return append(FTYP(), data...), nil +} + +func (m *Muxer) Reset() { + m.fragIndex = 0 + for i := range m.dts { + m.dts[i] = 0 + m.pts[i] = 0 + } +} + +func (m *Muxer) Marshal(trackID byte, packet *rtp.Packet) []byte { + run := &mp4fio.TrackFragRun{ + Flags: 0x000b05, + FirstSampleFlags: uint32(fmp4io.SampleNoDependencies), + DataOffset: 0, + Entries: []mp4io.TrackFragRunEntry{}, + } + + moof := &mp4fio.MovieFrag{ + Header: &mp4fio.MovieFragHeader{ + Seqnum: m.fragIndex + 1, + }, + Tracks: []*mp4fio.TrackFrag{ + { + Header: &mp4fio.TrackFragHeader{ + Data: []byte{0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, trackID + 1, 0x01, 0x01, 0x00, 0x00}, + }, + DecodeTime: &mp4fio.TrackFragDecodeTime{ + Version: 1, + Flags: 0, + Time: m.dts[trackID], + }, + Run: run, + }, + }, + } + + entry := mp4io.TrackFragRunEntry{ + Size: uint32(len(packet.Payload)), + } + + newTime := packet.Timestamp + if m.pts[trackID] > 0 { + entry.Duration = newTime - m.pts[trackID] + m.dts[trackID] += uint64(entry.Duration) + } else { + // important, or Safari will fail with first frame + entry.Duration = 1 + } + m.pts[trackID] = newTime + + // important before moof.Len() + run.Entries = append(run.Entries, entry) + + moofLen := moof.Len() + mdatLen := 8 + len(packet.Payload) + + // important after moof.Len() + run.DataOffset = uint32(moofLen + 8) + + buf := make([]byte, 
moofLen+mdatLen) + moof.Marshal(buf) + + binary.BigEndian.PutUint32(buf[moofLen:], uint32(mdatLen)) + copy(buf[moofLen+4:], "mdat") + copy(buf[moofLen+8:], packet.Payload) + + m.fragIndex++ + + //m.total += moofLen + mdatLen + + return buf +} diff --git a/pkg/mp4/v2/segment.go b/pkg/mp4/v2/segment.go new file mode 100644 index 00000000..9cc3a88a --- /dev/null +++ b/pkg/mp4/v2/segment.go @@ -0,0 +1,143 @@ +package mp4 + +import ( + "encoding/json" + "github.com/AlexxIT/go2rtc/pkg/h264" + "github.com/AlexxIT/go2rtc/pkg/h265" + "github.com/AlexxIT/go2rtc/pkg/streamer" + "github.com/pion/rtp" + "sync/atomic" +) + +type Segment struct { + streamer.Element + + Medias []*streamer.Media + UserAgent string + RemoteAddr string + + MimeType string + OnlyKeyframe bool + + send uint32 +} + +func (c *Segment) GetMedias() []*streamer.Media { + if c.Medias != nil { + return c.Medias + } + + // default medias + return []*streamer.Media{ + { + Kind: streamer.KindVideo, + Direction: streamer.DirectionRecvonly, + Codecs: []*streamer.Codec{ + {Name: streamer.CodecH264}, + {Name: streamer.CodecH265}, + }, + }, + } +} + +func (c *Segment) AddTrack(media *streamer.Media, track *streamer.Track) *streamer.Track { + muxer := &Muxer{} + + codecs := []*streamer.Codec{track.Codec} + + init, err := muxer.GetInit(codecs) + if err != nil { + return nil + } + + c.MimeType = muxer.MimeType(codecs) + + switch track.Codec.Name { + case streamer.CodecH264: + var push streamer.WriterFunc + + if c.OnlyKeyframe { + push = func(packet *rtp.Packet) error { + if !h264.IsKeyframe(packet.Payload) { + return nil + } + + buf := muxer.Marshal(0, packet) + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(append(init, buf...)) + + return nil + } + } else { + var buf []byte + + push = func(packet *rtp.Packet) error { + if h264.IsKeyframe(packet.Payload) { + // first frame - send only IFrame + // other frames - send IFrame and all PFrames + if buf == nil { + buf = append(buf, init...)
+ b := muxer.Marshal(0, packet) + buf = append(buf, b...) + } + + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(buf) + + buf = buf[:0] + buf = append(buf, init...) + muxer.Reset() + } + + if buf != nil { + b := muxer.Marshal(0, packet) + buf = append(buf, b...) + } + + return nil + } + } + + var wrapper streamer.WrapperFunc + if track.Codec.IsRTP() { + wrapper = h264.RTPDepay(track) + } else { + wrapper = h264.RepairAVC(track) + } + push = wrapper(push) + + return track.Bind(push) + + case streamer.CodecH265: + push := func(packet *rtp.Packet) error { + if !h265.IsKeyframe(packet.Payload) { + return nil + } + + buf := muxer.Marshal(0, packet) + atomic.AddUint32(&c.send, uint32(len(buf))) + c.Fire(append(init, buf...)) + + return nil + } + + if track.Codec.IsRTP() { + wrapper := h265.RTPDepay(track) + push = wrapper(push) + } + + return track.Bind(push) + } + + panic("unsupported codec") +} + +func (c *Segment) MarshalJSON() ([]byte, error) { + info := &streamer.Info{ + Type: "WS/MP4 client", + RemoteAddr: c.RemoteAddr, + UserAgent: c.UserAgent, + Send: atomic.LoadUint32(&c.send), + } + return json.Marshal(info) +}