diff --git a/pkg/iso/reader.go b/pkg/iso/reader.go index 175e2563..501a4eac 100644 --- a/pkg/iso/reader.go +++ b/pkg/iso/reader.go @@ -86,7 +86,7 @@ func DecodeAtom(b []byte) (any, error) { return DecodeAtom(data[1+3+4:]) } - case "avc1", "hev1": + case "avc1", "hev1", "hvc1": b = data[6+2+2+2+4+4+4+2+2+4+4+4+2+32+2+2:] atom, err := DecodeAtom(b) if err != nil { @@ -141,7 +141,17 @@ func DecodeAtom(b []byte) (any, error) { return atom, nil case MoofTrafTfdt: - return &AtomTfdt{DecodeTime: binary.BigEndian.Uint64(data[4:])}, nil + // Check version to determine field size + version := data[0] // First byte is version + if version == 0 { + // Version 0 uses 32-bit time + decodeTime := uint64(binary.BigEndian.Uint32(data[4:])) + return &AtomTfdt{DecodeTime: decodeTime}, nil + } else { + // Version 1 uses 64-bit time + decodeTime := binary.BigEndian.Uint64(data[4:]) + return &AtomTfdt{DecodeTime: decodeTime}, nil + } case MoofTrafTrun: rd := bits.NewReader(data) diff --git a/pkg/mp4/demuxer.go b/pkg/mp4/demuxer.go index 25c8c70e..67da93de 100644 --- a/pkg/mp4/demuxer.go +++ b/pkg/mp4/demuxer.go @@ -1,9 +1,12 @@ package mp4 import ( + "fmt" + "github.com/AlexxIT/go2rtc/pkg/aac" "github.com/AlexxIT/go2rtc/pkg/core" "github.com/AlexxIT/go2rtc/pkg/h264" + "github.com/AlexxIT/go2rtc/pkg/h265" "github.com/AlexxIT/go2rtc/pkg/iso" "github.com/pion/rtp" ) @@ -13,6 +16,16 @@ type Demuxer struct { timeScales map[uint32]float32 } +type TrackPackets struct { + TrackID uint32 + Packets []*core.Packet +} + +type TrackData struct { + DecodeTime uint32 + Trun *iso.AtomTrun +} + func (d *Demuxer) Probe(init []byte) (medias []*core.Media) { var trackID, timeScale uint32 @@ -34,11 +47,23 @@ func (d *Demuxer) Probe(init []byte) (medias []*core.Media) { switch atom.Name { case "avc1": codec = h264.ConfigToCodec(atom.Config) + case "hvc1", "hev1": + codec = h265.ConfigToCodec(atom.Config) } case *iso.AtomAudio: switch atom.Name { case "mp4a": - codec = aac.ConfigToCodec(atom.Config) + // G.711 PCMU audio detection for 8kHz mono (Tuya...) + if atom.SampleRate == 8000 && atom.Channels == 1 { + codec = &core.Codec{ + Name: core.CodecPCMU, + ClockRate: 8000, + Channels: 1, + PayloadType: 0, + } + } else { + codec = aac.ConfigToCodec(atom.Config) + } } } @@ -47,6 +72,7 @@ func (d *Demuxer) Probe(init []byte) (medias []*core.Media) { d.timeScales[trackID] = float32(codec.ClockRate) / float32(timeScale) medias = append(medias, &core.Media{ + ID: fmt.Sprintf("trackID=%d", trackID), Kind: codec.Kind(), Direction: core.DirectionRecvonly, Codecs: []*core.Codec{codec}, @@ -114,3 +140,170 @@ func (d *Demuxer) Demux(data2 []byte) (trackID uint32, packets []*core.Packet) { return } + +// DemuxAll returns packets from all tracks found in the fragment +func (d *Demuxer) DemuxAll(data []byte) []TrackPackets { + atoms, err := iso.DecodeAtoms(data) + if err != nil { + return nil + } + + // Map to store track-specific data + trackData := make(map[uint32]TrackData) + var mdat []byte + + // First pass: collect all track data + for _, atom := range atoms { + switch atom := atom.(type) { + case *iso.AtomMdat: + mdat = atom.Data + } + } + + // Temporary variables to track current track ID while parsing + var currentTrackID uint32 + + // Second pass: process traf boxes + for _, atom := range atoms { + switch atom := atom.(type) { + case *iso.AtomTfhd: + currentTrackID = atom.TrackID + + // Initialize track data if not exists + if _, ok := trackData[currentTrackID]; !ok { + trackData[currentTrackID] = TrackData{} + } + + case *iso.AtomTfdt: + if currentTrackID != 0 { + td := trackData[currentTrackID] + td.DecodeTime = uint32(atom.DecodeTime) + trackData[currentTrackID] = td + } + + case *iso.AtomTrun: + if currentTrackID != 0 { + td := trackData[currentTrackID] + td.Trun = atom + trackData[currentTrackID] = td + } + } + } + + // Process all tracks and collect results + var results []TrackPackets + + for tid, td := range trackData { + if td.Trun == nil || mdat == nil || len(td.Trun.SamplesSize) == 0 { + continue + } + + codec := d.codecs[tid] + if codec == nil { + continue + } + + timeScale := d.timeScales[tid] + + var packets []*core.Packet + switch codec.Kind() { + case "video": + packets = createVideoPackets(td.Trun, mdat, td.DecodeTime, timeScale) + case "audio": + packets = createAudioPackets(td.Trun, mdat, td.DecodeTime, timeScale, codec) + } + + if len(packets) > 0 { + results = append(results, TrackPackets{ + TrackID: tid, + Packets: packets, + }) + } + } + + return results +} + +// Creates video packets (H.264/H.265) +func createVideoPackets(trun *iso.AtomTrun, mdat []byte, decodeTime uint32, timeScale float32) []*core.Packet { + n := len(trun.SamplesSize) + hasDurations := len(trun.SamplesDuration) > 0 + + packets := make([]*core.Packet, n) + offset := uint32(0) + ts := decodeTime + + for i := 0; i < n; i++ { + // Get duration from array or use default + var duration uint32 + if hasDurations && i < len(trun.SamplesDuration) { + duration = trun.SamplesDuration[i] + } else { + duration = 1000 // Default for video + } + + size := trun.SamplesSize[i] + + if offset+size > uint32(len(mdat)) { + return packets[:i] + } + + timestamp := uint32(float32(ts) * timeScale) + packets[i] = &rtp.Packet{ + Header: rtp.Header{Timestamp: timestamp}, + Payload: mdat[offset : offset+size], + } + + offset += size + ts += duration + } + + return packets +} + +// Creates audio packets (G.711, AAC, etc.) +func createAudioPackets(trun *iso.AtomTrun, mdat []byte, decodeTime uint32, timeScale float32, codec *core.Codec) []*core.Packet { + n := len(trun.SamplesSize) + hasDurations := len(trun.SamplesDuration) > 0 + + packets := make([]*core.Packet, n) + offset := uint32(0) + ts := decodeTime + isPCM := codec.Name == core.CodecPCMU || codec.Name == core.CodecPCMA || codec.Name == core.CodecPCM || codec.Name == core.CodecPCML + + for i := 0; i < n; i++ { + size := trun.SamplesSize[i] + + // Calculate duration based on codec + var duration uint32 + if hasDurations && i < len(trun.SamplesDuration) { + duration = trun.SamplesDuration[i] + } else if isPCM { + duration = size + } else { + duration = 1024 + } + + if offset+size > uint32(len(mdat)) { + return packets[:i] + } + + // Calculate timestamp based on codec + var timestamp uint32 + if isPCM { + timestamp = ts + } else { + timestamp = uint32(float32(ts) * timeScale) + } + + packets[i] = &rtp.Packet{ + Header: rtp.Header{Timestamp: timestamp}, + Payload: mdat[offset : offset+size], + } + + offset += size + ts += duration + } + + return packets +}