From 7befcd2a9bffbefb8dfef1478d227d701320f29b Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Wed, 5 Apr 2023 16:21:31 +0100 Subject: [PATCH 01/17] Update construct to 2.10.68 --- poetry.lock | 13 ++++++++----- pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 110903e..6eaa5a6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -33,15 +33,18 @@ files = [ [[package]] name = "construct" -version = "2.8.8" -description = "A powerful declarative parser/builder for binary data" +version = "2.10.68" +description = "A powerful declarative symmetric parser/builder for binary data" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" files = [ - {file = "construct-2.8.8.tar.gz", hash = "sha256:1b84b8147f6fd15bcf64b737c3e8ac5100811ad80c830cb4b2545140511c4157"}, + {file = "construct-2.10.68.tar.gz", hash = "sha256:7b2a3fd8e5f597a5aa1d614c3bd516fa065db01704c72a1efaaeec6ef23d8b45"}, ] +[package.extras] +extras = ["arrow", "cloudpickle", "enum34", "lz4", "numpy", "ruamel.yaml"] + [[package]] name = "coverage" version = "7.2.3" @@ -275,4 +278,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.7,<4.0" -content-hash = "9a2baac6978e2ce64197980dda6b562347469f581c3881b1fe9d59432d47bc51" +content-hash = "fc4ee9dd83c675556d17d4c93f297cc1c9dc13f756645b08b15155585b2ee984" diff --git a/pyproject.toml b/pyproject.toml index b0b6ef0..d88bc73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.7,<4.0" -construct = "2.8.8" +construct = "2.10.68" [tool.poetry.group.dev.dependencies] coverage = { version="^7.2.3", extras=["toml"] } From b712d1ede57f456da1cbd0b3b99e259d75a02c74 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Wed, 5 Apr 2023 16:21:45 +0100 Subject: [PATCH 02/17] Replace String with PaddedString When construct replaced String with 
PaddedString, the return type changed from bytes to unicode strings (u"..." on Python 2, "..." on Python 3). This affects a lot of conditional checks, tests, and user code. For example box type definitions. I've updated the code to reflect this change. --- src/pymp4/parser.py | 148 ++++++++++++++++++++-------------------- tests/test_box.py | 36 +++++----- tests/test_dashboxes.py | 4 +- tests/test_util.py | 46 ++++++------- 4 files changed, 117 insertions(+), 117 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 1c2311c..33f3d9d 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -84,22 +84,22 @@ def _sizeof(self, context, path): FileTypeBox = Struct( "type" / Const(b"ftyp"), - "major_brand" / String(4), + "major_brand" / PaddedString(4, "ascii"), "minor_version" / Int32ub, - "compatible_brands" / GreedyRange(String(4)), + "compatible_brands" / GreedyRange(PaddedString(4, "ascii")), ) SegmentTypeBox = Struct( "type" / Const(b"styp"), - "major_brand" / String(4), + "major_brand" / PaddedString(4, "ascii"), "minor_version" / Int32ub, - "compatible_brands" / GreedyRange(String(4)), + "compatible_brands" / GreedyRange(PaddedString(4, "ascii")), ) # Catch find boxes RawBox = Struct( - "type" / String(4, padchar=b" ", paddir="right"), + "type" / PaddedString(4, "ascii"), "data" / Default(GreedyBytes, b"") ) @@ -265,7 +265,7 @@ def _encode(self, obj, context): "version" / Const(Int8ub, 0), "flags" / Const(Int24ub, 0), Padding(4, pattern=b"\x00"), - "handler_type" / String(4), + "handler_type" / PaddedString(4, "ascii"), Padding(12, pattern=b"\x00"), # Int32ub[3] "name" / CString(encoding="utf8") ) @@ -378,7 +378,7 @@ def _encode(self, obj, context): AVC1SampleEntryBox = Struct( "version" / Default(Int16ub, 0), "revision" / Const(Int16ub, 0), - "vendor" / Default(String(4, padchar=b" "), b"brdy"), + "vendor" / Default(PaddedString(4, "ascii"), "brdy"), "temporal_quality" / Default(Int32ub, 0), "spatial_quality" / Default(Int32ub, 0), 
"width" / Int16ub, @@ -389,29 +389,29 @@ def _encode(self, obj, context): Padding(2), "data_size" / Const(Int32ub, 0), "frame_count" / Default(Int16ub, 1), - "compressor_name" / Default(String(32, padchar=b" "), ""), + "compressor_name" / Default(PaddedString(32, "ascii"), None), "depth" / Default(Int16ub, 24), "color_table_id" / Default(Int16sb, -1), "avc_data" / PrefixedIncludingSize(Int32ub, Struct( - "type" / String(4, padchar=b" ", paddir="right"), + "type" / PaddedString(4, "ascii"), Embedded(Switch(this.type, { - b"avcC": AAVC, - b"hvcC": HVCC, + "avcC": AAVC, + "hvcC": HVCC, }, Struct("data" / GreedyBytes))) )), "sample_info" / LazyBound(lambda _: GreedyRange(Box)) ) SampleEntryBox = PrefixedIncludingSize(Int32ub, Struct( - "format" / String(4, padchar=b" ", paddir="right"), + "format" / PaddedString(4, "ascii"), Padding(6, pattern=b"\x00"), "data_reference_index" / Default(Int16ub, 1), Embedded(Switch(this.format, { - b"ec-3": MP4ASampleEntryBox, - b"mp4a": MP4ASampleEntryBox, - b"enca": MP4ASampleEntryBox, - b"avc1": AVC1SampleEntryBox, - b"encv": AVC1SampleEntryBox + "ec-3": MP4ASampleEntryBox, + "mp4a": MP4ASampleEntryBox, + "enca": MP4ASampleEntryBox, + "avc1": AVC1SampleEntryBox, + "encv": AVC1SampleEntryBox }, Struct("data" / GreedyBytes))) )) @@ -682,7 +682,7 @@ def _encode(self, obj, context): ProtectionSystemHeaderBox = Struct( - "type" / If(this._.type != b"uuid", Const(b"pssh")), + "type" / If(this._.type != "uuid", Const(b"pssh")), "version" / Rebuild(Int8ub, lambda ctx: 1 if (hasattr(ctx, "key_IDs") and ctx.key_IDs) else 0), "flags" / Const(Int24ub, 0), "system_ID" / UUIDBytes(Bytes(16)), @@ -693,7 +693,7 @@ def _encode(self, obj, context): ) TrackEncryptionBox = Struct( - "type" / If(this._.type != b"uuid", Const(b"tenc")), + "type" / If(this._.type != "uuid", Const(b"tenc")), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "_reserved0" / Const(Int8ub, 0), @@ -709,7 +709,7 @@ def _encode(self, obj, context): ) 
SampleEncryptionBox = Struct( - "type" / If(this._.type != b"uuid", Const(b"senc")), + "type" / If(this._.type != "uuid", Const(b"senc")), "version" / Const(Int8ub, 0), "flags" / BitStruct( Padding(22), @@ -728,14 +728,14 @@ def _encode(self, obj, context): OriginalFormatBox = Struct( "type" / Const(b"frma"), - "original_format" / Default(String(4), b"avc1") + "original_format" / Default(PaddedString(4, "ascii"), "avc1") ) SchemeTypeBox = Struct( "type" / Const(b"schm"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), - "scheme_type" / Default(String(4), b"cenc"), + "scheme_type" / Default(PaddedString(4, "ascii"), "cenc"), "scheme_version" / Default(Int32ub, 0x00010000), "schema_uri" / Default(If(this.flags & 1 == 1, CString()), None) ) @@ -778,67 +778,67 @@ def sizeof(self, context=None, **kw): Box = PrefixedIncludingSize(Int32ub, Struct( "offset" / TellMinusSizeOf(Int32ub), - "type" / Peek(String(4, padchar=b" ", paddir="right")), + "type" / Peek(PaddedString(4, "ascii")), Embedded(Switch(this.type, { - b"ftyp": FileTypeBox, - b"styp": SegmentTypeBox, - b"mvhd": MovieHeaderBox, - b"moov": ContainerBoxLazy, - b"moof": ContainerBoxLazy, - b"mfhd": MovieFragmentHeaderBox, - b"tfdt": TrackFragmentBaseMediaDecodeTimeBox, - b"trun": TrackRunBox, - b"tfhd": TrackFragmentHeaderBox, - b"traf": ContainerBoxLazy, - b"mvex": ContainerBoxLazy, - b"mehd": MovieExtendsHeaderBox, - b"trex": TrackExtendsBox, - b"trak": ContainerBoxLazy, - b"mdia": ContainerBoxLazy, - b"tkhd": TrackHeaderBox, - b"mdat": MovieDataBox, - b"free": FreeBox, - b"skip": SkipBox, - b"mdhd": MediaHeaderBox, - b"hdlr": HandlerReferenceBox, - b"minf": ContainerBoxLazy, - b"vmhd": VideoMediaHeaderBox, - b"dinf": ContainerBoxLazy, - b"dref": DataReferenceBox, - b"stbl": ContainerBoxLazy, - b"stsd": SampleDescriptionBox, - b"stsz": SampleSizeBox, - b"stz2": SampleSizeBox2, - b"stts": TimeToSampleBox, - b"stss": SyncSampleBox, - b"stsc": SampleToChunkBox, - b"stco": ChunkOffsetBox, - b"co64": 
ChunkLargeOffsetBox, - b"smhd": SoundMediaHeaderBox, - b"sidx": SegmentIndexBox, - b"saiz": SampleAuxiliaryInformationSizesBox, - b"saio": SampleAuxiliaryInformationOffsetsBox, - b"btrt": BitRateBox, + "ftyp": FileTypeBox, + "styp": SegmentTypeBox, + "mvhd": MovieHeaderBox, + "moov": ContainerBoxLazy, + "moof": ContainerBoxLazy, + "mfhd": MovieFragmentHeaderBox, + "tfdt": TrackFragmentBaseMediaDecodeTimeBox, + "trun": TrackRunBox, + "tfhd": TrackFragmentHeaderBox, + "traf": ContainerBoxLazy, + "mvex": ContainerBoxLazy, + "mehd": MovieExtendsHeaderBox, + "trex": TrackExtendsBox, + "trak": ContainerBoxLazy, + "mdia": ContainerBoxLazy, + "tkhd": TrackHeaderBox, + "mdat": MovieDataBox, + "free": FreeBox, + "skip": SkipBox, + "mdhd": MediaHeaderBox, + "hdlr": HandlerReferenceBox, + "minf": ContainerBoxLazy, + "vmhd": VideoMediaHeaderBox, + "dinf": ContainerBoxLazy, + "dref": DataReferenceBox, + "stbl": ContainerBoxLazy, + "stsd": SampleDescriptionBox, + "stsz": SampleSizeBox, + "stz2": SampleSizeBox2, + "stts": TimeToSampleBox, + "stss": SyncSampleBox, + "stsc": SampleToChunkBox, + "stco": ChunkOffsetBox, + "co64": ChunkLargeOffsetBox, + "smhd": SoundMediaHeaderBox, + "sidx": SegmentIndexBox, + "saiz": SampleAuxiliaryInformationSizesBox, + "saio": SampleAuxiliaryInformationOffsetsBox, + "btrt": BitRateBox, # dash - b"tenc": TrackEncryptionBox, - b"pssh": ProtectionSystemHeaderBox, - b"senc": SampleEncryptionBox, - b"sinf": ProtectionSchemeInformationBox, - b"frma": OriginalFormatBox, - b"schm": SchemeTypeBox, - b"schi": ContainerBoxLazy, + "tenc": TrackEncryptionBox, + "pssh": ProtectionSystemHeaderBox, + "senc": SampleEncryptionBox, + "sinf": ProtectionSchemeInformationBox, + "frma": OriginalFormatBox, + "schm": SchemeTypeBox, + "schi": ContainerBoxLazy, # piff - b"uuid": UUIDBox, + "uuid": UUIDBox, # HDS boxes - b'abst': HDSSegmentBox, - b'asrt': HDSSegmentRunBox, - b'afrt': HDSFragmentRunBox + "abst": HDSSegmentBox, + "asrt": HDSSegmentRunBox, + "afrt": 
HDSFragmentRunBox }, default=RawBox)), "end" / Tell )) ContainerBox = Struct( - "type" / String(4, padchar=b" ", paddir="right"), + "type" / PaddedString(4, "ascii"), "children" / GreedyRange(Box) ) diff --git a/tests/test_box.py b/tests/test_box.py index 44937ed..7346cde 100644 --- a/tests/test_box.py +++ b/tests/test_box.py @@ -28,20 +28,20 @@ def test_ftyp_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00\x18ftypiso5\x00\x00\x00\x01iso5avc1'), Container(offset=0) - (type=b"ftyp") - (major_brand=b"iso5") + (type="ftyp") + (major_brand="iso5") (minor_version=1) - (compatible_brands=[b"iso5", b"avc1"]) + (compatible_brands=["iso5", "avc1"]) (end=24) ) def test_ftyp_build(self): self.assertEqual( Box.build(dict( - type=b"ftyp", - major_brand=b"iso5", + type="ftyp", + major_brand="iso5", minor_version=1, - compatible_brands=[b"iso5", b"avc1"])), + compatible_brands=["iso5", "avc1"])), b'\x00\x00\x00\x18ftypiso5\x00\x00\x00\x01iso5avc1') def test_mdhd_parse(self): @@ -49,7 +49,7 @@ def test_mdhd_parse(self): Box.parse( b'\x00\x00\x00\x20mdhd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00U\xc4\x00\x00'), Container(offset=0) - (type=b"mdhd")(version=0)(flags=0) + (type="mdhd")(version=0)(flags=0) (creation_time=0) (modification_time=0) (timescale=1000000) @@ -60,7 +60,7 @@ def test_mdhd_parse(self): def test_mdhd_build(self): mdhd_data = Box.build(dict( - type=b"mdhd", + type="mdhd", creation_time=0, modification_time=0, timescale=1000000, @@ -71,7 +71,7 @@ def test_mdhd_build(self): b'\x00\x00\x00\x20mdhd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00U\xc4\x00\x00') mdhd_data64 = Box.build(dict( - type=b"mdhd", + type="mdhd", version=1, creation_time=0, modification_time=0, @@ -84,11 +84,11 @@ def test_mdhd_build(self): def test_moov_build(self): moov = \ - Container(type=b"moov")(children=[ # 96 bytes - Container(type=b"mvex")(children=[ # 88 bytes - 
Container(type=b"mehd")(version=0)(flags=0)(fragment_duration=0), # 16 bytes - Container(type=b"trex")(track_ID=1), # 32 bytes - Container(type=b"trex")(track_ID=2), # 32 bytes + Container(type="moov")(children=[ # 96 bytes + Container(type="mvex")(children=[ # 88 bytes + Container(type="mehd")(version=0)(flags=0)(fragment_duration=0), # 16 bytes + Container(type="trex")(track_ID=1), # 32 bytes + Container(type="trex")(track_ID=2), # 32 bytes ]) ]) @@ -109,13 +109,13 @@ def test_smhd_parse(self): self.assertEqual( Box.parse(in_bytes + b'padding'), Container(offset=0) - (type=b"smhd")(version=0)(flags=0) + (type="smhd")(version=0)(flags=0) (balance=0)(reserved=0)(end=len(in_bytes)) ) def test_smhd_build(self): smhd_data = Box.build(dict( - type=b"smhd", + type="smhd", balance=0)) self.assertEqual(len(smhd_data), 16), self.assertEqual(smhd_data, b'\x00\x00\x00\x10smhd\x00\x00\x00\x00\x00\x00\x00\x00') @@ -126,7 +126,7 @@ def test_stsd_parse(self): self.assertEqual( Box.parse(in_bytes + b'padding'), Container(offset=0) - (type=b"stsd")(version=0)(flags=0) - (entries=[Container(format=b'tx3g')(data_reference_index=1)(data=tx3g_data)]) + (type="stsd")(version=0)(flags=0) + (entries=[Container(format="tx3g")(data_reference_index=1)(data=tx3g_data)]) (end=len(in_bytes)) ) diff --git a/tests/test_dashboxes.py b/tests/test_dashboxes.py index e1b014b..f039b12 100644 --- a/tests/test_dashboxes.py +++ b/tests/test_dashboxes.py @@ -29,7 +29,7 @@ def test_tenc_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00 tenc\x00\x00\x00\x00\x00\x00\x01\x083{\x96C!\xb6CU\x9eY>\xcc\xb4l~\xf7'), Container(offset=0) - (type=b"tenc") + (type="tenc") (version=0) (flags=0) (is_encrypted=1) @@ -42,7 +42,7 @@ def test_tenc_parse(self): def test_tenc_build(self): self.assertEqual( Box.build(dict( - type=b"tenc", + type="tenc", key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), iv_size=8, is_encrypted=1)), diff --git a/tests/test_util.py b/tests/test_util.py index e0f7fbb..5e064d7 100644 --- 
a/tests/test_util.py +++ b/tests/test_util.py @@ -26,62 +26,62 @@ class BoxTests(unittest.TestCase): - box_data = Container(type=b"demo")(children=[ - Container(type=b"a ")(id=1), - Container(type=b"b ")(id=2), - Container(type=b"c ")(children=[ - Container(type=b"a ")(id=3), - Container(type=b"b ")(id=4), + box_data = Container(type="demo")(children=[ + Container(type="a ")(id=1), + Container(type="b ")(id=2), + Container(type="c ")(children=[ + Container(type="a ")(id=3), + Container(type="b ")(id=4), ]), - Container(type=b"d ")(id=5), + Container(type="d ")(id=5), ]) - box_extended_data = Container(type=b"test")(children=[ - Container(type=b"a ")(id=1, extended_type=b"e--a"), - Container(type=b"b ")(id=2, extended_type=b"e--b"), + box_extended_data = Container(type="test")(children=[ + Container(type="a ")(id=1, extended_type=b"e--a"), + Container(type="b ")(id=2, extended_type=b"e--b"), ]) def test_find(self): self.assertListEqual( - list(BoxUtil.find(self.box_data, b"b ")), - [Container(type=b"b ")(id=2), Container(type=b"b ")(id=4)] + list(BoxUtil.find(self.box_data, "b ")), + [Container(type="b ")(id=2), Container(type="b ")(id=4)] ) def test_find_after_nest(self): self.assertListEqual( - list(BoxUtil.find(self.box_data, b"d ")), - [Container(type=b"d ")(id=5)] + list(BoxUtil.find(self.box_data, "d ")), + [Container(type="d ")(id=5)] ) def test_find_nested_type(self): self.assertListEqual( - list(BoxUtil.find(self.box_data, b"c ")), - [Container(type=b"c ")(children=[ - Container(type=b"a ")(id=3), - Container(type=b"b ")(id=4), + list(BoxUtil.find(self.box_data, "c ")), + [Container(type="c ")(children=[ + Container(type="a ")(id=3), + Container(type="b ")(id=4), ])] ) def test_find_empty(self): self.assertListEqual( - list(BoxUtil.find(self.box_data, b"f ")), + list(BoxUtil.find(self.box_data, "f ")), [] ) def test_first(self): self.assertEqual( - BoxUtil.first(self.box_data, b"b "), - Container(type=b"b ")(id=2) + BoxUtil.first(self.box_data, "b "), + 
Container(type="b ")(id=2) ) def test_first_missing(self): self.assertRaises( BoxNotFound, - BoxUtil.first, self.box_data, b"f ", + BoxUtil.first, self.box_data, "f ", ) def test_find_extended(self): self.assertListEqual( list(BoxUtil.find_extended(self.box_extended_data, b"e--a")), - [Container(type=b"a ")(id=1, extended_type=b"e--a")] + [Container(type="a ")(id=1, extended_type=b"e--a")] ) From 4e4a481449e4d5723e0a5f8f5929192541acfa52 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Wed, 5 Apr 2023 14:09:40 +0100 Subject: [PATCH 03/17] Flip argument order on all uses of Const For some reason sometime between 2.8.8 and 2.8.22 the arguments for Const were flipped. --- src/pymp4/parser.py | 94 ++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 33f3d9d..1fb4b5a 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -136,9 +136,9 @@ def _sizeof(self, context, path): "rate" / Default(Int32sb, 65536), "volume" / Default(Int16sb, 256), # below could be just Padding(10) but why not - Const(Int16ub, 0), - Const(Int32ub, 0), - Const(Int32ub, 0), + Const(0, Int16ub), + Const(0, Int32ub), + Const(0, Int32ub), "matrix" / Default(Int32sb[9], UNITY_MATRIX), "pre_defined" / Default(Int32ub[6], [0] * 6), "next_track_ID" / Default(Int32ub, 0xffffffff) @@ -248,7 +248,7 @@ def _encode(self, obj, context): MediaHeaderBox = Struct( "type" / Const(b"mdhd"), "version" / Default(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "creation_time" / IfThenElse(this.version == 1, Int64ub, Int32ub), "modification_time" / IfThenElse(this.version == 1, Int64ub, Int32ub), "timescale" / Int32ub, @@ -262,8 +262,8 @@ def _encode(self, obj, context): HandlerReferenceBox = Struct( "type" / Const(b"hdlr"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), Padding(4, pattern=b"\x00"), 
"handler_type" / PaddedString(4, "ascii"), Padding(12, pattern=b"\x00"), # Int32ub[3] @@ -275,7 +275,7 @@ def _encode(self, obj, context): VideoMediaHeaderBox = Struct( "type" / Const(b"vmhd"), "version" / Default(Int8ub, 0), - "flags" / Const(Int24ub, 1), + "flags" / Const(1, Int24ub), "graphics_mode" / Default(Int16ub, 0), "opcolor" / Struct( "red" / Default(Int16ub, 0), @@ -286,7 +286,7 @@ def _encode(self, obj, context): DataEntryUrlBox = PrefixedIncludingSize(Int32ub, Struct( "type" / Const(b"url "), - "version" / Const(Int8ub, 0), + "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~this._.location) ), @@ -295,7 +295,7 @@ def _encode(self, obj, context): DataEntryUrnBox = PrefixedIncludingSize(Int32ub, Struct( "type" / Const(b"urn "), - "version" / Const(Int8ub, 0), + "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~(this._.name & this._.location)) ), @@ -305,7 +305,7 @@ def _encode(self, obj, context): DataReferenceBox = Struct( "type" / Const(b"dref"), - "version" / Const(Int8ub, 0), + "version" / Const(0, Int8ub), "flags" / Default(Int24ub, 0), "data_entries" / PrefixedArray(Int32ub, Select(DataEntryUrnBox, DataEntryUrlBox)), ) @@ -314,12 +314,12 @@ def _encode(self, obj, context): MP4ASampleEntryBox = Struct( "version" / Default(Int16ub, 0), - "revision" / Const(Int16ub, 0), - "vendor" / Const(Int32ub, 0), + "revision" / Const(0, Int16ub), + "vendor" / Const(0, Int32ub), "channels" / Default(Int16ub, 2), "bits_per_sample" / Default(Int16ub, 16), "compression_id" / Default(Int16sb, 0), - "packet_size" / Const(Int16ub, 0), + "packet_size" / Const(0, Int16ub), "sampling_rate" / Int16ub, Padding(2) ) @@ -334,7 +334,7 @@ def _encode(self, obj, context): AAVC = Struct( - "version" / Const(Int8ub, 1), + "version" / Const(1, Int8ub), "profile" / Int8ub, "compatibility" / Int8ub, "level" / Int8ub, @@ -348,7 +348,7 @@ def _encode(self, obj, context): HVCC = Struct( 
EmbeddedBitStruct( - "version" / Const(BitsInteger(8), 1), + "version" / Const(1, BitsInteger(8)), "profile_space" / BitsInteger(2), "general_tier_flag" / BitsInteger(1), "general_profile" / BitsInteger(5), @@ -377,7 +377,7 @@ def _encode(self, obj, context): AVC1SampleEntryBox = Struct( "version" / Default(Int16ub, 0), - "revision" / Const(Int16ub, 0), + "revision" / Const(0, Int16ub), "vendor" / Default(PaddedString(4, "ascii"), "brdy"), "temporal_quality" / Default(Int32ub, 0), "spatial_quality" / Default(Int32ub, 0), @@ -387,7 +387,7 @@ def _encode(self, obj, context): Padding(2), "vertical_resolution" / Default(Int16ub, 72), # TODO: actually a fixed point decimal Padding(2), - "data_size" / Const(Int32ub, 0), + "data_size" / Const(0, Int32ub), "frame_count" / Default(Int16ub, 1), "compressor_name" / Default(PaddedString(32, "ascii"), None), "depth" / Default(Int16ub, 24), @@ -425,14 +425,14 @@ def _encode(self, obj, context): SampleDescriptionBox = Struct( "type" / Const(b"stsd"), "version" / Default(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "entries" / PrefixedArray(Int32ub, SampleEntryBox) ) SampleSizeBox = Struct( "type" / Const(b"stsz"), "version" / Int8ub, - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "sample_size" / Int32ub, "sample_count" / Int32ub, "entry_sizes" / If(this.sample_size == 0, Array(this.sample_count, Int32ub)) @@ -441,7 +441,7 @@ def _encode(self, obj, context): SampleSizeBox2 = Struct( "type" / Const(b"stz2"), "version" / Int8ub, - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), Padding(3, pattern=b"\x00"), "field_size" / Int8ub, "sample_count" / Int24ub, @@ -452,14 +452,14 @@ def _encode(self, obj, context): SampleDegradationPriorityBox = Struct( "type" / Const(b"stdp"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), ) TimeToSampleBox = Struct( "type" / Const(b"stts"), - "version" / Const(Int8ub, 0), 
- "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( "sample_count" / Int32ub, "sample_delta" / Int32ub, @@ -468,8 +468,8 @@ def _encode(self, obj, context): SyncSampleBox = Struct( "type" / Const(b"stss"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( "sample_number" / Int32ub, )), []) @@ -477,8 +477,8 @@ def _encode(self, obj, context): SampleToChunkBox = Struct( "type" / Const(b"stsc"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( "first_chunk" / Int32ub, "samples_per_chunk" / Int32ub, @@ -488,8 +488,8 @@ def _encode(self, obj, context): ChunkOffsetBox = Struct( "type" / Const(b"stco"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( "chunk_offset" / Int32ub, )), []) @@ -497,8 +497,8 @@ def _encode(self, obj, context): ChunkLargeOffsetBox = Struct( "type" / Const(b"co64"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "entries" / PrefixedArray(Int32ub, Struct( "chunk_offset" / Int64ub, )) @@ -508,15 +508,15 @@ def _encode(self, obj, context): MovieFragmentHeaderBox = Struct( "type" / Const(b"mfhd"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "sequence_number" / Int32ub ) TrackFragmentBaseMediaDecodeTimeBox = Struct( "type" / Const(b"tfdt"), "version" / Int8ub, - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "baseMediaDecodeTime" / Switch(this.version, {1: Int64ub, 0: Int32ub}) ) @@ -585,7 +585,7 @@ def 
_encode(self, obj, context): MovieExtendsHeaderBox = Struct( "type" / Const(b"mehd"), "version" / Default(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "fragment_duration" / IfThenElse(this.version == 1, Default(Int64ub, 0), Default(Int32ub, 0)) @@ -593,8 +593,8 @@ def _encode(self, obj, context): TrackExtendsBox = Struct( "type" / Const(b"trex"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "track_ID" / Int32ub, "default_sample_description_index" / Default(Int32ub, 1), "default_sample_duration" / Default(Int32ub, 0), @@ -605,7 +605,7 @@ def _encode(self, obj, context): SegmentIndexBox = Struct( "type" / Const(b"sidx"), "version" / Int8ub, - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "reference_ID" / Int32ub, "timescale" / Int32ub, "earliest_presentation_time" / IfThenElse(this.version == 0, Int32ub, Int64ub), @@ -624,7 +624,7 @@ def _encode(self, obj, context): SampleAuxiliaryInformationSizesBox = Struct( "type" / Const(b"saiz"), - "version" / Const(Int8ub, 0), + "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "has_aux_info_type" / Flag, @@ -664,10 +664,10 @@ def _encode(self, obj, context): SoundMediaHeaderBox = Struct( "type" / Const(b"smhd"), - "version" / Const(Int8ub, 0), - "flags" / Const(Int24ub, 0), + "version" / Const(0, Int8ub), + "flags" / Const(0, Int24ub), "balance" / Default(Int16sb, 0), - "reserved" / Const(Int16ub, 0) + "reserved" / Const(0, Int16ub) ) @@ -684,7 +684,7 @@ def _encode(self, obj, context): ProtectionSystemHeaderBox = Struct( "type" / If(this._.type != "uuid", Const(b"pssh")), "version" / Rebuild(Int8ub, lambda ctx: 1 if (hasattr(ctx, "key_IDs") and ctx.key_IDs) else 0), - "flags" / Const(Int24ub, 0), + "flags" / Const(0, Int24ub), "system_ID" / UUIDBytes(Bytes(16)), "key_IDs" / Default(If(this.version == 1, PrefixedArray(Int32ub, UUIDBytes(Bytes(16)))), @@ -696,8 +696,8 @@ def _encode(self, 
obj, context): "type" / If(this._.type != "uuid", Const(b"tenc")), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), - "_reserved0" / Const(Int8ub, 0), - "_reserved1" / Const(Int8ub, 0), + "_reserved0" / Const(0, Int8ub), + "_reserved1" / Const(0, Int8ub), "is_encrypted" / Int8ub, "iv_size" / Int8ub, "key_ID" / UUIDBytes(Bytes(16)), @@ -710,7 +710,7 @@ def _encode(self, obj, context): SampleEncryptionBox = Struct( "type" / If(this._.type != "uuid", Const(b"senc")), - "version" / Const(Int8ub, 0), + "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(22), "has_subsample_encryption_info" / Flag, From 56bf14d4e89187db121a7c56e16169551cddc3f8 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Wed, 5 Apr 2023 16:26:44 +0100 Subject: [PATCH 04/17] Replace PrefixedIncludingSize with Prefixed with includelength=True The includelength parameter was added in `2.8.11`. Also removes TellMinusSizeOf as it's now unused. --- src/pymp4/parser.py | 92 ++++++--------------------------------------- 1 file changed, 11 insertions(+), 81 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 1fb4b5a..e79a8ef 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -18,7 +18,6 @@ from uuid import UUID from construct import * -import construct.core from construct.lib import * log = logging.getLogger(__name__) @@ -26,60 +25,6 @@ UNITY_MATRIX = [0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000] -class PrefixedIncludingSize(Subconstruct): - __slots__ = ["name", "lengthfield", "subcon"] - - def __init__(self, lengthfield, subcon): - super(PrefixedIncludingSize, self).__init__(subcon) - self.lengthfield = lengthfield - - def _parse(self, stream, context, path): - try: - lengthfield_size = self.lengthfield.sizeof() - length = self.lengthfield._parse(stream, context, path) - except SizeofError: - offset_start = stream.tell() - length = self.lengthfield._parse(stream, context, path) - lengthfield_size = stream.tell() - offset_start - - stream2 = 
BoundBytesIO(stream, length - lengthfield_size) - obj = self.subcon._parse(stream2, context, path) - return obj - - def _build(self, obj, stream, context, path): - try: - # needs to be both fixed size, seekable and tellable (third not checked) - self.lengthfield.sizeof() - if not stream.seekable: - raise SizeofError - offset_start = stream.tell() - self.lengthfield._build(0, stream, context, path) - self.subcon._build(obj, stream, context, path) - offset_end = stream.tell() - stream.seek(offset_start) - self.lengthfield._build(offset_end - offset_start, stream, context, path) - stream.seek(offset_end) - except SizeofError: - data = self.subcon.build(obj, context) - sl, p_sl = 0, 0 - dlen = len(data) - # do..while - i = 0 - while True: - i += 1 - p_sl = sl - sl = len(self.lengthfield.build(dlen + sl)) - if p_sl == sl: break - - self.lengthfield._build(dlen + sl, stream, context, path) - else: - self.lengthfield._build(len(data), stream, context, path) - construct.core._write_stream(stream, len(data), data) - - def _sizeof(self, context, path): - return self.lengthfield._sizeof(context, path) + self.subcon._sizeof(context, path) - - # Header box FileTypeBox = Struct( @@ -284,16 +229,16 @@ def _encode(self, obj, context): ), ) -DataEntryUrlBox = PrefixedIncludingSize(Int32ub, Struct( +DataEntryUrlBox = Prefixed(Int32ub, Struct( "type" / Const(b"url "), "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~this._.location) ), "location" / If(~this.flags.self_contained, CString(encoding="utf8")), -)) +), includelength=True) -DataEntryUrnBox = PrefixedIncludingSize(Int32ub, Struct( +DataEntryUrnBox = Prefixed(Int32ub, Struct( "type" / Const(b"urn "), "version" / Const(0, Int8ub), "flags" / BitStruct( @@ -301,7 +246,7 @@ def _encode(self, obj, context): ), "name" / If(this.flags == 0, CString(encoding="utf8")), "location" / If(this.flags == 0, CString(encoding="utf8")), -)) +), includelength=True) DataReferenceBox = Struct( 
"type" / Const(b"dref"), @@ -392,17 +337,17 @@ def _encode(self, obj, context): "compressor_name" / Default(PaddedString(32, "ascii"), None), "depth" / Default(Int16ub, 24), "color_table_id" / Default(Int16sb, -1), - "avc_data" / PrefixedIncludingSize(Int32ub, Struct( + "avc_data" / Prefixed(Int32ub, Struct( "type" / PaddedString(4, "ascii"), Embedded(Switch(this.type, { "avcC": AAVC, "hvcC": HVCC, }, Struct("data" / GreedyBytes))) - )), + ), includelength=True), "sample_info" / LazyBound(lambda _: GreedyRange(Box)) ) -SampleEntryBox = PrefixedIncludingSize(Int32ub, Struct( +SampleEntryBox = Prefixed(Int32ub, Struct( "format" / PaddedString(4, "ascii"), Padding(6, pattern=b"\x00"), "data_reference_index" / Default(Int16ub, 1), @@ -413,7 +358,7 @@ def _encode(self, obj, context): "avc1": AVC1SampleEntryBox, "encv": AVC1SampleEntryBox }, Struct("data" / GreedyBytes))) -)) +), includelength=True) BitRateBox = Struct( "type" / Const(b"btrt"), @@ -761,23 +706,8 @@ def _encode(self, obj, context): ContainerBoxLazy = LazyBound(lambda ctx: ContainerBox) -class TellMinusSizeOf(Subconstruct): - def __init__(self, subcon): - super(TellMinusSizeOf, self).__init__(subcon) - self.flagbuildnone = True - - def _parse(self, stream, context, path): - return stream.tell() - self.subcon.sizeof(context) - - def _build(self, obj, stream, context, path): - return b"" - - def sizeof(self, context=None, **kw): - return 0 - - -Box = PrefixedIncludingSize(Int32ub, Struct( - "offset" / TellMinusSizeOf(Int32ub), +Box = Prefixed(Int32ub, Struct( + "offset" / Tell, "type" / Peek(PaddedString(4, "ascii")), Embedded(Switch(this.type, { "ftyp": FileTypeBox, @@ -835,7 +765,7 @@ def sizeof(self, context=None, **kw): "afrt": HDSFragmentRunBox }, default=RawBox)), "end" / Tell -)) +), includelength=True) ContainerBox = Struct( "type" / PaddedString(4, "ascii"), From 04e725a7174c8574af1bc8b7bf34e146c03b73fd Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Wed, 5 Apr 2023 23:11:15 +0100 Subject: [PATCH 
05/17] Strings now require explicit specification of encoding The default for CString and PascalString was previously "ascii" (from what I can tell) so I specified "ascii" for everything that did not explicitly specify an encoding. --- src/pymp4/parser.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index e79a8ef..873b412 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -136,11 +136,11 @@ "time_scale" / Int32ub, "current_media_time" / Int64ub, "smpte_time_code_offset" / Int64ub, - "movie_identifier" / CString(), - "server_entry_table" / PrefixedArray(Int8ub, CString()), - "quality_entry_table" / PrefixedArray(Int8ub, CString()), - "drm_data" / CString(), - "metadata" / CString(), + "movie_identifier" / CString("ascii"), + "server_entry_table" / PrefixedArray(Int8ub, CString("ascii")), + "quality_entry_table" / PrefixedArray(Int8ub, CString("ascii")), + "drm_data" / CString("ascii"), + "metadata" / CString("ascii"), "segment_run_table" / PrefixedArray(Int8ub, LazyBound(lambda x: Box)), "fragment_run_table" / PrefixedArray(Int8ub, LazyBound(lambda x: Box)) ) @@ -149,7 +149,7 @@ "type" / Const(b"asrt"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), - "quality_entry_table" / PrefixedArray(Int8ub, CString()), + "quality_entry_table" / PrefixedArray(Int8ub, CString("ascii")), "segment_run_enteries" / PrefixedArray(Int32ub, Struct( "first_segment" / Int32ub, "fragments_per_segment" / Int32ub @@ -164,7 +164,7 @@ "update" / Flag ), "time_scale" / Int32ub, - "quality_entry_table" / PrefixedArray(Int8ub, CString()), + "quality_entry_table" / PrefixedArray(Int8ub, CString("ascii")), "fragment_run_enteries" / PrefixedArray(Int32ub, Struct( "first_fragment" / Int32ub, "first_fragment_timestamp" / Int64ub, @@ -212,7 +212,7 @@ def _encode(self, obj, context): Padding(4, pattern=b"\x00"), "handler_type" / PaddedString(4, "ascii"), Padding(12, pattern=b"\x00"), 
# Int32ub[3] - "name" / CString(encoding="utf8") + "name" / CString("utf8") ) # Boxes contained by Media Info Box @@ -235,7 +235,7 @@ def _encode(self, obj, context): "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~this._.location) ), - "location" / If(~this.flags.self_contained, CString(encoding="utf8")), + "location" / If(~this.flags.self_contained, CString("utf8")), ), includelength=True) DataEntryUrnBox = Prefixed(Int32ub, Struct( @@ -244,8 +244,8 @@ def _encode(self, obj, context): "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~(this._.name & this._.location)) ), - "name" / If(this.flags == 0, CString(encoding="utf8")), - "location" / If(this.flags == 0, CString(encoding="utf8")), + "name" / If(this.flags == 0, CString("utf8")), + "location" / If(this.flags == 0, CString("utf8")), ), includelength=True) DataReferenceBox = Struct( @@ -287,8 +287,8 @@ def _encode(self, obj, context): Padding(6, pattern=b'\x01'), "nal_unit_length_field" / Default(BitsInteger(2), 3), ), - "sps" / Default(PrefixedArray(MaskedInteger(Int8ub), PascalString(Int16ub)), []), - "pps" / Default(PrefixedArray(Int8ub, PascalString(Int16ub)), []) + "sps" / Default(PrefixedArray(MaskedInteger(Int8ub), PascalString(Int16ub, "ascii")), []), + "pps" / Default(PrefixedArray(Int8ub, PascalString(Int16ub, "ascii")), []) ) HVCC = Struct( @@ -682,7 +682,7 @@ def _encode(self, obj, context): "flags" / Default(Int24ub, 0), "scheme_type" / Default(PaddedString(4, "ascii"), "cenc"), "scheme_version" / Default(Int32ub, 0x00010000), - "schema_uri" / Default(If(this.flags & 1 == 1, CString()), None) + "schema_uri" / Default(If(this.flags & 1 == 1, CString("ascii")), None) ) ProtectionSchemeInformationBox = Struct( From f24c82649266a45a871ddec0c76a934cc01b6435 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 00:19:11 +0100 Subject: [PATCH 06/17] Simplify Container builds in tests Construct 2.10 (maybe even 2.9) no longer allows you to do e.g., 
`Container(foo=1)(bar=2)` as an alternative to `Container(foo=1, bar=2)` and I honestly can't blame them; it makes the code extremely annoying to read and undoubtedly to parse as well. This commit simplifies them to a singular construct call under a single Container object. --- tests/test_box.py | 73 +++++++++++++++++++++++++---------------- tests/test_dashboxes.py | 20 ++++++----- tests/test_util.py | 59 +++++++++++++++++++++------------ 3 files changed, 94 insertions(+), 58 deletions(-) diff --git a/tests/test_box.py b/tests/test_box.py index 7346cde..bba6704 100644 --- a/tests/test_box.py +++ b/tests/test_box.py @@ -27,12 +27,14 @@ class BoxTests(unittest.TestCase): def test_ftyp_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00\x18ftypiso5\x00\x00\x00\x01iso5avc1'), - Container(offset=0) - (type="ftyp") - (major_brand="iso5") - (minor_version=1) - (compatible_brands=["iso5", "avc1"]) - (end=24) + Container( + offset=0, + type="ftyp", + major_brand="iso5", + minor_version=1, + compatible_brands=["iso5", "avc1"], + end=24 + ) ) def test_ftyp_build(self): @@ -46,16 +48,19 @@ def test_ftyp_build(self): def test_mdhd_parse(self): self.assertEqual( - Box.parse( - b'\x00\x00\x00\x20mdhd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00U\xc4\x00\x00'), - Container(offset=0) - (type="mdhd")(version=0)(flags=0) - (creation_time=0) - (modification_time=0) - (timescale=1000000) - (duration=0) - (language="und") - (end=32) + Box.parse(b'\x00\x00\x00\x20mdhd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00U\xc4\x00\x00'), + Container( + offset=0, + type="mdhd", + version=0, + flags=0, + creation_time=0, + modification_time=0, + timescale=1000000, + duration=0, + language="und", + end=32 + ) ) def test_mdhd_build(self): @@ -84,11 +89,11 @@ def test_mdhd_build(self): def test_moov_build(self): moov = \ - Container(type="moov")(children=[ # 96 bytes - Container(type="mvex")(children=[ # 88 bytes -
Container(type="mehd")(version=0)(flags=0)(fragment_duration=0), # 16 bytes - Container(type="trex")(track_ID=1), # 32 bytes - Container(type="trex")(track_ID=2), # 32 bytes + Container(type="moov", children=[ # 96 bytes + Container(type="mvex", children=[ # 88 bytes + Container(type="mehd", version=0, flags=0, fragment_duration=0), # 16 bytes + Container(type="trex", track_ID=1), # 32 bytes + Container(type="trex", track_ID=2), # 32 bytes ]) ]) @@ -108,9 +113,15 @@ def test_smhd_parse(self): in_bytes = b'\x00\x00\x00\x10smhd\x00\x00\x00\x00\x00\x00\x00\x00' self.assertEqual( Box.parse(in_bytes + b'padding'), - Container(offset=0) - (type="smhd")(version=0)(flags=0) - (balance=0)(reserved=0)(end=len(in_bytes)) + Container( + offset=0, + type="smhd", + version=0, + flags=0, + balance=0, + reserved=0, + end=len(in_bytes) + ) ) def test_smhd_build(self): @@ -125,8 +136,14 @@ def test_stsd_parse(self): in_bytes = b'\x00\x00\x00\x50stsd\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x40tx3g\x00\x00\x00\x00\x00\x00\x00\x01' + tx3g_data self.assertEqual( Box.parse(in_bytes + b'padding'), - Container(offset=0) - (type="stsd")(version=0)(flags=0) - (entries=[Container(format="tx3g")(data_reference_index=1)(data=tx3g_data)]) - (end=len(in_bytes)) + Container( + offset=0, + type="stsd", + version=0, + flags=0, + entries=[ + Container(format='tx3g', data_reference_index=1, data=tx3g_data) + ], + end=len(in_bytes) + ) ) diff --git a/tests/test_dashboxes.py b/tests/test_dashboxes.py index f039b12..1c735d6 100644 --- a/tests/test_dashboxes.py +++ b/tests/test_dashboxes.py @@ -28,15 +28,17 @@ class BoxTests(unittest.TestCase): def test_tenc_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00 tenc\x00\x00\x00\x00\x00\x00\x01\x083{\x96C!\xb6CU\x9eY>\xcc\xb4l~\xf7'), - Container(offset=0) - (type="tenc") - (version=0) - (flags=0) - (is_encrypted=1) - (iv_size=8) - (key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7')) - (constant_iv=None) - (end=32) + Container( + offset=0, + 
type="tenc", + version=0, + flags=0, + is_encrypted=1, + iv_size=8, + key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), + constant_iv=None, + end=32 + ) ) def test_tenc_build(self): diff --git a/tests/test_util.py b/tests/test_util.py index 5e064d7..d30ac3d 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -26,39 +26,56 @@ class BoxTests(unittest.TestCase): - box_data = Container(type="demo")(children=[ - Container(type="a ")(id=1), - Container(type="b ")(id=2), - Container(type="c ")(children=[ - Container(type="a ")(id=3), - Container(type="b ")(id=4), - ]), - Container(type="d ")(id=5), - ]) - - box_extended_data = Container(type="test")(children=[ - Container(type="a ")(id=1, extended_type=b"e--a"), - Container(type="b ")(id=2, extended_type=b"e--b"), - ]) + box_data = Container( + type="demo", + children=[ + Container(type="a ", id=1), + Container(type="b ", id=2), + Container( + type="c ", + children=[ + Container(type="a ", id=3), + Container(type="b ", id=4) + ] + ), + Container(type="d ", id=5) + ] + ) + + box_extended_data = Container( + type="test", + children=[ + Container( + type="a ", + id=1, + extended_type=b"e--a" + ), + Container( + type="b ", + id=2, + extended_type=b"e--b" + ) + ] + ) def test_find(self): self.assertListEqual( list(BoxUtil.find(self.box_data, "b ")), - [Container(type="b ")(id=2), Container(type="b ")(id=4)] + [Container(type="b ", id=2), Container(type="b ", id=4)] ) def test_find_after_nest(self): self.assertListEqual( list(BoxUtil.find(self.box_data, "d ")), - [Container(type="d ")(id=5)] + [Container(type="d ", id=5)] ) def test_find_nested_type(self): self.assertListEqual( list(BoxUtil.find(self.box_data, "c ")), - [Container(type="c ")(children=[ - Container(type="a ")(id=3), - Container(type="b ")(id=4), + [Container(type="c ", children=[ + Container(type="a ", id=3), + Container(type="b ", id=4), ])] ) @@ -71,7 +88,7 @@ def test_find_empty(self): def test_first(self): self.assertEqual( 
BoxUtil.first(self.box_data, "b "), - Container(type="b ")(id=2) + Container(type="b ", id=2) ) def test_first_missing(self): @@ -83,5 +100,5 @@ def test_first_missing(self): def test_find_extended(self): self.assertListEqual( list(BoxUtil.find_extended(self.box_extended_data, b"e--a")), - [Container(type="a ")(id=1, extended_type=b"e--a")] + [Container(type="a ", id=1, extended_type=b"e--a")] ) From 2cf4166d55e921bdbfd9f37b36d5c7bc47ecb376 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 00:35:48 +0100 Subject: [PATCH 07/17] Update _decode and _encode parameters on Adapter classes --- src/pymp4/parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 873b412..bbb8867 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -177,13 +177,13 @@ # Boxes contained by Media Box class ISO6392TLanguageCode(Adapter): - def _decode(self, obj, context): + def _decode(self, obj, context, path): """ Get the python representation of the obj """ return b''.join(map(int2byte, [c + 0x60 for c in bytearray(obj)])).decode("utf8") - def _encode(self, obj, context): + def _encode(self, obj, context, path): """ Get the bytes representation of the obj """ @@ -271,10 +271,10 @@ def _encode(self, obj, context): class MaskedInteger(Adapter): - def _decode(self, obj, context): + def _decode(self, obj, context, path): return obj & 0x1F - def _encode(self, obj, context): + def _encode(self, obj, context, path): return obj & 0x1F @@ -619,10 +619,10 @@ def _encode(self, obj, context): # DASH Boxes class UUIDBytes(Adapter): - def _decode(self, obj, context): + def _decode(self, obj, context, path): return UUID(bytes=obj) - def _encode(self, obj, context): + def _encode(self, obj, context, path): return obj.bytes From c67c0ba6220c95c59bb4a037a7adf7cec39d0a5a Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 01:28:51 +0100 Subject: [PATCH 08/17] Move custom Adapters to adapters.py --- 
src/pymp4/adapters.py | 30 ++++++++++++++++++++++++++++++ src/pymp4/parser.py | 33 ++------------------------------- 2 files changed, 32 insertions(+), 31 deletions(-) create mode 100644 src/pymp4/adapters.py diff --git a/src/pymp4/adapters.py b/src/pymp4/adapters.py new file mode 100644 index 0000000..3e9313c --- /dev/null +++ b/src/pymp4/adapters.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from abc import ABC +from uuid import UUID + +from construct import Adapter, int2byte + + +class ISO6392TLanguageCode(Adapter, ABC): + def _decode(self, obj, context, path): + return b"".join(map(int2byte, [c + 0x60 for c in bytearray(obj)])).decode("utf8") + + def _encode(self, obj, context, path): + return [c - 0x60 for c in bytearray(obj.encode("utf8"))] + + +class MaskedInteger(Adapter, ABC): + def _decode(self, obj, context, path): + return obj & 0x1F + + def _encode(self, obj, context, path): + return obj & 0x1F + + +class UUIDBytes(Adapter, ABC): + def _decode(self, obj, context, path): + return UUID(bytes=obj) + + def _encode(self, obj, context, path): + return obj.bytes diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index bbb8867..e75b20a 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -20,6 +20,8 @@ from construct import * from construct.lib import * +from pymp4.adapters import ISO6392TLanguageCode, MaskedInteger, UUIDBytes + log = logging.getLogger(__name__) UNITY_MATRIX = [0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000] @@ -176,20 +178,6 @@ # Boxes contained by Media Box -class ISO6392TLanguageCode(Adapter): - def _decode(self, obj, context, path): - """ - Get the python representation of the obj - """ - return b''.join(map(int2byte, [c + 0x60 for c in bytearray(obj)])).decode("utf8") - - def _encode(self, obj, context, path): - """ - Get the bytes representation of the obj - """ - return [c - 0x60 for c in bytearray(obj.encode("utf8"))] - - MediaHeaderBox = Struct( "type" / Const(b"mdhd"), "version" / Default(Int8ub, 0), @@ 
-269,15 +257,6 @@ def _encode(self, obj, context, path): Padding(2) ) - -class MaskedInteger(Adapter): - def _decode(self, obj, context, path): - return obj & 0x1F - - def _encode(self, obj, context, path): - return obj & 0x1F - - AAVC = Struct( "version" / Const(1, Int8ub), "profile" / Int8ub, @@ -618,14 +597,6 @@ def _encode(self, obj, context, path): # DASH Boxes -class UUIDBytes(Adapter): - def _decode(self, obj, context, path): - return UUID(bytes=obj) - - def _encode(self, obj, context, path): - return obj.bytes - - ProtectionSystemHeaderBox = Struct( "type" / If(this._.type != "uuid", Const(b"pssh")), "version" / Rebuild(Int8ub, lambda ctx: 1 if (hasattr(ctx, "key_IDs") and ctx.key_IDs) else 0), From 70f805a3948f14cb78017142086c870f0d30e6b1 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 01:32:00 +0100 Subject: [PATCH 09/17] Simplify use of Embed Switch on mvhd and tkhd boxes --- src/pymp4/parser.py | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index e75b20a..79bb71c 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -66,20 +66,10 @@ "type" / Const(b"mvhd"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), - Embedded(Switch(this.version, { - 1: Struct( - "creation_time" / Default(Int64ub, 0), - "modification_time" / Default(Int64ub, 0), - "timescale" / Default(Int32ub, 10000000), - "duration" / Int64ub - ), - 0: Struct( - "creation_time" / Default(Int32ub, 0), - "modification_time" / Default(Int32ub, 0), - "timescale" / Default(Int32ub, 10000000), - "duration" / Int32ub, - ), - })), + "creation_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), + "modification_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), + "timescale" / Default(Int32ub, 10000000), + "duration" / Switch(this.version, {0: Int32ub, 1: Int64ub}), "rate" / Default(Int32sb, 65536), "volume" / Default(Int16sb, 256), # 
below could be just Padding(10) but why not @@ -97,22 +87,11 @@ "type" / Const(b"tkhd"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 1), - Embedded(Switch(this.version, { - 1: Struct( - "creation_time" / Default(Int64ub, 0), - "modification_time" / Default(Int64ub, 0), - "track_ID" / Default(Int32ub, 1), - Padding(4), - "duration" / Default(Int64ub, 0), - ), - 0: Struct( - "creation_time" / Default(Int32ub, 0), - "modification_time" / Default(Int32ub, 0), - "track_ID" / Default(Int32ub, 1), - Padding(4), - "duration" / Default(Int32ub, 0), - ), - })), + "creation_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), + "modification_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), + "track_ID" / Default(Int32ub, 1), + Padding(4), + "duration" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), Padding(8), "layer" / Default(Int16sb, 0), "alternate_group" / Default(Int16sb, 0), From 0506105b071c7e07efc17f5b6aec3eda3a47c375 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 02:15:51 +0100 Subject: [PATCH 10/17] Add TellPlusSizeOf Subconstruct to fix Box end value --- src/pymp4/parser.py | 3 ++- src/pymp4/subconstructs.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 src/pymp4/subconstructs.py diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 79bb71c..c6dd151 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -21,6 +21,7 @@ from construct.lib import * from pymp4.adapters import ISO6392TLanguageCode, MaskedInteger, UUIDBytes +from pymp4.subconstructs import TellPlusSizeOf log = logging.getLogger(__name__) @@ -714,7 +715,7 @@ "asrt": HDSSegmentRunBox, "afrt": HDSFragmentRunBox }, default=RawBox)), - "end" / Tell + "end" / TellPlusSizeOf(Int32ub) ), includelength=True) ContainerBox = Struct( diff --git a/src/pymp4/subconstructs.py b/src/pymp4/subconstructs.py new file mode 100644 index 0000000..8ed31d7 --- /dev/null +++ 
b/src/pymp4/subconstructs.py @@ -0,0 +1,18 @@ +from abc import ABC + +from construct import Subconstruct + + +class TellPlusSizeOf(Subconstruct, ABC): + def __init__(self, subcon): + super(TellPlusSizeOf, self).__init__(subcon) + self.flagbuildnone = True + + def _parse(self, stream, context, path): + return stream.tell() + self.subcon.sizeof(context=context) + + def _build(self, obj, stream, context, path): + return b"" + + def sizeof(self, context=None, **kw): + return 0 From c2d87c7df5af3eaaa0ccb109f22cd1404c2e11b6 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 10:45:36 +0100 Subject: [PATCH 11/17] Nest all EmbedBitStructs as "flags" field --- src/pymp4/parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index c6dd151..a0e86a4 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -108,7 +108,7 @@ "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "info_version" / Int32ub, - EmbeddedBitStruct( + "flags" / BitStruct( Padding(1), "profile" / Flag, "live" / Flag, @@ -166,10 +166,10 @@ "modification_time" / IfThenElse(this.version == 1, Int64ub, Int32ub), "timescale" / Int32ub, "duration" / IfThenElse(this.version == 1, Int64ub, Int32ub), - Embedded(BitStruct( + "flags" / BitStruct( Padding(1), "language" / ISO6392TLanguageCode(BitsInteger(5)[3]), - )), + ), Padding(2, pattern=b"\x00"), ) @@ -242,7 +242,7 @@ "profile" / Int8ub, "compatibility" / Int8ub, "level" / Int8ub, - EmbeddedBitStruct( + "flags" / BitStruct( Padding(6, pattern=b'\x01'), "nal_unit_length_field" / Default(BitsInteger(2), 3), ), @@ -251,8 +251,8 @@ ) HVCC = Struct( - EmbeddedBitStruct( - "version" / Const(1, BitsInteger(8)), + "version" / Const(1, Int8ub), + "flags" / BitStruct( "profile_space" / BitsInteger(2), "general_tier_flag" / BitsInteger(1), "general_profile" / BitsInteger(5), From 7b6c1d7659429aa93c7485157d132d0d82abbe2a Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 
Apr 2023 12:08:53 +0100 Subject: [PATCH 12/17] Rework ISO6392TLanguageCode Adapter to expect Int16 data This way we don't have to use BitStruct and messy embedding/nesting decisions and can directly nest the language. --- src/pymp4/adapters.py | 12 ++++++++++-- src/pymp4/parser.py | 7 ++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/pymp4/adapters.py b/src/pymp4/adapters.py index 3e9313c..7eda9a9 100644 --- a/src/pymp4/adapters.py +++ b/src/pymp4/adapters.py @@ -8,10 +8,18 @@ class ISO6392TLanguageCode(Adapter, ABC): def _decode(self, obj, context, path): - return b"".join(map(int2byte, [c + 0x60 for c in bytearray(obj)])).decode("utf8") + return "".join([ + chr(bit + 0x60) + for bit in ( + (obj >> 10) & 0b11111, + (obj >> 5) & 0b11111, + obj & 0b11111 + ) + ]) def _encode(self, obj, context, path): - return [c - 0x60 for c in bytearray(obj.encode("utf8"))] + bits = [ord(c) - 0x60 for c in obj] + return (bits[0] << 10) | (bits[1] << 5) | bits[2] class MaskedInteger(Adapter, ABC): diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index a0e86a4..4acac8d 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -166,11 +166,8 @@ "modification_time" / IfThenElse(this.version == 1, Int64ub, Int32ub), "timescale" / Int32ub, "duration" / IfThenElse(this.version == 1, Int64ub, Int32ub), - "flags" / BitStruct( - Padding(1), - "language" / ISO6392TLanguageCode(BitsInteger(5)[3]), - ), - Padding(2, pattern=b"\x00"), + "language" / ISO6392TLanguageCode(Int16ub), + Padding(2, pattern=b"\x00") ) HandlerReferenceBox = Struct( From 2858709de9fd27e47469abfb023628c63dfdba0b Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 12:24:09 +0100 Subject: [PATCH 13/17] Fix ContainerBoxLazy as context is no longer passed --- src/pymp4/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 4acac8d..a668e53 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -651,7 
+651,7 @@ }, GreedyBytes) ) -ContainerBoxLazy = LazyBound(lambda ctx: ContainerBox) +ContainerBoxLazy = LazyBound(lambda: ContainerBox) Box = Prefixed(Int32ub, Struct( From da2cf3cc21303663a0408ee40a3322971e057bd4 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 12:25:32 +0100 Subject: [PATCH 14/17] Remove types from each individual box struct This removes duplicate checks and duplicate output data. --- src/pymp4/parser.py | 46 +-------------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index a668e53..c80d7c5 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -31,14 +31,12 @@ # Header box FileTypeBox = Struct( - "type" / Const(b"ftyp"), "major_brand" / PaddedString(4, "ascii"), "minor_version" / Int32ub, "compatible_brands" / GreedyRange(PaddedString(4, "ascii")), ) SegmentTypeBox = Struct( - "type" / Const(b"styp"), "major_brand" / PaddedString(4, "ascii"), "minor_version" / Int32ub, "compatible_brands" / GreedyRange(PaddedString(4, "ascii")), @@ -52,19 +50,16 @@ ) FreeBox = Struct( - "type" / Const(b"free"), "data" / GreedyBytes ) SkipBox = Struct( - "type" / Const(b"skip"), "data" / GreedyBytes ) # Movie boxes, contained in a moov Box MovieHeaderBox = Struct( - "type" / Const(b"mvhd"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "creation_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), @@ -85,7 +80,6 @@ # Track boxes, contained in trak box TrackHeaderBox = Struct( - "type" / Const(b"tkhd"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 1), "creation_time" / Default(Switch(this.version, {0: Int32ub, 1: Int64ub}), 0), @@ -104,7 +98,6 @@ ) HDSSegmentBox = Struct( - "type" / Const(b"abst"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "info_version" / Int32ub, @@ -128,7 +121,6 @@ ) HDSSegmentRunBox = Struct( - "type" / Const(b"asrt"), "version" / Default(Int8ub, 0), "flags" / 
Default(Int24ub, 0), "quality_entry_table" / PrefixedArray(Int8ub, CString("ascii")), @@ -139,7 +131,6 @@ ) HDSFragmentRunBox = Struct( - "type" / Const(b"afrt"), "version" / Default(Int8ub, 0), "flags" / BitStruct( Padding(23), @@ -159,7 +150,6 @@ # Boxes contained by Media Box MediaHeaderBox = Struct( - "type" / Const(b"mdhd"), "version" / Default(Int8ub, 0), "flags" / Const(0, Int24ub), "creation_time" / IfThenElse(this.version == 1, Int64ub, Int32ub), @@ -171,7 +161,6 @@ ) HandlerReferenceBox = Struct( - "type" / Const(b"hdlr"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), Padding(4, pattern=b"\x00"), @@ -183,7 +172,6 @@ # Boxes contained by Media Info Box VideoMediaHeaderBox = Struct( - "type" / Const(b"vmhd"), "version" / Default(Int8ub, 0), "flags" / Const(1, Int24ub), "graphics_mode" / Default(Int16ub, 0), @@ -195,7 +183,6 @@ ) DataEntryUrlBox = Prefixed(Int32ub, Struct( - "type" / Const(b"url "), "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~this._.location) @@ -204,7 +191,6 @@ ), includelength=True) DataEntryUrnBox = Prefixed(Int32ub, Struct( - "type" / Const(b"urn "), "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), "self_contained" / Rebuild(Flag, ~(this._.name & this._.location)) @@ -214,7 +200,6 @@ ), includelength=True) DataReferenceBox = Struct( - "type" / Const(b"dref"), "version" / Const(0, Int8ub), "flags" / Default(Int24ub, 0), "data_entries" / PrefixedArray(Int32ub, Select(DataEntryUrnBox, DataEntryUrlBox)), @@ -317,21 +302,18 @@ ), includelength=True) BitRateBox = Struct( - "type" / Const(b"btrt"), "bufferSizeDB" / Int32ub, "maxBitrate" / Int32ub, "avgBirate" / Int32ub, ) SampleDescriptionBox = Struct( - "type" / Const(b"stsd"), "version" / Default(Int8ub, 0), "flags" / Const(0, Int24ub), "entries" / PrefixedArray(Int32ub, SampleEntryBox) ) SampleSizeBox = Struct( - "type" / Const(b"stsz"), "version" / Int8ub, "flags" / Const(0, Int24ub), "sample_size" / Int32ub, 
@@ -340,7 +322,6 @@ ) SampleSizeBox2 = Struct( - "type" / Const(b"stz2"), "version" / Int8ub, "flags" / Const(0, Int24ub), Padding(3, pattern=b"\x00"), @@ -352,13 +333,11 @@ ) SampleDegradationPriorityBox = Struct( - "type" / Const(b"stdp"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), ) TimeToSampleBox = Struct( - "type" / Const(b"stts"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( @@ -368,7 +347,6 @@ ) SyncSampleBox = Struct( - "type" / Const(b"stss"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( @@ -377,7 +355,6 @@ ) SampleToChunkBox = Struct( - "type" / Const(b"stsc"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( @@ -388,7 +365,6 @@ ) ChunkOffsetBox = Struct( - "type" / Const(b"stco"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "entries" / Default(PrefixedArray(Int32ub, Struct( @@ -397,7 +373,6 @@ ) ChunkLargeOffsetBox = Struct( - "type" / Const(b"co64"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "entries" / PrefixedArray(Int32ub, Struct( @@ -408,14 +383,12 @@ # Movie Fragment boxes, contained in moof box MovieFragmentHeaderBox = Struct( - "type" / Const(b"mfhd"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "sequence_number" / Int32ub ) TrackFragmentBaseMediaDecodeTimeBox = Struct( - "type" / Const(b"tfdt"), "version" / Int8ub, "flags" / Const(0, Int24ub), "baseMediaDecodeTime" / Switch(this.version, {1: Int64ub, 0: Int32ub}) @@ -433,7 +406,6 @@ ) TrackRunBox = Struct( - "type" / Const(b"trun"), "version" / Int8ub, "flags" / BitStruct( Padding(12), @@ -461,7 +433,6 @@ ) TrackFragmentHeaderBox = Struct( - "type" / Const(b"tfhd"), "version" / Int8ub, "flags" / BitStruct( Padding(6), @@ -484,7 +455,6 @@ ) MovieExtendsHeaderBox = Struct( - "type" / Const(b"mehd"), "version" / Default(Int8ub, 0), "flags" / 
Const(0, Int24ub), "fragment_duration" / IfThenElse(this.version == 1, @@ -493,7 +463,6 @@ ) TrackExtendsBox = Struct( - "type" / Const(b"trex"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "track_ID" / Int32ub, @@ -504,7 +473,6 @@ ) SegmentIndexBox = Struct( - "type" / Const(b"sidx"), "version" / Int8ub, "flags" / Const(0, Int24ub), "reference_ID" / Int32ub, @@ -524,7 +492,6 @@ ) SampleAuxiliaryInformationSizesBox = Struct( - "type" / Const(b"saiz"), "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(23), @@ -541,7 +508,6 @@ ) SampleAuxiliaryInformationOffsetsBox = Struct( - "type" / Const(b"saio"), "version" / Int8ub, "flags" / BitStruct( Padding(23), @@ -557,14 +523,12 @@ # Movie data box MovieDataBox = Struct( - "type" / Const(b"mdat"), "data" / GreedyBytes ) # Media Info Box SoundMediaHeaderBox = Struct( - "type" / Const(b"smhd"), "version" / Const(0, Int8ub), "flags" / Const(0, Int24ub), "balance" / Default(Int16sb, 0), @@ -575,7 +539,6 @@ # DASH Boxes ProtectionSystemHeaderBox = Struct( - "type" / If(this._.type != "uuid", Const(b"pssh")), "version" / Rebuild(Int8ub, lambda ctx: 1 if (hasattr(ctx, "key_IDs") and ctx.key_IDs) else 0), "flags" / Const(0, Int24ub), "system_ID" / UUIDBytes(Bytes(16)), @@ -586,7 +549,6 @@ ) TrackEncryptionBox = Struct( - "type" / If(this._.type != "uuid", Const(b"tenc")), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "_reserved0" / Const(0, Int8ub), @@ -602,7 +564,6 @@ ) SampleEncryptionBox = Struct( - "type" / If(this._.type != "uuid", Const(b"senc")), "version" / Const(0, Int8ub), "flags" / BitStruct( Padding(22), @@ -620,12 +581,10 @@ ) OriginalFormatBox = Struct( - "type" / Const(b"frma"), "original_format" / Default(PaddedString(4, "ascii"), "avc1") ) SchemeTypeBox = Struct( - "type" / Const(b"schm"), "version" / Default(Int8ub, 0), "flags" / Default(Int24ub, 0), "scheme_type" / Default(PaddedString(4, "ascii"), "cenc"), @@ -634,7 +593,6 @@ ) ProtectionSchemeInformationBox = Struct( - 
"type" / Const(b"sinf"), # TODO: define which children are required 'schm', 'schi' and 'tenc' "children" / LazyBound(lambda _: GreedyRange(Box)) ) @@ -642,7 +600,6 @@ # PIFF boxes UUIDBox = Struct( - "type" / Const(b"uuid"), "extended_type" / UUIDBytes(Bytes(16)), "data" / Switch(this.extended_type, { UUID("A2394F52-5A9B-4F14-A244-6C427C648DF4"): SampleEncryptionBox, @@ -656,7 +613,7 @@ Box = Prefixed(Int32ub, Struct( "offset" / Tell, - "type" / Peek(PaddedString(4, "ascii")), + "type" / PaddedString(4, "ascii"), Embedded(Switch(this.type, { "ftyp": FileTypeBox, "styp": SegmentTypeBox, @@ -716,7 +673,6 @@ ), includelength=True) ContainerBox = Struct( - "type" / PaddedString(4, "ascii"), "children" / GreedyRange(Box) ) From 8262cceca09c1047269d2096e4cc28c660957453 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 12:30:24 +0100 Subject: [PATCH 15/17] ListContainer must be used if the list is a list of containers --- tests/test_box.py | 14 +++++++------- tests/test_util.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_box.py b/tests/test_box.py index bba6704..5193bec 100644 --- a/tests/test_box.py +++ b/tests/test_box.py @@ -17,7 +17,7 @@ import logging import unittest -from construct import Container +from construct import Container, ListContainer from pymp4.parser import Box log = logging.getLogger(__name__) @@ -89,13 +89,13 @@ def test_mdhd_build(self): def test_moov_build(self): moov = \ - Container(type="moov", children=[ # 96 bytes - Container(type="mvex", children=[ # 88 bytes + Container(type="moov", children=ListContainer([ # 96 bytes + Container(type="mvex", children=ListContainer([ # 88 bytes Container(type="mehd", version=0, flags=0, fragment_duration=0), # 16 bytes Container(type="trex", track_ID=1), # 32 bytes Container(type="trex", track_ID=2), # 32 bytes - ]) - ]) + ])) + ])) moov_data = Box.build(moov) @@ -141,9 +141,9 @@ def test_stsd_parse(self): type="stsd", version=0, flags=0, - 
entries=[ + entries=ListContainer([ Container(format='tx3g', data_reference_index=1, data=tx3g_data) - ], + ]), end=len(in_bytes) ) ) diff --git a/tests/test_util.py b/tests/test_util.py index d30ac3d..0e45347 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -17,7 +17,7 @@ import logging import unittest -from construct import Container +from construct import Container, ListContainer from pymp4.exceptions import BoxNotFound from pymp4.util import BoxUtil @@ -28,23 +28,23 @@ class BoxTests(unittest.TestCase): box_data = Container( type="demo", - children=[ + children=ListContainer([ Container(type="a ", id=1), Container(type="b ", id=2), Container( type="c ", - children=[ + children=ListContainer([ Container(type="a ", id=3), Container(type="b ", id=4) - ] + ]) ), Container(type="d ", id=5) - ] + ]) ) box_extended_data = Container( type="test", - children=[ + children=ListContainer([ Container( type="a ", id=1, @@ -55,7 +55,7 @@ class BoxTests(unittest.TestCase): id=2, extended_type=b"e--b" ) - ] + ]) ) def test_find(self): @@ -73,10 +73,10 @@ def test_find_after_nest(self): def test_find_nested_type(self): self.assertListEqual( list(BoxUtil.find(self.box_data, "c ")), - [Container(type="c ", children=[ + [Container(type="c ", children=ListContainer([ Container(type="a ", id=3), Container(type="b ", id=4), - ])] + ]))] ) def test_find_empty(self): From d81c93294f1d93a4517473b4022d00b4eceef6c2 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 6 Apr 2023 12:38:47 +0100 Subject: [PATCH 16/17] Replace use of Embedded with nested "data" field This is by no means ideal. It would be much preferred to be embedded, but the maintainer of construct has stated there is no workaround and nesting is the only option. This means when building boxes we must now nest the data in a `data` container. This also means grabbing a field's value must now specify `.data` first, e.g., `tenc_box.data.is_encrypted`. See changes with the tests. Not ideal at all.
--- src/pymp4/parser.py | 12 ++--- tests/test_box.py | 103 ++++++++++++++++++++++++---------------- tests/test_dashboxes.py | 23 +++++---- 3 files changed, 82 insertions(+), 56 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index c80d7c5..17e3a27 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -280,10 +280,10 @@ "color_table_id" / Default(Int16sb, -1), "avc_data" / Prefixed(Int32ub, Struct( "type" / PaddedString(4, "ascii"), - Embedded(Switch(this.type, { + "data" / Switch(this.type, { "avcC": AAVC, "hvcC": HVCC, - }, Struct("data" / GreedyBytes))) + }, GreedyBytes) ), includelength=True), "sample_info" / LazyBound(lambda _: GreedyRange(Box)) ) @@ -292,13 +292,13 @@ "format" / PaddedString(4, "ascii"), Padding(6, pattern=b"\x00"), "data_reference_index" / Default(Int16ub, 1), - Embedded(Switch(this.format, { + "data" / Switch(this.format, { "ec-3": MP4ASampleEntryBox, "mp4a": MP4ASampleEntryBox, "enca": MP4ASampleEntryBox, "avc1": AVC1SampleEntryBox, "encv": AVC1SampleEntryBox - }, Struct("data" / GreedyBytes))) + }, GreedyBytes) ), includelength=True) BitRateBox = Struct( @@ -614,7 +614,7 @@ Box = Prefixed(Int32ub, Struct( "offset" / Tell, "type" / PaddedString(4, "ascii"), - Embedded(Switch(this.type, { + "data" / Switch(this.type, { "ftyp": FileTypeBox, "styp": SegmentTypeBox, "mvhd": MovieHeaderBox, @@ -668,7 +668,7 @@ "abst": HDSSegmentBox, "asrt": HDSSegmentRunBox, "afrt": HDSFragmentRunBox - }, default=RawBox)), + }, default=RawBox), "end" / TellPlusSizeOf(Int32ub) ), includelength=True) diff --git a/tests/test_box.py b/tests/test_box.py index 5193bec..39d1922 100644 --- a/tests/test_box.py +++ b/tests/test_box.py @@ -30,9 +30,11 @@ def test_ftyp_parse(self): Container( offset=0, type="ftyp", - major_brand="iso5", - minor_version=1, - compatible_brands=["iso5", "avc1"], + data=Container( + major_brand="iso5", + minor_version=1, + compatible_brands=["iso5", "avc1"] + ), end=24 ) ) @@ -41,9 +43,12 @@ def 
test_ftyp_build(self): self.assertEqual( Box.build(dict( type="ftyp", - major_brand="iso5", - minor_version=1, - compatible_brands=["iso5", "avc1"])), + data=dict( + major_brand="iso5", + minor_version=1, + compatible_brands=["iso5", "avc1"] + ) + )), b'\x00\x00\x00\x18ftypiso5\x00\x00\x00\x01iso5avc1') def test_mdhd_parse(self): @@ -52,13 +57,15 @@ def test_mdhd_parse(self): Container( offset=0, type="mdhd", - version=0, - flags=0, - creation_time=0, - modification_time=0, - timescale=1000000, - duration=0, - language="und", + data=Container( + version=0, + flags=0, + creation_time=0, + modification_time=0, + timescale=1000000, + duration=0, + language="und" + ), end=32 ) ) @@ -66,36 +73,40 @@ def test_mdhd_parse(self): def test_mdhd_build(self): mdhd_data = Box.build(dict( type="mdhd", - creation_time=0, - modification_time=0, - timescale=1000000, - duration=0, - language=u"und")) + data=dict( + creation_time=0, + modification_time=0, + timescale=1000000, + duration=0, + language="und" + ))) self.assertEqual(len(mdhd_data), 32) self.assertEqual(mdhd_data, b'\x00\x00\x00\x20mdhd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00U\xc4\x00\x00') mdhd_data64 = Box.build(dict( type="mdhd", - version=1, - creation_time=0, - modification_time=0, - timescale=1000000, - duration=0, - language=u"und")) + data=dict( + version=1, + creation_time=0, + modification_time=0, + timescale=1000000, + duration=0, + language="und" + ))) self.assertEqual(len(mdhd_data64), 44) self.assertEqual(mdhd_data64, b'\x00\x00\x00,mdhd\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0fB@\x00\x00\x00\x00\x00\x00\x00\x00U\xc4\x00\x00') def test_moov_build(self): moov = \ - Container(type="moov", children=ListContainer([ # 96 bytes - Container(type="mvex", children=ListContainer([ # 88 bytes - Container(type="mehd", version=0, flags=0, fragment_duration=0), # 16 bytes - Container(type="trex", track_ID=1), # 32 bytes - 
Container(type="trex", track_ID=2), # 32 bytes - ])) - ])) + Container(type="moov", data=Container(children=ListContainer([ # 96 bytes + Container(type="mvex", data=Container(children=ListContainer([ # 88 bytes + Container(type="mehd", data=Container(version=0, flags=0, fragment_duration=0)), # 16 bytes + Container(type="trex", data=Container(track_ID=1)), # 32 bytes + Container(type="trex", data=Container(track_ID=2)), # 32 bytes + ]))) + ]))) moov_data = Box.build(moov) @@ -116,10 +127,12 @@ def test_smhd_parse(self): Container( offset=0, type="smhd", - version=0, - flags=0, - balance=0, - reserved=0, + data=Container( + version=0, + flags=0, + balance=0, + reserved=0 + ), end=len(in_bytes) ) ) @@ -127,7 +140,9 @@ def test_smhd_parse(self): def test_smhd_build(self): smhd_data = Box.build(dict( type="smhd", - balance=0)) + data=dict( + balance=0 + ))) self.assertEqual(len(smhd_data), 16), self.assertEqual(smhd_data, b'\x00\x00\x00\x10smhd\x00\x00\x00\x00\x00\x00\x00\x00') @@ -139,11 +154,17 @@ def test_stsd_parse(self): Container( offset=0, type="stsd", - version=0, - flags=0, - entries=ListContainer([ - Container(format='tx3g', data_reference_index=1, data=tx3g_data) - ]), + data=Container( + version=0, + flags=0, + entries=ListContainer([ + Container( + format="tx3g", + data_reference_index=1, + data=tx3g_data + ) + ]) + ), end=len(in_bytes) ) ) diff --git a/tests/test_dashboxes.py b/tests/test_dashboxes.py index 1c735d6..4bbb4cc 100644 --- a/tests/test_dashboxes.py +++ b/tests/test_dashboxes.py @@ -31,12 +31,14 @@ def test_tenc_parse(self): Container( offset=0, type="tenc", - version=0, - flags=0, - is_encrypted=1, - iv_size=8, - key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), - constant_iv=None, + data=Container( + version=0, + flags=0, + is_encrypted=1, + iv_size=8, + key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), + constant_iv=None + ), end=32 ) ) @@ -45,7 +47,10 @@ def test_tenc_build(self): self.assertEqual( Box.build(dict( type="tenc", - 
key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), - iv_size=8, - is_encrypted=1)), + data=dict( + key_ID=UUID('337b9643-21b6-4355-9e59-3eccb46c7ef7'), + iv_size=8, + is_encrypted=1 + ) + )), b'\x00\x00\x00 tenc\x00\x00\x00\x00\x00\x00\x01\x083{\x96C!\xb6CU\x9eY>\xcc\xb4l~\xf7') From 33dc5d620f361cb49d713b60dcb2686ab8696f91 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Mon, 7 Aug 2023 08:11:21 +0100 Subject: [PATCH 17/17] Fix stuff I missed when rebasing --- src/pymp4/parser.py | 28 ++---------- tests/test_webvtt_boxes.py | 90 +++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 54 deletions(-) diff --git a/src/pymp4/parser.py b/src/pymp4/parser.py index 0d7c943..eaca852 100644 --- a/src/pymp4/parser.py +++ b/src/pymp4/parser.py @@ -298,7 +298,7 @@ "enca": MP4ASampleEntryBox, "avc1": AVC1SampleEntryBox, "encv": AVC1SampleEntryBox, - "wvtt": Struct("children" / LazyBound(lambda ctx: GreedyRange(Box))) + "wvtt": Struct("children" / LazyBound(lambda: GreedyRange(Box))) }, GreedyBytes) ), includelength=True) @@ -552,7 +552,7 @@ TrackEncryptionBox = Struct( "version" / Default(OneOf(Int8ub, (0, 1)), 0), "flags" / Default(Int24ub, 0), - "_reserved" / Const(Int8ub, 0), + "_reserved" / Const(0, Int8ub), "default_byte_blocks" / Default(IfThenElse( this.version > 0, BitStruct( @@ -561,7 +561,7 @@ # count of unencrypted blocks in the protection pattern "skip" / Nibble ), - Const(Int8ub, 0) + Const(0, Int8ub) ), 0), "is_encrypted" / OneOf(Int8ub, (0, 1)), "iv_size" / OneOf(Int8ub, (0, 8, 16)), @@ -620,46 +620,26 @@ # WebVTT boxes CueIDBox = Struct( - "type" / Const(b"iden"), "cue_id" / GreedyString("utf8") ) CueSettingsBox = Struct( - "type" / Const(b"sttg"), "settings" / GreedyString("utf8") ) CuePayloadBox = Struct( - "type" / Const(b"payl"), "cue_text" / GreedyString("utf8") ) WebVTTConfigurationBox = Struct( - "type" / Const(b"vttC"), "config" / GreedyString("utf8") ) WebVTTSourceLabelBox = Struct( - "type" / Const(b"vlab"), "label" / 
GreedyString("utf8") ) -ContainerBoxLazy = LazyBound(lambda ctx: ContainerBox) - - -class TellMinusSizeOf(Subconstruct): - def __init__(self, subcon): - super(TellMinusSizeOf, self).__init__(subcon) - self.flagbuildnone = True - - def _parse(self, stream, context, path): - return stream.tell() - self.subcon.sizeof(context) - - def _build(self, obj, stream, context, path): - return b"" - - def sizeof(self, context=None, **kw): - return 0 +ContainerBoxLazy = LazyBound(lambda: ContainerBox) Box = Prefixed(Int32ub, Struct( diff --git a/tests/test_webvtt_boxes.py b/tests/test_webvtt_boxes.py index 84f7db3..e322c2c 100644 --- a/tests/test_webvtt_boxes.py +++ b/tests/test_webvtt_boxes.py @@ -12,79 +12,109 @@ class BoxTests(unittest.TestCase): def test_iden_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00\x27iden2 - this is the second subtitle'), - Container(offset=0) - (type=b"iden") - (cue_id="2 - this is the second subtitle") - (end=39) + Container( + offset=0, + type="iden", + data=Container( + cue_id="2 - this is the second subtitle" + ), + end=39 + ) ) def test_iden_build(self): self.assertEqual( Box.build(dict( - type=b"iden", - cue_id="1 - first subtitle")), + type="iden", + data=dict( + cue_id="1 - first subtitle" + ))), b'\x00\x00\x00\x1aiden1 - first subtitle') def test_sttg_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x003sttgline:10% position:50% size:48% align:center'), - Container(offset=0) - (type=b"sttg") - (settings="line:10% position:50% size:48% align:center") - (end=51) + Container( + offset=0, + type="sttg", + data=Container( + settings="line:10% position:50% size:48% align:center" + ), + end=51 + ) ) def test_sttg_build(self): self.assertEqual( Box.build(dict( - type=b"sttg", - settings="line:75% position:20% size:2em align:right")), + type="sttg", + data=dict( + settings="line:75% position:20% size:2em align:right" + ))), b'\x00\x00\x002sttgline:75% position:20% size:2em align:right') def test_payl_parse(self): self.assertEqual( 
Box.parse(b'\x00\x00\x00\x13payl[chuckling]'), - Container(offset=0) - (type=b"payl") - (cue_text="[chuckling]") - (end=19) + Container( + offset=0, + type="payl", + data=Container( + cue_text="[chuckling]" + ), + end=19 + ) ) def test_payl_build(self): self.assertEqual( Box.build(dict( - type=b"payl", - cue_text="I have a bad feeling about- [boom]")), + type="payl", + data=dict( + cue_text="I have a bad feeling about- [boom]" + ))), b'\x00\x00\x00*paylI have a bad feeling about- [boom]') def test_vttC_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00\x0evttCWEBVTT'), - Container(offset=0) - (type=b"vttC") - (config="WEBVTT") - (end=14) + Container( + offset=0, + type="vttC", + data=Container( + config="WEBVTT" + ), + end=14 + ) ) def test_vttC_build(self): self.assertEqual( Box.build(dict( - type=b"vttC", - config="WEBVTT with a text header\n\nSTYLE\n::cue {\ncolor: red;\n}")), + type="vttC", + data=dict( + config="WEBVTT with a text header\n\nSTYLE\n::cue {\ncolor: red;\n}" + ))), b'\x00\x00\x00>vttCWEBVTT with a text header\n\nSTYLE\n::cue {\ncolor: red;\n}') def test_vlab_parse(self): self.assertEqual( Box.parse(b'\x00\x00\x00\x14vlabsource_label'), - Container(offset=0) - (type=b"vlab") - (label="source_label") - (end=20) + Container( + offset=0, + type="vlab", + data=Container( + label="source_label" + ), + end=20 + ) ) def test_vlab_build(self): self.assertEqual( Box.build(dict( - type=b"vlab", - label="1234 \n test_label \n\n")), + type="vlab", + data=dict( + label="1234 \n test_label \n\n" + ))), b'\x00\x00\x00\x1cvlab1234 \n test_label \n\n')