diff --git a/Makefile b/Makefile index a550011..1c726ca 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-zipfile fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml +all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) @@ -24,6 +24,8 @@ fuzzer-re: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"re.py\"" -ldl $(LDFLAGS) -o fuzzer-re fuzzer-zipfile: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zipfile.py\"" -ldl $(LDFLAGS) -o fuzzer-zipfile +fuzzer-zipfile-hypothesis: + clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zipfile_hypothesis.py\"" -ldl $(LDFLAGS) -o fuzzer-zipfile-hypothesis fuzzer-tarfile: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"tarfile.py\"" -ldl $(LDFLAGS) -o fuzzer-tarfile fuzzer-configparser: diff --git a/zipfile_hypothesis.py b/zipfile_hypothesis.py new file mode 100644 index 0000000..918a928 --- /dev/null +++ b/zipfile_hypothesis.py @@ -0,0 +1,79 @@ +import datetime +import io +import zipfile +import math + +from hypothesis import given +from hypothesis import strategies as st + +ZIP_EPOCH_START = datetime.datetime(1980, 1, 1) +ZIP_EPOCH_END = datetime.datetime(2107, 12, 31, 23, 59, 59) +ZIP_EPOCH_LENGTH = int(math.ceil((ZIP_EPOCH_END - ZIP_EPOCH_START).total_seconds())) + + +def utf8_text(*, max_size: int, min_size: int = 0) -> st.SearchStrategy[str]: + """Returns UTF-8 text that, when encoded to bytes, + is within the size restrictions. + """ + return st.text(min_size=min_size, max_size=max_size).filter( + lambda s: min_size <= len(s.encode("utf-8")) <= max_size + ) + + +def zip_date_time() -> st.SearchStrategy[tuple[int, int, int, int, int, int, int]]: + """Returns a tuple of (year, month, day, hour, minute, second) + for valid values within a ZIP archive. + """ + return st.integers(min_value=0, max_value=ZIP_EPOCH_LENGTH).map( + lambda s: (ZIP_EPOCH_START + datetime.timedelta(seconds=s)).timetuple()[:6] + ) + + +@st.composite +def zip_archives(draw): + compression_types = [ + zipfile.ZIP_STORED, + zipfile.ZIP_DEFLATED, + zipfile.ZIP_BZIP2, + zipfile.ZIP_LZMA, + ] + try: + import compression.zstd + + compression_types.append(zipfile.ZIP_ZSTANDARD) + except ImportError: + pass + + buf = io.BytesIO() + zfp = zipfile.ZipFile(buf, "w") + + for _ in range(draw(st.integers(min_value=0, max_value=10))): + zpi = zipfile.ZipInfo() + zpi.filename = draw(utf8_text(min_size=1, max_size=0xFFFF)) + zpi.date_time = draw(zip_date_time()) + + if draw(st.booleans()): + zpi.flag_bits |= zipfile._MASK_USE_DATA_DESCRIPTOR + + zpi.compress_type = draw(st.sampled_from(compression_types)) + zpi._compresslevel = draw(st.integers(min_value=1, max_value=9)) + zpi.comment = draw(utf8_text(min_size=0, max_size=0xFFFF)) + + force_zip64 = draw(st.booleans()) + with zfp.open(zpi, mode="w", force_zip64=force_zip64) as f: + f.write(b"") + + zfp.close() + return buf, zfp + + +@given(zip_archives()) +def zip_archive_fuzz_target(buf_zfp: tuple[io.BytesIO, zipfile.ZipFile]) -> None: + buf, zfp1 = buf_zfp + with zipfile.ZipFile(buf, "r") as zfp2: + # Assert that ZIP files round-trip. + assert list(zfp1.infolist()) == list(zfp2.infolist()) + + +# Exposes the Hypothesis fuzz target for integrating with OSS-Fuzz. +FuzzerRunOne = zip_archive_fuzz_target.hypothesis.fuzz_one_input