Skip to content

Commit

Permalink
Deflate64 reading support (#75)
Browse files Browse the repository at this point in the history
* add deflate64 test files to fixture

* fix tree hash

* Add support for Deflate64 decompression
  • Loading branch information
nhz2 authored Sep 1, 2024
1 parent 9292caa commit 19a1740
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 33 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "2.2.0"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
CodecInflate64 = "6309b1aa-fc58-479c-8956-599a07234577"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
InputBuffers = "0c81fc1b-5583-44fc-8770-48be1e1cca08"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand All @@ -13,6 +14,7 @@ Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"

[compat]
ArgCheck = "2"
CodecInflate64 = "0.1"
CodecZlib = "0.7"
InputBuffers = "1"
PrecompileTools = "1"
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ The central directory makes it fast to read just one random entry out of a very

When writing, it is important to close the writer so that the central directory gets written out.

More details on the file format can be found at https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT

### Reading Zip archives

Archives can be read from any `AbstractVector{UInt8}` containing the data of a zip archive.
Expand Down Expand Up @@ -85,14 +87,20 @@ ZipWriter(filename) do w
end
```

### Supported Compression Methods

| Compression Method | Reading | Writing |
|--------------------|---------|---------|
| 0 - Store (none) | Yes | Yes |
| 8 - Deflate | Yes | Yes |
| 9 - Deflate64 | Yes | No |

### Limitations

1. Cannot directly extract all files in an archive and write those files to disk.
1. Ignores time stamps.
1. Cannot write an archive fully in streaming mode. See https://github.com/madler/zipflow if you need this functionality.
1. Encryption and decryption are not supported.
1. Only the Store, Deflate, and Deflate64 (read-only) compression methods are supported. There is no support for bzip2 or zstd.
1. Multi-disk archives are not supported.
1. Cannot recover data from a corrupted archive, especially if the end of the archive is corrupted.

Expand Down
1 change: 1 addition & 0 deletions src/ZipArchives.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ end
module ZipArchives

using CodecZlib: DeflateCompressorStream, DeflateDecompressorStream, DeflateCompressor
using CodecInflate64: Deflate64DecompressorStream
using TranscodingStreams: TranscodingStreams, TranscodingStream, Noop, NoopStream
using ArgCheck: @argcheck
using Zlib_jll: Zlib_jll
Expand Down
3 changes: 3 additions & 0 deletions src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ const Store = UInt16(0)
"Deflate compression method"
const Deflate = UInt16(8)

"Deflate64 compression method"
const Deflate64 = UInt16(9)

#=
see https://github.com/madler/zipflow/blob/2bef2123ebe519c17b18d2d0c3c71065088de952/zipflow.c#L214
=#
Expand Down
8 changes: 5 additions & 3 deletions src/reader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ Return the compression method used for entry `i`.
See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT for a current list of methods.
Only Store(0x0000) and Deflate(0x0008) supported for now.
Only Store(0), Deflate(8), and Deflate64(9) are supported for now.
Note: if the zip file was corrupted, this might be wrong.
"""
Expand Down Expand Up @@ -774,8 +774,8 @@ end
function zip_openentry(r::ZipReader, i::Int)
compressed_size::Int64 = zip_compressed_size(r, i)
method = zip_compression_method(r, i)
if method != Store && method != Deflate
throw(ArgumentError("invalid compression method: $(method). Only Store(0) and Deflate(8) supported for now"))
if method != Store && method != Deflate && method != Deflate64
throw(ArgumentError("invalid compression method: $(method). Only Store(0), Deflate(8), and Deflate64(9) supported for now"))
end
entry_data_offset = zip_entry_data_offset(r, i)

Expand All @@ -792,6 +792,8 @@ function zip_openentry(r::ZipReader, i::Int)
return base_io
elseif method == Deflate
return DeflateDecompressorStream(base_io)
elseif method == Deflate64
return Deflate64DecompressorStream(base_io)
else
# should throw an ArgumentError before this
error("unreachable")
Expand Down
6 changes: 3 additions & 3 deletions test/Artifacts.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[fixture]
git-tree-sha1 = "6ab9957b72056a0c388b056305a24d9742cfb840"
git-tree-sha1 = "ed34b433321060f64bbdbda044851336788b9af6"

[[fixture.download]]
url = "https://github.com/JuliaIO/ZipArchives.jl/releases/download/v0.2.1/fixture.tar.gz"
sha256 = "a0505b18c35455b7163060355401ed722b9fdcd177bf2f93ab5e696f758d9507"
sha256 = "99a9bb1d9cba1fae77fd7be224b654da1f247815845ef4ac682a546e8dc70ceb"
url = "https://github.com/JuliaIO/ZipArchives.jl/releases/download/v2.1.6/fixture.tar.gz"
54 changes: 32 additions & 22 deletions test/Fixture-README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Add the file to the "fixture" directory, and a description to this file.
Then run
```julia
# This is the url that the artifact will be available from:
url_to_upload_to = "https://github.com/medyan-dev/ZipArchives.jl/releases/download/v0.2.1/fixture.tar.gz"
url_to_upload_to = "https://github.com/medyan-dev/ZipArchives.jl/releases/download/v2.1.6/fixture.tar.gz"
# This is the path to the Artifacts.toml we will manipulate
artifact_toml = "Artifacts.toml"
fixture_hash = create_artifact() do artifact_dir
Expand All @@ -30,27 +30,49 @@ bind_artifact!(artifact_toml, "fixture", fixture_hash; force=true,

Finally, upload the new "fixture.tar.gz" to `url_to_upload_to`

## `win11-excel.xlsx`
Small excel file created on windows 11 in microsoft Excel version 2305.
## `dotnet-deflate64.zip`
This file is downloaded from https://github.com/dotnet/runtime-assets/blob/95277f38e68b66f1b48600d90d456c32c9ae0fa2/src/System.IO.Compression.TestData/ZipTestData/compat/deflate64.zip

## `leftpad-core_2.13-0.1.11.jar`
Example jar file from https://mvnrepository.com/artifact/io.github.asakaev/leftpad-core_2.13/0.1.11

## `ubuntu22-7zip.zip`
Created with 7zip version 22.01 (x64)

## `ubuntu22-files.zip`
Created with default ubuntu files program

## `ubuntu22-infozip.zip`
Small zip file created with ubuntu22 Info-ZIP Zip 3.0

## `ubuntu22-old7zip.zip`
Created with 7zip version 16.02 p7zip 16.02

## `win11-7zip.zip`
Small zip file created with windows 11 7Zip 22.01

## `win11-deflate64.zip`
Large zip file created with windows 11 file explorer.
Designed to test the deflate64 decompressor.

## `win11-excel.ods`
Small OpenDocument Spreadsheet file created on windows 11 in microsoft Excel version 2305.

## `win11-libreoffice.ods`
Small OpenDocument Spreadsheet file created on windows 11 in LibreOffice Calc 7.5
## `win11-excel.xlsx`
Small excel file created on windows 11 in microsoft Excel version 2305.

## `win11-explorer.zip`
Small zip file created with windows 11 file explorer

## `win11-infozip.zip`
Small zip file created with windows 11 Info-ZIP Zip 3.0

## `win11-7zip.zip`
Small zip file created with windows 11 7Zip 22.01

## `win11-julia-p7zip.zip`
Small zip file created with windows 11 p7zip_jll 17.4.0+0

## `win11-libreoffice.ods`
Small OpenDocument Spreadsheet file created on windows 11 in LibreOffice Calc 7.5

## `win11-powerpoint.odp`
Small odp file created on windows 11 in microsoft PowerPoint version 2305

Expand All @@ -60,17 +82,5 @@ Small pptx file created on windows 11 in microsoft PowerPoint version 2305
## `ZipArchives.jl-main.zip`
Zip file downloaded from GitHub on 20 JUN 2023

## `leftpad-core_2.13-0.1.11.jar`
Example jar file from https://mvnrepository.com/artifact/io.github.asakaev/leftpad-core_2.13/0.1.11

## `ubuntu22-files.zip`
Created with default ubuntu files program

## `ubuntu22-7zip.zip`
Created with 7zip version 22.01 (x64)

## `ubuntu22-old7zip.zip`
Created with 7zip version 16.02 p7zip 16.02

## `ubuntu22-infozip.zip`
Small zip file created with ubuntu22 Info-ZIP Zip 3.0
## `zipfile-deflate64.zip`
Test file from https://github.com/brianhelba/zipfile-deflate64/blob/beec33184da6da4697a1994c0ac4c64cef8cff50/tests/data/deflate64.zip
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
Expand Down
10 changes: 6 additions & 4 deletions test/test_reader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using Base64: base64decode
using Setfield: @set
using p7zip_jll: p7zip_jll
using OffsetArrays: Origin
using SHA: sha256

@testset "find_end_of_central_directory_record unit tests" begin
find_eocd = ZipArchives.find_end_of_central_directory_record
Expand Down Expand Up @@ -149,8 +150,8 @@ end
data_b64 = "UEsDBD8AAgAOAHJb0FaLksVmIgAAABAAAAAJAAAAbHptYV9kYXRhCQQFAF0AAIAAADoaCWd+rnMR0beE5IbQKkMGbV//6/YgAFBLAQI/AD8AAgAOAHJb0FaLksVmIgAAABAAAAAJAAAAAAAAAAAAAACAAQAAAABsem1hX2RhdGFQSwUGAAAAAAEAAQA3AAAASQAAAAAA"
data = base64decode(data_b64)
r = ZipReader(data)
@test_throws ArgumentError("invalid compression method: 14. Only Store(0) and Deflate(8) supported for now") zip_test_entry(r, 1)
@test_throws ArgumentError("invalid compression method: 14. Only Store(0) and Deflate(8) supported for now") zip_openentry(r, 1)
@test_throws ArgumentError("invalid compression method: 14. Only Store(0), Deflate(8), and Deflate64(9) supported for now") zip_test_entry(r, 1)
@test_throws ArgumentError("invalid compression method: 14. Only Store(0), Deflate(8), and Deflate64(9) supported for now") zip_openentry(r, 1)
@test zip_iscompressed(r, 1)
@test zip_names(r) == ["lzma_data"]
@test zip_compression_method(r, 1) === 0x000e
Expand Down Expand Up @@ -233,8 +234,9 @@ end
if zip_isdir(r, i)
@test isdir(joinpath(tmpout,name))
else
entry_data = zip_readentry(r, i)
@test read(joinpath(tmpout,name)) == entry_data
sevenziphash = open(sha256, joinpath(tmpout,name))
ziphash = zip_openentry(sha256, r, i)
@test sevenziphash == ziphash
end
end
end
Expand Down

0 comments on commit 19a1740

Please sign in to comment.