Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 78 additions & 2 deletions src/hex_erl_tar.erl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
%% instead of loading them fully into memory
%% 6. Default chunk_size to 65536 in add_opts instead of 0 with special case
%% 7. Use compressed instead of compressed_one for file:open for OTP 24 compat
%% 8. Added {max_size, N} extraction option for zip bomb protection
%%
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
%%
Expand Down Expand Up @@ -87,6 +88,8 @@ format_error({invalid_gnu_0_1_sparsemap, Format}) ->
lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
format_error(unsafe_path) ->
"The path points above the current working directory";
format_error(too_big) ->
"Extraction size exceeds the configured max_size limit";
format_error({Name,Reason}) ->
lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
format_error(Atom) when is_atom(Atom) ->
Expand Down Expand Up @@ -138,9 +141,80 @@ extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->

%% Entry point for extraction: normalizes the option list, applies the
%% optional uncompressed-size limit to in-memory gzip input, then folds
%% over the archive entries with extract1/4.
do_extract(Handle, Opts) when is_list(Opts) ->
    ReadOpts0 = extract_opts(Opts),
    case maybe_inflate_with_limit(Handle, ReadOpts0) of
        {ok, Handle1, ReadOpts} ->
            %% memory output collects entries into a list; file output
            %% needs no accumulator beyond the atom 'ok'
            BaseAcc =
                case ReadOpts#read_opts.output of
                    memory -> [];
                    _ -> ok
                end,
            %% with a finite max_size, wrap the accumulator so extract1/4
            %% can keep a running total of the declared entry sizes
            InitAcc =
                case ReadOpts#read_opts.max_size of
                    infinity -> BaseAcc;
                    _ -> {size_tracked, 0, BaseAcc}
                end,
            foldl_read(Handle1, fun extract1/4, InitAcc, ReadOpts);
        {error, _} = Error ->
            Error
    end.

%% If the caller passed an in-memory binary together with a finite
%% max_size and the 'compressed' open mode, inflate it up-front under that
%% limit so an oversized payload is rejected before tar parsing starts.
%% On success 'compressed' is removed from the open mode, because the data
%% handed onward is already inflated. Any other handle (file path, no
%% limit configured, or not compressed) passes through untouched.
maybe_inflate_with_limit({binary, Bin} = Handle,
                         #read_opts{max_size = MaxSize, open_mode = Mode} = Opts)
        when is_integer(MaxSize), is_binary(Bin) ->
    case lists:member(compressed, Mode) of
        false ->
            {ok, Handle, Opts};
        true ->
            case inflate_with_limit(Bin, MaxSize) of
                {error, too_big} = Err ->
                    Err;
                {ok, Inflated} ->
                    {ok, {binary, Inflated},
                     Opts#read_opts{open_mode = Mode -- [compressed]}}
            end
    end;
maybe_inflate_with_limit(Handle, Opts) ->
    {ok, Handle, Opts}.

%% Inflate a gzip binary, refusing to produce more than MaxSize bytes of
%% output. Returns {ok, Inflated} | {error, too_big}. If zlib fails for
%% any other reason (e.g. the binary is not actually gzip data), the
%% original binary is returned unchanged so the regular extraction path
%% can report its own, more specific error.
inflate_with_limit(Compressed, MaxSize) ->
    ZStream = zlib:open(),
    try
        %% 31 = 16 + 15: gzip framing with the maximum window size;
        %% 'cut' stops cleanly at the end of the stream.
        zlib:inflateInit(ZStream, 31, cut),
        inflate_with_limit_loop(ZStream, Compressed, MaxSize, 0, [])
    catch
        %% deliberate best-effort fallback: hand back the untouched input
        _:_ -> {ok, Compressed}
    after
        zlib:close(ZStream)
    end.

%% Drive zlib:safeInflate/2 chunk by chunk, keeping a running total of the
%% bytes produced and aborting with {error, too_big} as soon as the total
%% exceeds MaxSize; otherwise returns {ok, InflatedBinary}.
inflate_with_limit_loop(Z, Input, MaxSize, SoFar, ChunksAcc) ->
    {Status, Chunk} = zlib:safeInflate(Z, Input),
    Produced = SoFar + iolist_size(Chunk),
    if
        Produced > MaxSize ->
            {error, too_big};
        Status =:= finished ->
            %% ChunksAcc is reversed with the final chunk as the tail
            {ok, iolist_to_binary(lists:reverse(ChunksAcc, Chunk))};
        Status =:= continue ->
            %% the input was handed to zlib on the first call; keep
            %% draining with an empty input until the stream finishes
            inflate_with_limit_loop(Z, <<>>, MaxSize, Produced, [Chunk | ChunksAcc])
    end.

%% extract1/4 clauses for the {size_tracked, Total, InnerAcc} accumulator
%% installed by do_extract/2 when a {max_size, N} option is in effect.
%% At eof they unwrap the tracked accumulator (mirroring the untracked
%% eof clauses below); for each entry they add the header-declared size
%% to the running total before delegating to the untracked clauses.

%% eof with memory output: unwrap and return the collected entry list.
extract1(eof, Reader, _, {size_tracked, _, Acc}) when is_list(Acc) ->
    {ok, {ok, lists:reverse(Acc)}, Reader};
%% eof after leading '/' was stripped from member names: log and finish.
extract1(eof, Reader, _, {size_tracked, _, leading_slash}) ->
    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
    {ok, ok, Reader};
%% eof with file output (inner accumulator is the atom 'ok'): unwrap.
extract1(eof, Reader, _, {size_tracked, _, Acc}) ->
    {ok, Acc, Reader};
%% A regular entry: count its header-declared size against max_size and
%% abort the whole extraction once the limit is exceeded.
%% NOTE(review): the check uses sizes declared in tar headers, before the
%% entry body is read; the thrown {error, too_big} is presumably caught
%% by foldl_read — confirm against the rest of hex_erl_tar.
extract1(#tar_header{size=Size}=Header, Reader0, Opts,
         {size_tracked, Total, InnerAcc}) ->
    NewTotal = Total + Size,
    case NewTotal > Opts#read_opts.max_size of
        true -> throw({error, too_big});
        false -> ok
    end,
    %% delegate to the untracked clauses, then re-wrap the accumulator
    case extract1(Header, Reader0, Opts, InnerAcc) of
        {ok, NewInnerAcc, Reader1} ->
            {ok, {size_tracked, NewTotal, NewInnerAcc}, Reader1}
    end;
extract1(eof, Reader, _, Acc) when is_list(Acc) ->
{ok, {ok, lists:reverse(Acc)}, Reader};
extract1(eof, Reader, _, leading_slash) ->
Expand Down Expand Up @@ -2085,6 +2159,8 @@ extract_opts([verbose|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{verbose=true});
extract_opts([{chunks,N}|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{chunk_size=N});
extract_opts([{max_size,N}|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{max_size=N});
extract_opts([Other|Rest], Opts) ->
extract_opts(Rest, read_opts([Other], Opts));
extract_opts([], Opts) ->
Expand Down
6 changes: 5 additions & 1 deletion src/hex_erl_tar.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
%% 1. Added chunk_size field to #read_opts{} for streaming extraction to disk
%% 2. Added {chunks, pos_integer()} to extract_opt() type
%% 3. Default chunk_size to 65536 in #add_opts{} instead of 0
%% 4. Added max_size field to #read_opts{} for zip bomb protection
%% 5. Added {max_size, pos_integer() | infinity} to extract_opt() type
%%
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
%%
Expand Down Expand Up @@ -46,7 +48,8 @@
output = file :: 'file' | 'memory',
open_mode = [], %% Open mode options.
verbose = false :: boolean(), %% Verbose on/off.
chunk_size = 65536}). %% Chunk size for streaming to disk.
chunk_size = 65536, %% Chunk size for streaming to disk.
max_size = infinity :: pos_integer() | 'infinity'}).
-type read_opts() :: #read_opts{}.

-type add_opt() :: dereference |
Expand All @@ -64,6 +67,7 @@
-type extract_opt() :: {cwd, string()} |
{files, [name_in_archive()]} |
{chunks, pos_integer()} |
{max_size, pos_integer() | infinity} |
compressed |
cooked |
memory |
Expand Down
60 changes: 40 additions & 20 deletions src/hex_tarball.erl
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ unpack(Input, memory, Config) ->
{ok, FileList} ->
case validate_outer_file_sizes(maps:from_list(FileList)) of
{ok, Files} ->
do_unpack(Files, OuterChecksum, memory);
do_unpack(Files, OuterChecksum, memory, Config);
{error, _} = Error ->
Error
end;
Expand All @@ -241,7 +241,7 @@ unpack(Input, Output, Config) ->
ok ->
case read_outer_files(TmpDir) of
{ok, Files} ->
do_unpack(Files, OuterChecksum, Output);
do_unpack(Files, OuterChecksum, Output, Config);
{error, _} = Error ->
Error
end;
Expand Down Expand Up @@ -308,7 +308,8 @@ unpack(Tarball, Output) ->
%% Unpack a docs tarball after checking the compressed input size; the
%% configured uncompressed-size limit is then enforced during extraction.
unpack_docs(Input, Output, Config) ->
    case check_docs_input_size(Input, Config) of
        false ->
            %% the compressed input itself already exceeds the limit
            {error, {tarball, too_big}};
        true ->
            UncompressedLimit = maps:get(docs_tarball_max_uncompressed_size, Config),
            unpack_tarball(tar_source(Input), Output, UncompressedLimit)
    end.
Expand Down Expand Up @@ -414,14 +415,15 @@ encode_metadata(Meta) ->
iolist_to_binary(Data).

%% @private
do_unpack(Files, OuterChecksum, Output) ->
do_unpack(Files, OuterChecksum, Output, Config) ->
State = #{
inner_checksum => undefined,
outer_checksum => OuterChecksum,
contents => undefined,
files => Files,
metadata => undefined,
output => Output
output => Output,
config => Config
},
State1 = check_files(State),
State2 = check_version(State1),
Expand All @@ -437,10 +439,12 @@ finish_unpack(#{
files := Files,
inner_checksum := InnerChecksum,
outer_checksum := OuterChecksum,
output := Output
output := Output,
config := Config
}) ->
_ = maps:get("VERSION", Files),
Contents = maps:get("contents.tar.gz", Files),
MaxUncompressedSize = maps:get(tarball_max_uncompressed_size, Config),

Result = #{
inner_checksum => InnerChecksum,
Expand All @@ -452,15 +456,15 @@ finish_unpack(#{
none ->
{ok, Result};
memory ->
case unpack_contents(Contents, memory) of
case unpack_contents(Contents, memory, MaxUncompressedSize) of
{ok, UnpackedContents} ->
{ok, Result#{contents => UnpackedContents}};
{error, Reason} ->
{error, {inner_tarball, Reason}}
end;
_ ->
filelib:ensure_dir(filename:join(Output, "*")),
case unpack_contents(Contents, Output) of
case unpack_contents(Contents, Output, MaxUncompressedSize) of
ok ->
[
try_updating_mtime(filename:join(Output, P))
Expand All @@ -474,14 +478,23 @@ finish_unpack(#{
end.

%% @private
unpack_contents({path, ContentsPath}, memory) ->
hex_erl_tar:extract(ContentsPath, [memory, compressed]);
unpack_contents({path, ContentsPath}, Output) ->
hex_erl_tar:extract(ContentsPath, [{cwd, Output}, compressed]);
unpack_contents(ContentsBinary, memory) ->
hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
unpack_contents(ContentsBinary, Output) ->
hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]).
%% @private
%% Extract the inner contents.tar.gz to memory or to a directory,
%% enforcing MaxSize on the uncompressed output and translating the
%% generic too_big error into {too_big_uncompressed, MaxSize}.
unpack_contents(Contents, Output, MaxSize) ->
    Source =
        case Contents of
            {path, ContentsPath} -> ContentsPath;
            ContentsBinary -> {binary, ContentsBinary}
        end,
    BaseOpts =
        if
            Output =:= memory -> [memory, compressed];
            true -> [{cwd, Output}, compressed]
        end,
    Result = hex_erl_tar:extract(Source, [{max_size, MaxSize} | BaseOpts]),
    case Result of
        {error, too_big} -> {error, {too_big_uncompressed, MaxSize}};
        _ -> Result
    end.

%% @private
copy_metadata_config(Output, MetadataBinary) ->
Expand Down Expand Up @@ -617,17 +630,24 @@ guess_build_tools(Metadata) ->
%%====================================================================

%% @private
unpack_tarball(Source, memory) ->
hex_erl_tar:extract(Source, [memory, compressed]);
unpack_tarball(Source, Output) ->
%% @private
%% Extract a tarball to memory or to a directory with an uncompressed-size
%% limit, tagging the too_big error as {tarball, {too_big_uncompressed, N}}.
unpack_tarball(Source, memory, MaxSize) ->
    Opts = [memory, compressed, {max_size, MaxSize}],
    case hex_erl_tar:extract(Source, Opts) of
        {error, too_big} ->
            {error, {tarball, {too_big_uncompressed, MaxSize}}};
        Result ->
            Result
    end;
unpack_tarball(Source, Output, MaxSize) ->
    filelib:ensure_dir(filename:join(Output, "*")),
    Opts = [{cwd, Output}, compressed, {max_size, MaxSize}],
    case hex_erl_tar:extract(Source, Opts) of
        ok ->
            %% best-effort mtime refresh on every extracted path
            lists:foreach(
                fun(Path) -> try_updating_mtime(filename:join(Output, Path)) end,
                filelib:wildcard("**", Output)
            ),
            ok;
        {error, too_big} ->
            {error, {tarball, {too_big_uncompressed, MaxSize}}};
        Result ->
            Result
    end.
Expand Down
94 changes: 93 additions & 1 deletion test/hex_tarball_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ all() ->
streamed_extract_test,
file_unpack_docs_memory_test,
file_unpack_docs_disk_test,
file_unpack_docs_too_big_test
file_unpack_docs_too_big_test,
too_big_uncompressed_to_unpack_test,
docs_too_big_uncompressed_to_unpack_test,
file_unpack_too_big_uncompressed_test,
file_unpack_docs_too_big_uncompressed_test
].

too_big_to_create_test(_Config) ->
Expand Down Expand Up @@ -703,6 +707,94 @@ file_unpack_docs_too_big_test(Config) ->

ok.

%% The tarball_max_uncompressed_size config rejects a package whose
%% uncompressed contents exceed the limit, for both unpack targets.
too_big_uncompressed_to_unpack_test(CtConfig) ->
    PrivDir = ?config(priv_dir, CtConfig),
    {ok, #{tarball := Tarball}} =
        hex_tarball:create(
            #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
            [{"src/foo.erl", <<"-module(foo).">>}]
        ),

    %% a 1-byte uncompressed limit must reject the package in memory...
    TinyLimit = maps:put(tarball_max_uncompressed_size, 1, hex_core:default_config()),
    {error, {inner_tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack(Tarball, memory, TinyLimit),

    %% ...and when unpacking to disk
    OutDir = filename:join(PrivDir, "too_big_uncompressed"),
    {error, {inner_tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack(Tarball, OutDir, TinyLimit),

    %% a generous limit lets the same tarball through
    BigLimit = maps:put(tarball_max_uncompressed_size, 10 * 1024 * 1024, hex_core:default_config()),
    {ok, _} = hex_tarball:unpack(Tarball, memory, BigLimit),
    ok.

%% The docs_tarball_max_uncompressed_size config rejects docs whose
%% uncompressed contents exceed the limit, for both unpack targets.
docs_too_big_uncompressed_to_unpack_test(CtConfig) ->
    PrivDir = ?config(priv_dir, CtConfig),
    {ok, Tarball} = hex_tarball:create_docs([{"index.html", <<"Docs">>}]),

    %% a 1-byte uncompressed limit must reject the docs in memory...
    TinyLimit = maps:put(docs_tarball_max_uncompressed_size, 1, hex_core:default_config()),
    {error, {tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack_docs(Tarball, memory, TinyLimit),

    %% ...and when unpacking to disk
    OutDir = filename:join(PrivDir, "docs_too_big_uncompressed"),
    {error, {tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack_docs(Tarball, OutDir, TinyLimit),

    %% a generous limit lets the same tarball through
    BigLimit = maps:put(
        docs_tarball_max_uncompressed_size, 10 * 1024 * 1024, hex_core:default_config()
    ),
    {ok, _} = hex_tarball:unpack_docs(Tarball, memory, BigLimit),
    ok.

%% Same uncompressed-size rejection as above, but reading the package
%% tarball from a file on disk via the {file, Path} source.
file_unpack_too_big_uncompressed_test(Config) ->
    PrivDir = ?config(priv_dir, Config),
    {ok, #{tarball := Tarball}} =
        hex_tarball:create(
            #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
            [{"src/foo.erl", <<"-module(foo).">>}]
        ),
    TarballPath = filename:join(PrivDir, "test_file_too_big_uncompressed.tar"),
    ok = file:write_file(TarballPath, Tarball),

    TinyLimit = maps:put(tarball_max_uncompressed_size, 1, hex_core:default_config()),
    %% memory unpack straight from the file
    {error, {inner_tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack({file, TarballPath}, memory, TinyLimit),
    %% disk unpack straight from the file
    OutDir = filename:join(PrivDir, "file_unpack_too_big_uncompressed"),
    {error, {inner_tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack({file, TarballPath}, OutDir, TinyLimit),
    ok.

%% Same uncompressed-size rejection for docs, reading the docs tarball
%% from a file on disk via the {file, Path} source.
file_unpack_docs_too_big_uncompressed_test(Config) ->
    PrivDir = ?config(priv_dir, Config),
    {ok, Tarball} = hex_tarball:create_docs([{"index.html", <<"Docs">>}]),
    TarballPath = filename:join(PrivDir, "docs_big_uncompressed.tar.gz"),
    ok = file:write_file(TarballPath, Tarball),

    TinyLimit = maps:put(docs_tarball_max_uncompressed_size, 1, hex_core:default_config()),
    %% memory unpack straight from the file
    {error, {tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack_docs({file, TarballPath}, memory, TinyLimit),
    %% disk unpack straight from the file
    OutDir = filename:join(PrivDir, "file_unpack_docs_too_big_uncompressed"),
    {error, {tarball, {too_big_uncompressed, 1}}} =
        hex_tarball:unpack_docs({file, TarballPath}, OutDir, TinyLimit),
    ok.

%%====================================================================
%% Helpers
%%====================================================================
Expand Down