Skip to content

Commit 4e3cee0

Browse files
committed
Add file opener brotli.open
1 parent f83aa51 commit 4e3cee0

2 files changed

Lines changed: 388 additions & 0 deletions

File tree

python/brotli_file.py

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
"""Functions that read and write brotli-compressed files.
2+
3+
The user of the file doesn't have to worry about the compression,
4+
but random access is not allowed."""
5+
6+
# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7+
8+
import os
9+
import brotli
10+
import io
11+
import _compression
12+
13+
__all__ = ["BrotliFile", "open"]
14+
15+
READ, WRITE = 1, 2
16+
17+
18+
def open(filename, mode="rb", quality=11, lgwin=22, lgblock=0,
         encoding=None, errors=None, newline=None):
    """Open a brotli-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes or
    os.PathLike object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode
    is "rb".

    The quality, lgwin and lgblock arguments are passed on unchanged to the
    BrotliFile constructor, which hands them to brotli.Compressor when the
    file is opened for writing.

    For binary mode, this function is equivalent to the BrotliFile
    constructor: BrotliFile(filename, mode, quality, lgwin, lgblock). In this
    case, the encoding, errors and newline arguments must not be provided.

    For text mode, a BrotliFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if the mode mixes "t" and "b", or if a text-only
    argument is supplied in binary mode, and TypeError if filename is neither
    a path nor an object with a read() or write() attribute.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # BrotliFile itself only understands binary modes, so drop the "t".
    binary_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = BrotliFile(
            filename, binary_mode, quality, lgwin, lgblock)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = BrotliFile(
            None, binary_mode, quality, lgwin, lgblock, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except Exception:
        # Do not leave the freshly opened BrotliFile (and any file it opened
        # itself) dangling when the text wrapper cannot be built, e.g. for
        # an unknown encoding.
        binary_file.close()
        raise
61+
62+
63+
class BrotliFile(_compression.BaseStream):
    """File-like object that reads or writes a brotli-compressed stream.

    The BrotliFile class simulates most of the methods of a file object with
    the exception of the truncate() method. An instance is opened either for
    reading or for writing, never both.

    This class only supports opening files in binary mode. If you need to open
    a compressed file in text mode, use the module-level open() function,
    which wraps a BrotliFile in an io.TextIOWrapper.
    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    # Class-level default so that `closed` and close() (which also runs from
    # __del__) work on an instance whose __init__ raised before the instance
    # attribute was assigned.
    fileobj = None

    def __init__(self, filename=None, mode=None,
                 quality=11, lgwin=22, lgblock=0,
                 fileobj=None):
        """Constructor for the BrotliFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x',
        or 'xb' depending on whether the file will be read or written. The
        default is the mode of fileobj if discernible; otherwise, the default
        is 'rb'. A mode of 'r' is equivalent to one of 'rb', and similarly for
        'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        The quality, lgwin and lgblock arguments are only used when writing;
        they are passed as keyword arguments to brotli.Compressor.

        Raises ValueError for a text ('t'), universal-newline ('U') or
        otherwise unrecognised mode.
        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            # This module defines its own open(), so the built-in one has to
            # be reached explicitly. `__builtins__` is an implementation
            # detail that is a plain dict in imported modules (so
            # `__builtins__.open` fails there); the `builtins` module is the
            # supported spelling.
            import builtins
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, (str, bytes)):
                filename = ''
        else:
            filename = os.fspath(filename)
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')

        if mode.startswith('r'):
            self.mode = READ
            raw = _BrotliReader(fileobj, _BrotliDecompressor)
            self._buffer = io.BufferedReader(raw)
            self.name = filename

        elif mode.startswith(('w', 'a', 'x')):
            self.mode = WRITE
            # size and offset both count uncompressed bytes written so far.
            self.size = 0
            self.offset = 0
            self.name = filename
            self.compress = brotli.Compressor(
                quality=quality, lgwin=lgwin, lgblock=lgblock)
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        self.fileobj = fileobj

    @staticmethod
    def _mode_error(message):
        """Build the OSError(EBADF) raised for wrong-direction operations."""
        import errno
        return OSError(errno.EBADF, message)

    @property
    def mtime(self):
        """Last modification time read from stream, or None.

        Nothing in this module records a modification time on the reader, so
        this is None unless the underlying reader exposes `_last_mtime`.
        It is also None for objects opened for writing.
        """
        buffered = getattr(self, '_buffer', None)
        return getattr(getattr(buffered, 'raw', None), '_last_mtime', None)

    def __repr__(self):
        s = repr(self.fileobj)
        return '<brotli ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def write(self, data):
        """Compress data and write the result to the underlying file.

        data may be bytes or any object supporting the buffer protocol.
        Returns the number of uncompressed bytes consumed.

        Raises OSError(EBADF) if the object was opened for reading.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            raise self._mode_error("write() on read-only BrotliFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed BrotliFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.process(data))
            self.size += length
            self.offset += length

        return length

    def read(self, size=-1):
        """Read up to size uncompressed bytes via the internal buffer.

        Raises OSError(EBADF) if the object was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            raise self._mode_error("read() on write-only BrotliFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative.

        Raises OSError(EBADF) if the object was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            raise self._mode_error("read1() on write-only BrotliFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Delegate to the internal buffer's peek(n).

        Raises OSError(EBADF) if the object was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            raise self._mode_error("peek() on write-only BrotliFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        """True once close() has run (or before a file object is attached)."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release resources; safe to call repeatedly.

        In write mode the compressor's remaining output is written to the
        underlying file. A file object opened by this instance (from a
        filename) is closed; a caller-supplied fileobj is left open.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                fileobj.write(self.compress.finish())
            elif self.mode == READ:
                self._buffer.close()
        finally:
            # Close our own file even if finishing the stream failed.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self):
        """In write mode, push buffered compressor output to the file."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush())
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Move to a position in the uncompressed stream; return the position.

        In write mode only forward seeks are possible, and they are carried
        out by writing zero bytes up to the target offset; seeking relative
        to the end raises ValueError and seeking backwards raises OSError.
        In read mode the request is delegated to the internal buffer.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for _ in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read one line of uncompressed data via the internal buffer.

        Raises OSError(EBADF) if the object was opened for writing, matching
        read(), read1() and peek().
        """
        self._check_not_closed()
        if self.mode != READ:
            raise self._mode_error(
                "readline() on write-only BrotliFile object")
        return self._buffer.readline(size)
262+
263+
264+
class _BrotliDecompressor:
265+
eof = False
266+
267+
def __init__(self):
268+
self.decompressor = brotli.Decompressor()
269+
self.needs_input = True
270+
self._buffer = bytearray(1)
271+
self._bufview = memoryview(self._buffer)
272+
self._buflen = len(self._buffer)
273+
self._pos = 0
274+
275+
def _check_buffer(self, new_len):
276+
if self._buflen < new_len:
277+
new_len = max(self._buflen, new_len)
278+
del self._bufview
279+
self._buffer.extend(b'\0' * (new_len * 2))
280+
self._bufview = memoryview(self._buffer)
281+
self._buflen = len(self._buffer)
282+
283+
def decompress(self, raw, size):
284+
if raw:
285+
uncompress = self.decompressor.process(raw)
286+
new_len = len(uncompress)
287+
self.needs_input = False
288+
else:
289+
uncompress = b''
290+
new_len = 0
291+
292+
if self._pos >= size:
293+
r = bytes(self._bufview[:size])
294+
pos = self._pos - size
295+
296+
self._check_buffer(pos + new_len)
297+
self._bufview[:pos] = self._bufview[size:self._pos]
298+
self._bufview[pos:pos + new_len] = uncompress
299+
self._pos = pos + new_len
300+
elif self._pos + new_len >= size:
301+
used_len = size - self._pos
302+
r = bytes(self._bufview[:self._pos]) + uncompress[:used_len]
303+
304+
rem_len = new_len - used_len
305+
self._check_buffer(rem_len)
306+
self._bufview[:rem_len] = uncompress[used_len:]
307+
self._pos = rem_len
308+
else:
309+
r = bytes(self._bufview[:self._pos]) + uncompress
310+
self._pos = 0
311+
self.needs_input = True
312+
return r
313+
314+
315+
class _BrotliReader(_compression.DecompressReader):
316+
def read(self, size=-1):
317+
try:
318+
return super(_BrotliReader, self).read(size)
319+
except EOFError:
320+
return b''

python/tests/file_test.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2016 The Brotli Authors. All rights reserved.
2+
#
3+
# Distributed under MIT license.
4+
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5+
6+
import unittest
7+
8+
from . import _test_utils
9+
import brotli
10+
import brotli_file
11+
12+
13+
class TestCompress(_test_utils.TestCase):
    """Compress through brotli_file.open() and verify with brotli.decompress().

    NOTE(review): the helpers used here (get_temp_*_name, assertFilesMatch,
    generate_test_methods) live in _test_utils and are not visible in this
    file; their exact behaviour should be confirmed there.
    """

    VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}

    def _check_decompression(self, test_data, **kwargs):
        # The keyword arguments describe compression settings only; they are
        # accepted for signature symmetry and never forwarded to
        # brotli.decompress().
        uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
        compressed_path = _test_utils.get_temp_compressed_name(test_data)
        # Write decompression to temp file and verify it matches the original.
        with open(uncompressed_path, 'wb') as sink, \
                open(compressed_path, 'rb') as source:
            sink.write(brotli.decompress(source.read()))
        self.assertFilesMatch(uncompressed_path, test_data)

    def _compress(self, test_data, **kwargs):
        compressed_path = _test_utils.get_temp_compressed_name(test_data)
        with brotli_file.open(compressed_path, 'w', **kwargs) as sink, \
                open(test_data, 'rb') as source:
            sink.write(source.read())

    def _test_compress(self, test_data, **kwargs):
        self._compress(test_data, **kwargs)
        self._check_decompression(test_data, **kwargs)


_test_utils.generate_test_methods(TestCompress, variants=TestCompress.VARIANTS)
40+
41+
42+
def _get_original_name(test_data):
43+
return test_data.split('.compressed')[0]
44+
45+
46+
class TestDecompress(_test_utils.TestCase):
    """Decompress through brotli_file.open() and compare with the original.

    NOTE(review): get_temp_uncompressed_name, assertFilesMatch and
    generate_test_methods come from _test_utils, which is not visible in this
    file; confirm their behaviour there.
    """

    def _check_decompression(self, test_data):
        # Verify decompression matches the original.
        produced_path = _test_utils.get_temp_uncompressed_name(test_data)
        expected_path = _get_original_name(test_data)
        self.assertFilesMatch(produced_path, expected_path)

    def _decompress(self, test_data):
        produced_path = _test_utils.get_temp_uncompressed_name(test_data)
        with open(produced_path, 'wb') as sink, \
                brotli_file.open(test_data) as source:
            sink.write(source.read())

    def _test_decompress(self, test_data):
        self._decompress(test_data)
        self._check_decompression(test_data)


_test_utils.generate_test_methods(TestDecompress, for_decompression=True)
66+
67+
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)