Skip to content

[3.7] bpo-21872: fix lzma library decompresses data incompletely (GH-14048) #16055

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions Lib/test/test_lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,36 @@ def test_tell_bad_args(self):
f.close()
self.assertRaises(ValueError, f.tell)

def test_issue21872(self):
# sometimes decompress data incompletely

# ---------------------
# when max_length == -1
# ---------------------
d1 = LZMADecompressor()
entire = d1.decompress(ISSUE_21872_DAT, max_length=-1)
self.assertEqual(len(entire), 13160)
self.assertTrue(d1.eof)

# ---------------------
# when max_length > 0
# ---------------------
d2 = LZMADecompressor()

# When this value of max_length is used, the input and output
# buffers are exhausted at the same time, and lzs's internal
# state still have 11 bytes can be output.
out1 = d2.decompress(ISSUE_21872_DAT, max_length=13149)
self.assertFalse(d2.needs_input) # ensure needs_input mechanism works
self.assertFalse(d2.eof)

# simulate needs_input mechanism
# output internal state's 11 bytes
out2 = d2.decompress(b'')
self.assertEqual(len(out2), 11)
self.assertTrue(d2.eof)
self.assertEqual(out1 + out2, entire)


class OpenTestCase(unittest.TestCase):

Expand Down Expand Up @@ -1761,6 +1791,139 @@ def test_filter_properties_roundtrip(self):
b"\x00"
)

ISSUE_21872_DAT = (
b']\x00\x00@\x00h3\x00\x00\x00\x00\x00\x00\x00\x00`D\x0c\x99\xc8'
b'\xd1\xbbZ^\xc43+\x83\xcd\xf1\xc6g\xec-\x061F\xb1\xbb\xc7\x17%-\xea'
b'\xfap\xfb\x8fs\x128\xb2,\x88\xe4\xc0\x12|*x\xd0\xa2\xc4b\x1b!\x02c'
b'\xab\xd9\x87U\xb8n \xfaVJ\x9a"\xb78\xff%_\x17`?@*\xc2\x82'
b"\xf2^\x1b\xb8\x04&\xc0\xbb\x03g\x9d\xca\xe9\xa4\xc9\xaf'\xe5\x8e}"
b'F\xdd\x11\xf3\x86\xbe\x1fN\x95\\\xef\xa2Mz-\xcb\x9a\xe3O@'
b"\x19\x07\xf6\xee\x9e\x9ag\xc6\xa5w\rnG'\x99\xfd\xfeGI\xb0"
b'\xbb\xf9\xc2\xe1\xff\xc5r\xcf\x85y[\x01\xa1\xbd\xcc/\xa3\x1b\x83\xaa'
b'\xc6\xf9\x99\x0c\xb6_\xc9MQ+x\xa2F\xda]\xdd\xe8\xfb\x1a&'
b',\xc4\x19\x1df\x81\x1e\x90\xf3\xb8Hgr\x85v\xbe\xa3qx\x01Y\xb5\x9fF'
b"\x13\x18\x01\xe69\x9b\xc8'\x1e\x9d\xd6\xe4F\x84\xac\xf8d<\x11\xd5"
b'\\\x0b\xeb\x0e\x82\xab\xb1\xe6\x1fka\xe1i\xc4 C\xb1"4)\xd6\xa7`\x02'
b'\xec\x11\x8c\xf0\x14\xb0\x1d\x1c\xecy\xf0\xb7|\x11j\x85X\xb2!\x1c'
b'\xac\xb5N\xc7\x85j\x9ev\xf5\xe6\x0b\xc1]c\xc15\x16\x9f\xd5\x99'
b"\xfei^\xd2G\x9b\xbdl\xab:\xbe,\xa9'4\x82\xe5\xee\xb3\xc1"
b'$\x93\x95\xa8Y\x16\xf5\xbf\xacw\x91\x04\x1d\x18\x06\xe9\xc5\xfdk\x06'
b'\xe8\xfck\xc5\x86>\x8b~\xa4\xcb\xf1\xb3\x04\xf1\x04G5\xe2\xcc]'
b'\x16\xbf\x140d\x18\xe2\xedw#(3\xca\xa1\x80bX\x7f\xb3\x84'
b'\x9d\xdb\xe7\x08\x97\xcd\x16\xb9\xf1\xd5r+m\x1e\xcb3q\xc5\x9e\x92'
b"\x7f\x8e*\xc7\xde\xe9\xe26\xcds\xb1\x10-\xf6r\x02?\x9d\xddCgJN'"
b'\x11M\xfa\nQ\n\xe6`m\xb8N\xbbq\x8el\x0b\x02\xc7:q\x04G\xa1T'
b'\xf1\xfe!0\x85~\xe5\x884\xe9\x89\xfb\x13J8\x15\xe42\xb6\xad'
b'\x877A\x9a\xa6\xbft]\xd0\xe35M\xb0\x0cK\xc8\xf6\x88\xae\xed\xa9,j7'
b'\x81\x13\xa0(\xcb\xe1\xe9l2\x7f\xcd\xda\x95(\xa70B\xbd\xf4\xe3'
b'hp\x94\xbdJ\xd7\t\xc7g\xffo?\x89?\xf8}\x7f\xbc\x1c\x87'
b'\x14\xc0\xcf\x8cV:\x9a\x0e\xd0\xb2\x1ck\xffk\xb9\xe0=\xc7\x8d/'
b'\xb8\xff\x7f\x1d\x87`\x19.\x98X*~\xa7j\xb9\x0b"\xf4\xe4;V`\xb9\xd7'
b'\x03\x1e\xd0t0\xd3\xde\x1fd\xb9\xe2)\x16\x81}\xb1\\b\x7fJ'
b'\x92\xf4\xff\n+V!\xe9\xde\x98\xa0\x8fK\xdf7\xb9\xc0\x12\x1f\xe2'
b'\xe9\xb0`\xae\x14\r\xa7\xc4\x81~\xd8\x8d\xc5\x06\xd8m\xb0Y\x8a)'
b'\x06/\xbb\xf9\xac\xeaP\xe0\x91\x05m[\xe5z\xe6Z\xf3\x9f\xc7\xd0'
b'\xd3\x8b\xf3\x8a\x1b\xfa\xe4Pf\xbc0\x17\x10\xa9\xd0\x95J{\xb3\xc3'
b'\xfdW\x9bop\x0f\xbe\xaee\xa3]\x93\x9c\xda\xb75<\xf6g!\xcc\xb1\xfc\\'
b'7\x152Mc\x17\x84\x9d\xcd35\r0\xacL-\xf3\xfb\xcb\x96\x1e\xe9U\x7f'
b'\xd7\xca\xb0\xcc\x89\x0c*\xce\x14\xd1P\xf1\x03\xb6.~9o?\xe8'
b'\r\x86\xe0\x92\x87}\xa3\x84\x03P\xe0\xc2\x7f\n;m\x9d\x9e\xb4|'
b'\x8c\x18\xc0#0\xfe3\x07<\xda\xd8\xcf^\xd4Hi\xd6\xb3\x0bT'
b'\x1dF\x88\x85q}\x02\xc6&\xc4\xae\xce\x9cU\xfa\x0f\xcc\xb6\x1f\x11'
b'drw\x9eN\x19\xbd\xffz\x0f\xf0\x04s\xadR\xc1\xc0\xbfl\xf1\xba\xf95^'
b'e\xb1\xfbVY\xd9\x9f\x1c\xbf*\xc4\xa86\x08+\xd6\x88[\xc4_rc\xf0f'
b'\xb8\xd4\xec\x1dx\x19|\xbf\xa7\xe0\x82\x0b\x8c~\x10L/\x90\xd6\xfb'
b'\x81\xdb\x98\xcc\x02\x14\xa5C\xb2\xa7i\xfd\xcd\x1fO\xf7\xe9\x89t\xf0'
b'\x17\xa5\x1c\xad\xfe<Q`%\x075k\n7\x9eI\x82<#)&\x04\xc2\xf0C\xd4`!'
b'\xcb\xa9\xf9\xb3F\x86\xb5\xc3M\xbeu\x12\xb2\xca\x95e\x10\x0b\xb1\xcc'
b'\x01b\x9bXa\x1b[B\x8c\x07\x11Of;\xeaC\xebr\x8eb\xd9\x9c\xe4i]<z\x9a'
b'\x03T\x8b9pF\x10\x8c\x84\xc7\x0e\xeaPw\xe5\xa0\x94\x1f\x84\xdd'
b'a\xe8\x85\xc2\x00\xebq\xe7&Wo5q8\xc2t\x98\xab\xb7\x7f\xe64-H'
b'\t\xb4d\xbe\x06\xe3Q\x8b\xa9J\xb0\x00\xd7s.\x85"\xc0p\x05'
b'\x1c\x06N\x87\xa5\xf8\xc3g\x1b}\x0f\x0f\xc3|\x90\xea\xefd3X'
b'[\xab\x04E\xf2\xf2\xc9\x08\x8a\xa8+W\xa2v\xec\x15G\x08/I<L\\1'
b'\xff\x15O\xaa\x89{\xd1mW\x13\xbd~\xe1\x90^\xc4@\r\xed\xb5D@\xb4\x08'
b'A\x90\xe69;\xc7BO\xdb\xda\xebu\x9e\xa9tN\xae\x8aJ5\xcd\x11\x1d\xea'
b"\xe5\xa7\x04\xe6\x82Z\xc7O\xe46[7\xdco*[\xbe\x0b\xc9\xb7a\xab'\xf6"
b"\xd1u\xdb\xd9q\xf5+y\x1b\x00\xb4\xf3a\xae\xf1M\xc4\xbc\xd00'\x06pQ"
b'\x8dH\xaa\xaa\xc4\xd2K\x9b\xc0\xe9\xec=n\xa9\x1a\x8a\xc2\xe8\x18\xbc'
b'\x93\xb8F\xa1\x8fOY\xe7\xda\xcf0\t\xff|\xd9\xe5\xcf\xe7\xf6\xbe'
b'\xf8\x04\x17\xf2\xe5P\xa7y~\xce\x11h0\x81\x80d[\x00_v\xbbc\xdbI'
b'3\xbc`W\xc0yrkB\xf5\x9f\xe9i\xc5\x8a^\x8d\xd4\x81\xd9\x05\xc1\xfc>'
b'"\xd1v`\x82\xd5$\x89\xcf^\xd52.\xafd\xe8d@\xaa\xd5Y|\x90\x84'
b'j\xdb}\x84riV\x8e\xf0X4rB\xf2NPS[\x8e\x88\xd4\x0fI\xb8'
b'\xdd\xcb\x1d\xf2(\xdf;9\x9e|\xef^0;.*[\x9fl\x7f\xa2_X\xaff!\xbb\x03'
b'\xff\x19\x8f\x88\xb5\xb6\x884\xa3\x05\xde3D{\xe3\xcb\xce\xe4t]'
b'\x875\xe3Uf\xae\xea\x88\x1c\x03b\n\xb1,Q\xec\xcf\x08\t\xde@\x83\xaa<'
b',-\xe4\xee\x9b\x843\xe5\x007\tK\xac\x057\xd6*X\xa3\xc6~\xba\xe6O'
b'\x81kz"\xbe\xe43sL\xf1\xfa;\xf4^\x1e\xb4\x80\xe2\xbd\xaa\x17Z\xe1f'
b'\xda\xa6\xb9\x07:]}\x9fa\x0b?\xba\xe7\xf15\x04M\xe3\n}M\xa4\xcb\r'
b'2\x8a\x88\xa9\xa7\x92\x93\x84\x81Yo\x00\xcc\xc4\xab\x9aT\x96\x0b\xbe'
b'U\xac\x1d\x8d\x1b\x98"\xf8\x8f\xf1u\xc1n\xcc\xfcA\xcc\x90\xb7i'
b'\x83\x9c\x9c~\x1d4\xa2\xf0*J\xe7t\x12\xb4\xe3\xa0u\xd7\x95Z'
b'\xf7\xafG\x96~ST,\xa7\rC\x06\xf4\xf0\xeb`2\x9e>Q\x0e\xf6\xf5\xc5'
b'\x9b\xb5\xaf\xbe\xa3\x8f\xc0\xa3hu\x14\x12 \x97\x99\x04b\x8e\xc7\x1b'
b'VKc\xc1\xf3 \xde\x85-:\xdc\x1f\xac\xce*\x06\xb3\x80;`'
b'\xdb\xdd\x97\xfdg\xbf\xe7\xa8S\x08}\xf55e7\xb8/\xf0!\xc8'
b"Y\xa8\x9a\x07'\xe2\xde\r\x02\xe1\xb2\x0c\xf4C\xcd\xf9\xcb(\xe8\x90"
b'\xd3bTD\x15_\xf6\xc3\xfb\xb3E\xfc\xd6\x98{\xc6\\fz\x81\xa99\x85\xcb'
b'\xa5\xb1\x1d\x94bqW\x1a!;z~\x18\x88\xe8i\xdb\x1b\x8d\x8d'
b'\x06\xaa\x0e\x99s+5k\x00\xe4\xffh\xfe\xdbt\xa6\x1bU\xde\xa3'
b'\xef\xcb\x86\x9e\x81\x16j\n\x9d\xbc\xbbC\x80?\x010\xc7Jj;'
b'\xc4\xe5\x86\xd5\x0e0d#\xc6;\xb8\xd1\xc7c\xb5&8?\xd9J\xe5\xden\xb9'
b'\xe9cb4\xbb\xe6\x14\xe0\xe7l\x1b\x85\x94\x1fh\xf1n\xdeZ\xbe'
b'\x88\xff\xc2e\xca\xdc,B-\x8ac\xc9\xdf\xf5|&\xe4LL\xf0\x1f\xaa8\xbd'
b'\xc26\x94bVi\xd3\x0c\x1c\xb6\xbb\x99F\x8f\x0e\xcc\x8e4\xc6/^W\xf5?'
b'\xdc\x84(\x14dO\x9aD6\x0f4\xa3,\x0c\x0bS\x9fJ\xe1\xacc^\x8a0\t\x80D['
b'\xb8\xe6\x86\xb0\xe8\xd4\xf9\x1en\xf1\xf5^\xeb\xb8\xb8\xf8'
b')\xa8\xbf\xaa\x84\x86\xb1a \x95\x16\x08\x1c\xbb@\xbd+\r/\xfb'
b'\x92\xfbh\xf1\x8d3\xf9\x92\xde`\xf1\x86\x03\xaa+\xd9\xd9\xc6P\xaf'
b'\xe3-\xea\xa5\x0fB\xca\xde\xd5n^\xe3/\xbf\xa6w\xc8\x0e<M'
b'\xc2\x1e!\xd4\xc6E\xf2\xad\x0c\xbc\x1d\x88Y\x03\x98<\x92\xd9\xa6B'
b'\xc7\x83\xb5"\x97D|&\xc4\xd4\xfad\x0e\xde\x06\xa3\xc2\x9c`\xf2'
b'7\x03\x1a\xed\xd80\x10\xe9\x0co\x10\xcf\x18\x16\xa7\x1c'
b"\xe5\x96\xa4\xd9\xe1\xa5v;]\xb7\xa9\xdc'hA\xe3\x9c&\x98\x0b9\xdf~@"
b'\xf8\xact\x87<\xf94\x0c\x9d\x93\xb0)\xe1\xa2\x0f\x1e=:&\xd56\xa5A+'
b'\xab\xc4\x00\x8d\x81\x93\xd4\xd8<\x82k\\d\xd8v\xab\xbd^l5C?\xd4\xa0'
b'M\x12C\xc8\x80\r\xc83\xe8\xc0\xf5\xdf\xca\x05\xf4BPjy\xbe\x91\x9bzE'
b"\xd8[\x93oT\r\x13\x16'\x1a\xbd*H\xd6\xfe\r\xf3\x91M\x8b\xee\x8f7f"
b"\x0b;\xaa\x85\xf2\xdd'\x0fwM \xbd\x13\xb9\xe5\xb8\xb7 D+P\x1c\xe4g"
b'n\xd2\xf1kc\x15\xaf\xc6\x90V\x03\xc2UovfZ\xcc\xd23^\xb3\xe7\xbf'
b'\xacv\x1d\x82\xedx\xa3J\xa9\xb7\xcf\x0c\xe6j\x96n*o\x18>'
b'\xc6\xfd\x97_+D{\x03\x15\xe8s\xb1\xc8HAG\xcf\xf4\x1a\xdd'
b'\xad\x11\xbf\x157q+\xdeW\x89g"X\x82\xfd~\xf7\xab4\xf6`\xab\xf1q'
b')\x82\x10K\xe9sV\xfe\xe45\xafs*\x14\xa7;\xac{\x06\x9d<@\x93G'
b'j\x1d\xefL\xe9\xd8\x92\x19&\xa1\x16\x19\x04\tu5\x01]\xf6\xf4'
b'\xcd\\\xd8A|I\xd4\xeb\x05\x88C\xc6e\xacQ\xe9*\x97~\x9au\xf8Xy'
b"\x17P\x10\x9f\n\x8c\xe2fZEu>\x9b\x1e\x91\x0b'`\xbd\xc0\xa8\x86c\x1d"
b'Z\xe2\xdc8j\x95\xffU\x90\x1e\xf4o\xbc\xe5\xe3e>\xd2R\xc0b#\xbc\x15'
b'H-\xb9!\xde\x9d\x90k\xdew\x9b{\x99\xde\xf7/K)A\xfd\xf5\xe6:\xda'
b'UM\xcc\xbb\xa2\x0b\x9a\x93\xf5{9\xc0 \xd2((6i\xc0\xbbu\xd8\x9e\x8d'
b'\xf8\x04q\x10\xd4\x14\x9e7-\xb9B\xea\x01Q8\xc8v\x9a\x12A\x88Cd\x92'
b"\x1c\x8c!\xf4\x94\x87'\xe3\xcd\xae\xf7\xd8\x93\xfa\xde\xa8b\x9e\xee2"
b'K\xdb\x00l\x9d\t\xb1|D\x05U\xbb\xf4>\xf1w\x887\xd1}W\x9d|g|1\xb0\x13'
b"\xa3 \xe5\xbfm@\xc06+\xb7\t\xcf\x15D\x9a \x1fM\x1f\xd2\xb5'\xa9\xbb"
b'~Co\x82\xfa\xc2\t\xe6f\xfc\xbeI\xae1\x8e\xbe\xb8\xcf\x86\x17'
b'\x9f\xe2`\xbd\xaf\xba\xb9\xbc\x1b\xa3\xcd\x82\x8fwc\xefd\xa9\xd5\x14'
b'\xe2C\xafUE\xb6\x11MJH\xd0=\x05\xd4*I\xff"\r\x1b^\xcaS6=\xec@\xd5'
b'\x11,\xe0\x87Gr\xaa[\xb8\xbc>n\xbd\x81\x0c\x07<\xe9\x92('
b'\xb2\xff\xac}\xe7\xb6\x15\x90\x9f~4\x9a\xe6\xd6\xd8s\xed\x99tf'
b'\xa0f\xf8\xf1\x87\t\x96/)\x85\xb6\n\xd7\xb2w\x0b\xbc\xba\x99\xee'
b'Q\xeen\x1d\xad\x03\xc3s\xd1\xfd\xa2\xc6\xb7\x9a\x9c(G<6\xad[~H '
b'\x16\x89\x89\xd0\xc3\xd2\xca~\xac\xea\xa5\xed\xe5\xfb\r:'
b'\x8e\xa6\xf1e\xbb\xba\xbd\xe0(\xa3\x89_\x01(\xb5c\xcc\x9f\x1fg'
b'v\xfd\x17\xb3\x08S=S\xee\xfc\x85>\x91\x8d\x8d\nYR\xb3G\xd1A\xa2\xb1'
b'\xec\xb0\x01\xd2\xcd\xf9\xfe\x82\x06O\xb3\xecd\xa9c\xe0\x8eP\x90\xce'
b'\xe0\xcd\xd8\xd8\xdc\x9f\xaa\x01"[Q~\xe4\x88\xa1#\xc1\x12C\xcf'
b'\xbe\x80\x11H\xbf\x86\xd8\xbem\xcfWFQ(X\x01DK\xdfB\xaa\x10.-'
b'\xd5\x9e|\x86\x15\x86N]\xc7Z\x17\xcd=\xd7)M\xde\x15\xa4LTi\xa0\x15'
b'\xd1\xe7\xbdN\xa4?\xd1\xe7\x02\xfe4\xe4O\x89\x98&\x96\x0f\x02\x9c'
b'\x9e\x19\xaa\x13u7\xbd0\xdc\xd8\x93\xf4BNE\x1d\x93\x82\x81\x16'
b'\xe5y\xcf\x98D\xca\x9a\xe2\xfd\xcdL\xcc\xd1\xfc_\x0b\x1c\xa0]\xdc'
b'\xa91 \xc9c\xd8\xbf\x97\xcfp\xe6\x19-\xad\xff\xcc\xd1N(\xe8'
b'\xeb#\x182\x96I\xf7l\xf3r\x00'
)


def test_main():
run_unittest(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :mod:`lzma`: module decompresses data incompletely. When decompressing a
FORMAT_ALONE format file, and it doesn't have the end marker, sometimes the
last one to dozens bytes can't be output. Patch by Ma Lin.
28 changes: 22 additions & 6 deletions Modules/_lzmamodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -881,9 +881,6 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
PyObject *result;
lzma_stream *lzs = &d->lzs;

if (lzs->avail_in == 0)
return PyBytes_FromStringAndSize(NULL, 0);

if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
else
Expand All @@ -900,23 +897,30 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
Py_BEGIN_ALLOW_THREADS
lzret = lzma_code(lzs, LZMA_RUN);
data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0)
lzret = LZMA_OK; /* That wasn't a real error */
Py_END_ALLOW_THREADS

if (catch_lzma_error(lzret))
goto error;
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
d->check = lzma_get_check(&d->lzs);
if (lzret == LZMA_STREAM_END) {
d->eof = 1;
break;
} else if (lzs->avail_in == 0) {
break;
} else if (lzs->avail_out == 0) {
/* Need to check lzs->avail_out before lzs->avail_in.
Maybe lzs's internal state still have a few bytes
can be output, grow the output buffer and continue
if max_lengh < 0. */
if (data_size == max_length)
break;
if (grow_buffer(&result, max_length) == -1)
goto error;
lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
} else if (lzs->avail_in == 0) {
break;
}
}
if (data_size != PyBytes_GET_SIZE(result))
Expand Down Expand Up @@ -999,7 +1003,19 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
}
else if (lzs->avail_in == 0) {
lzs->next_in = NULL;
d->needs_input = 1;

if (lzs->avail_out == 0) {
/* (avail_in==0 && avail_out==0)
Maybe lzs's internal state still have a few bytes can
be output, try to output them next time. */
d->needs_input = 0;

/* if max_length < 0, lzs->avail_out always > 0 */
assert(max_length >= 0);
} else {
/* Input buffer exhausted, output buffer has space. */
d->needs_input = 1;
}
}
else {
d->needs_input = 0;
Expand Down