-
Notifications
You must be signed in to change notification settings - Fork 1
Fix infinite loop when parsing MagicMock objects #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -410,14 +410,24 @@ def _reset(self, something): | |
| # string is so large that the overhead of copying it into a | ||
| # BytesIO is significant, advise caller to pass a stream instead. | ||
| self._stream = io.BytesIO(something) | ||
| elif something.seekable(): | ||
| # 'something' is already a seekable stream, use directly | ||
| self._stream = something | ||
| elif isinstance(something, io.IOBase): | ||
| # 'something' is a proper IO stream - must be seekable for parsing | ||
| if something.seekable(): | ||
| self._stream = something | ||
| else: | ||
| raise LLSDParseError( | ||
| "Cannot parse LLSD from non-seekable stream." | ||
| ) | ||
| else: | ||
| # 'something' isn't seekable, wrap in BufferedReader | ||
| # (let BufferedReader handle the problem of passing an | ||
| # inappropriate object) | ||
| self._stream = io.BufferedReader(something) | ||
| # Invalid input type - raise a clear error | ||
| # This catches MagicMock and other non-stream objects that might | ||
| # have read/seek attributes but aren't actual IO streams | ||
| raise LLSDParseError( | ||
| "Cannot parse LLSD from {0}. " | ||
| "Expected bytes or a file-like object (io.IOBase subclass).".format( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like technically this won't support regular file objects in Python 2.7: $ git log --graph --oneline -1
* 801f8e7 (HEAD -> fix-for-infinite-loop-issue-for-magicmock-objects, secondlife/fix-for-infinite-loop-issue-for-magicmock-objects) updated for test coverage
$ git diff
diff --git example.llsd example.llsd
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ example.llsd
@@ -0,0 +1 @@
+{}
diff --git tests/llsd_test.py tests/llsd_test.py
index c1900db..c66c8ea 100644
--- tests/llsd_test.py
+++ tests/llsd_test.py
@@ -1977,13 +1977,13 @@ class MapConstraints(unittest.TestCase):
self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")
-@unittest.skipIf(PY2, "These tests require Python 3")
class InvalidInputTypes(unittest.TestCase):
'''
Tests for handling invalid input types that should raise LLSDParseError
instead of hanging or consuming infinite memory.
'''
+ @unittest.skipIf(PY2, "These tests require Python 3")
def test_parse_magicmock_raises_error(self):
'''
Parsing a MagicMock object should raise LLSDParseError, not hang.
@@ -2003,8 +2003,8 @@ class InvalidInputTypes(unittest.TestCase):
Only applies to Python 3 where str and bytes are distinct.
'''
with self.assertRaises(llsd.LLSDParseError) as context:
- llsd.parse('not bytes')
- self.assertIn('str', str(context.exception))
+ llsd.parse(b'not bytes'.decode('ascii'))
+ self.assertIn('unicode' if PY2 else 'str', str(context.exception))
def test_parse_none_raises_error(self):
'''
@@ -2032,4 +2032,10 @@ class InvalidInputTypes(unittest.TestCase):
llsd.parse(stream)
self.assertIn('non-seekable', str(context.exception))
+ def test_parse_file(self):
+ with open("example.llsd", "rb") as f:
+ llsd.parse_notation(f)
+ def test_parse_io_base(self):
+ with io.open("example.llsd", "rb") as f:
+ llsd.parse_notation(f)
$ .venv/bin/python --version
Python 2.7.18
$ .venv/bin/python -m unittest tests.llsd_test.InvalidInputTypes
E..s...
======================================================================
ERROR: test_parse_file (tests.llsd_test.InvalidInputTypes)
–---------------------------------------------------------------------
Traceback (most recent call last):
File "tests/llsd_test.py", line 2037, in test_parse_file
llsd.parse_notation(f)
File "llsd/serde_notation.py", line 537, in parse_notation
parser = LLSDBaseParser(something)
File "llsd/base.py", line 399, in __init__
self._reset(something)
File "llsd/base.py", line 428, in _reset
type(something).__name__
LLSDParseError: Cannot parse LLSD from file. Expected bytes or a file-like object (io.IOBase subclass).
–---------------------------------------------------------------------
Ran 7 tests in 0.002s
FAILED (errors=1, skipped=1)
$However in Python 2.7 file objects don't have a $ git log --graph --oneline -1
* 0931046 (HEAD, tag: v1.2.0) Merge pull request #6 from secondlife/sl-18330-perf
|\
$ git diff
diff --git example.llsd example.llsd
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ example.llsd
@@ -0,0 +1 @@
+{}
diff --git tests/llsd_test.py tests/llsd_test.py
index 6691c1c..02905fa 100644
--- tests/llsd_test.py
+++ tests/llsd_test.py
@@ -1884,3 +1884,13 @@ class MapConstraints(unittest.TestCase):
llsdmap=llsd.LLSD({uuid.UUID(int=0) : 'uuid'})
self.assertEqual(llsd.format_xml(llsdmap), b'<?xml version="1.0" ?><llsd><map><key>00000000-0000-0000-0000-000000000000</key><string>uuid</string></map></llsd>')
self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")
+
+
+class InvalidInputTypes(unittest.TestCase):
+ def test_parse_file(self):
+ with open("example.llsd", "rb") as f:
+ llsd.parse_notation(f)
+
+ def test_parse_io_base(self):
+ with io.open("example.llsd", "rb") as f:
+ llsd.parse_notation(f)
$ .venv/bin/python --version
Python 2.7.18
$ .venv/bin/python -m unittest tests.llsd_test.InvalidInputTypes
E.
======================================================================
ERROR: test_parse_file (tests.llsd_test.InvalidInputTypes)
–---------------------------------------------------------------------
Traceback (most recent call last):
File "tests/llsd_test.py", line 1892, in test_parse_file
llsd.parse_notation(f)
File "llsd/serde_notation.py", line 519, in parse_notation
parser = LLSDBaseParser(something)
File "llsd/base.py", line 394, in __init__
self._reset(something)
File "llsd/base.py", line 408, in _reset
elif something.seekable():
AttributeError: 'file' object has no attribute 'seekable'
–---------------------------------------------------------------------
Ran 2 tests in 0.001s
FAILED (errors=1)
$Release-wise, v1.2.0 was adding support for parsing files/streams in the first place, so this is not a loss of functionality. However if we don't support regular files (the most obviously file-like objects, I imagine), we might could phrase this error message more clearly. “Expected bytes or a seekable io.IOBase object” maybe? 🤔 |
||
| type(something).__name__ | ||
| ) | ||
| ) | ||
|
|
||
| def starts_with(self, pattern): | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1977,3 +1977,59 @@ def test_uuid_map_key(self): | |
| self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}") | ||
|
|
||
|
|
||
| @unittest.skipIf(PY2, "These tests require Python 3") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, until we formally drop support for Python 2.7 in this library, we should run as many tests as possible with it, to avoid cases like those discussed above where functionality is broken / not implemented as intended for Python 2. Ideally we could also split these tests into a commit that precedes the actual fix, to demonstrate the above findings that these cases already raise exceptions (though with different exception classes & messages in some cases), so this fix should be a-OK. Let me know if you're busy and I can take a crack at that, if you like! |
||
| class InvalidInputTypes(unittest.TestCase): | ||
| ''' | ||
| Tests for handling invalid input types that should raise LLSDParseError | ||
| instead of hanging or consuming infinite memory. | ||
| ''' | ||
|
|
||
| def test_parse_magicmock_raises_error(self): | ||
| ''' | ||
| Parsing a MagicMock object should raise LLSDParseError, not hang. | ||
| This is a regression test for a bug where llsd.parse() would go into | ||
| an infinite loop when passed a MagicMock (e.g., from an improperly | ||
| mocked requests.Response.content). | ||
| ''' | ||
| from unittest.mock import MagicMock | ||
| mock = MagicMock() | ||
| with self.assertRaises(llsd.LLSDParseError) as context: | ||
| llsd.parse(mock) | ||
| self.assertIn('MagicMock', str(context.exception)) | ||
|
|
||
| def test_parse_string_raises_error(self): | ||
| ''' | ||
| Parsing a string (not bytes) should raise LLSDParseError. | ||
| Only applies to Python 3 where str and bytes are distinct. | ||
| ''' | ||
| with self.assertRaises(llsd.LLSDParseError) as context: | ||
| llsd.parse('not bytes') | ||
| self.assertIn('str', str(context.exception)) | ||
|
|
||
| def test_parse_none_raises_error(self): | ||
| ''' | ||
| Parsing None should raise LLSDParseError. | ||
| ''' | ||
| with self.assertRaises(llsd.LLSDParseError) as context: | ||
| llsd.parse(None) | ||
| self.assertIn('NoneType', str(context.exception)) | ||
|
|
||
| def test_parse_int_raises_error(self): | ||
| ''' | ||
| Parsing an integer should raise LLSDParseError. | ||
| ''' | ||
| with self.assertRaises(llsd.LLSDParseError) as context: | ||
| llsd.parse(42) | ||
| self.assertIn('int', str(context.exception)) | ||
|
|
||
| def test_parse_non_seekable_stream_raises_error(self): | ||
| ''' | ||
| Parsing a non-seekable stream should raise LLSDParseError. | ||
| ''' | ||
| stream = io.BytesIO() | ||
| stream.seekable = lambda: False | ||
| with self.assertRaises(llsd.LLSDParseError) as context: | ||
| llsd.parse(stream) | ||
| self.assertIn('non-seekable', str(context.exception)) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're replacing this codepath for unseekable streams with a new
LLSDParseError, but that should be OK.Looks like we stopped supporting unseekable streams in f5aab9f (#6, released in v1.2.0), where we replaced uses of
peek()with uses oftell()&seek()to rewind when necessary instead. All the parsing functions useLLSDBaseParser.matchseq()to consume sequences from the stream, which starts by callingtell()on the stream, which should only be supported if the stream is seekable.The use of
io.BufferedReader()comes from earlier, in 2ffd753 (#4), where we usedio.BufferedReaderto addpeek()support to streams. Per f5aab9f:This seems to imply wrapping an unseekable stream in an
io.BufferedReadershould add the ability to seek, but it doesn't: if the stream it contains is unseekable, theio.BufferedReaderis unseekable too. (That makes sense, sinceio.BufferedReaderwould need to keep an unbounded buffer for the whole stream in order to seek arbitrarily.)The
test_parse_non_seekable_stream_raises_errortest confirms this when run without thellsd/base.pyfix: once parsing proceeds tomatchseq(), we attempt totell()orseek()on the stream (depending how parsing is invoked), which raises anio.UnsupportedOperation: File or stream is not seekable.Seems acceptable to throw an exception here when the stream is unseekable.