Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions llsd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,24 @@ def _reset(self, something):
# string is so large that the overhead of copying it into a
# BytesIO is significant, advise caller to pass a stream instead.
self._stream = io.BytesIO(something)
elif something.seekable():
# 'something' is already a seekable stream, use directly
self._stream = something
elif isinstance(something, io.IOBase):
# 'something' is a proper IO stream - must be seekable for parsing
if something.seekable():
self._stream = something
else:
raise LLSDParseError(
"Cannot parse LLSD from non-seekable stream."
)
else:
# 'something' isn't seekable, wrap in BufferedReader
# (let BufferedReader handle the problem of passing an
# inappropriate object)
self._stream = io.BufferedReader(something)
Comment on lines -417 to -420
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're replacing this codepath for unseekable streams with a new LLSDParseError, but that should be OK.

Looks like we stopped supporting unseekable streams in f5aab9f (#6, released in v1.2.0), where we replaced uses of peek() with uses of tell() & seek() to rewind when necessary instead. All the parsing functions use LLSDBaseParser.matchseq() to consume sequences from the stream, which starts by calling tell() on the stream, which should only be supported if the stream is seekable.

The use of io.BufferedReader() comes from earlier, in 2ffd753 (#4), where we used io.BufferedReader to add peek() support to streams. Per f5aab9f:

Refine LLSDBaseParser._reset() wrapper logic to wrap an existing stream in io.BufferedReader only if it's not already seekable(). That includes an incoming bytes object: wrap it only in BytesIO, which is seekable().

This seems to imply wrapping an unseekable stream in an io.BufferedReader should add the ability to seek, but it doesn't: if the stream it contains is unseekable, the io.BufferedReader is unseekable too. (That makes sense, since io.BufferedReader would need to keep an unbounded buffer for the whole stream in order to seek arbitrarily.)

Python 3.11.2 (main, Apr 28 2025, 14:11:48) [GCC 12.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import io, sys
>>> sys.stdin.buffer.seekable()
False
>>> reader = io.BufferedReader(sys.stdin.buffer)
>>> reader.seekable()
False
>>> reader.tell()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
OSError: [Errno 29] Illegal seek
>>>
Python 2.7.18 (default, Dec 18 2025, 00:08:35)
[GCC 12.2.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import io, sys
>>> reader = io.open(sys.stdin.fileno(), 'rb')
>>> type(reader)
<type '_io.BufferedReader'>
>>> reader.seekable()
False
>>> reader.tell()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
IOError: [Errno 29] Illegal seek
>>>

The test_parse_non_seekable_stream_raises_error test confirms this when run without the llsd/base.py fix: once parsing proceeds to matchseq(), we attempt to tell() or seek() on the stream (depending how parsing is invoked), which raises an io.UnsupportedOperation: File or stream is not seekable.

$ git log --graph --oneline -1
* f00d36b (HEAD -> main, secondlife/main) Allow dependabot in CLA check (#33)

$ python3
Python 3.11.2 (main, Apr 28 2025, 14:11:48) [GCC 12.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import io, llsd
>>> stream = io.BytesIO()
>>> stream.seekable = lambda: False
>>> llsd.parse(stream)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/local/natty/python-llsd/llsd/__init__.py", line 53, in parse
    if baseparser.matchseq(pattern):
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/local/natty/python-llsd/llsd/base.py", line 455, in matchseq
    self._stream.seek(oldpos)
io.UnsupportedOperation: File or stream is not seekable.
>>>

$

Seems acceptable to throw an exception here when the stream is unseekable.

# Invalid input type - raise a clear error
# This catches MagicMock and other non-stream objects that might
# have read/seek attributes but aren't actual IO streams
raise LLSDParseError(
"Cannot parse LLSD from {0}. "
"Expected bytes or a file-like object (io.IOBase subclass).".format(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like technically this won't support regular file objects in Python 2.7:

$ git log --graph --oneline -1
* 801f8e7 (HEAD -> fix-for-infinite-loop-issue-for-magicmock-objects, secondlife/fix-for-infinite-loop-issue-for-magicmock-objects) updated for test coverage

$ git diff
diff --git example.llsd example.llsd
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ example.llsd
@@ -0,0 +1 @@
+{}
diff --git tests/llsd_test.py tests/llsd_test.py
index c1900db..c66c8ea 100644
--- tests/llsd_test.py
+++ tests/llsd_test.py
@@ -1977,13 +1977,13 @@ class MapConstraints(unittest.TestCase):
         self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")
 
 
-@unittest.skipIf(PY2, "These tests require Python 3")
 class InvalidInputTypes(unittest.TestCase):
     '''
     Tests for handling invalid input types that should raise LLSDParseError
     instead of hanging or consuming infinite memory.
     '''
 
+    @unittest.skipIf(PY2, "These tests require Python 3")
     def test_parse_magicmock_raises_error(self):
         '''
         Parsing a MagicMock object should raise LLSDParseError, not hang.
@@ -2003,8 +2003,8 @@ class InvalidInputTypes(unittest.TestCase):
         Only applies to Python 3 where str and bytes are distinct.
         '''
         with self.assertRaises(llsd.LLSDParseError) as context:
-            llsd.parse('not bytes')
-        self.assertIn('str', str(context.exception))
+            llsd.parse(b'not bytes'.decode('ascii'))
+        self.assertIn('unicode' if PY2 else 'str', str(context.exception))
 
     def test_parse_none_raises_error(self):
         '''
@@ -2032,4 +2032,10 @@ class InvalidInputTypes(unittest.TestCase):
             llsd.parse(stream)
         self.assertIn('non-seekable', str(context.exception))
 
+    def test_parse_file(self):
+        with open("example.llsd", "rb") as f:
+            llsd.parse_notation(f)
 
+    def test_parse_io_base(self):
+        with io.open("example.llsd", "rb") as f:
+            llsd.parse_notation(f)

$ .venv/bin/python --version
Python 2.7.18

$ .venv/bin/python -m unittest tests.llsd_test.InvalidInputTypes
E..s...
======================================================================
ERROR: test_parse_file (tests.llsd_test.InvalidInputTypes)
–---------------------------------------------------------------------
Traceback (most recent call last):
  File "tests/llsd_test.py", line 2037, in test_parse_file
    llsd.parse_notation(f)
  File "llsd/serde_notation.py", line 537, in parse_notation
    parser = LLSDBaseParser(something)
  File "llsd/base.py", line 399, in __init__
    self._reset(something)
  File "llsd/base.py", line 428, in _reset
    type(something).__name__
LLSDParseError: Cannot parse LLSD from file. Expected bytes or a file-like object (io.IOBase subclass).

–---------------------------------------------------------------------
Ran 7 tests in 0.002s

FAILED (errors=1, skipped=1)

$

However in Python 2.7 file objects don't have a seekable() method, so as with unseekable streams, we already haven't supported parsing Python 2.7 file objects as of f5aab9f / #6 / v1.2.0:

$ git log --graph --oneline -1
*   0931046 (HEAD, tag: v1.2.0) Merge pull request #6 from secondlife/sl-18330-perf
|\

$ git diff
diff --git example.llsd example.llsd
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ example.llsd
@@ -0,0 +1 @@
+{}
diff --git tests/llsd_test.py tests/llsd_test.py
index 6691c1c..02905fa 100644
--- tests/llsd_test.py
+++ tests/llsd_test.py
@@ -1884,3 +1884,13 @@ class MapConstraints(unittest.TestCase):
         llsdmap=llsd.LLSD({uuid.UUID(int=0) : 'uuid'})
         self.assertEqual(llsd.format_xml(llsdmap), b'<?xml version="1.0" ?><llsd><map><key>00000000-0000-0000-0000-000000000000</key><string>uuid</string></map></llsd>')
         self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")
+
+
+class InvalidInputTypes(unittest.TestCase):
+    def test_parse_file(self):
+        with open("example.llsd", "rb") as f:
+            llsd.parse_notation(f)
+
+    def test_parse_io_base(self):
+        with io.open("example.llsd", "rb") as f:
+            llsd.parse_notation(f)

$ .venv/bin/python --version
Python 2.7.18

$ .venv/bin/python -m unittest tests.llsd_test.InvalidInputTypes
E.
======================================================================
ERROR: test_parse_file (tests.llsd_test.InvalidInputTypes)
–---------------------------------------------------------------------
Traceback (most recent call last):
  File "tests/llsd_test.py", line 1892, in test_parse_file
    llsd.parse_notation(f)
  File "llsd/serde_notation.py", line 519, in parse_notation
    parser = LLSDBaseParser(something)
  File "llsd/base.py", line 394, in __init__
    self._reset(something)
  File "llsd/base.py", line 408, in _reset
    elif something.seekable():
AttributeError: 'file' object has no attribute 'seekable'

–---------------------------------------------------------------------
Ran 2 tests in 0.001s

FAILED (errors=1)

$

Release-wise, v1.2.0 was adding support for parsing files/streams in the first place, so this is not a loss of functionality. However if we don't support regular files (the most obviously file-like objects, I imagine), we might could phrase this error message more clearly. “Expected bytes or a seekable io.IOBase object” maybe? 🤔

type(something).__name__
)
)

def starts_with(self, pattern):
"""
Expand Down
56 changes: 56 additions & 0 deletions tests/llsd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1977,3 +1977,59 @@ def test_uuid_map_key(self):
self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")


@unittest.skipIf(PY2, "These tests require Python 3")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, until we formally drop support for Python 2.7 in this library, we should run as many tests as possible with it, to avoid cases like those discussed above where functionality is broken / not implemented as intended for Python 2.

Ideally we could also split these tests into a commit that precedes the actual fix, to demonstrate the above findings that these cases already raise exceptions (though with different exception classes & messages in some cases), so this fix should be a-OK.

Let me know if you're busy and I can take a crack at that, if you like!

class InvalidInputTypes(unittest.TestCase):
'''
Tests for handling invalid input types that should raise LLSDParseError
instead of hanging or consuming infinite memory.
'''

def test_parse_magicmock_raises_error(self):
'''
Parsing a MagicMock object should raise LLSDParseError, not hang.
This is a regression test for a bug where llsd.parse() would go into
an infinite loop when passed a MagicMock (e.g., from an improperly
mocked requests.Response.content).
'''
from unittest.mock import MagicMock
mock = MagicMock()
with self.assertRaises(llsd.LLSDParseError) as context:
llsd.parse(mock)
self.assertIn('MagicMock', str(context.exception))

def test_parse_string_raises_error(self):
'''
Parsing a string (not bytes) should raise LLSDParseError.
Only applies to Python 3 where str and bytes are distinct.
'''
with self.assertRaises(llsd.LLSDParseError) as context:
llsd.parse('not bytes')
self.assertIn('str', str(context.exception))

def test_parse_none_raises_error(self):
'''
Parsing None should raise LLSDParseError.
'''
with self.assertRaises(llsd.LLSDParseError) as context:
llsd.parse(None)
self.assertIn('NoneType', str(context.exception))

def test_parse_int_raises_error(self):
'''
Parsing an integer should raise LLSDParseError.
'''
with self.assertRaises(llsd.LLSDParseError) as context:
llsd.parse(42)
self.assertIn('int', str(context.exception))

def test_parse_non_seekable_stream_raises_error(self):
'''
Parsing a non-seekable stream should raise LLSDParseError.
'''
stream = io.BytesIO()
stream.seekable = lambda: False
with self.assertRaises(llsd.LLSDParseError) as context:
llsd.parse(stream)
self.assertIn('non-seekable', str(context.exception))