Skip to content
This repository was archived by the owner on Jan 22, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions sre_yield/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
DEFAULT_RE_FLAGS = re.ASCII

STATE_START, STATE_MIDDLE, STATE_END = list(range(3))
_SEQUENCE_OF_EMPTY_STRING = tuple([""])


def Not(chars):
Expand Down Expand Up @@ -366,6 +367,11 @@ def lookaround_parse_error(self, *_):

def branch_values(self, _, items):
    """Converts SRE parser data into literals and merges those lists.

    Args:
        _: Ignored; this method never reads it.
        items: List of parsed alternatives of the branch (presumably one
            entry per side of a ``|`` -- confirm against the SRE parser).

    Returns:
        An empty tuple when ``items`` is empty, the result of
        ``self.sub_values`` for the only alternative when there is exactly
        one, and otherwise a ``ConcatenatedSequence`` built from the
        sub-values of every alternative, in order.
    """
    if not items:
        # No alternatives: return an empty *sequence* (length 0).  A bare ""
        # has the same length but reads like "the empty-string match" and is
        # not the same type as the sequences returned by the other paths.
        return ()
    if len(items) == 1:
        # A single alternative needs no ConcatenatedSequence wrapper; hand
        # back its own values directly.
        return self.sub_values(items[0])
    return ConcatenatedSequence(*[self.sub_values(parsed) for parsed in items])

def max_repeat_values(self, min_count, max_count, items):
Expand Down Expand Up @@ -411,6 +417,11 @@ def sub_values(self, parsed):
parsed = parsed.data
# A list indicates sequential elements of a string
if isinstance(parsed, list):
count = len(parsed)
if count == 0:
return _SEQUENCE_OF_EMPTY_STRING
elif count == 1:
return self.sub_values(parsed[0])
elements = [self.sub_values(p) for p in parsed]
return CombinatoricsSequence(*elements)
# If not a list, a tuple represents a specific match type
Expand Down
32 changes: 14 additions & 18 deletions sre_yield/tests/test_sre_yield_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

MAX_REPEAT_COUNT = sre_yield.MAX_REPEAT_COUNT

DOT_STAR_ALL_REPR = r"\({repeat base=256 low=0 high=%d}, \d+\)" % MAX_REPEAT_COUNT
DOT_STAR_ALL_REPR = "{repeat base=256 low=0 high=%d}" % MAX_REPEAT_COUNT
DOT_STAR_ALL_REPR_ITEM = r"\(%s, (\d+)\)" % DOT_STAR_ALL_REPR


class ReprYieldTest(unittest.TestCase):
Expand All @@ -33,41 +34,41 @@ class ReprYieldTest(unittest.TestCase):
def testDotStar(self):
parsed = sre_yield.AllStrings(".*", re.DOTALL)
out = repr(parsed.raw)
self.assertTrue(re.match(r"{combin \[%s\]}" % DOT_STAR_ALL_REPR, out))
self.assertTrue(re.match(DOT_STAR_ALL_REPR, out))

parsed = sre_yield.AllStrings(".*.*.*", re.DOTALL)
out = repr(parsed.raw)

expected_re = r"{combin \[%s\]}" % ", ".join([DOT_STAR_ALL_REPR] * 3)
expected_re = r"{combin \[%s\]}" % ", ".join([DOT_STAR_ALL_REPR_ITEM] * 3)
self.assertTrue(re.match(expected_re, out))

def testAlternatives(self):
parsed = sre_yield.AllStrings(r"a|b")
self.assertEqual(
repr(parsed.raw), "{combin [({concat [(['a'], 1), (['b'], 1)]}, 2)]}"
repr(parsed.raw), "{concat [(['a'], 1), (['b'], 1)]}"
)
parsed = sre_yield.AllStrings(r"a||b")
self.assertEqual(
repr(parsed.raw),
"{combin [({concat [({combin [(['a'], 1)]}, 1), ({combin []}, 1), ({combin [(['b'], 1)]}, 1)]}, 3)]}",
"{concat [(['a'], 1), (('',), 1), (['b'], 1)]}",
)

def testRepeat(self):
parsed = sre_yield.AllStrings(r"\d{1}")
self.assertEqual(
repr(parsed.raw), "{combin [({repeat base=10 low=1 high=1}, 10)]}"
repr(parsed.raw), "{repeat base=10 low=1 high=1}"
)
parsed = sre_yield.AllStrings(r"\d{2}")
self.assertEqual(
repr(parsed.raw), "{combin [({repeat base=10 low=2 high=2}, 100)]}"
repr(parsed.raw), "{repeat base=10 low=2 high=2}"
)

def testRepeatPlus(self):
parsed = sre_yield.AllStrings(r"\d+")
out = repr(parsed.raw)

expected_re = (
r"{combin \[\({repeat base=10 low=1 high=%d}, \d+\)\]}" % MAX_REPEAT_COUNT
r"{repeat base=10 low=1 high=%d}" % MAX_REPEAT_COUNT
)
self.assertTrue(re.match(expected_re, out))

Expand All @@ -80,14 +81,14 @@ def testRepeatMulti(self):

def testGroup(self):
parsed = sre_yield.AllStrings(r"(?:\d{2})")
expected = "{combin [({repeat base=10 low=2 high=2}, 100)]}"
expected = "{repeat base=10 low=2 high=2}"
if PY36:
expected = "{combin [(%s, 100)]}" % expected

self.assertEqual(repr(parsed.raw), expected)

parsed = sre_yield.AllStrings(r"(?:\d{,2})")
expected = "{combin [({repeat base=10 low=0 high=2}, 111)]}"
expected = "{repeat base=10 low=0 high=2}"
if PY36:
expected = "{combin [(%s, 111)]}" % expected

Expand All @@ -96,12 +97,12 @@ def testGroup(self):
def testBenchInput(self):
parsed = sre_yield.AllStrings("[01]{,10}")
self.assertEqual(
repr(parsed.raw), "{combin [({repeat base=2 low=0 high=10}, 2047)]}"
repr(parsed.raw), "{repeat base=2 low=0 high=10}"
)

parsed = sre_yield.AllStrings("(?:[a-z]{,10}){,1000}")
out = repr(parsed.raw)
expected_re = r"{combin \[\({repeat base=(\d+) low=0 high=1000}, (\d+)\)\]}"
expected_re = r"{repeat base=(\d+) low=0 high=1000}"
m = re.match(expected_re, out)
self.assertTrue(m)
self.assertEqual(int(m.group(1)), 146813779479511)
Expand All @@ -119,7 +120,7 @@ def testBenchInput(self):
def testBenchInputSlow(self):
parsed = sre_yield.AllStrings("(?:[a-z]{,100})")
out = repr(parsed.raw)
expected_re1 = r"{combin \[\({repeat base=(\d+) low=0 high=100}, (\d+)\)\]}"
expected_re1 = r"{repeat base=(\d+) low=0 high=100}"
if PY36:
expected_re = r"{combin \[\(%s, (\d+)\)\]}" % expected_re1
else:
Expand All @@ -128,10 +129,7 @@ def testBenchInputSlow(self):
m = re.match(expected_re, out)
self.assertTrue(m)
base1 = m.group(1)
repeat1 = m.group(2)
self.assertEqual(int(base1), 26)
if PY36:
self.assertEqual(int(repeat1), int(m.group(3)))

parsed = sre_yield.AllStrings("(?:(?:[a-z]{,100}){,100}){,100}")
out = repr(parsed.raw)
Expand All @@ -143,11 +141,9 @@ def testBenchInputSlow(self):
self.assertTrue(m)

base2 = m.group(1)
repeat2 = m.group(2)
self.assertEqual(len(base2), 14152)

self.assertGreater(int(base2), int(base1))
self.assertGreater(int(repeat2), int(repeat1))


if __name__ == "__main__":
Expand Down