Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions general-tests/deptest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
assert reinflect("mökkiammeemme", model="talossa") == {"mökkiammeessa"}
assert reinflect("esijuosta", model="katselemme") == {'esijuoksemme'}
assert reinflect("mökkiammeemme", new_form="+sg+nom") == {'mökkiamme'}
assert reinflect("löhkö", new_form="+pl+ine+ko") == {'löhköissäkö'}
assert reinflect("löhkö", orig_form="+sg+nom", new_form="+pl+ine+ko") == {'löhköissäkö'}
assert reinflect("viinissä", model="talot") == {'viinet'}
assert reinflect("viinissä", model="talot", orig_form="+sg+ine") == {'viinit'}
assert reinflect("hömppäämme", model="juokset", pos="verb") == {'hömppäät'}
assert reinflect("hömppäämme", model="juokset", pos="noun") == {'hömpät'}
assert reinflect("homppaamme", model="juokset", pos="verb") == {'homppaat'}
assert reinflect("homppaamme", model="talot", pos="noun") == {'hompat'}

from pypykko.utils import analyze
assert analyze("hätkähtäneet") == [('hätkähtäneet', 'Lexicon', 'hätkähtää', 'verb', '', '', '+past+conneg+pl', 0.0), ('hätkähtäneet', 'Lexicon', 'hätkähtää', 'verb', '', '', '+part_past+pl+nom', 0.0), ('hätkähtäneet', 'Lexicon', 'hätkähtänyt', 'participle', '', ' ← verb:hätkähtää:+part_past', '+pl+nom', 0.0)]
Expand Down
240 changes: 9 additions & 231 deletions general-tests/test_alignment_pypykko.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,238 +4,16 @@

isyms = PARSER_FST.split_to_symbols(text)
gold = [
[
(0, '@0@', 'Lexicon'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(4, '@0@', '|'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'v'),
(8, 'a', 'a'),
(9, 'a', 's'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Lexicon|Pfx'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, '@0@', '⁅BOUNDARY⁆'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'v'),
(8, 'a', 'a'),
(9, 'a', 's'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(10, '@0@', 't'),
(10, '@0@', 'a'),
(10, '@0@', '\tverb\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+pres'),
(10, 'n', '+1sg')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'I'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(10, '@0@', 't'),
(10, '@0@', 'a'),
(10, '@0@', '\tverb\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+pres'),
(10, 'n', '+1sg')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, '@0@', '\tnoun\t'),
(9, '@0@', '\t'),
(9, '@0@', '\t'),
(9, '@0@', '+sg'),
(9, 'a', '+ill')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 's'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 'a'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'i'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 'a'),
(10, 'n', 'n'),
(11, '@0@', '\tnoun\t'),
(11, '@0@', '\t'),
(11, '@0@', '\t'),
(11, '@0@', '+sg'),
(11, '@0@', '+nom')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'I'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, '@0@', '\tnoun\t'),
(9, '@0@', '\t'),
(9, '@0@', '\t'),
(9, '@0@', '+sg'),
(9, 'a', '+ill')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'I'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 's'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'I'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 'a'),
(10, '@0@', '\tnoun\t'),
(10, '@0@', '\t'),
(10, '@0@', '\t'),
(10, '@0@', '+sg'),
(10, 'n', '+gen')
],
[
(0, '@0@', 'Guesser|Any'),
(0, '@0@', '\t'),
(0, 'i', 'I'),
(1, 's', 's'),
(2, 'o', 'o'),
(3, 'n', 'n'),
(4, 'v', 'v'),
(5, 'a', 'a'),
(6, 'r', 'r'),
(7, 'p', 'p'),
(8, 'a', 'a'),
(9, 'a', 'a'),
(10, 'n', 'n'),
(11, '@0@', '\tnoun\t'),
(11, '@0@', '\t'),
(11, '@0@', '\t'),
(11, '@0@', '+sg'),
(11, '@0@', '+nom')
]
[(0, '@0@', 'Lexicon'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (4, '@0@', '|'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'v'), (8, 'a', 'a'), (9, 'a', 's'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
[(0, '@0@', 'Lexicon|Pfx'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, '@0@', '⁅BOUNDARY⁆'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'v'), (8, 'a', 'a'), (9, 'a', 's'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, '@0@', '\tnoun\t'), (9, '@0@', '\t'), (9, '@0@', '\t'), (9, '@0@', '+sg'), (9, 'a', '+ill')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, 'n', 'n'), (11, '@0@', '\tnoun\t'), (11, '@0@', '\t'), (11, '@0@', '\t'), (11, '@0@', '+sg'), (11, '@0@', '+nom')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, '@0@', '\tnoun\t'), (9, '@0@', '\t'), (9, '@0@', '\t'), (9, '@0@', '+sg'), (9, 'a', '+ill')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
[(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, 'n', 'n'), (11, '@0@', '\tnoun\t'), (11, '@0@', '\t'), (11, '@0@', '\t'), (11, '@0@', '+sg'), (11, '@0@', '+nom')]
]

sys = []

for t, weight in PARSER_FST.lookup_aligned(text):
Expand Down
2 changes: 1 addition & 1 deletion pypykko/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,4 @@ Eg. when looking at "isonvarpaan", one might want to not only know that it is th

PyPykko is licensed under the MIT license like Pykko itself, as it is mostly constituted of Pykko's files with minor modifications. See the LICENSE file for details. Note that kfst (and kfst-rs) have less permissive licenses.

Files from Pykko itself are modified from the version in commit 95f3d51f0e94a1e88ab7c750f2bedcb6b3fd5edd. The compiled transducers are from the same commit.
Files from Pykko itself are modified from the version in commit 9bf1f02a3b03046955a82643e273b6fc3b28174f. The compiled transducers are from the same commit.
Loading