diff --git a/general-tests/deptest.py b/general-tests/deptest.py
index 343dacc..db1f7a7 100644
--- a/general-tests/deptest.py
+++ b/general-tests/deptest.py
@@ -10,11 +10,11 @@
 assert reinflect("mökkiammeemme", model="talossa") == {"mökkiammeessa"}
 assert reinflect("esijuosta", model="katselemme") == {'esijuoksemme'}
 assert reinflect("mökkiammeemme", new_form="+sg+nom") == {'mökkiamme'}
-assert reinflect("löhkö", new_form="+pl+ine+ko") == {'löhköissäkö'}
+assert reinflect("löhkö", orig_form="+sg+nom", new_form="+pl+ine+ko") == {'löhköissäkö'}
 assert reinflect("viinissä", model="talot") == {'viinet'}
 assert reinflect("viinissä", model="talot", orig_form="+sg+ine") == {'viinit'}
-assert reinflect("hömppäämme", model="juokset", pos="verb") == {'hömppäät'}
-assert reinflect("hömppäämme", model="juokset", pos="noun") == {'hömpät'}
+assert reinflect("homppaamme", model="juokset", pos="verb") == {'homppaat'}
+assert reinflect("homppaamme", model="talot", pos="noun") == {'hompat'}
 
 from pypykko.utils import analyze
 assert analyze("hätkähtäneet") == [('hätkähtäneet', 'Lexicon', 'hätkähtää', 'verb', '', '', '+past+conneg+pl', 0.0), ('hätkähtäneet', 'Lexicon', 'hätkähtää', 'verb', '', '', '+part_past+pl+nom', 0.0), ('hätkähtäneet', 'Lexicon', 'hätkähtänyt', 'participle', '', ' ← verb:hätkähtää:+part_past', '+pl+nom', 0.0)]
diff --git a/general-tests/test_alignment_pypykko.py b/general-tests/test_alignment_pypykko.py
index 7da0c7c..c701773 100644
--- a/general-tests/test_alignment_pypykko.py
+++ b/general-tests/test_alignment_pypykko.py
@@ -4,238 +4,16 @@
 
 isyms = PARSER_FST.split_to_symbols(text)
 gold = [
-    [
-        (0, '@0@', 'Lexicon'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (4, '@0@', '|'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'v'),
-        (8, 'a', 'a'),
-        (9, 'a', 's'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Lexicon|Pfx'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, '@0@', '⁅BOUNDARY⁆'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'v'),
-        (8, 'a', 'a'),
-        (9, 'a', 's'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (10, '@0@', 't'),
-        (10, '@0@', 'a'),
-        (10, '@0@', '\tverb\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+pres'),
-        (10, 'n', '+1sg')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'I'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (10, '@0@', 't'),
-        (10, '@0@', 'a'),
-        (10, '@0@', '\tverb\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+pres'),
-        (10, 'n', '+1sg')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, '@0@', '\tnoun\t'),
-        (9, '@0@', '\t'),
-        (9, '@0@', '\t'),
-        (9, '@0@', '+sg'),
-        (9, 'a', '+ill')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 's'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 'a'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'i'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 'a'),
-        (10, 'n', 'n'),
-        (11, '@0@', '\tnoun\t'),
-        (11, '@0@', '\t'),
-        (11, '@0@', '\t'),
-        (11, '@0@', '+sg'),
-        (11, '@0@', '+nom')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'I'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, '@0@', '\tnoun\t'),
-        (9, '@0@', '\t'),
-        (9, '@0@', '\t'),
-        (9, '@0@', '+sg'),
-        (9, 'a', '+ill')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'I'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 's'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'I'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 'a'),
-        (10, '@0@', '\tnoun\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '\t'),
-        (10, '@0@', '+sg'),
-        (10, 'n', '+gen')
-    ],
-    [
-        (0, '@0@', 'Guesser|Any'),
-        (0, '@0@', '\t'),
-        (0, 'i', 'I'),
-        (1, 's', 's'),
-        (2, 'o', 'o'),
-        (3, 'n', 'n'),
-        (4, 'v', 'v'),
-        (5, 'a', 'a'),
-        (6, 'r', 'r'),
-        (7, 'p', 'p'),
-        (8, 'a', 'a'),
-        (9, 'a', 'a'),
-        (10, 'n', 'n'),
-        (11, '@0@', '\tnoun\t'),
-        (11, '@0@', '\t'),
-        (11, '@0@', '\t'),
-        (11, '@0@', '+sg'),
-        (11, '@0@', '+nom')
-    ]
+  [(0, '@0@', 'Lexicon'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (4, '@0@', '|'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'v'), (8, 'a', 'a'), (9, 'a', 's'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
+  [(0, '@0@', 'Lexicon|Pfx'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, '@0@', '⁅BOUNDARY⁆'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'v'), (8, 'a', 'a'), (9, 'a', 's'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, '@0@', '\tnoun\t'), (9, '@0@', '\t'), (9, '@0@', '\t'), (9, '@0@', '+sg'), (9, 'a', '+ill')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'i'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, 'n', 'n'), (11, '@0@', '\tnoun\t'), (11, '@0@', '\t'), (11, '@0@', '\t'), (11, '@0@', '+sg'), (11, '@0@', '+nom')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, '@0@', '\tnoun\t'), (9, '@0@', '\t'), (9, '@0@', '\t'), (9, '@0@', '+sg'), (9, 'a', '+ill')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, '@0@', '\tnoun\t'), (10, '@0@', '\t'), (10, '@0@', '\t'), (10, '@0@', '+sg'), (10, 'n', '+gen')],
+  [(0, '@0@', 'Guesser|Any'), (0, '@0@', '\t'), (0, 'i', 'I'), (1, 's', 's'), (2, 'o', 'o'), (3, 'n', 'n'), (4, 'v', 'v'), (5, 'a', 'a'), (6, 'r', 'r'), (7, 'p', 'p'), (8, 'a', 'a'), (9, 'a', 'a'), (10, 'n', 'n'), (11, '@0@', '\tnoun\t'), (11, '@0@', '\t'), (11, '@0@', '\t'), (11, '@0@', '+sg'), (11, '@0@', '+nom')]
 ]
+
 sys = []
 
 for t, weight in PARSER_FST.lookup_aligned(text):
diff --git a/pypykko/README.md b/pypykko/README.md
index 15db3a4..4943d7a 100644
--- a/pypykko/README.md
+++ b/pypykko/README.md
@@ -108,4 +108,4 @@ Eg. when looking at "isonvarpaan", one might want to not only know that it is th
 
 PyPykko is licensed under the MIT license like Pykko itself, as it is mostly constituted of Pykko's files with minor modifications. See the LICENSE file for details. Note that kfst (and kfst-rs) have less permissive licenses.
 
-Files from Pykko itself are modified from the version in commit 95f3d51f0e94a1e88ab7c750f2bedcb6b3fd5edd. The compiled transducers are from the same commit.
+Files from Pykko itself are modified from the version in commit 9bf1f02a3b03046955a82643e273b6fc3b28174f. The compiled transducers are from the same commit.
diff --git a/pypykko/pypykko/aux-abbreviations.tsv b/pypykko/pypykko/aux-abbreviations.tsv
index a860b7a..6bc16ad 100644
--- a/pypykko/pypykko/aux-abbreviations.tsv
+++ b/pypykko/pypykko/aux-abbreviations.tsv
@@ -1,149 +1,133 @@
--	t.	0	conjunction	-	-	-	-	abbr	-
--	A.A.A.	0	none	-	-	-	-	abbr	-
--	Inc.	0	none	-	-	-	-	abbr|foreign	-
--	Joh.	0	none	-	-	-	-	abbr	-
--	Ltd.	0	none	-	-	-	-	abbr|foreign	-
--	Luuk.	0	none	-	-	-	-	abbr	-
--	Mark.	0	none	-	-	-	-	abbr	-
--	Matt.	0	none	-	-	-	-	abbr	-
--	Moos.	0	none	-	-	-	-	abbr	-
--	Mr.	0	none	-	-	-	-	abbr	-
--	Mrs.	0	none	-	-	-	-	abbr	-
--	Ms.	0	none	-	-	-	-	abbr	-
--	P.S.	0	none	-	-	-	-	abbr	-
--	R.S.V.P.	0	none	-	-	-	-	abbr	-
--	Room.	0	none	-	-	-	-	abbr	-
--	Sananl.	0	none	-	-	-	-	abbr	-
--	U.S.	0	none	-	-	-	-	abbr|foreign	-
--	al.	0	none	-	-	-	-	abbr|foreign	-
--	al.	0	none	-	-	-	-	abbr	-
--	alk.	0	none	-	-	-	-	abbr	-
--	alkup.	0	none	-	-	-	-	abbr	-
--	ao.	0	none	-	-	-	-	abbr	-
--	arab.	0	none	-	-	-	-	abbr	-
--	as.	0	none	-	-	-	-	abbr	-
--	biol.	0	none	-	-	-	-	abbr	-
--	d.o.o.	0	none	-	-	-	-	abbr|foreign	-
--	e.g.	0	none	-	-	-	-	abbr|foreign	-
--	eKr.	0	none	-	-	-	-	abbr	-
--	eaa.	0	none	-	-	-	-	abbr	-
--	ed.	0	none	-	-	-	-	abbr	-
--	em.	0	none	-	-	-	-	abbr	-
--	engl.	0	none	-	-	-	-	abbr	-
--	ent.	0	none	-	-	-	-	abbr	-
--	esim.	0	none	-	-	-	-	abbr	-
--	esp.	0	none	-	-	-	-	abbr	-
--	etc.	0	none	-	-	-	-	abbr|foreign	-
--	evp	0	none	-	-	-	-	abbr	-
--	evp.	0	none	-	-	-	-	abbr	-
--	harv.	0	none	-	-	-	-	abbr	-
--	hepr.	0	none	-	-	-	-	abbr	-
--	hist.	0	none	-	-	-	-	abbr	-
--	hl.	0	none	-	-	-	-	abbr|foreign	-
--	huom.	0	none	-	-	-	-	abbr	-
--	ital.	0	none	-	-	-	-	abbr	-
--	jKr.	0	none	-	-	-	-	abbr	-
--	jaa.	0	none	-	-	-	-	abbr	-
--	jap.	0	none	-	-	-	-	abbr	-
--	jne.	0	none	-	-	-	-	abbr	-
--	k.	0	none	-	-	-	-	abbr	-
--	kd	0	none	-	-	-	-	abbr	-
--	ke	0	none	-	-	-	-	abbr	-
--	kesk	0	none	-	-	-	-	abbr	-
--	kesk.	0	none	-	-	-	-	abbr	-
--	kft.	0	none	-	-	-	-	abbr	-
--	kiin.	0	none	-	-	-	-	abbr	-
--	kirj.	0	none	-	-	-	-	abbr	-
--	kk	0	none	-	-	-	-	abbr	-
--	klo	0	none	-	-	-	-	abbr	-
--	ko.	0	none	-	-	-	-	abbr	-
--	kok	0	none	-	-	-	-	abbr	-
--	kpl	0	none	-	-	-	-	abbr	-
--	kreik.	0	none	-	-	-	-	abbr	-
--	kreikk.	0	none	-	-	-	-	abbr	-
--	ks.	0	none	-	-	-	-	abbr	-
--	kts.	0	none	-	-	-	-	abbr	-
--	la	0	none	-	-	-	-	abbr	-
--	lat.	0	none	-	-	-	-	abbr	-
--	lääk.	0	none	-	-	-	-	abbr	-
--	m.	0	none	-	-	-	-	abbr	-
--	ma	0	none	-	-	-	-	abbr	-
--	mat.	0	none	-	-	-	-	abbr	-
--	milj.	0	none	-	-	-	-	abbr	-
--	ml.	0	none	-	-	-	-	abbr	-
--	mm.	0	none	-	-	-	-	abbr	-
--	mon.	0	none	-	-	-	-	abbr	-
--	mrd	0	none	-	-	-	-	abbr	-
--	mrd.	0	none	-	-	-	-	abbr	-
--	n.	0	none	-	-	-	-	abbr	-
--	n/a	0	none	-	-	-	-	abbr	-
--	nk.	0	none	-	-	-	-	abbr	-
--	nro	0	none	-	-	-	-	abbr	-
--	ns.	0	none	-	-	-	-	abbr	-
--	nyk.	0	none	-	-	-	-	abbr	-
--	o.s.	0	none	-	-	-	-	abbr	-
--	oik.	0	none	-	-	-	-	abbr	-
--	os.	0	none	-	-	-	-	abbr	-
--	p.	0	none	-	-	-	-	abbr	-
--	paal.	0	none	-	-	-	-	abbr	-
--	pe	0	none	-	-	-	-	abbr	-
--	pp.	0	none	-	-	-	-	abbr	-
--	ps.	0	none	-	-	-	-	abbr	-
--	pvm	0	none	-	-	-	-	abbr	-
--	päätoim.	0	none	-	-	-	-	abbr	-
--	ransk.	0	none	-	-	-	-	abbr	-
--	room.	0	none	-	-	-	-	abbr	-
--	ruots.	0	none	-	-	-	-	abbr	-
--	s.	0	none	-	-	-	-	abbr	-
--	saks.	0	none	-	-	-	-	abbr	-
--	sd	0	none	-	-	-	-	abbr	-
--	sin	0	none	-	-	-	-	abbr	-
--	so.	0	none	-	-	-	-	abbr	-
--	su	0	none	-	-	-	-	abbr	-
--	suom.	0	none	-	-	-	-	abbr	-
--	terv.	0	none	-	-	-	-	abbr	-
--	ti	0	none	-	-	-	-	abbr	-
--	tms.	0	none	-	-	-	-	abbr	-
--	to	0	none	-	-	-	-	abbr	-
--	toim.	0	none	-	-	-	-	abbr	-
+-	t.	-	conjunction	-	-	-	-	abbr	-
+-	A.A.A.	-	none	-	-	-	-	abbr	-
+-	Dr.	-	none	-	-	-	-	abbr	-
+-	Joh.	-	none	-	-	-	-	abbr	-
+-	Luuk.	-	none	-	-	-	-	abbr	-
+-	Mark.	-	none	-	-	-	-	abbr	-
+-	Matt.	-	none	-	-	-	-	abbr	-
+-	Moos.	-	none	-	-	-	-	abbr	-
+-	Mr.	-	none	-	-	-	-	abbr	-
+-	Mrs.	-	none	-	-	-	-	abbr	-
+-	Ms.	-	none	-	-	-	-	abbr	-
+-	Mt.	-	none	-	-	-	-	foreign	-
+-	P.S.	-	none	-	-	-	-	abbr	-
+-	R.S.V.P.	-	none	-	-	-	-	abbr	-
+-	Room.	-	none	-	-	-	-	abbr	-
+-	Sananl.	-	none	-	-	-	-	abbr	-
+-	St.	-	none	!	-	-	-	foreign	-
+-	al.	-	none	-	-	-	-	abbr|foreign	-
+-	alk.	-	none	-	-	-	-	abbr	-
+-	alkup.	-	none	-	-	-	-	abbr	-
+-	ao.	-	none	-	-	-	-	abbr	-
+-	arab.	-	none	-	-	-	-	abbr	-
+-	arkkit.	-	none	-	-	-	-	abbr	-
+-	as.	-	none	-	-	-	-	abbr	-
+-	biol.	-	none	-	-	-	-	abbr	-
+-	d.o.o.	-	none	-	-	-	-	abbr|foreign	-
+-	e.g.	-	none	-	-	-	-	abbr|foreign	-
+-	eKr.	-	none	-	-	-	-	abbr	-
+-	eaa.	-	none	-	-	-	-	abbr	-
+-	ed.	-	none	-	-	-	-	abbr	-
+-	em.	-	none	-	-	-	-	abbr	-
+-	engl.	-	none	-	-	-	-	abbr	-
+-	ent.	-	none	-	-	-	-	abbr	-
+-	esim.	-	none	-	-	-	-	abbr	-
+-	esp.	-	none	-	-	-	-	abbr	-
+-	etc.	-	none	-	-	-	-	abbr|foreign	-
+-	evp.	-	none	-	-	-	-	abbr	-
+-	farm.	-	none	-	-	-	-	abbr	-
+-	feat.	-	none	-	-	-	-	abbr	-
+-	fil.	-	none	-	-	-	-	abbr	-
+-	harv.	-	none	-	-	-	-	abbr	-
+-	hepr.	-	none	-	-	-	-	abbr	-
+-	hist.	-	none	-	-	-	-	abbr	-
+-	hl.	-	none	-	-	-	-	abbr|foreign	-
+-	hum.	-	none	-	-	-	-	abbr	-
+-	huom.	-	none	-	-	-	-	abbr	-
+-	ital.	-	none	-	-	-	-	abbr	-
+-	jKr.	-	none	-	-	-	-	abbr	-
+-	jaa.	-	none	-	-	-	-	abbr	-
+-	jap.	-	none	-	-	-	-	abbr	-
+-	jne.	-	none	-	-	-	-	abbr	-
+-	joht.	-	none	-	-	-	-	abbr	-
+-	k.	-	none	-	-	-	-	abbr	-
+-	kand.	-	none	-	-	-	-	abbr	-
+-	kasvatust.	-	none	-	-	-	-	abbr	-
+-	kauppat.	-	none	-	-	-	-	abbr	-
+-	kesk.	-	none	-	-	-	-	abbr	-
+-	kft.	-	none	-	-	-	-	abbr	-
+-	kiin.	-	none	-	-	-	-	abbr	-
+-	kirj.	-	none	-	-	-	-	abbr	-
+-	ko.	-	none	-	-	-	-	abbr	-
+-	kreik.	-	none	-	-	-	-	abbr	-
+-	kreikk.	-	none	-	-	-	-	abbr	-
+-	ks.	-	none	-	-	-	-	abbr	-
+-	kts.	-	none	-	-	-	-	abbr	-
+-	lat.	-	none	-	-	-	-	abbr	-
+-	liikuntat.	-	none	-	-	-	-	abbr	-
+-	lis.	-	none	-	-	-	-	abbr	-
+-	lyh.	-	none	-	-	-	-	abbr	-
+-	lääk.	-	none	-	-	-	-	abbr	-
+-	lääket.	-	none	-	-	-	-	abbr	-
+-	m.	-	none	-	-	-	-	abbr	-
+-	maist.	-	none	-	-	-	-	abbr	-
+-	mat.	-	none	-	-	-	-	abbr	-
+-	milj.	-	none	-	-	-	-	abbr	-
+-	ml.	-	none	-	-	-	-	abbr	-
+-	mm.	-	none	-	-	-	-	abbr	-
+-	mon.	-	none	-	-	-	-	abbr	-
+-	mrd.	-	none	-	-	-	-	abbr	-
+-	n.	-	none	-	-	-	-	abbr	-
+-	nk.	-	none	-	-	-	-	abbr	-
+-	ns.	-	none	-	-	-	-	abbr	-
+-	nyk.	-	none	-	-	-	-	abbr	-
+-	o.s.	-	none	-	-	-	-	abbr	-
+-	oik.	-	none	-	-	-	-	abbr	-
+-	os.	-	none	-	-	-	-	abbr	-
+-	p.	-	none	-	-	-	-	abbr	-
+-	paal.	-	none	-	-	-	-	abbr	-
+-	pp.	-	none	-	-	-	-	abbr	-
+-	ps.	-	none	-	-	-	-	abbr	-
+-	päätoim.	-	none	-	-	-	-	abbr	-
+-	ransk.	-	none	-	-	-	-	abbr	-
+-	room.	-	none	-	-	-	-	abbr	-
+-	ruots.	-	none	-	-	-	-	abbr	-
+-	s.	-	none	-	-	-	-	abbr	-
+-	saks.	-	none	-	-	-	-	abbr	-
+-	san.	-	none	-	-	-	-	abbr	-
+-	so.	-	none	-	-	-	-	abbr	-
+-	sov.	-	none	-	-	-	-	abbr	-
+-	suom.	-	none	-	-	-	-	abbr	-
+-	säv.	-	none	-	-	-	-	abbr	-
+-	taloust.	-	none	-	-	-	-	abbr	-
+-	tekn.	-	none	-	-	-	-	abbr	-
+-	teol.	-	none	-	-	-	-	abbr	-
+-	terv.	-	none	-	-	-	-	abbr	-
+-	tms.	-	none	-	-	-	-	abbr	-
+-	toht.	-	none	-	-	-	-	abbr	-
+-	toim.	-	none	-	-	-	-	abbr	-
 -	torj.	-	none	-	-	-	-	abbr	-
--	ts.	0	none	-	-	-	-	abbr	-
--	v.	0	none	-	-	-	-	abbr	-
--	vas	0	none	-	-	-	-	abbr	-
--	vas.	0	none	-	-	-	-	abbr	-
--	ven.	0	none	-	-	-	-	abbr	-
--	vietn.	0	none	-	-	-	-	abbr	-
--	vihr.	0	none	-	-	-	-	abbr	-
--	vko	0	none	-	-	-	-	abbr	-
--	vol.	0	none	-	-	-	-	abbr	-
--	vrt.	0	none	-	-	-	-	abbr	-
--	vs.	0	none	-	-	-	-	abbr	-
--	yks.	0	none	-	-	-	-	abbr	-
--	ym.	0	none	-	-	-	-	abbr	-
--	yms.	0	none	-	-	-	-	abbr	-
--	yo.	0	none	-	-	-	-	abbr	-
--	yst.	0	none	-	-	-	-	abbr	-
--	yst.terv.	0	none	-	-	-	-	abbr	-
--	ADHD	0	noun	18B	-	-	e	abbr	-
--	BKT	0	noun	18B	-	front|back	e	abbr	-
--	CV	0	noun	18B	-	-	e	abbr	-
--	Co.	-	noun	XX	-	-	-	abbr	-
--	DI	0	noun	18B	-	-	i	abbr	-
--	DNA	0	noun	18B	-	-	a	abbr	-
--	FM	0	noun	10B	-	-	-	abbr	-
--	FT	0	noun	18B	-	-	e	abbr	-
--	HuK	0	noun	18B	-	-	o	abbr	-
--	LuK	0	noun	18B	-	-	-	abbr	-
--	TV	0	noun	18B	-	-	e	abbr	-
--	ab	0	noun	18B	-	-	e	abbr	-
--	bkt	0	noun	18B	-	front|back	e	abbr	-
--	btm	0	noun	5	-	-	-	abbr	-
--	ky	0	noun	18B	-	-	y	abbr	-
--	lng	0	noun	18B	-	-	e	abbr	-
--	oy	0	noun	18B	-	-	y	abbr	-
--	oyj	0	noun	18B	-	-	i	abbr	-
--	pj.	0	noun	XX	-	-	-	abbr	-
--	ry	0	noun	18B	-	-	y	abbr	-
--	tj.	0	noun	XX	-	-	-	abbr	-
--	tri	0	noun	18	-	-	-	abbr	-
--	tv	0	noun	18B	-	-	e	abbr	-
+-	trad.	-	none	-	-	-	-	abbr	-
+-	ts.	-	none	-	-	-	-	abbr	-
+-	v.	-	none	-	-	-	-	abbr	-
+-	valt.	-	none	-	-	-	-	abbr	-
+-	ven.	-	none	-	-	-	-	abbr	-
+-	vietn.	-	none	-	-	-	-	abbr	-
+-	vihr.	-	none	-	-	-	-	abbr	-
+-	vol.	-	none	-	-	-	-	abbr	-
+-	vrt.	-	none	-	-	-	-	abbr	-
+-	vs.	-	none	-	-	-	-	abbr	-
+-	vt.	-	none	-	-	-	-	abbr	-
+-	yks.	-	none	-	-	-	-	abbr	-
+-	ym.	-	none	-	-	-	-	abbr	-
+-	yms.	-	none	-	-	-	-	abbr	-
+-	yo.	-	none	-	-	-	-	abbr	-
+-	yst.	-	none	-	-	-	-	abbr	-
+-	yst.terv.	-	none	-	-	-	-	abbr	-
+-	Co.	-	noun	XX	-	-	-	abbr|foreign	-
+-	Inc.	-	noun	XX	-	-	-	abbr|foreign	-
+-	Ltd.	-	noun	XX	-	-	-	abbr|foreign	-
+-	Tl.	-	noun	XX	-	-	-	abbr|dated	-
+-	U.S.	-	noun	!	-	-	-	abbr|foreign	-
+-	pj.	-	noun	!	-	-	-	abbr	-
+-	tj.	-	noun	!	-	-	-	abbr	-
diff --git a/pypykko/pypykko/constants.py b/pypykko/pypykko/constants.py
index 82f68be..c84e23d 100644
--- a/pypykko/pypykko/constants.py
+++ b/pypykko/pypykko/constants.py
@@ -19,35 +19,20 @@
 	'ȘŅĻŖȚĶ' \
 	'ØßÐĐÆŒŁĞŐŊÞ'
 
-ALPHA_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖÜŠŽČĆ'
+ALPHA_LOWER_BASIC = 'abcdefghijklmnopqrstuvwxyzåäö'
 ALPHA_LOWER = 'abcdefghijklmnopqrstuvwxyzåäöüšžčćı'
+ALPHA_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖÜŠŽČĆ'
 
 PARSER_FST_PATH = os.path.join(scripts_path, 'fi-parser.kfst')
 GENERATOR_FST_PATH = os.path.join(scripts_path, 'fi-generator.kfst')
 
 LINE_BREAK = '@_LINEBREAK_@'
 SENT_BREAK = '@_SENTBREAK_@'
-ZERO = '@_zero_@'
+ZERO = '@_ZERO_@'
 TAB = '^TAB'
 
 OPENING_TAGS = [f'<{tag}>' for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']]
 
-STYLE_TAGS = [
-	'+arch',
-	'+child',
-	'+coll',
-	'+dated',
-	'+dial',
-	'+foreign',
-	'+jocul',
-	'+nstd',
-	'+poet',
-	'+rare',
-	'+slang',
-	'+vulg',
-]
-STYLE_TAG_REGEX = '|'.join(tag[1:] for tag in STYLE_TAGS)
-
 POS_TAGS = [
 	'noun',
 	'noun-pl',
@@ -70,21 +55,37 @@
 ]
 
 CLITICS = {
-"+han",
-"+ka",
-"+kaan",
-"+kin",
-"+ko",
-"+pa",
-"+poss1pl",
-"+poss1sg",
-"+poss2pl",
-"+poss2sg",
-"+poss3",
-"+poss3",
-"+s",
+	"+han",
+	"+ka",
+	"+kaan",
+	"+kin",
+	"+ko",
+	"+pa",
+	"+poss1pl",
+	"+poss1sg",
+	"+poss2pl",
+	"+poss2sg",
+	"+poss3",
+	"+poss3",
+	"+s",
 }
 
+STYLE_TAGS = [
+	'+arch',
+	'+child',
+	'+coll',
+	'+dated',
+	'+dial',
+	'+foreign',
+	'+jocul',
+	'+nstd',
+	'+poet',
+	'+rare',
+	'+slang',
+	'+vulg',
+]
+STYLE_TAG_REGEX = '|'.join(tag[1:] for tag in STYLE_TAGS)
+
 FIELDS = [
 	'',    # 1. source
 	'%s',  # 2. lemma
diff --git a/pypykko/pypykko/fi-generator.kfst b/pypykko/pypykko/fi-generator.kfst
index d60495e..b41a53c 100644
Binary files a/pypykko/pypykko/fi-generator.kfst and b/pypykko/pypykko/fi-generator.kfst differ
diff --git a/pypykko/pypykko/fi-parser.kfst b/pypykko/pypykko/fi-parser.kfst
index 394ef37..2f3aa78 100644
Binary files a/pypykko/pypykko/fi-parser.kfst and b/pypykko/pypykko/fi-parser.kfst differ
diff --git a/pypykko/pypykko/file_tools.py b/pypykko/pypykko/file_tools.py
index 78c26d5..85bccce 100644
--- a/pypykko/pypykko/file_tools.py
+++ b/pypykko/pypykko/file_tools.py
@@ -12,20 +12,12 @@ def get_filepath(filename, directory):
 
 
 def read_tsv(filename, directory=''):
-
 	filename = get_filepath(filename, directory)
-
-	table = []
-	with open(filename, 'r') as file:
-		for line in file:
-			line = line.strip('\n')
-			if not line:
-				continue
-			if line.startswith('#'):
-				continue
-			row = ['' if val == '-' else val for val in line.split('\t')]
-			table.append(row)
-	return table
+	for line in read_list(filename):  # read_list already drops empty lines
+		if line.startswith('#'):  # skip comment lines
+			continue
+		row = ['' if val == '-' else val for val in line.split('\t')]  # a lone '-' field becomes ''
+		yield row  # rows are produced lazily, one list of fields per line
 
 
 def read_list_tsv(filename):
@@ -52,17 +44,22 @@ def save_txt(filename, text: str, directory=''):
 
 def read_list(filename, directory=''):
 	filename = get_filepath(filename, directory)
-	return [s for s in read_txt(filename).splitlines() if s]
+	with open(filename, 'r', encoding='utf-8') as file:  # data files hold non-ASCII text; do not depend on the locale default
+		for line in file:
+			line = line.strip('\n\r')  # remove line terminators only, keep other whitespace
+			if not line:
+				continue
+			yield line  # lazily yield each non-empty line
 
 
 def save_list(filename, items: list, sort=True, directory=''):
 	filename = get_filepath(filename, directory)
 	items = sorted(items) if sort else items
-	return save_txt(filename, '\n'.join(items))
+	return save_txt(filename, ''.join(f'{item}\n' for item in items))
 
 
 def load_json(filename, directory=''):
 	filename = get_filepath(filename, directory)
 	with open(filename, 'r') as file:
 		data = json.load(file)
-	return data
+	return data
\ No newline at end of file
diff --git a/pypykko/pypykko/generate.py b/pypykko/pypykko/generate.py
index 6f9a656..ca5ea1a 100644
--- a/pypykko/pypykko/generate.py
+++ b/pypykko/pypykko/generate.py
@@ -1,39 +1,163 @@
-from .constants import GENERATOR_FST_PATH, TAB
-from .utils import add_compound_separators, inf
+import os
+from .file_tools import read_list
 import kfst
+from .constants import GENERATOR_FST_PATH, TAB
+from .scriptutils import is_valid_pos, is_uninflectable
+from .utils import add_compound_separators, inf, pos_tag
+
+CURR = os.path.dirname(__file__)
 
+POS_FST_SOURCES = {
+	'noun-pl':          ['Lexicon', 'Lexicon|Pfx', 'Lexicon|Hyp'],
+	'noun':             ['Lexicon', 'Lexicon|Pfx', 'Lexicon|Hyp'],
+	'proper-pl':        ['Lexicon', 'Lexicon|Pfx', 'Lexicon|Hyp'],
+	'proper':           ['Lexicon', 'Lexicon|Pfx', 'Lexicon|Hyp'],
+	'adjective':        ['Lexicon', 'Lexicon|Pfx', 'Lexicon|Hyp'],
+	'pronoun':          ['Lexicon', 'Lexicon|Hyp'],
+	'pronoun-pl':       ['Lexicon', 'Lexicon|Hyp'],
+	'verb':             ['Lexicon', 'Lexicon|Pfx'],
+	'participle':       ['Lexicon', 'Lexicon|Pfx'],
+	'numeral':          ['Lexicon|Num', 'Lexicon', 'Guesser|Any'],  # (!)
+	'ordinal':          ['Lexicon|Num', 'Lexicon', 'Guesser|Any'],  # (!)
+	'adverb':           ['Lexicon'],
+	'adposition':       ['Lexicon'],
+	'interjection':     ['Lexicon'],
+	'conjunction':      ['Lexicon'],
+	'conjunction+verb': ['Lexicon'],
+	'adverb+verb':      ['Lexicon'],
+	'none':             ['Lexicon'],
+}
+
+HOMONYMOUS = {
+	("ahtaus", "noun"),
+	("ale", "noun"),
+	("appi", "noun"),
+	("g", "noun"),
+	("haiku", "noun"),
+	("halata", "verb"),
+	("hepo", "noun"),
+	("isota", "verb"),
+	("joka", "pronoun"),
+	("karvaus", "noun"),
+	("keritä", "verb"),
+	("koto", "noun"),
+	("kuori", "noun"),
+	("kuti", "noun"),
+	("l", "noun"),
+	("lahti", "noun"),
+	("laki", "noun"),
+	("lento", "noun"),
+	("live", "noun"),
+	("m", "noun"),
+	("merirosvous", "noun"),
+	("mutu", "noun"),
+	("palvi", "noun"),
+	("parka", "noun"),
+	("peitsi", "noun"),
+	("pokata", "verb"),
+	("puola", "noun"),
+	("raakata", "verb"),
+	("raita", "noun"),
+	("raueta", "verb"),
+	("ripsi", "noun"),
+	("riuku", "noun"),
+	("rosvous", "noun"),
+	("s", "noun"),
+	("saksi", "noun"),
+	("sietä", "verb"),
+	("siivous", "noun"),
+	("sini", "noun"),
+	("soppi", "noun"),
+	("syli", "noun"),
+	("säkä", "noun"),
+	("tavata", "verb"),
+	("tutti", "noun"),
+	("tyvetä", "verb"),
+	("vakaus", "noun"),
+	("veto", "noun"),
+	("viini", "noun"),
+	("vika", "noun"),
+	("vuori", "noun"),
+}
 
 generator_fst = kfst.FST.from_kfst_file(GENERATOR_FST_PATH)
 
+POS_MORPHTAG_PATTERNS = {
+	pos: list(read_list(os.path.join(CURR, 'patterns', f'pos-{pos}-patterns.txt'))) or ['']
+	for pos in POS_FST_SOURCES
+}
+
+def generate_inflection_paradigm(word: str, pos: str, homonym: str = ''):
 
-def generate_wordform(word: str, pos: str, morphtags: str, homonym: str = '', source='Lexicon'):
+	"""
+	Return a mapping of morphological tags to lists of wordforms; the mapping is empty when nothing can be generated.
+	"""
 
-	words = add_compound_separators(word, pos=pos, normalize_separators=False)
-	if not words:
+	if is_uninflectable(word):
+		return {}  # keep the return type a mapping on every path
+	if not is_valid_pos(pos):
-		return set()
+		return {}
 
-	for word in words:
-		input_fields = source, word, f'^{pos}', str(homonym), '', morphtags
-		input_string = TAB.join(input_fields)
+	inflections = {}
+	for source in POS_FST_SOURCES[pos]:
+		for morphtags in POS_MORPHTAG_PATTERNS[pos]:
+			forms = generate_wordform(word, pos, morphtags, homonym, source)
+			if forms:
+				inflections[morphtags] = list(forms)
+		if inflections:
+			break  # use only the first source that produced any forms
+	return inflections
+
 
+def generate_forms(word: str, pos: str | None = None, homonym: str = ''):
+
+	"""
+	Return the default set of unannotated standard inflected forms for the given word (lemma).
+	"""
+
+	if is_uninflectable(word):
+		return set()
+
+	# No POS given: return the union over all pos_tag(word) results (checked before validation so the default is not rejected)
+	if not pos:
+		return {form for tag in pos_tag(word) for form in generate_forms(word, tag, homonym)}
+	if not is_valid_pos(pos):
+		return set()
+
+	for source in POS_FST_SOURCES[pos]:
 		forms = set()
+		for morphtags in POS_MORPHTAG_PATTERNS[pos]:
+			forms.update(generate_wordform(word, pos, morphtags, homonym, source))
+		if forms:
+			return forms  # the first source that produced any forms wins
+	return set()
+
+
+def generate_wordform(word: str, pos: str, morphtags: str, homonym: str = '', source: str ='Lexicon'):
+
+	"""
+	Generate the set of valid inflected forms specified by the morphological tags for the given word (lemma).
+	"""
+
+	if is_uninflectable(word):
+		return set()
+	if not is_valid_pos(pos):
+		return set()
+
+	# TODO: Make this work for other sources as well?
+	if not homonym and (word, pos) in HOMONYMOUS and source == 'Lexicon':  # no homonym index given: merge the forms of homonyms '1' and '2'
+		forms1 = generate_wordform(word, pos, morphtags, '1', source)
+		forms2 = generate_wordform(word, pos, morphtags, '2', source)
+		return forms1 | forms2
+
+	forms = set()
+	for word in add_compound_separators(word, pos=pos, normalize_separators=False):
 		input_fields = source, word, f'^{pos}', str(homonym), '', morphtags
 		input_string = TAB.join(input_fields)
 		best = inf
 		for form, weight in generator_fst.lookup(input_string):
 			if weight > best:
 				break
 			forms.add(form)
 			best = weight
-		if forms:
-			return forms
-	return set()
-
-
-if __name__ == '__main__':
-	print(generate_wordform('suuri', 'adjective', '+sg+gen'))
-	print(generate_wordform('kissakoira', 'noun', '+pl+par', source='Lexicon|Pfx'))
-	print(generate_wordform('-rakenteinen', 'adjective', '+sg+ine', source='Lexicon|Hyp'))   # FIXME!
-	print(generate_wordform('-valkoinen', 'adjective', '+sg+ine', source='Lexicon|Hyp'))
-	print(generate_wordform('a-rakenteinen', 'adjective', '+sg+ine', source='Lexicon|Hyp'))
-	print(generate_wordform('a-valkoinen', 'adjective', '+sg+ine', source='Lexicon|Hyp'))
-	print(generate_wordform('16', 'numeral', '+sg+ine', source='Lexicon'))
-	print(generate_wordform('16:s', 'ordinal', '+sg+ine', source='Lexicon'))
\ No newline at end of file
+	return forms
diff --git a/pypykko/pypykko/normalize.py b/pypykko/pypykko/normalize.py
index 4ab4cb7..6fe5725 100644
--- a/pypykko/pypykko/normalize.py
+++ b/pypykko/pypykko/normalize.py
@@ -6,7 +6,7 @@
 
 inf = float('inf')
 indices = defaultdict(float)
-LEADING_PUNCTUATION = set('-–—"”„’([')
+LEADING_PUNCTUATION = set('-–—"”“‟„’([')
 
 
 def is_lowercase(w):
@@ -60,8 +60,8 @@ def process_analyses(analyses, sentence_initial=None):
 		# if '+ins' in tags or '+com' in tags:
 		# 	weight += 0.5
 
-		# if not sentence_initial:
-		# 	lemma = fix_lettercase(wform, lemma)
+		if not sentence_initial and is_uppercase(wform) and is_lowercase(lemma) and pos.startswith('noun'):
+			lemma = fix_lettercase(wform, lemma)
 
 		index = indices[lemma, pos] or inf
 		pair = weight, index
@@ -87,16 +87,23 @@ def process_analyses(analyses, sentence_initial=None):
 def main():
 
 	analyses = []
-	analysis = '', '', '', '', ''
+	analysis = '', '', '', '', '', '', '', inf
 	sentence_initial = True
+	prev_wform = ''
 
 	for line in sys.stdin:
 
 		line = line.strip('\n\r')
 
 		if not line and analyses:
+
+			wform, _, _, _, _, _, _, _ = analysis
+			if prev_wform == ':' and wform in '"”„':
+				sentence_initial = True
+
 			process_analyses(analyses, sentence_initial)
-			prev_wform = analysis[0]
+
+			prev_wform = wform
 			sentence_initial = (
 				sentence_initial if prev_wform in LEADING_PUNCTUATION else
 				prev_wform in [SENT_BREAK] + OPENING_TAGS
diff --git a/pypykko/pypykko/patterns/pos-adjective-patterns.txt b/pypykko/pypykko/patterns/pos-adjective-patterns.txt
new file mode 100644
index 0000000..aadf6e1
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-adjective-patterns.txt
@@ -0,0 +1,68 @@
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
++comparative+sg+nom
++superlative+sg+nom
++comparative+rare+sg+nom
++superlative+rare+sg+nom
++comparative+pl+nom
++superlative+pl+nom
++comparative+rare+pl+nom
++superlative+rare+pl+nom
diff --git a/pypykko/pypykko/patterns/pos-adposition-patterns.txt b/pypykko/pypykko/patterns/pos-adposition-patterns.txt
new file mode 100644
index 0000000..6f08916
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-adposition-patterns.txt
@@ -0,0 +1,7 @@
++poss1sg
++poss2sg
++poss3
++poss1pl
++poss2pl
++comparative
++superlative
diff --git a/pypykko/pypykko/patterns/pos-adverb+verb-patterns.txt b/pypykko/pypykko/patterns/pos-adverb+verb-patterns.txt
new file mode 100644
index 0000000..d82a230
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-adverb+verb-patterns.txt
@@ -0,0 +1,6 @@
++1sg
++2sg
++3sg
++1pl
++2pl
++3pl
diff --git a/pypykko/pypykko/patterns/pos-adverb-patterns.txt b/pypykko/pypykko/patterns/pos-adverb-patterns.txt
new file mode 100644
index 0000000..6f08916
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-adverb-patterns.txt
@@ -0,0 +1,7 @@
++poss1sg
++poss2sg
++poss3
++poss1pl
++poss2pl
++comparative
++superlative
diff --git a/pypykko/pypykko/patterns/pos-conjunction+verb-patterns.txt b/pypykko/pypykko/patterns/pos-conjunction+verb-patterns.txt
new file mode 100644
index 0000000..d82a230
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-conjunction+verb-patterns.txt
@@ -0,0 +1,6 @@
++1sg
++2sg
++3sg
++1pl
++2pl
++3pl
diff --git a/pypykko/pypykko/patterns/pos-conjunction-patterns.txt b/pypykko/pypykko/patterns/pos-conjunction-patterns.txt
new file mode 100644
index 0000000..e69de29
diff --git a/pypykko/pypykko/patterns/pos-interjection-patterns.txt b/pypykko/pypykko/patterns/pos-interjection-patterns.txt
new file mode 100644
index 0000000..e69de29
diff --git a/pypykko/pypykko/patterns/pos-none-patterns.txt b/pypykko/pypykko/patterns/pos-none-patterns.txt
new file mode 100644
index 0000000..e69de29
diff --git a/pypykko/pypykko/patterns/pos-noun-patterns.txt b/pypykko/pypykko/patterns/pos-noun-patterns.txt
new file mode 100644
index 0000000..2b4fd97
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-noun-patterns.txt
@@ -0,0 +1,90 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
diff --git a/pypykko/pypykko/patterns/pos-noun-pl-patterns.txt b/pypykko/pypykko/patterns/pos-noun-pl-patterns.txt
new file mode 100644
index 0000000..8b0f14d
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-noun-pl-patterns.txt
@@ -0,0 +1,30 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
diff --git a/pypykko/pypykko/patterns/pos-numeral-patterns.txt b/pypykko/pypykko/patterns/pos-numeral-patterns.txt
new file mode 100644
index 0000000..be26141
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-numeral-patterns.txt
@@ -0,0 +1,60 @@
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
diff --git a/pypykko/pypykko/patterns/pos-ordinal-patterns.txt b/pypykko/pypykko/patterns/pos-ordinal-patterns.txt
new file mode 100644
index 0000000..2b4fd97
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-ordinal-patterns.txt
@@ -0,0 +1,90 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
diff --git a/pypykko/pypykko/patterns/pos-participle-patterns.txt b/pypykko/pypykko/patterns/pos-participle-patterns.txt
new file mode 100644
index 0000000..2b4fd97
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-participle-patterns.txt
@@ -0,0 +1,90 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
diff --git a/pypykko/pypykko/patterns/pos-pronoun-patterns.txt b/pypykko/pypykko/patterns/pos-pronoun-patterns.txt
new file mode 100644
index 0000000..9e30ca6
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-pronoun-patterns.txt
@@ -0,0 +1,98 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
++comparative+sg+nom
++superlative+sg+nom
++comparative+rare+sg+nom
++superlative+rare+sg+nom
++comparative+pl+nom
++superlative+pl+nom
++comparative+rare+pl+nom
++superlative+rare+pl+nom
diff --git a/pypykko/pypykko/patterns/pos-pronoun-pl-patterns.txt b/pypykko/pypykko/patterns/pos-pronoun-pl-patterns.txt
new file mode 100644
index 0000000..8b0f14d
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-pronoun-pl-patterns.txt
@@ -0,0 +1,30 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
diff --git a/pypykko/pypykko/patterns/pos-proper-patterns.txt b/pypykko/pypykko/patterns/pos-proper-patterns.txt
new file mode 100644
index 0000000..2b4fd97
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-proper-patterns.txt
@@ -0,0 +1,90 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++sg+nom
++sg+gen
++sg+acc
++sg+par
++sg+ill
++sg+ine
++sg+ela
++sg+all
++sg+ade
++sg+abl
++sg+ess
++sg+tra
++sg+com
++sg+ins
++sg+abe
++pl+nom
++pl+gen
++pl+acc
++pl+par
++pl+ill
++pl+ine
++pl+ela
++pl+all
++pl+ade
++pl+abl
++pl+ess
++pl+tra
++pl+com
++pl+ins
++pl+abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
++sg+nom+rare
++sg+gen+rare
++sg+acc+rare
++sg+par+rare
++sg+ill+rare
++sg+ine+rare
++sg+ela+rare
++sg+all+rare
++sg+ade+rare
++sg+abl+rare
++sg+ess+rare
++sg+tra+rare
++sg+com+rare
++sg+ins+rare
++sg+abe+rare
++pl+nom+rare
++pl+gen+rare
++pl+acc+rare
++pl+par+rare
++pl+ill+rare
++pl+ine+rare
++pl+ela+rare
++pl+all+rare
++pl+ade+rare
++pl+abl+rare
++pl+ess+rare
++pl+tra+rare
++pl+com+rare
++pl+ins+rare
++pl+abe+rare
diff --git a/pypykko/pypykko/patterns/pos-proper-pl-patterns.txt b/pypykko/pypykko/patterns/pos-proper-pl-patterns.txt
new file mode 100644
index 0000000..8b0f14d
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-proper-pl-patterns.txt
@@ -0,0 +1,30 @@
++nom
++gen
++acc
++par
++ill
++ine
++ela
++all
++ade
++abl
++ess
++tra
++com
++ins
++abe
++nom+rare
++gen+rare
++acc+rare
++par+rare
++ill+rare
++ine+rare
++ela+rare
++all+rare
++ade+rare
++abl+rare
++ess+rare
++tra+rare
++com+rare
++ins+rare
++abe+rare
diff --git a/pypykko/pypykko/patterns/pos-verb-patterns.txt b/pypykko/pypykko/patterns/pos-verb-patterns.txt
new file mode 100644
index 0000000..94230a7
--- /dev/null
+++ b/pypykko/pypykko/patterns/pos-verb-patterns.txt
@@ -0,0 +1,96 @@
++pres+1sg
++pres+1sg+rare
++pres+2sg
++pres+2sg+rare
++pres+3sg
++pres+3sg+rare
++pres+1pl
++pres+1pl+rare
++pres+2pl
++pres+2pl+rare
++pres+3pl
++pres+3pl+rare
++pres+conneg
++pres+conneg+rare
++past+1sg
++past+1sg+rare
++past+2sg
++past+2sg+rare
++past+3sg
++past+3sg+rare
++past+1pl
++past+1pl+rare
++past+2pl
++past+2pl+rare
++past+3pl
++past+3pl+rare
++past+conneg
++past+conneg+sg
++past+conneg+pl
++imper+1sg
++imper+2sg
++imper+2sg+rare
++imper+3sg
++imper+1pl
++imper+2pl
++imper+3pl
++imper+2sg+conneg
++imper+2sg+conneg+rare
++imper+3sg+conneg
++imper+pl+conneg
++cond+1sg
++cond+1sg+rare
++cond+2sg
++cond+2sg+rare
++cond+3sg
++cond+3sg+rare
++cond+1pl
++cond+1pl+rare
++cond+2pl
++cond+2pl+rare
++cond+3pl
++cond+3pl+rare
++cond+conneg
++cond+conneg+rare
++poten+1sg
++poten+2sg
++poten+3sg
++poten+1pl
++poten+2pl
++poten+3pl
++poten+conneg
++part_ma+sg+nom
++part_maton+sg+nom
++part_pres+sg+nom
++part_pres+pl+nom
++part_past+sg+nom
++part_past+pl+nom
++inf1
++inf1+tra+poss3
++inf2+ine
++inf2+ins
++inf3+ill
++inf3+ill+rare
++inf3+ine
++inf3+ine+rare
++inf3+ela
++inf3+ela+rare
++inf3+ade
++inf3+ade+rare
++inf3+abe
++inf3+abe+rare
++pass+pres
++pass+pres+conneg
++pass+past
++pass+past+conneg
++pass+imper
++pass+imper+conneg
++pass+cond
++pass+cond+conneg
++pass+poten
++pass+poten+conneg
++pass+part_pres+sg+nom
++pass+part_pres+pl+nom
++pass+part_past+sg+nom
++pass+part_past+pl+nom
++pass+inf2+ine
diff --git a/pypykko/pypykko/scriptutils.py b/pypykko/pypykko/scriptutils.py
index 07c5186..4539277 100644
--- a/pypykko/pypykko/scriptutils.py
+++ b/pypykko/pypykko/scriptutils.py
@@ -4,6 +4,8 @@
 from .file_tools import load_json
 from .constants import POS_TAGS
 from collections import defaultdict
+from .utils import syllabify
+C = '[bcdfghjklmnpqrstvwxz]'
 
 try:
 	ADVERB_INFLECTIONS = load_json(filename='adverbs.json', directory='scripts/inflection')
@@ -26,12 +28,14 @@
 	'kuinka',
 ]
 
-def validate_pos(pos):
+
+def is_valid_pos(pos):  # True for an empty/None pos or a tag listed in POS_TAGS; otherwise warns on stderr and returns False
 	if pos and pos not in POS_TAGS:
-		print(sys.stderr.write(f'Warning! Unknown POS tag "{pos}"\n'))
+		sys.stderr.write(f'Warning! Unknown POS tag "{pos}"\n')  # write only: wrapping this in print() also echoes write()'s return value to stdout
 		return False
 	return True
 
+
 def get_wordform(pairs):
 	return ''.join(c for _, c in pairs if c != '0')
 
@@ -86,21 +90,85 @@ def determine_separator(w1, w2, default='0', strip_zeros=True):
 	return default
 
 
-def determine_wordform_harmony(wordform, default=None):
-	if default:
-		return default.upper()
+def get_parts(lemma):  # split lemma into parts; each part keeps its trailing separator (-, |, % or space) if it has one
+	return re.findall(r'[^-|% ]+[-|% ]?', lemma) or [lemma]  # nothing matched (e.g. '' or separators only): return the lemma itself as the single part
+
+
+def get_base_lemma(lemma):  # last part of lemma as split by get_parts()
+	return get_parts(lemma)[-1]
+
+
+def count_syllables(lemma):  # number of '·'-separated pieces in the syllabify() output for lemma
+	syllabified = syllabify(lemma, compound=False)  # compound=False: syllabify() skips its add_compound_separators() step
+	return len(syllabified.split('·'))
+
+
+def determine_lemma_vowel_harmony(lemma, kotus_class=None):
+	"""Return 'front', 'back' or 'front|back' for the last part of lemma; special-case rules run before the determine_wordform_harmony() fallback."""
+	lemma = get_parts(lemma).pop()
+
+	# "onomatopoeettinen"
+	if re.fullmatch('.*(poeettinen)', lemma):
+		return 'back'
+
+	# "prototyyppi", "prototyyppinen", "geotekninen", "bioteknisesti"
+	if re.fullmatch('.*('
+		'depressiivi|elementti|elementtisesti|kineettinen|kineettisesti|kliininen|kliinisesti|oeettinen|oeettisesti|semiitti|semiittinen|semitismi|semitisti|semitistinen|semitistisesti|sentrinen|sentrisesti|sentrismi|synteesi|synteettinen|synteettisesti|tekninen|teknisesti|tyyppi|tyyppinen|tyyppisesti|syklinen|'
+		'syklisesti|psyykkinen|psyykkisesti|fyysinen|fyysisesti)', lemma):
+		return 'front'
+
+	# "makromolekyyli", "psykoanalyyttinen"
+	if re.fullmatch('.*(aldehydi|analyysi|analyyttinen|analyyttisesti|molekyyli|molekyylinen)', lemma):
+		return 'front|back'
+
+	# "porfyyri", "polyyppi", "dialyysi", "porfyriini", "molybdeeni"
+	if re.fullmatch(f'.*[aou].*(y{C}{C}?i|y{C}{C}?inen|y{C}{C}?isesti|y{C}{C}?ismi|y{C}{C}?isti|y{C}{C}?ii{C}{C}?i|y{C}{C}?ee{C}{C}?i)', lemma):
+		return 'front|back'
+
+	# "anglofiili", "karsinogeeni", "telomeeri", "ortopedi", "antisepti", "dynamometri"/"barometri", "hypoteesi" (only lemmas of four or more syllables)
+	if count_syllables(lemma) >= 4 and re.fullmatch('.*[aou].*(geeni|iili|meeri|metri|pedi|septi|teesi)', lemma):
+		return 'front|back'
+
+	# "fylogeneesi", "fylogeneettisesti"
+	if re.fullmatch('.*[aou].*(elektrinen|elektrisesti|fiili|fiilinen|fiilisesti|geeninen|geenisesti|geneesi|geneettinen|geneettisesti|metrinen|metrisesti|pedinen|pedisesti|septinen|septisesti|teismi|teisti|teistinen|teistisesti|terminen|termisesti|tsepiini)', lemma):
+		return 'front|back'
+
+	# Initialisms and numbers
+	if re.fullmatch('.*[14579BCDEFGIJLMNPRSTVWXYÄÖÜÉ]', lemma):
+		return 'front'
+	if re.fullmatch('.*[2368AHKOQUZÅ]', lemma):
+		return 'back'
+	if re.fullmatch('.*[123456789]0(:s)?', lemma):
+		return 'front'
+	if re.fullmatch('.+oy', lemma):
+		return 'back'
+	if re.fullmatch(f'.*[aouAOU]{C}+y', lemma):
+		return 'back'
+	if re.fullmatch('.*[aouAOU].*y', lemma):
+		return 'front|back'
+	if kotus_class in {'18B', '10B'} and lemma[-1] in set('bcdefgijlmnprstvwxyzäöüé'):
+		return 'front'
+	if kotus_class in {'18B', '10B'}:
+		return 'back'
+
+	return determine_wordform_harmony(lemma)
+
+
+def determine_wordform_harmony(wordform, default_harmony=None):  # scans wordform from its end; the first harmony-bearing character found decides
+	if default_harmony in {'front', 'back'}:  # any other value (None, 'front|back', ...) is ignored and the scan below runs
+		return default_harmony
 	for c in reversed(wordform.lower()):
 		if c in set('y'):
-			return 'FRONT'
-		if c in set('aouáóúàòùâôû'):
-			return 'BACK'
+			return 'front'
+		if c in set('aouáóúàòùâôûå'):
+			return 'back'
 		if c in set('äöüø'):
-			return 'FRONT'
+			return 'front'
 		if c in set('14579'):
-			return 'FRONT'
+			return 'front'
 		if c in set('2368'):
-			return 'BACK'
-	return 'FRONT'
+			return 'back'
+	return 'front'  # no harmony-bearing character found: default to front
 
 
 def unpack(classes='', gradations='', harmonies='', vowels='', ignore_styles=False):
@@ -139,6 +207,14 @@ def ddict(d: dict):
 	result.update(d)
 	return result
 
+def is_uninflectable(lemma):
+
+	"""
+	Return True if lemma is empty or its last character is one of . : ; -
+	"""
+
+	return not lemma or lemma[-1] in set('.:;-')
+
 
 """
 def combine(obj1: dict, obj2: dict):
@@ -154,4 +230,4 @@ def combine_objs(objs):
 	for obj in objs:
 		combined = combine(combined, obj)
 	return combined
-"""
\ No newline at end of file
+"""
diff --git a/pypykko/pypykko/tokenizer.py b/pypykko/pypykko/tokenizer.py
index 3587e03..c261e90 100644
--- a/pypykko/pypykko/tokenizer.py
+++ b/pypykko/pypykko/tokenizer.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python3
 
 """
-Tokenize text.
+Text tokenization.
 For testing/debugging purposes only.
 """
 
@@ -25,30 +25,31 @@
 REGEX_HASHTAG = r'#[A-Za-z0-9_]+'
 REGEX_HANDLE = r'@[A-Za-z0-9_]+'
 REGEX_REDDIT = r'r/[A-Za-z0-9_]+|u/[A-Za-z0-9_]+'
-REGEX_THOUSANDS = r'[1-9][0-9]?[0-9]?(?: [0-9][0-9][0-9])+(?:-[a-zåäö-]+)?'
-REGEX_THOUSANDS_RANGE = r'[1-9][0-9]?[0-9]?(?: [0-9][0-9][0-9])+[-–][1-9][0-9]?[0-9]?(?: [0-9][0-9][0-9])+'
+REGEX_THOUSANDS = r'[1-9][0-9]?[0-9]?(?:[  ][0-9][0-9][0-9])+(?:-[a-zåäö-]+)?'
+REGEX_THOUSANDS_RANGE = r'[1-9][0-9]?[0-9]?(?:[  ][0-9][0-9][0-9])+[-–][1-9][0-9]?[0-9]?(?:[  ][0-9][0-9][0-9])+'
 REGEX_XML_ELEM = r'<[^<>]+>'
 REGEX_HTML_ENTITY = r'&[^;\s]+;'
-# REGEX_URL = '(?:https?://|file:///)[a-z0-9](?:[.][a-z0-9][a-z0-9]+)+'
-# REGEX_EMAIL = '(?:https?://|file:///)[a-z0-9](?:[.][a-z0-9][a-z0-9]+)+'
-# REGEX_CHORD = 'xxx'
-# REGEX_IUPAC_NAME = 'xxx'
-
-REGEX_ALL = f'{LINE_BREAK}' \
-			f'{REGEX_UNITS}|' \
-			f'{REGEX_EMOTICON}|' \
-			f'{REGEX_ABBREV}|' \
-			f'{REGEX_INITIAL}|' \
-			f'{REGEX_ORDINAL}|' \
-			f'{REGEX_DATE}|' \
-			f'{REGEX_CLOCK}|' \
-			f'{REGEX_HASHTAG}|' \
-			f'{REGEX_HANDLE}|' \
-			f'{REGEX_REDDIT}|' \
-			f'{REGEX_THOUSANDS}|' \
-			f'{REGEX_THOUSANDS_RANGE}|' \
-			f'{REGEX_XML_ELEM}|' \
-			f'{REGEX_HTML_ENTITY}'
+REGEX_URL = r'(?:https?://|file:///|www\.)(?:[a-z0-9-]+\.)+\S+[^ \t\n)(:;,.]'  # scheme or "www.", dotted host labels (hyphens allowed), then the rest up to a final non-punctuation character
+# REGEX_IUPAC_NAME = r'...'
+
+REGEX_ALL = '|'.join((
+	LINE_BREAK,
+	REGEX_UNITS,
+	REGEX_EMOTICON,
+	REGEX_ABBREV,
+	REGEX_INITIAL,
+	REGEX_ORDINAL,
+	REGEX_DATE,
+	REGEX_CLOCK,
+	REGEX_HASHTAG,
+	REGEX_HANDLE,
+	REGEX_REDDIT,
+	REGEX_THOUSANDS,
+	REGEX_THOUSANDS_RANGE,
+	REGEX_XML_ELEM,
+	REGEX_HTML_ENTITY,
+	REGEX_URL,
+))
 
 PUNCT_HEAD = '\t\n (/"“”„¿¡‹«»{[\'’'
 PUNCT_TAIL = '\t\n .…,;?!)/"“”„›»}\\]\'’'
@@ -74,6 +75,7 @@ def separate_punct(s):
 		separated = head + tail[::-1]
 		return separated
 
+	text = re.sub('^( *[-–—])([A-ZÅÄÖ])', r'\1 \2', text)
 	text = f' {text} '
 	text = re.sub(rf'({REGEX_XML_ELEM})', r' \1 ', text)
 	text = text.replace('\n\n', f' {LINE_BREAK} ')
@@ -145,10 +147,5 @@ def tokenize(text):
 
 if __name__ == '__main__':
 	for line in sys.stdin:
-		line = line.replace('&amp; ', '&').replace('&lt; ', '<').replace('&gt; ', '>')
+		# line = line.replace('&amp; ', '&').replace('&lt; ', '<').replace('&gt; ', '>')
 		print(tokenize(line), end="")
-
-
-
-
-
diff --git a/pypykko/pypykko/utils.py b/pypykko/pypykko/utils.py
index 26580ab..2b499fd 100644
--- a/pypykko/pypykko/utils.py
+++ b/pypykko/pypykko/utils.py
@@ -1,7 +1,6 @@
 import re
 from .constants import PARSER_FST_PATH, FIELD_STRING
 import kfst
-from .scriptutils import validate_pos
 from typing import NamedTuple
 
 C = "[bcdfghjklmnpqrstvwxzšžčśźćń'’]"
@@ -76,6 +75,8 @@ def compare_with_others(a_source, analyses):
 			a_target[5] += f' ← {pos}:{lemma_source}:{participle_tag}'
 			return 'has-participle'
 
+	return
+
 class PykkoAnalysis(NamedTuple):
 	wordform: str
 	source: str
@@ -86,7 +87,7 @@ class PykkoAnalysis(NamedTuple):
 	morphtags: str
 	weight: float
 
-def analyze(word: str, only_best=True, normalize_separators=True, ignore_derivatives=True) -> list[PykkoAnalysis]:
+def analyze(word, only_best=True, normalize_separators=True, ignore_derivatives=True) -> list[PykkoAnalysis]:
 
 	"""
 	Return list of tuples (morphological analyses) with duplicates removed.
@@ -95,7 +96,7 @@ def analyze(word: str, only_best=True, normalize_separators=True, ignore_derivat
 
 	analyses = []
 	taken = {}
-	for analysis_string, weight in list(PARSER_FST.lookup(word)) or [(unk_result(word), inf)]:
+	for analysis_string, weight in PARSER_FST.lookup(word):
 
 		if normalize_separators:
 			analysis_string = analysis_string.replace('⁅BOUNDARY⁆', '|').replace('⁅HYPHEN⁆', '-')
@@ -124,14 +125,16 @@ def analyze(word: str, only_best=True, normalize_separators=True, ignore_derivat
 		filtered.append(analysis)
 		best = weight
 
+	filtered = filtered or [([word] + unk_result(word).split('\t') + [inf])]
+
 	return [PykkoAnalysis(*a) for a in filtered]
 
 
-def add_compound_separators(word: str, pos=None, normalize_separators=True, pick_first=False) -> set[str] | str:
+def add_compound_separators(word, pos=None, normalize_separators=True, pick_first=False):
 
 	# TODO: Allow adding separators to non-lemma words?
 
-	valid: set[str] = set()
+	valid = set()
 	best = inf
 	for a in analyze(word, only_best=False, normalize_separators=normalize_separators):
 		_, _, lemma, p, _, _, _, weight = a
@@ -155,9 +158,11 @@ def is_plural(word):
 			return lemma
 	return False
 
+
 def singularize(word):
 	return is_plural(word) or word
 
+
 def pos_tag(word, force_match=False, max_weight=inf):
 
 	if force_match:
@@ -189,15 +194,7 @@ def lemmatize(word, pos=None):
 
 def syllabify(word: str, pos=None, compound=True, big_words=False):
 
-	validate_pos(pos)
-
-	# Type checker doesn't particularly enjoy add_compound_separators having a return type
-	# that depends on whether pick_first is True or not.
-
-	if compound:
-		separated = add_compound_separators(word, pos, pick_first=True)
-		assert isinstance(separated, str)
-		word = separated
+	word = add_compound_separators(word, pos, pick_first=True) if compound else word # type: ignore
 
 	# lito·grafia, mikro·skooppi (alternative syllabification)
 	if big_words:
diff --git a/pypykko/setup.cfg b/pypykko/setup.cfg
index 92d2951..0f87c02 100644
--- a/pypykko/setup.cfg
+++ b/pypykko/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = pypykko
-version = 0.3.0
+version = 0.4.0-beta
 author = Théo Salmenkivi-Friberg
 author_email = theo.friberg@helsinki.f
 description = A pure-python wrapper for the pykko Finnish morphological analyser and inflector
@@ -29,3 +29,4 @@ install_requires =
 pypykko =
     *.kfst
     *.tsv
+    patterns/*.txt