utils/pyutil.py at master · ESBigeard/utils · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""various useful functions"""

import codecs
import csv
import os
import re
import unicodedata
# directory containing this file (realpath resolves symlinks), with a trailing "/"
path = os.path.dirname(os.path.realpath(__file__))+"/"

def normalise_unicode(s,diac=True):
	"""Normalise a string to a standard unicode string, with or without diacritics.

	Byte strings are decoded as UTF-8; any other non-text object is converted
	with the text constructor. The text is then unicode-normalised (NFKC when
	diacritics are kept, NFKD otherwise) and three characters are replaced by
	ASCII equivalents: U+0153 (oe ligature) -> "oe", U+00B7 (middle dot) -> "."
	and U+2019 (typographic apostrophe) -> "'". When diac is False, combining
	marks are removed from the decomposed text.

	Works on both Python 2 and Python 3.

	:param s: string to normalise
	:type s: bytes, str or unicode
	:param diac: True to keep diacritics, False to delete them. Default: keep them
	:type diac: bool
	:return: the normalised text
	:rtype: unicode (str on Python 3)
	:raises UnicodeDecodeError: if s is a byte string that is not valid UTF-8

	:example:
	>>> normalise_unicode(b"\\xc3\\xa9ternel",True) == u"\\xe9ternel"
	True
	>>> normalise_unicode(u"\\xe9ternel",False) == u"eternel"
	True
	"""
	#note: in the source of the examples above, the double backslashes are escaped single backslashes

	text_type=type(u"") #unicode on Python 2, str on Python 3
	if isinstance(s,bytes):
		s=s.decode("utf-8")
	elif not isinstance(s,text_type):
		s=text_type(s)

	#NFKC recomposes accented characters, NFKD splits them into base character + combining mark
	nf=unicodedata.normalize('NFKC' if diac else 'NFKD',s)

	#single pass over the text; keys are code points
	nf=nf.translate({
		0x0153:u'oe', #oe ligature
		0x00B7:u'.', #middle dot used as a bullet
		0x2019:u"'", #typographic apostrophe
	})

	if diac:
		return nf
	#the text is decomposed here, so dropping combining marks removes the diacritics
	return u''.join(c for c in nf if not unicodedata.combining(c))


def empty_tree(input_list):
	"""Tell whether a structure of nested lists holds no value at all.

	The structure is empty when every item is itself a list that is
	(recursively) empty; an empty list is therefore empty, and any non-list
	item makes the structure non-empty.

	from http://stackoverflow.com/questions/1593564

	:param input_list: the (possibly nested) list to inspect
	:type input_list: list
	:return: True if the structure is empty, False otherwise
	:rtype: bool"""
	return all(isinstance(node,list) and empty_tree(node) for node in input_list)

def csv_named_rows_reader(csv_file,delimiter=","):
	"""Yield each data row of a csv file as a dictionary {header: cell_content}.

	The first row of the csv is taken as the header. Header names and cell
	contents are stripped of surrounding whitespace. This makes it easier to
	access cells in files with many columns of arbitrary order. A row shorter
	than the header yields a dictionary holding only the columns present.

	:param csv_file: a text file object (e.g. returned by open()) containing csv data, or the path of a UTF-8 encoded csv file
	:param delimiter: the cell delimiter
	:type delimiter: str
	:return: generator of dictionaries, one per data row
	:raises ValueError: if a column name appears twice in the header
	:raises IndexError: if a data row has more cells than the header
	"""

	#csv.reader expects an iterable of lines. if the argument is a string, it is taken as the path of a file to open
	opened_here=isinstance(csv_file,str)
	if opened_here:
		#newline="" lets the csv module interpret line endings itself, as its documentation recommends
		csv_file=open(csv_file,mode="r",encoding="utf-8",newline="")

	try:
		reader=csv.reader(csv_file, delimiter=delimiter, quotechar='"')

		headers=[]
		for cell in next(reader,[]):
			cell=cell.strip()
			if cell in headers:
				raise ValueError("column name in csv file appears twice")
			headers.append(cell)

		for row in reader:
			if len(row)>len(headers):
				#typical cause: spaces next to the delimiter around quoted cells, which make csv.reader misread the quotechars
				raise IndexError("csv row has more cells than the header (check for spaces between delimiters and quoted cells)")
			yield {header:cell.strip() for header,cell in zip(headers,row)}
	finally:
		#only close what this function opened; a file object passed in stays under the caller's control
		if opened_here:
			csv_file.close()


def chunker(s,keep_delimiters=True):
	"""
	Split a string into tokens; intended for french text.

	Delimiters are kept as tokens. Runs of whitespace, runs of dots and runs
	of digits are kept together; every other non-word character is a token
	of its own. An apostrophe is attached to the previous word, two
	apostrophes in a row become a single "''" token, and "aujourd'hui" and
	"peut-etre" are kept as single tokens ("peut-être" is returned as
	"peut-etre"). The typographic apostrophe U+2019 is replaced by "'".

	:param s: string to split
	:type s: str or unicode
	:param keep_delimiters: if False, tokens made only of spaces are dropped from the result (punctuation tokens are kept)
	:type keep_delimiters: bool
	:return: list of tokens
	:rtype: list

	:example:
	>>> chunker("j'ai vu un lama aujourd'hui... qu'il etait beau!")
	["j'", 'ai', ' ', 'vu', ' ', 'un', ' ', 'lama', ' ', "aujourd'hui", '...', ' ', "qu'", 'il', ' ', 'etait', ' ', 'beau', '!']

	"""
	s=s.replace(u"\u2019","'") #typographic apostrophe -> ascii apostrophe
	#runs of whitespace, of dots or of digits stay together, any other non-word character is split on its own
	#raw string: \s \. \W \d are regex escapes, not string escapes
	pieces=re.split(r"(\s+|\.+|\W|\d+)",s,flags=re.UNICODE)

	tokens=[]
	for piece in pieces:
		if not piece: #re.split yields empty strings between two adjacent delimiters
			continue

		if not tokens: #first token: nothing to attach it to
			tokens.append(piece)
		elif piece=="'": #attach apostrophes to the previous word
			if tokens[-1].endswith("'"): #double ' used as quotation marks
				#TODO handle runs of more than two apostrophes better
				tokens[-1]=tokens[-1][:-1]
				if not tokens[-1]: #the previous token was a lone apostrophe: do not leave an empty token behind
					tokens.pop()
				tokens.append("''")
			else:
				tokens[-1]+="'"
		elif tokens[-1]=="aujourd'": #checked after the apostrophe has been attached, so that aujourd'hui is rebuilt
			tokens[-1]+=piece
		elif len(tokens)>1 and tokens[-2]=="peut" and tokens[-1]=="-" and piece in ("etre",u"\xeatre"):
			del tokens[-2:] #drop "peut" and the hyphen
			tokens.append("peut-etre")
		else:
			tokens.append(piece)

	if keep_delimiters:
		return tokens
	return [token for token in tokens if not re.match(" +$",token)]

def join_treetagger(fname,col=2):
	"""Read a tab-separated treetagger output file and join one column into a text.

	Each line is stripped of trailing whitespace and split on tabs; the cell
	at index col is collected. Lines that have no such cell are skipped.
	With the default col=2 the result is the lemmatized text.

	:param fname: path of the UTF-8 encoded treetagger output file
	:param col: index of the column to extract
	:type col: int
	:return: the collected cells joined by single spaces
	"""
	picked=[]
	with codecs.open(fname,"r","utf-8") as handle:
		for raw_line in handle:
			cells=raw_line.rstrip().split("\t")
			try:
				picked.append(cells[col])
			except IndexError:
				#line without the requested column
				continue
	return " ".join(picked)


def merge(lsts):
	"""
	Merge a list of lists into a list of sets, so that all the lists sharing
	at least one element (directly or through a chain of other lists) end up
	in the same set. Empty lists are ignored. Used to merge sub-syntagms.

	:param lsts: list of lists to merge
	:type lsts: list
	:return: list of merged sets
	:rtype: list of sets

	:example:
	>>> merge([[0,1,2],[2,3],[4,5]])
	[{0, 1, 2, 3}, {4, 5}]

	"""

	groups=[set(lst) for lst in lsts if lst]
	changed=True
	#repeat full passes until one of them merges nothing
	while changed:
		changed=False
		pending=groups
		groups=[]
		while pending:
			#take the first pending set and absorb every other set overlapping it
			current=pending[0]
			untouched=[]
			for other in pending[1:]:
				if current.isdisjoint(other):
					untouched.append(other)
				else:
					current.update(other)
					changed=True
			groups.append(current)
			pending=untouched
	return groups

def fmesure(rappel,pre):
	"""Return the harmonic mean of recall and precision: 2*pre*rappel/(pre+rappel).

	:param rappel: recall
	:type rappel: int or float
	:param pre: precision
	:type pre: int or float
	:return: the harmonic mean, or 0.0 when pre+rappel is zero (instead of raising ZeroDivisionError)
	:rtype: float
	"""
	total=pre+rappel
	if total==0:
		return 0.0
	#float literal forces true division even with integer arguments on Python 2
	return 2.0*pre*rappel/total

def unicode_block(ch):
	'''
	Return the Unicode block name for ch, or None if ch has no block.

	The lookup uses the module-level table _blocks, which is filled by
	_initBlocks() when the module is imported.

	:param ch: a single unicode character
	:type ch: unicode (str on Python 3)
	:return: the block name, or None
	:raises AssertionError: if ch is not a unicode string of length 1

	>>> unicode_block(u'a')
	'Basic Latin'
	>>> unicode_block(u'\\u0b80')
	'Tamil'
	>>> unicode_block(u'\\U000e0080')

	'''

	#type(u"") is unicode on Python 2 and str on Python 3
	assert isinstance(ch, type(u"")) and len(ch) == 1, repr(ch)
	cp = ord(ch)
	for start, end, name in _blocks:
		if start <= cp <= end:
			return name
	return None

def _initBlocks(text):
	"""used by unicode_block()

	Parse text in the format of the Unicode Blocks.txt file, i.e. lines of
	the form "Start Code..End Code; Block Name", and store the result in the
	global list _blocks as (start, end, name) tuples, start and end being
	integers. Lines that do not have this form (comments, blank lines) are
	ignored; leading and trailing whitespace on a line is ignored too.

	uses a global variable, so be careful when importing this whole script
	"""
	global _blocks
	_blocks = []
	pattern = re.compile(r'([0-9A-F]+)\.\.([0-9A-F]+);\ (\S.*\S)')
	for line in text.splitlines():
		#strip so that an indented table is still recognised by the anchored match
		m = pattern.match(line.strip())
		if m:
			start, end, name = m.groups()
			_blocks.append((int(start, 16), int(end, 16), name))

# fill the table at import time; this call must stay at module level (outside _initBlocks)
# retrieved from http://unicode.org/Public/UNIDATA/Blocks.txt
_initBlocks('''
# Blocks-12.0.0.txt
# Date: 2018-07-30, 19:40:00 GMT [KW]
# © 2018 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Format:
# Start Code..End Code; Block Name

# ================================================

# Note:   When comparing block names, casing, whitespace, hyphens,
#         and underbars are ignored.
#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
#         For more information on the comparison of property values,
#            see UAX #44: http://www.unicode.org/reports/tr44/
#
#  All block ranges start with a value where (cp MOD 16) = 0,
#  and end with a value where (cp MOD 16) = 15. In other words,
#  the last hexadecimal digit of the start of range is ...0
#  and the last hexadecimal digit of the end of range is ...F.
#  This constraint on block ranges guarantees that allocations
#  are done in terms of whole columns, and that code chart display
#  never involves splitting columns in the charts.
#
#  All code points not explicitly listed for Block
#  have the value No_Block.

# Property: Block
#
# @missing: 0000..10FFFF; No_Block

0000..007F; Basic Latin
0080..00FF; Latin-1 Supplement
0100..017F; Latin Extended-A
0180..024F; Latin Extended-B
0250..02AF; IPA Extensions
02B0..02FF; Spacing Modifier Letters
0300..036F; Combining Diacritical Marks
0370..03FF; Greek and Coptic
0400..04FF; Cyrillic
0500..052F; Cyrillic Supplement
0530..058F; Armenian
0590..05FF; Hebrew
0600..06FF; Arabic
0700..074F; Syriac
0750..077F; Arabic Supplement
0780..07BF; Thaana
07C0..07FF; NKo
0800..083F; Samaritan
0840..085F; Mandaic
0860..086F; Syriac Supplement
08A0..08FF; Arabic Extended-A
0900..097F; Devanagari
0980..09FF; Bengali
0A00..0A7F; Gurmukhi
0A80..0AFF; Gujarati
0B00..0B7F; Oriya
0B80..0BFF; Tamil
0C00..0C7F; Telugu
0C80..0CFF; Kannada
0D00..0D7F; Malayalam
0D80..0DFF; Sinhala
0E00..0E7F; Thai
0E80..0EFF; Lao
0F00..0FFF; Tibetan
1000..109F; Myanmar
10A0..10FF; Georgian
1100..11FF; Hangul Jamo
1200..137F; Ethiopic
1380..139F; Ethiopic Supplement
13A0..13FF; Cherokee
1400..167F; Unified Canadian Aboriginal Syllabics
1680..169F; Ogham
16A0..16FF; Runic
1700..171F; Tagalog
1720..173F; Hanunoo
1740..175F; Buhid
1760..177F; Tagbanwa
1780..17FF; Khmer
1800..18AF; Mongolian
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
1900..194F; Limbu
1950..197F; Tai Le
1980..19DF; New Tai Lue
19E0..19FF; Khmer Symbols
1A00..1A1F; Buginese
1A20..1AAF; Tai Tham
1AB0..1AFF; Combining Diacritical Marks Extended
1B00..1B7F; Balinese
1B80..1BBF; Sundanese
1BC0..1BFF; Batak
1C00..1C4F; Lepcha
1C50..1C7F; Ol Chiki
1C80..1C8F; Cyrillic Extended-C
1C90..1CBF; Georgian Extended
1CC0..1CCF; Sundanese Supplement
1CD0..1CFF; Vedic Extensions
1D00..1D7F; Phonetic Extensions
1D80..1DBF; Phonetic Extensions Supplement
1DC0..1DFF; Combining Diacritical Marks Supplement
1E00..1EFF; Latin Extended Additional
1F00..1FFF; Greek Extended
2000..206F; General Punctuation
2070..209F; Superscripts and Subscripts
20A0..20CF; Currency Symbols
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2150..218F; Number Forms
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
2C00..2C5F; Glagolitic
2C60..2C7F; Latin Extended-C
2C80..2CFF; Coptic
2D00..2D2F; Georgian Supplement
2D30..2D7F; Tifinagh
2D80..2DDF; Ethiopic Extended
2DE0..2DFF; Cyrillic Extended-A
2E00..2E7F; Supplemental Punctuation
2E80..2EFF; CJK Radicals Supplement
2F00..2FDF; Kangxi Radicals
2FF0..2FFF; Ideographic Description Characters
3000..303F; CJK Symbols and Punctuation
3040..309F; Hiragana
30A0..30FF; Katakana
3100..312F; Bopomofo
3130..318F; Hangul Compatibility Jamo
3190..319F; Kanbun
31A0..31BF; Bopomofo Extended
31C0..31EF; CJK Strokes
31F0..31FF; Katakana Phonetic Extensions
3200..32FF; Enclosed CJK Letters and Months
3300..33FF; CJK Compatibility
3400..4DBF; CJK Unified Ideographs Extension A
4DC0..4DFF; Yijing Hexagram Symbols
4E00..9FFF; CJK Unified Ideographs
A000..A48F; Yi Syllables
A490..A4CF; Yi Radicals
A4D0..A4FF; Lisu
A500..A63F; Vai
A640..A69F; Cyrillic Extended-B
A6A0..A6FF; Bamum
A700..A71F; Modifier Tone Letters
A720..A7FF; Latin Extended-D
A800..A82F; Syloti Nagri
A830..A83F; Common Indic Number Forms
A840..A87F; Phags-pa
A880..A8DF; Saurashtra
A8E0..A8FF; Devanagari Extended
A900..A92F; Kayah Li
A930..A95F; Rejang
A960..A97F; Hangul Jamo Extended-A
A980..A9DF; Javanese
A9E0..A9FF; Myanmar Extended-B
AA00..AA5F; Cham
AA60..AA7F; Myanmar Extended-A
AA80..AADF; Tai Viet
AAE0..AAFF; Meetei Mayek Extensions
AB00..AB2F; Ethiopic Extended-A
AB30..AB6F; Latin Extended-E
AB70..ABBF; Cherokee Supplement
ABC0..ABFF; Meetei Mayek
AC00..D7AF; Hangul Syllables
D7B0..D7FF; Hangul Jamo Extended-B
D800..DB7F; High Surrogates
DB80..DBFF; High Private Use Surrogates
DC00..DFFF; Low Surrogates
E000..F8FF; Private Use Area
F900..FAFF; CJK Compatibility Ideographs
FB00..FB4F; Alphabetic Presentation Forms
FB50..FDFF; Arabic Presentation Forms-A
FE00..FE0F; Variation Selectors
FE10..FE1F; Vertical Forms
FE20..FE2F; Combining Half Marks
FE30..FE4F; CJK Compatibility Forms
FE50..FE6F; Small Form Variants
FE70..FEFF; Arabic Presentation Forms-B
FF00..FFEF; Halfwidth and Fullwidth Forms
FFF0..FFFF; Specials
10000..1007F; Linear B Syllabary
10080..100FF; Linear B Ideograms
10100..1013F; Aegean Numbers
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
10280..1029F; Lycian
102A0..102DF; Carian
102E0..102FF; Coptic Epact Numbers
10300..1032F; Old Italic
10330..1034F; Gothic
10350..1037F; Old Permic
10380..1039F; Ugaritic
103A0..103DF; Old Persian
10400..1044F; Deseret
10450..1047F; Shavian
10480..104AF; Osmanya
104B0..104FF; Osage
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10600..1077F; Linear A
10800..1083F; Cypriot Syllabary
10840..1085F; Imperial Aramaic
10860..1087F; Palmyrene
10880..108AF; Nabataean
108E0..108FF; Hatran
10900..1091F; Phoenician
10920..1093F; Lydian
10980..1099F; Meroitic Hieroglyphs
109A0..109FF; Meroitic Cursive
10A00..10A5F; Kharoshthi
10A60..10A7F; Old South Arabian
10A80..10A9F; Old North Arabian
10AC0..10AFF; Manichaean
10B00..10B3F; Avestan
10B40..10B5F; Inscriptional Parthian
10B60..10B7F; Inscriptional Pahlavi
10B80..10BAF; Psalter Pahlavi
10C00..10C4F; Old Turkic
10C80..10CFF; Old Hungarian
10D00..10D3F; Hanifi Rohingya
10E60..10E7F; Rumi Numeral Symbols
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10FE0..10FFF; Elymaic
11000..1107F; Brahmi
11080..110CF; Kaithi
110D0..110FF; Sora Sompeng
11100..1114F; Chakma
11150..1117F; Mahajani
11180..111DF; Sharada
111E0..111FF; Sinhala Archaic Numbers
11200..1124F; Khojki
11280..112AF; Multani
112B0..112FF; Khudawadi
11300..1137F; Grantha
11400..1147F; Newa
11480..114DF; Tirhuta
11580..115FF; Siddham
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
11700..1173F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
119A0..119FF; Nandinagari
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
11AC0..11AFF; Pau Cin Hau
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11EE0..11EFF; Makasar
11FC0..11FFF; Tamil Supplement
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
13000..1342F; Egyptian Hieroglyphs
13430..1343F; Egyptian Hieroglyph Format Controls
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
16AD0..16AFF; Bassa Vah
16B00..16B8F; Pahawh Hmong
16E40..16E9F; Medefaidrin
16F00..16F9F; Miao
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B130..1B16F; Small Kana Extension
1B170..1B2FF; Nushu
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D2E0..1D2FF; Mayan Numerals
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1E000..1E02F; Glagolitic Supplement
1E100..1E14F; Nyiakeng Puachue Hmong
1E2C0..1E2FF; Wancho
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EC70..1ECBF; Indic Siyaq Numbers
1ED00..1ED4F; Ottoman Siyaq Numbers
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F100..1F1FF; Enclosed Alphanumeric Supplement
1F200..1F2FF; Enclosed Ideographic Supplement
1F300..1F5FF; Miscellaneous Symbols and Pictographs
1F600..1F64F; Emoticons
1F650..1F67F; Ornamental Dingbats
1F680..1F6FF; Transport and Map Symbols
1F700..1F77F; Alchemical Symbols
1F780..1F7FF; Geometric Shapes Extended
1F800..1F8FF; Supplemental Arrows-C
1F900..1F9FF; Supplemental Symbols and Pictographs
1FA00..1FA6F; Chess Symbols
1FA70..1FAFF; Symbols and Pictographs Extended-A
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2F800..2FA1F; CJK Compatibility Ideographs Supplement
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
F0000..FFFFF; Supplementary Private Use Area-A
100000..10FFFF; Supplementary Private Use Area-B

# EOF
''')