From 58c2cf399875ff0b6fb576321e0688cddbd1195a Mon Sep 17 00:00:00 2001
From: Devin Prescott
Date: Mon, 9 Oct 2017 10:29:05 -0400
Subject: [PATCH 1/2] bug fix: encoding missing from open()

Encoding option that is passed to read_table was never passed to open() command.

Should fix ISSUE #8: UnicodeDecodeError: 'gbk' codec can't decode byte 0xbf in position 2: illegal multibyte sequence
---
 geotext/geotext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geotext/geotext.py b/geotext/geotext.py
index 848b395..5cd8b7d 100755
--- a/geotext/geotext.py
+++ b/geotext/geotext.py
@@ -42,7 +42,7 @@ def read_table(filename, usecols=(0, 1), sep='\t', comment='#', encoding='utf-8'
         A dictionary with the same length as the number of lines in `filename`
     """
 
-    with open(filename, 'r') as f:
+    with open(filename, 'rt', encoding=encoding) as f:
         # skip initial lines
         for _ in range(skip):
             next(f)

From 2371fe09f931c4a3e705cf454acc0c93a9ed54a4 Mon Sep 17 00:00:00 2001
From: Devin Prescott
Date: Wed, 18 Oct 2017 10:09:34 -0400
Subject: [PATCH 2/2] Encoding Fix

Updated to make backward compatible with Python 2 (>2.5)
---
 geotext/geotext.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/geotext/geotext.py b/geotext/geotext.py
index 5cd8b7d..247b548 100755
--- a/geotext/geotext.py
+++ b/geotext/geotext.py
@@ -42,13 +42,13 @@ def read_table(filename, usecols=(0, 1), sep='\t', comment='#', encoding='utf-8'
         A dictionary with the same length as the number of lines in `filename`
     """
 
-    with open(filename, 'rt', encoding=encoding) as f:
+    with open(filename, 'rb') as f:
         # skip initial lines
         for _ in range(skip):
             next(f)
 
-        # filter comment lines
-        lines = (line for line in f if not line.startswith(comment))
+        # filter comment lines (removes BOM during comment checking, but leaves it in capturing)
+        lines = (line.decode(encoding) for line in f if not line.decode(encoding).replace(u'\ufeff','').startswith(comment))
 
         d = dict()
         for line in lines: