diff --git a/setup.cfg b/setup.cfg
index e6a4541..2e174fd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.0
+current_version = 1.1.1
 tag = True
 commit = True
 
@@ -8,7 +8,7 @@ name = statadict
 author = Adrian Tuzimek
 author_email = tuziomek@gmail.com
 description = Utility for parsing Stata dictionary files for usage with Pandas.
-long-description = file: README.md
+long_description = file: README.md
 long_description_content_type = text/markdown
 url = https://github.com/atudomain/statadict
 packages = setuptools.find_packages()
diff --git a/statadict/base.py b/statadict/base.py
index 8f4da63..31743fc 100644
--- a/statadict/base.py
+++ b/statadict/base.py
@@ -109,13 +109,13 @@ class StataDictParser:
     _COLUMN_PATTERN = r'^\s+_column'
     _LINE_PATTERN = r'^\s+_column\((\d+)\)\s+(\S+)\s+(\S+)\s+(\S+)\s*(".*")?'
 
-    def parse(self, file) -> StataDict:
+    def parse(self, file, encoding: str = "utf-8") -> StataDict:
         column_numbers = deque()
         types = deque()
         names = deque()
         formats = deque()
         comments = deque()
-        with open(file, "r") as dct_file:
+        with open(file, "r", encoding=encoding) as dct_file:
             for line in dct_file:
                 if re.search(self._COLUMN_PATTERN, line):
                     line_values = re.findall(self._LINE_PATTERN, line)
@@ -136,7 +136,7 @@ def parse(self, file) -> StataDict:
         )
 
 
-def parse_stata_dict(file: str) -> StataDict:
+def parse_stata_dict(file: str, encoding: str = "utf-8") -> StataDict:
     """
     Parses Stata dictionary file and returns
     object containing column data as attributes.
@@ -148,4 +148,4 @@ def parse_stata_dict(file: str) -> StataDict:
     :rtype: statadict.base.StataDict
     """
     stata_dict_parser = StataDictParser()
-    return stata_dict_parser.parse(file)
+    return stata_dict_parser.parse(file, encoding)