diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..0bc799f --- /dev/null +++ b/.flake8 @@ -0,0 +1,8 @@ +[flake8] +exclude = + venv, + **/migrations/* +# So flake8 plays nicely with black +# https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html +max-line-length = 120 +extend-ignore = E203 \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..2df5e83 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,54 @@ +name: Python package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + release: + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest black isort + pip install .[tests] + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Check black + run: black --check irs_reader tests + - name: isort + run: isort --profile=black --check-only irs_reader tests + - name: Test with pytest + run: | + pytest + - name: Build distribution + if: ${{ github.event_name == 'release' }} + run: | + pip install build + python -m build + - name: Upload source distribution + if: ${{ github.event_name == 'release' }} + uses: softprops/action-gh-release@v2 + with: + files: dist/* diff --git a/.gitmodules b/.gitmodules index 2d13bc8..1015855 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,8 +1,3 @@ [submodule "irs_reader/metadata"] path = irs_reader/metadata - url = https://github.com/jsfenfen/990-xml-metadata - branch = master -[submodule "metadata"] - path = metadata - url = https://github.com/jsfenfen/990-xml-metadata - branch = master + url = https://github.com/datamade/990-xml-metadata.git diff --git a/README.md b/README.md index e7ced4a..9989814 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,5 @@ # IRSx -Update: 12/16. The IRS has announced it will no longer post xml 990 filings to AWS, thereby undermining irsx' ability to automatically retrieve filings. The IRS does appear to make the raw filings available in [bulk format on this page](https://www.irs.gov/charities-non-profits/form-990-series-downloads). It is possible to use IRSx by retrieving the files and placing them at the location that IRSX expects to find them. We are seeking additional information from IRS and plan to address this soon. - - ## Table of Contents - [Installation](#installation) @@ -289,21 +286,11 @@ For example: ### Legacy configuration ### -You also can configure IRSx's cache location by setting the local_settings.py file. 
To figure out where that settings file is, log in to a terminal and type: - - >>> from irsx.settings import IRSX_SETTINGS_LOCATION - >>> IRSX_SETTINGS_LOCATION - '/long/path/to/lib/python3.6/site-packages/irsx/settings.py' - -[ If you get an error, try upgrading irsx with `pip install irsx --upgrade` -- this feature was added in 0.1.1. ] - - -Go to that directory. You can either modify the settings.py file or the local_settings.py file. To do the latter, first `cd` into the directory where the settings files live and run: - - $ cp local_settings.py-example local_settings.py - -Then edit local_settings.py to set WORKING\_DIRECTORY to where the raw xml files are found. +You also can configure IRSx's cache location by setting an environment variable. +```console +> export IRSX_CACHE_DIRECTORY=/where/you/like +``` ## IRSx from python @@ -477,20 +464,16 @@ You can still add command line args, like this: ## Testing -Nosetests - Test coverage is incomplete, improve it with coverage.py; run 'pip install coverage' -then: - - $ nosetests --with-coverage --cover-erase --cover-package=irs_reader - -or - - $ coverage report -m - - - -Tox -- see tox.ini; testing for: 2.7,3.4,3.5,3.6. You may need to run `pip install tox` in the testing environment. 
+Install dependencies +```console +> pip install .[tests] +``` +And run tests +```console +> pytest +``` ## Acknowledgements diff --git a/irs_reader/_version.py b/irs_reader/_version.py index 73e3bb4..f9aa3e1 100644 --- a/irs_reader/_version.py +++ b/irs_reader/_version.py @@ -1 +1 @@ -__version__ = '0.3.2' +__version__ = "0.3.2" diff --git a/irs_reader/dir_utils.py b/irs_reader/dir_utils.py index 64ac79d..d218da6 100644 --- a/irs_reader/dir_utils.py +++ b/irs_reader/dir_utils.py @@ -3,7 +3,7 @@ def mkdir_p(paths): - """ Makedirs, from http://stackoverflow.com/a/600612 """ + """Makedirs, from http://stackoverflow.com/a/600612""" for path in paths: try: os.makedirs(path) diff --git a/irs_reader/file_utils.py b/irs_reader/file_utils.py index 3eaa2b0..7fe8882 100644 --- a/irs_reader/file_utils.py +++ b/irs_reader/file_utils.py @@ -1,11 +1,17 @@ -import re import os +import re +from datetime import datetime + import requests -from datetime import datetime -from .settings import IRS_XML_HTTP_BASE, WORKING_DIRECTORY, INDEX_DIRECTORY, IRS_INDEX_BASE +from .settings import ( + INDEX_DIRECTORY, + IRS_INDEX_BASE, + IRS_XML_HTTP_BASE, + WORKING_DIRECTORY, +) -OBJECT_ID_RE = re.compile(r'20\d{16}') +OBJECT_ID_RE = re.compile(r"20\d{16}") # Not sure how much detail we need to go into here OBJECT_ID_MSG = """ @@ -18,30 +24,28 @@ def stream_download(url, target_path, verbose=False): - """ Download a large file without loading it into memory. 
""" + """Download a large file without loading it into memory.""" response = requests.get(url, stream=True) handle = open(target_path, "wb") if verbose: print("Beginning streaming download of %s" % url) start = datetime.now() try: - content_length = int(response.headers['Content-Length']) - content_MB = content_length/1048576.0 + content_length = int(response.headers["Content-Length"]) + content_MB = content_length / 1048576.0 print("Total file size: %.2f MB" % content_MB) except KeyError: - pass # allow Content-Length to be missing + pass # allow Content-Length to be missing for chunk in response.iter_content(chunk_size=512): - if chunk: # filter out keep-alive new chunks + if chunk: # filter out keep-alive new chunks handle.write(chunk) if verbose: - print( - "Download completed to %s in %s" % - (target_path, datetime.now() - start)) + print("Download completed to %s in %s" % (target_path, datetime.now() - start)) def validate_object_id(object_id): - """ It's easy to make a mistake entering these, validate the format """ + """It's easy to make a mistake entering these, validate the format""" result = re.match(OBJECT_ID_RE, str(object_id)) if not result: print("'%s' appears not to be a valid 990 object_id" % object_id) @@ -49,9 +53,8 @@ def validate_object_id(object_id): return object_id -# Files are no longer available on S3 -# def get_s3_URL(object_id): -# return ("%s/%s_public.xml" % (IRS_XML_HTTP_BASE, object_id)) +def get_s3_URL(object_id): + return "%s/%s_public.xml" % (IRS_XML_HTTP_BASE, object_id) def get_local_path(object_id): diff --git a/irs_reader/filing.py b/irs_reader/filing.py index e0e432e..80db1ae 100644 --- a/irs_reader/filing.py +++ b/irs_reader/filing.py @@ -1,17 +1,14 @@ -import os -import sys import io -import xmltodict import json +import os from collections import OrderedDict from xml.parsers.expat import ExpatError -from .type_utils import dictType, orderedDictType, listType, \ - unicodeType, noneType, strType -from .file_utils import 
stream_download, validate_object_id, \ - get_local_path +import xmltodict -from .settings import KNOWN_SCHEDULES, IRS_READER_ROOT +from .file_utils import get_local_path, get_s3_URL, stream_download, validate_object_id +from .settings import KNOWN_SCHEDULES +from .type_utils import dictType, listType, orderedDictType class InvalidXMLException(Exception): @@ -21,18 +18,18 @@ class InvalidXMLException(Exception): class FileMissingException(Exception): pass -class Filing(object): +class Filing(object): def __init__(self, object_id, filepath=None, URL=None, json=None): - """ Filepath is the location of the file locally; - URL is it's remote location (if not default) - Ignore these and defaults will be used. - If filepath is set, URL is ignored. - json is a json representation of the data, so if given, - no file will be downloaded. + """Filepath is the location of the file locally; + URL is it's remote location (if not default) + Ignore these and defaults will be used. + If filepath is set, URL is ignored. + json is a json representation of the data, so if given, + no file will be downloaded. """ - self.raw_irs_dict = None # The parsed xml will go here - self.version_string = None # Version number here + self.raw_irs_dict = None # The parsed xml will go here + self.version_string = None # Version number here self.object_id = validate_object_id(object_id) self.result = None @@ -42,10 +39,10 @@ def __init__(self, object_id, filepath=None, URL=None, json=None): if json: self.json = json - self.input_type = 'json' + self.input_type = "json" else: self.json = None - self.input_type = 'xml' + self.input_type = "xml" if filepath: self.filepath = filepath else: @@ -53,35 +50,38 @@ def __init__(self, object_id, filepath=None, URL=None, json=None): if URL: self.URL = URL - + else: + self.URL = get_s3_URL(self.object_id) def _download(self, force_overwrite=False, verbose=False): - """ - Files are no longer downloadable. 
- """ - - if os.path.isfile(self.filepath): - return True - else: - raise FileMissingException( - "Filing not available, try downloading with irsx_retrieve [ YEAR ]" - ) - - def _denamespacify(self,entity): + if not force_overwrite: + # If the file is already there, we're done + if os.path.isfile(self.filepath): + if verbose: + print("File already available at %s -- skipping" % (self.filepath)) + return False + stream_download(self.URL, self.filepath, verbose=verbose) + return True + + def _denamespacify(self, entity): """ It's legal to include namespaces in the xml tags, e.g. irs:Return instead of Return This is very rare; see 201940149349301304_public.xml for an example. """ thisentitytype = type(entity) - if thisentitytype == orderedDictType: + if thisentitytype == orderedDictType or thisentitytype == dictType: newOD = OrderedDict() for key in entity.keys(): newkey = key if ":" in key: newkey = key.split(":")[1] newvalue = entity[key] - if type(newvalue) == listType or type(newvalue) == orderedDictType: + if ( + type(newvalue) == listType + or type(newvalue) == orderedDictType + or type(newvalue) == dictType + ): newvalue = self._denamespacify(newvalue) newOD[newkey] = newvalue return newOD @@ -90,53 +90,57 @@ def _denamespacify(self,entity): newlist = list() for item in entity: newvalue = item - if type(newvalue) == listType or type(newvalue) == orderedDictType: + if ( + type(newvalue) == listType + or type(newvalue) == orderedDictType + or type(newvalue) == dictType + ): newvalue = self._denamespacify(newvalue) newlist.append(newvalue) return newlist - else: + else: return entity - def _set_dict_from_xml(self): - # io works across python2 and 3, and allows an encoding arg - with io.open(self.filepath, 'r', encoding='utf-8-sig') as fh: + # io works across python2 and 3, and allows an encoding arg + with io.open(self.filepath, "r", encoding="utf-8-sig") as fh: raw_file = fh.read() try: - self.raw_irs_dict = self._denamespacify(xmltodict.parse(raw_file)) except 
ExpatError: raise InvalidXMLException( - "\nXML Parse error in " + self.filepath \ - + "\nFile may be damaged or incomplete.\n"\ + "\nXML Parse error in " + + self.filepath + + "\nFile may be damaged or incomplete.\n" + "Try erasing this file and downloading again." ) try: - self.raw_irs_dict['Return'] + self.raw_irs_dict["Return"] except KeyError: raise InvalidXMLException( - "'Return' element not located in" + self.filepath \ - + "\nFile may be damaged or incomplete.\n" \ + "'Return' element not located in" + + self.filepath + + "\nFile may be damaged or incomplete.\n" + "Try erasing this file and downloading again." ) - - def _set_dict_from_json(self): self.raw_irs_dict = self.json def _set_version(self): - self.version_string = self.raw_irs_dict['Return']['@returnVersion'] + self.version_string = self.raw_irs_dict["Return"]["@returnVersion"] def _set_ein(self): - self.ein = self.raw_irs_dict['Return']['ReturnHeader']['Filer']['EIN'] + self.ein = self.raw_irs_dict["Return"]["ReturnHeader"]["Filer"]["EIN"] def _set_schedules(self): - """ Attach the known and unknown schedules """ - self.schedules = ['ReturnHeader990x', ] + """Attach the known and unknown schedules""" + self.schedules = [ + "ReturnHeader990x", + ] self.otherforms = [] - for sked in self.raw_irs_dict['Return']['ReturnData'].keys(): + for sked in self.raw_irs_dict["Return"]["ReturnData"].keys(): if not sked.startswith("@"): if sked in KNOWN_SCHEDULES: self.schedules.append(sked) @@ -147,10 +151,10 @@ def get_object_id(self): return self.object_id def get_schedule(self, skedname): - if skedname == 'ReturnHeader990x': - return self.raw_irs_dict['Return']['ReturnHeader'] + if skedname == "ReturnHeader990x": + return self.raw_irs_dict["Return"]["ReturnHeader"] elif skedname in self.schedules: - return self.raw_irs_dict['Return']['ReturnData'][skedname] + return self.raw_irs_dict["Return"]["ReturnData"][skedname] else: return None @@ -159,7 +163,7 @@ def get_ein(self): def get_otherform(self, skedname): 
if skedname in self.otherforms: - return self.raw_irs_dict['Return']['ReturnData'][skedname] + return self.raw_irs_dict["Return"]["ReturnData"][skedname] else: return None @@ -183,7 +187,7 @@ def get_result(self): def set_csv_result(self, csv_result): self.csv_result = csv_result - + def get_csv_result(self): return self.csv_result @@ -192,29 +196,29 @@ def set_keyerrors(self, keyerrorlist): def get_keyerrors(self): return self.keyerrors - + def get_unparsed_json(self): - """ Json dicts are unordered """ + """Json dicts are unordered""" return json.dumps(self.raw_irs_dict) def get_type(self): - if 'IRS990' in self.schedules: - return 'IRS990' - elif 'IRS990EZ' in self.schedules: - return 'IRS990EZ' - elif 'IRS990PF' in self.schedules: - return 'IRS990PF' + if "IRS990" in self.schedules: + return "IRS990" + elif "IRS990EZ" in self.schedules: + return "IRS990EZ" + elif "IRS990PF" in self.schedules: + return "IRS990PF" else: raise Exception("Missing 990/990EZ/990PF-is this filing valid?") def get_parsed_sked(self, skedname): - """ Returns an array because multiple sked K's are allowed""" + """Returns an array because multiple sked K's are allowed""" if not self.processed: raise Exception("Filing must be processed to return parsed sked") if skedname in self.schedules: matching_skeds = [] for sked in self.result: - if sked['schedule_name']==skedname: + if sked["schedule_name"] == skedname: matching_skeds.append(sked) return matching_skeds else: @@ -223,11 +227,11 @@ def get_parsed_sked(self, skedname): def process(self, verbose=False): # don't reprocess inadvertently if not self.processed: - self.processed=True + self.processed = True if self.json: self._set_dict_from_json() else: - + self._download(verbose=verbose) self._set_dict_from_xml() diff --git a/irs_reader/flatten_utils.py b/irs_reader/flatten_utils.py index aa73f1d..b2afe6b 100644 --- a/irs_reader/flatten_utils.py +++ b/irs_reader/flatten_utils.py @@ -1,8 +1,9 @@ import collections + # Mostly from: 
http://stackoverflow.com/a/6027615 -def flatten(d, parent_key='', sep='/'): +def flatten(d, parent_key="", sep="/"): items = [] if d: for k, v in d.items(): @@ -10,7 +11,7 @@ def flatten(d, parent_key='', sep='/'): if isinstance(v, collections.abc.MutableMapping): items.extend(flatten(v, new_key, sep=sep).items()) else: - new_key = new_key.replace("/#text","") + new_key = new_key.replace("/#text", "") items.append((new_key, v)) return dict(items) else: diff --git a/irs_reader/irsx_cli.py b/irs_reader/irsx_cli.py index 14d0a1d..8bb0519 100644 --- a/irs_reader/irsx_cli.py +++ b/irs_reader/irsx_cli.py @@ -1,70 +1,58 @@ import argparse from .filing import Filing -from .settings import KNOWN_SCHEDULES, IRS_READER_ROOT +from .settings import KNOWN_SCHEDULES +from .text_format_utils import to_csv, to_json, to_txt from .xmlrunner import XMLRunner -from .text_format_utils import * def get_parser(): parser = argparse.ArgumentParser("irsx") parser.add_argument( - 'object_ids', - metavar='object_ids', - type=int, - nargs='+', - help='object ids' + "object_ids", metavar="object_ids", type=int, nargs="+", help="object ids" ) parser.add_argument( - '--verbose', - dest='verbose', - action='store_const', - const=True, default=False, - help='Verbose output' + "--verbose", + dest="verbose", + action="store_const", + const=True, + default=False, + help="Verbose output", ) parser.add_argument( "--schedule", choices=KNOWN_SCHEDULES, default=None, - help='Get only that schedule' + help="Get only that schedule", ) parser.add_argument( "--xpath", - dest='documentation', - action='store_const', - const=True, default=False, - help='show xpath in text format' + dest="documentation", + action="store_const", + const=True, + default=False, + help="show xpath in text format", ) parser.add_argument( - "--format", - choices=['json', 'csv', 'txt'], - default='json', - help='Output format' + "--format", choices=["json", "csv", "txt"], default="json", help="Output format" ) + 
parser.add_argument("--file", default=None, help="Write result to file") parser.add_argument( - "--file", - default=None, - help='Write result to file' - ) - parser.add_argument( - '--list_schedules', - dest='list_schedules', - action='store_const', + "--list_schedules", + dest="list_schedules", + action="store_const", const=True, default=False, - help='Only list schedules' + help="Only list schedules", ) return parser def run_main(args_read): - csv_format = args_read.format == 'csv' or args_read.format == 'txt' - xml_runner = XMLRunner( - documentation=args_read.documentation, - csv_format=csv_format - ) + csv_format = args_read.format == "csv" or args_read.format == "txt" + xml_runner = XMLRunner(documentation=args_read.documentation, csv_format=csv_format) # Use the standardizer that was init'ed by XMLRunner standardizer = xml_runner.get_standardizer() @@ -84,35 +72,32 @@ def run_main(args_read): else: if args_read.schedule: parsed_filing = xml_runner.run_sked( - object_id, - args_read.schedule, - verbose=args_read.verbose + object_id, args_read.schedule, verbose=args_read.verbose ) else: parsed_filing = xml_runner.run_filing( - object_id, - verbose=args_read.verbose + object_id, verbose=args_read.verbose ) - if args_read.format == 'json': + if args_read.format == "json": to_json(parsed_filing.get_result(), outfilepath=args_read.file) - elif args_read.format == 'csv': - to_csv( - parsed_filing, - object_id=object_id, - standardizer=standardizer, - documentation=args_read.documentation, - outfilepath=args_read.file - ) + elif args_read.format == "csv": + to_csv( + parsed_filing, + object_id=object_id, + standardizer=standardizer, + documentation=args_read.documentation, + outfilepath=args_read.file, + ) - elif args_read.format == 'txt': - to_txt( - parsed_filing, - standardizer=standardizer, - documentation=args_read.documentation, - outfilepath=args_read.file - ) + elif args_read.format == "txt": + to_txt( + parsed_filing, + standardizer=standardizer, + 
documentation=args_read.documentation, + outfilepath=args_read.file, + ) def main(args=None): @@ -121,5 +106,6 @@ def main(args=None): run_main(args_read) print("\n") + if __name__ == "__main__": main() diff --git a/irs_reader/irsx_index_cli.py b/irs_reader/irsx_index_cli.py index 89fe0f4..6567c2c 100644 --- a/irs_reader/irsx_index_cli.py +++ b/irs_reader/irsx_index_cli.py @@ -1,11 +1,10 @@ -import sys import argparse from datetime import date -from .file_utils import get_index_file_URL, get_local_index_path, \ - stream_download + +from .file_utils import get_index_file_URL, get_local_index_path, stream_download this_year = date.today().year -INDEXED_YEARS = [str(i) for i in range(2011, this_year+1)] +INDEXED_YEARS = [str(i) for i in range(2011, this_year + 1)] def get_cli_index_parser(): @@ -14,15 +13,16 @@ def get_cli_index_parser(): "--year", choices=INDEXED_YEARS, default=None, - help='Optionally update an index file' + help="Optionally update an index file", ) parser.add_argument( - '--verbose', - dest='verbose', - action='store_const', - const=True, default=False, - help='Verbose output' + "--verbose", + dest="verbose", + action="store_const", + const=True, + default=False, + help="Verbose output", ) return parser diff --git a/irs_reader/irsx_retrieve_cli.py b/irs_reader/irsx_retrieve_cli.py index 3756949..38b1f7d 100644 --- a/irs_reader/irsx_retrieve_cli.py +++ b/irs_reader/irsx_retrieve_cli.py @@ -1,40 +1,38 @@ -import sys -import os import argparse +import os from zipfile import ZipFile + from .file_utils import stream_download from .settings import WORKING_DIRECTORY IRS_location = "https://apps.irs.gov/pub/epostcard/990/xml/%s/download990xml_%s" ref_url = "https://www.irs.gov/charities-non-profits/form-990-series-downloads" -# How many files are available per year? +# How many files are available per year? 
# https://www.irs.gov/charities-non-profits/form-990-series-downloads number_of_files = { - '2022':0, - '2021':6, - '2020':8, - '2019':8, - '2018':7, - '2017':7, - '2016':6, - '2015':2 + "2022": 0, + "2021": 6, + "2020": 8, + "2019": 8, + "2018": 7, + "2017": 7, + "2016": 6, + "2015": 2, } + def get_cli_retrieve_parser(): parser = argparse.ArgumentParser("Irsreader") - parser.add_argument( - "year", - nargs='+', - help='4-digit year to retrieve, ' - ) + parser.add_argument("year", nargs="+", help="4-digit year to retrieve, ") parser.add_argument( - '--verbose', - dest='verbose', - action='store_const', - const=True, default=False, - help='Verbose output' + "--verbose", + dest="verbose", + action="store_const", + const=True, + default=False, + help="Verbose output", ) return parser @@ -47,14 +45,15 @@ def download_unzip_erase(remote_url, verbose=False): print("Downloading %s to %s" % (remote_url, local_path)) stream_download(remote_url, local_path, verbose=verbose) - with ZipFile(local_path, 'r') as zipObj: - # Extract all the contents of zip file in different directory - print('Unzipping %s to %s' % (local_path, WORKING_DIRECTORY)) + with ZipFile(local_path, "r") as zipObj: + # Extract all the contents of zip file in different directory + print("Unzipping %s to %s" % (local_path, WORKING_DIRECTORY)) zipObj.extractall(WORKING_DIRECTORY) print("Cleaning up, removing raw file.") os.remove(local_path) + def unload_zipfile_by_year(year, verbose=False): print("Retrieving zipfiles for year %s" % year) if verbose: @@ -68,7 +67,7 @@ def unload_zipfile_by_year(year, verbose=False): file_list.append(location_base + ".zip") if num_files > 0: - for i in range(1, num_files+1): + for i in range(1, num_files + 1): file_list.append(location_base + "_" + str(i) + ".zip") for this_file in file_list: @@ -76,18 +75,22 @@ def unload_zipfile_by_year(year, verbose=False): def run_cli_retrieve_main(args_read): - print(""" + print( + """ Please visit 
https://www.irs.gov/charities-non-profits/form-990-series-downloads - To see if any additional files are available. - """) + To see if any additional files are available. + """ + ) for year in args_read.year: print("Processing %s files for year %s" % (year, number_of_files[year])) unload_zipfile_by_year(year, verbose=args_read.verbose) + def main(args=None): parser = get_cli_retrieve_parser() args = parser.parse_args() run_cli_retrieve_main(args) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/irs_reader/keyerror_utils.py b/irs_reader/keyerror_utils.py index 6d7ac2e..7b5348a 100644 --- a/irs_reader/keyerror_utils.py +++ b/irs_reader/keyerror_utils.py @@ -1,16 +1,61 @@ +ignorable_keyerrors = ["/ReturnHeader/BuildTS"] -ignorable_keyerrors = ['/ReturnHeader/BuildTS'] - -## Todo: put in 2013 / 2015 series canonicals. +# Todo: put in 2013 / 2015 series canonicals. # 2013 vars that no longer exist -discontinued_2013_vars = [ '/IRS990ScheduleA/CertificationInd', '/IRS990ScheduleA/Contribution35ControlledInd', '/IRS990ScheduleA/ContributionControllerInd', '/IRS990ScheduleA/ContributionFamilyInd', '/IRS990ScheduleA/Form990ScheduleAPartIVGrp/ExplanationTxt', '/IRS990ScheduleA/SupportedOrgInformationGrp/SupportedOrgNotifiedInd', '/IRS990ScheduleA/SupportedOrgInformationGrp/USOrganizedInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AdoptBudgetInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AdoptImplementationStrategyInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AllNeedsAddressedInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AttachedToInvoiceInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AvailableOnRequestInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/BodyAttachmentsInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/DevelopCommunityWidePlanInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ExecCommunityWidePlanInd', 
'/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ExecImplementationStrategyInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FPGUsedDeterEligFreeCareInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FPGUsedDetermEligDscntCareInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/IncludeOperationalPlanInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LawsuitInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LiensOnResidencesInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/MedicaidMedicareInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/OtherNeedsAddressedInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitBodyAttachmentsInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitLawsuitInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitLienOnResidenceInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PostedInAdmissionOfficeInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PostedInEmergencyRoomInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PrioritizeHealthNeedsInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PrioritizeServicesInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ProvidedOnAdmissionInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/StateRegulationInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/UninsuredDiscountInd'] +discontinued_2013_vars = [ + "/IRS990ScheduleA/CertificationInd", + "/IRS990ScheduleA/Contribution35ControlledInd", + "/IRS990ScheduleA/ContributionControllerInd", + "/IRS990ScheduleA/ContributionFamilyInd", + "/IRS990ScheduleA/Form990ScheduleAPartIVGrp/ExplanationTxt", + "/IRS990ScheduleA/SupportedOrgInformationGrp/SupportedOrgNotifiedInd", + "/IRS990ScheduleA/SupportedOrgInformationGrp/USOrganizedInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AdoptBudgetInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AdoptImplementationStrategyInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AllNeedsAddressedInd", + 
"/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AttachedToInvoiceInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AvailableOnRequestInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/BodyAttachmentsInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/DevelopCommunityWidePlanInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ExecCommunityWidePlanInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ExecImplementationStrategyInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FPGUsedDeterEligFreeCareInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FPGUsedDetermEligDscntCareInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/IncludeOperationalPlanInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LawsuitInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LiensOnResidencesInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/MedicaidMedicareInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/OtherNeedsAddressedInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitBodyAttachmentsInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitLawsuitInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PermitLienOnResidenceInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PostedInAdmissionOfficeInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PostedInEmergencyRoomInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PrioritizeHealthNeedsInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/PrioritizeServicesInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/ProvidedOnAdmissionInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/StateRegulationInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/UninsuredDiscountInd", +] # 2015 skedh vars removed -discontinued_2015_vars = ['/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AverageNegotiatedRatesInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/DocumentedEligDeterminationInd', 
'/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNoticeDisplayedInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedAllPatientsInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedBeforeDischargeInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedUponAdmissionInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/InformationGapsInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LowestNegotiatedRatesInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/MedicareRatesInd', '/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/OtherMethodUsedInd'] +discontinued_2015_vars = [ + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/AverageNegotiatedRatesInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/DocumentedEligDeterminationInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNoticeDisplayedInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedAllPatientsInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedBeforeDischargeInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/FAPNotifiedUponAdmissionInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/InformationGapsInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/LowestNegotiatedRatesInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/MedicareRatesInd", + "/IRS990ScheduleH/HospitalFcltyPoliciesPrctcGrp/OtherMethodUsedInd", +] ignorable = {} for key in ignorable_keyerrors + discontinued_2013_vars + discontinued_2015_vars: ignorable[key] = 1 + def ignorable_keyerror(xpath): try: ignorable[xpath] diff --git a/irs_reader/local_settings-example.py b/irs_reader/local_settings-example.py deleted file mode 100644 index 5201003..0000000 --- a/irs_reader/local_settings-example.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -from .dir_utils import mkdir_p - - -IRS_READER_ROOT = "/path/to/irsreader/990-xml-reader" - -# This is the URL to amazon's bucket, could use another synced to it -IRS_XML_HTTP_BASE = "https://s3.amazonaws.com/irs-form-990" - 
-# The directory we put files in while we're processing them -WORKING_DIRECTORY = (os.path.join(IRS_READER_ROOT, "XML") ) - -# Helpful to keep these around for lookup purposes -INDEX_DIRECTORY = (os.path.join(IRS_READER_ROOT, "CSV") ) - -mkdir_p([WORKING_DIRECTORY, INDEX_DIRECTORY]) \ No newline at end of file diff --git a/irs_reader/log_utils.py b/irs_reader/log_utils.py index c65584d..e6ebb0c 100644 --- a/irs_reader/log_utils.py +++ b/irs_reader/log_utils.py @@ -1,5 +1,6 @@ import logging -from .settings import LOG_KEY, KEYERROR_LOG + +from .settings import KEYERROR_LOG, LOG_KEY def configure_logging(name=LOG_KEY): @@ -7,7 +8,8 @@ def configure_logging(name=LOG_KEY): logger.setLevel(logging.INFO) # Format formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s") + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) # Setup console logging ch = logging.StreamHandler() diff --git a/irs_reader/metadata b/irs_reader/metadata index 4ad69cc..3d5dcf7 160000 --- a/irs_reader/metadata +++ b/irs_reader/metadata @@ -1 +1 @@ -Subproject commit 4ad69cc0f68dedb1137ccae34c4c84f88295b0a9 +Subproject commit 3d5dcf7ce120c90f657b1bdce937723a2bf4de20 diff --git a/irs_reader/object_ids.py b/irs_reader/object_ids.py index 556be07..0027928 100644 --- a/irs_reader/object_ids.py +++ b/irs_reader/object_ids.py @@ -1,12 +1,3012 @@ # poor person's sampling--just grab the first 1000 rows per year to test. 
# csvcut -c 9 index_2017.csv | head -n 1000 -object_ids_2017 = ['201612439349300006', '201612439349300026', '201612439349300341', '201612439349300516', '201612439349300546', '201612439349300601', '201612439349300621', '201612439349300746', '201612439349300861', '201612449349100601', '201612449349100706', '201612449349200001', '201612449349200101', '201612449349200111', '201612449349200121', '201612449349200756', '201612449349200761', '201612449349200806', '201612449349200906', '201612449349300216', '201612449349300336', '201612449349300406', '201612449349300606', '201612449349300636', '201612449349300756', '201612449349300786', '201612449349301061', '201612449349301071', '201612449349301076', '201612459349100321', '201612459349200111', '201612459349200206', '201612459349200321', '201612459349200431', '201612459349200506', '201612459349200711', '201612459349300241', '201612459349300301', '201632519349300958', '201632469349300108', '201632519349300313', '201642509349300319', '201642509349300704', '201642509349300209', '201642469349300244', '201632529349300833', '201632519349301153', '201642519349300434', '201602379349100415', '201602379349100605', '201602379349300230', '201602379349300530', '201602389349100605', '201602389349300545', '201602389349300845', '201602399349200620', '201602399349200715', '201602399349300310', '201602399349300330', '201602399349300410', '201602399349300605', '201602399349300610', '201602399349300625', '201602399349300700', '201602399349300725', '201602399349300800', '201602399349300900', '201602399349300915', '201602409349200000', '201602409349200200', '201602409349200700', '201612459349300346', '201612459349300426', '201612459349300431', '201612459349300536', '201612459349300626', '201612459349300816', '201612459349300971', '201612459349300976', '201612459349301101', '201612459349301111', '201612469349200411', '201612469349300206', '201612469349300231', '201612469349300426', '201612469349300501', '201612469349300601', '201612509349300101', 
'201612509349300166', '201612509349300211', '201612509349300356', '201612509349300476', '201612509349300501', '201612509349300621', '201612519349200021', '201612519349200611', '201612519349200736', '201612519349300136', '201612519349300141', '201612519349300146', '201612519349300206', '201612519349300601', '201612519349300766', '201612519349300826', '201612519349300901', '201612519349300951', '201612529349200701', '201612529349300801', '201612529349300836', '201612529349301251', '201622449349300427', '201602429349200000', '201602429349200100', '201602429349200310', '201602429349200615', '201602429349200640', '201602439349300045', '201602439349300415', '201602439349300535', '201602449349300910', '201602449349301055', '201602449349301090', '201602449349301405', '201602459349300345', '201602459349300405', '201602459349300715', '201602459349301055', '201602509349300350', '201602529349200405', '201602529349301100', '201602529349301155', '201602589349100020', '201602599349100130', '201612539349100751', '201602589349100610', '201602589349100210', '201622569349100422', '201622579349100302', '201602599349100000', '201602589349100310', '201602589349100515', '201602599349100405', '201602589349100120', '201622569349100452', '201622579349100117', '201602589349100300', '201602589349100205', '201602599349100630', '201602599349100310', '201622569349100312', '201602579349100515', '201602589349100105', '201602579349100705', '201602569349100600', '201622579349100017', '201602149349301245', '201602159349300625', '201642089349300449', '201642049349300519', '201632099349301278', '201632049349300243', '201612099349301021', '201641979349301154', '201641599349300219', '201631619349300243', '201612109349301306', '201642029349300864', '201602149349300040', '201602049349300800', '201632089349300823', '201622109349300627', '201602079349301200', '201632099349301353', '201632079349301368', '201632039349300953', '201602159349300225', '201602599349100435', '201612539349100406', 
'201602599349100030', '201602599349100640', '201602599349100430', '201602599349100625', '201602599349100205', '201602579349100200', '201602579349100115', '201602579349100105', '201602569349100085', '201622539349100702', '201622569349100122', '201612549349100401', '201612539349100001', '201612539349100311', '201622599349100022', '201602589349100625', '201602579349100715', '201602579349100100', '201602579349100005', '201602539349200810', '201602549349200005', '201602539349200000', '201602539349200510', '201602539349200800', '201602539349200230', '201602539349200805', '201602549349200600', '201602539349200310', '201622579349200247', '201602539349200010', '201602539349200135', '201602539349200605', '201602539349200005', '201602549349200105', '201602539349200600', '201602549349200100', '201602539349200525', '201602539349200300', '201602539349200520', '201602539349200235', '201602539349200710', '201602539349200115', '201602539349200515', '201602599349100215', '201612569349100466', '201602589349100400', '201612599349100301', '201612599349100116', '201602589349100810', '201612569349100501', '201602589349100005', '201622539349100802', '201612569349100401', '201602599349100515', '201602599349100210', '201602589349100805', '201602589349100000', '201612589349100511', '201622549349100002', '201622539349100312', '201612599349100136', '201612579349100706', '201612589349100421', '201602579349100000', '201602579349100210', '201612589349100411', '201612589349100406', '201612589349100326', '201602549349200700', '201602539349200110', '201602539349200225', '201612599349100106', '201612539349100616', '201602599349100530', '201622579349100702', '201622539349100502', '201622579349100412', '201612549349100501', '201612539349100401', '201602589349100750', '201602599349100135', '201602589349100510', '201602599349100420', '201602589349100110', '201602599349100535', '201602589349100015', '201622539349100757', '201622549349100302', '201622599349100002', '201622579349100802', 
'201622569349100617', '201622539349100007', '201622569349100512', '201612599349100211', '201612569349100616', '201612569349200726', '201612579349200761', '201612599349200636', '201612569349200621', '201612599349200606', '201612599349200131', '201612589349200501', '201622589349200342', '201622589349200132', '201612569349200136', '201612599349200121', '201612569349200011', '201612569349200751', '201612569349200001', '201612579349200641', '201612599349200711', '201612599349201111', '201612569349200616', '201612599349200611', '201612599349200541', '201612589349200441', '201642239349302074', '201632249349301788', '201632199349300108', '201602289349302985', '201602259349301770', '201642239349302474', '201602319349300950', '201642249349303004', '201632249349303053', '201632249349301818', '201642229349301129', '201602259349303350', '201602289349301880', '201632229349302013', '201622249349302207', '201632229349300443', '201642219349301344', '201642229349300884', '201642229349301644', '201602289349303935', '201632239349302363', '201642249349301329', '201602599349100635', '201612539349100301', '201602539349100760', '201612589349100626', '201612569349100606', '201602599349100525', '201602599349100400', '201612569349100506', '201622539349100767', '201612569349100461', '201602549349100400', '201602579349100510', '201602579349100800', '201612539349100306', '201602589349100710', '201602599349100020', '201602599349100010', '201602589349100410', '201612599349100201', '201612599349100006', '201612589349100601', '201622589349100717', '201612579349100511', '201602189349300440', '201642169349301589', '201632289349203713', '201632289349203468', '201632289349203398', '201632289349204603', '201632289349203743', '201632289349203373', '201632289349203233', '201632289349203158', '201632289349202553', '201632289349204143', '201632289349204153', '201632289349204003', '201632289349204313', '201632289349203663', '201632289349203628', '201632289349203563', '201632289349202753', 
'201632289349202633', '201632319349200013', '201632289349204248', '201632289349203993', '201632179349301418', '201612229349301241', '201602239349301585', '201602189349300985', '201602189349300760', '201642169349300104', '201612179349300641', '201622159349300027', '201642159349300334', '201602239349302575', '201602189349300865', '201632169349300528', '201622169349301667', '201622159349300787', '201602229349301885', '201612149349301421', '201642179349301574', '201602169349300300', '201602179349301510', '201622119349301027', '201642149349301339', '201622159349301767', '201612189349300641', '201612249349300436', '201612229349301486', '201612219349301091', '201602249349300445', '201602229349301920', '201602219349300525', '201602239349302375', '201602249349302260', '201622229349301242', '201612229349301306', '201602199349300500', '201622219349301032', '201622189349300897', '201602219349302255', '201602189349300950', '201602249349301185', '201612229349300146', '201612249349301301', '201632289349203813', '201632289349203808', '201632289349203568', '201632289349202108', '201632289349204358', '201632289349203223', '201632289349202923', '201642259349201179', '201642259349200709', '201642259349200424', '201632319349200113', '201632319349200033', '201632299349201003', '201632299349200933', '201632289349204413', '201632289349202683', '201632289349202558', '201632289349201388', '201632289349201243', '201632289349200923', '201632269349200133', '201642259349200549', '201632309349200418', '201632289349204273', '201632289349204258', '201632289349203703', '201632289349202983', '201632289349201408', '201632289349201353', '201632289349200228', '201642289349202399', '201642289349201969', '201642289349201744', '201642319349200329', '201642289349202324', '201642289349201379', '201642289349203804', '201642289349203564', '201642289349202134', '201642289349201074', '201642319349200319', '201642289349203174', '201642289349202924', '201642289349202874', '201642289349201979', 
'201642289349200829', '201642319349200719', '201612229349301736', '201612189349301406', '201622229349300527', '201602239349301695', '201612569349100081', '201622569349100457', '201612579349100001', '201622539349100412', '201612579349100206', '201612599349100311', '201612599349100506', '201612599349100401', '201612589349100206', '201612589349100016', '201622579349100312', '201622579349100402', '201602579349100610', '201612569349100076', '201612569349100006', '201612569349100001', '201622569349100072', '201622559349100002', '201612589349100211', '201612589349100126', '201602599349200910', '201602569349200640', '201602599349200745', '201602539349200405', '201602569349200635', '201602579349200715', '201602569349200860', '201602559349200050', '201602569349200135', '201602569349200405', '201602569349200000', '201602569349200530', '201602569349200240', '201602569349200865', '201602589349200120', '201602589349200520', '201602589349200300', '201602589349201005', '201602589349200725', '201602589349200005', '201602559349200000', '201602569349200125', '201602549349200500', '201642289349203299', '201642289349202499', '201642289349201729', '201632299349200913', '201632279349200808', '201632319349200038', '201632289349202428', '201632289349201613', '201632269349200003', '201632309349200718', '201632299349200903', '201632289349202593', '201642259349201084', '201642259349200719', '201632319349200128', '201632309349200438', '201632289349200938', '201632289349200853', '201632269349200613', '201642259349200334', '201632289349202218', '201632289349201508', '201632289349200208', '201632309349200528', '201632289349202408', '201632289349201643', '201632289349200413', '201632289349204608', '201632289349202943', '201632289349202363', '201632289349202008', '201632289349201988', '201632289349201773', '201632289349201578', '201632289349200003', '201632289349204233', '201632289349203433', '201632289349200433', '201632289349202213', '201632289349200223', '201632289349204103', 
'201632289349203348', '201632289349204238', '201632289349203138', '201632289349201733', '201632289349203003', '201632289349202803', '201632289349204378', '201602599349200945', '201602599349200920', '201602599349200325', '201602309349301000', '201602299349300525', '201632259349301783', '201602289349305800', '201612259349302576', '201602289349306480', '201602289349303740', '201602079349300950', '201612089349301256', '201612109349301301', '201642039349300014', '201632079349301123', '201642019349300124', '201641619349300539', '201642089349301274', '201632099349300403', '201602289349305745', '201612259349303741', '201612259349302571', '201612259349301956', '201602259349301840', '201642229349301139', '201602289349305390', '201632259349301883', '201622569349100052', '201612599349100141', '201612549349100001', '201612589349100021', '201612579349100011', '201612599349100411', '201622569349100132', '201622589349100422', '201622589349100507', '201602599349100510', '201602599349100140', '201602599349100105', '201622569349100302', '201622569349100002', '201622569349100082', '201622569349100077', '201622569349100612', '201612599349100216', '201612599349100121', '201612579349100016', '201632289349200143', '201632269349200603', '201631609349200708', '201642179349200249', '201642179349200439', '201602289349202390', '201602259349201435', '201612049349200001', '201612149349200121', '201612169349200301', '201612219349200731', '201612249349202001', '201602239349200910', '201632289349201078', '201632289349201113', '201632279349200038', '201632289349202003', '201632289349201973', '201632289349201143', '201632289349200028', '201632279349200223', '201632279349200713', '201632269349200233', '201632239349300138', '201622249349302802', '201622249349301907', '201642229349301629', '201632249349301433', '201632229349301808', '201642229349301344', '201602289349301630', '201642249349301799', '201642239349301989', '201642249349302259', '201642249349300714', '201602259349302660', 
'201632219349300933', '201632229349301128', '201622249349303002', '201632229349301373', '201642219349302219', '201632249349302148', '201622229349301197', '201622249349301172', '201612569349100086', '201632289349102063', '201632289349100543', '201632289349100508', '201632289349101548', '201632289349100748', '201632319349100103', '201632289349100003', '201622239349301852', '201632289349303273', '201632289349305258', '201632289349306863', '201632289349303708', '201632289349306598', '201632289349306363', '201632289349302613', '201632289349301978', '201632319349300748', '201632289349305448', '201632299349300208', '201632289349304968', '201632289349306638', '201632289349307128', '201632289349305023', '201632289349304128', '201632289349305693', '201632289349303163', '201632289349303643', '201632289349301418', '201622579349100512', '201612539349100811', '201622589349100512', '201622569349100467', '201622569349100307', '201622589349100407', '201622589349100002', '201622579349100507', '201622569349100117', '201622589349100427', '201602579349100600', '201602579349100400', '201622559349100152', '201622539349100407', '201612539349100851', '201622569349100112', '201622569349100102', '201602599349100425', '201622579349100012', '201622589349100807', '201622569349100552', '201622589349100417', '201622589349100327', '201622589349100207', '201622579349100307', '201612329349100001', '201612329349100501', '201612329349100506', '201612329349100706', '201612329349200006', '201612329349200226', '201612329349200306', '201612329349200526', '201612329349200801', '201612329349300301', '201632289349101028', '201632289349101583', '201632289349102008', '201632289349100418', '201642259349100004', '201632299349100303', '201632309349100303', '201642259349100544', '201632299349100003', '201642259349101114', '201632289349100938', '201632289349100623', '201632289349101358', '201632289349101918', '201632289349101198', '201632289349100913', '201632289349101578', '201632289349100733', 
'201632269349100403', '201642289349101929', '201642289349100929', '201642289349101909', '201642289349101564', '201602329349100705', '201642289349101289', '201642269349100304', '201642289349101274', '201602329349100805', '201602339349100100', '201642299349100714', '201642299349100709', '201642289349100944', '201642289349101879', '201642289349101519', '201642289349100734', '201642289349100414', '201642289349101689', '201642289349100224', '201642289349100349', '201642289349100214', '201642229349301184', '201642249349301549', '201632249349300013', '201642229349300839', '201642229349300909', '201632249349302078', '201632249349300513', '201632249349300503', '201632249349301498', '201642249349302379', '201642249349302039', '201642249349301544', '201642249349300729', '201642249349301614', '201642229349301239', '201642229349301324', '201642229349301054', '201642229349301179', '201642219349301764', '201642219349302074', '201642219349301564', '201602579349300515', '201612579349300236', '201602569349300880', '201602569349300420', '201602579349300825', '201602569349300315', '201602579349300310', '201602579349300315', '201602579349301210', '201602569349300900', '201602569349300785', '201602559349300155', '201602569349300110', '201602559349300350', '201612539349300006', '201612589349301136', '201602579349301265', '201602579349300520', '201602579349300855', '201602579349300400', '201602579349300895', '201602579349300720', '201602579349300885', '201602579349300820', '201602569349301265', '201602569349301000', '201612329349300631', '201612329349300646', '201612329349301101', '201612329349301151', '201612329349301206', '201612339349100201', '201612339349200301', '201612339349200501', '201612349349200251', '201612359349100351', '201612359349100361', '201612359349100411', '201612359349200041', '201612359349200256', '201612359349200501', '201612359349300216', '201612359349300461', '201612359349300501', '201612359349300721', '201612359349300901', '201612359349301006', 
'201612369349100756', '201612369349200111', '201612369349200406', '201612369349200416', '201612369349200531', '201612369349300001', '201612369349300011', '201612369349300201', '201612369349300301', '201612369349300336', '201612369349300346', '201612369349300506', '201612369349300611', '201612369349300726', '201612369349300906', '201612369349301156', '201612379349100001', '201602539349100765', '201602539349100210', '201602539349100805', '201602569349100465', '201602539349100610', '201602539349100100', '201602549349100000', '201602539349100005', '201602539349100200', '201602539349100105', '201602539349100800', '201602539349100615', '201622569349100067', '201622569349100057', '201602539349100205', '201602539349100605', '201602539349100600', '201602539349100110', '201602539349100500', '201602539349100810', '201602539349100415', '201602539349100410', '201602549349100500', '201602539349100405', '201602539349100000', '201602569349100300', '201602569349100415', '201642259349100034', '201632289349101218', '201632289349100243', '201632289349100218', '201632269349100003', '201632289349100838', '201632289349100038', '201632289349100523', '201632289349101158', '201632289349101043', '201632289349100048', '201632289349100408', '201632289349101668', '201632289349101213', '201632289349100333', '201632269349100203', '201632289349102003', '201632289349101658', '201632289349100933', '201632289349101513', '201632289349101018', '201632279349100403', '201642259349101004', '201642259349100509', '201632319349100708', '201612379349100116', '201612379349100411', '201612379349100606', '201612379349200001', '201612379349200231', '201612379349200616', '201612379349200706', '201612379349200711', '201612379349200721', '201612379349300016', '201612379349300036', '201612379349300126', '201612379349300226', '201612379349300301', '201612379349300331', '201612379349300401', '201612379349300606', '201612379349300631', '201612379349300716', '201612379349300766', '201612389349100501', 
'201612389349100506', '201612389349200006', '201612389349200126', '201612389349200206', '201612389349200401', '201612389349200606', '201612389349200726', '201612389349300001', '201612389349300016', '201612389349300031', '201612389349300121', '201612389349300141', '201612389349300206', '201612389349300411', '201612389349300501', '201612389349300541', '201612389349300631', '201642459349300959', '201642459349300939', '201642459349300969', '201642439349300724', '201642449349300534', '201632509349300238', '201632509349300003', '201632509349300023', '201642449349300544', '201642459349300949', '201642459349301054', '201642449349300104', '201642519349300709', '201642509349300384', '201642519349300319', '201642509349300219', '201642469349300624', '201642469349300204', '201632529349301013', '201612389349300816', '201612399349200316', '201612399349200506', '201612399349300011', '201612399349300041', '201612399349300211', '201612399349300426', '201612399349300516', '201612399349300626', '201612399349300726', '201612399349300736', '201612409349200401', '201612419349200301', '201612419349300051', '201612429349200131', '201612429349200146', '201612429349200201', '201612429349200216', '201612429349200511', '201612429349200716', '201612429349200751', '201612429349300111', '201612429349300576', '201612429349300586', '201612429349300591', '201612429349300596', '201612429349300691', '201612429349300721', '201612429349300811', '201612429349300966', '201612429349301051', '201612429349301161', '201612439349100201', '201612439349100306', '201612439349100606', '201612439349200401', '201612439349200516', '201612439349200621', '201612439349200631', '201602519349100005', '201602509349100165', '201602509349100060', '201632529349100603', '201642519349100314', '201642459349100104', '201632519349100508', '201602509349100000', '201602509349100210', '201602469349100505', '201622519349100512', '201612519349100311', '201642459349100659', '201602509349100310', '201642509349100359', 
'201622519349100507', '201622519349100702', '201612509349100356', '201602519349100205', '201602519349100315', '201642459349100654', '201602469349100205', '201642449349100409', '201642469349100709', '201602469349100310', '201602469349100010'] +object_ids_2017 = [ + "201612439349300006", + "201612439349300026", + "201612439349300341", + "201612439349300516", + "201612439349300546", + "201612439349300601", + "201612439349300621", + "201612439349300746", + "201612439349300861", + "201612449349100601", + "201612449349100706", + "201612449349200001", + "201612449349200101", + "201612449349200111", + "201612449349200121", + "201612449349200756", + "201612449349200761", + "201612449349200806", + "201612449349200906", + "201612449349300216", + "201612449349300336", + "201612449349300406", + "201612449349300606", + "201612449349300636", + "201612449349300756", + "201612449349300786", + "201612449349301061", + "201612449349301071", + "201612449349301076", + "201612459349100321", + "201612459349200111", + "201612459349200206", + "201612459349200321", + "201612459349200431", + "201612459349200506", + "201612459349200711", + "201612459349300241", + "201612459349300301", + "201632519349300958", + "201632469349300108", + "201632519349300313", + "201642509349300319", + "201642509349300704", + "201642509349300209", + "201642469349300244", + "201632529349300833", + "201632519349301153", + "201642519349300434", + "201602379349100415", + "201602379349100605", + "201602379349300230", + "201602379349300530", + "201602389349100605", + "201602389349300545", + "201602389349300845", + "201602399349200620", + "201602399349200715", + "201602399349300310", + "201602399349300330", + "201602399349300410", + "201602399349300605", + "201602399349300610", + "201602399349300625", + "201602399349300700", + "201602399349300725", + "201602399349300800", + "201602399349300900", + "201602399349300915", + "201602409349200000", + "201602409349200200", + "201602409349200700", + "201612459349300346", + 
"201612459349300426", + "201612459349300431", + "201612459349300536", + "201612459349300626", + "201612459349300816", + "201612459349300971", + "201612459349300976", + "201612459349301101", + "201612459349301111", + "201612469349200411", + "201612469349300206", + "201612469349300231", + "201612469349300426", + "201612469349300501", + "201612469349300601", + "201612509349300101", + "201612509349300166", + "201612509349300211", + "201612509349300356", + "201612509349300476", + "201612509349300501", + "201612509349300621", + "201612519349200021", + "201612519349200611", + "201612519349200736", + "201612519349300136", + "201612519349300141", + "201612519349300146", + "201612519349300206", + "201612519349300601", + "201612519349300766", + "201612519349300826", + "201612519349300901", + "201612519349300951", + "201612529349200701", + "201612529349300801", + "201612529349300836", + "201612529349301251", + "201622449349300427", + "201602429349200000", + "201602429349200100", + "201602429349200310", + "201602429349200615", + "201602429349200640", + "201602439349300045", + "201602439349300415", + "201602439349300535", + "201602449349300910", + "201602449349301055", + "201602449349301090", + "201602449349301405", + "201602459349300345", + "201602459349300405", + "201602459349300715", + "201602459349301055", + "201602509349300350", + "201602529349200405", + "201602529349301100", + "201602529349301155", + "201602589349100020", + "201602599349100130", + "201612539349100751", + "201602589349100610", + "201602589349100210", + "201622569349100422", + "201622579349100302", + "201602599349100000", + "201602589349100310", + "201602589349100515", + "201602599349100405", + "201602589349100120", + "201622569349100452", + "201622579349100117", + "201602589349100300", + "201602589349100205", + "201602599349100630", + "201602599349100310", + "201622569349100312", + "201602579349100515", + "201602589349100105", + "201602579349100705", + "201602569349100600", + "201622579349100017", + 
"201602149349301245", + "201602159349300625", + "201642089349300449", + "201642049349300519", + "201632099349301278", + "201632049349300243", + "201612099349301021", + "201641979349301154", + "201641599349300219", + "201631619349300243", + "201612109349301306", + "201642029349300864", + "201602149349300040", + "201602049349300800", + "201632089349300823", + "201622109349300627", + "201602079349301200", + "201632099349301353", + "201632079349301368", + "201632039349300953", + "201602159349300225", + "201602599349100435", + "201612539349100406", + "201602599349100030", + "201602599349100640", + "201602599349100430", + "201602599349100625", + "201602599349100205", + "201602579349100200", + "201602579349100115", + "201602579349100105", + "201602569349100085", + "201622539349100702", + "201622569349100122", + "201612549349100401", + "201612539349100001", + "201612539349100311", + "201622599349100022", + "201602589349100625", + "201602579349100715", + "201602579349100100", + "201602579349100005", + "201602539349200810", + "201602549349200005", + "201602539349200000", + "201602539349200510", + "201602539349200800", + "201602539349200230", + "201602539349200805", + "201602549349200600", + "201602539349200310", + "201622579349200247", + "201602539349200010", + "201602539349200135", + "201602539349200605", + "201602539349200005", + "201602549349200105", + "201602539349200600", + "201602549349200100", + "201602539349200525", + "201602539349200300", + "201602539349200520", + "201602539349200235", + "201602539349200710", + "201602539349200115", + "201602539349200515", + "201602599349100215", + "201612569349100466", + "201602589349100400", + "201612599349100301", + "201612599349100116", + "201602589349100810", + "201612569349100501", + "201602589349100005", + "201622539349100802", + "201612569349100401", + "201602599349100515", + "201602599349100210", + "201602589349100805", + "201602589349100000", + "201612589349100511", + "201622549349100002", + "201622539349100312", + 
"201612599349100136", + "201612579349100706", + "201612589349100421", + "201602579349100000", + "201602579349100210", + "201612589349100411", + "201612589349100406", + "201612589349100326", + "201602549349200700", + "201602539349200110", + "201602539349200225", + "201612599349100106", + "201612539349100616", + "201602599349100530", + "201622579349100702", + "201622539349100502", + "201622579349100412", + "201612549349100501", + "201612539349100401", + "201602589349100750", + "201602599349100135", + "201602589349100510", + "201602599349100420", + "201602589349100110", + "201602599349100535", + "201602589349100015", + "201622539349100757", + "201622549349100302", + "201622599349100002", + "201622579349100802", + "201622569349100617", + "201622539349100007", + "201622569349100512", + "201612599349100211", + "201612569349100616", + "201612569349200726", + "201612579349200761", + "201612599349200636", + "201612569349200621", + "201612599349200606", + "201612599349200131", + "201612589349200501", + "201622589349200342", + "201622589349200132", + "201612569349200136", + "201612599349200121", + "201612569349200011", + "201612569349200751", + "201612569349200001", + "201612579349200641", + "201612599349200711", + "201612599349201111", + "201612569349200616", + "201612599349200611", + "201612599349200541", + "201612589349200441", + "201642239349302074", + "201632249349301788", + "201632199349300108", + "201602289349302985", + "201602259349301770", + "201642239349302474", + "201602319349300950", + "201642249349303004", + "201632249349303053", + "201632249349301818", + "201642229349301129", + "201602259349303350", + "201602289349301880", + "201632229349302013", + "201622249349302207", + "201632229349300443", + "201642219349301344", + "201642229349300884", + "201642229349301644", + "201602289349303935", + "201632239349302363", + "201642249349301329", + "201602599349100635", + "201612539349100301", + "201602539349100760", + "201612589349100626", + "201612569349100606", + 
"201602599349100525", + "201602599349100400", + "201612569349100506", + "201622539349100767", + "201612569349100461", + "201602549349100400", + "201602579349100510", + "201602579349100800", + "201612539349100306", + "201602589349100710", + "201602599349100020", + "201602599349100010", + "201602589349100410", + "201612599349100201", + "201612599349100006", + "201612589349100601", + "201622589349100717", + "201612579349100511", + "201602189349300440", + "201642169349301589", + "201632289349203713", + "201632289349203468", + "201632289349203398", + "201632289349204603", + "201632289349203743", + "201632289349203373", + "201632289349203233", + "201632289349203158", + "201632289349202553", + "201632289349204143", + "201632289349204153", + "201632289349204003", + "201632289349204313", + "201632289349203663", + "201632289349203628", + "201632289349203563", + "201632289349202753", + "201632289349202633", + "201632319349200013", + "201632289349204248", + "201632289349203993", + "201632179349301418", + "201612229349301241", + "201602239349301585", + "201602189349300985", + "201602189349300760", + "201642169349300104", + "201612179349300641", + "201622159349300027", + "201642159349300334", + "201602239349302575", + "201602189349300865", + "201632169349300528", + "201622169349301667", + "201622159349300787", + "201602229349301885", + "201612149349301421", + "201642179349301574", + "201602169349300300", + "201602179349301510", + "201622119349301027", + "201642149349301339", + "201622159349301767", + "201612189349300641", + "201612249349300436", + "201612229349301486", + "201612219349301091", + "201602249349300445", + "201602229349301920", + "201602219349300525", + "201602239349302375", + "201602249349302260", + "201622229349301242", + "201612229349301306", + "201602199349300500", + "201622219349301032", + "201622189349300897", + "201602219349302255", + "201602189349300950", + "201602249349301185", + "201612229349300146", + "201612249349301301", + "201632289349203813", + 
"201632289349203808", + "201632289349203568", + "201632289349202108", + "201632289349204358", + "201632289349203223", + "201632289349202923", + "201642259349201179", + "201642259349200709", + "201642259349200424", + "201632319349200113", + "201632319349200033", + "201632299349201003", + "201632299349200933", + "201632289349204413", + "201632289349202683", + "201632289349202558", + "201632289349201388", + "201632289349201243", + "201632289349200923", + "201632269349200133", + "201642259349200549", + "201632309349200418", + "201632289349204273", + "201632289349204258", + "201632289349203703", + "201632289349202983", + "201632289349201408", + "201632289349201353", + "201632289349200228", + "201642289349202399", + "201642289349201969", + "201642289349201744", + "201642319349200329", + "201642289349202324", + "201642289349201379", + "201642289349203804", + "201642289349203564", + "201642289349202134", + "201642289349201074", + "201642319349200319", + "201642289349203174", + "201642289349202924", + "201642289349202874", + "201642289349201979", + "201642289349200829", + "201642319349200719", + "201612229349301736", + "201612189349301406", + "201622229349300527", + "201602239349301695", + "201612569349100081", + "201622569349100457", + "201612579349100001", + "201622539349100412", + "201612579349100206", + "201612599349100311", + "201612599349100506", + "201612599349100401", + "201612589349100206", + "201612589349100016", + "201622579349100312", + "201622579349100402", + "201602579349100610", + "201612569349100076", + "201612569349100006", + "201612569349100001", + "201622569349100072", + "201622559349100002", + "201612589349100211", + "201612589349100126", + "201602599349200910", + "201602569349200640", + "201602599349200745", + "201602539349200405", + "201602569349200635", + "201602579349200715", + "201602569349200860", + "201602559349200050", + "201602569349200135", + "201602569349200405", + "201602569349200000", + "201602569349200530", + "201602569349200240", + 
"201602569349200865", + "201602589349200120", + "201602589349200520", + "201602589349200300", + "201602589349201005", + "201602589349200725", + "201602589349200005", + "201602559349200000", + "201602569349200125", + "201602549349200500", + "201642289349203299", + "201642289349202499", + "201642289349201729", + "201632299349200913", + "201632279349200808", + "201632319349200038", + "201632289349202428", + "201632289349201613", + "201632269349200003", + "201632309349200718", + "201632299349200903", + "201632289349202593", + "201642259349201084", + "201642259349200719", + "201632319349200128", + "201632309349200438", + "201632289349200938", + "201632289349200853", + "201632269349200613", + "201642259349200334", + "201632289349202218", + "201632289349201508", + "201632289349200208", + "201632309349200528", + "201632289349202408", + "201632289349201643", + "201632289349200413", + "201632289349204608", + "201632289349202943", + "201632289349202363", + "201632289349202008", + "201632289349201988", + "201632289349201773", + "201632289349201578", + "201632289349200003", + "201632289349204233", + "201632289349203433", + "201632289349200433", + "201632289349202213", + "201632289349200223", + "201632289349204103", + "201632289349203348", + "201632289349204238", + "201632289349203138", + "201632289349201733", + "201632289349203003", + "201632289349202803", + "201632289349204378", + "201602599349200945", + "201602599349200920", + "201602599349200325", + "201602309349301000", + "201602299349300525", + "201632259349301783", + "201602289349305800", + "201612259349302576", + "201602289349306480", + "201602289349303740", + "201602079349300950", + "201612089349301256", + "201612109349301301", + "201642039349300014", + "201632079349301123", + "201642019349300124", + "201641619349300539", + "201642089349301274", + "201632099349300403", + "201602289349305745", + "201612259349303741", + "201612259349302571", + "201612259349301956", + "201602259349301840", + "201642229349301139", + 
"201602289349305390", + "201632259349301883", + "201622569349100052", + "201612599349100141", + "201612549349100001", + "201612589349100021", + "201612579349100011", + "201612599349100411", + "201622569349100132", + "201622589349100422", + "201622589349100507", + "201602599349100510", + "201602599349100140", + "201602599349100105", + "201622569349100302", + "201622569349100002", + "201622569349100082", + "201622569349100077", + "201622569349100612", + "201612599349100216", + "201612599349100121", + "201612579349100016", + "201632289349200143", + "201632269349200603", + "201631609349200708", + "201642179349200249", + "201642179349200439", + "201602289349202390", + "201602259349201435", + "201612049349200001", + "201612149349200121", + "201612169349200301", + "201612219349200731", + "201612249349202001", + "201602239349200910", + "201632289349201078", + "201632289349201113", + "201632279349200038", + "201632289349202003", + "201632289349201973", + "201632289349201143", + "201632289349200028", + "201632279349200223", + "201632279349200713", + "201632269349200233", + "201632239349300138", + "201622249349302802", + "201622249349301907", + "201642229349301629", + "201632249349301433", + "201632229349301808", + "201642229349301344", + "201602289349301630", + "201642249349301799", + "201642239349301989", + "201642249349302259", + "201642249349300714", + "201602259349302660", + "201632219349300933", + "201632229349301128", + "201622249349303002", + "201632229349301373", + "201642219349302219", + "201632249349302148", + "201622229349301197", + "201622249349301172", + "201612569349100086", + "201632289349102063", + "201632289349100543", + "201632289349100508", + "201632289349101548", + "201632289349100748", + "201632319349100103", + "201632289349100003", + "201622239349301852", + "201632289349303273", + "201632289349305258", + "201632289349306863", + "201632289349303708", + "201632289349306598", + "201632289349306363", + "201632289349302613", + "201632289349301978", + 
"201632319349300748", + "201632289349305448", + "201632299349300208", + "201632289349304968", + "201632289349306638", + "201632289349307128", + "201632289349305023", + "201632289349304128", + "201632289349305693", + "201632289349303163", + "201632289349303643", + "201632289349301418", + "201622579349100512", + "201612539349100811", + "201622589349100512", + "201622569349100467", + "201622569349100307", + "201622589349100407", + "201622589349100002", + "201622579349100507", + "201622569349100117", + "201622589349100427", + "201602579349100600", + "201602579349100400", + "201622559349100152", + "201622539349100407", + "201612539349100851", + "201622569349100112", + "201622569349100102", + "201602599349100425", + "201622579349100012", + "201622589349100807", + "201622569349100552", + "201622589349100417", + "201622589349100327", + "201622589349100207", + "201622579349100307", + "201612329349100001", + "201612329349100501", + "201612329349100506", + "201612329349100706", + "201612329349200006", + "201612329349200226", + "201612329349200306", + "201612329349200526", + "201612329349200801", + "201612329349300301", + "201632289349101028", + "201632289349101583", + "201632289349102008", + "201632289349100418", + "201642259349100004", + "201632299349100303", + "201632309349100303", + "201642259349100544", + "201632299349100003", + "201642259349101114", + "201632289349100938", + "201632289349100623", + "201632289349101358", + "201632289349101918", + "201632289349101198", + "201632289349100913", + "201632289349101578", + "201632289349100733", + "201632269349100403", + "201642289349101929", + "201642289349100929", + "201642289349101909", + "201642289349101564", + "201602329349100705", + "201642289349101289", + "201642269349100304", + "201642289349101274", + "201602329349100805", + "201602339349100100", + "201642299349100714", + "201642299349100709", + "201642289349100944", + "201642289349101879", + "201642289349101519", + "201642289349100734", + "201642289349100414", + 
"201642289349101689", + "201642289349100224", + "201642289349100349", + "201642289349100214", + "201642229349301184", + "201642249349301549", + "201632249349300013", + "201642229349300839", + "201642229349300909", + "201632249349302078", + "201632249349300513", + "201632249349300503", + "201632249349301498", + "201642249349302379", + "201642249349302039", + "201642249349301544", + "201642249349300729", + "201642249349301614", + "201642229349301239", + "201642229349301324", + "201642229349301054", + "201642229349301179", + "201642219349301764", + "201642219349302074", + "201642219349301564", + "201602579349300515", + "201612579349300236", + "201602569349300880", + "201602569349300420", + "201602579349300825", + "201602569349300315", + "201602579349300310", + "201602579349300315", + "201602579349301210", + "201602569349300900", + "201602569349300785", + "201602559349300155", + "201602569349300110", + "201602559349300350", + "201612539349300006", + "201612589349301136", + "201602579349301265", + "201602579349300520", + "201602579349300855", + "201602579349300400", + "201602579349300895", + "201602579349300720", + "201602579349300885", + "201602579349300820", + "201602569349301265", + "201602569349301000", + "201612329349300631", + "201612329349300646", + "201612329349301101", + "201612329349301151", + "201612329349301206", + "201612339349100201", + "201612339349200301", + "201612339349200501", + "201612349349200251", + "201612359349100351", + "201612359349100361", + "201612359349100411", + "201612359349200041", + "201612359349200256", + "201612359349200501", + "201612359349300216", + "201612359349300461", + "201612359349300501", + "201612359349300721", + "201612359349300901", + "201612359349301006", + "201612369349100756", + "201612369349200111", + "201612369349200406", + "201612369349200416", + "201612369349200531", + "201612369349300001", + "201612369349300011", + "201612369349300201", + "201612369349300301", + "201612369349300336", + "201612369349300346", + 
"201612369349300506", + "201612369349300611", + "201612369349300726", + "201612369349300906", + "201612369349301156", + "201612379349100001", + "201602539349100765", + "201602539349100210", + "201602539349100805", + "201602569349100465", + "201602539349100610", + "201602539349100100", + "201602549349100000", + "201602539349100005", + "201602539349100200", + "201602539349100105", + "201602539349100800", + "201602539349100615", + "201622569349100067", + "201622569349100057", + "201602539349100205", + "201602539349100605", + "201602539349100600", + "201602539349100110", + "201602539349100500", + "201602539349100810", + "201602539349100415", + "201602539349100410", + "201602549349100500", + "201602539349100405", + "201602539349100000", + "201602569349100300", + "201602569349100415", + "201642259349100034", + "201632289349101218", + "201632289349100243", + "201632289349100218", + "201632269349100003", + "201632289349100838", + "201632289349100038", + "201632289349100523", + "201632289349101158", + "201632289349101043", + "201632289349100048", + "201632289349100408", + "201632289349101668", + "201632289349101213", + "201632289349100333", + "201632269349100203", + "201632289349102003", + "201632289349101658", + "201632289349100933", + "201632289349101513", + "201632289349101018", + "201632279349100403", + "201642259349101004", + "201642259349100509", + "201632319349100708", + "201612379349100116", + "201612379349100411", + "201612379349100606", + "201612379349200001", + "201612379349200231", + "201612379349200616", + "201612379349200706", + "201612379349200711", + "201612379349200721", + "201612379349300016", + "201612379349300036", + "201612379349300126", + "201612379349300226", + "201612379349300301", + "201612379349300331", + "201612379349300401", + "201612379349300606", + "201612379349300631", + "201612379349300716", + "201612379349300766", + "201612389349100501", + "201612389349100506", + "201612389349200006", + "201612389349200126", + "201612389349200206", + 
"201612389349200401", + "201612389349200606", + "201612389349200726", + "201612389349300001", + "201612389349300016", + "201612389349300031", + "201612389349300121", + "201612389349300141", + "201612389349300206", + "201612389349300411", + "201612389349300501", + "201612389349300541", + "201612389349300631", + "201642459349300959", + "201642459349300939", + "201642459349300969", + "201642439349300724", + "201642449349300534", + "201632509349300238", + "201632509349300003", + "201632509349300023", + "201642449349300544", + "201642459349300949", + "201642459349301054", + "201642449349300104", + "201642519349300709", + "201642509349300384", + "201642519349300319", + "201642509349300219", + "201642469349300624", + "201642469349300204", + "201632529349301013", + "201612389349300816", + "201612399349200316", + "201612399349200506", + "201612399349300011", + "201612399349300041", + "201612399349300211", + "201612399349300426", + "201612399349300516", + "201612399349300626", + "201612399349300726", + "201612399349300736", + "201612409349200401", + "201612419349200301", + "201612419349300051", + "201612429349200131", + "201612429349200146", + "201612429349200201", + "201612429349200216", + "201612429349200511", + "201612429349200716", + "201612429349200751", + "201612429349300111", + "201612429349300576", + "201612429349300586", + "201612429349300591", + "201612429349300596", + "201612429349300691", + "201612429349300721", + "201612429349300811", + "201612429349300966", + "201612429349301051", + "201612429349301161", + "201612439349100201", + "201612439349100306", + "201612439349100606", + "201612439349200401", + "201612439349200516", + "201612439349200621", + "201612439349200631", + "201602519349100005", + "201602509349100165", + "201602509349100060", + "201632529349100603", + "201642519349100314", + "201642459349100104", + "201632519349100508", + "201602509349100000", + "201602509349100210", + "201602469349100505", + "201622519349100512", + "201612519349100311", + 
"201642459349100659", + "201602509349100310", + "201642509349100359", + "201622519349100507", + "201622519349100702", + "201612509349100356", + "201602519349100205", + "201602519349100315", + "201642459349100654", + "201602469349100205", + "201642449349100409", + "201642469349100709", + "201602469349100310", + "201602469349100010", +] -#csvcut -c 9 index_2016.csv | head -n 1000 > returns_2016.txt +# csvcut -c 9 index_2016.csv | head -n 1000 > returns_2016.txt -object_ids_2016 = ['201543159349100344', '201543109349200219', '201513089349200226', '201513089349200236', '201523229349300327', '201543089349301829', '201533179349306298', '201533179349201108', '201533179349203783', '201533209349304768', '201533179349307343', '201533209349204083', '201533209349204123', '201533209349204128', '201533209349204148', '201533209349204153', '201533209349204178', '201533209349204198', '201533209349204208', '201533209349204223', '201533209349204228', '201533189349300608', '201523069349301367', '201533069349300963', '201523099349300542', '201533099349301033', '201533099349301043', '201523169349304367', '201533099349301803', '201523069349300142', '201533109349300348', '201503069349100380', '201513089349100601', '201523039349200407', '201543039349301204', '201523039349200632', '201523039349200637', '201523089349301462', '201533069349300788', '201533079349300238', '201543149349201279', '201543159349100504', '201543169349201334', '201543169349201349', '201543109349200229', '201533169349100748', '201533169349100808', '201513069349200601', '201523209349314227', '201523209349314257', '201523209349311332', '201533179349302173', '201533179349307048', '201523219349200632', '201533179349201623', '201533179349201643', '201543109349100104', '201533209349302633', '201533179349200538', '201533179349200618', '201533179349203683', '201533179349203728', '201533209349306188', '201533209349204843', '201533099349301103', '201533099349301113', '201523039349300127', '201523079349301652', 
'201533039349300813', '201533139349300148', '201533139349300208', '201533069349301413', '201533079349300003', '201523039349200827', '201523079349200027', '201523079349200237', '201523069349300957', '201523079349301387', '201533079349200823', '201523209349310937', '201523209349310947', '201543089349201054', '201533179349306528', '201533179349303278', '201543079349200609', '201543079349200529', '201533179349306278', '201523099349201102', '201523239349300002', '201533209349205278', '201533209349205353', '201533209349201488', '201533209349203893', '201533209349203908', '201533209349203913', '201533209349203923', '201533209349201753', '201533209349201808', '201533209349302303', '201533179349307818', '201533179349307828', '201523209349311892', '201533179349309453', '201533209349301728', '201533209349301738', '201533189349100703', '201533209349102838', '201533209349102858', '201533209349101368', '201533209349101373', '201533179349307538', '201533209349203328', '201533209349203503', '201533209349306423', '201533209349306438', '201533209349203508', '201533209349203518', '201533179349307838', '201533189349300223', '201533179349309083', '201533189349300233', '201543099349200889', '201503099349201105', '201513079349201106', '201513089349200936', '201513089349100726', '201523069349301172', '201533069349300408', '201543169349201379', '201523209349314732', '201523209349313972', '201533179349202718', '201533179349202668', '201533179349202733', '201533099349200108', '201523209349311802', '201533209349205593', '201533209349101563', '201533179349308913', '201533209349102818', '201533179349309088', '201533189349300443', '201533209349101958', '201533209349206523', '201533179349309208', '201533209349305433', '201533209349305438', '201533209349206623', '201533179349309223', '201533179349309268', '201533209349102023', '201533209349102028', '201533179349309348', '201533199349100413', '201533209349102278', '201543099349301839', '201503069349200845', '201513069349200231', 
'201503069349200970', '201503069349200980', '201513069349200316', '201513099349201151', '201513079349100106', '201513099349100201', '201523069349301047', '201523069349301057', '201523069349301112', '201523069349301117', '201523069349301127', '201523069349301132', '201523069349301157', '201503039349100615', '201523089349300312', '201523089349300317', '201523089349300322', '201533069349300508', '201533069349300708', '201533069349300718', '201533069349300778', '201533069349300803', '201543159349200314', '201543159349200319', '201543089349301374', '201523209349316527', '201533169349101073', '201543069349100569', '201533179349305958', '201523229349300237', '201533179349303308', '201533179349305888', '201533179349305893', '201533179349305903', '201533179349305923', '201533179349306913', '201543079349301154', '201523219349200727', '201523219349200747', '201543109349200224', '201543109349200234', '201543109349200304', '201533169349100833', '201533069349300443', '201533069349300868', '201533179349301803', '201523209349311197', '201523069349100357', '201533179349305948', '201523229349300307', '201523209349314597', '201523209349314607', '201523209349315117', '201523209349312647', '201523209349312657', '201523209349310667', '201523219349301067', '201523209349313657', '201533179349201693', '201543109349100329', '201523219349100207', '201533179349100403', '201533179349100418', '201533179349100508', '201533179349100513', '201523219349200132', '201533179349200718', '201533209349302068', '201533179349100823', '201533179349100938', '201533179349101128', '201533179349101688', '201533179349101693', '201533179349307523', '201533209349306058', '201543069349301474', '201543149349201919', '201543149349202034', '201543089349200014', '201543099349201214', '201543159349100614', '201543159349100629', '201543149349201549', '201513089349201211', '201503089349201250', '201513099349100036', '201513099349100041', '201523069349301752', '201543139349101009', '201523229349300507', 
'201543109349100009', '201533209349302098', '201533209349301008', '201533209349301023', '201533179349203653', '201533179349203853', '201533179349203858', '201533179349308653', '201533179349203888', '201533189349200133', '201533179349102213', '201533179349102453', '201523069349301392', '201533099349302063', '201533099349302068', '201533099349302103', '201523079349300547', '201523079349300607', '201523079349300707', '201523039349301092', '201533099349301028', '201523039349301227', '201533089349300648', '201513089349100511', '201503099349201100', '201543089349100729', '201523229349300122', '201523229349300142', '201533179349303898', '201533179349303908', '201533179349303913', '201523209349310247', '201533179349300928', '201523209349314977', '201533179349302118', '201523209349313352', '201523209349313387', '201523209349313392', '201533179349303713', '201533179349303723', '201533179349302818', '201523229349200117', '201533179349201233', '201523219349200142', '201523219349200147', '201523219349200207', '201523039349300942', '201523039349300977', '201523039349300982', '201533039349300948', '201543039349300044', '201543039349301154', '201533079349300213', '201533089349300538', '201533079349300328', '201533089349200978', '201523079349201057', '201533039349200308', '201533099349301313', '201523079349200447', '201523079349200627', '201533079349301248', '201533079349301328', '201533049349300403', '201543069349201204', '201543169349201579', '201543109349200424', '201523209349311052', '201523239349300612', '201523209349312367', '201543109349100014', '201543109349100124', '201533099349301538', '201523089349301817', '201523089349301927', '201533089349300633', '201523069349200307', '201533069349200328', '201533069349200338', '201533069349200428', '201533069349200433', '201523069349201167', '201523069349201202', '201523069349201257', '201523069349201302', '201533209349302198', '201523239349300107', '201533209349203173', '201523239349300202', '201533209349206183', 
'201533209349206208', '201533179349300543', '201533209349204003', '201533199349200413', '201533199349200648', '201533209349205633', '201533209349205643', '201533209349302503', '201523209349311932', '201533179349308283', '201533179349308293', '201533179349308308', '201533209349202273', '201533209349303908', '201533209349306913', '201533209349103568', '201543159349200914', '201543159349200919', '201543099349200334', '201543099349200624', '201523239349300307', '201533179349303018', '201533179349306963', '201533089349100613', '201533089349100623', '201533179349306108', '201533179349203403', '201533209349301973', '201533179349101908', '201533209349304733', '201533189349200148', '201533189349200213', '201533189349200223', '201533209349201793', '201523049349300007', '201523049349300107', '201533099349301308', '201503099349300215', '201503099349300225', '201533109349300523', '201533089349301458', '201523099349300347', '201543039349200339', '201523069349200962', '201523069349201002', '201543079349301354', '201533209349203958', '201533209349203963', '201533209349203978', '201533209349203983', '201533209349205873', '201533209349103663', '201533209349103678', '201533209349103238', '201533189349100513', '201503069349201110', '201503069349201115', '201503069349201150', '201503069349201165', '201503069349201215', '201503039349200340', '201513069349200956', '201513099349201116', '201523069349301482', '201543169349201394', '201543169349201369', '201543139349100604', '201523229349300047', '201523209349310187', '201523209349310192', '201523209349312807', '201523209349314007', '201533089349100528', '201523209349314152', '201523219349200627', '201523219349200712', '201523219349200732', '201523219349200802', '201523219349200907', '201533179349202178', '201533179349203378', '201533179349203398', '201533209349300603', '201533209349300608', '201533209349300618', '201533209349300623', '201523069349301327', '201523069349300707', '201523089349301977', '201533039349300038', 
'201513089349100326', '201523089349301827', '201533089349200923', '201523079349301177', '201533039349300823', '201533039349300828', '201543159349101159', '201543139349100014', '201543139349100024', '201543139349100029', '201513089349201101', '201513089349201106', '201513089349201116', '201513089349201156', '201513089349201161', '201513069349200331', '201503079349200795', '201503089349100030', '201543089349301464', '201533179349303958', '201533179349303998', '201523209349312622', '201533179349101313', '201533209349201068', '201523099349200307', '201523069349200337', '201523219349201152', '201533179349202388', '201533179349202413', '201533179349202453', '201523209349314792', '201543079349301174', '201543079349301194', '201543079349301214', '201543079349301309', '201533209349205268', '201533209349201613', '201533209349204028', '201533209349204038', '201533209349204053', '201533209349305963', '201533179349308908', '201533199349300033', '201533209349101623', '201533209349101633', '201533209349101643', '201543119349200204', '201543119349200304', '201543159349200519', '201543159349200529', '201543149349201779', '201543159349200609', '201543149349201799', '201543159349200614', '201543159349200704', '201543159349200734', '201543159349200749', '201543149349201439', '201543159349100804', '201543159349100809', '201543099349200234', '201543099349200339', '201503099349200960', '201513039349100006', '201523209349308842', '201523209349308857', '201533179349306558', '201543079349300119', '201533179349302383', '201533179349302388', '201533179349302413', '201533179349302423', '201533179349101993', '201533179349101998', '201533209349304753', '201533179349305163', '201533209349305143', '201533209349305178', '201533189349300433', '201523089349301007', '201523099349301812', '201533099349301168', '201533099349301173', '201523059349300002', '201523059349300007', '201523209349310582', '201523219349300902', '201523219349300927', '201523219349300932', '201533179349303218', 
'201543079349200724', '201533179349303963', '201533179349201488', '201533179349201513', '201523209349315257', '201533209349301013', '201523069349300737', '201523039349300142', '201523089349301962', '201523039349300912', '201523089349300147', '201523099349301322', '201533089349200928', '201523079349301522', '201523079349301562', '201533079349300603', '201533209349304958', '201533209349202433', '201533209349103903', '201533209349307203', '201533079349200308', '201503089349100620', '201533089349301298', '201543149349303884', '201513069349100376', '201543159349302009', '201543169349301754', '201543159349302049', '201533089349300118', '201543089349200419', '201543169349301814', '201543099349200014', '201543099349200239', '201543159349200949', '201543149349201409', '201543149349201429', '201543099349201109', '201543109349200404', '201543139349100044', '201543139349100104', '201543139349100109', '201523209349313967', '201523209349313982', '201523039349301162', '201533039349301308', '201523099349301372', '201533099349300408', '201533099349300433', '201533099349300513', '201533099349300623', '201523099349301587', '201533079349300708', '201533089349301623', '201533079349301718', '201533099349300003', '201533099349300008', '201533089349301943', '201533089349301953', '201523099349200332', '201523099349200402', '201533209349304158', '201533179349202848', '201533179349202858', '201533179349202873', '201533179349202903', '201533209349202243', '201533209349202258', '201533209349202298', '201533209349202313', '201533209349202343', '201513079349200921', '201523209349310107', '201523209349313822', '201533179349304353', '201533179349200728', '201523209349315237', '201533209349200208', '201533209349201173', '201533209349201178', '201533209349302323', '201523209349311942', '201533209349306063', '201533209349307733', '201533179349302083', '201533179349302108', '201533209349309743', '201533209349310963', '201533209349310968', '201533209349310998', '201533209349310378', 
'201533209349308923', '201533209349310413', '201533209349311033', '201533209349308968', '201533209349310418', '201533209349311133', '201533209349314503', '201543179349306014', '201533209349309328', '201533209349314523', '201533209349314548', '201533209349314553', '201533209349310808', '201533209349314683', '201533209349314693', '201533209349310838', '201533209349314718', '201533209349314723', '201503089349200640', '201513099349200901', '201513079349200321', '201503069349200425', '201503069349200430', '201503069349200515', '201503069349200530', '201503069349200755', '201503069349200900', '201503099349200145', '201503099349200210', '201513079349200541', '201513089349200131', '201513089349200206', '201513079349100601', '201523089349300307', '201523209349308707', '201523209349311307', '201523209349314277', '201523069349100212', '201523209349310987', '201543089349201069', '201523209349311582', '201523209349311657', '201523039349100802', '201523079349100007', '201543089349301989', '201533179349201188', '201533209349300923', '201533179349203573', '201533209349306638', '201523099349301782', '201523099349301787', '201523089349300032', '201503069349300805', '201503069349300815', '201523039349301212', '201523099349300047', '201523099349300112', '201533139349300103', '201533089349300748', '201543099349301914', '201503069349201300', '201543089349301469', '201533179349301748', '201533179349301758', '201523209349313117', '201523209349315172', '201523209349315217', '201523209349314172', '201533179349302358', '201533179349302363', '201523239349100202', '201523239349100307', '201523239349100402', '201533079349300148', '201533089349300513', '201543039349300509', '201543039349300519', '201523069349300892', '201533039349200338', '201533039349200343', '201533039349200418', '201543039349300624', '201523069349300217', '201523069349300247', '201523069349300432', '201523099349300237', '201533089349300048', '201533079349200303', '201533079349200403', '201523069349200972', 
'201533209349205148', '201533209349206148', '201533209349201633', '201533209349201643', '201533199349200318', '201533209349201398', '201533209349101543', '201533189349100603', '201533209349306018', '201533199349100708', '201533199349100718', '201533069349300238', '201533179349306763', '201523209349312602', '201523209349312607', '201533179349301013', '201533179349303838', '201543089349301684', '201543089349301719', '201543089349301724', '201543089349301814', '201543089349301909', '201543089349301924', '201543089349301949', '201523239349200117', '201533209349307668', '201533209349307673', '201533179349203813', '201533209349204963', '201533209349204988', '201533179349202458', '201533209349102443', '201533179349309068', '201533209349300428', '201533199349301023', '201533199349301028', '201533179349309583', '201533209349307183', '201533179349306873', '201533069349200213', '201533069349200233', '201543069349201104', '201543159349100639', '201543149349201534', '201543139349100004', '201543139349100034', '201523209349308717', '201543089349200729', '201523209349316562', '201523209349313147', '201533179349302053', '201533169349101053', '201533179349306148', '201543079349300814', '201523209349312297', '201523209349312322', '201523209349312327', '201523209349312337', '201533179349201983', '201523099349301962', '201533089349301428', '201523069349300127', '201523219349201107', '201533179349202373', '201533209349302993', '201533209349103268', '201533209349103288', '201533209349306393', '201533179349309508', '201533199349301408', '201533179349303488', '201533179349305663', '201533179349305693', '201533179349305708', '201533179349306808', '201533069349200343', '201543089349200229', '201543089349200244', '201543089349200249', '201543099349201204', '201543169349201574', '201543139349100419', '201523209349314662', '201523209349314712', '201523209349313887', '201533179349303978', '201523209349312442', '201533179349201388', '201533179349202838', '201523219349200307', 
'201523209349312307', '201523229349100702', '201533209349304743', '201533209349204813', '201523069349300797', '201523069349300812', '201523069349300827', '201503099349300235', '201533089349301583', '201533089349300433', '201533079349301213', '201533079349301223', '201533209349204298', '201533209349100828', '201533209349206463', '201533209349206818', '201533099349200443', '201533099349200518', '201523089349200247', '201523099349200437', '201523089349200507', '201523089349200512', '201523089349200547', '201523039349200832', '201533089349200738', '201533169349100923', '201533179349303518', '201543069349100369', '201543069349100379', '201543069349100414', '201543069349100524', '201543079349200629', '201533179349307778', '201533209349302523', '201533209349307303', '201533209349307333', '201533209349307363', '201523239349300862', '201533209349303283', '201533209349303298', '201533209349303303', '201533209349303773', '201523099349200432', '201523079349300612', '201503089349100510', '201533039349300213', '201513099349300511', '201543159349100124', '201543159349100134', '201543159349100139', '201543149349100034', '201543149349100049', '201543149349303704', '201543149349303804', '201543149349101224', '201543169349301619', '201533159349304133', '201543039349100624', '201543039349100629', '201533099349100038', '201533089349300043', '201543089349200044', '201543089349200104', '201543089349200119', '201543089349200139', '201543069349200314', '201523209349310152', '201523209349313832', '201533179349201673', '201533179349201678', '201533179349201723', '201543109349100404', '201523209349312222', '201523209349312232', '201533209349303123', '201533209349303163', '201533179349307363', '201533209349302293', '201533209349204648', '201533209349205588', '201533209349307383', '201533179349202793', '201533179349202843', '201533179349202853', '201533179349202863', '201533179349308278', '201533179349308328', '201533209349203058', '201533209349102923', '201533209349206783', 
'201533209349303308', '201533209349303358', '201533209349303388', '201533209349303708', '201533209349303418', '201503069349200810', '201503069349200830', '201503069349200990', '201513089349200511', '201513089349200526', '201543079349300739', '201543079349300834', '201523209349313357', '201523209349312217', '201523239349200227', '201523239349200312', '201533179349200708', '201533179349300308', '201533179349300323', '201533179349300333', '201533179349300338', '201533209349302678', '201533179349203833', '201523209349311832', '201533209349103233', '201533209349101553', '201533209349306053', '201503039349300600', '201543159349100039', '201543169349301719', '201523089349100002', '201523089349100017', '201523089349100022', '201523089349100207', '201533099349100613', '201533209349310883', '201533209349310893', '201533209349310923', '201533209349310928', '201543169349201529', '201543149349202024', '201543159349201019', '201543159349200724', '201543109349200209', '201543109349200314', '201543109349200504', '201543109349200434', '201543099349200224', '201543099349200429', '201543099349301959', '201513079349201066', '201513079349201101', '201513039349200211', '201503099349100735', '201533069349300143', '201533069349300618', '201523099349301612', '201543139349100749', '201543139349100754', '201543139349100764', '201523209349308752', '201523209349308762', '201523209349310132', '201523209349308852', '201523209349311232', '201523069349100322', '201523069349100327', '201543079349100204', '201523209349313587', '201533179349305998', '201543099349100009', '201523239349100407', '201523239349100502', '201533179349102208', '201523099349301772', '201533099349301158', '201503099349300130', '201523079349301702', '201523079349301712', '201543069349201114', '201543069349201304', '201543159349200134', '201523209349312642', '201533179349306993', '201533169349100218', '201533179349201218', '201533179349203553', '201523209349312382', '201533209349301128', '201533179349305138', 
'201523099349300012', '201543039349301004', '201533109349300648', '201533039349300848', '201543039349301109', '201523079349301527', '201523079349200032', '201533089349200743', '201543039349300544', '201533069349301108', '201533089349300023', '201533039349200213', '201533079349300818', '201533039349300923', '201523099349200737', '201533079349300903', '201523069349200342', '201533209349202168', '201533209349202178', '201533209349302238', '201533209349201453', '201533209349201548', '201523209349312842', '201533209349201278', '201533209349201418', '201533199349300638', '201533209349306133', '201533209349103323', '201533209349103328', '201543159349200419', '201523209349313022'] +object_ids_2016 = [ + "201543159349100344", + "201543109349200219", + "201513089349200226", + "201513089349200236", + "201523229349300327", + "201543089349301829", + "201533179349306298", + "201533179349201108", + "201533179349203783", + "201533209349304768", + "201533179349307343", + "201533209349204083", + "201533209349204123", + "201533209349204128", + "201533209349204148", + "201533209349204153", + "201533209349204178", + "201533209349204198", + "201533209349204208", + "201533209349204223", + "201533209349204228", + "201533189349300608", + "201523069349301367", + "201533069349300963", + "201523099349300542", + "201533099349301033", + "201533099349301043", + "201523169349304367", + "201533099349301803", + "201523069349300142", + "201533109349300348", + "201503069349100380", + "201513089349100601", + "201523039349200407", + "201543039349301204", + "201523039349200632", + "201523039349200637", + "201523089349301462", + "201533069349300788", + "201533079349300238", + "201543149349201279", + "201543159349100504", + "201543169349201334", + "201543169349201349", + "201543109349200229", + "201533169349100748", + "201533169349100808", + "201513069349200601", + "201523209349314227", + "201523209349314257", + "201523209349311332", + "201533179349302173", + "201533179349307048", + "201523219349200632", 
+ "201533179349201623", + "201533179349201643", + "201543109349100104", + "201533209349302633", + "201533179349200538", + "201533179349200618", + "201533179349203683", + "201533179349203728", + "201533209349306188", + "201533209349204843", + "201533099349301103", + "201533099349301113", + "201523039349300127", + "201523079349301652", + "201533039349300813", + "201533139349300148", + "201533139349300208", + "201533069349301413", + "201533079349300003", + "201523039349200827", + "201523079349200027", + "201523079349200237", + "201523069349300957", + "201523079349301387", + "201533079349200823", + "201523209349310937", + "201523209349310947", + "201543089349201054", + "201533179349306528", + "201533179349303278", + "201543079349200609", + "201543079349200529", + "201533179349306278", + "201523099349201102", + "201523239349300002", + "201533209349205278", + "201533209349205353", + "201533209349201488", + "201533209349203893", + "201533209349203908", + "201533209349203913", + "201533209349203923", + "201533209349201753", + "201533209349201808", + "201533209349302303", + "201533179349307818", + "201533179349307828", + "201523209349311892", + "201533179349309453", + "201533209349301728", + "201533209349301738", + "201533189349100703", + "201533209349102838", + "201533209349102858", + "201533209349101368", + "201533209349101373", + "201533179349307538", + "201533209349203328", + "201533209349203503", + "201533209349306423", + "201533209349306438", + "201533209349203508", + "201533209349203518", + "201533179349307838", + "201533189349300223", + "201533179349309083", + "201533189349300233", + "201543099349200889", + "201503099349201105", + "201513079349201106", + "201513089349200936", + "201513089349100726", + "201523069349301172", + "201533069349300408", + "201543169349201379", + "201523209349314732", + "201523209349313972", + "201533179349202718", + "201533179349202668", + "201533179349202733", + "201533099349200108", + "201523209349311802", + "201533209349205593", + 
"201533209349101563", + "201533179349308913", + "201533209349102818", + "201533179349309088", + "201533189349300443", + "201533209349101958", + "201533209349206523", + "201533179349309208", + "201533209349305433", + "201533209349305438", + "201533209349206623", + "201533179349309223", + "201533179349309268", + "201533209349102023", + "201533209349102028", + "201533179349309348", + "201533199349100413", + "201533209349102278", + "201543099349301839", + "201503069349200845", + "201513069349200231", + "201503069349200970", + "201503069349200980", + "201513069349200316", + "201513099349201151", + "201513079349100106", + "201513099349100201", + "201523069349301047", + "201523069349301057", + "201523069349301112", + "201523069349301117", + "201523069349301127", + "201523069349301132", + "201523069349301157", + "201503039349100615", + "201523089349300312", + "201523089349300317", + "201523089349300322", + "201533069349300508", + "201533069349300708", + "201533069349300718", + "201533069349300778", + "201533069349300803", + "201543159349200314", + "201543159349200319", + "201543089349301374", + "201523209349316527", + "201533169349101073", + "201543069349100569", + "201533179349305958", + "201523229349300237", + "201533179349303308", + "201533179349305888", + "201533179349305893", + "201533179349305903", + "201533179349305923", + "201533179349306913", + "201543079349301154", + "201523219349200727", + "201523219349200747", + "201543109349200224", + "201543109349200234", + "201543109349200304", + "201533169349100833", + "201533069349300443", + "201533069349300868", + "201533179349301803", + "201523209349311197", + "201523069349100357", + "201533179349305948", + "201523229349300307", + "201523209349314597", + "201523209349314607", + "201523209349315117", + "201523209349312647", + "201523209349312657", + "201523209349310667", + "201523219349301067", + "201523209349313657", + "201533179349201693", + "201543109349100329", + "201523219349100207", + "201533179349100403", + 
"201533179349100418", + "201533179349100508", + "201533179349100513", + "201523219349200132", + "201533179349200718", + "201533209349302068", + "201533179349100823", + "201533179349100938", + "201533179349101128", + "201533179349101688", + "201533179349101693", + "201533179349307523", + "201533209349306058", + "201543069349301474", + "201543149349201919", + "201543149349202034", + "201543089349200014", + "201543099349201214", + "201543159349100614", + "201543159349100629", + "201543149349201549", + "201513089349201211", + "201503089349201250", + "201513099349100036", + "201513099349100041", + "201523069349301752", + "201543139349101009", + "201523229349300507", + "201543109349100009", + "201533209349302098", + "201533209349301008", + "201533209349301023", + "201533179349203653", + "201533179349203853", + "201533179349203858", + "201533179349308653", + "201533179349203888", + "201533189349200133", + "201533179349102213", + "201533179349102453", + "201523069349301392", + "201533099349302063", + "201533099349302068", + "201533099349302103", + "201523079349300547", + "201523079349300607", + "201523079349300707", + "201523039349301092", + "201533099349301028", + "201523039349301227", + "201533089349300648", + "201513089349100511", + "201503099349201100", + "201543089349100729", + "201523229349300122", + "201523229349300142", + "201533179349303898", + "201533179349303908", + "201533179349303913", + "201523209349310247", + "201533179349300928", + "201523209349314977", + "201533179349302118", + "201523209349313352", + "201523209349313387", + "201523209349313392", + "201533179349303713", + "201533179349303723", + "201533179349302818", + "201523229349200117", + "201533179349201233", + "201523219349200142", + "201523219349200147", + "201523219349200207", + "201523039349300942", + "201523039349300977", + "201523039349300982", + "201533039349300948", + "201543039349300044", + "201543039349301154", + "201533079349300213", + "201533089349300538", + "201533079349300328", + 
"201533089349200978", + "201523079349201057", + "201533039349200308", + "201533099349301313", + "201523079349200447", + "201523079349200627", + "201533079349301248", + "201533079349301328", + "201533049349300403", + "201543069349201204", + "201543169349201579", + "201543109349200424", + "201523209349311052", + "201523239349300612", + "201523209349312367", + "201543109349100014", + "201543109349100124", + "201533099349301538", + "201523089349301817", + "201523089349301927", + "201533089349300633", + "201523069349200307", + "201533069349200328", + "201533069349200338", + "201533069349200428", + "201533069349200433", + "201523069349201167", + "201523069349201202", + "201523069349201257", + "201523069349201302", + "201533209349302198", + "201523239349300107", + "201533209349203173", + "201523239349300202", + "201533209349206183", + "201533209349206208", + "201533179349300543", + "201533209349204003", + "201533199349200413", + "201533199349200648", + "201533209349205633", + "201533209349205643", + "201533209349302503", + "201523209349311932", + "201533179349308283", + "201533179349308293", + "201533179349308308", + "201533209349202273", + "201533209349303908", + "201533209349306913", + "201533209349103568", + "201543159349200914", + "201543159349200919", + "201543099349200334", + "201543099349200624", + "201523239349300307", + "201533179349303018", + "201533179349306963", + "201533089349100613", + "201533089349100623", + "201533179349306108", + "201533179349203403", + "201533209349301973", + "201533179349101908", + "201533209349304733", + "201533189349200148", + "201533189349200213", + "201533189349200223", + "201533209349201793", + "201523049349300007", + "201523049349300107", + "201533099349301308", + "201503099349300215", + "201503099349300225", + "201533109349300523", + "201533089349301458", + "201523099349300347", + "201543039349200339", + "201523069349200962", + "201523069349201002", + "201543079349301354", + "201533209349203958", + "201533209349203963", + 
"201533209349203978", + "201533209349203983", + "201533209349205873", + "201533209349103663", + "201533209349103678", + "201533209349103238", + "201533189349100513", + "201503069349201110", + "201503069349201115", + "201503069349201150", + "201503069349201165", + "201503069349201215", + "201503039349200340", + "201513069349200956", + "201513099349201116", + "201523069349301482", + "201543169349201394", + "201543169349201369", + "201543139349100604", + "201523229349300047", + "201523209349310187", + "201523209349310192", + "201523209349312807", + "201523209349314007", + "201533089349100528", + "201523209349314152", + "201523219349200627", + "201523219349200712", + "201523219349200732", + "201523219349200802", + "201523219349200907", + "201533179349202178", + "201533179349203378", + "201533179349203398", + "201533209349300603", + "201533209349300608", + "201533209349300618", + "201533209349300623", + "201523069349301327", + "201523069349300707", + "201523089349301977", + "201533039349300038", + "201513089349100326", + "201523089349301827", + "201533089349200923", + "201523079349301177", + "201533039349300823", + "201533039349300828", + "201543159349101159", + "201543139349100014", + "201543139349100024", + "201543139349100029", + "201513089349201101", + "201513089349201106", + "201513089349201116", + "201513089349201156", + "201513089349201161", + "201513069349200331", + "201503079349200795", + "201503089349100030", + "201543089349301464", + "201533179349303958", + "201533179349303998", + "201523209349312622", + "201533179349101313", + "201533209349201068", + "201523099349200307", + "201523069349200337", + "201523219349201152", + "201533179349202388", + "201533179349202413", + "201533179349202453", + "201523209349314792", + "201543079349301174", + "201543079349301194", + "201543079349301214", + "201543079349301309", + "201533209349205268", + "201533209349201613", + "201533209349204028", + "201533209349204038", + "201533209349204053", + "201533209349305963", + 
"201533179349308908", + "201533199349300033", + "201533209349101623", + "201533209349101633", + "201533209349101643", + "201543119349200204", + "201543119349200304", + "201543159349200519", + "201543159349200529", + "201543149349201779", + "201543159349200609", + "201543149349201799", + "201543159349200614", + "201543159349200704", + "201543159349200734", + "201543159349200749", + "201543149349201439", + "201543159349100804", + "201543159349100809", + "201543099349200234", + "201543099349200339", + "201503099349200960", + "201513039349100006", + "201523209349308842", + "201523209349308857", + "201533179349306558", + "201543079349300119", + "201533179349302383", + "201533179349302388", + "201533179349302413", + "201533179349302423", + "201533179349101993", + "201533179349101998", + "201533209349304753", + "201533179349305163", + "201533209349305143", + "201533209349305178", + "201533189349300433", + "201523089349301007", + "201523099349301812", + "201533099349301168", + "201533099349301173", + "201523059349300002", + "201523059349300007", + "201523209349310582", + "201523219349300902", + "201523219349300927", + "201523219349300932", + "201533179349303218", + "201543079349200724", + "201533179349303963", + "201533179349201488", + "201533179349201513", + "201523209349315257", + "201533209349301013", + "201523069349300737", + "201523039349300142", + "201523089349301962", + "201523039349300912", + "201523089349300147", + "201523099349301322", + "201533089349200928", + "201523079349301522", + "201523079349301562", + "201533079349300603", + "201533209349304958", + "201533209349202433", + "201533209349103903", + "201533209349307203", + "201533079349200308", + "201503089349100620", + "201533089349301298", + "201543149349303884", + "201513069349100376", + "201543159349302009", + "201543169349301754", + "201543159349302049", + "201533089349300118", + "201543089349200419", + "201543169349301814", + "201543099349200014", + "201543099349200239", + "201543159349200949", + 
"201543149349201409", + "201543149349201429", + "201543099349201109", + "201543109349200404", + "201543139349100044", + "201543139349100104", + "201543139349100109", + "201523209349313967", + "201523209349313982", + "201523039349301162", + "201533039349301308", + "201523099349301372", + "201533099349300408", + "201533099349300433", + "201533099349300513", + "201533099349300623", + "201523099349301587", + "201533079349300708", + "201533089349301623", + "201533079349301718", + "201533099349300003", + "201533099349300008", + "201533089349301943", + "201533089349301953", + "201523099349200332", + "201523099349200402", + "201533209349304158", + "201533179349202848", + "201533179349202858", + "201533179349202873", + "201533179349202903", + "201533209349202243", + "201533209349202258", + "201533209349202298", + "201533209349202313", + "201533209349202343", + "201513079349200921", + "201523209349310107", + "201523209349313822", + "201533179349304353", + "201533179349200728", + "201523209349315237", + "201533209349200208", + "201533209349201173", + "201533209349201178", + "201533209349302323", + "201523209349311942", + "201533209349306063", + "201533209349307733", + "201533179349302083", + "201533179349302108", + "201533209349309743", + "201533209349310963", + "201533209349310968", + "201533209349310998", + "201533209349310378", + "201533209349308923", + "201533209349310413", + "201533209349311033", + "201533209349308968", + "201533209349310418", + "201533209349311133", + "201533209349314503", + "201543179349306014", + "201533209349309328", + "201533209349314523", + "201533209349314548", + "201533209349314553", + "201533209349310808", + "201533209349314683", + "201533209349314693", + "201533209349310838", + "201533209349314718", + "201533209349314723", + "201503089349200640", + "201513099349200901", + "201513079349200321", + "201503069349200425", + "201503069349200430", + "201503069349200515", + "201503069349200530", + "201503069349200755", + "201503069349200900", + 
"201503099349200145", + "201503099349200210", + "201513079349200541", + "201513089349200131", + "201513089349200206", + "201513079349100601", + "201523089349300307", + "201523209349308707", + "201523209349311307", + "201523209349314277", + "201523069349100212", + "201523209349310987", + "201543089349201069", + "201523209349311582", + "201523209349311657", + "201523039349100802", + "201523079349100007", + "201543089349301989", + "201533179349201188", + "201533209349300923", + "201533179349203573", + "201533209349306638", + "201523099349301782", + "201523099349301787", + "201523089349300032", + "201503069349300805", + "201503069349300815", + "201523039349301212", + "201523099349300047", + "201523099349300112", + "201533139349300103", + "201533089349300748", + "201543099349301914", + "201503069349201300", + "201543089349301469", + "201533179349301748", + "201533179349301758", + "201523209349313117", + "201523209349315172", + "201523209349315217", + "201523209349314172", + "201533179349302358", + "201533179349302363", + "201523239349100202", + "201523239349100307", + "201523239349100402", + "201533079349300148", + "201533089349300513", + "201543039349300509", + "201543039349300519", + "201523069349300892", + "201533039349200338", + "201533039349200343", + "201533039349200418", + "201543039349300624", + "201523069349300217", + "201523069349300247", + "201523069349300432", + "201523099349300237", + "201533089349300048", + "201533079349200303", + "201533079349200403", + "201523069349200972", + "201533209349205148", + "201533209349206148", + "201533209349201633", + "201533209349201643", + "201533199349200318", + "201533209349201398", + "201533209349101543", + "201533189349100603", + "201533209349306018", + "201533199349100708", + "201533199349100718", + "201533069349300238", + "201533179349306763", + "201523209349312602", + "201523209349312607", + "201533179349301013", + "201533179349303838", + "201543089349301684", + "201543089349301719", + "201543089349301724", + 
"201543089349301814", + "201543089349301909", + "201543089349301924", + "201543089349301949", + "201523239349200117", + "201533209349307668", + "201533209349307673", + "201533179349203813", + "201533209349204963", + "201533209349204988", + "201533179349202458", + "201533209349102443", + "201533179349309068", + "201533209349300428", + "201533199349301023", + "201533199349301028", + "201533179349309583", + "201533209349307183", + "201533179349306873", + "201533069349200213", + "201533069349200233", + "201543069349201104", + "201543159349100639", + "201543149349201534", + "201543139349100004", + "201543139349100034", + "201523209349308717", + "201543089349200729", + "201523209349316562", + "201523209349313147", + "201533179349302053", + "201533169349101053", + "201533179349306148", + "201543079349300814", + "201523209349312297", + "201523209349312322", + "201523209349312327", + "201523209349312337", + "201533179349201983", + "201523099349301962", + "201533089349301428", + "201523069349300127", + "201523219349201107", + "201533179349202373", + "201533209349302993", + "201533209349103268", + "201533209349103288", + "201533209349306393", + "201533179349309508", + "201533199349301408", + "201533179349303488", + "201533179349305663", + "201533179349305693", + "201533179349305708", + "201533179349306808", + "201533069349200343", + "201543089349200229", + "201543089349200244", + "201543089349200249", + "201543099349201204", + "201543169349201574", + "201543139349100419", + "201523209349314662", + "201523209349314712", + "201523209349313887", + "201533179349303978", + "201523209349312442", + "201533179349201388", + "201533179349202838", + "201523219349200307", + "201523209349312307", + "201523229349100702", + "201533209349304743", + "201533209349204813", + "201523069349300797", + "201523069349300812", + "201523069349300827", + "201503099349300235", + "201533089349301583", + "201533089349300433", + "201533079349301213", + "201533079349301223", + "201533209349204298", + 
"201533209349100828", + "201533209349206463", + "201533209349206818", + "201533099349200443", + "201533099349200518", + "201523089349200247", + "201523099349200437", + "201523089349200507", + "201523089349200512", + "201523089349200547", + "201523039349200832", + "201533089349200738", + "201533169349100923", + "201533179349303518", + "201543069349100369", + "201543069349100379", + "201543069349100414", + "201543069349100524", + "201543079349200629", + "201533179349307778", + "201533209349302523", + "201533209349307303", + "201533209349307333", + "201533209349307363", + "201523239349300862", + "201533209349303283", + "201533209349303298", + "201533209349303303", + "201533209349303773", + "201523099349200432", + "201523079349300612", + "201503089349100510", + "201533039349300213", + "201513099349300511", + "201543159349100124", + "201543159349100134", + "201543159349100139", + "201543149349100034", + "201543149349100049", + "201543149349303704", + "201543149349303804", + "201543149349101224", + "201543169349301619", + "201533159349304133", + "201543039349100624", + "201543039349100629", + "201533099349100038", + "201533089349300043", + "201543089349200044", + "201543089349200104", + "201543089349200119", + "201543089349200139", + "201543069349200314", + "201523209349310152", + "201523209349313832", + "201533179349201673", + "201533179349201678", + "201533179349201723", + "201543109349100404", + "201523209349312222", + "201523209349312232", + "201533209349303123", + "201533209349303163", + "201533179349307363", + "201533209349302293", + "201533209349204648", + "201533209349205588", + "201533209349307383", + "201533179349202793", + "201533179349202843", + "201533179349202853", + "201533179349202863", + "201533179349308278", + "201533179349308328", + "201533209349203058", + "201533209349102923", + "201533209349206783", + "201533209349303308", + "201533209349303358", + "201533209349303388", + "201533209349303708", + "201533209349303418", + "201503069349200810", + 
"201503069349200830", + "201503069349200990", + "201513089349200511", + "201513089349200526", + "201543079349300739", + "201543079349300834", + "201523209349313357", + "201523209349312217", + "201523239349200227", + "201523239349200312", + "201533179349200708", + "201533179349300308", + "201533179349300323", + "201533179349300333", + "201533179349300338", + "201533209349302678", + "201533179349203833", + "201523209349311832", + "201533209349103233", + "201533209349101553", + "201533209349306053", + "201503039349300600", + "201543159349100039", + "201543169349301719", + "201523089349100002", + "201523089349100017", + "201523089349100022", + "201523089349100207", + "201533099349100613", + "201533209349310883", + "201533209349310893", + "201533209349310923", + "201533209349310928", + "201543169349201529", + "201543149349202024", + "201543159349201019", + "201543159349200724", + "201543109349200209", + "201543109349200314", + "201543109349200504", + "201543109349200434", + "201543099349200224", + "201543099349200429", + "201543099349301959", + "201513079349201066", + "201513079349201101", + "201513039349200211", + "201503099349100735", + "201533069349300143", + "201533069349300618", + "201523099349301612", + "201543139349100749", + "201543139349100754", + "201543139349100764", + "201523209349308752", + "201523209349308762", + "201523209349310132", + "201523209349308852", + "201523209349311232", + "201523069349100322", + "201523069349100327", + "201543079349100204", + "201523209349313587", + "201533179349305998", + "201543099349100009", + "201523239349100407", + "201523239349100502", + "201533179349102208", + "201523099349301772", + "201533099349301158", + "201503099349300130", + "201523079349301702", + "201523079349301712", + "201543069349201114", + "201543069349201304", + "201543159349200134", + "201523209349312642", + "201533179349306993", + "201533169349100218", + "201533179349201218", + "201533179349203553", + "201523209349312382", + "201533209349301128", + 
"201533179349305138", + "201523099349300012", + "201543039349301004", + "201533109349300648", + "201533039349300848", + "201543039349301109", + "201523079349301527", + "201523079349200032", + "201533089349200743", + "201543039349300544", + "201533069349301108", + "201533089349300023", + "201533039349200213", + "201533079349300818", + "201533039349300923", + "201523099349200737", + "201533079349300903", + "201523069349200342", + "201533209349202168", + "201533209349202178", + "201533209349302238", + "201533209349201453", + "201533209349201548", + "201523209349312842", + "201533209349201278", + "201533209349201418", + "201533199349300638", + "201533209349306133", + "201533209349103323", + "201533209349103328", + "201543159349200419", + "201523209349313022", +] # csvcut -c 9 index_2015.csv | head -n 1000 > returns_2015.txt -object_ids_2015 = ['201542399349300614', '201542399349300619', '201542399349300629', '201542399349300634', '201542399349300719', '201542399349300724', '201542399349300739', '201522369349300102', '201522369349300112', '201522369349300117', '201522369349300122', '201522369349300127', '201522369349300132', '201522369349300137', '201522369349300142', '201522369349300147', '201522369349300202', '201522369349300207', '201522369349300212', '201522369349300227', '201522369349300307', '201522369349300317', '201532299349304913', '201532299349304953', '201542379349300864', '201542379349300874', '201542379349300884', '201542379349301004', '201542379349301009', '201532299349302418', '201532299349302423', '201532299349302433', '201532299349302443', '201532299349302473', '201532299349302483', '201532299349302488', '201532299349302498', '201532299349302503', '201532299349302518', '201532299349302523', '201532299349302543', '201532299349302558', '201542399349200309', '201542399349200319', '201542399349200324', '201542399349200334', '201542399349200339', '201542399349200609', '201542399349200614', '201542399349200709', '201542399349200714', '201542399349200814', 
'201542399349200909', '201542399349201004', '201522379349200037', '201522379349200127', '201522379349200202', '201522379349200212', '201522379349200307', '201522379349200312', '201522379349200322', '201522379349200402', '201522379349200612', '201522379349200712', '201522379349200722', '201532369349200018', '201532369349200023', '201532399349201003', '201542399349200019', '201542399349200104', '201542399349200119', '201542399349200129', '201542399349200504', '201542399349200509', '201502649349200005', '201502649349200010', '201502649349200100', '201502649349200105', '201502649349200110', '201502649349200120', '201502649349200125', '201502649349200200', '201502649349200205', '201502649349200210', '201502649349200215', '201502649349200225', '201502649349200250', '201502649349200255', '201502649349200265', '201502649349200300', '201502649349200315', '201502649349200330', '201502649349200355', '201522679349200002', '201522679349200022', '201522679349200032', '201522679349200102', '201502669349301050', '201502669349301070', '201522659349300002', '201522659349300012', '201522659349300042', '201532649349300343', '201532649349300348', '201532649349300433', '201532649349300438', '201532649349300538', '201532649349300603', '201532649349300658', '201532649349300663', '201542649349300034', '201542649349300039', '201542649349300184', '201542649349300234', '201542649349300329', '201542649349300409', '201512659349200121', '201512659349200131', '201512659349200201', '201512659349200211', '201512659349200316', '201512659349200326', '201512659349200501', '201512659349200526', '201512659349200616', '201512659349200621', '201512659349200716', '201522649349200267', '201522649349200357', '201522649349200372', '201522649349200402', '201522649349200407', '201532649349200008', '201532649349200013', '201532649349200103', '201542619349200304', '201542619349200514', '201542619349200809', '201512669349301106', '201522669349300002', '201522669349300017', '201522669349300022', 
'201532659349300538', '201532659349300548', '201532659349300608', '201532659349300628', '201532659349300638', '201532659349300728', '201532659349300753', '201532659349300953', '201532659349301003', '201532659349301053', '201532659349301058', '201542659349300224', '201542659349300234', '201542659349300239', '201542659349300314', '201512649349300026', '201512649349300036', '201532299349302568', '201532299349304203', '201532299349304208', '201532299349304223', '201522369349300517', '201522369349300527', '201522369349300532', '201522369349300537', '201522369349300542', '201522369349300547', '201522369349300602', '201522369349300607', '201522369349300612', '201522369349300617', '201522369349300622', '201522369349300632', '201522369349300637', '201522369349300652', '201522369349300667', '201522369349300687', '201522369349300802', '201532339349300003', '201532339349300008', '201532339349300018', '201542389349300009', '201542389349300014', '201542269349301474', '201542269349301479', '201542269349301489', '201542269349301499', '201542269349301554', '201542269349301564', '201542269349301574', '201542269349301584', '201542269349301589', '201542269349301599', '201542269349301659', '201542269349301664', '201542269349301674', '201542269349301679', '201542269349301694', '201542269349301699', '201542279349300039', '201542399349200819', '201542399349200904', '201542399349200914', '201532299349100643', '201532299349100648', '201532299349100703', '201532299349100713', '201532299349100718', '201532299349100723', '201542269349100129', '201542269349100134', '201542269349100139', '201542269349100144', '201542269349100149', '201542269349100204', '201542269349100209', '201542269349100219', '201542269349100224', '201542269349100234', '201542269349100239', '201542269349100249', '201542269349100304', '201542269349100314', '201542269349100324', '201542269349100329', '201542269349100339', '201502649349300020', '201542669349300224', '201542669349300234', '201542669349300244', 
'201542669349300304', '201542669349300309', '201542669349300324', '201542669349300329', '201542669349300409', '201542669349300414', '201542669349300429', '201542669349300434', '201542669349300444', '201542669349300509', '201542669349300514', '201542669349300524', '201542669349300529', '201542669349300534', '201502379349200000', '201502379349200010', '201502379349200020', '201502379349200040', '201502379349200115', '201502379349200135', '201502379349200305', '201502379349200310', '201502379349200320', '201502379349200620', '201502389349200410', '201502389349200415', '201502389349200520', '201502389349200630', '201502389349200705', '201502389349200730', '201502389349200760', '201512379349200136', '201512379349200316', '201512379349200711', '201502389349200300', '201512379349200016', '201512379349200026', '201512379349200031', '201512379349200041', '201512379349200101', '201512379349200111', '201512379349200116', '201512379349200126', '201512379349200201', '201512379349200211', '201512379349200216', '201512379349200226', '201512379349200301', '201512379349200306', '201512379349200321', '201512379349200401', '201512379349200406', '201512379349200416', '201512379349200601', '201512379349200611', '201512649349300051', '201512649349300151', '201512649349300166', '201512649349300176', '201512649349300196', '201512649349300226', '201512649349300231', '201512649349300301', '201512649349300316', '201512649349300326', '201512649349300341', '201542679349300134', '201542679349300149', '201542679349300214', '201542679349300219', '201542679349300239', '201542679349300244', '201542679349300314', '201542679349300319', '201532319349200118', '201532319349200323', '201542269349201814', '201542269349201874', '201542269349201884', '201542269349201969', '201542269349201979', '201542299349200214', '201542299349200244', '201542299349200329', '201542299349200429', '201542299349200509', '201542299349200524', '201542309349200034', '201542309349200104', '201542309349200134', 
'201542309349200244', '201542309349200404', '201502349349300000', '201502349349300200', '201502349349300700', '201502359349300100', '201502359349300400', '201502369349300000', '201542279349300104', '201542279349300114', '201542279349300119', '201512399349300006', '201512399349300016', '201512399349300021', '201512399349300031', '201512399349300036', '201512399349300116', '201512399349300121', '201512399349300136', '201542339349300119', '201542339349300124', '201542339349300129', '201542339349300134', '201542339349300204', '201542339349300214', '201542339349300234', '201542339349300309', '201542339349300314', '201542339349300334', '201542339349300404', '201512399349300321', '201512399349300336', '201512399349300341', '201512399349300411', '201532369349300503', '201532369349300508', '201532369349300513', '201532369349300518', '201532369349300523', '201542339349300419', '201542339349300504', '201542339349300519', '201542339349300529', '201542339349300609', '201542339349300614', '201542339349300624', '201542339349300804', '201542339349300814', '201542339349300819', '201542349349300204', '201542359349300304', '201502619349300005', '201502619349300010', '201502619349300015', '201502619349300020', '201502619349300025', '201502619349300100', '201502619349300105', '201502619349300110', '201502619349300125', '201502619349300135', '201502619349300200', '201502619349300205', '201502619349300215', '201502619349300225', '201502619349300230', '201542649349300619', '201542649349300624', '201542649349300634', '201542649349300639', '201542649349300654', '201542649349300664', '201542649349300684', '201542649349300754', '201512619349300911', '201512619349301001', '201512619349301006', '201512619349301101', '201512629349300101', '201512679349300716', '201512679349300731', '201512679349300736', '201512679349300746', '201512679349300801', '201512679349300816', '201512679349300826', '201512679349300836', '201512679349300901', '201512679349300906', '201512679349300921', 
'201512679349301011', '201512679349301101', '201502649349300615', '201502649349300620', '201512679349100006', '201512679349100101', '201512679349100106', '201512679349100201', '201512679349100206', '201512679349100306', '201522669349100002', '201522669349100102', '201522669349100202', '201522669349100402', '201522669349100602', '201522669349100702', '201532659349100503', '201532659349100508', '201532659349100603', '201542659349100009', '201542659349100104', '201542659349100204', '201542659349100404', '201542659349100504', '201542659349100604', '201522649349300162', '201522649349300172', '201522649349300177', '201522649349300182', '201522649349300187', '201532619349300708', '201532619349300718', '201532619349300808', '201532619349300813', '201532619349300903', '201532619349300908', '201532619349300918', '201532619349301103', '201532639349300053', '201502369349300005', '201512369349300001', '201512369349300031', '201512369349300111', '201512369349300116', '201512369349300126', '201512369349300221', '201512369349300231', '201512369349300246', '201512369349300336', '201512369349300451', '201512369349300496', '201512369349300521', '201512369349300546', '201512369349300621', '201532319349200423', '201532319349200433', '201532319349200518', '201532319349200543', '201532319349200608', '201532319349200713', '201532319349200833', '201542269349202034', '201542269349202039', '201542269349202059', '201542269349202074', '201542269349202094', '201542299349200609', '201542299349200619', '201542299349200724', '201542299349200814', '201542299349200844', '201542309349200524', '201542309349200534', '201542309349200619', '201532269349200713', '201532269349200738', '201532269349201038', '201532269349201043', '201532269349201053', '201532269349201078', '201532269349201108', '201532339349300508', '201532339349300513', '201532339349300528', '201532339349300533', '201532339349300608', '201532339349300613', '201532339349300623', '201532339349300633', '201532339349300808', 
'201532339349300823', '201532349349300203', '201532349349300303', '201532399349300238', '201532399349300243', '201532399349300308', '201542389349300844', '201542389349300849', '201542389349300914', '201522339349300127', '201522339349300137', '201522339349300202', '201522339349300217', '201522339349300222', '201522339349300227', '201522339349300232', '201522339349300302', '201522339349300307', '201522339349300317', '201522339349300322', '201522339349300327', '201522339349300407', '201522339349300417', '201522339349300427', '201522339349300517', '201522339349300522', '201542369349300244', '201542369349300249', '201542369349300324', '201542369349300404', '201542369349300454', '201532379349300883', '201532379349300903', '201542369349300474', '201502649349300640', '201502649349300650', '201502649349300655', '201502649349300660', '201502649349300670', '201502649349300675', '201502649349300700', '201522679349300612', '201522679349300617', '201522679349300702', '201522679349300712', '201522679349300717', '201532679349300013', '201532679349300023', '201532679349300033', '201532679349300108', '201532679349300113', '201532679349300133', '201502659349300105', '201502659349300110', '201502659349300120', '201502659349300135', '201502659349300140', '201512649349300416', '201512649349300421', '201512649349300431', '201512649349300436', '201512649349300501', '201512649349300511', '201512649349300521', '201512649349300531', '201512649349300541', '201512649349300546', '201512649349300611', '201512649349300631', '201512649349300641', '201502649349300535', '201502649349300610', '201502649349300625', '201502649349300635', '201502649349300665', '201502649349300680', '201502649349300690', '201532639349300203', '201542619349300109', '201542619349300114', '201542619349300119', '201542619349300124', '201542619349300134', '201542619349300204', '201542619349300209', '201542619349300219', '201512669349300326', '201512669349300331', '201512669349300336', '201512669349300411', 
'201512669349300416', '201532299349201658', '201532299349201668', '201532299349201703', '201532299349201708', '201542269349202354', '201542269349202364', '201542299349200919', '201542299349201129', '201542299349201139', '201542309349200804', '201542309349200964', '201542309349200974', '201542309349201104', '201532269349201478', '201532269349201518', '201532269349201543', '201532269349201563', '201532269349201603', '201532269349201633', '201532289349200113', '201532289349200118', '201532289349200313', '201532289349200603', '201532289349200608', '201532299349201993', '201532329349200028', '201532329349200113', '201532329349200303', '201532329349200403', '201532329349200423', '201542299349201424', '201542299349201434', '201542299349201554', '201542299349201569', '201542299349201604', '201532269349201783', '201532269349201793', '201532269349201803', '201532269349201818', '201532299349202373', '201532299349202388', '201532299349202393', '201542369349300484', '201542369349300489', '201542369349300509', '201542369349300524', '201542369349300529', '201542369349300539', '201542369349300544', '201542369349300609', '201542369349300619', '201542369349300624', '201542369349300634', '201542369349300649', '201542369349300654', '201542369349300669', '201542369349300674', '201502379349300400', '201502379349300500', '201502379349300600', '201502379349300610', '201502379349300615', '201502379349300620', '201502379349300700', '201502379349300705', '201502379349300710', '201502379349300720', '201502379349300725', '201502379349300730', '201502379349300740', '201502379349300745', '201502379349300855', '201502379349300860', '201502379349300875', '201502379349300905', '201502379349300910', '201502379349301005', '201542379349300729', '201542379349300734', '201532269349302848', '201532269349302868', '201532269349302918', '201532269349302923', '201532269349302973', '201532269349302993', '201502649349300710', '201502649349300750', '201502649349300800', '201512629349300601', 
'201512649349300006', '201512649349300011', '201522619349300007', '201522619349300012', '201522619349300127', '201522679349300227', '201522679349300232', '201522679349300412', '201522679349300517', '201522679349300722', '201532679349300028', '201532679349300118', '201502679349200720', '201502679349200750', '201522669349200002', '201522669349200017', '201522669349200022', '201522669349200107', '201522669349200117', '201522669349200122', '201522669349200202', '201522669349200217', '201522669349200222', '201542659349200424', '201542659349200504', '201542659349200524', '201542659349200529', '201542659349200614', '201542659349200619', '201542659349200709', '201542659349200714', '201512659349200001', '201512659349200006', '201512659349200011', '201512659349200021', '201512659349200101', '201512659349200111', '201512659349200126', '201512669349300431', '201512669349300441', '201512669349300446', '201512669349300511', '201512669349300531', '201512669349300601', '201512669349300621', '201512669349300626', '201522659349300722', '201522659349300757', '201522659349300762', '201522659349300802', '201522659349300812', '201522659349300907', '201522659349301002', '201522659349301052', '201512659349300041', '201512659349300046', '201512659349300101', '201512659349300121', '201512659349300126', '201512659349300206', '201512659349300216', '201512659349300221', '201512659349300236', '201512659349300241', '201512659349300301', '201512659349300311', '201512659349300326', '201512659349300401', '201512659349300416', '201512659349300431', '201522649349300547', '201522649349300602', '201542639349300204', '201502659349300235', '201502659349300300', '201502659349300330', '201502659349300415', '201502659349300430', '201502659349300545', '201502659349300625', '201512649349300661', '201532299349202433', '201532299349202508', '201532299349202518', '201532299349202533', '201532299349202538', '201532299349202568', '201542279349200014', '201542279349200139', '201542279349200229', 
'201542279349200324', '201542279349200429', '201542299349201924', '201542299349201984', '201542299349202154', '201532299349202613', '201532299349202618', '201532299349202668', '201532299349202678', '201532299349202723', '201532299349202763', '201532299349202778', '201542279349200634', '201542279349200714', '201542279349200724', '201542279349200734', '201542279349200809', '201542299349202209', '201542299349202259', '201542299349202334', '201542299349202359', '201542299349202369', '201542299349202379', '201542299349202394', '201532299349100018', '201532299349100023', '201532299349100028', '201532299349100033', '201532299349100038', '201532299349100043', '201532299349100048', '201532299349100103', '201532299349100108', '201532269349303013', '201532269349303018', '201532299349302438', '201532299349302453', '201532299349302463', '201532299349302468', '201532299349302538', '201532299349302553', '201532299349304308', '201532299349304318', '201532299349304353', '201532299349304423', '201532299349304443', '201532299349304463', '201512399349300731', '201512399349300741', '201522389349300042', '201522389349300047', '201522389349300102', '201522389349300107', '201522389349300202', '201522389349300207', '201522389349300212', '201522389349300242', '201522389349300247', '201522389349300307', '201522389349300312', '201522389349300317', '201522389349300322', '201522389349300327', '201532369349300608', '201532369349300658', '201532369349300673', '201532369349300683', '201532369349300758', '201532369349300803', '201532269349303203', '201532269349303233', '201532299349300113', '201532299349300128', '201532299349302648', '201532299349302703', '201512659349200216', '201522649349200317', '201522649349200322', '201522649349200332', '201522649349200362', '201522649349200367', '201542619349200504', '201542619349200509', '201542619349200604', '201542619349200704', '201542619349200804', '201542619349200904', '201502669349300410', '201502669349300420', '201502669349300425', 
'201502669349300440', '201502669349300445', '201502669349300500', '201502669349300515', '201502669349300525', '201502669349300530', '201502669349300535', '201502669349300600', '201502669349300605', '201502669349300610', '201502669349300615', '201502669349300620', '201502669349300630', '201502669349300645', '201502669349300800', '201512379349300911', '201512379349301006', '201512379349301016', '201512379349301021', '201522669349300952', '201522669349301057', '201522669349301067', '201522669349301102', '201532669349300003', '201532669349300008', '201532669349300023', '201532669349300108', '201532669349300113', '201512649349300681', '201512649349300701', '201522649349300027', '201522649349300152', '201522649349300167', '201532619349300203', '201532619349300213', '201532619349300238', '201532619349300303', '201532619349300418', '201532619349300513', '201532619349300603', '201542679349300919', '201502639349200250', '201512619349200001', '201512619349200011', '201512619349200101', '201512619349200206', '201512619349200211', '201512619349200306', '201512619349200311', '201512619349200316', '201512619349200321', '201512619349200401', '201512619349200411', '201512619349200501', '201512619349200601', '201512619349200606', '201512619349200701', '201512619349200711', '201512619349200801', '201512619349200811', '201532299349100113', '201532299349100118', '201532299349100123', '201532299349100128', '201532299349100133', '201532299349100138', '201532299349100203', '201532299349100208', '201532299349100213', '201532299349100218', '201532299349100223', '201532299349100233', '201532299349100238', '201532299349100248', '201532299349100303', '201542269349100004', '201542269349100014', '201542299349101134', '201542299349101154', '201542299349101159', '201542299349101169', '201542299349101174', '201542299349101184', '201542299349101189', '201542299349101209', '201542299349101219', '201542299349101224', '201542299349101254', '201542299349101259', '201542299349101269', 
'201542309349100004', '201542309349100104', '201542309349100109', '201542309349100304', '201542309349100314', '201532299349300603', '201532299349300613', '201532299349300618', '201532299349300633', '201532299349300638', '201532299349300648', '201532299349300713', '201532299349302723', '201532299349302753', '201532299349304498', '201532299349304578', '201532299349304603', '201532299349304628', '201532299349304638', '201532299349304653', '201532299349304668', '201542269349301714', '201542269349301719', '201542269349301809', '201542269349301814', '201522389349300607', '201522389349300612', '201522389349300617', '201522389349300622', '201522389349300632', '201522389349300637', '201522389349300642', '201522389349300647', '201522389349300702', '201522389349300707', '201522389349300712', '201522389349300717', '201522389349300727', '201522389349300732', '201522389349300802', '201522389349300822', '201522389349300827', '201522389349300902', '201522389349300907', '201522389349300917', '201532379349300013', '201522399349300032'] \ No newline at end of file +object_ids_2015 = [ + "201542399349300614", + "201542399349300619", + "201542399349300629", + "201542399349300634", + "201542399349300719", + "201542399349300724", + "201542399349300739", + "201522369349300102", + "201522369349300112", + "201522369349300117", + "201522369349300122", + "201522369349300127", + "201522369349300132", + "201522369349300137", + "201522369349300142", + "201522369349300147", + "201522369349300202", + "201522369349300207", + "201522369349300212", + "201522369349300227", + "201522369349300307", + "201522369349300317", + "201532299349304913", + "201532299349304953", + "201542379349300864", + "201542379349300874", + "201542379349300884", + "201542379349301004", + "201542379349301009", + "201532299349302418", + "201532299349302423", + "201532299349302433", + "201532299349302443", + "201532299349302473", + "201532299349302483", + "201532299349302488", + "201532299349302498", + "201532299349302503", + 
"201532299349302518", + "201532299349302523", + "201532299349302543", + "201532299349302558", + "201542399349200309", + "201542399349200319", + "201542399349200324", + "201542399349200334", + "201542399349200339", + "201542399349200609", + "201542399349200614", + "201542399349200709", + "201542399349200714", + "201542399349200814", + "201542399349200909", + "201542399349201004", + "201522379349200037", + "201522379349200127", + "201522379349200202", + "201522379349200212", + "201522379349200307", + "201522379349200312", + "201522379349200322", + "201522379349200402", + "201522379349200612", + "201522379349200712", + "201522379349200722", + "201532369349200018", + "201532369349200023", + "201532399349201003", + "201542399349200019", + "201542399349200104", + "201542399349200119", + "201542399349200129", + "201542399349200504", + "201542399349200509", + "201502649349200005", + "201502649349200010", + "201502649349200100", + "201502649349200105", + "201502649349200110", + "201502649349200120", + "201502649349200125", + "201502649349200200", + "201502649349200205", + "201502649349200210", + "201502649349200215", + "201502649349200225", + "201502649349200250", + "201502649349200255", + "201502649349200265", + "201502649349200300", + "201502649349200315", + "201502649349200330", + "201502649349200355", + "201522679349200002", + "201522679349200022", + "201522679349200032", + "201522679349200102", + "201502669349301050", + "201502669349301070", + "201522659349300002", + "201522659349300012", + "201522659349300042", + "201532649349300343", + "201532649349300348", + "201532649349300433", + "201532649349300438", + "201532649349300538", + "201532649349300603", + "201532649349300658", + "201532649349300663", + "201542649349300034", + "201542649349300039", + "201542649349300184", + "201542649349300234", + "201542649349300329", + "201542649349300409", + "201512659349200121", + "201512659349200131", + "201512659349200201", + "201512659349200211", + "201512659349200316", + 
"201512659349200326", + "201512659349200501", + "201512659349200526", + "201512659349200616", + "201512659349200621", + "201512659349200716", + "201522649349200267", + "201522649349200357", + "201522649349200372", + "201522649349200402", + "201522649349200407", + "201532649349200008", + "201532649349200013", + "201532649349200103", + "201542619349200304", + "201542619349200514", + "201542619349200809", + "201512669349301106", + "201522669349300002", + "201522669349300017", + "201522669349300022", + "201532659349300538", + "201532659349300548", + "201532659349300608", + "201532659349300628", + "201532659349300638", + "201532659349300728", + "201532659349300753", + "201532659349300953", + "201532659349301003", + "201532659349301053", + "201532659349301058", + "201542659349300224", + "201542659349300234", + "201542659349300239", + "201542659349300314", + "201512649349300026", + "201512649349300036", + "201532299349302568", + "201532299349304203", + "201532299349304208", + "201532299349304223", + "201522369349300517", + "201522369349300527", + "201522369349300532", + "201522369349300537", + "201522369349300542", + "201522369349300547", + "201522369349300602", + "201522369349300607", + "201522369349300612", + "201522369349300617", + "201522369349300622", + "201522369349300632", + "201522369349300637", + "201522369349300652", + "201522369349300667", + "201522369349300687", + "201522369349300802", + "201532339349300003", + "201532339349300008", + "201532339349300018", + "201542389349300009", + "201542389349300014", + "201542269349301474", + "201542269349301479", + "201542269349301489", + "201542269349301499", + "201542269349301554", + "201542269349301564", + "201542269349301574", + "201542269349301584", + "201542269349301589", + "201542269349301599", + "201542269349301659", + "201542269349301664", + "201542269349301674", + "201542269349301679", + "201542269349301694", + "201542269349301699", + "201542279349300039", + "201542399349200819", + "201542399349200904", + 
"201542399349200914", + "201532299349100643", + "201532299349100648", + "201532299349100703", + "201532299349100713", + "201532299349100718", + "201532299349100723", + "201542269349100129", + "201542269349100134", + "201542269349100139", + "201542269349100144", + "201542269349100149", + "201542269349100204", + "201542269349100209", + "201542269349100219", + "201542269349100224", + "201542269349100234", + "201542269349100239", + "201542269349100249", + "201542269349100304", + "201542269349100314", + "201542269349100324", + "201542269349100329", + "201542269349100339", + "201502649349300020", + "201542669349300224", + "201542669349300234", + "201542669349300244", + "201542669349300304", + "201542669349300309", + "201542669349300324", + "201542669349300329", + "201542669349300409", + "201542669349300414", + "201542669349300429", + "201542669349300434", + "201542669349300444", + "201542669349300509", + "201542669349300514", + "201542669349300524", + "201542669349300529", + "201542669349300534", + "201502379349200000", + "201502379349200010", + "201502379349200020", + "201502379349200040", + "201502379349200115", + "201502379349200135", + "201502379349200305", + "201502379349200310", + "201502379349200320", + "201502379349200620", + "201502389349200410", + "201502389349200415", + "201502389349200520", + "201502389349200630", + "201502389349200705", + "201502389349200730", + "201502389349200760", + "201512379349200136", + "201512379349200316", + "201512379349200711", + "201502389349200300", + "201512379349200016", + "201512379349200026", + "201512379349200031", + "201512379349200041", + "201512379349200101", + "201512379349200111", + "201512379349200116", + "201512379349200126", + "201512379349200201", + "201512379349200211", + "201512379349200216", + "201512379349200226", + "201512379349200301", + "201512379349200306", + "201512379349200321", + "201512379349200401", + "201512379349200406", + "201512379349200416", + "201512379349200601", + "201512379349200611", + 
"201512649349300051", + "201512649349300151", + "201512649349300166", + "201512649349300176", + "201512649349300196", + "201512649349300226", + "201512649349300231", + "201512649349300301", + "201512649349300316", + "201512649349300326", + "201512649349300341", + "201542679349300134", + "201542679349300149", + "201542679349300214", + "201542679349300219", + "201542679349300239", + "201542679349300244", + "201542679349300314", + "201542679349300319", + "201532319349200118", + "201532319349200323", + "201542269349201814", + "201542269349201874", + "201542269349201884", + "201542269349201969", + "201542269349201979", + "201542299349200214", + "201542299349200244", + "201542299349200329", + "201542299349200429", + "201542299349200509", + "201542299349200524", + "201542309349200034", + "201542309349200104", + "201542309349200134", + "201542309349200244", + "201542309349200404", + "201502349349300000", + "201502349349300200", + "201502349349300700", + "201502359349300100", + "201502359349300400", + "201502369349300000", + "201542279349300104", + "201542279349300114", + "201542279349300119", + "201512399349300006", + "201512399349300016", + "201512399349300021", + "201512399349300031", + "201512399349300036", + "201512399349300116", + "201512399349300121", + "201512399349300136", + "201542339349300119", + "201542339349300124", + "201542339349300129", + "201542339349300134", + "201542339349300204", + "201542339349300214", + "201542339349300234", + "201542339349300309", + "201542339349300314", + "201542339349300334", + "201542339349300404", + "201512399349300321", + "201512399349300336", + "201512399349300341", + "201512399349300411", + "201532369349300503", + "201532369349300508", + "201532369349300513", + "201532369349300518", + "201532369349300523", + "201542339349300419", + "201542339349300504", + "201542339349300519", + "201542339349300529", + "201542339349300609", + "201542339349300614", + "201542339349300624", + "201542339349300804", + "201542339349300814", + 
"201542339349300819", + "201542349349300204", + "201542359349300304", + "201502619349300005", + "201502619349300010", + "201502619349300015", + "201502619349300020", + "201502619349300025", + "201502619349300100", + "201502619349300105", + "201502619349300110", + "201502619349300125", + "201502619349300135", + "201502619349300200", + "201502619349300205", + "201502619349300215", + "201502619349300225", + "201502619349300230", + "201542649349300619", + "201542649349300624", + "201542649349300634", + "201542649349300639", + "201542649349300654", + "201542649349300664", + "201542649349300684", + "201542649349300754", + "201512619349300911", + "201512619349301001", + "201512619349301006", + "201512619349301101", + "201512629349300101", + "201512679349300716", + "201512679349300731", + "201512679349300736", + "201512679349300746", + "201512679349300801", + "201512679349300816", + "201512679349300826", + "201512679349300836", + "201512679349300901", + "201512679349300906", + "201512679349300921", + "201512679349301011", + "201512679349301101", + "201502649349300615", + "201502649349300620", + "201512679349100006", + "201512679349100101", + "201512679349100106", + "201512679349100201", + "201512679349100206", + "201512679349100306", + "201522669349100002", + "201522669349100102", + "201522669349100202", + "201522669349100402", + "201522669349100602", + "201522669349100702", + "201532659349100503", + "201532659349100508", + "201532659349100603", + "201542659349100009", + "201542659349100104", + "201542659349100204", + "201542659349100404", + "201542659349100504", + "201542659349100604", + "201522649349300162", + "201522649349300172", + "201522649349300177", + "201522649349300182", + "201522649349300187", + "201532619349300708", + "201532619349300718", + "201532619349300808", + "201532619349300813", + "201532619349300903", + "201532619349300908", + "201532619349300918", + "201532619349301103", + "201532639349300053", + "201502369349300005", + "201512369349300001", + 
"201512369349300031", + "201512369349300111", + "201512369349300116", + "201512369349300126", + "201512369349300221", + "201512369349300231", + "201512369349300246", + "201512369349300336", + "201512369349300451", + "201512369349300496", + "201512369349300521", + "201512369349300546", + "201512369349300621", + "201532319349200423", + "201532319349200433", + "201532319349200518", + "201532319349200543", + "201532319349200608", + "201532319349200713", + "201532319349200833", + "201542269349202034", + "201542269349202039", + "201542269349202059", + "201542269349202074", + "201542269349202094", + "201542299349200609", + "201542299349200619", + "201542299349200724", + "201542299349200814", + "201542299349200844", + "201542309349200524", + "201542309349200534", + "201542309349200619", + "201532269349200713", + "201532269349200738", + "201532269349201038", + "201532269349201043", + "201532269349201053", + "201532269349201078", + "201532269349201108", + "201532339349300508", + "201532339349300513", + "201532339349300528", + "201532339349300533", + "201532339349300608", + "201532339349300613", + "201532339349300623", + "201532339349300633", + "201532339349300808", + "201532339349300823", + "201532349349300203", + "201532349349300303", + "201532399349300238", + "201532399349300243", + "201532399349300308", + "201542389349300844", + "201542389349300849", + "201542389349300914", + "201522339349300127", + "201522339349300137", + "201522339349300202", + "201522339349300217", + "201522339349300222", + "201522339349300227", + "201522339349300232", + "201522339349300302", + "201522339349300307", + "201522339349300317", + "201522339349300322", + "201522339349300327", + "201522339349300407", + "201522339349300417", + "201522339349300427", + "201522339349300517", + "201522339349300522", + "201542369349300244", + "201542369349300249", + "201542369349300324", + "201542369349300404", + "201542369349300454", + "201532379349300883", + "201532379349300903", + "201542369349300474", + 
"201502649349300640", + "201502649349300650", + "201502649349300655", + "201502649349300660", + "201502649349300670", + "201502649349300675", + "201502649349300700", + "201522679349300612", + "201522679349300617", + "201522679349300702", + "201522679349300712", + "201522679349300717", + "201532679349300013", + "201532679349300023", + "201532679349300033", + "201532679349300108", + "201532679349300113", + "201532679349300133", + "201502659349300105", + "201502659349300110", + "201502659349300120", + "201502659349300135", + "201502659349300140", + "201512649349300416", + "201512649349300421", + "201512649349300431", + "201512649349300436", + "201512649349300501", + "201512649349300511", + "201512649349300521", + "201512649349300531", + "201512649349300541", + "201512649349300546", + "201512649349300611", + "201512649349300631", + "201512649349300641", + "201502649349300535", + "201502649349300610", + "201502649349300625", + "201502649349300635", + "201502649349300665", + "201502649349300680", + "201502649349300690", + "201532639349300203", + "201542619349300109", + "201542619349300114", + "201542619349300119", + "201542619349300124", + "201542619349300134", + "201542619349300204", + "201542619349300209", + "201542619349300219", + "201512669349300326", + "201512669349300331", + "201512669349300336", + "201512669349300411", + "201512669349300416", + "201532299349201658", + "201532299349201668", + "201532299349201703", + "201532299349201708", + "201542269349202354", + "201542269349202364", + "201542299349200919", + "201542299349201129", + "201542299349201139", + "201542309349200804", + "201542309349200964", + "201542309349200974", + "201542309349201104", + "201532269349201478", + "201532269349201518", + "201532269349201543", + "201532269349201563", + "201532269349201603", + "201532269349201633", + "201532289349200113", + "201532289349200118", + "201532289349200313", + "201532289349200603", + "201532289349200608", + "201532299349201993", + "201532329349200028", + 
"201532329349200113", + "201532329349200303", + "201532329349200403", + "201532329349200423", + "201542299349201424", + "201542299349201434", + "201542299349201554", + "201542299349201569", + "201542299349201604", + "201532269349201783", + "201532269349201793", + "201532269349201803", + "201532269349201818", + "201532299349202373", + "201532299349202388", + "201532299349202393", + "201542369349300484", + "201542369349300489", + "201542369349300509", + "201542369349300524", + "201542369349300529", + "201542369349300539", + "201542369349300544", + "201542369349300609", + "201542369349300619", + "201542369349300624", + "201542369349300634", + "201542369349300649", + "201542369349300654", + "201542369349300669", + "201542369349300674", + "201502379349300400", + "201502379349300500", + "201502379349300600", + "201502379349300610", + "201502379349300615", + "201502379349300620", + "201502379349300700", + "201502379349300705", + "201502379349300710", + "201502379349300720", + "201502379349300725", + "201502379349300730", + "201502379349300740", + "201502379349300745", + "201502379349300855", + "201502379349300860", + "201502379349300875", + "201502379349300905", + "201502379349300910", + "201502379349301005", + "201542379349300729", + "201542379349300734", + "201532269349302848", + "201532269349302868", + "201532269349302918", + "201532269349302923", + "201532269349302973", + "201532269349302993", + "201502649349300710", + "201502649349300750", + "201502649349300800", + "201512629349300601", + "201512649349300006", + "201512649349300011", + "201522619349300007", + "201522619349300012", + "201522619349300127", + "201522679349300227", + "201522679349300232", + "201522679349300412", + "201522679349300517", + "201522679349300722", + "201532679349300028", + "201532679349300118", + "201502679349200720", + "201502679349200750", + "201522669349200002", + "201522669349200017", + "201522669349200022", + "201522669349200107", + "201522669349200117", + "201522669349200122", + 
"201522669349200202", + "201522669349200217", + "201522669349200222", + "201542659349200424", + "201542659349200504", + "201542659349200524", + "201542659349200529", + "201542659349200614", + "201542659349200619", + "201542659349200709", + "201542659349200714", + "201512659349200001", + "201512659349200006", + "201512659349200011", + "201512659349200021", + "201512659349200101", + "201512659349200111", + "201512659349200126", + "201512669349300431", + "201512669349300441", + "201512669349300446", + "201512669349300511", + "201512669349300531", + "201512669349300601", + "201512669349300621", + "201512669349300626", + "201522659349300722", + "201522659349300757", + "201522659349300762", + "201522659349300802", + "201522659349300812", + "201522659349300907", + "201522659349301002", + "201522659349301052", + "201512659349300041", + "201512659349300046", + "201512659349300101", + "201512659349300121", + "201512659349300126", + "201512659349300206", + "201512659349300216", + "201512659349300221", + "201512659349300236", + "201512659349300241", + "201512659349300301", + "201512659349300311", + "201512659349300326", + "201512659349300401", + "201512659349300416", + "201512659349300431", + "201522649349300547", + "201522649349300602", + "201542639349300204", + "201502659349300235", + "201502659349300300", + "201502659349300330", + "201502659349300415", + "201502659349300430", + "201502659349300545", + "201502659349300625", + "201512649349300661", + "201532299349202433", + "201532299349202508", + "201532299349202518", + "201532299349202533", + "201532299349202538", + "201532299349202568", + "201542279349200014", + "201542279349200139", + "201542279349200229", + "201542279349200324", + "201542279349200429", + "201542299349201924", + "201542299349201984", + "201542299349202154", + "201532299349202613", + "201532299349202618", + "201532299349202668", + "201532299349202678", + "201532299349202723", + "201532299349202763", + "201532299349202778", + "201542279349200634", + 
"201542279349200714", + "201542279349200724", + "201542279349200734", + "201542279349200809", + "201542299349202209", + "201542299349202259", + "201542299349202334", + "201542299349202359", + "201542299349202369", + "201542299349202379", + "201542299349202394", + "201532299349100018", + "201532299349100023", + "201532299349100028", + "201532299349100033", + "201532299349100038", + "201532299349100043", + "201532299349100048", + "201532299349100103", + "201532299349100108", + "201532269349303013", + "201532269349303018", + "201532299349302438", + "201532299349302453", + "201532299349302463", + "201532299349302468", + "201532299349302538", + "201532299349302553", + "201532299349304308", + "201532299349304318", + "201532299349304353", + "201532299349304423", + "201532299349304443", + "201532299349304463", + "201512399349300731", + "201512399349300741", + "201522389349300042", + "201522389349300047", + "201522389349300102", + "201522389349300107", + "201522389349300202", + "201522389349300207", + "201522389349300212", + "201522389349300242", + "201522389349300247", + "201522389349300307", + "201522389349300312", + "201522389349300317", + "201522389349300322", + "201522389349300327", + "201532369349300608", + "201532369349300658", + "201532369349300673", + "201532369349300683", + "201532369349300758", + "201532369349300803", + "201532269349303203", + "201532269349303233", + "201532299349300113", + "201532299349300128", + "201532299349302648", + "201532299349302703", + "201512659349200216", + "201522649349200317", + "201522649349200322", + "201522649349200332", + "201522649349200362", + "201522649349200367", + "201542619349200504", + "201542619349200509", + "201542619349200604", + "201542619349200704", + "201542619349200804", + "201542619349200904", + "201502669349300410", + "201502669349300420", + "201502669349300425", + "201502669349300440", + "201502669349300445", + "201502669349300500", + "201502669349300515", + "201502669349300525", + "201502669349300530", + 
"201502669349300535", + "201502669349300600", + "201502669349300605", + "201502669349300610", + "201502669349300615", + "201502669349300620", + "201502669349300630", + "201502669349300645", + "201502669349300800", + "201512379349300911", + "201512379349301006", + "201512379349301016", + "201512379349301021", + "201522669349300952", + "201522669349301057", + "201522669349301067", + "201522669349301102", + "201532669349300003", + "201532669349300008", + "201532669349300023", + "201532669349300108", + "201532669349300113", + "201512649349300681", + "201512649349300701", + "201522649349300027", + "201522649349300152", + "201522649349300167", + "201532619349300203", + "201532619349300213", + "201532619349300238", + "201532619349300303", + "201532619349300418", + "201532619349300513", + "201532619349300603", + "201542679349300919", + "201502639349200250", + "201512619349200001", + "201512619349200011", + "201512619349200101", + "201512619349200206", + "201512619349200211", + "201512619349200306", + "201512619349200311", + "201512619349200316", + "201512619349200321", + "201512619349200401", + "201512619349200411", + "201512619349200501", + "201512619349200601", + "201512619349200606", + "201512619349200701", + "201512619349200711", + "201512619349200801", + "201512619349200811", + "201532299349100113", + "201532299349100118", + "201532299349100123", + "201532299349100128", + "201532299349100133", + "201532299349100138", + "201532299349100203", + "201532299349100208", + "201532299349100213", + "201532299349100218", + "201532299349100223", + "201532299349100233", + "201532299349100238", + "201532299349100248", + "201532299349100303", + "201542269349100004", + "201542269349100014", + "201542299349101134", + "201542299349101154", + "201542299349101159", + "201542299349101169", + "201542299349101174", + "201542299349101184", + "201542299349101189", + "201542299349101209", + "201542299349101219", + "201542299349101224", + "201542299349101254", + "201542299349101259", + 
"201542299349101269", + "201542309349100004", + "201542309349100104", + "201542309349100109", + "201542309349100304", + "201542309349100314", + "201532299349300603", + "201532299349300613", + "201532299349300618", + "201532299349300633", + "201532299349300638", + "201532299349300648", + "201532299349300713", + "201532299349302723", + "201532299349302753", + "201532299349304498", + "201532299349304578", + "201532299349304603", + "201532299349304628", + "201532299349304638", + "201532299349304653", + "201532299349304668", + "201542269349301714", + "201542269349301719", + "201542269349301809", + "201542269349301814", + "201522389349300607", + "201522389349300612", + "201522389349300617", + "201522389349300622", + "201522389349300632", + "201522389349300637", + "201522389349300642", + "201522389349300647", + "201522389349300702", + "201522389349300707", + "201522389349300712", + "201522389349300717", + "201522389349300727", + "201522389349300732", + "201522389349300802", + "201522389349300822", + "201522389349300827", + "201522389349300902", + "201522389349300907", + "201522389349300917", + "201532379349300013", + "201522399349300032", +] diff --git a/irs_reader/settings.py b/irs_reader/settings.py index 5a294ca..59d54cd 100644 --- a/irs_reader/settings.py +++ b/irs_reader/settings.py @@ -1,68 +1,159 @@ -import sys import os + +import environ + from .dir_utils import mkdir_p -IRS_READER_ROOT = os.path.abspath(os.path.dirname(__file__)) +env = environ.Env() + + +IRS_READER_ROOT = env( + "IRS_READER_ROOT", default=os.path.abspath(os.path.dirname(__file__)) +) -# This is the URL to amazon's bucket, could use another synced to it -IRS_XML_HTTP_BASE = "https://s3.amazonaws.com/irs-form-990" +# This is the URL to Giving Tuesday's bucket, could use another synced to it +IRS_XML_HTTP_BASE = env( + "IRS_XML_HTTP_BASE", + default="https://gt990datalake-rawdata.s3.amazonaws.com/EfileData/XmlFiles", +) # It can be hard to locate this. 
-IRSX_SETTINGS_LOCATION = (os.path.join(IRS_READER_ROOT, "settings.py")) +IRSX_SETTINGS_LOCATION = os.path.join(IRS_READER_ROOT, "settings.py") # Defaults to the same directory as this settings file, but you can override # with the `IRSX_CACHE_DIRECTORY` environment variable -IRSX_CACHE_DIRECTORY = os.environ.get("IRSX_CACHE_DIRECTORY", IRS_READER_ROOT) +IRSX_CACHE_DIRECTORY = env("IRSX_CACHE_DIRECTORY", default=IRS_READER_ROOT) # The directory we put files in while we're processing them -WORKING_DIRECTORY = os.environ.get( - "IRSX_WORKING_DIRECTORY", os.path.join(IRSX_CACHE_DIRECTORY, "XML")) +WORKING_DIRECTORY = env( + "IRSX_WORKING_DIRECTORY", default=os.path.join(IRSX_CACHE_DIRECTORY, "XML") +) # Helpful to keep these around for lookup purposes -INDEX_DIRECTORY = os.environ.get( - "IRSX_INDEX_DIRECTORY", os.path.join(IRSX_CACHE_DIRECTORY, "CSV")) +INDEX_DIRECTORY = env( + "IRSX_INDEX_DIRECTORY", default=os.path.join(IRSX_CACHE_DIRECTORY, "CSV") +) IRS_INDEX_BASE = "https://apps.irs.gov/pub/epostcard/990/xml/%s/index_%s.csv" KNOWN_SCHEDULES = [ - "IRS990", "IRS990EZ", "IRS990PF", "IRS990ScheduleA", - "IRS990ScheduleB", "IRS990ScheduleC", "IRS990ScheduleD", - "IRS990ScheduleE", "IRS990ScheduleF", "IRS990ScheduleG", - "IRS990ScheduleH", "IRS990ScheduleI", "IRS990ScheduleJ", - "IRS990ScheduleK", "IRS990ScheduleL", "IRS990ScheduleM", - "IRS990ScheduleN", "IRS990ScheduleO", "IRS990ScheduleR", - "ReturnHeader990x" + "IRS990", + "IRS990EZ", + "IRS990PF", + "IRS990ScheduleA", + "IRS990ScheduleB", + "IRS990ScheduleC", + "IRS990ScheduleD", + "IRS990ScheduleE", + "IRS990ScheduleF", + "IRS990ScheduleG", + "IRS990ScheduleH", + "IRS990ScheduleI", + "IRS990ScheduleJ", + "IRS990ScheduleK", + "IRS990ScheduleL", + "IRS990ScheduleM", + "IRS990ScheduleN", + "IRS990ScheduleO", + "IRS990ScheduleR", + "ReturnHeader990x", ] -# these could get pushed to metadata directory? +# these could get pushed to metadata directory? 
ALLOWED_VERSIONSTRINGS = [ - '2013v3.0', '2013v3.1', '2013v4.0', '2014v5.0', '2014v6.0', - '2015v2.0', '2015v2.1', '2015v3.0', '2016v3.0', '2016v3.1', - '2017v2.0', '2017v2.1', '2017v2.2', '2017v2.3', '2018v3.0', - '2018v3.1', '2018v3.2', '2018v3.3', '2019v5.0', '2019v5.1', - '2019v5.2', '2020v1.0', '2020v2.0', '2020v3.0', '2020v4.0', - '2020v4.1', '2020v4.2', '2020v1.1' + "2013v3.0", + "2013v3.1", + "2013v4.0", + "2014v5.0", + "2014v6.0", + "2015v2.0", + "2015v2.1", + "2015v3.0", + "2016v3.0", + "2016v3.1", + "2017v2.0", + "2017v2.1", + "2017v2.2", + "2017v2.3", + "2018v3.0", + "2018v3.1", + "2018v3.2", + "2018v3.3", + "2019v5.0", + "2019v5.1", + "2019v5.2", + "2020v1.0", + "2020v1.1", + "2020v1.2", + "2020v1.3", + "2020v2.0", + "2020v3.0", + "2020v4.0", + "2020v4.1", + "2020v4.2", + "2021v4.0", + "2021v4.1", + "2021v4.2", + "2021v4.3", + "2022v4.0", + "2022v4.1", + "2022v5.0", + "2022v6.0", + "2022v7.0", + # these are guesses for future 2023 schemas; they might not actually exist + "2023v1.0", + "2023v2.0", + "2023v3.0", + "2023v3.1", + "2023v3.2", + "2023v3.3", + "2023v4.0", + "2023v4.1", + "2023v4.2", + "2023v4.3", + "2023v5.0", + "2023v5.1", + "2023v5.2", + "2023v5.3", + "2023v6.0", + "2023v6.1", + "2023v6.2", + "2023v6.3", + "2023v7.0", + "2023v7.1", + "2023v7.2", + "2023v7.3", ] # 2020 is experimental -# see https://www.irs.gov/charities-non-profits/ty2020-xml-schemas-and-business-rules-for-exempt-organizations-modernized-e-file +# see https://www.irs.gov/charities-non-profits/ty2020-xml-schemas-and-business-rules-for-exempt-organizations-modernized-e-file # noqa # We can capture the group structure for these so it doesn't break # but these versions ARE NOT supported and aren't mapped to IRSx variables CSV_ALLOWED_VERSIONSTRINGS = ALLOWED_VERSIONSTRINGS + [ - '2010v3.2', '2010v3.4', '2010v3.6', '2010v3.7', '2011v1.2', '2011v1.3', - '2011v1.4', '2011v1.5', '2012v2.0', '2012v2.1', '2012v2.2', '2012v2.3', - '2012v3.0' + "2010v3.2", + "2010v3.4", + "2010v3.6", + 
"2010v3.7", + "2011v1.2", + "2011v1.3", + "2011v1.4", + "2011v1.5", + "2012v2.0", + "2012v2.1", + "2012v2.2", + "2012v2.3", + "2012v3.0", ] -METADATA_DIRECTORY = (os.path.join(IRS_READER_ROOT, "metadata")) +METADATA_DIRECTORY = os.path.join(IRS_READER_ROOT, "metadata") KEYERROR_LOG = os.path.join(IRS_READER_ROOT, "keyerrors.log") -LOG_KEY = 'xml' +LOG_KEY = "xml" mkdir_p([WORKING_DIRECTORY, INDEX_DIRECTORY]) try: - from .local_settings import * + from .local_settings import * # noqa except ImportError: pass diff --git a/irs_reader/sked_dict_reader.py b/irs_reader/sked_dict_reader.py index a7e52c7..d5e38d3 100644 --- a/irs_reader/sked_dict_reader.py +++ b/irs_reader/sked_dict_reader.py @@ -1,8 +1,13 @@ -from .type_utils import dictType, orderedDictType, listType, \ - unicodeType, noneType, strType from .flatten_utils import flatten from .keyerror_utils import ignorable_keyerror -from .settings import LOG_KEY +from .type_utils import ( + dictType, + listType, + noneType, + orderedDictType, + strType, + unicodeType, +) class SkedDictReader(object): @@ -11,6 +16,7 @@ class SkedDictReader(object): into xpath-ed variables and repeated structures. 
Will also work on reading xmltodict that was previously turned into json """ + def __init__( self, standardizer, @@ -19,21 +25,21 @@ def __init__( ein, documentId=None, documentation=False, - csv_format=False + csv_format=False, ): self.standardizer = standardizer self.object_id = object_id self.ein = ein self.documentId = documentId - self.schedule_parts = {} # allows one entry per filing - self.repeating_groups = {} # multiple per filing - self.csv_format = csv_format # Do we need to generate ordered csv - self.for_csv_list = [] # keep record of elements, line by line + self.schedule_parts = {} # allows one entry per filing + self.repeating_groups = {} # multiple per filing + self.csv_format = csv_format # Do we need to generate ordered csv + self.for_csv_list = [] # keep record of elements, line by line self.groups = groups self.documentation = documentation - self.variable_keyerrors = [] # record any unexpected variables - self.group_keyerrors = [] # or unexpected groups + self.variable_keyerrors = [] # record any unexpected variables + self.group_keyerrors = [] # or unexpected groups if self.documentation and not self.standardizer.get_documentation_status(): # Todo: split out documenter entirely so we don't have to do this @@ -43,95 +49,90 @@ def __init__( ) def _get_table_start(self): - """ prefill the columns we need for all tables """ + """prefill the columns we need for all tables""" if self.documentation: standardized_table_start = { - 'object_id': { - 'value': self.object_id, - 'ordering': -1, - 'line_number': 'NA', - 'description': 'IRS-assigned object id', - 'db_type': 'String(18)' + "object_id": { + "value": self.object_id, + "ordering": -1, + "line_number": "NA", + "description": "IRS-assigned object id", + "db_type": "String(18)", + }, + "ein": { + "value": self.ein, + "ordering": -2, + "line_number": "NA", + "description": "IRS employer id number", + "db_type": "String(9)", }, - 'ein': { - 'value': self.ein, - 'ordering': -2, - 'line_number': 'NA', 
- 'description': 'IRS employer id number', - 'db_type': 'String(9)' - } } if self.documentId: - standardized_table_start['documentId'] = { - 'value': self.documentId, - 'description': 'Document ID', - 'ordering': 0 + standardized_table_start["documentId"] = { + "value": self.documentId, + "description": "Document ID", + "ordering": 0, } else: - standardized_table_start = { - 'object_id': self.object_id, - 'ein': self.ein - } + standardized_table_start = {"object_id": self.object_id, "ein": self.ein} if self.documentId: - standardized_table_start['documentId'] = self.documentId + standardized_table_start["documentId"] = self.documentId return standardized_table_start def _process_group(self, json_node, path, this_group): for node_index, node in enumerate(json_node): - #print("_process_group %s " % (this_group['db_name'])) + # print("_process_group %s " % (this_group['db_name'])) this_node_type = type(node) flattened_list_item = None if this_node_type == unicodeType: - #print("_pg: unicodeType %s ") + # print("_pg: unicodeType %s ") flattened_list_item = {path: node} else: - #print("_pg: NOT unicodeType") - flattened_list_item = flatten(node, parent_key=path, sep='/') + # print("_pg: NOT unicodeType") + flattened_list_item = flatten(node, parent_key=path, sep="/") table_name = None standardized_group_dict = self._get_table_start() for xpath in flattened_list_item.keys(): - if '@' in xpath: - continue - else: - xpath = xpath.replace("/#text", "") - value = flattened_list_item[xpath] - - if self.csv_format: - this_var = { - 'xpath':xpath, - 'value':value, - 'in_group':True, - 'group_name':this_group['db_name'], - 'group_index':node_index - } - self.for_csv_list.append(this_var) + # if '@' in xpath: + # continue + # else: + xpath = xpath.replace("/#text", "") + value = flattened_list_item[xpath] + + if self.csv_format: + this_var = { + "xpath": xpath, + "value": value, + "in_group": True, + "group_name": this_group["db_name"], + "group_index": node_index, + } + 
self.for_csv_list.append(this_var) - try: - this_var_data = self.standardizer.get_var(xpath) - except KeyError: - if not ignorable_keyerror(xpath): - self.variable_keyerrors.append( - {'element_path':xpath} - ) - continue - this_var_value = flattened_list_item[xpath] - this_var_name = this_var_data['db_name'] - table_name = this_var_data['db_table'] - if self.documentation: - result = { - 'value': this_var_value, - 'ordering': this_var_data['ordering'], - 'line_number': this_var_data['line_number'], - 'description': this_var_data['description'], - 'db_type': this_var_data['db_type'] - } - standardized_group_dict[this_var_name] = result + try: + this_var_data = self.standardizer.get_var(xpath) + except KeyError: + if not ignorable_keyerror(xpath): + self.variable_keyerrors.append({"element_path": xpath}) + continue + this_var_value = flattened_list_item[xpath] + this_var_name = this_var_data["db_name"] + table_name = this_var_data["db_table"] + if self.documentation: + result = { + "value": this_var_value, + "ordering": this_var_data["ordering"], + "line_number": this_var_data["line_number"], + "description": this_var_data["description"], + "db_type": this_var_data["db_type"], + } + standardized_group_dict[this_var_name] = result - else: - standardized_group_dict[this_var_name] = this_var_value + else: + standardized_group_dict[this_var_name] = this_var_value try: self.repeating_groups[table_name].append(standardized_group_dict) except KeyError: @@ -142,88 +143,79 @@ def _parse_json(self, json_node, parent_path=""): element_path = parent_path if this_node_type == listType: - #print("List type %s" % element_path) + # print("List type %s" % element_path) this_group = None try: this_group = self.groups[element_path] except KeyError: - self.group_keyerrors.append( - {'element_path':element_path} - ) + self.group_keyerrors.append({"element_path": element_path}) self._process_group(json_node, parent_path, this_group) elif this_node_type == unicodeType: # but ignore it if 
is an @. - if '@' in element_path: - pass - else: - element_path = element_path.replace("/#text", "") + # if '@' in element_path: + # pass + # else: + element_path = element_path.replace("/#text", "") + try: + # is it a group? + this_group = self.groups[element_path] + self._process_group([{parent_path: json_node}], "", this_group) + + except KeyError: + + # It's not a group so it should be a variable we know about + + if self.csv_format: + this_var = { + "xpath": element_path, + "value": json_node, + "in_group": False, + "group_name": None, + "group_index": None, + } + self.for_csv_list.append(this_var) + + # It's not a group so it should be a variable we know about try: - # is it a group? - this_group = self.groups[element_path] - self._process_group( - [{parent_path: json_node}], - '', - this_group - ) + var_data = self.standardizer.get_var(element_path) + var_found = True except KeyError: + # pass through for some common key errors + # [ TODO: FIX THE KEYERRORS! ] + if not ignorable_keyerror(element_path): + self.variable_keyerrors.append({"element_path": element_path}) + var_found = False + + if var_found: - # It's not a group so it should be a variable we know about - - if self.csv_format: - this_var = { - 'xpath':element_path, - 'value':json_node, - 'in_group':False, - 'group_name':None, - 'group_index':None + table_name = var_data["db_table"] + var_name = var_data["db_name"] + + result = json_node + if self.documentation: + result = { + "value": json_node, + "ordering": var_data["ordering"], + "line_number": var_data["line_number"], + "description": var_data["description"], + "db_type": var_data["db_type"], } - self.for_csv_list.append(this_var) - # It's not a group so it should be a variable we know about try: - var_data = self.standardizer.get_var(element_path) - var_found = True - + self.schedule_parts[table_name][var_name] = result except KeyError: - # pass through for some common key errors - # [ TODO: FIX THE KEYERRORS! 
] - if not ignorable_keyerror(element_path): - self.variable_keyerrors.append( - {'element_path':element_path} - ) - var_found = False - - if var_found: - - table_name = var_data['db_table'] - var_name = var_data['db_name'] - - result = json_node - if self.documentation: - result = { - 'value': json_node, - 'ordering': var_data['ordering'], - 'line_number': var_data['line_number'], - 'description': var_data['description'], - 'db_type': var_data['db_type'] - } - - try: - self.schedule_parts[table_name][var_name] = result - except KeyError: - self.schedule_parts[table_name] = self._get_table_start() - self.schedule_parts[table_name][var_name] = result - + self.schedule_parts[table_name] = self._get_table_start() + self.schedule_parts[table_name][var_name] = result elif this_node_type == orderedDictType or this_node_type == dictType: try: # is it a singleton group? this_group = self.groups[element_path] - self._process_group([{parent_path: json_node}], '', this_group) + self._process_group([{parent_path: json_node}], "", this_group) except KeyError: keys = json_node.keys() @@ -235,17 +227,18 @@ def _parse_json(self, json_node, parent_path=""): pass elif this_node_type == strType: - msg = "String '%s'" % json_node - #self.logging.debug(msg) + pass + # msg = "String '%s'" % json_node + # self.logging.debug(msg) else: raise Exception("Unhandled type: %s" % (type(json_node))) def parse(self, raw_ordered_dict, parent_path=""): self._parse_json(raw_ordered_dict, parent_path=parent_path) - return ({ - 'schedule_parts': self.schedule_parts, - 'groups': self.repeating_groups, - 'csv_line_array':self.for_csv_list, # This is empty if not csv - 'keyerrors':self.variable_keyerrors, - 'group_keyerrors':self.group_keyerrors - }) + return { + "schedule_parts": self.schedule_parts, + "groups": self.repeating_groups, + "csv_line_array": self.for_csv_list, # This is empty if not csv + "keyerrors": self.variable_keyerrors, + "group_keyerrors": self.group_keyerrors, + } diff --git 
a/irs_reader/standardizer.py b/irs_reader/standardizer.py index e467d31..535fc31 100644 --- a/irs_reader/standardizer.py +++ b/irs_reader/standardizer.py @@ -1,54 +1,44 @@ +import csv import os -import sys -import collections -#import logging -from datetime import datetime -from .settings import METADATA_DIRECTORY, KEYERROR_LOG -from .sked_dict_reader import SkedDictReader -from .type_utils import listType -if sys.version_info >= (3, 0): - import csv -else: - import unicodecsv as csv + +from .settings import METADATA_DIRECTORY class Standardizer(object): """ This reads metadata .csv files, which it uses to standardize - ordered dicts. For documentation, see Documentizer below. + ordered dicts. For documentation, see Documentizer below. """ def __init__(self): - #self.show_documentation = documentation + # self.show_documentation = documentation self.groups = {} self.variables = {} self.schedule_parts = {} # This is overridden for Documentizer class below - self.variable_columns =['db_table', 'db_name'] + self.variable_columns = ["db_table", "db_name"] self._make_groups() self._make_variables() - def _make_groups(self): - group_filepath = os.path.join(METADATA_DIRECTORY, 'groups.csv') - with open(group_filepath, 'r') as reader_fh: + group_filepath = os.path.join(METADATA_DIRECTORY, "groups.csv") + with open(group_filepath, "r") as reader_fh: reader = csv.DictReader(reader_fh) for row in reader: - self.groups[row['xpath']] = row + self.groups[row["xpath"]] = row return True def _make_variables(self): - variable_filepath = os.path.join(METADATA_DIRECTORY, 'variables.csv') - with open(variable_filepath, 'r') as variable_fh: + variable_filepath = os.path.join(METADATA_DIRECTORY, "variables.csv") + with open(variable_filepath, "r") as variable_fh: reader = csv.DictReader(variable_fh) for row in reader: vardict = {} for col in self.variable_columns: - vardict[col]=row[col] - self.variables[row['xpath']] = vardict - + vardict[col] = row[col] + self.variables[row["xpath"]] 
= vardict return True @@ -58,27 +48,35 @@ def get_groups(self): def get_var(self, var_xpath, version=None): if version: raise Exception("Version checking is not implemented") - return (self.variables[var_xpath]) + return self.variables[var_xpath] def get_documentation_status(self): return False class Documentizer(Standardizer): - """ Like Standardizer, but returns canonical documentation info from 2016 version """ + """Like Standardizer, but returns canonical documentation info from 2016 version""" def __init__(self, versions=False): self.groups = {} self.variables = {} self.schedule_parts = {} - self.variable_columns =[ - 'db_table', 'db_name', 'ordering', - 'line_number', 'description', 'db_type', - 'irs_type', 'xpath' + self.variable_columns = [ + "db_table", + "db_name", + "ordering", + "line_number", + "description", + "db_type", + "irs_type", + "xpath", ] if versions: - self.variable_columns = self.variable_columns + ['version_start', 'version_end'] + self.variable_columns = self.variable_columns + [ + "version_start", + "version_end", + ] self._make_schedule_parts() self._make_groups() @@ -88,17 +86,16 @@ def get_documentation_status(self): return True def _make_schedule_parts(self): - part_filepath = os.path.join(METADATA_DIRECTORY, 'schedule_parts.csv') - with open(part_filepath, 'r') as reader_fh: + part_filepath = os.path.join(METADATA_DIRECTORY, "schedule_parts.csv") + with open(part_filepath, "r") as reader_fh: reader = csv.DictReader(reader_fh) for row in reader: - self.schedule_parts[row['parent_sked_part']] = { - 'name': row['part_name'], - 'ordering': row['ordering'], - 'parent_sked': row['parent_sked'], - 'parent_sked_part': row['parent_sked_part'], - 'is_shell': row['is_shell'] - + self.schedule_parts[row["parent_sked_part"]] = { + "name": row["part_name"], + "ordering": row["ordering"], + "parent_sked": row["parent_sked"], + "parent_sked_part": row["parent_sked_part"], + "is_shell": row["is_shell"], } return True @@ -107,29 +104,29 @@ def 
get_schedule_parts(self): def part_ordering(self, partname): try: - result = int(self.schedule_parts[partname]['ordering']) + result = int(self.schedule_parts[partname]["ordering"]) return result except KeyError: return None def group_ordering(self, groupname): try: - return self.groups[groupname]['ordering'] + return self.groups[groupname]["ordering"] except KeyError: return None def get_groups_by_sked(self, sked): groups = [] for thisgroup in self.groups.keys(): - if self.groups[thisgroup]['parent_sked'] == sked: + if self.groups[thisgroup]["parent_sked"] == sked: groups.append(self.groups[thisgroup]) return groups def get_parts_by_sked(self, sked): parts = [] for thispart in self.schedule_parts.keys(): - #print(self.schedule_parts[thispart]) - if self.schedule_parts[thispart]['parent_sked'] == sked: + # print(self.schedule_parts[thispart]) + if self.schedule_parts[thispart]["parent_sked"] == sked: parts.append(self.schedule_parts[thispart]) return parts @@ -137,7 +134,6 @@ def get_variables(self): return self.variables - class VersionDocumentizer(object): """ Returns version-specific line number and documentation. 
@@ -158,31 +154,30 @@ def check_version(self, versionstring, start_year, end_year): return result def _make_line_numbers(self): - filepath = os.path.join(METADATA_DIRECTORY, 'line_numbers.csv') - with open(filepath, 'r') as reader_fh: + filepath = os.path.join(METADATA_DIRECTORY, "line_numbers.csv") + with open(filepath, "r") as reader_fh: reader = csv.DictReader(reader_fh) for row in reader: try: - self.line_numbers[row['xpath']] - self.line_numbers[row['xpath']].append(row) + self.line_numbers[row["xpath"]] + self.line_numbers[row["xpath"]].append(row) except KeyError: - self.line_numbers[row['xpath']] = [row] + self.line_numbers[row["xpath"]] = [row] def _make_descriptions(self): - filepath = os.path.join(METADATA_DIRECTORY, 'descriptions.csv') - with open(filepath, 'r') as reader_fh: + filepath = os.path.join(METADATA_DIRECTORY, "descriptions.csv") + with open(filepath, "r") as reader_fh: reader = csv.DictReader(reader_fh) for row in reader: try: - self.descriptions[row['xpath']] - self.descriptions[row['xpath']].append(row) + self.descriptions[row["xpath"]] + self.descriptions[row["xpath"]].append(row) except KeyError: - self.descriptions[row['xpath']] = [row] - + self.descriptions[row["xpath"]] = [row] def get_line_number(self, xpath, version_string): candidate_rows = [] @@ -192,8 +187,10 @@ def get_line_number(self, xpath, version_string): return None for row in candidate_rows: - if self.check_version(version_string, row['version_start'], row['version_end']): - return row['line_number'] + if self.check_version( + version_string, row["version_start"], row["version_end"] + ): + return row["line_number"] return None @@ -204,7 +201,8 @@ def get_description(self, xpath, version_string): except KeyError: return None for row in candidate_rows: - if self.check_version(version_string, row['version_start'], row['version_end']): - return row['description'] + if self.check_version( + version_string, row["version_start"], row["version_end"] + ): + return 
row["description"] return None - diff --git a/irs_reader/text_format_utils.py b/irs_reader/text_format_utils.py index 515c4ba..d091e3b 100644 --- a/irs_reader/text_format_utils.py +++ b/irs_reader/text_format_utils.py @@ -1,33 +1,34 @@ -import json -import sys import codecs -import re import csv -import unicodecsv - -from .standardizer import Standardizer, Documentizer, VersionDocumentizer +import json +import re +import sys +from .standardizer import VersionDocumentizer -BRACKET_RE = re.compile(r'\[.*?\]') +BRACKET_RE = re.compile(r"\[.*?\]") ASTERISKS = "****************" + def debracket(string): - """ Eliminate the bracketed var names in doc, line strings """ - result = re.sub(BRACKET_RE, ';', str(string)) - result = result.lstrip(';') - result = result.lstrip(' ') - result = result.replace('; ;',';') + """Eliminate the bracketed var names in doc, line strings""" + result = re.sub(BRACKET_RE, ";", str(string)) + result = result.lstrip(";") + result = result.lstrip(" ") + result = result.replace("; ;", ";") return result + def most_recent(semicolon_delimited_string): result = semicolon_delimited_string.split(";")[-1] return result + def to_json(data, outfilepath=None): if data: if outfilepath: - with open(outfilepath, 'w') as outfile: + with open(outfilepath, "w") as outfile: json.dump(data, outfile) else: if hasattr(sys.stdout, "buffer"): @@ -36,117 +37,138 @@ def to_json(data, outfilepath=None): else: json.dump(data, sys.stdout) -def to_csv(parsed_filing, object_id=None, standardizer=None, documentation=True, vd=None, outfilepath=None): + +def to_csv( + parsed_filing, + object_id=None, + standardizer=None, + documentation=True, + vd=None, + outfilepath=None, +): if not vd: vd = VersionDocumentizer() - stdout = getattr(sys.stdout, 'buffer', sys.stdout) + if outfilepath: - stdout = open(outfilepath, 'wb') # or 'wb' ? 
- - fieldnames = [] - fieldnames = [ - 'object_id', 'form', 'line_number', 'description', 'value', 'variable_name', - 'xpath', 'in_group', 'group_name', 'group_index' - ] - writer = unicodecsv.DictWriter( - stdout, - fieldnames=fieldnames, - encoding='utf-8', - quoting=csv.QUOTE_MINIMAL - ) - writer.writeheader() # this fails in python3? + out_file = open(outfilepath, "w") + else: + out_file = sys.stdout + + fieldnames = [ + "object_id", + "form", + "line_number", + "description", + "value", + "variable_name", + "xpath", + "in_group", + "group_name", + "group_index", + ] + writer = csv.DictWriter(out_file, fieldnames=fieldnames, quoting=csv.QUOTE_MINIMAL) + writer.writeheader() # this fails in python3? results = parsed_filing.get_result() if results: for result in results: - for this_result in result['csv_line_array']: + for this_result in result["csv_line_array"]: vardata = None try: - vardata = standardizer.get_var(this_result['xpath']) + vardata = standardizer.get_var(this_result["xpath"]) except KeyError: pass if vardata: - this_result['variable_name'] = vardata['db_table'] + "." + vardata['db_name'] + this_result["variable_name"] = ( + vardata["db_table"] + "." 
+ vardata["db_name"] + ) raw_line_num = vd.get_line_number( - this_result['xpath'], - parsed_filing.get_version() + this_result["xpath"], parsed_filing.get_version() ) - this_result['line_number'] = debracket(raw_line_num) + this_result["line_number"] = debracket(raw_line_num) raw_description = vd.get_description( - this_result['xpath'], - parsed_filing.get_version() + this_result["xpath"], parsed_filing.get_version() ) - this_result['description'] = debracket(raw_description) - this_result['form'] = this_result['xpath'].split("/")[1] - this_result['object_id'] = object_id + this_result["description"] = debracket(raw_description) + this_result["form"] = this_result["xpath"].split("/")[1] + this_result["object_id"] = object_id writer.writerow(this_result) -def to_txt(parsed_filing, standardizer=None, documentation=True, vd=None, outfilepath=None): +def to_txt( + parsed_filing, standardizer=None, documentation=True, vd=None, outfilepath=None +): if not vd: vd = VersionDocumentizer() results = parsed_filing.get_result() this_sked_name = None if outfilepath: - outfile = open(outfilepath, 'w') + outfile = open(outfilepath, "w") if results: for result in results: - for this_result in result['csv_line_array']: + for this_result in result["csv_line_array"]: - #### Collect the variables we need + # Collect the variables we need vardata = None - textoutput = "\n" # This is what we'll eventually write out - this_result['form'] = this_result['xpath'].split("/")[1] + textoutput = "\n" # This is what we'll eventually write out + this_result["form"] = this_result["xpath"].split("/")[1] try: - vardata = standardizer.get_var(this_result['xpath']) + vardata = standardizer.get_var(this_result["xpath"]) except KeyError: pass if vardata: - this_result['variable_name'] = vardata['db_table'] + "." + vardata['db_name'] + this_result["variable_name"] = ( + vardata["db_table"] + "." 
+ vardata["db_name"] + ) raw_line_num = vd.get_line_number( - this_result['xpath'], - parsed_filing.get_version() + this_result["xpath"], parsed_filing.get_version() ) - this_result['line_number'] = debracket(raw_line_num) + this_result["line_number"] = debracket(raw_line_num) raw_description = vd.get_description( - this_result['xpath'], - parsed_filing.get_version() + this_result["xpath"], parsed_filing.get_version() ) - this_result['description'] = debracket(raw_description) - - #### Write the output, now that we've got the vars - - if this_sked_name != this_result['form']: - textoutput += "\n\n\n" + ASTERISKS + "\tSchedule %s\n" % this_result['form'] - this_sked_name = this_result['form'] - - textoutput += "\n" + ASTERISKS + "\n Value: '%s'\nForm: %s\nLine:%s\nDescription:%s" % ( - this_result['value'], - this_result['form'], - this_result['line_number'], - this_result['description'], + this_result["description"] = debracket(raw_description) + + # Write the output, now that we've got the vars + + if this_sked_name != this_result["form"]: + textoutput += ( + "\n\n\n" + ASTERISKS + "\tSchedule %s\n" % this_result["form"] + ) + this_sked_name = this_result["form"] + + textoutput += ( + "\n" + + ASTERISKS + + "\n Value: '%s'\nForm: %s\nLine:%s\nDescription:%s" + % ( + this_result["value"], + this_result["form"], + this_result["line_number"], + this_result["description"], + ) ) if documentation: - textoutput += "\nXpath:%s" % (this_result['xpath']) + textoutput += "\nXpath:%s" % (this_result["xpath"]) - if this_result['in_group']: + if this_result["in_group"]: textoutput += "\nGroup: %s group_index %s" % ( - this_result['group_name'], - this_result['group_index'] + this_result["group_name"], + this_result["group_index"], ) else: textoutput += "\nGroup:" - + if outfilepath: outfile.write(textoutput) else: sys.stdout.write(textoutput) if outfilepath: - outfile.close() \ No newline at end of file + outfile.close() diff --git a/irs_reader/type_utils.py 
b/irs_reader/type_utils.py index 7f5fc38..7c6d86d 100644 --- a/irs_reader/type_utils.py +++ b/irs_reader/type_utils.py @@ -6,6 +6,6 @@ dictType = type(dict()) orderedDictType = type(OrderedDict()) listType = type(list()) -unicodeType = type(u'') +unicodeType = type("") noneType = type(None) -strType = type('') +strType = type("") diff --git a/irs_reader/xmlrunner.py b/irs_reader/xmlrunner.py index d8ca396..88ccae7 100644 --- a/irs_reader/xmlrunner.py +++ b/irs_reader/xmlrunner.py @@ -1,21 +1,23 @@ from .filing import Filing -from .standardizer import Standardizer, Documentizer, VersionDocumentizer +from .settings import ALLOWED_VERSIONSTRINGS, CSV_ALLOWED_VERSIONSTRINGS from .sked_dict_reader import SkedDictReader +from .standardizer import Documentizer, Standardizer + # from .log_utils import configure_logging from .type_utils import listType -from .settings import WORKING_DIRECTORY, ALLOWED_VERSIONSTRINGS, CSV_ALLOWED_VERSIONSTRINGS class XMLRunner(object): - """ Load a Standardizer just once while running multiple filings - Return Filing objects with results, keyerrors set + """Load a Standardizer just once while running multiple filings + Return Filing objects with results, keyerrors set """ + def __init__(self, documentation=False, standardizer=None, csv_format=False): self.documentation = documentation self.csv_format = csv_format if documentation: - if not standardizer: + if not standardizer: self.standardizer = Documentizer() else: if standardizer: @@ -30,10 +32,10 @@ def get_standardizer(self): return self.standardizer def _run_schedule_k(self, sked, object_id, sked_dict, path_root, ein): - assert sked == 'IRS990ScheduleK' + assert sked == "IRS990ScheduleK" if type(sked_dict) == listType: for individual_sked in sked_dict: - doc_id = individual_sked['@documentId'] + doc_id = individual_sked["@documentId"] reader = SkedDictReader( self.standardizer, self.group_dicts, @@ -45,13 +47,14 @@ def _run_schedule_k(self, sked, object_id, sked_dict, path_root, ein): ) 
result = reader.parse(individual_sked, parent_path=path_root) - self.whole_filing_data.append({ - 'schedule_name': sked, - 'groups': result['groups'], - 'schedule_parts': result['schedule_parts'], - 'csv_line_array':result['csv_line_array'] - - }) + self.whole_filing_data.append( + { + "schedule_name": sked, + "groups": result["groups"], + "schedule_parts": result["schedule_parts"], + "csv_line_array": result["csv_line_array"], + } + ) else: reader = SkedDictReader( self.standardizer, @@ -63,17 +66,19 @@ def _run_schedule_k(self, sked, object_id, sked_dict, path_root, ein): ) result = reader.parse(sked_dict, parent_path=path_root) - self.whole_filing_data.append({ - 'schedule_name': sked, - 'groups': result['groups'], - 'schedule_parts': result['schedule_parts'], - 'csv_line_array':result['csv_line_array'] - }) + self.whole_filing_data.append( + { + "schedule_name": sked, + "groups": result["groups"], + "schedule_parts": result["schedule_parts"], + "csv_line_array": result["csv_line_array"], + } + ) def _run_schedule(self, sked, object_id, sked_dict, ein): path_root = "/" + sked # Only sked K (bonds) is allowed to repeat - if sked == 'IRS990ScheduleK': + if sked == "IRS990ScheduleK": self._run_schedule_k(sked, object_id, sked_dict, path_root, ein) else: @@ -85,24 +90,27 @@ def _run_schedule(self, sked, object_id, sked_dict, ein): documentation=self.documentation, csv_format=self.csv_format, ) - if sked == 'ReturnHeader990x': + if sked == "ReturnHeader990x": path_root = "/ReturnHeader" result = reader.parse(sked_dict, parent_path=path_root) - self.whole_filing_data.append({ - 'schedule_name': sked, - 'groups': result['groups'], - 'schedule_parts': result['schedule_parts'], - 'csv_line_array':result['csv_line_array'] - - }) + self.whole_filing_data.append( + { + "schedule_name": sked, + "groups": result["groups"], + "schedule_parts": result["schedule_parts"], + "csv_line_array": result["csv_line_array"], + } + ) - if len(result['group_keyerrors']) > 0 or 
len(result['keyerrors'])> 0: - self.filing_keyerr_data.append({ - 'schedule_name': sked, - 'group_keyerrors':result['group_keyerrors'], - 'keyerrors':result['keyerrors'], - }) + if len(result["group_keyerrors"]) > 0 or len(result["keyerrors"]) > 0: + self.filing_keyerr_data.append( + { + "schedule_name": sked, + "group_keyerrors": result["group_keyerrors"], + "keyerrors": result["keyerrors"], + } + ) def run_filing(self, object_id, verbose=False): self.whole_filing_data = [] @@ -112,7 +120,9 @@ def run_filing(self, object_id, verbose=False): this_version = this_filing.get_version() if verbose: print("Filing %s is version %s" % (object_id, this_version)) - if this_version in ALLOWED_VERSIONSTRINGS or ( self.csv_format and this_version in CSV_ALLOWED_VERSIONSTRINGS ): + if this_version in ALLOWED_VERSIONSTRINGS or ( + self.csv_format and this_version in CSV_ALLOWED_VERSIONSTRINGS + ): this_version = this_filing.get_version() schedules = this_filing.list_schedules() ein = this_filing.get_ein() @@ -123,41 +133,18 @@ def run_filing(self, object_id, verbose=False): this_filing.set_result(self.whole_filing_data) this_filing.set_keyerrors(self.filing_keyerr_data) - if verbose and not self.csv_format: # csv format works on years with many, many keyerrors, - if len(self.filing_keyerr_data)>0: + if ( + verbose and not self.csv_format + ): # csv format works on years with many, many keyerrors, + if len(self.filing_keyerr_data) > 0: print("In %s keyerrors: %s" % (object_id, self.filing_keyerr_data)) else: print("No keyerrors found") return this_filing else: - print("Filing version %s isn't supported for this operation" % this_version ) + print("Filing version %s isn't supported for this operation" % this_version) return this_filing - """ - def run_from_filing_obj(self, this_filing, verbose=False): - - #Run from a pre-created filing object. 
- - self.whole_filing_data = [] - self.filing_keyerr_data = [] - this_filing.process(verbose=verbose) - object_id = this_filing.get_object_id() - this_version = this_filing.get_version() - if this_version in ALLOWED_VERSIONSTRINGS: - this_version = this_filing.get_version() - schedules = this_filing.list_schedules() - ein = this_filing.get_ein() - for sked in schedules: - sked_dict = this_filing.get_schedule(sked) - self._run_schedule(sked, object_id, sked_dict, ein) - this_filing.set_result(self.whole_filing_data) - this_filing.set_keyerrors(self.filing_keyerr_data) - return this_filing - else: - return this_filing - """ - - def run_sked(self, object_id, sked, verbose=False): """ sked is the proper name of the schedule: @@ -168,7 +155,9 @@ def run_sked(self, object_id, sked, verbose=False): this_filing = Filing(object_id) this_filing.process(verbose=verbose) this_version = this_filing.get_version() - if this_version in ALLOWED_VERSIONSTRINGS or ( self.csv_format and this_version in CSV_ALLOWED_VERSIONSTRINGS ): + if this_version in ALLOWED_VERSIONSTRINGS or ( + self.csv_format and this_version in CSV_ALLOWED_VERSIONSTRINGS + ): this_version = this_filing.get_version() ein = this_filing.get_ein() sked_dict = this_filing.get_schedule(sked) @@ -178,5 +167,5 @@ def run_sked(self, object_id, sked, verbose=False): this_filing.set_keyerrors(self.filing_keyerr_data) return this_filing else: - print("Filing version %s isn't supported for this operation" % this_version ) + print("Filing version %s isn't supported for this operation" % this_version) return this_filing diff --git a/metadata b/metadata deleted file mode 160000 index 4ad69cc..0000000 --- a/metadata +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4ad69cc0f68dedb1137ccae34c4c84f88295b0a9 diff --git a/setup.py b/setup.py index b8063c7..5b1d891 100644 --- a/setup.py +++ b/setup.py @@ -1,44 +1,52 @@ from distutils.core import setup import os -NAME = 'irsx' -HUMAN_NAME = 'irsx' +NAME = "irsx" +HUMAN_NAME = "irsx" HERE 
= os.path.abspath(os.path.dirname(__file__)) version_ns = {} -with open(os.path.join(HERE, 'irs_reader', '_version.py')) as f: +with open(os.path.join(HERE, "irs_reader", "_version.py")) as f: exec(f.read(), {}, version_ns) -setup(name=HUMAN_NAME, - description = "Turn the IRS' versioned XML 990's into python objects \ +setup( + name=HUMAN_NAME, + description="Turn the IRS' versioned XML 990's into python objects \ with original line number and description.", - version = version_ns['__version__'], - author = 'Jacob Fenton', - author_email = 'jsfenfen@gmail.com', - url = 'https://github.com/jsfenfen/990-xml-reader', - license = 'MIT', - setup_requires = ["setuptools", ], - install_requires = ['requests', 'xmltodict', 'unicodecsv'], - tests_require = ['nose', 'requests', 'xmltodict', 'unicodecsv', 'tox', 'tox-pyenv',], - packages = ['irsx'], - package_dir = {'irsx': 'irs_reader'}, - package_data = {'irsx': ['metadata/*.csv']}, - keywords = ['990', 'nonprofit', 'tax'], - entry_points = { - "console_scripts": ["irsx=irsx.irsx_cli:main", - "irsx_index=irsx.irsx_index_cli:main", - "irsx_retrieve=irsx.irsx_retrieve_cli:main"] - }, - classifiers=[ - # How mature is this project? 
Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - - ], - ) + version=version_ns["__version__"], + author="Jacob Fenton", + author_email="jsfenfen@gmail.com", + url="https://github.com/jsfenfen/990-xml-reader", + license="MIT", + setup_requires=[ + "setuptools", + ], + install_requires=["requests", "xmltodict", "django-environ"], + extras_require={ + "tests": [ + "pytest", + ] + }, + packages=["irsx"], + package_dir={"irsx": "irs_reader"}, + package_data={"irsx": ["metadata/*.csv"]}, + keywords=["990", "nonprofit", "tax"], + entry_points={ + "console_scripts": [ + "irsx=irsx.irsx_cli:main", + "irsx_index=irsx.irsx_index_cli:main", + "irsx_retrieve=irsx.irsx_retrieve_cli:main", + ] + }, + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + ], +) diff --git a/tests.py b/tests.py deleted file mode 100644 index 9342ba6..0000000 --- a/tests.py +++ /dev/null @@ -1,242 +0,0 @@ -import os -import json -from unittest import TestCase - -from irs_reader.file_utils import validate_object_id -from irs_reader.filing import Filing -from irs_reader.settings import WORKING_DIRECTORY, ALLOWED_VERSIONSTRINGS -from irs_reader.standardizer import Standardizer -from irs_reader.sked_dict_reader import SkedDictReader -from irs_reader.type_utils import listType -from irs_reader.xmlrunner import XMLRunner - - - -## Tests need to be reworked following IRS change in release. 
- - -# some test ids -from irs_reader.object_ids import object_ids_2017, \ - object_ids_2016, object_ids_2015 - -# For running cli stuff -from irs_reader.irsx_cli import run_main as run_cli_main, \ - get_parser as get_cli_parser -from irs_reader.irsx_index_cli import run_cli_index_main, \ - get_cli_index_parser - - -# FILING_2015V21 = '201642229349300909' -# FILING_2015V21_skeds = [ -# 'ReturnHeader990x', 'IRS990', 'IRS990ScheduleA', -# 'IRS990ScheduleB', 'IRS990ScheduleD', 'IRS990ScheduleM', -# 'IRS990ScheduleO' -# ] - -# # SUTTER HEALTH SACRAMENTO REGION 2014 filing has multiple schedule K's. -# FILING_2014V50 = '201533089349301428' - -# FILING_2014V50_skeds = [ -# 'ReturnHeader990x', 'IRS990', 'IRS990ScheduleA', 'IRS990ScheduleB', -# 'IRS990ScheduleC', 'IRS990ScheduleD', 'IRS990ScheduleG', -# 'IRS990ScheduleH', 'IRS990ScheduleI', 'IRS990ScheduleJ', -# 'IRS990ScheduleK', 'IRS990ScheduleL', 'IRS990ScheduleM', -# 'IRS990ScheduleO', 'IRS990ScheduleR' -# ] - -FILING_2022 = '202210409349301026' - -# don't bother testing every filing in tests -TEST_DEPTH = 10 - -# When set to false don't test download files that are already there. -# Runs faster set to off! -DOWNLOAD = False - - -def test_valid_object_id(): - result = validate_object_id(FILING_2022) - - -def test_process_from_id_only(): - a = Filing(FILING_2022) - a.process() - - -# def test_process_from_id_only_2(): -# a = Filing(FILING_2014V50) -# a.process() -# assert a.get_version() == '2014v5.0' - - -# def test_process_with_filepath(): -# filename = "%s_public.xml" % FILING_2015V21 -# filepath = os.path.join(WORKING_DIRECTORY, filename) -# a = Filing(FILING_2015V21, filepath=filepath) -# a.process() -# assert a.get_version() == '2015v2.1' - - -# test without runner -class TestConversion: - """ Still doesn't validate actual values, but... 
""" - - def setUp(self): - self.xml_runner = XMLRunner() - - def test_case_1(self): - parsed_filing = self.xml_runner.run_filing(FILING_2022) - - # def test_case_2(self): - # object_ids = object_ids_2017[:TEST_DEPTH] \ - # + object_ids_2016[:TEST_DEPTH] + object_ids_2015[:TEST_DEPTH] - # for object_id in object_ids: - # self.xml_runner.run_filing(object_id) - -# class TestRunner: -# """ Test using runner class """ - -# def setUp(self): -# self.xml_runner = XMLRunner() - -# def test1(self): -# parsed_filing = self.xml_runner.run_filing(FILING_2022) -# assert parsed_filing.get_type()=='IRS990' -# parsed_filing_schedules = parsed_filing.list_schedules() -# for sked in FILING_2015V21_skeds: -# assert sked in parsed_filing_schedules -# parsed_filing.get_parsed_sked(sked) - -# def test_multiple_sked_ks(self): -# parsed_filing = self.xml_runner.run_filing(FILING_2014V50) -# assert parsed_filing.get_type()=='IRS990' -# parsed_filing_schedules = parsed_filing.list_schedules() -# for sked in FILING_2014V50_skeds: -# assert sked in parsed_filing_schedules -# parsed_filing.get_parsed_sked(sked) -# def test_with_standardizer(self): -# standardizer = Standardizer() -# self.xml_runner = XMLRunner(standardizer=standardizer) - - -# class TestWithDownload: -# def setUp(self): -# self.filing = Filing(FILING_2015V21) -# if os.path.isfile(self.filing.get_filepath()): -# if DOWNLOAD: -# os.remove(self.filing.get_filepath()) - -# def test_case_1(self): -# self.filing.process() -# assert self.filing.get_version() == '2015v2.1' - -# def test_case_2(self): -# self.filing.process() -# f_skeds = self.filing.list_schedules() -# assert f_skeds == FILING_2015V21_skeds -# for f_sked in f_skeds: -# self.filing.get_schedule(f_sked) - - -class TestCommandLine: - def setUp(self): - parser = get_cli_parser() - self.parser = parser - - def test_cli_1(self): - args = self.parser.parse_args([FILING_2022, '--verbose']) - # Does it run? Output is to std out. 
- run_cli_main(args) - - def test_cli_2(self): - # dump only main 990 in bare json format - test_args = ['--schedule', 'IRS990', '--xpath', '202210409349301026'] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_3(self): - test_args = ['--schedule', 'IRS990', FILING_2022] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - """Testing the csv option without file set somehow breaks - it seems like it's some interaction between how nose handles output - and how we're outputting? Point is, the script works when the test fails. - So only test with the --file output option... - """ - def test_cli_4(self): - test_args = [ - '--schedule', 'IRS990', - '--format', 'csv', - '--file', 'testout.csv', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - - def test_cli_5(self): - test_args = [ - '--schedule', 'IRS990', - '--format', 'txt', - '--file','testout.csv', - '--verbose', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_6(self): - test_args = [ - '--format', 'txt', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_7(self): - test_args = [ - '--format', 'txt', - '--xpath', - '--verbose', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_8(self): - test_args = [ - '--list_schedules', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_8(self): - test_args = [ - '--format', 'txt', - '202210409349301026' - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - - def test_cli_namespaced(self): - test_args = [ - '--format', 'txt', - '202210409349301026' # tags start with "irs:" - ] - args = self.parser.parse_args(test_args) - run_cli_main(args) - -class TestCommandLine_Index: - - def setUp(self): - parser = get_cli_index_parser() - self.parser = 
parser - - def test_cli_index_1(self): - args = self.parser.parse_args(['--year', '2017']) - # Does it run? Output is to the 2017 index file. - if DOWNLOAD: - run_cli_index_main(args) - diff --git a/tests/test_all.py b/tests/test_all.py new file mode 100644 index 0000000..7ab18b4 --- /dev/null +++ b/tests/test_all.py @@ -0,0 +1,230 @@ +import os + +import pytest +from irsx.file_utils import validate_object_id +from irsx.filing import Filing +from irsx.irsx_cli import get_parser as get_cli_parser +from irsx.irsx_cli import run_main as run_cli_main +from irsx.irsx_index_cli import get_cli_index_parser, run_cli_index_main +from irsx.object_ids import object_ids_2015, object_ids_2016, object_ids_2017 +from irsx.settings import WORKING_DIRECTORY +from irsx.standardizer import Standardizer +from irsx.xmlrunner import XMLRunner + +FILING_2015V21 = "201642229349300909" +FILING_2015V21_skeds = [ + "ReturnHeader990x", + "IRS990", + "IRS990ScheduleA", + "IRS990ScheduleB", + "IRS990ScheduleD", + "IRS990ScheduleM", + "IRS990ScheduleO", +] + +# SUTTER HEALTH SACRAMENTO REGION 2014 filing has multiple schedule K's. +FILING_2014V50 = "201533089349301428" + +FILING_2014V50_skeds = [ + "ReturnHeader990x", + "IRS990", + "IRS990ScheduleA", + "IRS990ScheduleB", + "IRS990ScheduleC", + "IRS990ScheduleD", + "IRS990ScheduleG", + "IRS990ScheduleH", + "IRS990ScheduleI", + "IRS990ScheduleJ", + "IRS990ScheduleK", + "IRS990ScheduleL", + "IRS990ScheduleM", + "IRS990ScheduleO", + "IRS990ScheduleR", +] + +FILING_2022 = "202210409349301026" + +# don't bother testing every filing in tests +TEST_DEPTH = 10 + +# When set to false don't test download files that are already there. +# Runs faster set to off! 
+DOWNLOAD = False + + +def test_valid_object_id(): + result = validate_object_id(FILING_2022) + + +def test_process_from_id_only(): + a = Filing(FILING_2022) + a.process() + + +def test_process_from_id_only_2(): + a = Filing(FILING_2014V50) + a.process() + assert a.get_version() == "2014v5.0" + + +@pytest.mark.skip(reason="Not sure why this is failing now. Was commented out.") +def test_process_with_filepath(): + filename = "%s_public.xml" % FILING_2015V21 + filepath = os.path.join(WORKING_DIRECTORY, filename) + a = Filing(FILING_2015V21, filepath=filepath) + a.process() + assert a.get_version() == "2015v2.1" + + +# test without runner +class TestConversion: + """Still doesn't validate actual values, but...""" + + def setup_method(self): + self.xml_runner = XMLRunner() + + def test_case_1(self): + self.xml_runner.run_filing(FILING_2022) + + def test_case_2(self): + object_ids = ( + object_ids_2017[:TEST_DEPTH] + + object_ids_2016[:TEST_DEPTH] + + object_ids_2015[:TEST_DEPTH] + ) + for object_id in object_ids: + self.xml_runner.run_filing(object_id) + + +class TestRunner: + """Test using runner class""" + + def setup_method(self): + self.xml_runner = XMLRunner() + + @pytest.mark.skip(reason="Not sure why this is failing now. 
Was commented out.") + def test1(self): + parsed_filing = self.xml_runner.run_filing(FILING_2022) + assert parsed_filing.get_type() == "IRS990" + parsed_filing_schedules = parsed_filing.list_schedules() + for sked in FILING_2015V21_skeds: + assert sked in parsed_filing_schedules + parsed_filing.get_parsed_sked(sked) + + def test_multiple_sked_ks(self): + parsed_filing = self.xml_runner.run_filing(FILING_2014V50) + assert parsed_filing.get_type() == "IRS990" + parsed_filing_schedules = parsed_filing.list_schedules() + for sked in FILING_2014V50_skeds: + assert sked in parsed_filing_schedules + parsed_filing.get_parsed_sked(sked) + + def test_with_standardizer(self): + standardizer = Standardizer() + self.xml_runner = XMLRunner(standardizer=standardizer) + + +class TestWithDownload: + def setup_method(self): + self.filing = Filing(FILING_2015V21) + if os.path.isfile(self.filing.get_filepath()): + if DOWNLOAD: + os.remove(self.filing.get_filepath()) + + def test_case_1(self): + self.filing.process() + assert self.filing.get_version() == "2015v2.1" + + def test_case_2(self): + self.filing.process() + f_skeds = self.filing.list_schedules() + assert f_skeds == FILING_2015V21_skeds + for f_sked in f_skeds: + self.filing.get_schedule(f_sked) + + +class TestCommandLine: + def setup_method(self): + parser = get_cli_parser() + self.parser = parser + + def test_cli_1(self): + args = self.parser.parse_args([FILING_2022, "--verbose"]) + # Does it run? Output is to std out. 
+ run_cli_main(args) + + def test_cli_2(self): + # dump only main 990 in bare json format + test_args = ["--schedule", "IRS990", "--xpath", "202210409349301026"] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_3(self): + test_args = ["--schedule", "IRS990", FILING_2022] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_4(self): + test_args = [ + "--schedule", + "IRS990", + "--format", + "csv", + "--file", + "testout.csv", + "202210409349301026", + ] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_5(self): + test_args = [ + "--schedule", + "IRS990", + "--format", + "txt", + "--file", + "testout.csv", + "--verbose", + "202210409349301026", + ] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_6(self): + test_args = ["--format", "txt", "202210409349301026"] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_7(self): + test_args = ["--format", "txt", "--xpath", "--verbose", "202210409349301026"] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_8(self): + test_args = ["--list_schedules", "202210409349301026"] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_9(self): + test_args = ["--format", "txt", "202210409349301026"] + args = self.parser.parse_args(test_args) + run_cli_main(args) + + def test_cli_namespaced(self): + test_args = ["--format", "txt", "202210409349301026"] # tags start with "irs:" + args = self.parser.parse_args(test_args) + run_cli_main(args) + + +class TestCommandLine_Index: + def setup_method(self): + parser = get_cli_index_parser() + self.parser = parser + + def test_cli_index_1(self): + args = self.parser.parse_args(["--year", "2017"]) + # Does it run? Output is to the 2017 index file. 
+ if DOWNLOAD: + run_cli_index_main(args) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index ee2e89a..0000000 --- a/tox.ini +++ /dev/null @@ -1,5 +0,0 @@ -[tox] -envlist = py36,py37,py38,py39 -[testenv] -deps=nose -commands=nosetests