Skip to content

Commit 52f6b77

Browse files
committed
add download to get_values
1 parent 0cc8497 commit 52f6b77

3 files changed

Lines changed: 53 additions & 10 deletions

File tree

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ The __get_values__ function is the primary function for obtaining RegData from t
100100
* filtered (optional) - specify if poorly-performing industry results should be excluded. Default is True.
101101
* summary (optional) - specify if summary results should be returned, instead of document-level results. Default is True.
102102
* country (optional) - specify if all values for a country's jurisdiction ID should be returned. Default is False.
103+
* industryType (optional) - specify the level of NAICS industries to include. Default is '3-Digit'.
104+
* download (optional) - specify a file path where a csv of the results should be saved. Default is False (no file is written).
103105
* verbose (optional) - value specifying how much debugging information should be printed for each function call. Higher number specifies more information, default is 0.
104106

105107
In the example below, we are interested in the total number of restrictions and total number of words for the US (get_jurisdictions(38)) for the period 2010 to 2019.
@@ -168,5 +170,18 @@ agency_restrictions_ind = agency_by_industry.merge(
168170
agencies, on='agency_id')
169171
```
170172

173+
## Downloading Data
174+
175+
There are two different ways to download data retrieved from RegCensusAPI:
176+
177+
1. Use the pandas `df.to_csv(outpath)` method, which writes the data to a csv file at the given outpath. See the pandas [documentation][3] for more options.
178+
179+
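2. The __get_values__ function includes a `download` argument, which writes a csv of the results to the given path as part of the API call itself. Note that when `download` is set, the results are saved to the file instead of being returned as a dataframe. See below for an example of this call.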
180+
181+
```
182+
rc.get_values(series = [1,2], jurisdiction = 38, date = [2010, 2019], download='regdata2010to2019.csv')
183+
```
184+
171185
[1]:https://api.quantgov.org/swagger-ui.html
172186
[2]:https://www.quantgov.org/download-interactively
187+
[3]:https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

regcensus/api.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212

1313
def get_values(series, jurisdiction, date, filtered=True, summary=True,
1414
documentType=3, agency=None, industry=None, dateIsRange=True,
15-
country=False, industryType='3-Digit', verbose=0):
15+
country=False, industryType='3-Digit',
16+
download=False, verbose=0):
1617
"""
1718
Get values for a specific jurisdiction and series
1819
@@ -29,6 +30,10 @@ def get_values(series, jurisdiction, date, filtered=True, summary=True,
2930
dateIsRange (optional): Indicating whether the time parameter is range
3031
or should be treated as single data points
3132
country (optional): Get all values for country ID
33+
industryType (optional): Level of NAICS industries to include,
34+
default is '3-Digit'
35+
download (optional): If not False, a path location for a
36+
downloaded csv of the results
3237
verbose (optional): Print out the url of the API call
3338
3439
Returns: pandas dataframe with the values and various metadata
@@ -131,6 +136,11 @@ def get_values(series, jurisdiction, date, filtered=True, summary=True,
131136
# Prints error message if call fails
132137
if (output.columns[:3] == ['title', 'status', 'detail']).all():
133138
print('WARNING:', output.iloc[0][-1])
139+
elif download:
140+
if type(download) == str:
141+
clean_columns(output).to_csv(download, index=False)
142+
else:
143+
print("Valid outpath required to download.")
134144
# Returns clean data if no error
135145
else:
136146
return clean_columns(output)

tests/test_api.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
import os
23
import regcensus as rc
34

45

@@ -63,9 +64,11 @@ def test_get_values_multiple_series():
6364
assert order_results(results, 'seriesValue') == [405647.0, 35420432.0]
6465

6566

66-
def test_get_values_incorrect_series():
67-
results = rc.get_values(series=None, jurisdiction=38, date=2019)
68-
assert not results
67+
def test_get_values_incorrect_series(capsys):
68+
rc.get_values(series=None, jurisdiction=38, date=2019)
69+
assert capsys.readouterr().out == (
70+
'Valid series ID required. Select from the following list:\n'
71+
)
6972

7073

7174
def test_get_values_multiple_jurisdictions():
@@ -102,9 +105,9 @@ def test_get_values_one_industry():
102105
assert not results
103106

104107

105-
def test_get_values_incorrect_jurisdiction():
106-
results = rc.get_values(series=1, jurisdiction=None, date=2019)
107-
assert not results
108+
def test_get_values_incorrect_jurisdiction(capsys):
109+
rc.get_values(series=1, jurisdiction=None, date=2019)
110+
assert capsys.readouterr().out == 'Valid jurisdiction ID required.\n'
108111

109112

110113
def test_get_values_date_range():
@@ -124,9 +127,9 @@ def test_get_values_multiple_dates():
124127
]
125128

126129

127-
def test_get_values_incorrect_dates():
128-
results = rc.get_values(series=1, jurisdiction=38, date=None)
129-
assert not results
130+
def test_get_values_incorrect_dates(capsys):
131+
rc.get_values(series=1, jurisdiction=38, date=None)
132+
assert capsys.readouterr().out == 'Valid date is required.\n'
130133

131134

132135
def test_get_values_country():
@@ -149,6 +152,21 @@ def test_get_values_multiple_agencies():
149152
assert order_results(results, 'seriesValue') == [34167.0, 91227.0]
150153

151154

155+
def test_get_values_download():
156+
rc.get_values(
157+
series=91, jurisdiction=38, date=2019, agency=195, download='test.csv'
158+
)
159+
assert os.path.exists('test.csv')
160+
os.remove('test.csv')
161+
162+
163+
def test_get_values_incorrect_download(capsys):
164+
rc.get_values(
165+
series=91, jurisdiction=38, date=2019, agency=195, download=True
166+
)
167+
assert capsys.readouterr().out == 'Valid outpath required to download.\n'
168+
169+
152170
def test_list_document_types():
153171
results = rc.list_document_types()
154172
assert results['All Regulations'] == 3

0 commit comments

Comments
 (0)