-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtitleListImport.py
More file actions
70 lines (58 loc) · 2.75 KB
/
titleListImport.py
File metadata and controls
70 lines (58 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import boto3
import csv
from elasticsearch import helpers, Elasticsearch, RequestsHttpConnection
import io
from requests_aws4auth import AWS4Auth
import yaml
# Resolve AWS credentials and build the S3 client once, at import time.
credentials = boto3.Session().get_credentials()
s3 = boto3.client('s3')

# Read the deployment configuration; the keys used below are `region`,
# `service` and `eshost`.
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6. It also
# refuses to construct arbitrary Python objects from the file.
with open("prod_config.yml", 'r') as stream:
    config = yaml.safe_load(stream)

# Sign Elasticsearch requests with SigV4. The session token has to be passed
# along: temporary credentials are rejected without it. For long-lived keys
# `credentials.token` is None, which AWS4Auth treats as "no token".
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    config.get('region'),
    config.get('service'),
    session_token=credentials.token,
)

# HTTPS client for the Elasticsearch domain named in the configuration.
es = Elasticsearch(
    hosts=[{'host': config.get('eshost'), 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
def run(event, context):
    """Rebuild the ``new_title_list`` index from a semicolon-delimited CSV in S3.

    The object named by the first record of the S3 event is downloaded and
    parsed, the 'New Titles' column is dropped from every row, and the index
    is then deleted, recreated with an explicit mapping, and bulk-loaded.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is read.
        context: Unused.

    Returns:
        The string ``"success"``.

    Raises:
        KeyError: If the event lacks the expected S3 fields, or a CSV row has
            no 'New Titles' column. Parsing happens before the index is
            deleted, so a malformed file leaves the existing index untouched.
    """
    import urllib.parse  # local import: only needed to decode the event key

    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys are URL-encoded in S3 event notifications (a space arrives
    # as '+'), so decode before asking S3 for the object.
    key = urllib.parse.unquote_plus(s3_info['object']['key'], encoding='utf-8')

    # Fetch the file from S3. 'utf-8-sig' decodes plain UTF-8 unchanged and
    # strips a leading byte-order mark that would otherwise end up inside the
    # first column name.
    response = s3.get_object(Bucket=bucket, Key=key)
    text = response['Body'].read().decode('utf-8-sig')

    # Hand the csv module a file-like object with newline='' instead of
    # splitting on '\n': splitting loses newlines embedded in quoted fields.
    csv.register_dialect('semicolon', delimiter=';', quoting=csv.QUOTE_ALL)
    csv_read = csv.DictReader(io.StringIO(text, newline=''), dialect="semicolon")

    file_data = []
    for row in csv_read:
        # Deliberately strict: a missing column aborts before any index change.
        del row['New Titles']
        file_data.append(row)

    # Start from a clean index; 400/404 are ignored so a missing index is fine.
    es.indices.delete(index='new_title_list', ignore=[400, 404])

    mapping = {
        "mappings": {
            "properties": {
                "Publication Date": {"type": "keyword"},
                "Local Item Call Number": {"type": "keyword"},
                "Source Application": {"type": "keyword"},
                "Institution Name": {"type": "keyword"},
                "Author Name": {"type": "keyword"},
                "Title": {"type": "keyword"},
                "Publisher Name": {"type": "keyword"},
                "Edition": {"type": "keyword"},
                "Language Name": {"type": "keyword"},
                "Material Format": {"type": "keyword"},
                "Material Subformat": {"type": "keyword"},
                "Local Item Permanent Shelving Location": {"type": "keyword"},
                "OCLC Number": {"type": "keyword"},
                "Conspectus Subject": {"type": "keyword"},
                "Library of Congress Call Number": {"type": "keyword"},
                "Dewey Call Number": {"type": "keyword"},
                # Accepts either a full timestamp or a bare date.
                "Calendar Date": {
                    "type": "date",
                    "format": "yyyy/MM/dd HH:mm:ss||yyyy/MM/dd",
                },
            }
        }
    }
    es.indices.create(index='new_title_list', body=mapping)

    helpers.bulk(es, file_data, index='new_title_list', doc_type='_doc')
    return "success"