-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocalImport.py
More file actions
105 lines (95 loc) · 4.71 KB
/
localImport.py
File metadata and controls
105 lines (95 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import boto3
import csv
from elasticsearch import helpers, Elasticsearch, RequestsHttpConnection
import pycallnumber as pycn
from requests_aws4auth import AWS4Auth
import yaml
# AWS credentials resolved by boto3's default chain (env vars, ~/.aws, IAM role).
credentials = boto3.Session().get_credentials()

# Read deployment settings (expects at least 'region' and 'service' keys).
# BUG FIX: bare yaml.load() without a Loader is deprecated (PyYAML >= 5.1)
# and can construct arbitrary Python objects; safe_load parses plain YAML only.
with open("prod_config.yml", "r") as stream:
    config = yaml.safe_load(stream)

# SigV4 request signing for the Amazon Elasticsearch Service endpoint.
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                   config.get('region'), config.get('service'))

# HTTPS client for the AWS-hosted cluster; RequestsHttpConnection is required
# so the AWS4Auth signer can hook into each request.
es = Elasticsearch(
    hosts=[{'host': "search-wms-example-data-ys7sb4o6l2qka35yinyehh474y.us-east-1.es.amazonaws.com", 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection
)
def indexFile(item_file):
    """Load a pipe-delimited inventory export, normalize call numbers, and
    rebuild the 'ocpsb_items' Elasticsearch index from its rows.

    Parameters
    ----------
    item_file : str
        Path to a '|'-delimited text file with a header row.

    Returns
    -------
    str
        "success" after the bulk index request has been submitted.

    Side effects: deletes and recreates the 'ocpsb_items' index on the
    module-level ``es`` client.
    """
    # Export columns we never want in the search index.
    DROP_FIELDS = (
        'LHR_Item_Materials_Specified',
        'Title_ISBN',
        'LHR_Item_Cost',
        'LHR_Item_Nonpublic_Note',
        'LHR_Item_Public_Note',
        'Item_Due_Date',
        'Item_Issued_Count',
        'Issued_Count_YTD',
        'Item_Soft_Issued_Count',
        'Item_Soft_Issued_Count_YTD',
        'Item_Last_Issued_Date',
        'Item_Last_Inventoried_Date',
        'Item_Deleted_Date',
        'LHR_Date_Entered_on_File',
        'LHR_Item_Acquired_Date',
        'Language_Code',
    )

    csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)
    file_data = []
    # BUG FIX: the original opened the file inline and never closed it,
    # leaking the handle; a context manager guarantees release.
    with open(item_file) as fh:
        for row in csv.DictReader(fh, dialect="piper"):
            for field in DROP_FIELDS:
                del row[field]
            # Normalize the call number into sortable/searchable variants.
            if row['Item_Call_Number']:
                try:
                    normalized = pycn.callnumber(row['Item_Call_Number'])
                    row['cn_type'] = normalized.__class__.__name__
                    try:
                        row['cn_classification'] = str(normalized.classification)
                    except AttributeError:
                        row['cn_classification'] = ""
                    # BUG FIX: the original set cn_class_letters only for LC
                    # call numbers, so successfully-parsed non-LC rows produced
                    # documents missing the field (while the error path set it).
                    # Default it to "" for every parsed number.
                    row['cn_class_letters'] = ""
                    if isinstance(normalized, pycn.units.LC):
                        try:
                            row['cn_class_letters'] = str(normalized.classification.letters)
                        except AttributeError:
                            pass
                    row['n_callnumber_sort'] = normalized.for_sort()
                    row['n_callnumber_search'] = normalized.for_search()
                except pycn.exceptions.InvalidCallNumberStringError:
                    # Unparseable call number: index empty normalization fields.
                    row['cn_type'] = ""
                    row['cn_classification'] = ""
                    row['cn_class_letters'] = ""
                    row['n_callnumber_sort'] = ""
                    row['n_callnumber_search'] = ""
            # An empty string would be rejected by the ES 'date' field; None
            # simply omits the value.
            if not row['Publication_Date']:
                row['Publication_Date'] = None
            file_data.append(row)

    # Rebuild the index from scratch; ignore 400/404 if it does not exist yet.
    es.indices.delete(index='ocpsb_items', ignore=[400, 404])
    mapping = {
        "mappings": {
            "properties": {
                "Institution_Symbol": {"type": "text"},
                "Item_Holding_Location": {"type": "text", "fielddata": "true"},
                "Item_Permanent_Shelving_Location": {"type": "text", "fielddata": "true"},
                "Item_Temporary_Shelving_Location": {"type": "text", "fielddata": "true"},
                "Item_Type": {"type": "text"},
                "Item_Call_Number": {"type": "text"},
                "Item_Enumeration_and_Chronology": {"type": "text"},
                "Author_Name": {"type": "text"},
                "Title": {"type": "text"},
                "Material_Format": {"type": "text", "fielddata": "true"},
                "OCLC_Number": {"type": "text"},
                "Item_Barcode": {"type": "text"},
                "Item_Status_Current_Status": {"type": "text", "fielddata": "true"},
                "n_callnumber_sort": {"type": "text", "fielddata": "true"},
                "n_callnumber_search": {"type": "text", "fielddata": "true"},
                "cn_classification": {"type": "text", "fielddata": "true"},
                "cn_class_letters": {"type": "text", "fielddata": "true"},
                "cn_type": {"type": "text", "fielddata": "true"},
                "Publication_Date": {"type": "date", "format": "Y", "ignore_malformed": "true"}
            }
        }
    }
    es.indices.create(index='ocpsb_items', body=mapping)
    helpers.bulk(es, file_data, index='ocpsb_items', doc_type='_doc')
    return "success"
if __name__ == "__main__":
    # Guarded entry point: importing this module must not trigger a
    # destructive reindex. Also renamed the variable from 'file', which
    # shadowed the builtin.
    inventory_path = "inventory.txt"
    print(indexFile(inventory_path))