-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
144 lines (122 loc) · 4.18 KB
/
main.py
File metadata and controls
144 lines (122 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "requests>=2.33.1",
# ]
# ///
import json
import logging
import requests
from constants import (
API_DOI,
FIELD_MATCH_SCORE,
FIELD_PUBTYPE,
FIELD_VENUE,
FIELD_YEAR,
FIELD_AUTHORS,
FIELD_TITLE,
FIELD_JOURNAL,
FIELD_DOI,
BASE_URL,
API_POINT,
SCORE_CRITICAL,
SCORE_LIMIT_BAD,
SCORE_LIMIT_GOOD,
SCORE_UNKNOWN,
bcolors,
)
from comparison import compare_references
from scoring import compute_total_score
# Module-level logger used by every function in this script.
LOGGER = logging.getLogger(__name__)

# Comma-separated field names sent as the "fields" query parameter of each
# lookup request.
RETURN_FIELDS = ",".join(
    [
        FIELD_TITLE,
        FIELD_AUTHORS,
        FIELD_YEAR,
        FIELD_VENUE,
        FIELD_JOURNAL,
        FIELD_PUBTYPE,
    ]
)
def output_reference_results(
    reference: dict,
    scores: dict,
) -> None:
    """Log the title and DOI of a reference, then report its scores.

    Args:
        reference: Reference record. Title and DOI are read with
            ``dict.get``, so a missing entry is logged as ``None``.
        scores: Sub-scores, handed unchanged to ``output_score_results``.
    """
    ref_doi = reference.get(FIELD_DOI)
    ref_title = reference.get(FIELD_TITLE)

    LOGGER.info(f"{bcolors.BOLD}Title:{bcolors.ENDC} {ref_title}")
    LOGGER.info(f"{bcolors.BOLD}DOI:{bcolors.ENDC} {ref_doi}")

    output_score_results(scores)
def output_score_results(scores: dict) -> None:
    """Log each sub-score and a verdict based on the total score.

    Sub-scores are logged at DEBUG level. The total returned by
    ``compute_total_score`` is compared first against the sentinel values
    ``SCORE_UNKNOWN`` and ``SCORE_CRITICAL``, then against the thresholds
    ``SCORE_LIMIT_BAD`` and ``SCORE_LIMIT_GOOD``; the first test that matches
    decides the log level and message.

    Args:
        scores: Mapping of field name to numeric sub-score.
    """
    for name, value in scores.items():
        LOGGER.debug(f"Sub-score {name}: {value:.2f}")

    total = compute_total_score(scores)

    # Sentinel totals are checked before the numeric thresholds.
    if total == SCORE_UNKNOWN:
        LOGGER.warning(
            f"{bcolors.WARNING}Could not find a title in the reference. Manually check it.{bcolors.ENDC}"
        )
        return

    if total == SCORE_CRITICAL:
        LOGGER.critical(
            f"{bcolors.FAIL}No match found on SemanticScholar! The reference probably does not exist or is not a published paper.{bcolors.ENDC}"
        )
        return

    if total < SCORE_LIMIT_BAD:
        LOGGER.error(f"{bcolors.FAIL}Very low score: {total:.2f}{bcolors.ENDC}")
        return

    if total < SCORE_LIMIT_GOOD:
        LOGGER.warning(f"{bcolors.WARNING}Low score: {total:.2f}{bcolors.ENDC}")
        return

    LOGGER.info(f"Score: {total:.2f}")
def rate_single_reference(
    reference: dict, *, timeout: float = 30.0
) -> tuple[dict, dict]:
    """Look a reference up on SemanticScholar and score it against the match.

    The lookup goes by DOI when the reference has one, otherwise by title.

    Args:
        reference: Reference record; its DOI and title are read with
            ``dict.get``.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A ``(scores, match)`` tuple. On success ``scores`` is the result of
        ``compare_references`` and ``match`` is the record it was compared
        with. On failure ``scores`` is a single-entry dict keyed by
        ``FIELD_TITLE`` that holds a sentinel score, and ``match`` is empty.
    """
    match: dict = {}
    doi = reference.get(FIELD_DOI)
    title = reference.get(FIELD_TITLE)

    if doi is None and title is None:
        LOGGER.error("Could not get a title")
        # NOTE(review): the literal -1 presumably equals SCORE_UNKNOWN (whose
        # log message talks about a missing title) - confirm in constants.
        return {FIELD_TITLE: -1}, match

    request_params = {"fields": RETURN_FIELDS}
    if doi is not None:
        request_url = f"{BASE_URL}/{API_DOI}{doi}"
    else:
        request_url = f"{BASE_URL}/{API_POINT}"
        request_params["query"] = f"{title}"

    try:
        response = requests.get(
            url=request_url, params=request_params, timeout=timeout
        )
    except requests.RequestException as err:
        # A network failure on one reference must not abort the whole run;
        # report it the same way as an unexpected HTTP status.
        LOGGER.error(f"Error getting response from SemanticScholar: {err}")
        return {FIELD_TITLE: SCORE_UNKNOWN}, match

    # The context manager closes the response on every path, including when
    # decoding the body raises.
    with response:
        if response.status_code == 404:
            # DOI-only references have no title; name the DOI instead of
            # logging 'None'.
            looked_up = title if title is not None else doi
            LOGGER.critical(f"Could not find a match for '{looked_up}'")
            return {FIELD_TITLE: SCORE_CRITICAL}, match
        if response.status_code != 200:
            LOGGER.error(
                f"Error getting response from SemanticScholar. Response code {response.status_code:d}."
            )
            return {FIELD_TITLE: SCORE_UNKNOWN}, match
        payload = response.json()

    if doi is not None:
        # A DOI lookup returns the paper record itself.
        match = payload
    else:
        # A title lookup returns its candidates under "data"; use the first.
        candidates = payload.get("data") or []
        if not candidates:
            LOGGER.critical(f"Could not find a match for '{title}'")
            return {FIELD_TITLE: SCORE_CRITICAL}, match
        match = candidates[0]
        match_quality = match[FIELD_MATCH_SCORE]
        # The match is still scored below; these branches only warn about it.
        if match_quality < 100:
            LOGGER.error("Ultra low quality match.")
        elif match_quality < 175:
            LOGGER.warning("Low quality match")

    # Compare exactly once, whatever the lookup path or match quality.
    scores = compare_references(reference, match)
    return scores, match
def main(references_json) -> None:
    """Score every reference listed in a JSON file and log the results.

    Args:
        references_json: Path of the JSON file to read. Its top-level value
            is iterated, and each entry is passed to
            ``rate_single_reference`` and ``output_reference_results``.
    """
    # Read as UTF-8 explicitly: the platform's locale encoding would garble
    # or reject non-ASCII titles and author names on some systems.
    with open(references_json, "r", encoding="utf-8") as json_file:
        references = json.load(json_file)

    for number, reference in enumerate(references, start=1):
        LOGGER.info(f"Reviewing reference [{number:d}]")
        # The matched record is not needed for the report; only the scores are.
        scores, _ = rate_single_reference(reference)
        output_reference_results(reference, scores)
# Script entry point: parse the command line, configure logging, run main().
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("references_json")
    parser.add_argument(
        "-v", "--verbosity", action="count", default=0, help="Increase output verbosity"
    )
    # Turn the namespace into a dict so the remaining entries can be passed
    # to main() as keyword arguments once "verbosity" has been removed.
    args = vars(parser.parse_args())
    verb = args.pop("verbosity")
    logging.basicConfig(
        format=f"%(asctime)s - [{bcolors.OKBLUE}%(levelname)8s{bcolors.ENDC}]: %(message)s"
    )
    # Each -v lowers the threshold by one level, starting from INFO. Clamp at
    # DEBUG: one step further is 0 (logging.NOTSET), which makes the logger
    # defer to the root logger's level (WARNING by default), so -vv would
    # print less than -v.
    loglevel = max(logging.DEBUG, logging.INFO - verb * 10)
    LOGGER.setLevel(loglevel)
    main(**args)