-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutil.py
More file actions
125 lines (99 loc) · 2.76 KB
/
util.py
File metadata and controls
125 lines (99 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sys
def table(soup):
    """Extract the raw table from a shot finder page.

    Parameters
    ----------
    soup : BeautifulSoup
        A soup object for the shot finder page

    Returns
    -------
    DataFrame
        The first table on the page, reduced to the columns
        'player', 'home', 'result' and 'description'
    """
    # Local import so this function does not depend on a file-level change.
    import io

    # pandas deprecates passing literal HTML to read_html; hand it a
    # file-like object wrapping the same UTF-8 bytes instead.
    markup = io.BytesIO(soup.find('table').encode('utf-8'))

    # The first two rows of the parsed table are discarded.
    frame = pd.read_html(markup)[0].iloc[2:, :]
    frame.columns = ['rk', 'player', 'date', 'team', 'home', 'opp', ' ',
                     'qtr', 'time', 'result', 'description']
    frame = frame.drop(columns=['rk', 'date', 'team', 'opp', ' ', 'qtr', 'time'])

    # Drop rows whose description cell is the literal 'Description'
    # (presumably header rows repeated inside the table body).
    return frame[frame['description'] != 'Description']
def collect_tables(url, soup):
    """Collect the raw table from every page of a paginated result.

    Starting at ``soup``, the table of the current page is extracted and
    the 'Next page' link is followed until a page without such a link is
    reached. That final page is collected too.

    Parameters
    ----------
    url : str
        Prefix that each 'Next page' href is appended to in order to
        build the address of the following page
    soup : BeautifulSoup
        A soup object for the first page

    Returns
    -------
    list
        One raw table (see ``table``) per page, in page order
    """
    progressMessage("Collecting tables", 0)
    tables = []
    while True:
        # A page without any table (e.g. an empty result) contributes
        # nothing instead of crashing inside table().
        if soup.find('table') is not None:
            tables.append(table(soup))
            progressMessage("Collecting tables", len(tables))

        # Look the link up once; the last page has none and ends the loop
        # only after its own table has been collected above.
        next_link = soup.find('a', text='Next page')
        if next_link is None:
            break

        # Update soup object
        html = requests.get(url + next_link.get('href'))
        soup = BeautifulSoup(html.content, 'html.parser')
    progressMessage("Collecting tables: Success\n", 0)
    return tables
def data(tables):
    """Parse a sequence of raw tables into one flat list of data rows.

    Parameters
    ----------
    tables : list
        Raw tables; each element is handed to ``parse_table``

    Returns
    -------
    list
        The rows returned by ``parse_table`` for every table,
        concatenated in input order
    """
    progressMessage("Parsing data", 0)
    rows = []
    for done, raw in enumerate(tables, start=1):
        # Parse table
        rows.extend(parse_table(raw))
        # Keep the progress line moving while tables are processed.
        progressMessage("Parsing data", done)
    progressMessage("Parsing tables: Success\n", 0)
    return rows
def parse_table(table):
    """Parse and organize a raw table, appending description data.

    Parameters
    ----------
    table : DataFrame
        A data frame containing the raw table from a page. The second
        column is compared against '@' and the last column is passed to
        ``parse_description``

    Returns
    -------
    list
        A list of data rows. Each row is a home flag (0 when the second
        column is '@', otherwise 1) followed by the values returned by
        ``parse_description``
    """
    return [
        [0 if row[1] == '@' else 1] + parse_description(row[-1])
        for row in table.values.tolist()
    ]
def parse_description(desc):
    """Parse a play-by-play shot description into a list of data.

    Parameters
    ----------
    desc : str
        A sentence describing a shot taken, e.g.
        'L. James makes 2-pt jump shot from 15 ft (assist by K. Love)'

    Returns
    -------
    list
        ``[shot_distance, shot_type, assisted, result]`` where
        shot_distance is the integer before ' ft' (0 when the sentence
        has no ' from ' part), shot_type is the text after the verb up
        to ' from ' / ' at ', assisted is 1 if 'assist' occurs after the
        verb, and result is 1 for a make and 0 for a miss

    Raises
    ------
    ValueError
        If desc contains neither ' makes ' nor ' misses '
    """
    # Split on makes or misses. Matching the space-delimited verb (the
    # same token that is split on) keeps names that merely contain
    # "makes"/"misses" from being mistaken for the verb.
    for verb, outcome in ((' makes ', 1), (' misses ', 0)):
        _, found, shot = desc.partition(verb)
        if found:
            result = outcome
            break
    else:
        raise ValueError(
            "not a shot description (no ' makes ' or ' misses '): {!r}".format(desc)
        )

    # Get shot type and distance. Test for the exact separator so a name
    # containing "from" cannot select the wrong branch.
    if ' from ' in shot:
        shot_type, _, remainder = shot.partition(' from ')
        shot_distance = int(remainder.split(' ft')[0])
    else:
        shot_type = shot.split(' at ')[0]
        shot_distance = 0

    # Get assist
    assisted = 1 if 'assist' in shot else 0
    return [shot_distance, shot_type, assisted, result]
def progressMessage(message, num):
    """Write a single-line progress indicator to stdout.

    Parameters
    ----------
    message : str
        Text to show
    num : int
        Counter; ``num % 4`` dots are appended after the message

    The output starts with the ANSI erase-line sequence and ends with a
    carriage return rather than a newline, so the next call writes over
    the same terminal line.
    """
    dots = '.' * (num % 4)
    for chunk in ('\x1b[2K', message + dots + "\r"):
        sys.stdout.write(chunk)