-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathworking.py
More file actions
31 lines (24 loc) · 1.07 KB
/
working.py
File metadata and controls
31 lines (24 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# This Python file uses the following encoding: utf-8
# import libraries
from bs4 import BeautifulSoup
import time
from datetime import datetime
import soup_engine
import platform
import constants
# Code taken from the tutorial here https://medium.freecodecamp.org/how-to-scrape-websites-with-python-and-beautifulsoup-5946935d93fe
# and here https://stanford.edu/~mgorkove/cgi-bin/rpython_tutorials/Scraping_a_Webpage_Rendered_by_Javascript_Using_Python.php
############ CONSTANTS ############
# TODO: other URLs to handle:
# https://www.ibm.com/support/knowledgecenter/SSEPEK_12.0.0/codes/src/tpc/n181.html
# Parse specification shared by the parsing step and the first writer call.
PARSE_DICT = constants.PARSE_DICT
# The single page this script processes.
url = 'https://www.ibm.com/docs/en/db2-for-zos/13?topic=codes-396'
############ END CONSTANTS ############
############ PROGRAM ############
# Obtain the soup for `url` through soup_engine, then turn it into a
# dictionary-like result driven by PARSE_DICT.
soup = soup_engine.soup_from_url(url)
parsed_tags = soup_engine.dict_from_soup(soup, PARSE_DICT)

# Keep the source URL alongside the parsed content, under the key that
# the constants module defines for it.
parsed_tags[constants.DICT_URL] = url

# Both writer calls below receive the results as a list, so the single
# result is wrapped in one.
dictionaries = [parsed_tags]

# 'codes' is passed to both writers -- presumably the output name;
# confirm against soup_engine.
soup_engine.write_soup_engine_dict_to_files(dictionaries, PARSE_DICT, 'codes')
soup_engine.write_soup_engine_dict_to_json_files(dictionaries, 'codes')