-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgettingSIC.R
More file actions
74 lines (67 loc) · 3.86 KB
/
gettingSIC.R
File metadata and controls
74 lines (67 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# https://www.sec.gov/edgar/sec-api-documentation
# https://www.osha.gov/data/sic-manual
# https://www.sec.gov/corpfin/division-of-corporation-finance-standard-industrial-classification-sic-code-list
# load packages
require("jsonlite"); require("data.table");require("httr");require("pbapply");require("stringr");require("plyr")
# Environment holding the User-Agent string sent with SEC API requests
PASS <- new.env()
PASS$usrAgent <- "companyname.com email@companyName.com"
# ****************************************************************************************************************
# Read in CIK Codes :
# ****************************************************************************************************************
# read in list of CIK codes; each JSON entry becomes one row of the table
INFO <- read_json("https://www.sec.gov/files/company_tickers.json")
INFO <- rbindlist(INFO)
# CIK numbers are 10 digits - we have to fill in with zeros.
# sprintf() is vectorised (so an empty table simply yields character(0)) and
# always prints fixed notation. Pasting the number after a run of numeric
# zeros coerces it to double, and R then renders round values such as
# 100000 as "1e+05", which would corrupt the padded CIK.
INFO$CIK <- sprintf("%010.0f", as.numeric(INFO$cik_str))
INFO <- as.data.frame(INFO)
# *****************************************************************************************************************
# *****************************************************************************************************************
# Look up the zero-padded CIK stored in INFO for a ticker symbol.
# Returns a zero-length result when the symbol is not present in INFO.
getCIK <- function(symbol) {
  wanted <- paste(symbol)
  hit <- INFO$ticker == wanted
  # rows whose comparison is NA are excluded, the same way subset() treats them
  INFO[hit & !is.na(hit), , drop = FALSE]$CIK
}
# *****************************************************************************************************************
# *****************************************************************************************************************
# SIC lookup table, read from sic.csv relative to the working directory
sicLookUp <- read.csv(file = "sic.csv", sep = ",")
# Major group recorded in sicLookUp for the given SIC code
# (zero-length when the code is not in the table).
getMG <- function(sic) {
  code_match <- sicLookUp$SIC.Code == sic
  # NA comparisons count as "no match", mirroring subset()
  sicLookUp[code_match & !is.na(code_match), , drop = FALSE]$MAJOR.GROUP
}
# Division recorded in sicLookUp for the given SIC code
# (zero-length when the code is not in the table).
getDIV <- function(sic) {
  code_match <- sicLookUp$SIC.Code == sic
  # NA comparisons count as "no match", mirroring subset()
  sicLookUp[code_match & !is.na(code_match), , drop = FALSE]$DIVISION
}
# *****************************************************************************************************************
# *****************************************************************************************************************
# Fetch the SEC submissions record for a ticker and return its SIC details.
#
# ticker: ticker symbol; it must be present in INFO so getCIK() can resolve it.
# Returns a data frame of character columns named currentTicker, companyName,
#   cik, entityType, exhanges, tickers, sic, majorGroup, division, industry,
#   with one row per entry in the record's "tickers" field. Fields that are
#   absent from the record or from the SIC lookup are returned as NA.
# Stops with an error when the ticker has no CIK, the HTTP request returns an
#   error status, or the response body cannot be parsed as JSON.
getTickerSIC <- function(ticker) {
  # Flatten a field to a character vector, substituting NA when it is empty.
  # cbind() silently drops zero-length inputs, which would shift or break the
  # column names, so every field is normalised before the frame is built.
  orNA <- function(x) {
    x <- unlist(x)
    if (length(x) == 0L) NA_character_ else as.character(x)
  }

  # get CIK # for ticker
  CIK <- getCIK(ticker)
  if (length(CIK) == 0L) {
    stop("no CIK found for ticker: ", paste(ticker), call. = FALSE)
  }
  # a URL can only carry one CIK; use the first match
  CIK <- as.character(CIK)[1L]

  # get data by passing in url & headers
  pg <- httr::GET(
    url = paste0("https://data.sec.gov/submissions/CIK", CIK, ".json"),
    config = httr::add_headers(
      `User-Agent` = PASS$usrAgent,
      `Accept-Encoding` = "gzip, deflate"
    )
  )
  # fail here on an HTTP error status instead of trying to parse an error page
  httr::stop_for_status(pg)

  # raw data: only the response text goes to fromJSON(); the response object
  # must not be passed as its second (simplifyVector) argument
  data_raw <- jsonlite::fromJSON(
    httr::content(pg, as = "text", encoding = "UTF-8"),
    simplifyVector = TRUE,
    flatten = FALSE
  )

  # ********************************************************************************************************
  # EXTRACT COMPANY INFO
  # ********************************************************************************************************
  sic <- orNA(data_raw$sic)[1L]
  ALL <- data.frame(
    currentTicker = paste(ticker),
    companyName = stringr::str_to_title(orNA(data_raw$name)[1L]),
    cik = CIK,
    entityType = orNA(data_raw$entityType)[1L],
    # column name spelling is kept as-is: it is part of the returned interface
    exhanges = orNA(data_raw$exchanges)[1L],
    tickers = orNA(data_raw$tickers),
    sic = sic,
    majorGroup = orNA(getMG(sic))[1L],
    division = orNA(getDIV(sic))[1L],
    industry = orNA(data_raw$sicDescription)[1L],
    stringsAsFactors = FALSE
  )
  # return data frame
  ALL
}
# *****************************************************************************************************************
# *****************************************************************************************************************
# every distinct ticker in the CIK table
tickers <- unique(INFO$ticker)
# look up each ticker in turn; a lookup that errors contributes NULL
ALL <- pblapply(as.list(tickers), function(sym) {
  tryCatch(getTickerSIC(ticker = sym), error = function(e) NULL)
})
# stack the per-ticker frames, matching columns by name and filling gaps
ALL <- as.data.frame(rbindlist(ALL, use.names = TRUE, fill = TRUE))
saveRDS(ALL, "sectorIndustryList.rds")