75 changes: 75 additions & 0 deletions Assignment1.py
@@ -0,0 +1,75 @@
import pandas as pd
import plotly.express as px
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Loading data into pandas
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
names=["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Species"])
print(df)

# Summary statistics
print(df.describe())


# Exploratory figures
fig = px.scatter(df, x="SepalWidthCm", y="SepalLengthCm", color="Species", size='PetalLengthCm',
hover_data=['PetalWidthCm'])
#fig.show()

fig2 = px.bar(df, x="SepalWidthCm", y="SepalLengthCm", color="Species")
#fig2.show()

fig3 = px.box(df, y="PetalWidthCm", color="Species", points='all')
#fig3.show()

fig4 = px.violin(df, y="PetalLengthCm", color="Species", violinmode='overlay', hover_data=df.columns)
#fig4.show()

fig5 = px.ecdf(df, x="SepalLengthCm", y="SepalWidthCm", color="Species", ecdfnorm=None)
#fig5.show()

Do fig.show(), fig2.show(), etc. so we can see the plots. The code runs fine, but the figures don't show up unless you call them.

Owner Author

Thanks for the note! I added the fig.show() calls to the code (left commented out for now).
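A minimal sketch of one way to wire that up, assuming a module-level toggle is acceptable (the SHOW_PLOTS name is hypothetical, not in the original code):

SHOW_PLOTS = True  # hypothetical flag; set to False to keep the script headless
if SHOW_PLOTS:
    for f in (fig, fig2, fig3, fig4, fig5):
        f.show()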

# We can tell that setosa tends to have a larger sepal width, while versicolor and virginica have larger sepal lengths

#Train and Test Datasets
X = df.drop('Species', axis=1)
y = df['Species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)  # seeded, stratified split for reproducibility
print(X_train)

scale = StandardScaler()
scale.fit(X_train)
X_train_sc = scale.transform(X_train)
X_test_sc = scale.transform(X_test)

#Random Forest
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train_sc, y_train)
predictor = clf.predict(X_test_sc)

pipeline = Pipeline([('scaler', StandardScaler()), ('classifier', RandomForestClassifier())])
pipeline.fit(X_train, y_train)  # the pipeline scales internally, so it gets the unscaled split
accuracy = pipeline.score(X_test, y_test)  # mean accuracy for a classifier, not R^2
print(f"Random forest accuracy: {accuracy}")
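# A hedged aside (not in the original): cross-validation gives a more stable
# accuracy estimate than a single train/test split; cv=5 is an assumed choice.
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(pipeline, X, y, cv=5)
print(f"Random forest CV accuracy: {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}")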

#Other models - SVC and Logistic Regression
svclassifier = SVC(kernel='poly', degree=8)  # degree 8 is unusually high for a poly kernel and may overfit
svclassifier.fit(X_train_sc, y_train)


Again, the code runs fine and everything ran right, but print out the scores from the fitted classifiers so we can see whether the models are accurate or not.

Owner Author

The code already includes a confusion matrix and classification report for both models, which together show precision, recall, F1 score, and accuracy.
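A minimal sketch of the reviewer's ask, assuming both models are fitted as in this file (the loop and the accuracy_score summary are illustrative additions, not part of the original):

from sklearn.metrics import accuracy_score

# One headline accuracy per model, alongside the detailed reports below.
for name, model in [("SVC", svclassifier), ("LogisticRegression", logreg)]:
    preds = model.predict(X_test_sc)
    print(f"{name} accuracy: {accuracy_score(y_test, preds):.3f}")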

predictor_svc = svclassifier.predict(X_test_sc)

print(confusion_matrix(y_test, predictor_svc))
print(classification_report(y_test, predictor_svc))

logreg = LogisticRegression()
logreg.fit(X_train_sc, y_train)
predictor_reg = logreg.predict(X_test_sc)

print(confusion_matrix(y_test, predictor_reg))
print(classification_report(y_test, predictor_reg))
2 changes: 1 addition & 1 deletion README.md
@@ -6,7 +6,7 @@
- You can run `./scripts/create-venv.sh` to generate one
- `pip3 install --upgrade pip`
- Install pip-tools `pip3 install pip-tools`
- Update dev requirements: `pip-compile --output-file=requirements.dev.txt requirements.dev.in --upgrade`
- Update dev requirements: `pip-compile --output-file=requirements.dev.txt requirements.dev.in --upgrade`
- Update requirements: `pip-compile --output-file=requirements.txt requirements.in --upgrade`
- Install dev requirements `pip3 install -r requirements.dev.txt`
- Install requirements `pip3 install -r requirements.txt`
84 changes: 39 additions & 45 deletions requirements.dev.txt
@@ -1,21 +1,25 @@
#
# This file is autogenerated by pip-compile with python 3.8
# This file is autogenerated by pip-compile with python 3.10
# To update, run:
#
# pip-compile --output-file=requirements.dev.txt requirements.dev.in
#
anyio==3.6.1
# via jupyter-server
appnope==0.1.3
# via
# ipykernel
# ipython
argon2-cffi==21.3.0
# via
# jupyter-server
# nbclassic
# notebook
argon2-cffi-bindings==21.2.0
# via argon2-cffi
asttokens==2.0.5
asttokens==2.0.8
# via stack-data
attrs==21.4.0
attrs==22.1.0
# via jsonschema
babel==2.10.3
# via jupyterlab-server
@@ -35,49 +39,43 @@ cffi==1.15.1
# via argon2-cffi-bindings
cfgv==3.3.1
# via pre-commit
charset-normalizer==2.1.0
charset-normalizer==2.1.1
# via requests
click==8.1.3
# via
# black
# pip-tools
debugpy==1.6.2
debugpy==1.6.3
# via ipykernel
decorator==5.1.1
# via ipython
defusedxml==0.7.1
# via nbconvert
detect-secrets==1.3.0
# via -r requirements.dev.in
distlib==0.3.5
distlib==0.3.6
# via virtualenv
entrypoints==0.4
# via
# jupyter-client
# nbconvert
executing==0.9.1
# via jupyter-client
executing==1.0.0
# via stack-data
fastjsonschema==2.16.1
# via nbformat
filelock==3.7.1
filelock==3.8.0
# via virtualenv
flake8==4.0.1
# via -r requirements.dev.in
identify==2.5.2
identify==2.5.5
# via pre-commit
idna==3.3
# via
# anyio
# requests
importlib-metadata==4.12.0
# via jupyterlab-server
importlib-resources==5.9.0
# via jsonschema
ipykernel==6.15.1
ipykernel==6.15.2
# via
# nbclassic
# notebook
ipython==8.4.0
ipython==8.5.0
# via
# ipykernel
# jupyterlab
@@ -97,13 +95,13 @@ jinja2==3.1.2
# nbclassic
# nbconvert
# notebook
json5==0.9.8
json5==0.9.10
# via jupyterlab-server
jsonschema==4.7.2
jsonschema==4.15.0
# via
# jupyterlab-server
# nbformat
jupyter-client==7.3.4
jupyter-client==7.3.5
# via
# ipykernel
# jupyter-server
@@ -125,31 +123,33 @@ jupyter-server==1.18.1
# jupyterlab-server
# nbclassic
# notebook-shim
jupyterlab==3.4.4
jupyterlab==3.4.6
# via -r requirements.dev.in
jupyterlab-pygments==0.2.2
# via nbconvert
jupyterlab-server==2.15.0
jupyterlab-server==2.15.1
# via jupyterlab
lxml==4.9.1
# via nbconvert
markupsafe==2.1.1
# via
# jinja2
# nbconvert
matplotlib-inline==0.1.3
matplotlib-inline==0.1.6
# via
# ipykernel
# ipython
mccabe==0.6.1
# via flake8
mistune==0.8.4
mistune==2.0.4
# via nbconvert
mypy-extensions==0.4.3
# via black
nbclassic==0.4.3
# via jupyterlab
nbclient==0.6.6
nbclient==0.6.7
# via nbconvert
nbconvert==6.5.0
nbconvert==7.0.0
# via
# jupyter-server
# nbclassic
@@ -188,9 +188,9 @@ pandocfilters==1.5.0
# via nbconvert
parso==0.8.3
# via jedi
pathspec==0.9.0
pathspec==0.10.1
# via black
pep517==0.12.0
pep517==0.13.0
# via build
pexpect==4.8.0
# via ipython
@@ -209,9 +209,9 @@ prometheus-client==0.14.1
# jupyter-server
# nbclassic
# notebook
prompt-toolkit==3.0.30
prompt-toolkit==3.0.31
# via ipython
psutil==5.9.1
psutil==5.9.2
# via ipykernel
ptyprocess==0.7.0
# via
@@ -225,7 +225,7 @@ pycparser==2.21
# via cffi
pyflakes==2.4.0
# via flake8
pygments==2.12.0
pygments==2.13.0
# via
# ipython
# nbconvert
@@ -235,13 +235,13 @@ pyrsistent==0.18.1
# via jsonschema
python-dateutil==2.8.2
# via jupyter-client
pytz==2022.1
pytz==2022.2.1
# via babel
pyyaml==6.0
# via
# detect-secrets
# pre-commit
pyzmq==23.2.0
pyzmq==23.2.1
# via
# ipykernel
# jupyter-client
@@ -262,11 +262,11 @@ six==1.16.0
# asttokens
# bleach
# python-dateutil
sniffio==1.2.0
sniffio==1.3.0
# via anyio
soupsieve==2.3.2.post1
# via beautifulsoup4
stack-data==0.3.0
stack-data==0.5.0
# via ipython
terminado==0.15.0
# via
@@ -304,26 +304,20 @@ traitlets==5.3.0
# nbconvert
# nbformat
# notebook
typing-extensions==4.3.0
# via black
urllib3==1.26.11
urllib3==1.26.12
# via requests
virtualenv==20.16.1
virtualenv==20.16.5
# via pre-commit
wcwidth==0.2.5
# via prompt-toolkit
webencodings==0.5.1
# via
# bleach
# tinycss2
websocket-client==1.3.3
websocket-client==1.4.1
# via jupyter-server
wheel==0.37.1
# via pip-tools
zipp==3.8.1
# via
# importlib-metadata
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# pip
27 changes: 26 additions & 1 deletion requirements.in
@@ -1 +1,26 @@
numpy
fastapi
flake8==5.0.4
flask
graphviz
gunicorn
isort[requirements]
netcal
nose
numpy
pandas
pip-tools
pre-commit
plotly
pydot
pygam
pyspark
pyspark-stubs
requests
scikit-learn
seaborn
statsmodels
sqlalchemy
sympy
uvicorn
wheel
