diff --git a/Assignment1.py b/Assignment1.py
new file mode 100644
index 0000000..5ae5ccf
--- /dev/null
+++ b/Assignment1.py
@@ -0,0 +1,115 @@
+"""Iris classification assignment.
+
+Loads the UCI Iris data set, prints summary statistics, builds a few
+exploratory Plotly figures, and compares Random Forest, SVC, and Logistic
+Regression classifiers on a held-out test split.
+"""
+
+import pandas as pd
+import plotly.express as px
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+
+IRIS_URL = (
+    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
+)
+COLUMN_NAMES = [
+    "SepalLengthCm",
+    "SepalWidthCm",
+    "PetalLengthCm",
+    "PetalWidthCm",
+    "Species",
+]
+# Fixed seed so the split and the forest are reproducible between runs.
+RANDOM_STATE = 42
+# Set to True to display the exploratory figures.
+SHOW_FIGURES = False
+
+# Load the data; names= supplies the column labels for the header-less file.
+df = pd.read_csv(IRIS_URL, names=COLUMN_NAMES)
+print(df)
+
+# Summary statistics
+print(df.describe())
+
+# Exploratory figures
+fig = px.scatter(
+    df,
+    x="SepalWidthCm",
+    y="SepalLengthCm",
+    color="Species",
+    size="PetalLengthCm",
+    hover_data=["PetalWidthCm"],
+)
+fig2 = px.bar(df, x="SepalWidthCm", y="SepalLengthCm", color="Species")
+fig3 = px.box(df, y="PetalWidthCm", color="Species", points="all")
+fig4 = px.violin(
+    df,
+    y="PetalLengthCm",
+    color="Species",
+    violinmode="overlay",
+    hover_data=df.columns,
+)
+fig5 = px.ecdf(
+    df, x="SepalLengthCm", y="SepalWidthCm", color="Species", ecdfnorm=None
+)
+if SHOW_FIGURES:
+    for figure in (fig, fig2, fig3, fig4, fig5):
+        figure.show()
+
+# Observation: setosa tends to have a larger sepal width, while versicolor
+# and virginica tend to have a larger sepal length.
+
+# Train and test data sets
+X = df.drop("Species", axis=1)
+y = df["Species"]
+
+# Stratify so both splits keep the class proportions of the full data set.
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.20, random_state=RANDOM_STATE, stratify=y
+)
+print(X_train)
+
+# Scale using statistics learned from the training split only.
+scale = StandardScaler()
+scale.fit(X_train)
+X_train_sc = scale.transform(X_train)
+X_test_sc = scale.transform(X_test)
+
+# Random Forest. The pipeline owns its scaler, so it is fitted on the raw
+# features; passing X_train_sc would standardise the data twice.
+pipeline = Pipeline(
+    [
+        ("scaler", StandardScaler()),
+        (
+            "classifier",
+            RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
+        ),
+    ]
+)
+pipeline.fit(X_train, y_train)
+# For a classifier, score() is mean accuracy on the given data, not R^2.
+rf_accuracy = pipeline.score(X_test, y_test)
+print(f"Random Forest accuracy: {rf_accuracy}")
+
+# Other models - SVC and Logistic Regression
+# NOTE(review): a degree-8 polynomial kernel is very flexible; compare it
+# against a lower degree before relying on these numbers.
+svclassifier = SVC(kernel="poly", degree=8)
+svclassifier.fit(X_train_sc, y_train)
+predictor_svc = svclassifier.predict(X_test_sc)
+
+print(confusion_matrix(y_test, predictor_svc))
+print(classification_report(y_test, predictor_svc))
+
+logreg = LogisticRegression()
+logreg.fit(X_train_sc, y_train)
+predictor_reg = logreg.predict(X_test_sc)
+
+print(confusion_matrix(y_test, predictor_reg))
+print(classification_report(y_test, predictor_reg))
diff --git a/README.md b/README.md index 3cd99cc..7867140 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ - You can run `./scripts/create-venv.sh` to generate one - `pip3 install --upgrade pip` - Install pip-tools `pip3 install pip-tools` -- Update dev requirements: `pip-compile --output-file=requirements.dev.txt requirements.dev.in --upgrade` +- Update dev requirements: `pip-compile --output-file=requirements.dev.txt requirements.dev.in --upgrade` - Update requirements: `pip-compile --output-file=requirements.txt requirements.in --upgrade` - Install dev requirements `pip3 install -r requirements.dev.txt` - Install requirements `pip3 install -r requirements.txt` diff --git a/requirements.dev.txt b/requirements.dev.txt index e05a14b..0ddad30 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,11 +1,15 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.10 # To update, run: # # pip-compile --output-file=requirements.dev.txt requirements.dev.in # anyio==3.6.1 # via jupyter-server +appnope==0.1.3 + # via + # ipykernel + # ipython argon2-cffi==21.3.0 # via # jupyter-server @@ -13,9 +17,9 @@ argon2-cffi==21.3.0 # notebook argon2-cffi-bindings==21.2.0 # via argon2-cffi -asttokens==2.0.5 +asttokens==2.0.8 # via stack-data -attrs==21.4.0 +attrs==22.1.0 # via jsonschema
babel==2.10.3 # via jupyterlab-server @@ -35,13 +39,13 @@ cffi==1.15.1 # via argon2-cffi-bindings cfgv==3.3.1 # via pre-commit -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 # via requests click==8.1.3 # via # black # pip-tools -debugpy==1.6.2 +debugpy==1.6.3 # via ipykernel decorator==5.1.1 # via ipython @@ -49,35 +53,29 @@ defusedxml==0.7.1 # via nbconvert detect-secrets==1.3.0 # via -r requirements.dev.in -distlib==0.3.5 +distlib==0.3.6 # via virtualenv entrypoints==0.4 - # via - # jupyter-client - # nbconvert -executing==0.9.1 + # via jupyter-client +executing==1.0.0 # via stack-data fastjsonschema==2.16.1 # via nbformat -filelock==3.7.1 +filelock==3.8.0 # via virtualenv flake8==4.0.1 # via -r requirements.dev.in -identify==2.5.2 +identify==2.5.5 # via pre-commit idna==3.3 # via # anyio # requests -importlib-metadata==4.12.0 - # via jupyterlab-server -importlib-resources==5.9.0 - # via jsonschema -ipykernel==6.15.1 +ipykernel==6.15.2 # via # nbclassic # notebook -ipython==8.4.0 +ipython==8.5.0 # via # ipykernel # jupyterlab @@ -97,13 +95,13 @@ jinja2==3.1.2 # nbclassic # nbconvert # notebook -json5==0.9.8 +json5==0.9.10 # via jupyterlab-server -jsonschema==4.7.2 +jsonschema==4.15.0 # via # jupyterlab-server # nbformat -jupyter-client==7.3.4 +jupyter-client==7.3.5 # via # ipykernel # jupyter-server @@ -125,31 +123,33 @@ jupyter-server==1.18.1 # jupyterlab-server # nbclassic # notebook-shim -jupyterlab==3.4.4 +jupyterlab==3.4.6 # via -r requirements.dev.in jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-server==2.15.0 +jupyterlab-server==2.15.1 # via jupyterlab +lxml==4.9.1 + # via nbconvert markupsafe==2.1.1 # via # jinja2 # nbconvert -matplotlib-inline==0.1.3 +matplotlib-inline==0.1.6 # via # ipykernel # ipython mccabe==0.6.1 # via flake8 -mistune==0.8.4 +mistune==2.0.4 # via nbconvert mypy-extensions==0.4.3 # via black nbclassic==0.4.3 # via jupyterlab -nbclient==0.6.6 +nbclient==0.6.7 # via nbconvert -nbconvert==6.5.0 +nbconvert==7.0.0 # via # 
jupyter-server # nbclassic @@ -188,9 +188,9 @@ pandocfilters==1.5.0 # via nbconvert parso==0.8.3 # via jedi -pathspec==0.9.0 +pathspec==0.10.1 # via black -pep517==0.12.0 +pep517==0.13.0 # via build pexpect==4.8.0 # via ipython @@ -209,9 +209,9 @@ prometheus-client==0.14.1 # jupyter-server # nbclassic # notebook -prompt-toolkit==3.0.30 +prompt-toolkit==3.0.31 # via ipython -psutil==5.9.1 +psutil==5.9.2 # via ipykernel ptyprocess==0.7.0 # via @@ -225,7 +225,7 @@ pycparser==2.21 # via cffi pyflakes==2.4.0 # via flake8 -pygments==2.12.0 +pygments==2.13.0 # via # ipython # nbconvert @@ -235,13 +235,13 @@ pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via jupyter-client -pytz==2022.1 +pytz==2022.2.1 # via babel pyyaml==6.0 # via # detect-secrets # pre-commit -pyzmq==23.2.0 +pyzmq==23.2.1 # via # ipykernel # jupyter-client @@ -262,11 +262,11 @@ six==1.16.0 # asttokens # bleach # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio soupsieve==2.3.2.post1 # via beautifulsoup4 -stack-data==0.3.0 +stack-data==0.5.0 # via ipython terminado==0.15.0 # via @@ -304,11 +304,9 @@ traitlets==5.3.0 # nbconvert # nbformat # notebook -typing-extensions==4.3.0 - # via black -urllib3==1.26.11 +urllib3==1.26.12 # via requests -virtualenv==20.16.1 +virtualenv==20.16.5 # via pre-commit wcwidth==0.2.5 # via prompt-toolkit @@ -316,14 +314,10 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.3.3 +websocket-client==1.4.1 # via jupyter-server wheel==0.37.1 # via pip-tools -zipp==3.8.1 - # via - # importlib-metadata - # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements.in b/requirements.in index 296d654..d86a5d6 100644 --- a/requirements.in +++ b/requirements.in @@ -1 +1,26 @@ -numpy \ No newline at end of file +fastapi +flake8==5.0.4 +flask +graphviz +gunicorn +isort[requirements] +netcal +nose +numpy +pandas +pip-tools +pre-commit +plotly +pydot +pygam +pyspark +pyspark-stubs 
+requests +scikit-learn +seaborn +statsmodels +sqlalchemy +sympy +uvicorn +wheel + diff --git a/requirements.txt b/requirements.txt index b000696..6b6a538 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,280 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.10 # To update, run: # # pip-compile --output-file=requirements.txt requirements.in # -numpy==1.23.1 +absl-py==1.2.0 + # via tensorboard +anyio==3.6.1 + # via starlette +build==0.8.0 + # via pip-tools +cachetools==5.2.0 + # via google-auth +certifi==2022.6.15 + # via requests +cfgv==3.3.1 + # via pre-commit +charset-normalizer==2.1.1 + # via requests +click==8.1.3 + # via + # flask + # pip-tools + # uvicorn +cycler==0.11.0 + # via matplotlib +distlib==0.3.6 + # via virtualenv +fastapi==0.82.0 # via -r requirements.in +filelock==3.8.0 + # via virtualenv +flake8==5.0.4 + # via -r requirements.in +flask==2.2.2 + # via -r requirements.in +fonttools==4.37.1 + # via matplotlib +future==0.18.2 + # via pygam +google-auth==2.11.0 + # via + # google-auth-oauthlib + # tensorboard +google-auth-oauthlib==0.4.6 + # via tensorboard +graphviz==0.20.1 + # via -r requirements.in +grpcio==1.48.1 + # via tensorboard +gunicorn==20.1.0 + # via -r requirements.in +h11==0.13.0 + # via uvicorn +identify==2.5.5 + # via pre-commit +idna==3.3 + # via + # anyio + # requests +isort[requirements]==5.10.1 + # via -r requirements.in +itsdangerous==2.1.2 + # via flask +jinja2==3.1.2 + # via flask +joblib==1.1.0 + # via scikit-learn +kiwisolver==1.4.4 + # via matplotlib +markdown==3.4.1 + # via tensorboard +markupsafe==2.1.1 + # via + # jinja2 + # werkzeug +matplotlib==3.5.3 + # via + # netcal + # seaborn + # tikzplotlib +mccabe==0.7.0 + # via flake8 +mpmath==1.2.1 + # via sympy +netcal==1.2.1 + # via -r requirements.in +nodeenv==1.7.0 + # via pre-commit +nose==1.3.7 + # via -r requirements.in +numpy==1.23.2 + # via + # -r requirements.in + # matplotlib + # 
netcal + # opt-einsum + # pandas + # patsy + # pygam + # pyro-ppl + # scikit-learn + # scipy + # seaborn + # statsmodels + # tensorboard + # tikzplotlib + # torchvision +oauthlib==3.2.0 + # via requests-oauthlib +opt-einsum==3.3.0 + # via pyro-ppl +packaging==21.3 + # via + # build + # matplotlib + # statsmodels +pandas==1.4.4 + # via + # -r requirements.in + # seaborn + # statsmodels +patsy==0.5.2 + # via statsmodels +pep517==0.13.0 + # via build +pillow==9.2.0 + # via + # matplotlib + # tikzplotlib + # torchvision +pip-tools==6.8.0 + # via -r requirements.in +platformdirs==2.5.2 + # via virtualenv +plotly==5.10.0 + # via -r requirements.in +pre-commit==2.20.0 + # via -r requirements.in +progressbar2==4.0.0 + # via pygam +protobuf==3.19.4 + # via tensorboard +py4j==0.10.9 + # via pyspark +pyasn1==0.4.8 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.2.8 + # via google-auth +pycodestyle==2.9.1 + # via flake8 +pydantic==1.10.2 + # via fastapi +pydot==1.4.2 + # via -r requirements.in +pyflakes==2.5.0 + # via flake8 +pygam==0.8.0 + # via -r requirements.in +pyparsing==3.0.9 + # via + # matplotlib + # packaging + # pydot +pyro-api==0.1.2 + # via pyro-ppl +pyro-ppl==1.8.2 + # via netcal +pyspark==3.0.3 + # via + # -r requirements.in + # pyspark-stubs +pyspark-stubs==3.0.0.post3 + # via -r requirements.in +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +python-utils==3.3.3 + # via progressbar2 +pytz==2022.2.1 + # via pandas +pyyaml==6.0 + # via pre-commit +requests==2.28.1 + # via + # -r requirements.in + # requests-oauthlib + # tensorboard + # torchvision +requests-oauthlib==1.3.1 + # via google-auth-oauthlib +rsa==4.9 + # via google-auth +scikit-learn==1.1.2 + # via + # -r requirements.in + # netcal +scipy==1.9.1 + # via + # netcal + # pygam + # scikit-learn + # statsmodels +seaborn==0.12.0 + # via -r requirements.in +six==1.16.0 + # via + # google-auth + # grpcio + # patsy + # python-dateutil +sniffio==1.3.0 + # via anyio +sqlalchemy==1.4.41 + # via -r 
requirements.in +starlette==0.19.1 + # via fastapi +statsmodels==0.13.2 + # via -r requirements.in +sympy==1.11.1 + # via -r requirements.in +tenacity==8.0.1 + # via plotly +tensorboard==2.10.0 + # via netcal +tensorboard-data-server==0.6.1 + # via tensorboard +tensorboard-plugin-wit==1.8.1 + # via tensorboard +threadpoolctl==3.1.0 + # via scikit-learn +tikzplotlib==0.10.1 + # via netcal +toml==0.10.2 + # via pre-commit +tomli==2.0.1 + # via + # build + # pep517 +torch==1.12.1 + # via + # netcal + # pyro-ppl + # torchvision +torchvision==0.13.1 + # via netcal +tqdm==4.64.1 + # via + # netcal + # pyro-ppl +typing-extensions==4.3.0 + # via + # pydantic + # torch + # torchvision +urllib3==1.26.12 + # via requests +uvicorn==0.18.3 + # via -r requirements.in +virtualenv==20.16.5 + # via pre-commit +webcolors==1.12 + # via tikzplotlib +werkzeug==2.2.2 + # via + # flask + # tensorboard +wheel==0.37.1 + # via + # -r requirements.in + # pip-tools + # tensorboard + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools