diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..820d3d0f --- /dev/null +++ b/.gitignore @@ -0,0 +1,169 @@ +# Mac OS-specific storage files +.DS_Store + +# vim +*.swp +*.swo + +## https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# MkDocs documentation +docs/site/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..1a298e01 --- /dev/null +++ b/Makefile @@ -0,0 +1,68 @@ +################################################################################# +# GLOBALS # +################################################################################# + +PROJECT_NAME = Lab-ML-platzi +PYTHON_VERSION = 3.10 +PYTHON_INTERPRETER = python + +################################################################################# +# COMMANDS # +################################################################################# + + +## Install Python Dependencies +.PHONY: requirements +requirements: + $(PYTHON_INTERPRETER) -m pip install -U pip + $(PYTHON_INTERPRETER) -m pip install -r requirements.txt + + + + +## Delete all compiled Python files +.PHONY: clean +clean: + find . -type f -name "*.py[co]" -delete + find . -type d -name "__pycache__" -delete + +## Lint using flake8 and black (use `make format` to do formatting) +.PHONY: lint +lint: + flake8 logistic_regression + isort --check --diff --profile black logistic_regression + black --check --config pyproject.toml logistic_regression + +## Format source code with black +.PHONY: format +format: + black --config pyproject.toml logistic_regression + + + + + + +################################################################################# +# PROJECT RULES # +################################################################################# + + + +################################################################################# +# Self Documenting Commands # +################################################################################# + +.DEFAULT_GOAL := help + +define PRINT_HELP_PYSCRIPT +import re, sys; \ +lines = '\n'.join([line for line in sys.stdin]); \ +matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \ +print('Available rules:\n'); \ +print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches])) +endef +export PRINT_HELP_PYSCRIPT + +help: + @$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST) diff --git a/README.md b/README.md index 34e5548a..9104005b 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,14 @@ curl --location --request GET 'http://localhost:3001/query?feats=465,France,Fema Si todo esta bien, deberías de obtener una respuesta así ``` -{"response": [1]} +{"response": [0]} ``` #### Equipo * Juan Perez Nombrefalso * Ricardo Alanís Tamez +* Angel Martínez #### Contribuir diff --git a/app.py b/app.py index c27bed07..56a669cb 100644 --- a/app.py +++ b/app.py @@ -4,15 +4,28 @@ import pandas as pd -FEATURES = pickle.load(open("churn/models/features.pk", "rb")) +# Cargar las características, el modelo y las equivalencias de columnas desde archivos creados con la libreria pickle +FEATURES = pickle.load(open("models/features.pk", "rb")) +model = pickle.load(open("models/model.pk", "rb")) +column_equivalence = pickle.load(open("models/column_equivalence.pk", "rb")) -model = pickle.load(open("churn/models/model.pk", "rb")) -column_equivalence = pickle.load(open("churn/models/column_equivalence.pk", "rb")) -# create the Flask app app = Flask(__name__) def convert_numerical(features): + """ + Función para convertir características en formato numérico. + + Si una característica es categórica, se reemplaza por su código numérico + utilizando el diccionario 'column_equivalence'. Si no es categórica, + se intenta convertir a valor numérico con pandas. Si falla, se coloca un 0. + + Parámetros: + features (list): Lista de características a convertir. + + Retorna: + list: Lista de características convertidas a formato numérico. + """ output = [] for i, feat in enumerate(features): if i in column_equivalence: @@ -26,6 +39,17 @@ def convert_numerical(features): @app.route('/query') def query_example(): + """ + Endpoint que recibe características en la URL, las convierte a formato numérico + y devuelve la predicción del modelo en formato JSON. + + Parámetros (GET): + feats: Cadena de características separadas por comas. + + Retorna: + JSON: Respuesta con la predicción del modelo. + """ + features = convert_numerical(request.args.get('feats').split(',')) response = { 'response': [int(x) for x in model.predict([features])] @@ -33,5 +57,5 @@ def query_example(): return json.dumps(response) if __name__ == '__main__': - # run app in debug mode on port 3001 + # Ejecutamos la aplicación Flask en modo debug en el puerto 3001 app.run(debug=True, port=3001) \ No newline at end of file diff --git a/churn/.DS_Store b/churn/.DS_Store deleted file mode 100644 index e80e53f3..00000000 Binary files a/churn/.DS_Store and /dev/null differ diff --git a/churn/models/column_equivalence.pk b/churn/models/column_equivalence.pk deleted file mode 100644 index ae3467ff..00000000 Binary files a/churn/models/column_equivalence.pk and /dev/null differ diff --git a/churn/models/features.pk b/churn/models/features.pk deleted file mode 100644 index a82fcfcd..00000000 Binary files a/churn/models/features.pk and /dev/null differ diff --git a/churn/models/model.pk b/churn/models/model.pk deleted file mode 100644 index 80c67ef0..00000000 Binary files a/churn/models/model.pk and /dev/null differ diff --git a/data/.DS_Store b/data/.DS_Store deleted file mode 100644 index 162d75b2..00000000 Binary files a/data/.DS_Store and /dev/null differ diff --git a/data/processed/.gitkeep b/data/processed/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/data/raw/.gitkeep b/data/raw/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/data/churn.csv b/data/raw/churn.csv similarity index 100% rename from data/churn.csv rename to data/raw/churn.csv diff --git a/docs/.gitkeep b/docs/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/models/column_equivalence.pk b/models/column_equivalence.pk new file mode 100644 index 00000000..f4379cd6 Binary files /dev/null and b/models/column_equivalence.pk differ diff --git a/models/features.pk b/models/features.pk new file mode 100644 index 00000000..60af949c Binary files /dev/null and b/models/features.pk differ diff --git a/models/model.pk b/models/model.pk new file mode 100644 index 00000000..1da145eb Binary files /dev/null and b/models/model.pk differ diff --git a/notebooks/.ipynb_checkpoints/modelgeneration_27092023_ricalanis-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/modelgeneration_27092023_ricalanis-checkpoint.ipynb deleted file mode 100644 index 16823ad1..00000000 --- a/notebooks/.ipynb_checkpoints/modelgeneration_27092023_ricalanis-checkpoint.ipynb +++ /dev/null @@ -1,360 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 19, - "id": "9de64c0d-889e-483d-9921-25cbe60cedf4", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ddef4e47-253e-427f-b3c5-0f3f6cdacbac", - "metadata": {}, - "outputs": [], - "source": [ - "# Cambiar directorio\n", - "os.chdir(\"..\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "45a2377a-c148-44d4-b936-800d57362bcb", - "metadata": {}, - "outputs": [], - "source": [ - "# Cargar los datos que tenemos disponibles\n", - "data = pd.read_csv(\"data/churn.csv\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "e6e35832-17f4-43b3-adcc-5de303725f8d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | RowNumber | \n", - "CustomerId | \n", - "Surname | \n", - "CreditScore | \n", - "Geography | \n", - "Gender | \n", - "Age | \n", - "Tenure | \n", - "Balance | \n", - "NumOfProducts | \n", - "HasCrCard | \n", - "IsActiveMember | \n", - "EstimatedSalary | \n", - "Exited | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "1 | \n", - "15634602 | \n", - "Hargrave | \n", - "619 | \n", - "France | \n", - "Female | \n", - "42 | \n", - "2 | \n", - "0.00 | \n", - "1 | \n", - "1 | \n", - "1 | \n", - "101348.88 | \n", - "1 | \n", - "
| 1 | \n", - "2 | \n", - "15647311 | \n", - "Hill | \n", - "608 | \n", - "Spain | \n", - "Female | \n", - "41 | \n", - "1 | \n", - "83807.86 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "112542.58 | \n", - "0 | \n", - "
| 2 | \n", - "3 | \n", - "15619304 | \n", - "Onio | \n", - "502 | \n", - "France | \n", - "Female | \n", - "42 | \n", - "8 | \n", - "159660.80 | \n", - "3 | \n", - "1 | \n", - "0 | \n", - "113931.57 | \n", - "1 | \n", - "
| 3 | \n", - "4 | \n", - "15701354 | \n", - "Boni | \n", - "699 | \n", - "France | \n", - "Female | \n", - "39 | \n", - "1 | \n", - "0.00 | \n", - "2 | \n", - "0 | \n", - "0 | \n", - "93826.63 | \n", - "0 | \n", - "
| 4 | \n", - "5 | \n", - "15737888 | \n", - "Mitchell | \n", - "850 | \n", - "Spain | \n", - "Female | \n", - "43 | \n", - "2 | \n", - "125510.82 | \n", - "1 | \n", - "1 | \n", - "1 | \n", - "79084.10 | \n", - "0 | \n", - "
| \n", + " | CreditScore | \n", + "Geography | \n", + "Gender | \n", + "Age | \n", + "Tenure | \n", + "Balance | \n", + "NumOfProducts | \n", + "HasCrCard | \n", + "IsActiveMember | \n", + "EstimatedSalary | \n", + "Exited | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9355 | \n", + "0.778 | \n", + "1.0 | \n", + "0.0 | \n", + "0.297297 | \n", + "0.8 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.0 | \n", + "0.0 | \n", + "0.835174 | \n", + "0.0 | \n", + "
| 4272 | \n", + "0.580 | \n", + "1.0 | \n", + "0.0 | \n", + "0.216216 | \n", + "0.3 | \n", + "0.310193 | \n", + "0.000000 | \n", + "1.0 | \n", + "1.0 | \n", + "0.842747 | \n", + "0.0 | \n", + "
| 89 | \n", + "0.570 | \n", + "0.5 | \n", + "0.0 | \n", + "0.135135 | \n", + "0.3 | \n", + "0.325326 | \n", + "0.333333 | \n", + "1.0 | \n", + "1.0 | \n", + "0.783974 | \n", + "0.0 | \n", + "
| 4424 | \n", + "0.502 | \n", + "0.5 | \n", + "0.0 | \n", + "0.391892 | \n", + "0.1 | \n", + "0.569163 | \n", + "0.000000 | \n", + "1.0 | \n", + "1.0 | \n", + "0.287735 | \n", + "0.0 | \n", + "
| 4815 | \n", + "0.850 | \n", + "0.5 | \n", + "0.0 | \n", + "0.797297 | \n", + "0.6 | \n", + "0.538548 | \n", + "0.000000 | \n", + "1.0 | \n", + "0.0 | \n", + "0.189143 | \n", + "0.0 | \n", + "
| 4500 | \n", + "0.842 | \n", + "0.5 | \n", + "1.0 | \n", + "0.189189 | \n", + "0.9 | \n", + "0.308839 | \n", + "0.000000 | \n", + "0.0 | \n", + "0.0 | \n", + "0.165673 | \n", + "0.0 | \n", + "
| 5068 | \n", + "0.724 | \n", + "1.0 | \n", + "0.0 | \n", + "0.824324 | \n", + "0.5 | \n", + "0.430767 | \n", + "0.000000 | \n", + "1.0 | \n", + "1.0 | \n", + "0.870620 | \n", + "0.0 | \n", + "
| 7282 | \n", + "0.462 | \n", + "1.0 | \n", + "0.0 | \n", + "0.351351 | \n", + "0.7 | \n", + "0.754562 | \n", + "0.333333 | \n", + "1.0 | \n", + "0.0 | \n", + "0.225095 | \n", + "1.0 | \n", + "
| 185 | \n", + "0.328 | \n", + "0.0 | \n", + "1.0 | \n", + "0.189189 | \n", + "0.8 | \n", + "0.000000 | \n", + "0.333333 | \n", + "1.0 | \n", + "0.0 | \n", + "0.479274 | \n", + "0.0 | \n", + "
| 7301 | \n", + "0.834 | \n", + "0.5 | \n", + "0.0 | \n", + "0.202703 | \n", + "0.1 | \n", + "0.576940 | \n", + "0.000000 | \n", + "1.0 | \n", + "1.0 | \n", + "0.662409 | \n", + "0.0 | \n", + "
RandomizedSearchCV(estimator=SVC(random_state=42), n_iter=50, n_jobs=-1,\n",
+ " param_distributions=[{'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'kernel': ['linear'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]},\n",
+ " {'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6,\n",
+ " 0.7, 0.8, 0.9],\n",
+ " 'kernel': ['rbf'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]},\n",
+ " {'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'degree': [2, 3, 4, 5, 6],\n",
+ " 'gamma': [0.01, 0.02, 0.03, 0.04,\n",
+ " 0.05],\n",
+ " 'kernel': ['poly'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]}],\n",
+ " random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(estimator=SVC(random_state=42), n_iter=50, n_jobs=-1,\n",
+ " param_distributions=[{'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'kernel': ['linear'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]},\n",
+ " {'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6,\n",
+ " 0.7, 0.8, 0.9],\n",
+ " 'kernel': ['rbf'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]},\n",
+ " {'C': [1.0, 2.0, 5.0, 10.0, 25.0, 100.0,\n",
+ " 500.0, 1000.0],\n",
+ " 'degree': [2, 3, 4, 5, 6],\n",
+ " 'gamma': [0.01, 0.02, 0.03, 0.04,\n",
+ " 0.05],\n",
+ " 'kernel': ['poly'],\n",
+ " 'tol': [0.01, 0.001, 0.0001, 1e-05]}],\n",
+ " random_state=42)SVC(random_state=42)
SVC(random_state=42)