Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"<div class=\"alert alert-info\">\n",
"Please follow these colored cells for direction\n",
"</div>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
},
"outputs": [],
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n",
"# For example, here's several helpful packages to load in \n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the \"../input/\" directory.\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"# Walk the Kaggle input directory and print the full path of every file found.\n",
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
"    for filename in filenames:\n",
"        print(os.path.join(dirname, filename))\n",
"\n",
"# Any results you write to the current directory are saved as output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
"_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
},
"outputs": [],
"source": [
"#Set path of the file to read.\n",
"TrainSet_path = '/kaggle/input/ace-class-assignment/AMP_TrainSet.csv'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Read the file into a variable TrainSet_data\n",
"TrainSet_data = pd.read_csv(TrainSet_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#List all columns in the dataset from which to choose variables for modeling\n",
"TrainSet_data.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Use the dot notation to select the column to predict <Prediction Target>. Call it y\n",
"y = TrainSet_data.CLASS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"<div class=\"alert alert-info\">\n",
"\n",
"## ??\n",
"\n",
"How did you arrive at these features?\n",
"</div>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Create list of features to be used for prediction\n",
"feature_names = ['FULL_Charge', 'FULL_AcidicMolPerc', 'FULL_AURR980107',\n",
" 'FULL_DAYM780201', 'FULL_GEOR030101', 'FULL_OOBM850104', 'NT_EFC195',\n",
" 'AS_MeanAmphiMoment', 'AS_DAYM780201', 'AS_FUKS010112', 'CT_RACS820104']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Select data corresponding to features in feature_names\n",
"X = TrainSet_data[feature_names]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Quickly review the feature data used to predict CLASS using the head method\n",
"X.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Import the Gaussian Naive Bayes classifier (GaussianNB) from the scikit-learn library\n",
"from sklearn.naive_bayes import GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Define model. \n",
"data_model = GaussianNB()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Fit model\n",
"data_model.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Make predictions for the first rows of the training data to see how the predict function works.\n",
"#X.head() returns the first 5 rows of the selected feature columns.\n",
"print(\"Making predictions for the following 5 training rows:\")\n",
"print(X.head())\n",
"print(\"The predictions are\")\n",
"print(data_model.predict(X.head()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<div class=\"alert alert-info\">\n",
"\n",
"## ??\n",
"\n",
"You have fewer than 10 algorithms. Why?\n",
"</div>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Calculate accuracy; one of many metrics to summarize the quality of a classifier.\n",
"#mean_absolute_error is a regression metric, so accuracy_score is used for the CLASS labels instead.\n",
"#NOTE: the model is scored on the same rows it was fitted on, so this number is optimistic.\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"predicted = data_model.predict(X)\n",
"accuracy_score(y, predicted)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Create classification report containing various statistics required to judge the model\n",
"from sklearn import metrics\n",
"\n",
"print(metrics.classification_report(y, predicted))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Create a confusion matrix to give us a clear idea of the accuracy and fitting of the model\n",
"print(metrics.confusion_matrix(y, predicted))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Set path of the file to read.\n",
"Test_data_path = '/kaggle/input/ace-class-assignment/Test.csv'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Read the test file into a variable Test_data\n",
"Test_data = pd.read_csv(Test_data_path)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Make predictions for the first rows of the test data to see how the predict function works.\n",
"#The model was fitted on the columns in feature_names only, so the same columns are selected here.\n",
"print(\"Making predictions for test data:\")\n",
"print(Test_data.head())\n",
"print(\"The predictions are\")\n",
"print(data_model.predict(Test_data[feature_names].head()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Make predictions for the entire test data.\n",
"#The model was fitted on the columns in feature_names only, so the same columns are selected here.\n",
"#The prediction is computed once and stored in test_preds for the submission step.\n",
"print(\"Making predictions for test data:\")\n",
"print(Test_data)\n",
"\n",
"test_preds = data_model.predict(Test_data[feature_names])\n",
"print(\"The predictions are\")\n",
"print(test_preds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Save predictions in format used for competition scoring\n",
"output = pd.DataFrame({'CLASS': test_preds})\n",
"output_bool = output.astype(bool)\n",
"output_bool.to_csv('submission.csv', index_label='Index')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<div class=\"alert alert-danger\">\n",
"\n",
"## This notebook is not good.\n",
"\n",
"- please check with your class members on how to make it better.\n",
"- we need to see more commenting, why are you doing what you are doing?\n",
"- this will fetch you very poor marks.\n",
"\n",
"\n",
"</div>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading