atwine · eddUG · Mar 10, 2020 · Mar 10, 2020 · Mar 10, 2020 · Mar 13, 2020
diff --git a/Assignment Colab/.ipynb_checkpoints/ACE_class_NaiveBayes_Model-checkpoint.ipynb b/Assignment Colab/.ipynb_checkpoints/ACE_class_NaiveBayes_Model-checkpoint.ipynb
@@ -0,0 +1,345 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "<div class=\"alert alert-info\">\n",
+    "Please follow these colored cells for direction\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
+   },
+   "outputs": [],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here's several helpful packages to load in \n",
+    "\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "\n",
+    "# Input data files are available in the \"../input/\" directory.\n",
+    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
+    "\n",
+    "import os\n",
+    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+    "    for filename in filenames:\n",
+    "        print(os.path.join(dirname, filename))\n",
+    "\n",
+    "# Any results you write to the current directory are saved as output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
+    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
+   },
+   "outputs": [],
+   "source": [
+    "#Set path of the file to read.\n",
+    "TrainSet_path = '/kaggle/input/ace-class-assignment/AMP_TrainSet.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Read the file into a variable TrainSet_data\n",
+    "TrainSet_data = pd.read_csv(TrainSet_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#List all columns in the dataset from which to choose variables for modeling\n",
+    "TrainSet_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Use the dot notation to select the column to predict <Prediction Target>. Call it y\n",
+    "y = TrainSet_data.CLASS"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "<div class=\"alert alert-info\">\n",
+    "\n",
+    "## ??\n",
+    "\n",
+    "How did you arrive at these features?\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Create list of features to be used for prediction\n",
+    "feature_names = ['FULL_Charge', 'FULL_AcidicMolPerc', 'FULL_AURR980107',\n",
+    "       'FULL_DAYM780201', 'FULL_GEOR030101', 'FULL_OOBM850104', 'NT_EFC195',\n",
+    "       'AS_MeanAmphiMoment', 'AS_DAYM780201', 'AS_FUKS010112', 'CT_RACS820104']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Select data corresponding to features in feature_names\n",
+    "X = TrainSet_data[feature_names]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Quickly review the feature data used to predict CLASS using the head method\n",
+    "X.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Import the Gaussian Naive Bayes classifier (GaussianNB) from the scikit-learn library\n",
+    "from sklearn.naive_bayes import GaussianNB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Define model. \n",
+    "data_model = GaussianNB()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Fit model\n",
+    "data_model.fit(X, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Make predictions for the first rows of the training features to see how the predict function works.\n",
+    "#X.head() is the leading rows of the feature columns selected via feature_names.\n",
+    "print(\"Making predictions for the following 5 rows:\")\n",
+    "print(X.head())\n",
+    "print(\"The predictions are\")\n",
+    "print(data_model.predict(X.head()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<div class=\"alert alert-info\">\n",
+    "\n",
+    "## ??\n",
+    "\n",
+    "You have fewer than 10 algorithms. Why?\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Calculate accuracy (fraction of rows whose predicted CLASS matches the true CLASS).\n",
+    "#data_model is a classifier (GaussianNB), so a classification metric is used here\n",
+    "#rather than a regression metric such as mean absolute error.\n",
+    "#NOTE: the score is computed on the same rows the model was fitted on, so it is an optimistic estimate.\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "\n",
+    "#predicted is reused by the classification report and confusion matrix cells below\n",
+    "predicted = data_model.predict(X)\n",
+    "accuracy_score(y, predicted)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Create classification report containing various statistics required to judge the model\n",
+    "from sklearn import metrics\n",
+    "\n",
+    "print(metrics.classification_report(y, predicted))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Create a confusion matrix to give us a clear idea of the accuracy and fitting of the model\n",
+    "print(metrics.confusion_matrix(y, predicted))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Set path of the file to read.\n",
+    "Test_data_path = '/kaggle/input/ace-class-assignment/Test.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Read the test file into a variable Test_data\n",
+    "Test_data = pd.read_csv(Test_data_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Make predictions for the first rows of the test data to see how the predict function works.\n",
+    "#The model was fitted on the feature_names columns only, so the same columns (in the same order)\n",
+    "#are selected from the test data before calling predict.\n",
+    "print(\"Making predictions for test data:\")\n",
+    "print(Test_data.head())\n",
+    "print(\"The predictions are\")\n",
+    "print(data_model.predict(Test_data[feature_names].head()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Make predictions for the entire test data.\n",
+    "#Only the feature_names columns are passed to the model, because it was fitted on exactly those columns.\n",
+    "#The prediction is computed once and reused for both the printout and the submission cell below.\n",
+    "test_preds = data_model.predict(Test_data[feature_names])\n",
+    "\n",
+    "print(\"Making predictions for test data:\")\n",
+    "print(Test_data)\n",
+    "print(\"The predictions are\")\n",
+    "print(test_preds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Save predictions in format used for competition scoring\n",
+    "output = pd.DataFrame({'CLASS': test_preds})\n",
+    "output_bool = output.astype(bool)\n",
+    "output_bool.to_csv('submission.csv', index_label='Index')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<div class=\"alert alert-danger\">\n",
+    "\n",
+    "## This notebook is not good.\n",
+    "\n",
+    "- please check with your class members on how to make it better.\n",
+    "- we need to see more commenting, why are you doing what you are doing?\n",
+    "- this will fetch you very poor marks.\n",
+    "\n",
+    "\n",
+    "</div>"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}