flylo · flylo · May 24, 2017
diff --git a/nbs/ComparisonPlots.ipynb b/nbs/ComparisonPlots.ipynb
diff --git a/nbs/FactorizationMachineResults-MoreFactorsLongerRun-Copy1.ipynb b/nbs/FactorizationMachineResults-MoreFactorsLongerRun-Copy1.ipynb
diff --git a/nbs/FactorizationMachineResults-MoreFactorsLongerRun.ipynb b/nbs/FactorizationMachineResults-MoreFactorsLongerRun.ipynb
diff --git a/nbs/FactorizationMachineResults.ipynb b/nbs/FactorizationMachineResults.ipynb
diff --git a/nbs/FeaturePlots.ipynb b/nbs/FeaturePlots.ipynb
diff --git a/nbs/L1Results.ipynb b/nbs/L1Results.ipynb
diff --git a/nbs/L2Results.ipynb b/nbs/L2Results.ipynb
diff --git a/nbs/LDAResults.ipynb b/nbs/LDAResults.ipynb
diff --git a/nbs/NTupleResults.ipynb b/nbs/NTupleResults.ipynb
diff --git a/nbs/NaiveBayesResults.ipynb b/nbs/NaiveBayesResults.ipynb
diff --git a/nbs/Posterior Overlap.ipynb b/nbs/Posterior Overlap.ipynb
diff --git a/nbs/RidgeResults.ipynb b/nbs/RidgeResults.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pickle\n",
+    "import numpy as np\n",
+    "from scipy import sparse\n",
+    "from functools import partial\n",
+    "from sklearn.externals import joblib\n",
+    "from sklearn.cross_validation import KFold\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.cross_validation import train_test_split\n",
+    "import seaborn as sns\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import RidgeClassifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "labels = joblib.load(\"/home/jovyan/persistent_data/data/dumps/labeled_corpus_labels.pkl\")\n",
+    "features = joblib.load(\"/home/jovyan/persistent_data/data/dumps/labeled_corpus_matrix.pkl\").todense()\n",
+    "# Row indices of the pieces written by each of the five composers.\n",
+    "mozart_labels = [k for k in range(len(labels)) if labels[k] == \"mozart\"]\n",
+    "bach_labels = [k for k in range(len(labels)) if labels[k] == \"bach-js\"]\n",
+    "schubert_labels = [k for k in range(len(labels)) if labels[k] == \"schubert\"]\n",
+    "chopin_labels = [k for k in range(len(labels)) if labels[k] == \"chopin\"]\n",
+    "tchaikovsky_labels = [k for k in range(len(labels)) if labels[k] == \"tchaikovsky\"]\n",
+    "composer_rows = [mozart_labels, bach_labels, schubert_labels, chopin_labels, tchaikovsky_labels]\n",
+    "# Stack the selected rows composer by composer; .A presumably yields a plain ndarray (features looks like a scipy sparse matrix).\n",
+    "X = features[sum(composer_rows, [])].A\n",
+    "# Class ids 0-4 follow the same composer order, one id per row of X.\n",
+    "y = np.array([class_id for class_id, rows in enumerate(composer_rows) for _ in rows])\n",
+    "del features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "X = X[:, X.sum(axis=0) != 0]  # keep only the columns whose sum over all rows is non-zero"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# 5-fold cross-validation of a ridge classifier on the composer data.\n",
+    "# RidgeClassifier has no probability output, so the hard class predictions\n",
+    "# from predict() are what gets collected for each fold.\n",
+    "# alpha is the strength of the L2 penalty.\n",
+    "regularization_parameter = 10000.0\n",
+    "predicted = []  # predicted class ids, one array per fitted fold\n",
+    "actuals = []  # true class ids, aligned with predicted\n",
+    "lr_models = []  # fitted classifier of each fold\n",
+    "i = 0  # number of folds fitted so far\n",
+    "# NOTE: shuffle=True without random_state gives different folds on every run.\n",
+    "for train_idx, test_idx in KFold(n=X.shape[0], n_folds=5, shuffle=True):\n",
+    "    X_train = X[train_idx]\n",
+    "    y_train = y[train_idx]\n",
+    "    X_test = X[test_idx]\n",
+    "    y_test = y[test_idx]\n",
+    "\n",
+    "    lr = RidgeClassifier(alpha=regularization_parameter)\n",
+    "\n",
+    "    try:\n",
+    "        lr.fit(X_train, y_train)\n",
+    "    except Exception as err:\n",
+    "        # Still skip the fold, but say so: a bare except hides every fit\n",
+    "        # failure and also swallows KeyboardInterrupt.\n",
+    "        print(\"skipping fold, fit failed: {!r}\".format(err))\n",
+    "        continue\n",
+    "\n",
+    "    # Record every fold; otherwise only the last fold's lr, X_test and\n",
+    "    # y_test survive the loop and the result lists stay empty.\n",
+    "    # Results are kept in memory only; nothing is written to disk.\n",
+    "    predicted.append(lr.predict(X_test))\n",
+    "    actuals.append(y_test)\n",
+    "    lr_models.append(lr)\n",
+    "    i += 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn import metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.774834437086\n",
+      "[[ 64  63   0   1   0]\n",
+      " [  8 459   0   0   0]\n",
+      " [ 11  16  22   5   5]\n",
+      " [  2  24   3  22   0]\n",
+      " [  0  25   2   5  18]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "fold_pred = lr.predict(X_test)  # predictions of the model left over from the last fold\n",
+    "print(metrics.accuracy_score(y_test, fold_pred), metrics.confusion_matrix(y_test, fold_pred), sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}