Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions nbs/ComparisonPlots.ipynb

Large diffs are not rendered by default.

528 changes: 528 additions & 0 deletions nbs/FactorizationMachineResults-MoreFactorsLongerRun-Copy1.ipynb

Large diffs are not rendered by default.

485 changes: 485 additions & 0 deletions nbs/FactorizationMachineResults-MoreFactorsLongerRun.ipynb

Large diffs are not rendered by default.

687 changes: 687 additions & 0 deletions nbs/FactorizationMachineResults.ipynb

Large diffs are not rendered by default.

466 changes: 466 additions & 0 deletions nbs/FeaturePlots.ipynb

Large diffs are not rendered by default.

654 changes: 654 additions & 0 deletions nbs/L1Results.ipynb

Large diffs are not rendered by default.

522 changes: 522 additions & 0 deletions nbs/L2Results.ipynb

Large diffs are not rendered by default.

790 changes: 790 additions & 0 deletions nbs/LDAResults.ipynb

Large diffs are not rendered by default.

567 changes: 567 additions & 0 deletions nbs/NTupleResults.ipynb

Large diffs are not rendered by default.

522 changes: 522 additions & 0 deletions nbs/NaiveBayesResults.ipynb

Large diffs are not rendered by default.

272 changes: 272 additions & 0 deletions nbs/Posterior Overlap.ipynb

Large diffs are not rendered by default.

192 changes: 192 additions & 0 deletions nbs/RidgeResults.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"import pickle\n",
"import numpy as np\n",
"from scipy import sparse\n",
"from functools import partial\n",
"from sklearn.externals import joblib\n",
"from sklearn.cross_validation import KFold\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.cross_validation import train_test_split\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"from sklearn.linear_model import RidgeClassifier"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build the five-composer dataset: X holds the dense feature rows, y the integer class ids.\n",
"# NOTE(review): joblib.load unpickles arbitrary objects -- only load dumps from a trusted source.\n",
"labels = joblib.load(\"/home/jovyan/persistent_data/data/dumps/labeled_corpus_labels.pkl\")\n",
"features = joblib.load(\"/home/jovyan/persistent_data/data/dumps/labeled_corpus_matrix.pkl\")\n",
"\n",
"# Row positions of each composer's pieces inside the corpus matrix.\n",
"mozart_labels = [k for k, name in enumerate(labels) if name == \"mozart\"]\n",
"bach_labels = [k for k, name in enumerate(labels) if name == \"bach-js\"]\n",
"schubert_labels = [k for k, name in enumerate(labels) if name == \"schubert\"]\n",
"chopin_labels = [k for k, name in enumerate(labels) if name == \"chopin\"]\n",
"tchaikovsky_labels = [k for k, name in enumerate(labels) if name == \"tchaikovsky\"]\n",
"\n",
"# Class ids follow this order: mozart=0, bach-js=1, schubert=2, chopin=3, tchaikovsky=4.\n",
"rows_by_class = [mozart_labels, bach_labels, schubert_labels, chopin_labels, tchaikovsky_labels]\n",
"selected_rows = [row for rows in rows_by_class for row in rows]\n",
"\n",
"# Slice while the matrix is still sparse and densify only the selected rows, rather than\n",
"# densifying the whole corpus first. Assumes the dump is a scipy.sparse matrix (the previous\n",
"# version of this cell called .todense() on it) -- TODO confirm.\n",
"X = features.tocsr()[selected_rows].toarray()\n",
"\n",
"# One class id per selected row, in the same order as the rows of X; already 1-D.\n",
"y = np.repeat(np.arange(len(rows_by_class)), [len(rows) for rows in rows_by_class])\n",
"del features"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Keep only the feature columns whose total over all selected rows is non-zero.\n",
"column_totals = X.sum(axis=0)\n",
"nonzero_columns = column_totals.nonzero()[0]\n",
"X = X[:, nonzero_columns]"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 5-fold cross-validation of a ridge classifier on (X, y).\n",
"# After the loop, lr / X_test / y_test still refer to the last fold, which the\n",
"# evaluation cell further down relies on.\n",
"regularization_parameter = 10000.0\n",
"predicted = []   # per-fold predictions on the held-out rows\n",
"actuals = []     # per-fold true class ids, aligned with `predicted`\n",
"lr_models = []   # per-fold fitted classifiers\n",
"\n",
"# random_state pins the shuffle so the folds are the same on every run.\n",
"folds = KFold(n=X.shape[0], n_folds=5, shuffle=True, random_state=0)\n",
"for i, (train_idx, test_idx) in enumerate(folds):\n",
"    X_train = X[train_idx]\n",
"    y_train = y[train_idx]\n",
"    X_test = X[test_idx]\n",
"    y_test = y[test_idx]\n",
"\n",
"    lr = RidgeClassifier(alpha=regularization_parameter)\n",
"\n",
"    try:\n",
"        lr.fit(X_train, y_train)\n",
"    except Exception as err:\n",
"        # Still best-effort (a failed fold is skipped), but the failure is reported\n",
"        # instead of being silently swallowed, and KeyboardInterrupt is no longer caught.\n",
"        print(\"fold {fold}: fit failed ({kind}: {msg}); skipping\"\n",
"              .format(fold=i, kind=type(err).__name__, msg=err))\n",
"        continue\n",
"\n",
"    predicted.append(lr.predict(X_test))\n",
"    actuals.append(y_test)\n",
"    lr_models.append(lr)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn import metrics"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.774834437086\n",
"[[ 64 63 0 1 0]\n",
" [ 8 459 0 0 0]\n",
" [ 11 16 22 5 5]\n",
" [ 2 24 3 22 0]\n",
" [ 0 25 2 5 18]]\n"
]
}
],
"source": [
"# Score `lr` on `X_test` / `y_test` as left behind by the cross-validation loop\n",
"# (i.e. the last fold only): accuracy first, then the confusion matrix.\n",
"test_predictions = lr.predict(X_test)\n",
"print(metrics.accuracy_score(y_test, test_predictions))\n",
"print(metrics.confusion_matrix(y_test, test_predictions))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}