diff --git a/notebooks/mocker-proposal.ipynb b/notebooks/mocker-proposal.ipynb
new file mode 100644
index 0000000..f3e1fdb
--- /dev/null
+++ b/notebooks/mocker-proposal.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "simplified-office",
+ "metadata": {},
+ "source": [
+ "## Some example mock APIs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "concerned-holiday",
+ "metadata": {},
+ "source": [
+ "Looking for some high-level feedback on these data mocks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "cordless-prevention",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from mlfaker.generators import NormalGenerator, CategoricalGenerator, BaseGenerator\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "aerial-subscriber",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_df(generators, size):\n",
+ "    \"\"\"Build a DataFrame with one column per generator.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    generators : iterable\n",
+ "        Objects exposing ``generate(size)``; each result becomes one column.\n",
+ "    size : int\n",
+ "        Forwarded unchanged to every ``generate`` call.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        Columns in generator order; an empty DataFrame when ``generators``\n",
+ "        is empty.\n",
+ "    \"\"\"\n",
+ "    # Wrapping in pd.Series keeps plain list/array results working as before\n",
+ "    # and preserves the name of results that already are Series.\n",
+ "    columns = [pd.Series(g.generate(size)) for g in generators]\n",
+ "    if not columns:\n",
+ "        # pd.concat raises ValueError when given nothing to concatenate.\n",
+ "        return pd.DataFrame()\n",
+ "    # Column-wise concat keeps each column's own dtype; the earlier\n",
+ "    # pd.DataFrame(rows).T round-trip upcast mixed columns to object.\n",
+ "    return pd.concat(columns, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "sustained-timber",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " foo | \n",
+ " bar | \n",
+ " fizz | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.624345 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.611756 | \n",
+ " NaN | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.528172 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -1.072969 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.865408 | \n",
+ " NaN | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " -2.301539 | \n",
+ " 1.0 | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 1.744812 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " foo ... fizz\n",
+ "0 1.624345 ... NaN\n",
+ "1 -0.611756 ... a\n",
+ "2 -0.528172 ... NaN\n",
+ "3 -1.072969 ... NaN\n",
+ "4 0.865408 ... b\n",
+ "5 -2.301539 ... a\n",
+ "6 1.744812 ... NaN\n",
+ "\n",
+ "[7 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Option 1 - function API: pass a list of configured generators and a row count.\n",
+ "# NOTE(review): fillrate=0.5 presumably leaves about half of each categorical\n",
+ "# column empty (the saved output shows NaN there) - confirm against mlfaker.\n",
+ "build_df(\n",
+ " [\n",
+ " NormalGenerator(\"foo\"),\n",
+ " CategoricalGenerator(\"bar\", fillrate=0.5), \n",
+ " CategoricalGenerator(\"fizz\", classes=[\"a\", \"b\", \"c\"], fillrate=0.5)\n",
+ " ],\n",
+ " 7\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "damaged-memory",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class DataFramer():\n",
+ "    \"\"\"Holds a set of column generators and builds DataFrames from them on demand.\"\"\"\n",
+ "\n",
+ "    def __init__(self, generators):\n",
+ "        \"\"\"Remember the generators to use.\n",
+ "\n",
+ "        Parameters\n",
+ "        ----------\n",
+ "        generators : iterable\n",
+ "            Objects exposing ``generate(size)``; stored as given (not copied).\n",
+ "        \"\"\"\n",
+ "        self.generators = generators\n",
+ "\n",
+ "    def generate(self, size):\n",
+ "        \"\"\"Return a DataFrame with one column per stored generator.\n",
+ "\n",
+ "        ``size`` is forwarded unchanged to every generator's ``generate``.\n",
+ "        An empty DataFrame is returned when there are no generators.\n",
+ "        \"\"\"\n",
+ "        # Wrapping in pd.Series keeps plain list/array results working as before.\n",
+ "        columns = [pd.Series(g.generate(size)) for g in self.generators]\n",
+ "        if not columns:\n",
+ "            # pd.concat raises ValueError when given nothing to concatenate.\n",
+ "            return pd.DataFrame()\n",
+ "        # Column-wise concat keeps each column's own dtype; the earlier\n",
+ "        # pd.DataFrame(rows).T round-trip upcast mixed columns to object.\n",
+ "        return pd.concat(columns, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "applied-shopping",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Option 2 - class API: configure the generators once, then call\n",
+ "# dataframer.generate(n) whenever a frame is needed.\n",
+ "dataframer = DataFramer(\n",
+ " [\n",
+ " NormalGenerator(\"mike\"),\n",
+ " CategoricalGenerator(\"jeff\", fillrate=0.5, seed=10),\n",
+ " NormalGenerator(\"tom\", fillrate=0.9, loc=3),\n",
+ " CategoricalGenerator(\"target\", classes=[0, 1], fillrate=0.5)\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "sacred-sodium",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mike | \n",
+ " jeff | \n",
+ " tom | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.900856 | \n",
+ " NaN | \n",
+ " 3.705072 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.683728 | \n",
+ " NaN | \n",
+ " 1.320058 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.122890 | \n",
+ " 0.0 | \n",
+ " 4.999976 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -0.935769 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -0.267888 | \n",
+ " 1.0 | \n",
+ " 3.193118 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 0.530355 | \n",
+ " NaN | \n",
+ " 1.446993 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mike ... target\n",
+ "0 0.900856 ... 0.0\n",
+ "1 -0.683728 ... 1.0\n",
+ "2 -0.122890 ... 0.0\n",
+ "3 -0.935769 ... NaN\n",
+ "4 -0.267888 ... NaN\n",
+ "5 0.530355 ... NaN\n",
+ "\n",
+ "[6 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build a 6-row frame from the generators configured on `dataframer`.\n",
+ "dataframer.generate(6)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "serial-doctor",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_df_from_cols(cols_types, size=10):\n",
+ "    \"\"\"Build a DataFrame from a ``{column name: type key}`` mapping.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    cols_types : dict\n",
+ "        Maps each column name to ``\"normal\"`` (NormalGenerator) or\n",
+ "        ``\"categorical\"`` (CategoricalGenerator). Generators are created\n",
+ "        with the column name as their only argument.\n",
+ "    size : int, default 10\n",
+ "        Forwarded unchanged to every ``generate`` call.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        One column per mapping entry, in mapping order; an empty DataFrame\n",
+ "        for an empty mapping.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    KeyError\n",
+ "        If a type key is not one of the supported keys.\n",
+ "    \"\"\"\n",
+ "    lookup = {\"normal\": NormalGenerator, \"categorical\": CategoricalGenerator}\n",
+ "    columns = []\n",
+ "    for name, kind in cols_types.items():\n",
+ "        if kind not in lookup:\n",
+ "            # Still a KeyError, as before, but one that says which column is wrong.\n",
+ "            raise KeyError(\n",
+ "                f\"unknown column type {kind!r} for column {name!r}; \"\n",
+ "                f\"expected one of {sorted(lookup)}\"\n",
+ "            )\n",
+ "        columns.append(pd.Series(lookup[kind](name).generate(size)))\n",
+ "    if not columns:\n",
+ "        # pd.concat raises ValueError when given nothing to concatenate.\n",
+ "        return pd.DataFrame()\n",
+ "    # Column-wise concat keeps each column's own dtype; the earlier\n",
+ "    # pd.DataFrame(rows).T round-trip upcast mixed columns to object.\n",
+ "    return pd.concat(columns, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "continuing-crime",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " foo | \n",
+ " bar | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.624345 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.611756 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.528172 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -1.072969 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.865408 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " foo bar\n",
+ "0 1.624345 1.0\n",
+ "1 -0.611756 1.0\n",
+ "2 -0.528172 0.0\n",
+ "3 -1.072969 0.0\n",
+ "4 0.865408 1.0"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Option 3 - dict API: map each column name to a generator type key\n",
+ "# (\"normal\" or \"categorical\"); generators use their default settings.\n",
+ "build_df_from_cols({\"foo\": \"normal\", \"bar\": \"categorical\"}, size=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "searching-technology",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_df_from_num_cat(nums, cats, size=10):\n",
+ "    \"\"\"Build a DataFrame of normal columns followed by categorical columns.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    nums, cats : int or iterable\n",
+ "        An int asks for that many auto-named columns (``col<k>``, where ``k``\n",
+ "        keeps counting across both groups); any other value is iterated and\n",
+ "        its items are used as the column names.\n",
+ "    size : int, default 10\n",
+ "        Forwarded unchanged to every ``generate`` call.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        ``nums`` columns (NormalGenerator) first, then ``cats`` columns\n",
+ "        (CategoricalGenerator); an empty DataFrame when no column is requested.\n",
+ "    \"\"\"\n",
+ "    gens = []\n",
+ "    n_created = 0  # generators built so far; offsets the auto-generated names\n",
+ "    for spec, gen_cls in ((nums, NormalGenerator), (cats, CategoricalGenerator)):\n",
+ "        if isinstance(spec, int):\n",
+ "            names = [f\"col{n_created + i}\" for i in range(spec)]\n",
+ "        else:\n",
+ "            names = list(spec)\n",
+ "        n_created += len(names)\n",
+ "        gens.extend(gen_cls(name) for name in names)\n",
+ "\n",
+ "    # NOTE(review): the saved output of the nums=2, cats=2 example shows identical\n",
+ "    # values in same-type columns, so generators built with default arguments\n",
+ "    # appear to share a seed - confirm whether each column needs its own seed.\n",
+ "    columns = [pd.Series(g.generate(size)) for g in gens]\n",
+ "    if not columns:\n",
+ "        # pd.concat raises ValueError when given nothing to concatenate.\n",
+ "        return pd.DataFrame()\n",
+ "    # Column-wise concat keeps each column's own dtype; the earlier\n",
+ "    # pd.DataFrame(rows).T round-trip upcast mixed columns to object.\n",
+ "    return pd.concat(columns, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "affiliated-burden",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " col0 | \n",
+ " col1 | \n",
+ " col2 | \n",
+ " col3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.624345 | \n",
+ " 1.624345 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.611756 | \n",
+ " -0.611756 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.528172 | \n",
+ " -0.528172 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -1.072969 | \n",
+ " -1.072969 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.865408 | \n",
+ " 0.865408 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " col0 ... col3\n",
+ "0 1.624345 ... 1.0\n",
+ "1 -0.611756 ... 1.0\n",
+ "2 -0.528172 ... 0.0\n",
+ "3 -1.072969 ... 0.0\n",
+ "4 0.865408 ... 1.0\n",
+ "\n",
+ "[5 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Option 4 - count API: two normal and two categorical columns, auto-named\n",
+ "# col0..col3 with the normal columns first.\n",
+ "build_df_from_num_cat(nums=2, cats=2, size=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "decreased-extension",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " foo | \n",
+ " col1 | \n",
+ " col2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.624345 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.611756 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.528172 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " foo ... col2\n",
+ "0 1.624345 ... 1.0\n",
+ "1 -0.611756 ... 1.0\n",
+ "2 -0.528172 ... 0.0\n",
+ "\n",
+ "[3 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Names and counts can be mixed: \"foo\" is named explicitly, and the two\n",
+ "# categorical columns are auto-named col1 and col2 because the running\n",
+ "# counter also counts the explicitly named column.\n",
+ "build_df_from_num_cat(nums=[\"foo\"], cats=2, size=3)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}