diff --git a/.gitignore b/.gitignore
index b6e4761..f7f34fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,6 +117,9 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
+# VSCode project settings
+.vscode/**
+
 # mkdocs documentation
 /site
 
diff --git a/dronesim/control/INDIControl.py b/dronesim/control/INDIControl.py
index e132181..f3f093b 100644
--- a/dronesim/control/INDIControl.py
+++ b/dronesim/control/INDIControl.py
@@ -2,7 +2,6 @@
 import os
 import pdb
 
-# Active set library from : https://github.com/JimVaranelli/ActiveSet
 import sys
 import xml.etree.ElementTree as etxml
 
@@ -13,8 +12,8 @@
 
 from dronesim.control.BaseControl import BaseControl
 
-# from dronesim.control.ActiveSet import ActiveSet, ConstrainedLS
-from dronesim.control.wls_alloc import wls_alloc
+from dronesim.control.wls_alloc import wls_alloc as wls_alloc
+# from dronesim.control.lnwls_alloc import indi_lsi_wrapper as wls_alloc
 from dronesim.envs.BaseAviary import BaseAviary, DroneModel
 
 
@@ -95,15 +94,15 @@ def _parseURDFControlParameters(self):
         # self.PWM2RPM_SCALE = float(pwm2rpm.attrib['scale'])
         # self.PWM2RPM_CONST = float(pwm2rpm.attrib['const'])
         vals = [str(k) for k in pwm2rpm.attrib.values()]
-        self.PWM2RPM_SCALE = [float(s) for s in vals[0].split(" ") if s != ""]
-        self.PWM2RPM_CONST = [float(s) for s in vals[1].split(" ") if s != ""]
+        self.PWM2RPM_SCALE = np.asarray([float(s) for s in vals[0].split(" ") if s != ""])
+        self.PWM2RPM_CONST = np.asarray([float(s) for s in vals[1].split(" ") if s != ""])
 
         pwmlimit = URDF_TREE.find("control/pwm/limit")
         # self.MIN_PWM = float(pwmlimit.attrib['min'])
         # self.MAX_PWM = float(pwmlimit.attrib['max'])
         vals = [str(k) for k in pwmlimit.attrib.values()]
-        self.MIN_PWM = [float(s) for s in vals[0].split(" ") if s != ""]
-        self.MAX_PWM = [float(s) for s in vals[1].split(" ") if s != ""]
+        self.MIN_PWM = np.asarray([float(s) for s in vals[0].split(" ") if s != ""])
+        self.MAX_PWM = np.asarray([float(s) for s in vals[1].split(" ") if s != ""])
 
     ################################################################################
     def reset(self):
@@ -114,38 +113,39 @@ def reset(self):
         """
         super().reset()
         #### Store the last roll, pitch, and yaw ###################
-        self.last_rpy = np.zeros(3)
-        self.diffed_cur_ang_vel = np.zeros(3)  # ERASE
+        self.last_rpy           :np.ndarray = np.zeros(3)
+        self.diffed_cur_ang_vel :np.ndarray = np.zeros(3)  # ERASE
         #### Initialized PID control variables #####################
-        self.last_pos_e = np.zeros(3)
-        self.integral_pos_e = np.zeros(3)
-        self.last_rpy_e = np.zeros(3)
-        self.integral_rpy_e = np.zeros(3)
+        self.last_pos_e     :np.ndarray = np.zeros(3)
+        self.integral_pos_e :np.ndarray = np.zeros(3)
+        self.last_rpy_e     :np.ndarray = np.zeros(3)
+        self.integral_rpy_e :np.ndarray = np.zeros(3)
 
-        self.last_rates = np.zeros(3)  # p,q,r
+        self.last_rates :np.ndarray = np.zeros(3)  # p,q,r
         # self.last_pwm = np.ones(self.indi_actuator_nr)*1. # initial pwm
-        self.last_thrust = 0.0
+        self.last_thrust:float = 0.0
         # self.indi_increment = np.zeros(4)
-        self.cmd = np.ones(self.indi_actuator_nr) * 0.0
-        self.last_vel = np.zeros(3)
-        self.last_torque = np.zeros(3)  # For SU2 controller
-
-        self.xax = -1
-        self.yax = -1
-        self.zax = -1
-        self.xax1 = -2
-        self.yax1 = -2
-        self.zax1 = -2
+        self.cmd            :np.ndarray = np.zeros(self.indi_actuator_nr)
+        self.cmd_eps        :np.ndarray = np.ones(self.indi_actuator_nr)*0.05
+        self.last_vel       :np.ndarray = np.zeros(3)
+        self.last_torque    :np.ndarray = np.zeros(3)  # For SU2 controller
+
+        self.xax    :float = -1
+        self.yax    :float = -1
+        self.zax    :float = -1
+        self.xax1   :float = -2
+        self.yax1   :float = -2
+        self.zax1   :float = -2
 
         # for debugging logs...
-        self.att_log = np.zeros((30 * 100, 20))
-        self.guid_log = np.zeros((30 * 100, 20))
-        self.att_log_inc = 0
-        self.guid_log_inc = 0
+        self.att_log        :np.ndarray = np.zeros((30 * 100, 20))
+        self.guid_log       :np.ndarray = np.zeros((30 * 100, 20))
+        self.att_log_inc    :float = 0
+        self.guid_log_inc   :float = 0
 
-        self.rpm = np.zeros(self.indi_actuator_nr)
+        self.rpm :np.ndarray = np.zeros(self.indi_actuator_nr)
 
-    def rpm_of_pwm(self, pwm):
+    def rpm_of_pwm(self, pwm:np.ndarray):
         self.rpm = self.PWM2RPM_SCALE * pwm + self.PWM2RPM_CONST
         return self.rpm
 
@@ -153,16 +153,16 @@ def rpm_of_pwm(self, pwm):
 
     def computeControl(
         self,
-        control_timestep,
-        cur_pos,
-        cur_quat,
-        cur_vel,
-        cur_ang_vel,
-        target_pos,
-        target_vel=np.zeros(3),
-        target_acc=np.zeros(3),
-        target_rpy=np.zeros(3),
-        target_rpy_rates=np.zeros(3),
+        control_timestep:float,
+        cur_pos         :np.ndarray,
+        cur_quat        :np.ndarray,
+        cur_vel         :np.ndarray,
+        cur_ang_vel     :np.ndarray,
+        target_pos      :np.ndarray,
+        target_vel      :np.ndarray=np.zeros(3),
+        target_acc      :np.ndarray=np.zeros(3),
+        target_rpy      :np.ndarray=np.zeros(3),
+        target_rpy_rates:np.ndarray=np.zeros(3),
         
     ):
         """Computes the INDI control action (as RPMs) for a single drone.
@@ -231,14 +231,14 @@ def computeControl(
     ################################################################################
     def _INDIPositionControl(
         self,
-        control_timestep,
-        cur_pos,
-        cur_quat,
-        cur_vel,
-        target_pos,
-        target_rpy,
-        target_vel,
-        target_acc=np.zeros(3),
+        control_timestep:float,
+        cur_pos         :np.ndarray,
+        cur_quat        :np.ndarray,
+        cur_vel         :np.ndarray,
+        target_pos      :np.ndarray,
+        target_rpy      :np.ndarray,
+        target_vel      :np.ndarray,
+        target_acc      :np.ndarray=np.zeros(3),
         use_quaternion=False,
         nonlinear_increment=False,
     ):
@@ -354,13 +354,13 @@ def _INDIPositionControl(
 
     def _INDIAttitudeControl(
         self,
-        control_timestep,
-        thrust,
-        cur_quat,
-        cur_ang_vel,
-        target_euler,
-        target_quat,
-        target_rpy_rates,
+        control_timestep:float,
+        thrust          :float,
+        cur_quat        :np.ndarray,
+        cur_ang_vel     :np.ndarray,
+        target_euler    :np.ndarray,
+        target_quat     :np.ndarray,
+        target_rpy_rates:np.ndarray,
     ):
         """INDI attitude control.
 
@@ -412,11 +412,11 @@ def _INDIAttitudeControl(
 
     def _INDIRateControl(
             self,
-            control_timestep,
-            thrust,
-            cur_quat,
-            cur_ang_vel,
-            target_rpy_rates ):
+            control_timestep:float,
+            thrust          :float,
+            cur_quat        :np.ndarray,
+            cur_ang_vel     :np.ndarray,
+            target_rpy_rates:np.ndarray ):
 
         # FIXME : rate set point, reference angular speed, rpy rates, FIND a correct unique name for all...
         rate_sp = Rate()
@@ -454,7 +454,7 @@ def _INDIRateControl(
         indi_v[3] = thrust - self.last_thrust  # * 0.
         self.last_thrust = thrust
 
-        pseudo_inv = 1
+        pseudo_inv = 0
         if pseudo_inv:
             indi_du = np.dot(np.linalg.pinv(self.G1 / 0.05), indi_v)  # *self.m
             # print(f'Command : {self.cmd}')
@@ -462,26 +462,31 @@ def _INDIRateControl(
         else:
             # Use Active set for control allocation
             umin = np.asarray(
-                [self.MIN_PWM[i] - self.cmd[i] for i in range(self.indi_actuator_nr)]
+                [max(self.MIN_PWM[i] - self.cmd[i],-self.cmd_eps[i]) for i in range(self.indi_actuator_nr)]
             )
             umax = np.asarray(
-                [self.MAX_PWM[i] - self.cmd[i] for i in range(self.indi_actuator_nr)]
+                [min(self.MAX_PWM[i] - self.cmd[i],self.cmd_eps[i]) for i in range(self.indi_actuator_nr)]
             )
+            
+            # print(f'UMIN : {umin}  ---  UMAX : {umax}')
+            
             # umax = np.asarray([self.MAX_PWM for i in range(4)])
             # indi_v1 = [indi_v[i] for i in range(4)]
 
-            # up = np.array([0., 0., 0., 0.])
+            up = np.zeros_like(umin)
             Wv = np.array([1000, 1000, 0.1, 10])
             Wu = np.ones(self.indi_actuator_nr)  # np.array([1, 1, 1, 1, 1, 1]) #FIXME
             u_guess = None
             W_init = None
-            up = None
+            # up = None
 
             # import scipy.optimize
             # res = scipy.optimize.lsq_linear(A, v, bounds=(umin, umax), lsmr_tol='auto', verbose=1)
             indi_du, nit = wls_alloc(
                 indi_v, umin, umax, self.G1 / 0.05, u_guess, W_init, Wv, Wu, up
             )
+            
+            # print(f'INDI_V : {indi_v}  ---  INDI_DU : {indi_du}  ---  NIT : {nit}')
 
         self.cmd += indi_du
         self.cmd = np.clip(self.cmd, self.MIN_PWM, self.MAX_PWM)  # command in PWM
diff --git a/dronesim/control/INDIControl_6DOF.py b/dronesim/control/INDIControl_6DOF.py
index 70bbb1d..661eaa4 100644
--- a/dronesim/control/INDIControl_6DOF.py
+++ b/dronesim/control/INDIControl_6DOF.py
@@ -2,7 +2,6 @@
 import os
 import pdb
 
-# Active set library from : https://github.com/JimVaranelli/ActiveSet
 import sys
 import xml.etree.ElementTree as etxml
 from dataclasses import dataclass
@@ -13,8 +12,8 @@
 
 from dronesim.control.BaseControl import BaseControl
 
-# from dronesim.control.ActiveSet import ActiveSet, ConstrainedLS
 from dronesim.control.wls_alloc import wls_alloc
+# from dronesim.control.lnwls_alloc import indi_lsi_wrapper as wls_alloc
 from dronesim.envs.BaseAviary import BaseAviary, DroneModel
 
 # @dataclass
@@ -613,19 +612,22 @@ def _INDIAttitudeControl(
             # umax = np.asarray([self.MAX_PWM for i in range(4)])
             # indi_v1 = [indi_v[i] for i in range(4)]
 
-            # up = np.array([0., 0., 0., 0.])
             # Wv = np.array([1000, 1000, 0.1, 10])
-            Wv = np.array([1000, 1000, 0.1, 10, 10, 100])  # This can be a decision...
+            Wv = np.array([10, 10, 0.1, 1, 1, 5])  # This can be a decision...
             Wu = np.ones(self.indi_actuator_nr)  # np.array([1, 1, 1, 1, 1, 1]) #FIXME
             u_guess = None
             W_init = None
-            up = None
-
-            # import scipy.optimize
-            # res = scipy.optimize.lsq_linear(A, v, bounds=(umin, umax), lsmr_tol='auto', verbose=1)
-            indi_du, nit = wls_alloc(
-                indi_v, umin, umax, self.G1 / 0.05, u_guess, W_init, Wv, Wu, up
-            )
+            up = np.zeros_like(umin)
+
+            indi_uncapped = False
+            if indi_uncapped:
+                indi_du, nit = wls_alloc(
+                    indi_v, np.ones_like(umin) * -1e9, np.ones_like(umax) * 1e9, self.G1 / 0.05, u_guess, W_init, Wv, Wu, up
+                )
+            else:
+                indi_du, nit = wls_alloc(
+                    indi_v, umin, umax, self.G1 / 0.05, u_guess, W_init, Wv, Wu, up
+                )
 
         self.cmd += indi_du
         self.cmd = np.clip(self.cmd, self.MIN_PWM, self.MAX_PWM)  # command in PWM
diff --git a/dronesim/control/pywls/pywls_module.c b/dronesim/control/pywls/pywls_module.c
new file mode 100644
index 0000000..ebe4e2f
--- /dev/null
+++ b/dronesim/control/pywls/pywls_module.c
@@ -0,0 +1,347 @@
+#define PY_SSIZE_T_CLEAN
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+
+#include <Python.h>
+#include <numpy/arrayobject.h>
+
+#include <string.h>
+
+#include "wls_alloc.h"
+
+/* --------------------------- Helper utilities --------------------------- */
+/* Convert obj to a float32 ndarray with exactly ndim dimensions; returns a new reference, or NULL with an exception set. */
+static PyArrayObject *
+require_float32_array(PyObject *obj, int ndim, const char *name)
+{
+    PyArrayObject *arr = (PyArrayObject *)PyArray_FROM_OTF(
+        obj,
+        NPY_FLOAT32,
+        NPY_ARRAY_IN_ARRAY  /* C-contiguous + aligned, so the data can be read as a plain float buffer */
+    );
+    if (arr == NULL) {
+        return NULL;  /* NumPy already set an exception */
+    }
+
+    if (PyArray_NDIM(arr) != ndim) {
+        PyErr_Format(PyExc_ValueError,
+                     "%s must be a %d-D NumPy array",
+                     name, ndim);
+        Py_DECREF(arr);  /* drop the converted array before reporting the error */
+        return NULL;
+    }
+
+    return arr;  /* ownership passes to the caller */
+}
+/* Check that arr is 1-D with exactly expected_len elements: 0 on success, -1 with ValueError set (name labels the message). */
+static int
+check_vector_len(PyArrayObject *arr, npy_intp expected_len, const char *name)
+{
+    if (PyArray_NDIM(arr) != 1) {
+        PyErr_Format(PyExc_ValueError, "%s must be 1-D", name);
+        return -1;
+    }
+
+    if (PyArray_DIM(arr, 0) != expected_len) {
+        PyErr_Format(PyExc_ValueError,
+                     "%s has length %lld, expected %lld",
+                     name,
+                     (long long)PyArray_DIM(arr, 0),  /* npy_intp widened to match %lld */
+                     (long long)expected_len);
+        return -1;
+    }
+
+    return 0;
+}
+/* Check that arr is 2-D with shape (rows, cols): 0 on success, -1 with ValueError set (name labels the message). */
+static int
+check_matrix_shape(PyArrayObject *arr, npy_intp rows, npy_intp cols, const char *name)
+{
+    if (PyArray_NDIM(arr) != 2) {
+        PyErr_Format(PyExc_ValueError, "%s must be 2-D", name);
+        return -1;
+    }
+
+    if (PyArray_DIM(arr, 0) != rows || PyArray_DIM(arr, 1) != cols) {
+        PyErr_Format(PyExc_ValueError,
+                     "%s has shape (%lld, %lld), expected (%lld, %lld)",
+                     name,
+                     (long long)PyArray_DIM(arr, 0),  /* npy_intp values widened to match %lld */
+                     (long long)PyArray_DIM(arr, 1),
+                     (long long)rows,
+                     (long long)cols);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* ----------------------------- Wrapped call ----------------------------- */
+
+PyDoc_STRVAR(pywls_wls_alloc_doc,
+"wls_alloc(B, v, u_min, u_max, u_guess=None, W_init=None, Wv=None, Wu=None, "
+"u_pref=None, gamma_sq=100000.0, imax=100)\n"
+"--\n"
+"\n"
+"Wrap the C function wls_alloc() from wls_alloc.c.\n"
+"\n"
+"Parameters\n"
+"----------\n"
+"B : ndarray, shape (nv, nu), float32-compatible\n"
+"    Control effectiveness matrix.\n"
+"v : ndarray, shape (nv,), float32-compatible\n"
+"    Control objective vector.\n"
+"u_min, u_max : ndarray, shape (nu,), float32-compatible\n"
+"    Lower/upper actuator limits.\n"
+"u_guess : ndarray, optional, shape (nu,)\n"
+"    Initial actuator guess.\n"
+"W_init : ndarray, optional, shape (nu,)\n"
+"    Initial working set.\n"
+"Wv : ndarray, optional, shape (nv,)\n"
+"    Objective weights. Defaults to ones.\n"
+"Wu : ndarray, optional, shape (nu,)\n"
+"    Control weights. Defaults to ones.\n"
+"u_pref : ndarray, optional, shape (nu,)\n"
+"    Preferred actuator vector. Defaults to zeros.\n"
+"gamma_sq : float, optional\n"
+"    Weighting factor.\n"
+"imax : int, optional\n"
+"    Maximum number of iterations.\n"
+"\n"
+"Returns\n"
+"-------\n"
+"(u, iter) : tuple\n"
+"    u is a float32 NumPy array of shape (nu,), iter is the iteration count.\n");
+/* Python entry point: convert/validate the arguments, fill a local WLS_t, run wls_alloc() with the GIL released, return (u, iter). */
+static PyObject *
+pywls_wls_alloc(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    (void)self;
+
+    PyObject *B_obj = NULL;
+    PyObject *v_obj = NULL;
+    PyObject *u_min_obj = NULL;
+    PyObject *u_max_obj = NULL;
+    PyObject *u_guess_obj = Py_None;
+    PyObject *W_init_obj = Py_None;
+    PyObject *Wv_obj = Py_None;
+    PyObject *Wu_obj = Py_None;
+    PyObject *u_pref_obj = Py_None;
+    float gamma_sq = 100000.0f;
+    int imax = 100;
+
+    static char *kwlist[] = {
+        "B", "v", "u_min", "u_max",
+        "u_guess", "W_init", "Wv", "Wu", "u_pref",
+        "gamma_sq", "imax",
+        NULL
+    };
+
+    if (!PyArg_ParseTupleAndKeywords(
+            args, kwargs,
+            "OOOO|OOOOOfi:wls_alloc",
+            kwlist,
+            &B_obj, &v_obj, &u_min_obj, &u_max_obj,
+            &u_guess_obj, &W_init_obj, &Wv_obj, &Wu_obj, &u_pref_obj,
+            &gamma_sq, &imax)) {
+        return NULL;
+    }
+
+    PyArrayObject *B_arr = NULL;
+    PyArrayObject *v_arr = NULL;
+    PyArrayObject *u_min_arr = NULL;
+    PyArrayObject *u_max_arr = NULL;
+    PyArrayObject *u_guess_arr = NULL;
+    PyArrayObject *W_init_arr = NULL;
+    PyArrayObject *Wv_arr = NULL;
+    PyArrayObject *Wu_arr = NULL;
+    PyArrayObject *u_pref_arr = NULL;
+    PyArrayObject *u_out_arr = NULL;
+
+    float **B_rows = NULL;
+    PyObject *result = NULL;
+
+    npy_intp nu = 0;
+    npy_intp nv = 0;
+
+    struct WLS_t state;
+    memset(&state, 0, sizeof(state));
+
+    /* Required arrays: v fixes nv and u_min fixes nu; u_max and B are then checked against them */
+    v_arr = require_float32_array(v_obj, 1, "v");
+    if (v_arr == NULL) goto fail;
+    nv = PyArray_DIM(v_arr, 0);
+
+    u_min_arr = require_float32_array(u_min_obj, 1, "u_min");
+    if (u_min_arr == NULL) goto fail;
+    nu = PyArray_DIM(u_min_arr, 0);
+
+    u_max_arr = require_float32_array(u_max_obj, 1, "u_max");
+    if (u_max_arr == NULL) goto fail;
+    if (check_vector_len(u_max_arr, nu, "u_max") < 0) goto fail;
+
+    B_arr = require_float32_array(B_obj, 2, "B");
+    if (B_arr == NULL) goto fail;
+    if (check_matrix_shape(B_arr, nv, nu, "B") < 0) goto fail;
+
+    /* Enforce compile-time limits from wls_alloc.h. NOTE(review): nu == 0 / nv == 0 is not rejected; confirm wls_alloc() copes */
+    if (nu > WLS_N_U_MAX) {
+        PyErr_Format(PyExc_ValueError,
+                     "nu=%lld exceeds compile-time WLS_N_U_MAX=%d; "
+                     "rebuild the extension with a larger WLS_N_U_MAX",
+                     (long long)nu, WLS_N_U_MAX);
+        goto fail;
+    }
+    if (nv > WLS_N_V_MAX) {
+        PyErr_Format(PyExc_ValueError,
+                     "nv=%lld exceeds compile-time WLS_N_V_MAX=%d; "
+                     "rebuild the extension with a larger WLS_N_V_MAX",
+                     (long long)nv, WLS_N_V_MAX);
+        goto fail;
+    }
+
+    /* Optional arrays: None (the default) leaves the pointer NULL; otherwise the length must be nu (nv for Wv) */
+    if (u_guess_obj != Py_None) {
+        u_guess_arr = require_float32_array(u_guess_obj, 1, "u_guess");
+        if (u_guess_arr == NULL) goto fail;
+        if (check_vector_len(u_guess_arr, nu, "u_guess") < 0) goto fail;
+    }
+
+    if (W_init_obj != Py_None) {
+        W_init_arr = require_float32_array(W_init_obj, 1, "W_init");
+        if (W_init_arr == NULL) goto fail;
+        if (check_vector_len(W_init_arr, nu, "W_init") < 0) goto fail;
+    }
+
+    if (Wv_obj != Py_None) {
+        Wv_arr = require_float32_array(Wv_obj, 1, "Wv");
+        if (Wv_arr == NULL) goto fail;
+        if (check_vector_len(Wv_arr, nv, "Wv") < 0) goto fail;
+    }
+
+    if (Wu_obj != Py_None) {
+        Wu_arr = require_float32_array(Wu_obj, 1, "Wu");
+        if (Wu_arr == NULL) goto fail;
+        if (check_vector_len(Wu_arr, nu, "Wu") < 0) goto fail;
+    }
+
+    if (u_pref_obj != Py_None) {
+        u_pref_arr = require_float32_array(u_pref_obj, 1, "u_pref");
+        if (u_pref_arr == NULL) goto fail;
+        if (check_vector_len(u_pref_arr, nu, "u_pref") < 0) goto fail;
+    }
+
+    /* Build float** row pointers for B; B_arr is C-contiguous, so row i starts at B_data + i * nu */
+    B_rows = (float **)PyMem_Malloc((size_t)nv * sizeof(float *));
+    if (B_rows == NULL) {
+        PyErr_NoMemory();
+        goto fail;
+    }
+    {
+        float *B_data = (float *)PyArray_DATA(B_arr);
+        for (npy_intp i = 0; i < nv; ++i) {
+            B_rows[i] = B_data + i * nu;
+        }
+    }
+
+    /* Fill WLS_t; presumably its arrays hold WLS_N_U_MAX / WLS_N_V_MAX floats (checked above) — confirm in wls_alloc.h */
+    state.nu = (int)nu;
+    state.nv = (int)nv;
+    state.gamma_sq = gamma_sq;
+    state.iter = 0;
+
+    memcpy(state.v,     PyArray_DATA(v_arr),     (size_t)nv * sizeof(float));
+    memcpy(state.u_min, PyArray_DATA(u_min_arr), (size_t)nu * sizeof(float));
+    memcpy(state.u_max, PyArray_DATA(u_max_arr), (size_t)nu * sizeof(float));
+
+    if (Wv_arr != NULL) {
+        memcpy(state.Wv, PyArray_DATA(Wv_arr), (size_t)nv * sizeof(float));
+    } else {
+        for (npy_intp i = 0; i < nv; ++i) state.Wv[i] = 1.0f;
+    }
+
+    if (Wu_arr != NULL) {
+        memcpy(state.Wu, PyArray_DATA(Wu_arr), (size_t)nu * sizeof(float));
+    } else {
+        for (npy_intp i = 0; i < nu; ++i) state.Wu[i] = 1.0f;
+    }
+
+    if (u_pref_arr != NULL) {
+        memcpy(state.u_pref, PyArray_DATA(u_pref_arr), (size_t)nu * sizeof(float));
+    } else {
+        for (npy_intp i = 0; i < nu; ++i) state.u_pref[i] = 0.0f;
+    }
+
+    /*
+     * wls_alloc() initializes state.u from u_guess if provided, otherwise
+     * from the midpoint of [u_min, u_max].
+     */
+    Py_BEGIN_ALLOW_THREADS  /* GIL released: only raw buffers are touched until Py_END_ALLOW_THREADS */
+    wls_alloc(
+        &state,
+        B_rows,
+        (u_guess_arr != NULL) ? (float *)PyArray_DATA(u_guess_arr) : NULL,
+        (W_init_arr != NULL) ? (float *)PyArray_DATA(W_init_arr) : NULL,
+        imax
+    );
+    Py_END_ALLOW_THREADS
+
+    {
+        npy_intp dims[1] = { nu };
+        u_out_arr = (PyArrayObject *)PyArray_SimpleNew(1, dims, NPY_FLOAT32);
+        if (u_out_arr == NULL) goto fail;
+        memcpy(PyArray_DATA(u_out_arr), state.u, (size_t)nu * sizeof(float));
+    }
+
+    result = Py_BuildValue("Ni", (PyObject *)u_out_arr, state.iter);
+    u_out_arr = NULL;  /* stolen by "N" */
+
+fail:  /* shared cleanup: the success path falls through here too, with result already set */
+    Py_XDECREF(B_arr);
+    Py_XDECREF(v_arr);
+    Py_XDECREF(u_min_arr);
+    Py_XDECREF(u_max_arr);
+    Py_XDECREF(u_guess_arr);
+    Py_XDECREF(W_init_arr);
+    Py_XDECREF(Wv_arr);
+    Py_XDECREF(Wu_arr);
+    Py_XDECREF(u_pref_arr);
+    Py_XDECREF(u_out_arr);
+
+    if (B_rows != NULL) PyMem_Free(B_rows);
+
+    return result;
+}
+
+/* ------------------------------ Module table ---------------------------- */
+
+static PyMethodDef pywls_methods[] = {
+    {
+        "wls_alloc",
+        (PyCFunction)pywls_wls_alloc,  /* cast needed: the function also takes a kwargs argument */
+        METH_VARARGS | METH_KEYWORDS,
+        pywls_wls_alloc_doc
+    },
+    {NULL, NULL, 0, NULL}  /* sentinel terminating the table */
+};
+
+/* Module definition for "pywls": name, docstring, per-module state size, method table. */
+static struct PyModuleDef pywls_module = {
+    PyModuleDef_HEAD_INIT,
+    "pywls",
+    "CPython/NumPy wrapper for wls_alloc.c",
+    -1,  /* m_size = -1: no per-interpreter module state is allocated */
+    pywls_methods
+};
+
+PyMODINIT_FUNC
+PyInit_pywls(void)
+{
+    /*
+     * Load the NumPy C API before creating the module: import_array()
+     * returns NULL from this function on failure, and doing it first means
+     * there is no already-created module object to leak on that path.
+     */
+    import_array();
+    return PyModule_Create(&pywls_module);
+}
+
diff --git a/dronesim/control/pywls/qr_solve.c b/dronesim/control/pywls/qr_solve.c
new file mode 100644
index 0000000..7404996
--- /dev/null
+++ b/dronesim/control/pywls/qr_solve.c
@@ -0,0 +1,1616 @@
+/*
+ * This is part of the qr_solve library from John Burkardt.
+ * http://people.sc.fsu.edu/~jburkardt/c_src/qr_solve/qr_solve.html
+ *
+ * It is slightly modified to make it compile on simple microprocessors,
+ * and to remove all dynamic memory.
+ *
+ * This code is distributed under the GNU LGPL license.
+ */
+
+#include "std.h"
+#include <stdlib.h>
+#include <math.h>
+
+#include "qr_solve.h"
+#include "r8lib_min.h"
+
+#define DEBUG_FPRINTF(...)
+#define DEBUG_EXIT(...)
+
+/******************************************************************************/
+
+void daxpy ( int n, float da, float dx[], int incx, float dy[], int incy )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DAXPY computes constant times a vector plus a vector.
+
+  Discussion:
+
+    The loop is unrolled by 4 when both increments are 1; all data is float despite the "D" name.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    30 March 2007
+
+  Author:
+
+    C version by John Burkardt
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979.
+
+    Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
+    Basic Linear Algebra Subprograms for Fortran Usage,
+    Algorithm 539,
+    ACM Transactions on Mathematical Software,
+    Volume 5, Number 3, September 1979, pages 308-323.
+
+  Parameters:
+
+    Input, int N, the number of elements in DX and DY.
+
+    Input, float DA, the multiplier of DX.
+
+    Input, float DX[*], the first vector.
+
+    Input, int INCX, the increment between successive entries of DX.
+
+    Input/output, float DY[*], the second vector.
+    On output, DY[*] has been replaced by DY[*] + DA * DX[*].
+
+    Input, int INCY, the increment between successive entries of DY.
+*/
+{
+  int i;
+  int ix;
+  int iy;
+  int m;
+
+  if ( n <= 0 )
+  {
+    return;
+  }
+
+  if ( da == 0.0 )
+  {
+    return;  /* adding zero times DX leaves DY unchanged */
+  }
+/*
+  Code for unequal increments or equal increments
+  not equal to 1.
+*/
+  if ( incx != 1 || incy != 1 )
+  {
+    if ( 0 <= incx )
+    {
+      ix = 0;
+    }
+    else
+    {
+      ix = ( - n + 1 ) * incx;  /* negative stride: start at offset (n-1)*|incx| and walk down */
+    }
+
+    if ( 0 <= incy )
+    {
+      iy = 0;
+    }
+    else
+    {
+      iy = ( - n + 1 ) * incy;  /* same start-from-the-far-end rule for DY */
+    }
+
+    for ( i = 0; i < n; i++ )
+    {
+      dy[iy] = dy[iy] + da * dx[ix];
+      ix = ix + incx;
+      iy = iy + incy;
+    }
+  }
+/*
+  Code for both increments equal to 1.
+*/
+  else
+  {
+    m = n % 4;  /* leftover elements, handled singly so the unrolled loop sees a multiple of 4 */
+
+    for ( i = 0; i < m; i++ )
+    {
+      dy[i] = dy[i] + da * dx[i];
+    }
+
+    for ( i = m; i < n; i = i + 4 )
+    {
+      dy[i  ] = dy[i  ] + da * dx[i  ];
+      dy[i+1] = dy[i+1] + da * dx[i+1];
+      dy[i+2] = dy[i+2] + da * dx[i+2];
+      dy[i+3] = dy[i+3] + da * dx[i+3];
+    }
+  }
+  return;
+}
+/******************************************************************************/
+
+float ddot ( int n, float dx[], int incx, float dy[], int incy )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DDOT forms the dot product of two vectors.
+
+  Discussion:
+
+    The loop is unrolled by 5 when both increments are 1; all data is float despite the "D" name.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    30 March 2007
+
+  Author:
+
+    C version by John Burkardt
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979.
+
+    Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
+    Basic Linear Algebra Subprograms for Fortran Usage,
+    Algorithm 539,
+    ACM Transactions on Mathematical Software,
+    Volume 5, Number 3, September 1979, pages 308-323.
+
+  Parameters:
+
+    Input, int N, the number of entries in the vectors.
+
+    Input, float DX[*], the first vector.
+
+    Input, int INCX, the increment between successive entries in DX.
+
+    Input, float DY[*], the second vector.
+
+    Input, int INCY, the increment between successive entries in DY.
+
+    Output, float DDOT, the sum of the product of the corresponding
+    entries of DX and DY.
+*/
+{
+  float dtemp;
+  int i;
+  int ix;
+  int iy;
+  int m;
+
+  dtemp = 0.0;
+
+  if ( n <= 0 )
+  {
+    return dtemp;  /* empty vectors: the dot product is 0 */
+  }
+/*
+  Code for unequal increments or equal increments
+  not equal to 1.
+*/
+  if ( incx != 1 || incy != 1 )
+  {
+    if ( 0 <= incx )
+    {
+      ix = 0;
+    }
+    else
+    {
+      ix = ( - n + 1 ) * incx;  /* negative stride: start at offset (n-1)*|incx| and walk down */
+    }
+
+    if ( 0 <= incy )
+    {
+      iy = 0;
+    }
+    else
+    {
+      iy = ( - n + 1 ) * incy;  /* same start-from-the-far-end rule for DY */
+    }
+
+    for ( i = 0; i < n; i++ )
+    {
+      dtemp = dtemp + dx[ix] * dy[iy];
+      ix = ix + incx;
+      iy = iy + incy;
+    }
+  }
+/*
+  Code for both increments equal to 1.
+*/
+  else
+  {
+    m = n % 5;  /* leftover elements, summed first so the unrolled loop sees a multiple of 5 */
+
+    for ( i = 0; i < m; i++ )
+    {
+      dtemp = dtemp + dx[i] * dy[i];
+    }
+
+    for ( i = m; i < n; i = i + 5 )
+    {
+      dtemp = dtemp + dx[i  ] * dy[i  ]
+                    + dx[i+1] * dy[i+1]
+                    + dx[i+2] * dy[i+2]
+                    + dx[i+3] * dy[i+3]
+                    + dx[i+4] * dy[i+4];
+    }
+  }
+  return dtemp;
+}
+/******************************************************************************/
+
+float dnrm2 ( int n, float x[], int incx )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DNRM2 returns the euclidean norm of a vector.
+
+  Discussion:
+
+     DNRM2 ( X ) = sqrt ( X' * X )
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    30 March 2007
+
+  Author:
+
+    C version by John Burkardt
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979.
+
+    Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
+    Basic Linear Algebra Subprograms for Fortran Usage,
+    Algorithm 539,
+    ACM Transactions on Mathematical Software,
+    Volume 5, Number 3, September 1979, pages 308-323.
+
+  Parameters:
+
+    Input, int N, the number of entries in the vector.
+
+    Input, float X[*], the vector whose norm is to be computed.
+
+    Input, int INCX, the increment between successive entries of X.
+
+    Output, float DNRM2, the Euclidean norm of X.
+*/
+{
+  float absxi;
+  int i;
+  int ix;
+  float norm;
+  float scale;
+  float ssq;
+
+  if ( n < 1 || incx < 1 )
+  {
+    norm = 0.0;  /* empty vector or non-positive stride: 0 is returned */
+  }
+  else if ( n == 1 )
+  {
+    norm = fabs ( x[0] );
+  }
+  else
+  {
+    scale = 0.0;  /* largest |x[i]| seen so far */
+    ssq = 1.0;  /* kept so that scale*scale*ssq equals the sum of squares so far */
+    ix = 0;
+
+    for ( i = 0; i < n; i++ )
+    {
+      if ( x[ix] != 0.0 )
+      {
+        absxi = fabs ( x[ix] );
+        if ( scale < absxi )
+        {
+          ssq = 1.0 + ssq * ( scale / absxi ) * ( scale / absxi );  /* new maximum: re-express ssq relative to it */
+          scale = absxi;
+        }
+        else
+        {
+          ssq = ssq + ( absxi / scale ) * ( absxi / scale );
+        }
+      }
+      ix = ix + incx;
+    }
+
+    norm  = scale * sqrt ( ssq );
+  }
+
+  return norm;
+}
+/******************************************************************************/
+
+void dqrank ( float a[], int lda, int m, int n, float tol, int *kr,
+  int jpvt[], float qraux[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DQRANK computes the QR factorization of a rectangular matrix.
+
+  Discussion:
+
+    This routine is used in conjunction with DQRLSS to solve
+    overdetermined, underdetermined and singular linear systems
+    in a least squares sense.
+
+    DQRANK uses the LINPACK subroutine DQRDC to compute the QR
+    factorization, with column pivoting, of an M by N matrix A.
+    The numerical rank is determined using the tolerance TOL.
+
+    Note that on output, ABS ( A(1,1) ) / ABS ( A(KR,KR) ) is an estimate
+    of the condition number of the matrix of independent columns,
+    and of R.  This estimate will be <= 1/TOL.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    21 April 2012
+
+  Author:
+
+    C version by John Burkardt.
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979,
+    ISBN13: 978-0-898711-72-1,
+    LC: QA214.L56.
+
+  Parameters:
+
+    Input/output, float A[LDA*N].  On input, the matrix whose
+    decomposition is to be computed.  On output, the information from DQRDC.
+    The triangular matrix R of the QR factorization is contained in the
+    upper triangle and information needed to recover the orthogonal
+    matrix Q is stored below the diagonal in A and in the vector QRAUX.
+
+    Input, int LDA, the leading dimension of A, which must
+    be at least M.
+
+    Input, int M, the number of rows of A.
+
+    Input, int N, the number of columns of A.
+
+    Input, float TOL, a relative tolerance used to determine the
+    numerical rank.  The problem should be scaled so that all the elements
+    of A have roughly the same absolute accuracy, EPS.  Then a reasonable
+    value for TOL is roughly EPS divided by the magnitude of the largest
+    element.
+
+    Output, int *KR, the numerical rank.
+
+    Output, int JPVT[N], the pivot information from DQRDC.
+    Columns JPVT(1), ..., JPVT(KR) of the original matrix are linearly
+    independent to within the tolerance TOL and the remaining columns
+    are linearly dependent.
+
+    Output, float QRAUX[N], will contain extra information defining
+    the QR factorization.
+*/
+{
+  int i;
+  int j;
+  int job;
+  int k;
+  /* work[] is declared below as a stack array of n floats; the original malloc was removed */
+
+  for ( i = 0; i < n; i++ )
+  {
+    jpvt[i] = 0;
+  }
+
+  float work[n];
+  /* NOTE(review): a variable-length array needs n > 0 — confirm callers never pass n <= 0 */
+  job = 1;  /* passed to dqrdc as JOB; presumably the value that requests column pivoting — confirm in dqrdc */
+
+  dqrdc ( a, lda, m, n, qraux, jpvt, work, job );
+
+  *kr = 0;
+  k = i4_min ( m, n );
+
+  for ( j = 0; j < k; j++ )
+  {
+    if ( fabs ( a[j+j*lda] ) <= tol * fabs ( a[0+0*lda] ) )
+    {
+      return;  /* diagonal entry j is negligible relative to entry 0: rank stays at the count so far */
+    }
+    *kr = j + 1;
+  }
+
+  return;
+}
+/******************************************************************************/
+
+void dqrdc ( float a[], int lda, int n, int p, float qraux[], int jpvt[],
+  float work[], int job )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DQRDC computes the QR factorization of a real rectangular matrix.
+
+  Discussion:
+
+    DQRDC uses Householder transformations.
+
+    Column pivoting based on the 2-norms of the reduced columns may be
+    performed at the user's option.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    07 June 2005
+
+  Author:
+
+    C version by John Burkardt.
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, (Society for Industrial and Applied Mathematics),
+    3600 University City Science Center,
+    Philadelphia, PA, 19104-2688.
+    ISBN 0-89871-172-X
+
+  Parameters:
+
+    Input/output, float A(LDA,P).  On input, the N by P matrix
+    whose decomposition is to be computed.  On output, A contains in
+    its upper triangle the upper triangular matrix R of the QR
+    factorization.  Below its diagonal A contains information from
+    which the orthogonal part of the decomposition can be recovered.
+    Note that if pivoting has been requested, the decomposition is not that
+    of the original matrix A but that of A with its columns permuted
+    as described by JPVT.
+
+    Input, int LDA, the leading dimension of the array A.  LDA must
+    be at least N.
+
+    Input, int N, the number of rows of the matrix A.
+
+    Input, int P, the number of columns of the matrix A.
+
+    Output, float QRAUX[P], contains further information required
+    to recover the orthogonal part of the decomposition.
+
+    Input/output, integer JPVT[P].  On input, JPVT contains integers that
+    control the selection of the pivot columns.  The K-th column A(*,K) of A
+    is placed in one of three classes according to the value of JPVT(K).
+      > 0, then A(K) is an initial column.
+      = 0, then A(K) is a free column.
+      < 0, then A(K) is a final column.
+    Before the decomposition is computed, initial columns are moved to
+    the beginning of the array A and final columns to the end.  Both
+    initial and final columns are frozen in place during the computation
+    and only free columns are moved.  At the K-th stage of the
+    reduction, if A(*,K) is occupied by a free column it is interchanged
+    with the free column of largest reduced norm.  JPVT is not referenced
+    if JOB == 0.  On output, JPVT(K) contains the index of the column of the
+    original matrix that has been interchanged into the K-th column, if
+    pivoting was requested.
+
+    Workspace, float WORK[P].  WORK is not referenced if JOB == 0.
+
+    Input, int JOB, initiates column pivoting.
+    0, no pivoting is done.
+    nonzero, pivoting is done.
+*/
+{
+  int j;
+  int jp;
+  int l;
+  int lup;
+  int maxj;
+  float maxnrm;
+  float nrmxl;
+  int pl;
+  int pu;
+  int swapj;
+  float t;
+  float tt;
+
+  pl = 1; /* 1-based index of the first free column */
+  pu = 0; /* 1-based index of the last free column; stays 0 when JOB == 0, which disables every pivoting branch below */
+/*
+  If pivoting is requested, rearrange the columns (column J starts at A+(J-1)*LDA).
+*/
+  if ( job != 0 )
+  {
+    for ( j = 1; j <= p; j++ )
+    {
+      swapj = ( 0 < jpvt[j-1] ); /* a positive entry marks an initial column */
+
+      if ( jpvt[j-1] < 0 )
+      {
+        jpvt[j-1] = -j;
+      }
+      else
+      {
+        jpvt[j-1] = j;
+      }
+
+      if ( swapj )
+      {
+        if ( j != pl )
+        {
+          dswap ( n, a+0+(pl-1)*lda, 1, a+0+(j-1)*lda, 1 ); /* swap whole columns PL and J; the J offset needs the LDA stride */
+        }
+        jpvt[j-1] = jpvt[pl-1];
+        jpvt[pl-1] = j;
+        pl = pl + 1;
+      }
+    }
+    pu = p;
+
+    for ( j = p; 1 <= j; j-- )
+    {
+      if ( jpvt[j-1] < 0 ) /* still negative: a final column, move it to the end */
+      {
+        jpvt[j-1] = -jpvt[j-1];
+
+        if ( j != pu )
+        {
+          dswap ( n, a+0+(pu-1)*lda, 1, a+0+(j-1)*lda, 1 );
+          jp = jpvt[pu-1];
+          jpvt[pu-1] = jpvt[j-1];
+          jpvt[j-1] = jp;
+        }
+        pu = pu - 1;
+      }
+    }
+  }
+/*
+  Compute the norms of the free columns.
+*/
+  for ( j = pl; j <= pu; j++ )
+  {
+    qraux[j-1] = dnrm2 ( n, a+0+(j-1)*lda, 1 );
+  }
+
+  for ( j = pl; j <= pu; j++ )
+  {
+    work[j-1] = qraux[j-1]; /* WORK keeps the last exactly computed norm of each free column */
+  }
+/*
+  Perform the Householder reduction of A.
+*/
+  lup = i4_min ( n, p );
+
+  for ( l = 1; l <= lup; l++ )
+  {
+/*
+  Bring the column of largest norm into the pivot position.
+*/
+    if ( pl <= l && l < pu )
+    {
+      maxnrm = 0.0;
+      maxj = l;
+      for ( j = l; j <= pu; j++ )
+      {
+        if ( maxnrm < qraux[j-1] )
+        {
+          maxnrm = qraux[j-1];
+          maxj = j;
+        }
+      }
+
+      if ( maxj != l )
+      {
+        dswap ( n, a+0+(l-1)*lda, 1, a+0+(maxj-1)*lda, 1 );
+        qraux[maxj-1] = qraux[l-1];
+        work[maxj-1] = work[l-1];
+        jp = jpvt[maxj-1];
+        jpvt[maxj-1] = jpvt[l-1];
+        jpvt[l-1] = jp;
+      }
+    }
+/*
+  Compute the Householder transformation for column L.
+*/
+    qraux[l-1] = 0.0;
+
+    if ( l != n ) /* when L == N only the diagonal entry remains, so there is nothing to eliminate */
+    {
+      nrmxl = dnrm2 ( n-l+1, a+l-1+(l-1)*lda, 1 );
+
+      if ( nrmxl != 0.0 )
+      {
+        if ( a[l-1+(l-1)*lda] != 0.0 )
+        {
+          nrmxl = nrmxl * r8_sign ( a[l-1+(l-1)*lda] );
+        }
+
+        dscal ( n-l+1, 1.0 / nrmxl, a+l-1+(l-1)*lda, 1 );
+        a[l-1+(l-1)*lda] = 1.0 + a[l-1+(l-1)*lda];
+/*
+  Apply the transformation to the remaining columns, updating the norms.
+*/
+        for ( j = l + 1; j <= p; j++ )
+        {
+          t = -ddot ( n-l+1, a+l-1+(l-1)*lda, 1, a+l-1+(j-1)*lda, 1 )
+            / a[l-1+(l-1)*lda];
+          daxpy ( n-l+1, t, a+l-1+(l-1)*lda, 1, a+l-1+(j-1)*lda, 1 );
+
+          if ( pl <= j && j <= pu )
+          {
+            if ( qraux[j-1] != 0.0 )
+            {
+              tt = 1.0 - pow ( fabs ( a[l-1+(j-1)*lda] ) / qraux[j-1], 2 );
+              tt = r8_max ( tt, 0.0 );
+              t = tt;
+              tt = 1.0 + 0.05 * tt * pow ( qraux[j-1] / work[j-1], 2 );
+
+              if ( tt != 1.0 ) /* cheap downdate of the column norm is usable */
+              {
+                qraux[j-1] = qraux[j-1] * sqrt ( t );
+              }
+              else /* otherwise recompute the norm of the remaining rows from scratch */
+              {
+                qraux[j-1] = dnrm2 ( n-l, a+l+(j-1)*lda, 1 );
+                work[j-1] = qraux[j-1];
+              }
+            }
+          }
+        }
+/*
+  Save the transformation.
+*/
+        qraux[l-1] = a[l-1+(l-1)*lda];
+        a[l-1+(l-1)*lda] = -nrmxl;
+      }
+    }
+  }
+  return;
+}
+/******************************************************************************/
+
+int dqrls ( float a[], int lda, int m, int n, float tol, int *kr, float b[],
+  float x[], float rsd[], int jpvt[], float qraux[], int itask )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DQRLS factors and solves a linear system in the least squares sense.
+
+  Discussion:
+
+    The linear system may be overdetermined, underdetermined or singular.
+    The solution is obtained using a QR factorization of the
+    coefficient matrix.
+
+    DQRLS can be efficiently used to solve several least squares
+    problems with the same matrix A.  The first system is solved
+    with ITASK = 1.  The subsequent systems are solved with
+    ITASK = 2, to avoid the recomputation of the matrix factors.
+    The parameters KR, JPVT, and QRAUX must not be modified
+    between calls to DQRLS.
+
+    DQRLS is used to solve in a least squares sense
+    overdetermined, underdetermined and singular linear systems.
+    The system is A*X approximates B where A is M by N.
+    B is a given M-vector, and X is the N-vector to be computed.
+    A solution X is found which minimizes the sum of squares (2-norm)
+    of the residual,  A*X - B.
+
+    The numerical rank of A is determined using the tolerance TOL.
+
+    DQRLS uses the LINPACK subroutine DQRDC to compute the QR
+    factorization, with column pivoting, of an M by N matrix A.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    10 September 2012
+
+  Author:
+
+    C version by John Burkardt.
+
+  Reference:
+
+    David Kahaner, Cleve Moler, Steven Nash,
+    Numerical Methods and Software,
+    Prentice Hall, 1989,
+    ISBN: 0-13-627258-4,
+    LC: TA345.K34.
+
+  Parameters:
+
+    Input/output, float A[LDA*N], an M by N matrix.
+    On input, the matrix whose decomposition is to be computed.
+    In a least squares data fitting problem, A(I,J) is the
+    value of the J-th basis (model) function at the I-th data point.
+    On output, A contains the output from DQRDC.  The triangular matrix R
+    of the QR factorization is contained in the upper triangle and
+    information needed to recover the orthogonal matrix Q is stored
+    below the diagonal in A and in the vector QRAUX.
+
+    Input, int LDA, the leading dimension of A.
+
+    Input, int M, the number of rows of A.
+
+    Input, int N, the number of columns of A.
+
+    Input, float TOL, a relative tolerance used to determine the
+    numerical rank.  The problem should be scaled so that all the elements
+    of A have roughly the same absolute accuracy EPS.  Then a reasonable
+    value for TOL is roughly EPS divided by the magnitude of the largest
+    element.
+
+    Output, int *KR, the numerical rank.
+
+    Input, float B[M], the right hand side of the linear system.
+
+    Output, float X[N], a least squares solution to the linear
+    system.
+
+    Output, float RSD[M], the residual, B - A*X.  RSD may
+    overwrite B.
+
+    Workspace, int JPVT[N], required if ITASK = 1.
+    Columns JPVT(1), ..., JPVT(KR) of the original matrix are linearly
+    independent to within the tolerance TOL and the remaining columns
+    are linearly dependent.  ABS ( A(1,1) ) / ABS ( A(KR,KR) ) is an estimate
+    of the condition number of the matrix of independent columns,
+    and of R.  This estimate will be <= 1/TOL.
+
+    Workspace, float QRAUX[N], required if ITASK = 1.
+
+    Input, int ITASK.
+    1, DQRLS factors the matrix A and solves the least squares problem.
+    2, DQRLS assumes that the matrix A was factored with an earlier
+       call to DQRLS, and only solves the least squares problem.
+
+    Output, int DQRLS, error code.
+    0:  no error
+    -1: LDA < M   (fatal error)
+    -2: N < 1     (fatal error)
+    -3: ITASK < 1 (fatal error)
+*/
+{
+  int ind;
+
+  if ( lda < m )
+  {
+    DEBUG_FPRINTF ( stderr, "\n" );
+    DEBUG_FPRINTF ( stderr, "DQRLS - Fatal error!\n" );
+    DEBUG_FPRINTF ( stderr, "  LDA < M.\n" );
+    ind = -1;
+    return ind;
+  }
+
+  if ( n <= 0 )
+  {
+    DEBUG_FPRINTF ( stderr, "\n" );
+    DEBUG_FPRINTF ( stderr, "DQRLS - Fatal error!\n" );
+    DEBUG_FPRINTF ( stderr, "  N <= 0.\n" );
+    ind = -2;
+    return ind;
+  }
+
+  if ( itask < 1 )
+  {
+    DEBUG_FPRINTF ( stderr, "\n" );
+    DEBUG_FPRINTF ( stderr, "DQRLS - Fatal error!\n" );
+    DEBUG_FPRINTF ( stderr, "  ITASK < 1.\n" );
+    ind = -3;
+    return ind;
+  }
+
+  ind = 0;
+/*
+  Factor the matrix.
+*/
+  if ( itask == 1 ) /* any other accepted ITASK value skips factoring and reuses A, *KR, JPVT and QRAUX as passed in */
+  {
+    dqrank ( a, lda, m, n, tol, kr, jpvt, qraux );
+  }
+/*
+  Solve the least-squares problem.
+*/
+  dqrlss ( a, lda, m, n, *kr, b, x, rsd, jpvt, qraux ); /* *KR is read here, so it must hold the rank from the factoring call */
+
+  return ind;
+}
+/******************************************************************************/
+void dqrlss ( float a[], int lda, int m, int n, int kr, float b[], float x[],
+  float rsd[], int jpvt[], float qraux[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DQRLSS solves a linear system in a least squares sense.
+
+  Discussion:
+
+    DQRLSS must be preceded by a call to DQRANK.
+
+    The system to be solved is
+      A * X = B
+    where
+      A is an M by N matrix with rank KR, as determined by DQRANK,
+      B is a given M-vector,
+      X is the N-vector to be computed.
+
+    A solution X, with at most KR nonzero components, is found which
+    minimizes the 2-norm of the residual (A*X-B).
+
+    Once the matrix A has been formed, DQRANK should be
+    called once to decompose it.  Then, for each right hand
+    side B, DQRLSS should be called once to obtain the
+    solution and residual.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    10 September 2012
+
+  Author:
+
+    C version by John Burkardt
+
+  Parameters:
+
+    Input, float A[LDA*N], the QR factorization information
+    from DQRANK.  The triangular matrix R of the QR factorization is
+    contained in the upper triangle and information needed to recover
+    the orthogonal matrix Q is stored below the diagonal in A and in
+    the vector QRAUX.
+
+    Input, int LDA, the leading dimension of A, which must
+    be at least M.
+
+    Input, int M, the number of rows of A.
+
+    Input, int N, the number of columns of A.
+
+    Input, int KR, the rank of the matrix, as estimated by DQRANK.
+
+    Input, float B[M], the right hand side of the linear system.
+
+    Output, float X[N], a least squares solution to the
+    linear system.
+
+    Output, float RSD[M], the residual, B - A*X.  RSD may
+    overwrite B.
+
+    Input, int JPVT[N], the pivot information from DQRANK.
+    Columns JPVT[0], ..., JPVT[KR-1] of the original matrix are linearly
+    independent to within the tolerance TOL and the remaining columns
+    are linearly dependent.
+
+    Input, float QRAUX[N], auxiliary information from DQRANK
+    defining the QR factorization.
+*/
+{
+  int i;
+  int info UNUSED;
+  int j;
+  int job;
+  int k;
+  float t;
+
+  if ( kr != 0 )
+  {
+    job = 110; /* decimal digits C and D set: DQRSL computes the solution B and the residual RSD */
+    info = dqrsl ( a, lda, m, kr, qraux, b, rsd, rsd, x, rsd, rsd, job ); /* RSD also serves as the QTY scratch array; QY and AB are not requested */
+  }
+
+  for ( i = 0; i < n; i++ )
+  {
+    jpvt[i] = - jpvt[i]; /* negate every entry; the loop below flips each one back as it is processed */
+  }
+
+  for ( i = kr; i < n; i++ )
+  {
+    x[i] = 0.0; /* components beyond the numerical rank are set to zero */
+  }
+
+  for ( j = 1; j <= n; j++ ) /* undo the column pivoting so that X follows the original column order */
+  {
+    if ( jpvt[j-1] <= 0 )
+    {
+      k = - jpvt[j-1];
+      jpvt[j-1] = k;
+
+      while ( k != j ) /* follow the permutation cycle that starts at J */
+      {
+        t = x[j-1];
+        x[j-1] = x[k-1];
+        x[k-1] = t;
+        jpvt[k-1] = -jpvt[k-1];
+        k = jpvt[k-1];
+      }
+    }
+  }
+  return;
+}
+/******************************************************************************/
+
+int dqrsl ( float a[], int lda, int n, int k, float qraux[], float y[],
+  float qy[], float qty[], float b[], float rsd[], float ab[], int job )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DQRSL computes transformations, projections, and least squares solutions.
+
+  Discussion:
+
+    DQRSL requires the output of DQRDC.
+
+    For K <= min(N,P), let AK be the matrix
+
+      AK = ( A(JPVT(1)), A(JPVT(2)), ..., A(JPVT(K)) )
+
+    formed from columns JPVT(1), ..., JPVT(K) of the original
+    N by P matrix A that was input to DQRDC.  If no pivoting was
+    done, AK consists of the first K columns of A in their
+    original order.  DQRDC produces a factored orthogonal matrix Q
+    and an upper triangular matrix R such that
+
+      AK = Q * (R)
+               (0)
+
+    This information is contained in coded form in the arrays
+    A and QRAUX.
+
+    The parameters QY, QTY, B, RSD, and AB are not referenced
+    if their computation is not requested and in this case
+    can be replaced by dummy variables in the calling program.
+    To save storage, the user may in some cases use the same
+    array for different parameters in the calling sequence.  A
+    frequently occurring example is when one wishes to compute
+    any of B, RSD, or AB and does not need Y or QTY.  In this
+    case one may identify Y, QTY, and one of B, RSD, or AB, while
+    providing separate arrays for anything else that is to be
+    computed.
+
+    Thus the calling sequence
+
+      info = dqrsl ( a, lda, n, k, qraux, y, dum, y, b, y, dum, 110 )
+
+    will result in the computation of B and RSD, with RSD
+    overwriting Y.  More generally, each item in the following
+    list contains groups of permissible identifications for
+    a single calling sequence.
+
+      1. (Y,QTY,B) (RSD) (AB) (QY)
+
+      2. (Y,QTY,RSD) (B) (AB) (QY)
+
+      3. (Y,QTY,AB) (B) (RSD) (QY)
+
+      4. (Y,QY) (QTY,B) (RSD) (AB)
+
+      5. (Y,QY) (QTY,RSD) (B) (AB)
+
+      6. (Y,QY) (QTY,AB) (B) (RSD)
+
+    In any group the value returned in the array allocated to
+    the group corresponds to the last member of the group.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    07 June 2005
+
+  Author:
+
+    C version by John Burkardt.
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, (Society for Industrial and Applied Mathematics),
+    3600 University City Science Center,
+    Philadelphia, PA, 19104-2688.
+    ISBN 0-89871-172-X
+
+  Parameters:
+
+    Input, float A[LDA*P], contains the output of DQRDC.
+
+    Input, int LDA, the leading dimension of the array A.
+
+    Input, int N, the number of rows of the matrix AK.  It must
+    have the same value as N in DQRDC.
+
+    Input, int K, the number of columns of the matrix AK.  K
+    must not be greater than min(N,P), where P is the same as in the
+    calling sequence to DQRDC.
+
+    Input, float QRAUX[P], the auxiliary output from DQRDC.
+
+    Input, float Y[N], a vector to be manipulated by DQRSL.
+
+    Output, float QY[N], contains Q * Y, if requested.
+
+    Output, float QTY[N], contains Q' * Y, if requested.
+
+    Output, float B[K], the solution of the least squares problem
+      minimize norm2 ( Y - AK * B),
+    if its computation has been requested.  Note that if pivoting was
+    requested in DQRDC, the J-th component of B will be associated with
+    column JPVT(J) of the original matrix A that was input into DQRDC.
+
+    Output, float RSD[N], the least squares residual Y - AK * B,
+    if its computation has been requested.  RSD is also the orthogonal
+    projection of Y onto the orthogonal complement of the column space
+    of AK.
+
+    Output, float AB[N], the least squares approximation Ak * B,
+    if its computation has been requested.  AB is also the orthogonal
+    projection of Y onto the column space of A.
+
+    Input, integer JOB, specifies what is to be computed.  JOB has
+    the decimal expansion ABCDE, with the following meaning:
+
+      if A != 0, compute QY.
+      if B != 0, compute QTY.
+      if C != 0, compute QTY and B.
+      if D != 0, compute QTY and RSD.
+      if E != 0, compute QTY and AB.
+
+    Note that a request to compute B, RSD, or AB automatically triggers
+    the computation of QTY, for which an array must be provided in the
+    calling sequence.
+
+    Output, int DQRSL, is zero unless the computation of B has
+    been requested and R is exactly singular.  In this case, the return value
+    is the index of the first zero diagonal element of R found by the back substitution.
+*/
+{
+  int cab;
+  int cb;
+  int cqty;
+  int cqy;
+  int cr;
+  int i;
+  int info;
+  int j;
+  int jj;
+  int ju;
+  float t;
+  float temp;
+/*
+  Set INFO flag.
+*/
+  info = 0;
+/*
+  Determine what is to be computed.
+*/
+  cqy =  (   job / 10000          != 0 ); /* digit A: Q*Y requested */
+  cqty = ( ( job %  10000 )       != 0 ); /* any of digits B, C, D, E: Q'*Y is needed */
+  cb =   ( ( job %   1000 ) / 100 != 0 ); /* digit C: solution B requested */
+  cr =   ( ( job %    100 ) /  10 != 0 ); /* digit D: residual RSD requested */
+  cab =  ( ( job %     10 )       != 0 ); /* digit E: approximation AB requested */
+
+  ju = i4_min ( k, n-1 );
+/*
+  Special action when N = 1.
+*/
+  if ( ju == 0 )
+  {
+    if ( cqy )
+    {
+      qy[0] = y[0];
+    }
+
+    if ( cqty )
+    {
+      qty[0] = y[0];
+    }
+
+    if ( cab )
+    {
+      ab[0] = y[0];
+    }
+
+    if ( cb )
+    {
+      if ( a[0+0*lda] == 0.0 )
+      {
+        info = 1;
+      }
+      else
+      {
+        b[0] = y[0] / a[0+0*lda];
+      }
+    }
+
+    if ( cr )
+    {
+      rsd[0] = 0.0;
+    }
+    return info;
+  }
+/*
+  Set up to compute QY or QTY.
+*/
+  if ( cqy )
+  {
+    for ( i = 1; i <= n; i++ )
+    {
+      qy[i-1] = y[i-1];
+    }
+  }
+
+  if ( cqty )
+  {
+    for ( i = 1; i <= n; i++ )
+    {
+      qty[i-1] = y[i-1];
+    }
+  }
+/*
+  Compute QY.
+*/
+  if ( cqy )
+  {
+    for ( jj = 1; jj <= ju; jj++ )
+    {
+      j = ju - jj + 1; /* apply the transformations in reverse order, J = JU down to 1 */
+
+      if ( qraux[j-1] != 0.0 )
+      {
+        temp = a[j-1+(j-1)*lda]; /* save the diagonal of R ... */
+        a[j-1+(j-1)*lda] = qraux[j-1]; /* ... and put back the entry DQRDC stored in QRAUX */
+        t = -ddot ( n-j+1, a+j-1+(j-1)*lda, 1, qy+j-1, 1 ) / a[j-1+(j-1)*lda];
+        daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, qy+j-1, 1 );
+        a[j-1+(j-1)*lda] = temp; /* restore the diagonal of R */
+      }
+    }
+  }
+/*
+  Compute Q'*Y.
+*/
+  if ( cqty )
+  {
+    for ( j = 1; j <= ju; j++ )
+    {
+      if ( qraux[j-1] != 0.0 )
+      {
+        temp = a[j-1+(j-1)*lda];
+        a[j-1+(j-1)*lda] = qraux[j-1];
+        t = -ddot ( n-j+1, a+j-1+(j-1)*lda, 1, qty+j-1, 1 ) / a[j-1+(j-1)*lda];
+        daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, qty+j-1, 1 );
+        a[j-1+(j-1)*lda] = temp;
+      }
+    }
+  }
+/*
+  Set up to compute B, RSD, or AB.
+*/
+  if ( cb )
+  {
+    for ( i = 1; i <= k; i++ )
+    {
+      b[i-1] = qty[i-1];
+    }
+  }
+
+  if ( cab )
+  {
+    for ( i = 1; i <= k; i++ )
+    {
+      ab[i-1] = qty[i-1];
+    }
+  }
+
+  if ( cr && k < n )
+  {
+    for ( i = k+1; i <= n; i++ )
+    {
+      rsd[i-1] = qty[i-1];
+    }
+  }
+
+  if ( cab && k+1 <= n )
+  {
+    for ( i = k+1; i <= n; i++ )
+    {
+      ab[i-1] = 0.0;
+    }
+  }
+
+  if ( cr )
+  {
+    for ( i = 1; i <= k; i++ )
+    {
+      rsd[i-1] = 0.0;
+    }
+  }
+/*
+  Compute B.
+*/
+  if ( cb )
+  {
+    for ( jj = 1; jj <= k; jj++ ) /* back substitution, from row K up to row 1 */
+    {
+      j = k - jj + 1;
+
+      if ( a[j-1+(j-1)*lda] == 0.0 )
+      {
+        info = j; /* zero diagonal entry: report its 1-based index and stop the back substitution */
+        break;
+      }
+
+      b[j-1] = b[j-1] / a[j-1+(j-1)*lda];
+
+      if ( j != 1 )
+      {
+        t = -b[j-1];
+        daxpy ( j-1, t, a+0+(j-1)*lda, 1, b, 1 );
+      }
+    }
+  }
+/*
+  Compute RSD or AB as required.
+*/
+  if ( cr || cab )
+  {
+    for ( jj = 1; jj <= ju; jj++ )
+    {
+      j = ju - jj + 1;
+
+      if ( qraux[j-1] != 0.0 )
+      {
+        temp = a[j-1+(j-1)*lda];
+        a[j-1+(j-1)*lda] = qraux[j-1];
+
+        if ( cr )
+        {
+          t = -ddot ( n-j+1, a+j-1+(j-1)*lda, 1, rsd+j-1, 1 )
+            / a[j-1+(j-1)*lda];
+          daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, rsd+j-1, 1 );
+        }
+
+        if ( cab )
+        {
+          t = -ddot ( n-j+1, a+j-1+(j-1)*lda, 1, ab+j-1, 1 )
+            / a[j-1+(j-1)*lda];
+          daxpy ( n-j+1, t, a+j-1+(j-1)*lda, 1, ab+j-1, 1 );
+        }
+        a[j-1+(j-1)*lda] = temp;
+      }
+    }
+  }
+
+  return info;
+}
+/******************************************************************************/
+
+void dscal ( int n, float sa, float x[], int incx )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DSCAL scales a vector by a constant.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    30 March 2007
+
+  Author:
+
+    C version by John Burkardt
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979.
+
+    Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
+    Basic Linear Algebra Subprograms for Fortran Usage,
+    Algorithm 539,
+    ACM Transactions on Mathematical Software,
+    Volume 5, Number 3, September 1979, pages 308-323.
+
+  Parameters:
+
+    Input, int N, the number of entries in the vector.
+
+    Input, float SA, the multiplier.
+
+    Input/output, float X[*], the vector to be scaled.
+
+    Input, int INCX, the increment between successive entries of X.
+*/
+{
+  int i;
+  int ix;
+  int m;
+
+  if ( n <= 0 )
+  {
+  } /* nothing to do for an empty vector */
+  else if ( incx == 1 )
+  {
+    m = n % 5; /* leftover entries handled one by one before the 5-way unrolled loop */
+
+    for ( i = 0; i < m; i++ )
+    {
+      x[i] = sa * x[i];
+    }
+
+    for ( i = m; i < n; i = i + 5 ) /* n - m is a multiple of 5, so x[i+4] stays within the first N entries */
+    {
+      x[i]   = sa * x[i];
+      x[i+1] = sa * x[i+1];
+      x[i+2] = sa * x[i+2];
+      x[i+3] = sa * x[i+3];
+      x[i+4] = sa * x[i+4];
+    }
+  }
+  else
+  {
+    if ( 0 <= incx )
+    {
+      ix = 0;
+    }
+    else
+    {
+      ix = ( - n + 1 ) * incx; /* negative stride: start at the highest offset and walk downward */
+    }
+
+    for ( i = 0; i < n; i++ )
+    {
+      x[ix] = sa * x[ix];
+      ix = ix + incx;
+    }
+  }
+  return;
+}
+/******************************************************************************/
+
+void dswap ( int n, float x[], int incx, float y[], int incy )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    DSWAP interchanges two vectors.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    30 March 2007
+
+  Author:
+
+    C version by John Burkardt
+
+  Reference:
+
+    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
+    LINPACK User's Guide,
+    SIAM, 1979.
+
+    Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
+    Basic Linear Algebra Subprograms for Fortran Usage,
+    Algorithm 539,
+    ACM Transactions on Mathematical Software,
+    Volume 5, Number 3, September 1979, pages 308-323.
+
+  Parameters:
+
+    Input, int N, the number of entries in the vectors.
+
+    Input/output, float X[*], one of the vectors to swap.
+
+    Input, int INCX, the increment between successive entries of X.
+
+    Input/output, float Y[*], one of the vectors to swap.
+
+    Input, int INCY, the increment between successive elements of Y.
+*/
+{
+  int i;
+  int ix;
+  int iy;
+  int m;
+  float temp;
+
+  if ( n <= 0 )
+  {
+  } /* nothing to do for empty vectors */
+  else if ( incx == 1 && incy == 1 )
+  {
+    m = n % 3; /* leftover entries handled one by one before the 3-way unrolled loop */
+
+    for ( i = 0; i < m; i++ )
+    {
+      temp = x[i];
+      x[i] = y[i];
+      y[i] = temp;
+    }
+
+    for ( i = m; i < n; i = i + 3 ) /* n - m is a multiple of 3, so index i+2 stays within the first N entries */
+    {
+      temp = x[i];
+      x[i] = y[i];
+      y[i] = temp;
+
+      temp = x[i+1];
+      x[i+1] = y[i+1];
+      y[i+1] = temp;
+
+      temp = x[i+2];
+      x[i+2] = y[i+2];
+      y[i+2] = temp;
+    }
+  }
+  else
+  {
+    if ( 0 <= incx )
+    {
+      ix = 0;
+    }
+    else
+    {
+      ix = ( - n + 1 ) * incx; /* negative stride: start at the highest offset and walk downward */
+    }
+
+    if ( 0 <= incy )
+    {
+      iy = 0;
+    }
+    else
+    {
+      iy = ( - n + 1 ) * incy; /* same start rule for Y */
+    }
+
+    for ( i = 0; i < n; i++ )
+    {
+      temp = x[ix];
+      x[ix] = y[iy];
+      y[iy] = temp;
+      ix = ix + incx;
+      iy = iy + incy;
+    }
+
+  }
+
+  return;
+}
+/******************************************************************************/
+
+void qr_solve ( int m, int n, float a[], float b[], float x[]  )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    QR_SOLVE solves a linear system in the least squares sense.
+
+  Discussion:
+
+    If the matrix A has full column rank, then the solution X should be the
+    unique vector that minimizes the Euclidean norm of the residual.
+
+    If the matrix A does not have full column rank, then the solution is
+    not unique; the vector X will minimize the residual norm, but so will
+    various other vectors.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    11 September 2012
+
+  Author:
+
+    John Burkardt
+
+  Reference:
+
+    David Kahaner, Cleve Moler, Steven Nash,
+    Numerical Methods and Software,
+    Prentice Hall, 1989,
+    ISBN: 0-13-627258-4,
+    LC: TA345.K34.
+
+  Parameters:
+
+    Input, int M, the number of rows of A.
+
+    Input, int N, the number of columns of A.
+
+    Input, float A[M*N], the matrix.
+
+    Input, float B[M], the right hand side.
+
+    Output, float X[N], the least squares solution (written to caller storage).
+*/
+{
+  int ind UNUSED;
+  int itask;
+  int kr;
+  int lda;
+  float tol;
+
+  float a_qr[m*n]; /* working copy on the stack: DQRLS overwrites its matrix argument with the factorization */
+  r8mat_copy_new ( m, n, a, a_qr );
+  lda = m;
+  tol = r8_epsilon ( ) / r8mat_amax ( m, n, a_qr ); /* NOTE(review): presumably divides by the largest |entry|; an all-zero A would then divide by zero - confirm */
+  int jpvt[n];
+  float qraux[n];
+  float r[m]; /* receives the residual from DQRLS; not used afterwards */
+  itask = 1; /* factor the matrix and solve in one call */
+
+  ind = dqrls ( a_qr, lda, m, n, tol, &kr, b, x, r, jpvt, qraux, itask ); /* the error code is stored in IND but never inspected */
+}
+/******************************************************************************/
+
diff --git a/dronesim/control/pywls/qr_solve.h b/dronesim/control/pywls/qr_solve.h
new file mode 100644
index 0000000..5e54b6c
--- /dev/null
+++ b/dronesim/control/pywls/qr_solve.h
@@ -0,0 +1,27 @@
+/*
+ * This is part of the qr_solve library from John Burkardt.
+ * http://people.sc.fsu.edu/~jburkardt/c_src/qr_solve/qr_solve.html
+ *
+ * It is slightly modified to make it compile on simple microprocessors,
+ * and to remove all dynamic memory.
+ *
+ * This code is distributed under the GNU LGPL license.
+ */
+
+void daxpy ( int n, float da, float dx[], int incx, float dy[], int incy );
+float ddot ( int n, float dx[], int incx, float dy[], int incy );
+float dnrm2 ( int n, float x[], int incx );
+void dqrank ( float a[], int lda, int m, int n, float tol, int *kr, 
+  int jpvt[], float qraux[] ); /* pivoted QR via dqrdc plus numerical rank estimate using tol */
+void dqrdc ( float a[], int lda, int n, int p, float qraux[], int jpvt[], 
+  float work[], int job ); /* Householder QR factorization, optional column pivoting */
+int dqrls ( float a[], int lda, int m, int n, float tol, int *kr, float b[], 
+  float x[], float rsd[], int jpvt[], float qraux[], int itask ); /* factor (itask == 1) and solve in the least squares sense; returns 0 or a negative error code */
+void dqrlss ( float a[], int lda, int m, int n, int kr, float b[], float x[], 
+  float rsd[], int jpvt[], float qraux[] ); /* least squares solve from an existing dqrank factorization */
+int dqrsl ( float a[], int lda, int n, int k, float qraux[], float y[], 
+  float qy[], float qty[], float b[], float rsd[], float ab[], int job ); /* applies the dqrdc output; job digits select QY, QTY, B, RSD, AB */
+void drotg ( float *sa, float *sb, float *c, float *s ); /* NOTE(review): no definition visible in the reviewed part of qr_solve.c - confirm it exists */
+void dscal ( int n, float sa, float x[], int incx ); /* x := sa * x */
+void dswap ( int n, float x[], int incx, float y[], int incy ); /* exchanges the entries of x and y */
+void qr_solve ( int m, int n, float a[], float b[], float x[] ); /* least squares solution of a*x = b, written to x */
diff --git a/dronesim/control/pywls/r8lib_min.c b/dronesim/control/pywls/r8lib_min.c
new file mode 100644
index 0000000..e9dec55
--- /dev/null
+++ b/dronesim/control/pywls/r8lib_min.c
@@ -0,0 +1,554 @@
+/*
+ * This file is a modified subset of the R8lib from John Burkardt.
+ * http://people.sc.fsu.edu/~jburkardt/c_src/r8lib/r8lib.html
+ *
+ * It is the minimal set of functions from r8lib needed to use qr_solve.
+ *
+ * This code is distributed under the GNU LGPL license.
+ */
+
+#include "r8lib_min.h"
+#include "std.h"
+#include <stdlib.h>
+#include <math.h>
+
+#define DEBUG_FPRINTF(...) /* debug hooks: all three macros expand to nothing here */
+#define DEBUG_PRINT(...)
+#define DEBUG_EXIT(...)
+
+void r8mat_copy_new ( int m, int n, float a1[], float a2[])
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8MAT_COPY_NEW copies the M by N matrix A1 into the caller-provided A2.
+
+  Discussion:
+
+    Both matrices are stored as flat vectors in column-major order, so
+    entry (i,j) lives at index i+j*m.
+
+    Despite the "_new" suffix kept from the original r8lib routine, nothing
+    is allocated here: the destination buffer A2 is supplied by the caller.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    26 July 2008
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int M, N, the number of rows and columns.
+
+    Input, float A1[M*N], the matrix to be copied.
+
+    Output, float A2[M*N], receives the copy of A1.
+*/
+{
+  int col, row;
+
+  for ( col = 0; col < n; col++ )
+  {
+    float *dst = a2 + col * m;
+    float *src = a1 + col * m;
+    for ( row = 0; row < m; row++ )
+      dst[row] = src[row];
+  }
+}
+/******************************************************************************/
+
+float r8_epsilon ( void )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8_EPSILON returns the round off unit used by this library.
+
+  Discussion:
+
+    The round off unit is the power of 2, R, such that in float arithmetic
+      1 < 1 + R
+    while
+      1 = ( 1 + R / 2 )
+
+    This port works in single precision (float), so the constant returned
+    is the float value, not the double-precision one the R8 name suggests.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    01 September 2012
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Output, float R8_EPSILON, the round-off unit.
+*/
+{
+  return ( float ) 1.192092896E-7;
+}
+/******************************************************************************/
+
+float r8mat_amax ( int m, int n, float a[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8MAT_AMAX returns the maximum absolute value entry of an R8MAT.
+
+  Discussion:
+
+    An R8MAT is a doubly dimensioned array of R8 values, stored as a vector
+    in column-major order.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    07 September 2012
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int M, N, the number of rows and columns in A.
+
+    Input, float A[M*N], the M by N matrix.
+
+    Output, float R8MAT_AMAX, the maximum absolute value entry of A,
+    or 0 when the matrix is empty (M <= 0 or N <= 0).
+*/
+{
+  int i, j;
+  float entry, value;
+
+  /* An empty matrix has no a[0] to read; report 0 instead. */
+  if ( m <= 0 || n <= 0 )
+    return 0.0f;
+
+  value = fabsf ( a[0+0*m] );
+  for ( j = 0; j < n; j++ )
+  {
+    for ( i = 0; i < m; i++ )
+    {
+      entry = fabsf ( a[i+j*m] );
+      if ( value < entry )
+        value = entry;
+    }
+  }
+  return value;
+}
+/******************************************************************************/
+
+float r8_sign ( float x )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8_SIGN returns the sign of an R8.
+
+  Discussion:
+
+    The result is -1 for strictly negative X and +1 for everything else.
+    In particular zero maps to +1, so this function never returns 0, and
+    any X for which X < 0 does not hold is treated as non-negative.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    08 May 2006
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, float X, the number whose sign is desired.
+
+    Output, float R8_SIGN, the sign of X.
+*/
+{
+  if ( x < 0.0 )
+  {
+    return - 1.0;
+  }
+  return + 1.0;
+}
+/******************************************************************************/
+
+float r8_max ( float x, float y )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8_MAX returns the maximum of two R8's.
+
+  Discussion:
+
+    X is returned only when Y < X holds.  In every other case, including
+    X == Y and comparisons involving NaN (which are never true), the
+    result is Y.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    07 May 2006
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, float X, Y, the quantities to compare.
+
+    Output, float R8_MAX, the maximum of X and Y.
+*/
+{
+  if ( y < x )
+  {
+    return x;
+  }
+  return y;
+}
+/******************************************************************************/
+
+float *r8mat_l_solve ( int n, float a[], float b[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8MAT_L_SOLVE solves a lower triangular linear system.
+
+  Discussion:
+
+    An R8MAT is a doubly dimensioned array of R8 values, stored as a vector
+    in column-major order.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    07 June 2008
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int N, the number of rows and columns of
+    the matrix A.
+
+    Input, float A[N*N], the N by N lower triangular matrix.
+
+    Input, float B[N], the right hand side of the linear system.
+
+    Output, float R8MAT_L_SOLVE[N], the solution of the linear system,
+    allocated with malloc (caller frees); NULL if the allocation fails.
+*/
+{
+  float dot;
+  int i, j;
+  float *x;
+
+  x = ( float * ) malloc ( n * sizeof ( float ) );
+  if ( x == NULL )
+    return NULL;
+
+  /* Forward substitution: row i only needs the already computed x[0..i-1]. */
+  for ( i = 0; i < n; i++ )
+  {
+    dot = 0.0;
+    for ( j = 0; j < i; j++ )
+    {
+      dot = dot + a[i+j*n] * x[j];
+    }
+    x[i] = ( b[i] - dot ) / a[i+i*n];
+  }
+  return x;
+}
+/******************************************************************************/
+
+float *r8mat_lt_solve ( int n, float a[], float b[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8MAT_LT_SOLVE solves a transposed lower triangular linear system.
+
+  Discussion:
+
+    An R8MAT is a doubly dimensioned array of R8 values, stored as a vector
+    in column-major order.
+
+    Given the lower triangular matrix A, the linear system to be solved is:
+
+      A' * x = b
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    08 April 2009
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int N, the number of rows and columns of the matrix A.
+
+    Input, float A[N*N], the N by N lower triangular matrix.
+
+    Input, float B[N], the right hand side of the linear system.
+
+    Output, float R8MAT_LT_SOLVE[N], the solution of the linear system,
+    allocated with malloc (caller frees); NULL if the allocation fails.
+*/
+{
+  int i, j;
+  float *x;
+
+  x = ( float * ) malloc ( n * sizeof ( float ) );
+  if ( x == NULL )
+    return NULL;
+  for ( j = n-1; 0 <= j; j-- )
+  {
+    x[j] = b[j];
+    for ( i = j+1; i < n; i++ )
+    {
+      x[j] = x[j] - x[i] * a[i+j*n];
+    }
+    x[j] = x[j] / a[j+j*n];
+  }
+  return x;
+}
+/******************************************************************************/
+
+float *r8mat_mtv_new ( int m, int n, float a[], float x[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8MAT_MTV_NEW multiplies a transposed matrix times a vector.
+
+  Discussion:
+
+    An R8MAT is a doubly dimensioned array of R8 values, stored as a vector
+    in column-major order.
+
+    The result is a malloc'ed vector owned by the caller; NULL on failure.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    26 August 2011
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int M, N, the number of rows and columns of the matrix.
+
+    Input, float A[M,N], the M by N matrix.
+
+    Input, float X[M], the vector to be multiplied by A.
+
+    Output, float R8MAT_MTV_NEW[N], the product A'*X.
+*/
+{
+  int i, j;
+  float *y;
+
+  y = ( float * ) malloc ( n * sizeof ( float ) );
+  if ( y == NULL )
+    return NULL;
+
+  for ( j = 0; j < n; j++ )
+  {
+    y[j] = 0.0;
+    for ( i = 0; i < m; i++ )
+    {
+      y[j] = y[j] + a[i+j*m] * x[i];
+    }
+  }
+  return y;
+}
+/******************************************************************************/
+
+float r8vec_max ( int n, float r8vec[] )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    R8VEC_MAX returns the largest entry of an R8VEC.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    05 May 2006
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int N, the number of entries in the array.
+
+    Input, float R8VEC[N], a pointer to the first entry of the array.
+
+    Output, float R8VEC_MAX, the value of the largest entry, or 0.0
+    when N <= 0 (the array is not read in that case).
+*/
+{
+  const float *cursor;
+  const float *end;
+  float best;
+
+  if ( n <= 0 )
+  {
+    return 0.0;
+  }
+
+  best = r8vec[0];
+  end = r8vec + n;
+  for ( cursor = r8vec + 1; cursor < end; cursor++ )
+  {
+    if ( best < *cursor )
+    {
+      best = *cursor;
+    }
+  }
+  return best;
+}
+/******************************************************************************/
+
+int i4_min ( int i1, int i2 )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    I4_MIN returns the smaller of two I4's.
+
+  Discussion:
+
+    An I4 is an ordinary C int.
+    I1 is returned only when I1 < I2; when the two are equal the value
+    of I2 is returned, which is indistinguishable from returning I1.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    29 August 2006
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int I1, I2, two integers to be compared.
+
+    Output, int I4_MIN, the smaller of I1 and I2.
+*/
+{
+  if ( i1 < i2 )
+  {
+    return i1;
+  }
+  return i2;
+}
+/******************************************************************************/
+
+int i4_max ( int i1, int i2 )
+
+/******************************************************************************/
+/*
+  Purpose:
+
+    I4_MAX returns the maximum of two I4's.
+
+  Discussion:
+
+    An I4 is an ordinary C int.
+    I1 is returned only when I2 < I1; when the two are equal the value
+    of I2 is returned, which is indistinguishable from returning I1.
+
+  Licensing:
+
+    This code is distributed under the GNU LGPL license.
+
+  Modified:
+
+    29 August 2006
+
+  Author:
+
+    John Burkardt
+
+  Parameters:
+
+    Input, int I1, I2, are two integers to be compared.
+
+    Output, int I4_MAX, the larger of I1 and I2.
+*/
+{
+  if ( i2 < i1 )
+  {
+    return i1;
+  }
+  return i2;
+}
+/******************************************************************************/
diff --git a/dronesim/control/pywls/r8lib_min.h b/dronesim/control/pywls/r8lib_min.h
new file mode 100644
index 0000000..72e9655
--- /dev/null
+++ b/dronesim/control/pywls/r8lib_min.h
@@ -0,0 +1,25 @@
+/*
+ * This file is a modified subset of the R8lib from John Burkardt.
+ * http://people.sc.fsu.edu/~jburkardt/c_src/r8lib/r8lib.html
+ *
+ * It is the minimal set of functions from r8lib needed to use qr_solve.
+ *
+ * This code is distributed under the GNU LGPL license.
+ */
+
+void r8mat_copy_new ( int m, int n, float a1[], float a2[] ); /* copies a1 into the caller-provided a2 */
+float r8_epsilon ( void );
+float r8mat_amax ( int m, int n, float a[] );
+float r8_sign ( float x );
+float r8_max ( float x, float y );
+float *r8mat_transpose_new ( int m, int n, float a[] ); /* NOTE(review): no definition in r8lib_min.c - confirm it exists elsewhere or drop */
+float *r8mat_mm_new ( int n1, int n2, int n3, float a[], float b[] ); /* NOTE(review): no definition in r8lib_min.c */
+float *r8mat_cholesky_factor ( int n, float a[], int *flag ); /* NOTE(review): no definition in r8lib_min.c */
+float *r8mat_mv_new ( int m, int n, float a[], float x[] ); /* NOTE(review): no definition in r8lib_min.c */
+float *r8mat_cholesky_solve ( int n, float l[], float b[] ); /* NOTE(review): no definition in r8lib_min.c */
+float *r8mat_l_solve ( int n, float a[], float b[] ); /* returns a malloc'ed vector */
+float *r8mat_lt_solve ( int n, float a[], float b[] ); /* returns a malloc'ed vector */
+float *r8mat_mtv_new ( int m, int n, float a[], float x[] ); /* returns a malloc'ed vector */
+float r8vec_max ( int n, float r8vec[] );
+int i4_min ( int i1, int i2 );
+int i4_max ( int i1, int i2 );
diff --git a/dronesim/control/pywls/std.h b/dronesim/control/pywls/std.h
new file mode 100644
index 0000000..318f832
--- /dev/null
+++ b/dronesim/control/pywls/std.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2005 Pascal Brisset, Antoine Drouin
+ *
+ * This file is part of paparazzi.
+ *
+ * paparazzi is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * paparazzi is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with paparazzi; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ *
+ * a couple of fundamentals used in the avr code
+ *
+ */
+
+#ifndef STD_H
+#define STD_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <math.h>
+
+#ifdef SITL
+  #include <stdio.h> // for debuging in simulation
+#endif
+
+/* some convenience macros to print debug/config messages at compile time */
+// #include "message_pragmas.h"
+
+/* stringify a define, e.g. one that was not quoted; the extra level lets the argument be macro-expanded before # is applied */
+#define _STRINGIFY(s) #s
+#define STRINGIFY(s) _STRINGIFY(s)
+
+#define PTR(_f) &_f /* address-of; note the argument is not parenthesized */
+
+#ifndef FALSE /* upper-case aliases for the <stdbool.h> constants, unless already defined */
+  #define FALSE false
+#endif
+#ifndef TRUE
+  #define TRUE true
+#endif
+
+#ifndef NULL /* fallback used only when no earlier header defined NULL */
+  #ifdef __cplusplus
+    #define NULL 0
+  #else
+    #define NULL ((void *)0)
+  #endif
+#endif
+
+/* Unit (void) values */
+typedef uint8_t unit_t;
+
+#ifndef M_PI /* fallbacks for the case where <math.h> did not provide the M_PI* constants */
+  #define M_PI 3.14159265358979323846
+#endif
+
+#ifndef M_PI_6
+  #define M_PI_6 (M_PI/6)
+#endif
+
+#ifndef M_PI_4
+  #define M_PI_4 (M_PI/4)
+#endif
+
+#ifndef M_PI_2
+  #define M_PI_2 (M_PI/2)
+#endif
+
+
+#ifndef bit_is_set /* 1 if bit b of x is set, else 0 */
+  #define bit_is_set(x, b) (((x) >> (b)) & 0x1)
+#endif
+
+#ifndef _BV /* mask with only the given bit set */
+  #define _BV(bit) (1 << (bit))
+#endif
+/* Set / clear bit n of the lvalue a; arguments are parenthesized so that expression arguments expand as written. */
+#define SetBit(a, n) ((a) |= (1 << (n)))
+#define ClearBit(a, n) ((a) &= ~(1 << (n)))
+
+/** Normalize a rad angle in place to the range [-PI, PI]; x must be a modifiable lvalue */
+#define NormRadAngle(x) { \
+    while (x > M_PI) x -= 2 * M_PI; \
+    while (x < -M_PI) x += 2 * M_PI; \
+  }
+/** Normalize a degree angle in place to the range [0, 360) */
+#define NormCourse(x) { \
+    while (x < 0) x += 360; \
+    while (x >= 360) x -= 360; \
+  }
+/** Normalize a rad angle in place to the range [0, 2*PI) */
+#define NormCourseRad(x) { \
+    while (x < 0) x += 2*M_PI; \
+    while (x >= 2*M_PI) x -= 2*M_PI; \
+  }
+
+/** Normalize a degree angle between 0 and 359 */
+// FIXME should we use a protected version ? of NormXxx ?
+/*
+#define NormCourse(x) { \
+    uint8_t dont_loop_forever = 0;  \
+    while (x < 0 && ++dont_loop_forever) x += 360; \
+    while (x >= 360 && ++dont_loop_forever) x -= 360; \
+  }
+*/
+/* True when two angles differ by less than 10 deg (resp. 0.0177 rad) after wrap-around; uses GNU statement expressions. */
+#define CloseDegAngles(_c1, _c2) ({ float _diff = (_c1) - (_c2); NormCourse(_diff); 350 < _diff || _diff < 10; })
+#define CloseRadAngles(_c1, _c2) ({ float _diff = (_c1) - (_c2); NormRadAngle(_diff); fabsf(_diff) < 0.0177; })
+
+#define DegOfRad(x) ((x) * (180. / M_PI)) /* radians -> degrees */
+#define DeciDegOfRad(x) ((x) * (1800./ M_PI)) /* radians -> tenths of a degree */
+#define RadOfDeg(x) ((x) * (M_PI/180.)) /* degrees -> radians */
+#define RadOfDeciDeg(x) ((x) * (M_PI/1800.)) /* tenths of a degree -> radians */
+#define RadOfCentiDeg(x) ((x) * (M_PI/18000.)) /* hundredths of a degree -> radians */
+
+#define MOfCm(_x) (((float)(_x))/100.) /* centimetres -> metres */
+#define MOfMm(_x) (((float)(_x))/1000.) /* millimetres -> metres */
+
+#define And(x, y) ((x) && (y))
+#define Or(x, y) ((x) || (y))
+#define Min(x,y) ((x) < (y) ? (x) : (y)) /* arguments parenthesized; the selected one is evaluated twice */
+#define Max(x,y) ((x) > (y) ? (x) : (y)) /* arguments parenthesized; the selected one is evaluated twice */
+#define LessThan(_x, _y) ((_x) < (_y))
+#define MoreThan(_x, _y) ((_x) > (_y))
+
+#ifndef ABS
+  #define ABS(val) ((val) < 0 ? -(val) : (val))
+#endif
+
+#define BoundUpper(_x, _max) { if (_x > (_max)) _x = (_max);}
+
+// Note: the bound function will bound NaN to min as any comparison that contains NaN is false.
+#define Bound(_x, _min, _max) { if (!(_x > (_min))) _x = (_min); else if (!(_x < (_max))) _x = (_max); }
+// BoundInverted: for an inverted range, snap a value strictly between _max and _min to the nearer limit (type-generic ABS, not integer abs()).
+#define BoundInverted(_x, _min, _max) {           \
+    if ((_x < (_min)) && (_x > (_max))) {         \
+      if (ABS(_x - (_min)) < ABS(_x - (_max)))    \
+        _x = (_min);                              \
+      else                                        \
+        _x = (_max);                              \
+    }                                             \
+  }
+#define BoundWrapped(_x, _min, _max) {            \
+    if ((_max) > (_min))                          \
+      Bound(_x, _min, _max)                       \
+      else                                        \
+        BoundInverted(_x, _min, _max)             \
+      }
+#define BoundAbs(_x, _max) Bound(_x, -(_max), (_max))
+#define Clip(_x, _min, _max) ( (_x) < (_min) ? (_min) : (_x) > (_max) ? (_max) : (_x) )
+#define ClipAbs(x, max) Clip(x, -(max), (max))
+// Align makes the value of x a multiple of a1 (rounding up; arguments are parenthesized)
+#define Align(_x, _a1) ((_x) % (_a1) ? (_x) + ((_a1) - ((_x) % (_a1))) : (_x))
+
+#define DeadBand(_x, _v) { /* pull _x toward zero by _v; anything within [-_v, _v] becomes 0 */ \
+    if (_x > (_v))                    \
+      _x = _x -(_v);                  \
+    else if  (_x < -(_v))             \
+      _x = _x +(_v);                  \
+    else                              \
+      _x = 0;                         \
+  }
+/* Weighted mix rho*a + (1-rho)*b: rho = 1 yields a, rho = 0 yields b. */
+#define Blend(a, b, rho) (((rho)*(a))+(1-(rho))*(b))
+
+#define RunOnceEvery(_prescaler, _code) { /* run _code on every _prescaler-th pass through this call site */ \
+    static uint16_t prescaler = 0;          \
+    prescaler++;                            \
+    if (prescaler >= (_prescaler)) {        \
+      prescaler = 0;                        \
+      _code;                                \
+    }                                       \
+  }
+/* RunXTimesEvery: per call site, run _code _xtimes times spaced by _interval passes, then restart the count; arguments are parenthesized. */
+#define RunXTimesEvery(_jumpstart, _prescaler, _interval, _xtimes, _code) {   \
+    static uint16_t prescaler = (_jumpstart);   \
+    static uint16_t xtimes = 0;                 \
+    prescaler++;                                \
+    if (prescaler >= (_prescaler) + (_interval)*xtimes && xtimes < (_xtimes)) {     \
+      _code;                                    \
+      xtimes++;                                 \
+    }                                           \
+    if (xtimes >= (_xtimes)) {                  \
+      xtimes = 0;                               \
+      prescaler = 0;                            \
+    }                                           \
+  }
+
+
+#define PeriodicPrescaleBy5( _code_0, _code_1, _code_2, _code_3, _code_4) { \
+    static uint8_t _50hz = 0; /* slot counter, kept across calls of this call site */ \
+    _50hz++; /* incremented before dispatch, so the first call runs _code_1 */ \
+    if (_50hz >= 5) _50hz = 0;          \
+    switch (_50hz) { /* exactly one of the five snippets runs per call */ \
+      case 0:                           \
+        _code_0;                        \
+        break;                          \
+      case 1:                           \
+        _code_1;                        \
+        break;                          \
+      case 2:                           \
+        _code_2;                        \
+        break;                          \
+      case 3:                           \
+        _code_3;                        \
+        break;                          \
+      case 4:                           \
+        _code_4;                        \
+        break;                          \
+    }                                   \
+  }
+
+#define PeriodicPrescaleBy10( _code_0, _code_1, _code_2, _code_3, _code_4, _code_5, _code_6, _code_7, _code_8, _code_9) { \
+    static uint8_t _cnt = 0; /* slot counter, kept across calls of this call site */ \
+    _cnt++; /* incremented before dispatch, so the first call runs _code_1 */ \
+    if (_cnt >= 10) _cnt = 0;           \
+    switch (_cnt) { /* exactly one of the ten snippets runs per call */ \
+      case 0:                           \
+        _code_0;                        \
+        break;                          \
+      case 1:                           \
+        _code_1;                        \
+        break;                          \
+      case 2:                           \
+        _code_2;                        \
+        break;                          \
+      case 3:                           \
+        _code_3;                        \
+        break;                          \
+      case 4:                           \
+        _code_4;                        \
+        break;                          \
+      case 5:                           \
+        _code_5;                        \
+        break;                          \
+      case 6:                           \
+        _code_6;                        \
+        break;                          \
+      case 7:                           \
+        _code_7;                        \
+        break;                          \
+      case 8:                           \
+        _code_8;                        \
+        break;                          \
+      case 9: /* shares its body with the default label */ \
+      default:                          \
+        _code_9;                        \
+        break;                          \
+    }                                   \
+  }
+
+static inline bool str_equal(const char *a, const char *b)
+{
+  /* Walk both strings in lockstep: a mismatch ends the loop with "not equal",
+   * reaching the terminator on both sides at once means "equal". */
+  for (; *a == *b; a++, b++) {
+    if (*a == 0) { return true; }
+  }
+  return false;
+}
+
+#ifdef __GNUC__ /* attribute shorthands; they expand to nothing when __GNUC__ is not defined */
+  #define UNUSED __attribute__((__unused__))
+  #define WEAK __attribute__((weak))
+#else
+  #define UNUSED
+  #define WEAK
+#endif
+
+#if __GNUC__ >= 7
+  #define INTENTIONAL_FALLTHRU __attribute__ ((fallthrough)); /* note: the expansion already ends with ';' */
+#else
+  #define INTENTIONAL_FALLTHRU
+#endif
+
+#endif /* STD_H */
diff --git a/dronesim/control/pywls/test.py b/dronesim/control/pywls/test.py
new file mode 100644
index 0000000..03c4432
--- /dev/null
+++ b/dronesim/control/pywls/test.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python3
+
+import numpy as np
+from pywls import wls_alloc
+
+# Example sizes
+nv = 4
+nu = 6
+
+B = np.random.randn(nv, nu).astype(np.float32)
+v = np.array([0.1, -0.2, 0.05, 0.0], dtype=np.float32)
+
+u_min = np.full(nu, -1.0, dtype=np.float32)
+u_max = np.full(nu,  1.0, dtype=np.float32)
+
+u_pref = np.zeros(nu, dtype=np.float32)
+Wv = np.ones(nv, dtype=np.float32)
+Wu = np.ones(nu, dtype=np.float32)
+
+u, n_iter = wls_alloc(
+    B, v, u_min, u_max,u_guess=np.zeros(nu, dtype=np.float32), W_init=None,
+    Wv=Wv, Wu=Wu, u_pref=u_pref,
+    gamma_sq=100000.0,
+    imax=100,
+)
+
+print("u =", u)
+print("iterations =", n_iter)
diff --git a/dronesim/control/pywls/wls_alloc.c b/dronesim/control/pywls/wls_alloc.c
new file mode 100644
index 0000000..da2d5dc
--- /dev/null
+++ b/dronesim/control/pywls/wls_alloc.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) Anton Naruta && Daniel Hoppener
+ * MAVLab Delft University of Technology
+ *
+ * This file is part of paparazzi.
+ *
+ * paparazzi is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * paparazzi is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with paparazzi; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/** @file wls_alloc.c
+ * @brief This is an active set algorithm for WLS control allocation
+ *
+ * This algorithm will find the optimal inputs to produce the least error wrt
+ * the control objective, taking into account the weighting matrices on the
+ * control objective and the control effort.
+ *
+ * The algorithm is described in:
+ * Prioritized Control Allocation for Quadrotors Subject to Saturation -
+ * E.J.J. Smeur, D.C. Höppener, C. de Wagter. In IMAV 2017
+ *
+ * written by Anton Naruta && Daniel Hoppener 2016
+ * MAVLab Delft University of Technology
+ */
+
+#include "std.h"
+#include "wls_alloc.h"
+
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include "qr_solve.h"
+#include "r8lib_min.h"
+
+// provide loop feedback: when WLS_VERBOSE is true, wls_alloc prints each solver step and the final values
+#ifndef WLS_VERBOSE
+#define WLS_VERBOSE FALSE
+#endif
+// forward declarations of the debug printers defined at the end of this file
+#if WLS_VERBOSE
+#include <stdio.h>
+static void print_final_values(struct WLS_t* WLS_p, float **B);
+static void print_in_and_outputs(int n_c, int n_free, float **A_free_ptr, float *d, float *p_free);
+#endif
+
+
+/**
+ * @brief Wrapper for qr solve
+ *
+ * Possible to use a different solver if needed.
+ * Solves a system of the form Ax = b for x.
+ *
+ * @param m number of rows
+ * @param n number of columns
+ */
+static void qr_solve_wrapper(int m, int n, float **A, float *b, float *x) {
+  // qr_solve takes one flat buffer filled column by column, so element
+  // (row, col) of the row-pointer matrix A is stored at flat[col * m + row]
+  float flat[m * n];
+  for (int col = 0; col < n; col++) {
+    float *column = &flat[col * m];
+    for (int row = 0; row < m; row++) {
+      column[row] = A[row][col];
+    }
+  }
+  qr_solve(m, n, flat, b, x);
+}
+
+/**
+ * @brief active set algorithm for control allocation
+ *
+ * Takes the control objective and max and min inputs from pprz and calculates
+ * the inputs that will satisfy most of the control objective, subject to the
+ * weighting matrices Wv and Wu
+ *  
+ * @param WLS_p Struct that contains most of the WLS parameters
+ * @param B The control effectiveness matrix
+ * @param u_guess Initial value for u
+ * @param W_init Initial working set, if known
+ * @param imax Max number of iterations
+ */
+
+void wls_alloc(struct WLS_t* WLS_p, float **B, float *u_guess, float *W_init, int imax) {
+  // use defaults where parameters are set to 0 (the gamma_sq default is written back into WLS_p)
+  if (!WLS_p->gamma_sq) WLS_p->gamma_sq = 100000;
+  if (!imax) imax = 100;
+
+  int n_c = WLS_p->nu + WLS_p->nv;
+
+  float A[n_c][WLS_p->nu];
+  float A_free[n_c][WLS_p->nu];
+
+  // Create a pointer array to the rows of A_free
+  // such that we can pass it to a function
+  float *A_free_ptr[n_c];
+  for(int i = 0; i < n_c; i++)
+    A_free_ptr[i] = A_free[i];
+
+  float b[n_c];
+  float d[n_c];
+
+  int free_index[WLS_p->nu];        // indices of the currently free actuators
+  int free_index_lookup[WLS_p->nu]; // position of actuator i inside free_index, or -1
+  int n_free = 0;
+  int free_chk = -1;                // n_free at the time A_free was last rebuilt
+
+  int iter = 0;
+  float p_free[WLS_p->nu];
+  float p[WLS_p->nu];
+  float u_opt[WLS_p->nu];
+  int infeasible_index[WLS_p->nu] UNUSED;
+  int n_infeasible = 0;
+  float lambda[WLS_p->nu];
+  float W[WLS_p->nu];
+
+  // Initialize u and the working set, if provided from input
+  if (!u_guess) {
+    for (int i = 0; i < WLS_p->nu; i++) {
+      WLS_p->u[i] = (WLS_p->u_max[i] + WLS_p->u_min[i]) * 0.5;
+    }
+  } else {
+    for (int i = 0; i < WLS_p->nu; i++) {
+      WLS_p->u[i] = u_guess[i];
+    }
+  }
+  if (W_init) memcpy(W, W_init, WLS_p->nu * sizeof(float));
+  else memset(W, 0, WLS_p->nu * sizeof(float));
+
+  memset(free_index_lookup, -1, WLS_p->nu * sizeof(int)); // int array: size it with sizeof(int); all-ones bytes give -1
+
+  // find free indices
+  for (int i = 0; i < WLS_p->nu; i++) {
+    if (W[i] == 0) {
+      free_index_lookup[i] = n_free;
+      free_index[n_free++] = i;
+    }
+  }
+
+  // fill up A, A_free, b and d
+  for (int i = 0; i < WLS_p->nv; i++) {
+    b[i] = WLS_p->gamma_sq * WLS_p->Wv[i] * WLS_p->v[i];
+    d[i] = b[i];
+    for (int j = 0; j < WLS_p->nu; j++) {
+      // objective rows: gamma_sq * Wv[i] * B[i][j] (Wv is always dereferenced here)
+      A[i][j] = WLS_p->gamma_sq * WLS_p->Wv[i] * B[i][j];
+      d[i] -= A[i][j] * WLS_p->u[j];
+    }
+  }
+  for (int i = WLS_p->nv; i < n_c; i++) {
+    memset(A[i], 0, WLS_p->nu * sizeof(float));
+    A[i][i - WLS_p->nv] = WLS_p->Wu[i - WLS_p->nv];
+    b[i] = WLS_p->Wu[i - WLS_p->nv] * WLS_p->u_pref[i - WLS_p->nv];
+    d[i] = b[i] - A[i][i - WLS_p->nv] * WLS_p->u[i - WLS_p->nv];
+  }
+
+  // -------------- Start loop ------------
+  while (iter++ < imax) {
+    // clear p, copy u to u_opt
+    memset(p, 0, WLS_p->nu * sizeof(float));
+    memcpy(u_opt, WLS_p->u, WLS_p->nu * sizeof(float));
+
+    // Construct a matrix with the free columns of A
+    if (free_chk != n_free) {
+      for (int i = 0; i < n_c; i++) {
+        for (int j = 0; j < n_free; j++) {
+          A_free[i][j] = A[i][free_index[j]];
+        }
+      }
+      free_chk = n_free;
+    }
+
+
+    // Count the infeasible free actuators
+    n_infeasible = 0;
+
+    if (n_free > 0) {
+      // Still free variables left, calculate corresponding solution
+
+      // use a solver to find the solution to A_free*p_free = d
+      qr_solve_wrapper(n_c, n_free, A_free_ptr, d, p_free);
+
+      //print results current step
+#if WLS_VERBOSE
+      print_in_and_outputs(n_c, n_free, A_free_ptr, d, p_free);
+#endif
+
+      // Set the nonzero values of p and add to u_opt
+      for (int i = 0; i < n_free; i++) {
+        p[free_index[i]] = p_free[i];
+        u_opt[free_index[i]] += p_free[i];
+
+        // check limits
+        if ((u_opt[free_index[i]] > WLS_p->u_max[free_index[i]] || u_opt[free_index[i]] < WLS_p->u_min[free_index[i]])) {
+          infeasible_index[n_infeasible++] = free_index[i];
+        }
+      }
+    }
+
+    // Check feasibility of the solution
+    if (n_infeasible == 0) {
+      // all variables are within limits
+      memcpy(WLS_p->u, u_opt, WLS_p->nu * sizeof(float));
+      memset(lambda, 0, WLS_p->nu * sizeof(float));
+
+      // d = d - A_free*p_free; lambda = A'*d;
+      for (int i = 0; i < n_c; i++) {
+        for (int k = 0; k < n_free; k++) {
+          d[i] -= A_free[i][k] * p_free[k];
+        }
+        for (int k = 0; k < WLS_p->nu; k++) {
+          lambda[k] += A[i][k] * d[i];
+        }
+      }
+      bool break_flag = true;
+
+      // lambda = lambda x W;
+      for (int i = 0; i < WLS_p->nu; i++) {
+        lambda[i] *= W[i];
+        // if any lambdas are negative, keep looking for solution
+        if (lambda[i] < -FLT_EPSILON) {
+          break_flag = false;
+          W[i] = 0;
+          // add a free index
+          if (free_index_lookup[i] < 0) {
+            free_index_lookup[i] = n_free;
+            free_index[n_free++] = i;
+          }
+        }
+      }
+      if (break_flag) {
+
+#if WLS_VERBOSE
+        print_final_values(WLS_p, B);
+#endif
+
+        // if solution is found, return number of iterations
+        WLS_p->iter = iter;
+        return;
+      }
+    } else {
+      // scaling back actuator command (0-1)
+      float alpha = 1.0;
+      float alpha_tmp;
+      int id_alpha = free_index[0];
+
+      // find the lowest distance from the limit among the free variables
+      for (int i = 0; i < n_free; i++) {
+        int id = free_index[i];
+
+        alpha_tmp = (p[id] < 0) ? (WLS_p->u_min[id] - WLS_p->u[id]) / p[id]
+                                : (WLS_p->u_max[id] - WLS_p->u[id]) / p[id];
+
+        if (isnan(alpha_tmp) || alpha_tmp < 0.f) {
+          alpha_tmp = 1.0f;
+        }
+        if (alpha_tmp < alpha) {
+          alpha = alpha_tmp;
+          id_alpha = id;
+        }
+      }
+
+      // update input u = u + alpha*p
+      for (int i = 0; i < WLS_p->nu; i++) {
+        WLS_p->u[i] += alpha * p[i];
+        Bound(WLS_p->u[i], WLS_p->u_min[i], WLS_p->u_max[i]);
+      }
+      // update d = d-alpha*A*p_free
+      for (int i = 0; i < n_c; i++) {
+        for (int k = 0; k < n_free; k++) {
+          d[i] -= A_free[i][k] * alpha * p_free[k];
+        }
+      }
+      // get rid of a free index
+      W[id_alpha] = (p[id_alpha] > 0) ? 1.0 : -1.0;
+
+      // swap-remove: move the last free index into the vacated slot and fix its lookup
+      free_index[free_index_lookup[id_alpha]] = free_index[--n_free];
+      free_index_lookup[free_index[free_index_lookup[id_alpha]]] = free_index_lookup[id_alpha];
+      free_index_lookup[id_alpha] = -1;
+    }
+  }
+  WLS_p->iter = iter;
+}
+
+#if WLS_VERBOSE
+static void print_in_and_outputs(int n_c, int n_free, float **A_free_ptr, float *d, float *p_free) {
+  // Debug dump of one solver step: the reduced system A_free * p_free = d and its solution
+  printf("n_c = %d n_free = %d\n", n_c, n_free);
+  printf("A_free =\n");
+  for (int row = 0; row < n_c; row++) {
+    for (int col = 0; col < n_free; col++) {
+      printf("%f ", A_free_ptr[row][col]);
+    }
+    printf("\n");
+  }
+
+  printf("d = ");
+  for (int row = 0; row < n_c; row++) {
+    printf("%f ", d[row]);
+  }
+
+  printf("\noutput = ");
+  for (int col = 0; col < n_free; col++) {
+    printf("%f ", p_free[col]);
+  }
+  printf("\n\n");
+}
+
+static void print_final_values(struct WLS_t* WLS_p, float **B) {
+  // Debug dump of the solved problem: sizes, B, v, the resulting u and the actuator limits
+  printf("n_u = %d n_v = %d\n", WLS_p->nu, WLS_p->nv);
+
+  printf("B =\n");
+  for (int i = 0; i < WLS_p->nv; i++) {
+    for (int j = 0; j < WLS_p->nu; j++) {
+      printf("%f ", B[i][j]);
+    }
+    printf("\n");
+  }
+
+  printf("v = ");
+  for (int j = 0; j < WLS_p->nv; j++) {
+    printf("%f ", WLS_p->v[j]);
+  }
+
+  printf("\nu = ");
+  for (int j = 0; j < WLS_p->nu; j++) {
+    printf("%f ", WLS_p->u[j]);  // u lives in the struct; a bare u is not declared in this function
+  }
+  printf("\n");
+
+  printf("\numin = ");
+  for (int j = 0; j < WLS_p->nu; j++) {
+    printf("%f ", WLS_p->u_min[j]);
+  }
+  printf("\n");
+
+  printf("\numax = ");
+  for (int j = 0; j < WLS_p->nu; j++) {
+    printf("%f ", WLS_p->u_max[j]);
+  }
+  printf("\n\n");
+}
+#endif
diff --git a/dronesim/control/pywls/wls_alloc.h b/dronesim/control/pywls/wls_alloc.h
new file mode 100644
index 0000000..bc6b68f
--- /dev/null
+++ b/dronesim/control/pywls/wls_alloc.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) Anton Naruta && Daniel Hoppener
+ * MAVLab Delft University of Technology
+ *
+ * This file is part of paparazzi.
+ *
+ * paparazzi is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * paparazzi is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with paparazzi; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/**
+ * @brief active set algorithm for control allocation
+ *
+ * Takes the control objective and max and min inputs from pprz and calculates
+ * the inputs that will satisfy most of the control objective, subject to the
+ * weighting matrices Wv and Wu
+ *
+ * The dimension of the input vectors u and v are defined at compilation time
+ * and must be large enough for all the considered cases.
+ *
+ * @param u The control output vector
+ * @param v The control objective vector
+ * @param B The control effectiveness matrix
+ * @param nu Length of u
+ * @param nv Length of v
+ * @param u_guess Initial value for u
+ * @param W_init Initial working set, if known
+ * @param Wv Weighting on different control objectives
+ * @param Wu Weighting on different controls
+ * @param up Preferred control vector
+ * @param gamma_sq Preference of satisfying control objective over desired
+ * control vector (square root of gamma)
+ * @param imax Max number of iterations
+ *
+ * @return Nothing; the number of iterations is stored in WLS_p->iter: (imax+1) means it ran out of iterations
+ */
+
+#ifndef WLS_ALLOC_HEADER
+#define WLS_ALLOC_HEADER
+
+#ifndef WLS_N_U_MAX
+#define WLS_N_U_MAX 6
+#endif
+
+#ifndef WLS_N_V_MAX
+#define WLS_N_V_MAX 4
+#endif
+struct WLS_t{                // allocation problem data; arrays are sized by WLS_N_V_MAX / WLS_N_U_MAX
+  int nu;                    // number of actuators
+  int nv;                    // number of controlled axes
+  float gamma_sq;            // weighting factor WLS
+  float v[WLS_N_V_MAX];      // Pseudo Control Vector
+  float u[WLS_N_U_MAX];      // Allocation of Controls
+  float Wv[WLS_N_V_MAX];     // Weighting on different control objectives
+  float Wu[WLS_N_U_MAX];     // Weighting on different actuators
+  float u_pref[WLS_N_U_MAX]; // Preferred control vector
+  float u_min[WLS_N_U_MAX];  // Minimum control vector
+  float u_max[WLS_N_U_MAX];  // Maximum control vector
+  int   iter;                // Number of iterations (written by wls_alloc before it returns)
+};
+// Returns void: the iteration count is reported through WLS_p->iter.
+extern void wls_alloc(struct WLS_t* WLS_p, float **B, float *u_guess, float *W_init, int imax);
+
+#endif
\ No newline at end of file
diff --git a/dronesim/control/wls_alloc.py b/dronesim/control/wls_alloc.py
index 86e7f9e..16e3577 100644
--- a/dronesim/control/wls_alloc.py
+++ b/dronesim/control/wls_alloc.py
@@ -1,408 +1,90 @@
-# /*
-#  * Copyright (C) Anton Naruta && Daniel Hoppener
-#  * MAVLab Delft University of Technology
-#  *
-#  * This file is part of paparazzi.
-#  *
-#  * paparazzi is free software; you can redistribute it and/or modify
-#  * it under the terms of the GNU General Public License as published by
-#  * the Free Software Foundation; either version 2, or (at your option)
-#  * any later version.
-#  *
-#  * paparazzi is distributed in the hope that it will be useful,
-#  * but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  * GNU General Public License for more details.
-#  *
-#  * You should have received a copy of the GNU General Public License
-#  * along with paparazzi; see the file COPYING.  If not, write to
-#  * the Free Software Foundation, 59 Temple Place - Suite 330,
-#  * Boston, MA 02111-1307, USA.
-#  */
-
-"""
-@file wls_alloc.c
-@brief This is an active set algorithm for WLS control allocation
-
-This algorithm will find the optimal inputs to produce the least error wrt
-the control objective, taking into account the weighting matrices on the
-control objective and the control effort.
-
-The algorithm is described in:
-Prioritized Control Allocation for Quadrotors Subject to Saturation -
-E.J.J. Smeur, D.C. Höppener, C. de Wagter. In IMAV 2017
-written by Anton Naruta && Daniel Hoppener 2016
-MAVLab Delft University of Technology
-"""
-
-# include "wls_alloc.h"
-# include <stdio.h>
-# include "std.h"
-
-# include <string.h>
-# include <math.h>
-# include <float.h>
-# include "math/qr_solve/qr_solve.h"
-# include "math/qr_solve/r8lib_min.h"
-
-
-# Problem size needs to be predefined to avoid having to use VLAs
-# ifndef CA_N_V
-# error CA_N_V needs to be defined!
-# endif
-
-# ifndef CA_N_U
-# error CA_N_U needs to be defined!
-# endif
-
-# define CA_N_C  (CA_N_U+CA_N_V)
-
-# /**
-#  * @brief Wrapper for qr solve
-#  *
-#  * Possible to use a different solver if needed.
-#  * Solves a system of the form Ax = b for x.
-#  *
-#  * @param m number of rows
-#  * @param n number of columns
-#  */
-# // void qr_solve_wrapper(int m, int n, float** A, float* b, float* x) {
-# //   float in[m * n];
-# //   // convert A to 1d array
-# //   int k = 0;
-# //   for (int j = 0; j < n; j++) {
-# //     for (int i = 0; i < m; i++) {
-# //       in[k++] = A[i][j];
-# //     }
-# //   }
-# //   // use solver
-# //   qr_solve(m, n, in, b, x);
-# // }
-
+from dronesim.control.pywls import wls_alloc as _wls_alloc
 import numpy as np
 
-# CA_N_U = 6
-# CA_N_V = 4
-# CA_N_C = CA_N_U + CA_N_V
-FLT_EPSILON = 1e-7
-INFINITY = 1e32
-
-
-def qr_solve(A, b):
-    """Solves a system of the form Ax = b for x."""
-    q, r = np.linalg.qr(A)
-    p = np.dot(q.T, b)
-    return np.dot(np.linalg.pinv(r), p)
-
-
-# /**
-#  * @brief active set algorithm for control allocation
-#  *
-#  * Takes the control objective and max and min inputs from pprz and calculates
-#  * the inputs that will satisfy most of the control objective, subject to the
-#  * weighting matrices Wv and Wu
-#  *
-#  * @param u The control output vector
-#  * @param v The control objective
-#  * @param umin The minimum u vector
-#  * @param umax The maximum u vector
-#  * @param B The control effectiveness matrix
-#  * @param n_u Length of u
-#  * @param n_v Lenght of v
-#  * @param u_guess Initial value for u
-#  * @param W_init Initial working set, if known
-#  * @param Wv Weighting on different control objectives
-#  * @param Wu Weighting on different controls
-#  * @param up Preferred control vector
-#  * @param gamma_sq Preference of satisfying control objective over desired
-#  * control vector (sqare root of gamma)
-#  * @param imax Max number of iterations
-#  *
-#  * @return Number of iterations, -1 upon failure
-#  */
 
-
-def wls_alloc(v, umin, umax, B, u_guess, W_init, Wv, Wu, up, gamma_sq=100000, imax=100):
-    # Allocate variables, use defaults where parameters are set to 0
-    # if gamma_sq==None : gamma_sq = 100000
-    # if imax == None   : imax     = 100
-    CA_N_U = len(umin)
-    CA_N_V = len(v)
-    CA_N_C = CA_N_U + CA_N_V
-
-    n_c = CA_N_C
-    n_u = CA_N_U
-    n_v = CA_N_V
-
-    A = np.zeros((CA_N_C, CA_N_U))
-    A_free = np.zeros((CA_N_C, CA_N_U))
-
-    # Create a pointer array to the rows of A_free
-    # such that we can pass it to a function
-    # A_free_ptr = np.zeros((CA_N_C, CA_N_U), dtype=int)
-
-    # for i in range(n_c):
-    # A_free_ptr[i,:] = A_free[i,:]
-
-    b = np.zeros(CA_N_C)
-    d = np.zeros(CA_N_C)
-
-    free_index = np.zeros(CA_N_U, dtype=int)
-    free_index_lookup = np.zeros(CA_N_U, dtype=int)
-    n_free = 0
-    free_chk = -1
-
-    iter = 0
-    p_free = np.zeros(CA_N_U)
-    p = np.zeros(CA_N_U)
-    u = np.zeros(CA_N_U)
-    u_opt = np.zeros(CA_N_U)
-    infeasible_index = np.zeros(CA_N_U, dtype=int)  # UNUSED
-    n_infeasible = 0
-    Lambda = np.zeros(CA_N_U)
-    W = np.zeros(CA_N_U)
-
-    # Initialize u and the working set, if provided from input
-    if u_guess is None:
-        for i in range(n_u):
-            u[i] = (umax[i] + umin[i]) * 0.5
-    else:
-        # for i in range(n_u):
-        #   u[i] = u_guess[i]
-        u = u_guess.copy()
-
-    if W_init is not None:
-        W = W_init.copy()
+def _wls_score(v, B, Wv, Wu, u_pref, gamma_sq, u_sol):
+    """Return gamma_sq*||Wv (B u_sol - v)||^2 + ||Wu (u_sol - u_pref)||^2; Wv/Wu are diagonals, identity when None."""
+    Wu_mat = np.diag(Wu) if Wu is not None else np.eye(len(u_sol))
+    Wv_mat = np.diag(Wv) if Wv is not None else np.eye(len(v))
+    return gamma_sq * np.linalg.norm(Wv_mat @ (B @ u_sol - v))**2 + np.linalg.norm(Wu_mat @ (u_sol - u_pref))**2
+
+def scipy_wls_alloc(
+    v: np.ndarray, umin: np.ndarray, umax: np.ndarray, B: np.ndarray,
+    u_guess: np.ndarray|None = None, W_init: np.ndarray|None = None,
+    Wv: np.ndarray|None = None, Wu: np.ndarray|None = None,
+    u_pref: np.ndarray|None = None, gamma_sq: float = 100000.0, imax: int = 100):
+    """Solve the bounded WLS allocation problem with scipy's BVLS solver.
+
+    Returns (u, n_iter), or (u_pref, imax + 1) when the solver fails; u_guess and W_init are unused.
+    """
+    from scipy import optimize as opt
+    if u_pref is None:
+        u_pref = np.zeros_like(umin)
+    Wu_mat = np.diag(Wu) if Wu is not None else np.eye(len(umin))
+    Wv_mat = np.diag(Wv) if Wv is not None else np.eye(len(v))
+    # Stacked system: ||A u - b||^2 == gamma_sq*||Wv (B u - v)||^2 + ||Wu (u - u_pref)||^2
+    A = np.vstack((np.sqrt(gamma_sq) * Wv_mat @ B,  Wu_mat))
+    b = np.hstack((np.sqrt(gamma_sq) * Wv_mat @ v, Wu_mat @ u_pref))
+    sol = opt.lsq_linear(A, b, bounds=(umin, umax), method='bvls', tol=1e-6, max_iter=imax)  # honour imax
+    if sol.success:
+        return sol.x, sol.nit
     else:
-        W = np.zeros(n_u)
-
-    free_index_lookup = np.ones(n_u, dtype=int) * -1
-
-    # Find free indices
-    for i in range(n_u):
-        if W[i] == 0:
-            free_index_lookup[i] = n_free
-            free_index[n_free] = i  # WHAT IS THIS ???
-            n_free += 1  # $$$$$######@@@@$$$$%%%%
-            # print(f'n_free : {n_free}')
-
-    # Fill up A, A_free, b and d
-    for i in range(n_v):
-        # If Wv is a NULL pointer, use Wv = identity
-        if Wv is not None:
-            b[i] = gamma_sq * Wv[i] * v[i]
-        else:
-            b[i] = gamma_sq * v[i]
-        d[i] = b[i]
-        for j in range(n_u):
-            # If Wv is a NULL pointer, use Wv = identity
-            if Wv is not None:
-                A[i][j] = gamma_sq * Wv[i] * B[i][j]
-            else:
-                A[i][j] = gamma_sq * B[i][j]
-            d[i] -= A[i][j] * u[j]
-
-    for i in range(n_v, n_c):
-        A[i, :] = 0  # , n_u * sizeof(float));
-        if Wu is not None:
-            A[i][i - n_v] = Wu[i - n_v]
-        else:
-            A[i][i - n_v] = 1.0
-
-        if up is not None:
-            if Wu is not None:
-                b[i] = Wu[i - n_v] * up[i - n_v]
-            else:
-                b[i] = up[i - n_v]
-        else:
-            b[i] = 0
-        d[i] = b[i] - A[i][i - n_v] * u[i - n_v]
-
-    # -------------- Start loop ------------
-    while iter < imax:
-        iter += 1
-        # clear p, copy u to u_opt
-        p = np.zeros(n_u)  # * sizeof(float));
-        u_opt = u.copy()  # , n_u * sizeof(float));
-        # print(f'u : {u}')
-        # print(f'u_opt : {u_opt}')
-        # print(f'free_index : {free_index}')
-        # print(f'n_free : {n_free}')
-
-        # Construct a matrix with the free columns of A
-        if free_chk != n_free:
-            for i in range(n_c):
-                for j in range(n_free):
-                    # print(f'Free_index[j] : {free_index[j]}')
-                    A_free[i][j] = A[i][free_index[j]]
-            free_chk = n_free
-
-        # print('A_free : ', A_free_ptr)
-        # print('d : ', d)
-
-        if n_free:
-            # Still free variables left, calculate corresponding solution
-            # Use a solver to find the solution to A_free*p_free = d
-            # print('A_free : ', A_free_ptr)
-            # print('A : ', A)
-            # print('d : ', d)
-            # print(f'n_c : {n_c}, n_free : {n_free}, A_free_ptr.shape : {A_free_ptr.shape} ')
-
-            # p_free = qr_solve(A_free[:n_c,:n_free], d)
-            p_free = np.linalg.lstsq(A_free[:n_c, :n_free], d, rcond=None)[0]
-            # print(f'p_free : {p_free}')
-            # p_free = np.linalg.solve(A_free_ptr[:], d)
-
-        # Set the nonzero values of p and add to u_opt
-        for i in range(n_free):
-            p[free_index[i]] = p_free[i]
-            u_opt[free_index[i]] += p_free[i]
-
-        # check limits
-        n_infeasible = 0
-        for i in range(n_u):
-            if u_opt[i] >= (umax[i] + 1.0) or u_opt[i] <= (umin[i] - 1.0):
-                infeasible_index[n_infeasible] = i
-                n_infeasible += 1
-
-        # Check feasibility of the solution
-        if n_infeasible == 0:
-            # all variables are within limits
-            u = u_opt.copy()
-            Lambda = np.zeros(n_u)
-
-            # d = d + A_free*p_free; lambda = A*d;
-            for i in range(n_c):
-                for k in range(n_free):
-                    d[i] -= A_free[i][k] * p_free[k]
-
-                for k in range(n_u):
-                    Lambda[k] += A[i][k] * d[i]
-
-            break_flag = True
-
-            # lambda = lambda x W;
-            for i in range(n_u):
-                Lambda[i] *= W[i]
-                # if any lambdas are negative, keep looking for solution
-                if Lambda[i] < -FLT_EPSILON:
-                    break_flag = False
-                    W[i] = 0
-                    # add a free index
-                    if free_index_lookup[i] < 0:
-                        free_index_lookup[i] = n_free
-                        free_index[n_free] = i
-                        n_free += 1
-            if break_flag:
-                # if solution is found, return number of iterations
-                return u, iter
-        else:
-            alpha = INFINITY  # ???
-            alpha_tmp = 0.0
-            id_alpha = 0
-
-        # Find the lowest distance from the limit among the free variables
-        for i in range(n_free):
-            id = free_index[i]
-            if np.abs(p[id]) > FLT_EPSILON:
-                if p[id] < 0:
-                    alpha_tmp = (umin[id] - u[id]) / p[id]
-                else:
-                    alpha_tmp = (umax[id] - u[id]) / p[id]
-            else:
-                alpha_tmp = INFINITY
-
-            if alpha_tmp < alpha:
-                alpha = alpha_tmp
-                id_alpha = id
-
-        # update input u = u + alpha*p
-        for i in range(n_u):
-            u[i] += alpha * p[i]
-
-        # update d = d-alpha*A*p_free
-        for i in range(n_c):
-            k_len = min(
-                n_free, len(p_free)
-            )  # FIXME : Pfff this should be fixed! Somehow k is becoming bigger than the p_free length...
-            for k in range(k_len):  # Normally should be range(n_free)
-                # print(f'Dangerous place ! i : {i} , k : {k}, A shape : {A_free.shape} , p_free shape : {p_free.shape}')
-                d[i] -= (
-                    A_free[i][k] * alpha * p_free[k]
-                )  # having problem here with i:0 k:1, A(8,4) p_free:(1,) : IndexError: index 1 is out of bounds for axis 0 with size 1
-
-        # get rid of a free index
-        if p[id_alpha] > 0:
-            W[id_alpha] = 1.0
-        else:
-            W[id_alpha] = -1.0
-
-        # print(n_free, id_alpha)
-        # print(n_free, free_index[n_free], id_alpha, free_index_lookup[id_alpha] )
-        n_free -= 1
-        free_index[free_index_lookup[id_alpha]] = free_index[n_free]
-        free_index_lookup[free_index[free_index_lookup[id_alpha]]] = free_index_lookup[
-            id_alpha
-        ]
-        free_index_lookup[id_alpha] = -1
-
-    # solution failed, return negative one to indicate failure
-    return None, iter
-
-
-if __name__ == "__main__":
-    # v = np.array([0.5, 0.3, 20.2, 0.7])
-    # umin = np.array([-107, -19093, 0, -95])
-    # umax = np.array([9093, 107, 4600, 4505])
-    # A = np.array([
-    #   [      0,         0,  -0.0105,  0.0107016],
-    #   [ -0.0030044, 0.0030044, 0.035, 0.035],
-    #   [ -0.004856, -0.004856, 0, 0],
-    #   [       0,         0,   -0.0011,   -0.0011] ])
-
-    # up = np.array([1000., 1000., 1000., 1000.])
-    # Wv = np.array([100, 1000, 0.1, 10])
-    # Wu = np.array([1, 1, 1, 1])
-    # # B = np.array([
-    # #   [ 15.0,  15.0 , -15.0 , -15.0],
-    # #   [-15.0,  15.0 ,  15.0 , -15.0],
-    # #   [-5.0,  5.0 , -5.0 ,  5.0],
-    # #   [ 0.7,  0.7 ,  0.7 ,  0.7]  ])
-    # u_guess = None
-    # W_init = None
-
-    # # import scipy.optimize
-    # # res = scipy.optimize.lsq_linear(A, v, bounds=(umin, umax), lsmr_tol='auto', verbose=1)
-    # # print(f'LSQ_Lin : {res}')
-
-    # du, it = wls_alloc(v, umin, umax, A/1000., u_guess, W_init, Wv, Wu, up)
-    # print(f'Control increment : {du} and iteration : {it}')
-
-    umin = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
-    umax = np.array([9600, 9600, 9600, 9600, 9600, 9600])
-    uc = np.array([4614, 4210, 4210, 4614, 4210, 4210])
-    dumin = umin - uc
-    dumax = umax - uc
-    up = dumin.copy()
-
-    v = np.array([240, -240.5658, 600.0, 1.8532])
-    Wv = np.array([100, 100, 1, 10])
-    # Wu = np.array([1, 1, 1, 1, 1, 1, 1])
-    Wu = None
-    A = np.array(
-        [
-            [0.0, -0.015, 0.015, 0.0, -0.015, 0.015],
-            [0.015, -0.010, -0.010, 0.015, -0.010, -0.010],
-            [0.103, 0.103, 0.103, -0.103, -0.103, -0.103],
-            [-0.0009, -0.0009, -0.0009, -0.0009, -0.0009, -0.0009],
-        ]
+        return u_pref, imax+1
+    
+
+
+def wls_alloc(
+    v:np.ndarray, u_min:np.ndarray, u_max:np.ndarray, B:np.ndarray,
+    u_guess:np.ndarray|None=None, W_init:np.ndarray|None=None,
+    Wv:np.ndarray|None=None, Wu:np.ndarray|None=None, u_pref:np.ndarray|None=None,
+    gamma_sq:float=100000.0, imax:int=100,
+):
+    """
+    Python wrapper for the C-bound wls_alloc.
+
+    Solve the following optimal control allocation problem:
+    minimize_u gamma_sq * || Wv * (B u - v) ||^2 + || Wu * (u - u_pref) ||^2, with u_min <= u <= u_max
+
+    Parameters:
+        v: (nv,) Control objective (pseudo-control) vector
+        u_min: (nu,) Min input values
+        u_max: (nu,) Max input values
+        B: (nv, nu) Control effectiveness matrix
+        u_guess: (nu,) Initial guess for u (optional)
+        W_init: (nu,) Initial working set for u (optional)
+        Wv: (nv,) vector of weights for v (optional, defaults to ones)
+        Wu: (nu,) vector of weights for u_pref (optional, defaults to ones)
+        u_pref: (nu,) vector of preferred u values (optional, defaults to zeros)
+        gamma_sq: Squared weight for preferring control allocation to u_pref (optional, default: 100000.0)
+        imax: maximum number of iterations (optional, default: 100)
+    Returns:
+        u: (nu,) vector of allocated controls
+        n_iter: number of iterations taken
+    """
+    nv, nu = B.shape
+    if Wv is None:
+        Wv = np.ones(nv, dtype=np.float32)
+    if Wu is None:
+        Wu = np.ones(nu, dtype=np.float32)
+    if u_pref is None:
+        u_pref = np.zeros(nu, dtype=np.float32)
+    # Every array handed to the C extension is cast to float32; B goes first, then v.
+    u_sol,it = _wls_alloc(
+        B.astype(np.float32),
+        v.astype(np.float32),
+        u_min.astype(np.float32),
+        u_max.astype(np.float32),
+        u_guess.astype(np.float32) if u_guess is not None else None,
+        W_init.astype(np.float32) if W_init is not None else None,
+        Wv.astype(np.float32),
+        Wu.astype(np.float32),
+        u_pref.astype(np.float32),
+        gamma_sq,
+        imax,
     )
+    # Pass the extension's (solution, iteration count) pair straight through.
+    return u_sol, it
+    
+
 
-    u_guess = None
-    W_init = None
 
-    du, it = wls_alloc(v, dumin, dumax, A, u_guess, W_init, Wv, Wu, up)
-    print(
-        "Matlab lsqlin output : -4614.0, 426.064612091305, 5390.0, -4614.0, -4210.0, 5390.0 "
-    )
-    print(f"Control increment : {du} and iteration : {it}")
diff --git a/examples/fly_INDI.py b/examples/fly_INDI.py
index cbf12ba..8e36bea 100644
--- a/examples/fly_INDI.py
+++ b/examples/fly_INDI.py
@@ -13,6 +13,8 @@
 import pybullet as p
 
 from dronesim.control.INDIControl import INDIControl
+# from dronesim.control.NINDIControl import NINDIControl as INDIControl
+
 from dronesim.envs.BaseAviary import DroneModel, Physics
 from dronesim.envs.CtrlAviary import CtrlAviary
 from dronesim.utils.Logger import Logger
diff --git a/examples/fly_INDI_TrajectoryTrack.py b/examples/fly_INDI_TrajectoryTrack.py
index b3a0472..2185c52 100644
--- a/examples/fly_INDI_TrajectoryTrack.py
+++ b/examples/fly_INDI_TrajectoryTrack.py
@@ -13,6 +13,8 @@
 import pybullet as p
 
 from dronesim.control.INDIControl import INDIControl
+# from dronesim.control.NINDIControl import NINDIControl as INDIControl
+
 from dronesim.envs.BaseAviary import DroneModel, Physics
 from dronesim.envs.CtrlAviary import CtrlAviary
 from dronesim.utils.Logger import Logger
@@ -313,4 +315,6 @@
 for gate in gates:
     ax.plot3D([gate[0]], [gate[1]], [gate[2]], "o")
 ax.legend(loc="upper right")
+
 plt.show()
+print(f"RMSE: {np.linalg.norm(np.array([x_flown, y_flown, z_flown]) - np.array([x, y, z])[:,:len(x_flown)]) / np.sqrt(len(x_flown)):.4f} m")  # Frobenius norm / sqrt(N) = root-mean-square position error
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d99202b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools>=61.0", "numpy"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dronesim"
+version = "0.1.0"
+description = "A drone simulation package"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.10"  # dronesim/control/wls_alloc.py evaluates PEP 604 annotations (np.ndarray|None) at import time
+dependencies = [
+    "numpy",
+    "scipy",
+    "Pillow",
+    "matplotlib",
+    "cycler",
+    "gym",
+    "pybullet",
+]
+
+[tool.setuptools]
+packages = ["dronesim"]
diff --git a/setup.py b/setup.py
index 7407f6b..422045b 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,37 @@
-from setuptools import setup
+"""Minimal setup.py for C extension configuration.
+
+This file works with pyproject.toml (the primary build configuration).
+It is kept for C extension support, which has limited TOML encoding support.
+See: https://packaging.python.org/guides/writing-pyproject-toml/#c-extensions
+"""
+
+from setuptools import Extension, setup
+import numpy
+
+pywls_dir = "dronesim/control/pywls"
+# C extension that dronesim/control/wls_alloc.py imports as dronesim.control.pywls
+ext_modules = [
+    Extension(
+        name="dronesim.control.pywls",
+        sources=[
+            pywls_dir + "/pywls_module.c",
+            pywls_dir + "/wls_alloc.c",
+            pywls_dir + "/qr_solve.c",
+            pywls_dir + "/r8lib_min.c",
+        ],
+        include_dirs=[
+            numpy.get_include(),
+            pywls_dir,
+        ],
+        extra_compile_args=["-O3"],  # NOTE(review): GCC/Clang-style flag; confirm behaviour with other compilers
+        define_macros=[
+            ("WLS_N_U_MAX", "8"),  # wls_alloc.h falls back to 6 when this is not defined
+            ("WLS_N_V_MAX", "6"),  # wls_alloc.h falls back to 4 when this is not defined
+        ],
+    ),
+]
+
 
 setup(
-    name="dronesim",
-    packages=["dronesim"],
-    version="0.1.0",
-    install_requires=[
-        "numpy",
-        "scipy",
-        "Pillow",
-        "matplotlib",
-        "cycler",
-        "gym",
-        "pybullet",
-    ],
+    ext_modules=ext_modules,
 )