The NeuraParse Inference Engine (NPIE) v2.0.0 provides a unified C API for running AI models on embedded devices. All 12 inference backends have full C/C++ implementation files with load/inference/unload support, including LLM text generation, speech-to-text, image generation, and quantum circuit simulation.
| Backend | Source File | Library Dependency |
|---|---|---|
| LiteRT | npie_litert.cpp | tensorflow-lite |
| ONNX Runtime | npie_onnx.cpp | onnxruntime |
| emlearn | npie_emlearn.c | emlearn (header-only) |
| WasmEdge | npie_wasm.cpp | wasmedge |
| NCNN | npie_ncnn.cpp | ncnn (+ Vulkan optional) |
| ExecuTorch | npie_executorch.cpp | executorch |
| OpenVINO | npie_openvino.cpp | openvino::runtime |
| llama.cpp | npie_llama.cpp | llama, common |
| whisper.cpp | npie_whisper.cpp | whisper |
| stable-diffusion.cpp | npie_stable_diffusion.cpp | stable-diffusion |
| MLC LLM | npie_mlc_llm.cpp | tvm_runtime |
| QuEST | npie_quest.cpp | QuEST |
- Core API
- Model Management
- Inference API
- LLM API (NEW in v2.0.0)
- Speech API (NEW in v2.0.0)
- Quantum API (NEW in v2.0.0)
- Hardware Detection
- Data Types
- Examples
- MAVLink API (NEW in v5.0.0)
- Swarm API (NEW in v5.0.0)
- Sensor Fusion API (NEW in v5.0.0)
- Network API (NEW in v5.0.0)
- Security API (NEW in v5.0.0)
Get NPIE version string.
const char* npie_version(void);
// Returns: "2.0.0"
Initialize NPIE context.
npie_status_t npie_init(npie_context_t* ctx, const npie_options_t* options);
Parameters:
- ctx - Pointer to context handle (output)
- options - Initialization options (NULL for defaults)
Example:
npie_context_t ctx;
npie_options_t options = {
.backend = NPIE_BACKEND_AUTO,
.accelerator = NPIE_ACCELERATOR_AUTO,
.num_threads = 4,
.enable_profiling = true
};
if (npie_init(&ctx, &options) != NPIE_SUCCESS) {
fprintf(stderr, "Failed to initialize NPIE\n");
return 1;
}
Shutdown NPIE context and free resources.
npie_status_t npie_shutdown(npie_context_t ctx);
Load model from file.
npie_status_t npie_model_load(npie_context_t ctx,
npie_model_t* model,
const char* path,
const npie_options_t* options);
Get model information.
npie_status_t npie_model_get_info(npie_model_t model, npie_model_info_t* info);
Run inference on model.
npie_status_t npie_inference_run(npie_model_t model,
const npie_tensor_t* inputs,
uint32_t num_inputs,
npie_tensor_t* outputs,
uint32_t num_outputs,
npie_metrics_t* metrics);
New in NPIE v2.0.0 - Large Language Model inference using the llama.cpp backend.
Load a GGUF model for LLM inference.
npie_status_t npie_llm_load(npie_context_t ctx,
npie_llm_t* llm,
const char* model_path,
const npie_llm_params_t* params);
Parameters:
- ctx - NPIE context
- llm - Pointer to LLM handle (output)
- model_path - Path to GGUF model file
- params - LLM configuration parameters
LLM Parameters:
typedef struct {
uint32_t max_tokens; // Maximum tokens to generate (default: 256)
float temperature; // Sampling temperature (default: 0.7)
float top_p; // Nucleus sampling (default: 0.9)
uint32_t top_k; // Top-K sampling (default: 40)
float repeat_penalty; // Repetition penalty (default: 1.1)
uint32_t context_size; // Context window size (default: 2048)
uint32_t num_threads; // CPU threads for inference
npie_accelerator_t accelerator;// GPU/NPU acceleration
npie_quantization_t quantization; // Model quantization mode
bool use_mmap; // Memory-mapped model loading
bool use_mlock; // Lock model in memory
} npie_llm_params_t;
Quantization Modes:
typedef enum {
NPIE_QUANT_NONE = 0,
NPIE_QUANT_Q4_K_M, // 4-bit K-quant medium (recommended)
NPIE_QUANT_Q4_K_S, // 4-bit K-quant small
NPIE_QUANT_Q5_K_M, // 5-bit K-quant medium (higher quality)
NPIE_QUANT_Q8_0, // 8-bit quantization
NPIE_QUANT_IQ2_XXS, // 2-bit importance quantization (smallest)
NPIE_QUANT_IQ3_S, // 3-bit importance quantization
NPIE_QUANT_FP16_NF4, // FP16 with NormalFloat4
} npie_quantization_t;
Example:
npie_llm_t llm;
npie_llm_params_t params = {
.max_tokens = 512,
.temperature = 0.7,
.top_p = 0.9,
.top_k = 40,
.context_size = 4096,
.num_threads = 4,
.accelerator = NPIE_ACCELERATOR_GPU_VULKAN,
.quantization = NPIE_QUANT_Q4_K_M,
.use_mmap = true
};
npie_llm_load(ctx, &llm, "/opt/neuraparse/models/llama-3.2-3b-q4_k_m.gguf", &params);
Generate text from a prompt with optional streaming callback.
npie_status_t npie_llm_generate(npie_llm_t llm,
const char* prompt,
char* output,
size_t output_size,
npie_llm_token_callback_t callback,
void* user_data);
Token Callback:
// Called for each generated token (return false to stop)
typedef bool (*npie_llm_token_callback_t)(const char* token, void* user_data);
Example:
// Streaming callback
bool on_token(const char* token, void* user_data) {
printf("%s", token);
fflush(stdout);
return true; // continue generating
}
char output[4096];
npie_llm_generate(llm, "Explain quantum computing in one paragraph:",
output, sizeof(output), on_token, NULL);
Unload LLM model and free resources.
npie_status_t npie_llm_unload(npie_llm_t llm);
New in NPIE v2.0.0 - Speech-to-text using the whisper.cpp backend.
Load a Whisper model for speech recognition.
npie_status_t npie_speech_load(npie_context_t ctx,
npie_speech_t* speech,
const char* model_path,
const npie_speech_params_t* params);
Speech Parameters:
typedef struct {
const char* language; // Language code (e.g., "en", "auto")
bool translate; // Translate to English
uint32_t num_threads; // CPU threads
bool use_gpu; // GPU acceleration
} npie_speech_params_t;
Transcribe audio data to text.
npie_status_t npie_speech_transcribe(npie_speech_t speech,
const float* audio_data,
uint32_t num_samples,
uint32_t sample_rate,
char* output,
size_t output_size);
Example:
npie_speech_t speech;
npie_speech_params_t params = {
.language = "en",
.translate = false,
.num_threads = 4,
.use_gpu = true
};
npie_speech_load(ctx, &speech, "/opt/neuraparse/models/whisper-base.bin", &params);
// Transcribe 16kHz PCM audio
char transcript[4096];
npie_speech_transcribe(speech, audio_pcm, num_samples, 16000,
transcript, sizeof(transcript));
printf("Transcript: %s\n", transcript);
npie_speech_unload(speech);
npie_status_t npie_speech_unload(npie_speech_t speech);
New in NPIE v2.0.0 - Quantum circuit simulation using QuEST/Qulacs/Stim backends.
typedef enum {
NPIE_GATE_H, // Hadamard (superposition)
NPIE_GATE_X, // Pauli-X (NOT)
NPIE_GATE_Y, // Pauli-Y
NPIE_GATE_Z, // Pauli-Z (phase flip)
NPIE_GATE_T, // T gate (pi/4 phase; historically called the "pi/8 gate")
NPIE_GATE_S, // S gate (pi/2 phase)
NPIE_GATE_RX, // X-axis rotation
NPIE_GATE_RY, // Y-axis rotation
NPIE_GATE_RZ, // Z-axis rotation
NPIE_GATE_CNOT, // Controlled-NOT (2-qubit)
NPIE_GATE_CZ, // Controlled-Z (2-qubit)
NPIE_GATE_SWAP, // SWAP qubits (2-qubit)
NPIE_GATE_TOFFOLI, // Toffoli CCNOT (3-qubit)
NPIE_GATE_MEASURE, // Measurement
} npie_gate_t;
Create a quantum register.
npie_status_t npie_quantum_create(npie_context_t ctx,
npie_qureg_t* qureg,
const npie_quantum_params_t* params);
Quantum Parameters:
typedef struct {
uint32_t num_qubits; // Number of qubits (1-30)
uint32_t num_shots; // Measurement shots (default: 1024)
bool use_density; // Use density matrix (for noise modeling)
bool use_gpu; // GPU acceleration
} npie_quantum_params_t;
Apply a quantum gate to the register.
npie_status_t npie_quantum_gate(npie_qureg_t qureg,
npie_gate_t gate,
int32_t target,
int32_t control,
double angle);
Parameters:
- qureg - Quantum register handle
- gate - Gate type
- target - Target qubit index
- control - Control qubit index (-1 for single-qubit gates)
- angle - Rotation angle in radians (for Rx, Ry, Rz gates)
Example - Bell State:
npie_qureg_t qureg;
npie_quantum_params_t params = { .num_qubits = 2, .num_shots = 1024 };
npie_quantum_create(ctx, &qureg, &params);
// Create Bell state: |00⟩ → (|00⟩ + |11⟩)/√2
npie_quantum_gate(qureg, NPIE_GATE_H, 0, -1, 0.0); // H on qubit 0
npie_quantum_gate(qureg, NPIE_GATE_CNOT, 1, 0, 0.0); // CNOT(0→1)
Perform measurement on the quantum register.
npie_status_t npie_quantum_measure(npie_qureg_t qureg,
npie_measurement_t* results,
uint32_t max_results,
uint32_t* num_results);
Measurement Result:
typedef struct {
uint32_t state; // Measured state as integer
uint32_t count; // Number of times measured
double probability; // Measured probability
} npie_measurement_t;
Get the full statevector (real and imaginary amplitudes).
npie_status_t npie_quantum_get_statevector(npie_qureg_t qureg,
double* real,
double* imag);
Destroy quantum register and free resources.
npie_status_t npie_quantum_destroy(npie_qureg_t qureg);
Complete Quantum Example:
#include <npie.h>
#include <stdio.h>
int main() {
npie_context_t ctx;
npie_init(&ctx, NULL);
// Create 3-qubit register
npie_qureg_t qureg;
npie_quantum_params_t qparams = { .num_qubits = 3, .num_shots = 4096 };
npie_quantum_create(ctx, &qureg, &qparams);
// Build GHZ state: (|000⟩ + |111⟩)/√2
npie_quantum_gate(qureg, NPIE_GATE_H, 0, -1, 0.0);
npie_quantum_gate(qureg, NPIE_GATE_CNOT, 1, 0, 0.0);
npie_quantum_gate(qureg, NPIE_GATE_CNOT, 2, 0, 0.0);
// Measure
npie_measurement_t results[8];
uint32_t num_results;
npie_quantum_measure(qureg, results, 8, &num_results);
for (uint32_t i = 0; i < num_results; i++) {
printf("|%03b⟩: %d counts (%.1f%%)\n",
results[i].state, results[i].count,
results[i].probability * 100.0);
}
// Expected: |000⟩ ~50%, |111⟩ ~50%
npie_quantum_destroy(qureg);
npie_shutdown(ctx);
return 0;
}
Detect available hardware accelerators.
npie_status_t npie_detect_accelerators(npie_context_t ctx,
npie_accelerator_t* accelerators,
uint32_t max_count,
uint32_t* count);
typedef enum {
NPIE_BACKEND_AUTO = 0,
NPIE_BACKEND_LITERT, // TensorFlow Lite
NPIE_BACKEND_ONNXRUNTIME, // ONNX Runtime
NPIE_BACKEND_EMLEARN, // Tiny ML
NPIE_BACKEND_WASM, // WebAssembly
NPIE_BACKEND_OPENCV, // OpenCV DNN
NPIE_BACKEND_NCNN, // NCNN mobile inference
NPIE_BACKEND_EXECUTORCH, // ExecuTorch (PyTorch)
NPIE_BACKEND_OPENVINO, // OpenVINO (Intel)
NPIE_BACKEND_LLAMA_CPP, // llama.cpp LLM
NPIE_BACKEND_WHISPER_CPP, // whisper.cpp Speech
NPIE_BACKEND_STABLE_DIFFUSION, // stable-diffusion.cpp
NPIE_BACKEND_MLC_LLM, // MLC LLM
} npie_backend_t;
typedef enum {
NPIE_ACCELERATOR_NONE = 0,
NPIE_ACCELERATOR_AUTO,
NPIE_ACCELERATOR_GPU, // Generic GPU (OpenCL)
NPIE_ACCELERATOR_NPU, // Generic NPU
NPIE_ACCELERATOR_TPU, // Google Edge TPU
NPIE_ACCELERATOR_DSP, // Qualcomm DSP
NPIE_ACCELERATOR_GPU_VULKAN, // Vulkan GPU compute
NPIE_ACCELERATOR_GPU_CUDA, // NVIDIA CUDA
NPIE_ACCELERATOR_GPU_METAL, // Apple Metal
NPIE_ACCELERATOR_NPU_HEXAGON, // Qualcomm Hexagon
NPIE_ACCELERATOR_NPU_ETHOS, // ARM Ethos-U
NPIE_ACCELERATOR_NPU_INTEL, // Intel NPU (Meteor Lake+)
} npie_accelerator_t;
typedef enum {
NPIE_DTYPE_FLOAT32 = 0,
NPIE_DTYPE_FLOAT16,
NPIE_DTYPE_INT8,
NPIE_DTYPE_UINT8,
NPIE_DTYPE_INT32,
NPIE_DTYPE_INT64,
NPIE_DTYPE_BFLOAT16, // NEW: BFloat16
NPIE_DTYPE_FLOAT8_E4M3, // NEW: FP8
NPIE_DTYPE_INT4, // NEW: 4-bit integer
} npie_dtype_t;
typedef enum {
NPIE_SUCCESS = 0,
NPIE_ERROR_INVALID_ARGUMENT = -1,
NPIE_ERROR_OUT_OF_MEMORY = -2,
NPIE_ERROR_MODEL_LOAD_FAILED = -3,
NPIE_ERROR_INFERENCE_FAILED = -4,
NPIE_ERROR_UNSUPPORTED_OPERATION = -5,
NPIE_ERROR_HARDWARE_NOT_AVAILABLE = -6,
NPIE_ERROR_TIMEOUT = -7,
NPIE_ERROR_NOT_INITIALIZED = -8,
NPIE_ERROR_ALREADY_INITIALIZED = -9,
NPIE_ERROR_IO = -10,
NPIE_ERROR_UNKNOWN = -99
} npie_status_t;
typedef struct {
uint64_t inference_time_us;
uint64_t preprocessing_time_us;
uint64_t postprocessing_time_us;
uint64_t total_time_us;
size_t memory_used_bytes;
float cpu_usage_percent;
float accelerator_usage_percent;
} npie_metrics_t;
- npie_context_t is thread-safe for read operations
- npie_model_t can be used from multiple threads simultaneously
- npie_llm_t should be accessed from a single thread
- npie_qureg_t is NOT thread-safe; use separate registers per thread
- Inference operations are thread-safe
- Use separate contexts for complete isolation
- Use Vulkan acceleration for llama.cpp on GPUs without CUDA
- Use Q4_K_M quantization for best quality/size tradeoff on LLMs
- Use IQ2_XXS quantization for minimum model size (2-bit)
- Enable mmap for faster model loading
- Set context_size appropriately (smaller = faster, less memory)
- Reuse tensors - allocate once, reuse for multiple inferences
- Batch processing for traditional ML inference
- Set num_threads to number of CPU performance cores
# Core inference
gcc -o app app.c -lnpie -lpthread -lm
# With LLM support
gcc -o llm_app llm_app.c -lnpie -lllama -lpthread -lm
# With quantum support
gcc -o quantum_app quantum_app.c -lnpie -lQuEST -lpthread -lm
Create MAVLink connection over UART or UDP.
#include "neuraos_mavlink.h"
mavlink_connection_t *mavlink_connection_create(const mavlink_config_t *cfg);
int mavlink_send_heartbeat(mavlink_connection_t *conn, uint8_t type, uint8_t autopilot, uint8_t mode);
int mavlink_request_data_stream(mavlink_connection_t *conn, uint8_t stream_id, uint16_t rate_hz);
#include "neuraos_swarm.h"
neura_swarm_ctx_t *neura_swarm_create(const neura_swarm_config_t *cfg);
int neura_swarm_set_formation(neura_swarm_ctx_t *ctx, neura_formation_type_t type, float spacing_m, float heading_deg);
Formation types: NEURA_FORMATION_V, NEURA_FORMATION_LINE, NEURA_FORMATION_CIRCLE, NEURA_FORMATION_GRID
int neura_swarm_start_mission(neura_swarm_ctx_t *ctx, const neura_swarm_mission_t *mission);
#include "neuraos_sensors.h"
neura_sensor_fusion_ctx_t *neura_sensor_fusion_create(const neura_fusion_config_t *cfg);
int neura_sensor_fusion_update(neura_sensor_fusion_ctx_t *ctx, const neura_sensor_data_t *data);
int neura_sensor_fusion_get_state(neura_sensor_fusion_ctx_t *ctx, neura_fusion_state_t *state);
// state contains: position (lat/lon/alt), velocity (NED), attitude (quaternion), gyro/accel bias
#include "neuraos_network.h"
neura_net_ctx_t *neura_net_create(const neura_net_config_t *cfg);
WiFi Mesh and WireGuard VPN tunnel APIs.
#include "neuraos_security.h"
int neura_secure_boot_verify(const char *image_path, const char *keyring_path);
int neura_model_encrypt(const char *input, const char *output, const uint8_t *key, size_t key_len);
int neura_model_decrypt(const char *input, const char *output, const uint8_t *key, size_t key_len);