From 00281eb61fa3bfb16a0ba224d85957bb5cd8cf80 Mon Sep 17 00:00:00 2001 From: octo-patch Date: Tue, 24 Mar 2026 15:18:37 +0800 Subject: [PATCH] feat: add MiniMax as LLM provider (llm-chain-minimax crate) Add a new llm-chain-minimax crate that integrates MiniMax LLM models (M2.7, M2.7-highspeed, M2.5, M2.5-highspeed) via their OpenAI-compatible API. - Implements the Executor trait using async-openai with MiniMax API endpoint - Adds Model enum with context size metadata (up to 1M tokens for M2.7) - Strips think tags from model responses (M2.5+ thinking support) - Adds executor!(minimax) macro support - Includes 16 unit tests and 3 integration tests - Updates README with MiniMax documentation --- crates/llm-chain-minimax/Cargo.toml | 26 ++ crates/llm-chain-minimax/README.md | 56 ++++ .../examples/simple_invocation.rs | 34 ++ crates/llm-chain-minimax/src/chatgpt/error.rs | 12 + .../llm-chain-minimax/src/chatgpt/executor.rs | 297 ++++++++++++++++++ crates/llm-chain-minimax/src/chatgpt/mod.rs | 9 + crates/llm-chain-minimax/src/chatgpt/model.rs | 147 +++++++++ .../llm-chain-minimax/src/chatgpt/prompt.rs | 169 ++++++++++ crates/llm-chain-minimax/src/lib.rs | 30 ++ .../tests/integration_test.rs | 121 +++++++ crates/llm-chain/src/executor.rs | 8 + docs/README.md | 25 ++ 12 files changed, 934 insertions(+) create mode 100644 crates/llm-chain-minimax/Cargo.toml create mode 100644 crates/llm-chain-minimax/README.md create mode 100644 crates/llm-chain-minimax/examples/simple_invocation.rs create mode 100644 crates/llm-chain-minimax/src/chatgpt/error.rs create mode 100644 crates/llm-chain-minimax/src/chatgpt/executor.rs create mode 100644 crates/llm-chain-minimax/src/chatgpt/mod.rs create mode 100644 crates/llm-chain-minimax/src/chatgpt/model.rs create mode 100644 crates/llm-chain-minimax/src/chatgpt/prompt.rs create mode 100644 crates/llm-chain-minimax/src/lib.rs create mode 100644 crates/llm-chain-minimax/tests/integration_test.rs diff --git a/crates/llm-chain-minimax/Cargo.toml 
b/crates/llm-chain-minimax/Cargo.toml new file mode 100644 index 00000000..25d68f39 --- /dev/null +++ b/crates/llm-chain-minimax/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "llm-chain-minimax" +version = "0.13.0" +edition = "2021" +description = "Use `llm-chain` with MiniMax's LLM models (M2.7, M2.5). MiniMax provides an OpenAI-compatible API for chat completions." +license = "MIT" +keywords = ["llm", "langchain", "minimax", "chain"] +categories = ["science"] +authors = ["MiniMax Community"] +readme = "../../docs/README.md" +repository = "https://github.com/sobelio/llm-chain/" + +[dependencies] +futures = "0.3.28" +async-openai = "0.16.2" +async-trait.workspace = true +llm-chain = { path = "../llm-chain", version = "0.13.0", default-features = false } +serde.workspace = true +strum = "0.24" +strum_macros = "0.24" +thiserror.workspace = true +tokio.workspace = true + +[dev-dependencies] +tokio = { version = "1.28.2", features = ["macros", "rt"] } +llm-chain = { path = "../llm-chain" } diff --git a/crates/llm-chain-minimax/README.md b/crates/llm-chain-minimax/README.md new file mode 100644 index 00000000..b1bf4101 --- /dev/null +++ b/crates/llm-chain-minimax/README.md @@ -0,0 +1,56 @@ +# llm-chain-minimax + +Use [MiniMax](https://www.minimax.io/) LLM models with `llm-chain`! This crate provides seamless integration with MiniMax's OpenAI-compatible API, giving you access to powerful models like MiniMax-M2.7 and MiniMax-M2.5. + +## Supported Models + +| Model | Context Window | Description | +|---|---|---| +| `MiniMax-M2.7` | 1M tokens | Latest and most capable model | +| `MiniMax-M2.7-highspeed` | 1M tokens | Faster variant of M2.7 | +| `MiniMax-M2.5` | 1M tokens | Previous generation, strong capabilities | +| `MiniMax-M2.5-highspeed` | 204K tokens | Optimized for speed | + +## Quick Start + +1. Get your MiniMax API key from [MiniMax Platform](https://www.minimax.io/) +2. 
Set the environment variable: + +```bash +export MINIMAX_API_KEY="your-api-key-here" +``` + +3. Add dependencies to your `Cargo.toml`: + +```toml +[dependencies] +llm-chain = "0.13.0" +llm-chain-minimax = "0.13.0" +``` + +4. Use the MiniMax executor: + +```rust +use llm_chain::executor; + +let exec = executor!(minimax)?; +``` + +Or with a specific model: + +```rust +use llm_chain::{executor, options}; + +let exec = executor!(minimax, options!(Model: "MiniMax-M2.5-highspeed"))?; +``` + +## Environment Variables + +| Variable | Description | +|---|---| +| `MINIMAX_API_KEY` | Your MiniMax API key (required) | +| `MINIMAX_API_BASE_URL` | Custom API base URL (default: `https://api.minimax.io/v1`) | + +## Examples + +See the [examples](./examples) directory for usage examples. diff --git a/crates/llm-chain-minimax/examples/simple_invocation.rs b/crates/llm-chain-minimax/examples/simple_invocation.rs new file mode 100644 index 00000000..b5a6c6b8 --- /dev/null +++ b/crates/llm-chain-minimax/examples/simple_invocation.rs @@ -0,0 +1,34 @@ +/// This example demonstrates how to use the MiniMax executor for a simple +/// chat completion. Make sure to set the `MINIMAX_API_KEY` environment variable. 
+/// +/// ```bash +/// export MINIMAX_API_KEY="your-api-key-here" +/// cargo run --example simple_invocation +/// ``` +use llm_chain::{executor, parameters}; +use llm_chain::prompt::{ConversationTemplate, StringTemplate}; +use llm_chain::step::Step; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Box> { + // Create a MiniMax executor with the default model (MiniMax-M2.7) + let exec = executor!(minimax)?; + + let prompt = ConversationTemplate::new() + .with_system_template("You are a helpful assistant.") + .with_user(StringTemplate::tera("{{question}}")); + + let result = Step::for_prompt_template(prompt.into()) + .run(¶meters!("question" => "What is the capital of France?"), &exec) + .await?; + + println!( + "Response: {}", + result + .to_immediate() + .await? + .primary_textual_output() + .unwrap_or_default() + ); + Ok(()) +} diff --git a/crates/llm-chain-minimax/src/chatgpt/error.rs b/crates/llm-chain-minimax/src/chatgpt/error.rs new file mode 100644 index 00000000..54cc8023 --- /dev/null +++ b/crates/llm-chain-minimax/src/chatgpt/error.rs @@ -0,0 +1,12 @@ +use async_openai::error::OpenAIError; +use llm_chain::prompt::StringTemplateError; +use thiserror::Error; + +#[derive(Debug, Error)] +#[error(transparent)] +pub enum MiniMaxInnerError { + #[error(transparent)] + OpenAIError(#[from] OpenAIError), + #[error(transparent)] + StringTemplateError(#[from] StringTemplateError), +} diff --git a/crates/llm-chain-minimax/src/chatgpt/executor.rs b/crates/llm-chain-minimax/src/chatgpt/executor.rs new file mode 100644 index 00000000..82d16649 --- /dev/null +++ b/crates/llm-chain-minimax/src/chatgpt/executor.rs @@ -0,0 +1,297 @@ +use super::error::MiniMaxInnerError; +use super::model::Model; +use super::prompt::completion_to_output; +use super::prompt::stream_to_output; +use async_openai::config::OpenAIConfig; + +use llm_chain::options::Opt; +use llm_chain::options::Options; +use llm_chain::options::OptionsCascade; +use llm_chain::output::Output; 
+use llm_chain::tokens::TokenCollection; + +use super::prompt::create_chat_completion_request; +use super::prompt::format_chat_messages; +use async_openai::error::OpenAIError; +use llm_chain::prompt::Prompt; + +use llm_chain::tokens::PromptTokensError; +use llm_chain::tokens::{Tokenizer, TokenizerError}; +use llm_chain::traits; +use llm_chain::traits::{ExecutorCreationError, ExecutorError}; + +use async_trait::async_trait; +use llm_chain::tokens::TokenCount; + +use std::str::FromStr; +use std::sync::Arc; + +/// The default MiniMax API base URL (OpenAI-compatible). +const MINIMAX_API_BASE: &str = "https://api.minimax.io/v1"; + +/// The `Executor` struct for MiniMax models. This executor uses the `async_openai` +/// crate with MiniMax's OpenAI-compatible API endpoint. +/// +/// # Configuration +/// +/// The executor reads the API key from: +/// 1. The `ApiKey` option passed to `new_with_options` +/// 2. The `MINIMAX_API_KEY` environment variable +/// +/// The API base URL can be overridden via the `MINIMAX_API_BASE_URL` environment variable. +#[derive(Clone)] +pub struct Executor { + client: Arc>, + options: Options, +} + +impl Executor { + /// Creates a new `Executor` with the given client. 
+ pub fn for_client(client: async_openai::Client, options: Options) -> Self { + use llm_chain::traits::Executor as _; + let mut exec = Self::new_with_options(options).unwrap(); + exec.client = Arc::new(client); + exec + } + + fn get_model_from_invocation_options(&self, opts: &OptionsCascade) -> String { + let Some(Opt::Model(model)) = opts.get(llm_chain::options::OptDiscriminants::Model) else { + return Model::default().to_string(); + }; + model.to_name() + } + + fn cascade<'a>(&'a self, opts: Option<&'a Options>) -> OptionsCascade<'a> { + let mut v: Vec<&'a Options> = vec![&self.options]; + if let Some(o) = opts { + v.push(o); + } + OptionsCascade::from_vec(v) + } + + fn get_context_size(model_name: &str) -> usize { + Model::from_str(model_name) + .map(|m| m.context_size()) + .unwrap_or(128_000) + } +} + +#[derive(thiserror::Error, Debug)] +#[error(transparent)] +pub enum Error { + OpenAIError(#[from] OpenAIError), +} + +#[async_trait] +impl traits::Executor for Executor { + type StepTokenizer<'a> = MiniMaxTokenizer; + + /// Creates a new `Executor` configured for MiniMax's API. + /// + /// The API key is read from the `ApiKey` option or the `MINIMAX_API_KEY` + /// environment variable. The base URL defaults to `https://api.minimax.io/v1` + /// but can be overridden with `MINIMAX_API_BASE_URL`. 
+ fn new_with_options(options: Options) -> Result { + let mut cfg = OpenAIConfig::new(); + + let opts = OptionsCascade::new().with_options(&options); + + // Set API key from options or environment + if let Some(Opt::ApiKey(api_key)) = opts.get(llm_chain::options::OptDiscriminants::ApiKey) { + cfg = cfg.with_api_key(api_key); + } else if let Ok(api_key) = std::env::var("MINIMAX_API_KEY") { + cfg = cfg.with_api_key(api_key); + } + + // Set base URL (MiniMax's OpenAI-compatible endpoint) + let base_url = std::env::var("MINIMAX_API_BASE_URL") + .unwrap_or_else(|_| MINIMAX_API_BASE.to_string()); + cfg = cfg.with_api_base(base_url); + + let client = Arc::new(async_openai::Client::with_config(cfg)); + Ok(Self { client, options }) + } + + async fn execute(&self, options: &Options, prompt: &Prompt) -> Result { + let opts = self.cascade(Some(options)); + let client = self.client.clone(); + let model = self.get_model_from_invocation_options(&opts); + let input = create_chat_completion_request(model, prompt, opts.is_streaming()).unwrap(); + if opts.is_streaming() { + let res = async move { client.chat().create_stream(input).await } + .await + .map_err(|e| ExecutorError::InnerError(e.into()))?; + Ok(stream_to_output(res)) + } else { + let res = async move { client.chat().create(input).await } + .await + .map_err(|e| ExecutorError::InnerError(e.into()))?; + Ok(completion_to_output(res)) + } + } + + fn tokens_used( + &self, + opts: &Options, + prompt: &Prompt, + ) -> Result { + let _opts_cas = self.cascade(Some(opts)); + + // Use a simple byte-level estimation since tiktoken doesn't support MiniMax models. + // A rough estimate is ~4 characters per token for English text. 
+ let messages = format_chat_messages(prompt.to_chat()).map_err(|e| match e { + MiniMaxInnerError::StringTemplateError(e) => PromptTokensError::PromptFormatFailed(e), + _ => PromptTokensError::UnableToCompute, + })?; + + let mut total_chars = 0usize; + for msg in &messages { + let content = match msg { + async_openai::types::ChatCompletionRequestMessage::System(m) => { + m.content.clone().unwrap_or_default() + } + async_openai::types::ChatCompletionRequestMessage::User(m) => m + .content + .as_ref() + .and_then(|c| match c { + async_openai::types::ChatCompletionRequestUserMessageContent::Text(t) => { + Some(t.to_string()) + } + _ => None, + }) + .unwrap_or_default(), + async_openai::types::ChatCompletionRequestMessage::Assistant(m) => { + m.content.clone().unwrap_or_default() + } + async_openai::types::ChatCompletionRequestMessage::Tool(m) => { + m.content.clone().unwrap_or_default() + } + async_openai::types::ChatCompletionRequestMessage::Function(m) => { + m.content.clone().unwrap_or_default() + } + }; + // ~4 chars per token + overhead per message + total_chars += content.len() + 4; + } + let tokens_used = (total_chars / 4) as i32; + + Ok(TokenCount::new( + self.max_tokens_allowed(opts), + tokens_used, + )) + } + + fn max_tokens_allowed(&self, opts: &Options) -> i32 { + let opts_cas = self.cascade(Some(opts)); + let model = self.get_model_from_invocation_options(&opts_cas); + Self::get_context_size(&model) + .try_into() + .unwrap_or(128_000) + } + + fn answer_prefix(&self, _prompt: &Prompt) -> Option { + None + } + + fn get_tokenizer(&self, _options: &Options) -> Result { + Ok(MiniMaxTokenizer) + } +} + +/// A simple byte-level tokenizer for MiniMax models. +/// +/// Since MiniMax doesn't provide a public tokenizer, this uses a simple +/// character-based approach that provides reasonable estimates for token +/// counting and text splitting. 
+pub struct MiniMaxTokenizer; + +impl Tokenizer for MiniMaxTokenizer { + fn tokenize_str(&self, doc: &str) -> Result { + // Simple byte-level tokenization (similar to mock tokenizer) + let tokens: Vec = doc.as_bytes().iter().map(|&b| b as i32).collect(); + Ok(tokens.into()) + } + + fn to_string(&self, tokens: TokenCollection) -> Result { + let bytes: Vec = tokens + .as_i32() + .map_err(|_| TokenizerError::ToStringError)? + .into_iter() + .map(|c| c as u8) + .collect(); + String::from_utf8(bytes).map_err(|_| TokenizerError::ToStringError) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use llm_chain::traits::Executor; + + #[test] + fn test_default_model() { + let exec = + ::new_with_options(Options::empty().clone()).unwrap(); + let opts = exec.cascade(None); + let model = exec.get_model_from_invocation_options(&opts); + assert_eq!(model, "MiniMax-M2.7"); + } + + #[test] + fn test_custom_model() { + let mut builder = llm_chain::options::Options::builder(); + builder.add_option(Opt::Model(llm_chain::options::ModelRef::from_model_name( + "MiniMax-M2.5-highspeed", + ))); + let options = builder.build(); + let exec = ::new_with_options(options).unwrap(); + let opts = exec.cascade(None); + let model = exec.get_model_from_invocation_options(&opts); + assert_eq!(model, "MiniMax-M2.5-highspeed"); + } + + #[test] + fn test_context_size() { + assert_eq!(super::Executor::get_context_size("MiniMax-M2.7"), 1_000_000); + assert_eq!( + super::Executor::get_context_size("MiniMax-M2.5-highspeed"), + 204_000 + ); + assert_eq!(super::Executor::get_context_size("unknown"), 128_000); + } + + #[test] + fn test_tokenizer() { + let tokenizer = MiniMaxTokenizer; + let tokens = tokenizer.tokenize_str("Hello").unwrap(); + assert_eq!(tokens.len(), 5); + let text = tokenizer.to_string(tokens).unwrap(); + assert_eq!(text, "Hello"); + } + + #[test] + fn test_tokenizer_utf8() { + let tokenizer = MiniMaxTokenizer; + let tokens = tokenizer.tokenize_str("Héllo").unwrap(); + // é is 2 bytes in 
UTF-8 + assert_eq!(tokens.len(), 6); + let text = tokenizer.to_string(tokens).unwrap(); + assert_eq!(text, "Héllo"); + } + + #[test] + fn test_max_tokens_allowed() { + let exec = + ::new_with_options(Options::empty().clone()).unwrap(); + let max = exec.max_tokens_allowed(Options::empty()); + assert_eq!(max, 1_000_000); + } + + #[test] + fn test_answer_prefix_is_none() { + let exec = + ::new_with_options(Options::empty().clone()).unwrap(); + let prompt = Prompt::text("test".to_string()); + assert!(exec.answer_prefix(&prompt).is_none()); + } +} diff --git a/crates/llm-chain-minimax/src/chatgpt/mod.rs b/crates/llm-chain-minimax/src/chatgpt/mod.rs new file mode 100644 index 00000000..18660a3a --- /dev/null +++ b/crates/llm-chain-minimax/src/chatgpt/mod.rs @@ -0,0 +1,9 @@ +//! This module implements the MiniMax chat model executor using the +//! OpenAI-compatible API at `https://api.minimax.io/v1`. +mod error; +mod executor; +mod model; +mod prompt; + +pub use executor::{Error, Executor}; +pub use model::Model; diff --git a/crates/llm-chain-minimax/src/chatgpt/model.rs b/crates/llm-chain-minimax/src/chatgpt/model.rs new file mode 100644 index 00000000..a39db246 --- /dev/null +++ b/crates/llm-chain-minimax/src/chatgpt/model.rs @@ -0,0 +1,147 @@ +use llm_chain::options::{ModelRef, Opt}; +use serde::{Deserialize, Serialize}; +use strum_macros::EnumString; + +/// The `Model` enum represents the available MiniMax models. +/// +/// MiniMax provides high-performance language models through an +/// OpenAI-compatible API. The M2.7 family offers up to 1M token context, +/// while M2.5-highspeed provides 204K context with faster inference. 
+/// +/// # Example +/// +/// ``` +/// use llm_chain_minimax::chatgpt::Model; +/// +/// let default_model = Model::MiniMaxM27; +/// let highspeed = Model::MiniMaxM27Highspeed; +/// let custom = Model::Other("custom-model".to_string()); +/// ``` +#[derive(Debug, Default, Clone, Serialize, Deserialize, EnumString, PartialEq, Eq)] +#[non_exhaustive] +pub enum Model { + /// MiniMax-M2.7: The latest and most capable model with up to 1M token context. + #[default] + #[strum(serialize = "MiniMax-M2.7")] + MiniMaxM27, + + /// MiniMax-M2.7-highspeed: Faster variant of M2.7 optimized for speed. + #[strum(serialize = "MiniMax-M2.7-highspeed")] + MiniMaxM27Highspeed, + + /// MiniMax-M2.5: Previous generation model with strong capabilities. + #[strum(serialize = "MiniMax-M2.5")] + MiniMaxM25, + + /// MiniMax-M2.5-highspeed: Fast variant with 204K token context window. + #[strum(serialize = "MiniMax-M2.5-highspeed")] + MiniMaxM25Highspeed, + + /// A variant that allows specifying a custom model name. + #[strum(default)] + Other(String), +} + +impl Model { + /// Returns the maximum context size for the model. 
+ pub fn context_size(&self) -> usize { + match self { + Model::MiniMaxM27 => 1_000_000, + Model::MiniMaxM27Highspeed => 1_000_000, + Model::MiniMaxM25 => 1_000_000, + Model::MiniMaxM25Highspeed => 204_000, + Model::Other(_) => 128_000, // conservative default + } + } +} + +impl ToString for Model { + fn to_string(&self) -> String { + match self { + Model::MiniMaxM27 => "MiniMax-M2.7".to_string(), + Model::MiniMaxM27Highspeed => "MiniMax-M2.7-highspeed".to_string(), + Model::MiniMaxM25 => "MiniMax-M2.5".to_string(), + Model::MiniMaxM25Highspeed => "MiniMax-M2.5-highspeed".to_string(), + Model::Other(model) => model.to_string(), + } + } +} + +impl From for ModelRef { + fn from(value: Model) -> Self { + ModelRef::from_model_name(value.to_string()) + } +} + +impl From for Opt { + fn from(value: Model) -> Self { + Opt::Model(value.into()) + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use super::*; + + #[test] + fn test_from_str() -> Result<(), Box> { + assert_eq!(Model::from_str("MiniMax-M2.7")?, Model::MiniMaxM27); + assert_eq!( + Model::from_str("MiniMax-M2.7-highspeed")?, + Model::MiniMaxM27Highspeed + ); + assert_eq!(Model::from_str("MiniMax-M2.5")?, Model::MiniMaxM25); + assert_eq!( + Model::from_str("MiniMax-M2.5-highspeed")?, + Model::MiniMaxM25Highspeed + ); + assert_eq!( + Model::from_str("custom-model")?, + Model::Other("custom-model".to_string()) + ); + Ok(()) + } + + #[test] + fn test_to_string() { + assert_eq!(Model::MiniMaxM27.to_string(), "MiniMax-M2.7"); + assert_eq!( + Model::MiniMaxM27Highspeed.to_string(), + "MiniMax-M2.7-highspeed" + ); + assert_eq!(Model::MiniMaxM25.to_string(), "MiniMax-M2.5"); + assert_eq!( + Model::MiniMaxM25Highspeed.to_string(), + "MiniMax-M2.5-highspeed" + ); + assert_eq!( + Model::Other("custom".to_string()).to_string(), + "custom" + ); + } + + #[test] + fn test_default_model() { + assert_eq!(Model::default(), Model::MiniMaxM27); + } + + #[test] + fn test_context_size() { + 
assert_eq!(Model::MiniMaxM27.context_size(), 1_000_000); + assert_eq!(Model::MiniMaxM27Highspeed.context_size(), 1_000_000); + assert_eq!(Model::MiniMaxM25.context_size(), 1_000_000); + assert_eq!(Model::MiniMaxM25Highspeed.context_size(), 204_000); + assert_eq!(Model::Other("unknown".to_string()).context_size(), 128_000); + } + + #[test] + fn test_model_to_opt() { + let opt: Opt = Model::MiniMaxM27.into(); + match opt { + Opt::Model(model_ref) => assert_eq!(model_ref.to_name(), "MiniMax-M2.7"), + _ => panic!("Expected Opt::Model"), + } + } +} diff --git a/crates/llm-chain-minimax/src/chatgpt/prompt.rs b/crates/llm-chain-minimax/src/chatgpt/prompt.rs new file mode 100644 index 00000000..32be0920 --- /dev/null +++ b/crates/llm-chain-minimax/src/chatgpt/prompt.rs @@ -0,0 +1,169 @@ +use async_openai::types::{ + ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestFunctionMessageArgs, + ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs, + ChatCompletionRequestToolMessageArgs, ChatCompletionRequestUserMessageArgs, + ChatCompletionResponseStream, CreateChatCompletionRequest, CreateChatCompletionRequestArgs, + CreateChatCompletionResponse, Role, +}; +use futures::StreamExt; +use llm_chain::prompt::{self, Prompt}; +use llm_chain::{ + output::{Output, StreamSegment}, + prompt::{ChatMessage, ChatMessageCollection}, +}; + +use super::error::MiniMaxInnerError; + +fn convert_role(role: &prompt::ChatRole) -> Role { + match role { + prompt::ChatRole::User => Role::User, + prompt::ChatRole::Assistant => Role::Assistant, + prompt::ChatRole::System => Role::System, + prompt::ChatRole::Other(_s) => Role::User, + } +} + +fn convert_minimax_role(role: &Role) -> prompt::ChatRole { + match role { + Role::User => prompt::ChatRole::User, + Role::Assistant => prompt::ChatRole::Assistant, + Role::System => prompt::ChatRole::System, + Role::Tool => prompt::ChatRole::Other("Tool".to_string()), + Role::Function => prompt::ChatRole::Other("Function".to_string()), + 
} +} + +fn format_chat_message( + message: &prompt::ChatMessage, +) -> Result { + let role = convert_role(message.role()); + let content = message.body().to_string(); + let msg = match role { + Role::Assistant => ChatCompletionRequestMessage::Assistant( + ChatCompletionRequestAssistantMessageArgs::default() + .content(content) + .build()?, + ), + Role::System => ChatCompletionRequestMessage::System( + ChatCompletionRequestSystemMessageArgs::default() + .content(content) + .build()?, + ), + Role::User => ChatCompletionRequestMessage::User( + ChatCompletionRequestUserMessageArgs::default() + .content(content) + .build()?, + ), + Role::Tool => ChatCompletionRequestMessage::Tool( + ChatCompletionRequestToolMessageArgs::default() + .content(content) + .build()?, + ), + Role::Function => ChatCompletionRequestMessage::Function( + ChatCompletionRequestFunctionMessageArgs::default() + .content(content) + .build()?, + ), + }; + Ok(msg) +} + +pub fn format_chat_messages( + messages: prompt::ChatMessageCollection, +) -> Result, MiniMaxInnerError> { + messages.iter().map(format_chat_message).collect() +} + +pub fn create_chat_completion_request( + model: String, + prompt: &Prompt, + is_streaming: bool, +) -> Result { + let messages = format_chat_messages(prompt.to_chat())?; + Ok(CreateChatCompletionRequestArgs::default() + .model(model) + .stream(is_streaming) + .messages(messages) + .build()?) +} + +pub fn completion_to_output(resp: CreateChatCompletionResponse) -> Output { + let msg = resp.choices.first().unwrap().message.clone(); + let content = msg.content.unwrap_or_default(); + // Strip MiniMax thinking tags if present (M2.5+ models may include ...) 
/// Strips every complete `<think>...</think>` block that MiniMax models may include
/// in a response and returns the remaining text.
///
/// * If `content` contains no complete block (no opening tag, or an opening tag that
///   is never closed), it is returned unchanged — not even trimmed.
/// * Otherwise all complete blocks are removed and the result is trimmed of leading
///   and trailing whitespace. Whitespace *between* the surrounding fragments is kept.
///
/// The closing tag is only searched for *after* its opening tag, so a stray
/// `</think>` that precedes `<think>` is left alone instead of corrupting the output.
///
/// This works on one string at a time: a tag that is split across two separate
/// inputs (e.g. consecutive stream chunks) is not detected.
fn strip_think_tags(content: &str) -> String {
    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    let mut kept = String::with_capacity(content.len());
    let mut rest = content;
    let mut stripped_any = false;

    while let Some(open_at) = rest.find(OPEN) {
        let after_open = &rest[open_at + OPEN.len()..];
        // An unterminated block is not stripped; keep the remainder verbatim.
        let Some(close_at) = after_open.find(CLOSE) else {
            break;
        };
        kept.push_str(&rest[..open_at]);
        rest = &after_open[close_at + CLOSE.len()..];
        stripped_any = true;
    }

    if !stripped_any {
        return content.to_string();
    }

    kept.push_str(rest);
    kept.trim().to_string()
}
-0,0 +1,30 @@ +//! # llm-chain-minimax +//! +//! Use MiniMax's powerful LLM models with `llm-chain`! This crate provides a +//! seamless integration with MiniMax's OpenAI-compatible API, giving you access +//! to models like MiniMax-M2.7 and MiniMax-M2.5. +//! +//! MiniMax offers high-performance language models with large context windows +//! (up to 1M tokens for M2.7, 204K for M2.5-highspeed) through an +//! OpenAI-compatible chat completions endpoint at `https://api.minimax.io/v1`. +//! +//! # Getting Started +//! +//! Set the `MINIMAX_API_KEY` environment variable with your MiniMax API key: +//! +//! ```bash +//! export MINIMAX_API_KEY="your-api-key-here" +//! ``` +//! +//! Then use the executor in your code: +//! +//! ```ignore +//! use llm_chain::executor; +//! use llm_chain::options; +//! +//! let exec = executor!(minimax)?; +//! // Or with a specific model: +//! let exec = executor!(minimax, options!(Model: "MiniMax-M2.7"))?; +//! ``` + +pub mod chatgpt; diff --git a/crates/llm-chain-minimax/tests/integration_test.rs b/crates/llm-chain-minimax/tests/integration_test.rs new file mode 100644 index 00000000..1059c9c0 --- /dev/null +++ b/crates/llm-chain-minimax/tests/integration_test.rs @@ -0,0 +1,121 @@ +/// Integration tests for llm-chain-minimax. +/// +/// These tests require a valid `MINIMAX_API_KEY` environment variable. 
+/// They are ignored by default and can be run with: +/// +/// ```bash +/// MINIMAX_API_KEY="your-key" cargo test -p llm-chain-minimax --test integration_test -- --ignored +/// ``` +use llm_chain::options::{ModelRef, Opt, Options}; +use llm_chain::prompt::{ConversationTemplate, StringTemplate}; +use llm_chain::step::Step; +use llm_chain::traits::Executor; +use llm_chain_minimax::chatgpt; + +fn has_minimax_key() -> bool { + std::env::var("MINIMAX_API_KEY").is_ok() +} + +fn create_executor() -> chatgpt::Executor { + chatgpt::Executor::new().expect("Failed to create MiniMax executor") +} + +fn create_executor_with_model(model: &str) -> chatgpt::Executor { + let mut builder = Options::builder(); + builder.add_option(Opt::Model(ModelRef::from_model_name(model))); + chatgpt::Executor::new_with_options(builder.build()) + .expect("Failed to create MiniMax executor") +} + +#[tokio::test] +#[ignore] +async fn test_simple_chat_completion() { + if !has_minimax_key() { + eprintln!("Skipping: MINIMAX_API_KEY not set"); + return; + } + let exec = create_executor(); + let prompt = ConversationTemplate::new() + .with_system_template("You are a helpful assistant. Reply concisely.") + .with_user(StringTemplate::tera("What is 2+2? 
Just say the number.")); + + let result = Step::for_prompt_template(prompt.into()) + .run(&llm_chain::parameters!(), &exec) + .await + .expect("Execution failed"); + + let output = result + .to_immediate() + .await + .expect("Failed to get immediate output") + .primary_textual_output() + .expect("No textual output"); + + assert!(output.contains('4'), "Expected '4' in output, got: {}", output); +} + +#[tokio::test] +#[ignore] +async fn test_m27_model() { + if !has_minimax_key() { + eprintln!("Skipping: MINIMAX_API_KEY not set"); + return; + } + let exec = create_executor_with_model("MiniMax-M2.7"); + let prompt = ConversationTemplate::new() + .with_system_template("You are a helpful assistant.") + .with_user(StringTemplate::tera("Say 'hello' and nothing else.")); + + let result = Step::for_prompt_template(prompt.into()) + .run(&llm_chain::parameters!(), &exec) + .await + .expect("Execution failed with MiniMax-M2.7"); + + let output = result + .to_immediate() + .await + .expect("Failed to get output") + .primary_textual_output() + .expect("No output"); + + let output_lower = output.to_lowercase(); + assert!( + output_lower.contains("hello"), + "Expected 'hello' in output, got: {}", + output + ); +} + +#[tokio::test] +#[ignore] +async fn test_m25_highspeed_model() { + if !has_minimax_key() { + eprintln!("Skipping: MINIMAX_API_KEY not set"); + return; + } + let exec = create_executor_with_model("MiniMax-M2.5-highspeed"); + let prompt = ConversationTemplate::new() + .with_system_template("You are a helpful assistant.") + .with_user(StringTemplate::tera( + "What programming language is the Linux kernel written in? 
Reply in one word.", + )); + + let result = Step::for_prompt_template(prompt.into()) + .run(&llm_chain::parameters!(), &exec) + .await + .expect("Execution failed with MiniMax-M2.5-highspeed"); + + let output = result + .to_immediate() + .await + .expect("Failed to get output") + .primary_textual_output() + .expect("No output"); + + let output_lower = output.to_lowercase(); + assert!( + output_lower.contains('c'), + "Expected 'C' in output, got: {}", + output + ); +} diff --git a/crates/llm-chain/src/executor.rs b/crates/llm-chain/src/executor.rs index 6de5fe52..284a437a 100644 --- a/crates/llm-chain/src/executor.rs +++ b/crates/llm-chain/src/executor.rs @@ -80,4 +80,12 @@ macro_rules! executor { use llm_chain::traits::Executor; llm_chain_sagemaker_endpoint::Executor::new_with_options($options) }}; + (minimax) => {{ + use llm_chain::traits::Executor; + llm_chain_minimax::chatgpt::Executor::new() + }}; + (minimax, $options:expr) => {{ + use llm_chain::traits::Executor; + llm_chain_minimax::chatgpt::Executor::new_with_options($options) + }}; } diff --git a/docs/README.md b/docs/README.md index 695575a1..f198a6b2 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,6 +30,7 @@ println!("{}", res); - **Prompt templates**: Create reusable and easily customizable prompt templates for consistent and structured interactions with LLMs. - **Chains**: Build powerful chains of prompts that allow you to execute more complex tasks, step by step, leveraging the full potential of LLMs. - **ChatGPT support**: Supports ChatGPT models, with plans to add OpenAI's other models in the future. +- **MiniMax support**: Integrates with [MiniMax](https://www.minimax.io/) models (M2.7, M2.5) via their OpenAI-compatible API, offering up to 1M token context windows. - **LLaMa support**: Provides seamless integration with LLaMa models, enabling natural language understanding and generation tasks with Facebook's research models. 
- **Alpaca support**: Incorporates support for Stanford's Alpaca models, expanding the range of available language models for advanced AI applications. - **`llm.rs` support**: Use llms in rust without dependencies on C++ code with our support for `llm.rs` @@ -55,6 +56,30 @@ export OPENAI_API_KEY="sk-YOUR_OPEN_AI_KEY_HERE" Then, refer to the [documentation](https://docs.rs/llm-chain) and [examples](/crates/llm-chain-openai/examples) to learn how to create prompt templates, chains, and more. +### Using MiniMax + +To use MiniMax models, add `llm-chain-minimax` to your dependencies: + +```toml +[dependencies] +llm-chain = "0.13.0" +llm-chain-minimax = "0.13.0" +``` + +Set your MiniMax API key: + +```bash +export MINIMAX_API_KEY="your-minimax-api-key" +``` + +Then use the `minimax` executor: + +```rust +let exec = executor!(minimax)?; +``` + +See [examples](/crates/llm-chain-minimax/examples) for more details. + ## Contributing 🤝 **We warmly welcome contributions from everyone!** If you're interested in helping improve `llm-chain`, please check out our [`CONTRIBUTING.md`](/docs/CONTRIBUTING.md) file for guidelines and best practices.