Skip to content

tryAGI/ElevenLabs

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

326 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

ElevenLabs

Nuget package dotnet License: MIT Discord

Features 🔥

  • Fully generated C# SDK based on official ElevenLabs OpenAPI specification using AutoSDK
  • Same-day updates to support new features
  • Updated and supported automatically if there are no breaking changes
  • All modern .NET features - nullability, trimming, NativeAOT, etc.
  • Supports .NET Framework and .NET Standard 2.0
  • Realtime speech-to-text via WebSocket

Usage

Installation

dotnet add package ElevenLabs

Authentication

using ElevenLabs;

// Create the API client. `apiKey` is not defined in this snippet and must be supplied by the caller.
// The `using` declaration disposes the client when the enclosing scope ends.
using var client = new ElevenLabsClient(apiKey);

List Available Voices

Fetch all voices available to the authenticated account and print each voice name.

using var client = new ElevenLabsClient(apiKey);

// Retrieve the voice list from the API.
GetVoicesResponseModel voiceList = await client.Voices.GetVoicesAsync();

// Emit one console line per voice, containing only its name.
foreach (var entry in voiceList.Voices)
{
    var voiceName = entry.Name;
    Console.WriteLine(voiceName);
}

Text to Speech

Convert text to speech with the first available voice and save the generated audio to disk.

using var client = new ElevenLabsClient(apiKey);

// Use the first voice in the returned list as the synthesis voice.
// NOTE(review): indexing [0] assumes the account has at least one voice.
var voiceList = await client.Voices.GetVoicesAsync();
var firstVoice = voiceList.Voices[0];

// Synthesize the sample sentence with that voice.
byte[] speech = await client.TextToSpeech.CreateTextToSpeechByVoiceIdAsync(
    voiceId: firstVoice.VoiceId,
    text: "Hello, world! This is a test of the ElevenLabs text-to-speech API.");

// Write the audio next to the working directory and report how much was saved.
await File.WriteAllBytesAsync("output.mp3", speech);
Console.WriteLine($"Saved {speech.Length} bytes to output.mp3");

Streaming Text to Speech

Request a streaming text-to-speech response for low-latency playback and save the returned audio bytes.

using var client = new ElevenLabsClient(apiKey);

// Use the first voice in the returned list as the synthesis voice.
// NOTE(review): indexing [0] assumes the account has at least one voice.
var voiceList = await client.Voices.GetVoicesAsync();
var firstVoice = voiceList.Voices[0];

// Call the streaming endpoint; this call is awaited and yields the audio as one byte array.
byte[] audio = await client.TextToSpeech.CreateTextToSpeechByVoiceIdStreamAsync(
    voiceId: firstVoice.VoiceId,
    text: "This audio is streamed for low-latency playback.",
    modelId: "eleven_multilingual_v2",
    outputFormat: TextToSpeechStreamOutputFormat.Mp32205032);

// Write the audio to disk and report how much was saved.
await File.WriteAllBytesAsync("streamed-output.mp3", audio);
Console.WriteLine($"Saved {audio.Length} bytes to streamed-output.mp3");

Streaming Text to Speech with Timestamps

Stream synthesized audio together with character-level timing information for subtitles, captions, or lip-sync.

using var client = new ElevenLabsClient(apiKey);

// Use the first voice in the returned list as the synthesis voice.
// NOTE(review): indexing [0] assumes the account has at least one voice.
var voiceList = await client.Voices.GetVoicesAsync();
var firstVoice = voiceList.Voices[0];

// Ask for audio plus per-character timing metadata.
StreamingAudioChunkWithTimestampsResponseModel chunk =
    await client.TextToSpeech.CreateTextToSpeechByVoiceIdStreamWithTimestampsAsync(
        voiceId: firstVoice.VoiceId,
        text: "Hello, this has timestamps.",
        modelId: "eleven_multilingual_v2",
        outputFormat: TextToSpeechStreamWithTimestampsOutputFormat.Mp32205032);

// When alignment data is present, print each character with its start and end time.
if (chunk.Alignment is { } alignment)
{
    var index = 0;

    // The lifted comparison is false when Characters is null, so the loop body is skipped.
    while (index < alignment.Characters?.Count)
    {
        Console.WriteLine($"'{alignment.Characters[index]}' " +
                          $"{alignment.CharacterStartTimesSeconds?[index]:F3}s - " +
                          $"{alignment.CharacterEndTimesSeconds?[index]:F3}s");
        index++;
    }
}

Sound Generation

Generate a short sound effect from a text prompt and save the returned audio bytes.

using var client = new ElevenLabsClient(apiKey);

// Describe the desired effect in plain text and pass a duration of 3.0 seconds.
const string prompt = "A gentle ocean wave crashing on a sandy beach";
byte[] effect = await client.SoundGeneration.CreateSoundGenerationAsync(
    text: prompt,
    durationSeconds: 3.0);

// Write the audio to disk and report how much was saved.
await File.WriteAllBytesAsync("ocean-wave.mp3", effect);
Console.WriteLine($"Saved {effect.Length} bytes to ocean-wave.mp3");

Speech to Text from a File

Transcribe a WAV file from disk and print the returned transcript text.

using var client = new ElevenLabsClient(apiKey);

// Read the sample recording from the Resources folder under the application base directory.
var recordingPath = Path.Combine(AppContext.BaseDirectory, "Resources", "hello-in-russian-24k-pcm16.wav");
byte[] recording = await File.ReadAllBytesAsync(recordingPath);

// Send the recording for transcription, passing "ru" as the language code.
var result = await client.SpeechToText.CreateSpeechToTextAsync(
    modelId: BodySpeechToTextV1SpeechToTextPostModelId.ScribeV1,
    file: recording,
    languageCode: "ru");

// Print the text only when the first result variant is present and non-blank.
if (result.Value1 is { } chunk && !string.IsNullOrWhiteSpace(chunk.Text))
{
    Console.WriteLine(chunk.Text);
}

Voice Cloning

Create an instant voice clone from an audio sample, print the new voice ID, and delete the test voice afterwards.

using var client = new ElevenLabsClient(apiKey);

// Read the sample recording from the Resources folder under the application base directory.
var samplePath = Path.Combine(AppContext.BaseDirectory, "Resources", "hello-in-russian-24k-pcm16.wav");
byte[] sampleAudio = await File.ReadAllBytesAsync(samplePath);

// A GUID suffix gives each run a distinct voice name.
var voiceName = $"Test Cloned Voice {Guid.NewGuid():N}";

// Upload the sample and create the cloned voice.
AddVoiceIVCResponseModel created = await client.Voices.CreateVoicesAddAsync(
    name: voiceName,
    files: [sampleAudio],
    description: "A cloned voice from my audio sample",
    removeBackgroundNoise: true);

var clonedVoiceId = created.VoiceId;
Console.WriteLine($"Cloned voice ID: {clonedVoiceId}");

// Remove the test voice again; this only runs when everything above succeeded.
await client.Voices.DeleteVoicesByVoiceIdAsync(clonedVoiceId);

Realtime Speech to Text

Open a realtime transcription session, stream PCM audio in chunks, and read transcript events until a final transcript arrives.

using var client = new ElevenLabsClient(apiKey);

// Every awaited call below shares this token, which cancels after 30 seconds.
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));

// Connect with a 24 kHz PCM audio format and a manual commit strategy.
var sessionOptions = new RealtimeSpeechToTextOptions
{
    AudioFormat = RealtimeAudioFormat.Pcm24000,
    CommitStrategy = RealtimeCommitStrategy.Manual,
};
await using var session = await client.ConnectRealtimeAsync(sessionOptions, cancellationToken: cts.Token);

// Read the sample WAV file and decode it into 16-bit samples; the channel count is not needed here.
var wavPath = Path.Combine(AppContext.BaseDirectory, "Resources", "hello-in-russian-24k-pcm16.wav");
byte[] wavBytes = await File.ReadAllBytesAsync(wavPath, cts.Token);
var (pcm, sampleRate, _) = ReadWavPcm16(wavBytes);

// Upload the samples in blocks of 12000 (half a second at 24 kHz mono) and flag the last block as the commit.
const int samplesPerChunk = 12000;
var position = 0;
while (position < pcm.Length)
{
    var sampleCount = Math.Min(samplesPerChunk, pcm.Length - position);

    // Buffer.BlockCopy works in bytes, so sample offsets and counts are doubled.
    var chunk = new byte[sampleCount * 2];
    Buffer.BlockCopy(pcm, position * 2, chunk, 0, chunk.Length);

    position += sampleCount;
    var isLastChunk = position >= pcm.Length;
    await session.SendAudioChunkAsync(chunk, sampleRate, isLastChunk, cancellationToken: cts.Token);
}

// Consume server events until a committed transcript with non-blank text shows up.
string? finalTranscript = null;
await foreach (var message in session.ReadEventsAsync(cts.Token))
{
    // The checks run in the same order as the event types are listed, so the first match wins.
    if (message is SessionStartedEvent started)
    {
        Console.WriteLine($"Session started: {started.SessionId}");
    }
    else if (message is PartialTranscriptEvent partial)
    {
        Console.WriteLine($"Partial: {partial.Text}");
    }
    else if (message is CommittedTranscriptEvent committed)
    {
        finalTranscript = committed.Text;
        Console.WriteLine($"Final: {committed.Text}");
    }
    else if (message is CommittedTranscriptWithTimestampsEvent committedWithTimestamps)
    {
        finalTranscript = committedWithTimestamps.Text;
        Console.WriteLine($"Final: {committedWithTimestamps.Text}");
    }
    else if (message is ErrorEvent error)
    {
        throw new InvalidOperationException($"ElevenLabs error: {error.ErrorType} - {error.Error}");
    }

    if (!string.IsNullOrWhiteSpace(finalTranscript))
    {
        break;
    }
}

// Parses an in-memory RIFF/WAVE file and returns its 16-bit PCM samples together with the
// sample rate and channel count declared in the "fmt " chunk.
//
// data: the complete bytes of a WAV file.
// Returns: (samples, sampleRate, channels). Samples are returned in file order; when no
//          "fmt " chunk precedes the data chunk the defaults below (PCM, mono, 16000 Hz,
//          16 bit) are reported.
// Throws InvalidDataException when the RIFF/WAVE magic is missing, the "fmt " chunk is
//        too small, the audio is not 16-bit PCM, or no "data" chunk is found.
static (short[] samples, int sampleRate, int channels) ReadWavPcm16(ReadOnlySpan<byte> data)
{
    const int chunkHeaderSize = 8;   // four-byte chunk id + four-byte chunk size
    const int minFmtChunkSize = 16;  // size of the fixed PCM format fields read below

    using var ms = new MemoryStream(data.ToArray(), writable: false);

    // ASCII decoding makes ReadChars(4) consume exactly four bytes; a multi-byte encoding
    // could swallow extra bytes when a tag contains non-ASCII garbage and misalign the parse.
    using var br = new BinaryReader(ms, Encoding.ASCII, leaveOpen: true);

    if (new string(br.ReadChars(4)) != "RIFF")
    {
        throw new InvalidDataException("Not RIFF");
    }

    // The overall RIFF size is not needed; chunk sizes drive the walk below.
    br.ReadInt32();

    if (new string(br.ReadChars(4)) != "WAVE")
    {
        throw new InvalidDataException("Not WAVE");
    }

    ushort audioFormat = 1;
    ushort localChannels = 1;
    int localSampleRate = 16000;
    ushort bitsPerSample = 16;

    // Walk the chunk list for as long as a full chunk header is still available.
    while (ms.Length - ms.Position >= chunkHeaderSize)
    {
        var id = new string(br.ReadChars(4));

        // RIFF chunk sizes are unsigned 32-bit values; widen to long so that a
        // 0xFFFFFFFF placeholder does not turn into a negative count.
        long size = br.ReadUInt32();
        long remaining = ms.Length - ms.Position;

        if (id == "data")
        {
            if (audioFormat != 1 || bitsPerSample != 16)
            {
                throw new InvalidDataException("Expected PCM16");
            }

            // Writers that stream their output leave the size as 0 or 0xFFFFFFFF; in that
            // case, and when the declared size overruns the buffer, take what is left.
            long dataSize = size == 0 || size > remaining ? remaining : size;
            var dataBytes = br.ReadBytes((int)dataSize);

            // A trailing odd byte cannot form a sample and is dropped. Decoding byte by byte
            // keeps the little-endian WAV layout independent of the host byte order.
            var samples = new short[dataBytes.Length / 2];
            for (var i = 0; i < samples.Length; i++)
            {
                samples[i] = unchecked((short)(dataBytes[2 * i] | (dataBytes[(2 * i) + 1] << 8)));
            }

            return (samples, localSampleRate, localChannels);
        }

        // Chunks are word aligned: an odd-sized chunk is followed by one pad byte that is
        // not included in its size field.
        long next = ms.Position + size + (size & 1);

        if (id == "fmt ")
        {
            if (size < minFmtChunkSize || size > int.MaxValue)
            {
                throw new InvalidDataException("Bad fmt chunk");
            }

            audioFormat = br.ReadUInt16();
            localChannels = br.ReadUInt16();
            localSampleRate = br.ReadInt32();
            br.ReadInt32();   // average bytes per second (unused)
            br.ReadUInt16();  // block align (unused)
            bitsPerSample = br.ReadUInt16();
        }

        // Jump to the next chunk header, clamping at the end of the buffer for truncated files.
        ms.Position = Math.Min(next, ms.Length);
    }

    throw new InvalidDataException("WAV data chunk not found");
}

Support

Priority place for bugs: https://github.com/tryAGI/ElevenLabs/issues
Priority place for ideas and general questions: https://github.com/tryAGI/ElevenLabs/discussions
Discord: https://discord.gg/Ca2xhfBf3v

Acknowledgments

JetBrains logo

This project is supported by JetBrains through the Open Source Support Program.

CodeRabbit logo

This project is supported by CodeRabbit through the Open Source Support Program.