## Basic model info

- Model name: minimax/minimax speech-2.8-turbo
- Model description: Generate speech from text prompts in a choice of voices using the MiniMax Speech-2.8 Turbo model, which leverages advanced AI techniques to produce high-quality synthesized speech.
- Endpoint name: text-to-speech


## Model schema

The model schema is defined in the OpenAPI schema: [OpenAPI Schema](https://oapi.sunra.ai/main/minimax/speech-2.8-turbo/latest.json)

### Model input schema

The model input schema is:


```json
{
  "description": "Input model for Minimax Speech 2.8 Turbo text-to-speech generation.",
  "properties": {
    "audio_format": {
      "default": "mp3",
      "description": "Audio format of the generated speech",
      "enum": [
        "mp3",
        "pcm",
        "flac"
      ],
      "title": "Audio Format",
      "type": "string",
      "x-sr-order": 408
    },
    "bitrate": {
      "default": 128000,
      "description": "Bitrate for the generated speech",
      "enum": [
        32000,
        64000,
        128000,
        256000
      ],
      "title": "Bitrate",
      "type": "integer",
      "x-sr-order": 407
    },
    "channel": {
      "default": "mono",
      "description": "Number of audio channels",
      "enum": [
        "mono",
        "stereo"
      ],
      "title": "Channel",
      "type": "string",
      "x-sr-order": 409
    },
    "emotion": {
      "description": "Emotion of the generated speech",
      "enum": [
        "happy",
        "sad",
        "angry",
        "fearful",
        "disgusted",
        "surprised",
        "neutral"
      ],
      "title": "Emotion",
      "type": "string",
      "x-sr-order": 404
    },
    "english_normalization": {
      "default": false,
      "description": "Enable English text normalization for better number reading (slightly increases latency)",
      "title": "English Normalization",
      "type": "boolean",
      "x-sr-order": 405
    },
    "language_boost": {
      "description": "Enhance recognition of specified languages and dialects",
      "enum": [
        "Chinese",
        "Chinese,Yue",
        "English",
        "Arabic",
        "Russian",
        "Spanish",
        "French",
        "Portuguese",
        "German",
        "Turkish",
        "Dutch",
        "Ukrainian",
        "Vietnamese",
        "Indonesian",
        "Japanese",
        "Italian",
        "Korean",
        "Thai",
        "Polish",
        "Romanian",
        "Greek",
        "Czech",
        "Finnish",
        "Hindi",
        "Bulgarian",
        "Danish",
        "Hebrew",
        "Malay",
        "Slovak",
        "Swedish",
        "Croatian",
        "Hungarian",
        "Norwegian",
        "Slovenian",
        "Catalan",
        "Nynorsk",
        "Afrikaans",
        "auto"
      ],
      "title": "Language Boost",
      "type": "string",
      "x-sr-order": 410
    },
    "normalization_setting": {
      "description": "Loudness normalization settings for the audio",
      "properties": {
        "enabled": {
          "default": true,
          "description": "Enable loudness normalization for the audio",
          "title": "Enabled",
          "type": "boolean",
          "x-sr-order": 601
        },
        "target_loudness": {
          "default": -18,
          "description": "Target loudness in LUFS (default -18.0)",
          "maximum": -10,
          "minimum": -70,
          "title": "Target Loudness",
          "type": "number",
          "x-sr-order": 602
        },
        "target_peak": {
          "default": -0.5,
          "description": "Target peak level in dBTP (default -0.5)",
          "maximum": 0,
          "minimum": -3,
          "title": "Target Peak",
          "type": "number",
          "x-sr-order": 604
        },
        "target_range": {
          "default": 8,
          "description": "Target loudness range in LU (default 8.0)",
          "maximum": 20,
          "minimum": 0,
          "title": "Target Range",
          "type": "number",
          "x-sr-order": 603
        }
      },
      "title": "LoudnessNormalizationInput",
      "type": "object",
      "x-sr-order": 412
    },
    "pitch": {
      "default": 0,
      "description": "Voice pitch (-12 to 12)",
      "maximum": 12,
      "minimum": -12,
      "title": "Pitch",
      "type": "integer",
      "x-sr-order": 403
    },
    "pronunciation_dict": {
      "description": "Custom pronunciation dictionary for text replacement",
      "properties": {
        "tone_list": {
          "description": "List of pronunciation replacements in format ['text/(pronunciation)', ...]. For Chinese, tones are 1-5.",
          "items": {
            "type": "string"
          },
          "title": "Tone List",
          "type": "array",
          "x-sr-order": 701
        }
      },
      "title": "PronunciationDictInput",
      "type": "object",
      "x-sr-order": 413
    },
    "sample_rate": {
      "default": 32000,
      "description": "Sample rate for the generated speech",
      "enum": [
        8000,
        16000,
        22050,
        24000,
        32000,
        44100
      ],
      "title": "Sample Rate",
      "type": "integer",
      "x-sr-order": 406
    },
    "speed": {
      "default": 1,
      "description": "Speech speed (0.5-2.0)",
      "maximum": 2,
      "minimum": 0.5,
      "title": "Speed",
      "type": "number",
      "x-sr-order": 401
    },
    "text": {
      "description": "Text to convert to speech. Use <#x#> for pauses (x = 0.01-99.99 seconds). Supports interjection tags: (laughs), (sighs), (coughs), (clears throat), (gasps), (sniffs), (groans), (yawns).",
      "maxLength": 10000,
      "minLength": 1,
      "title": "Text",
      "type": "string",
      "x-sr-order": 201
    },
    "voice_id": {
      "anyOf": [
        {
          "enum": [
            "Wise_Woman",
            "Friendly_Person",
            "Inspirational_girl",
            "Deep_Voice_Man",
            "Calm_Woman",
            "Casual_Guy",
            "Lively_Girl",
            "Patient_Man",
            "Young_Knight",
            "Determined_Man",
            "Lovely_Girl",
            "Decent_Boy",
            "Imposing_Manner",
            "Elegant_Man",
            "Abbess",
            "Sweet_Girl_2",
            "Exuberant_Girl"
          ],
          "type": "string"
        },
        {
          "type": "string"
        }
      ],
      "default": "Wise_Woman",
      "description": "Desired voice ID. Use a voice ID you have trained (https://sunra.ai/models/minimax/voice-cloning), or one of the following system voice IDs: Wise_Woman, Friendly_Person, Inspirational_girl, Deep_Voice_Man, Calm_Woman, Casual_Guy, Lively_Girl, Patient_Man, Young_Knight, Determined_Man, Lovely_Girl, Decent_Boy, Imposing_Manner, Elegant_Man, Abbess, Sweet_Girl_2, Exuberant_Girl",
      "title": "Voice Id",
      "x-sr-order": 301
    },
    "voice_modify": {
      "description": "Voice modification settings to adjust pitch, intensity, and timbre",
      "properties": {
        "intensity": {
          "default": 0,
          "description": "Intensity/energy of the voice. Range: -100 to 100. Higher values create more energetic speech.",
          "maximum": 100,
          "minimum": -100,
          "title": "Intensity",
          "type": "integer",
          "x-sr-order": 502
        },
        "pitch": {
          "default": 0,
          "description": "Pitch adjustment in semitones. Range: -100 to 100. Positive values raise pitch, negative values lower it.",
          "maximum": 100,
          "minimum": -100,
          "title": "Pitch",
          "type": "integer",
          "x-sr-order": 501
        },
        "timbre": {
          "default": 0,
          "description": "Timbre adjustment. Range: -100 to 100. Affects the tonal quality of the voice.",
          "maximum": 100,
          "minimum": -100,
          "title": "Timbre",
          "type": "integer",
          "x-sr-order": 503
        }
      },
      "title": "VoiceModifyInput",
      "type": "object",
      "x-sr-order": 411
    },
    "volume": {
      "default": 1,
      "description": "Speech volume (0.01-10)",
      "maximum": 10,
      "minimum": 0.01,
      "title": "Volume",
      "type": "number",
      "x-sr-order": 402
    }
  },
  "required": [
    "text"
  ],
  "title": "TextToSpeechInput",
  "type": "object"
}
```


### Model output schema

The model output schema is:


```json
{
  "properties": {
    "audio": {
      "properties": {
        "content_type": {
          "description": "The mime type of the file.",
          "title": "Content Type",
          "type": "string"
        },
        "file_name": {
          "description": "The name of the file. It will be auto-generated if not provided.",
          "title": "File Name",
          "type": "string"
        },
        "file_size": {
          "description": "The size of the file in bytes.",
          "title": "File Size",
          "type": "integer"
        },
        "url": {
          "description": "The URL where the file can be downloaded from.",
          "title": "Url",
          "type": "string"
        }
      },
      "required": [
        "content_type",
        "file_name",
        "file_size",
        "url"
      ],
      "title": "SunraFile",
      "type": "object"
    },
    "duration_ms": {
      "description": "Duration of the audio in milliseconds",
      "title": "Duration Ms",
      "type": "integer"
    },
    "input_character_count": {
      "description": "Number of characters in the input text for billing",
      "title": "Input Character Count",
      "type": "integer"
    }
  },
  "required": [
    "audio",
    "duration_ms",
    "input_character_count"
  ],
  "title": "MinimaxSpeech28TurboOutput",
  "type": "object"
}
```


## Example inputs and outputs

Use the following example inputs and outputs to understand the model.

### Input example


```json
{
  "audio_format": "mp3",
  "bitrate": 128000,
  "channel": "mono",
  "emotion": "neutral",
  "english_normalization": false,
  "language_boost": "auto",
  "normalization_setting": {},
  "pitch": 0,
  "pronunciation_dict": {},
  "sample_rate": 32000,
  "speed": 1,
  "text": "Hello! Welcome to MiniMax Speech 2.8 Turbo.",
  "voice_id": "Wise_Woman",
  "voice_modify": {},
  "volume": 1
}
```


### Output example


```json
{
}
```


## Model code examples

### JavaScript


```javascript
import { sunra } from "@sunra/client";

// Call the text-to-speech endpoint through sunra.subscribe, logging each queue
// update, then print the returned data and request ID.
const result = await sunra.subscribe("minimax/speech-2.8-turbo/text-to-speech", {
  input: {
    // Required by the input schema: 1-10000 characters.
    text: 'Hello! Welcome to MiniMax Speech 2.8 Turbo.',
    voice_id: 'Wise_Woman',
    speed: 1,
    volume: 1,
    pitch: 0,
    // Must be one of the schema's enum values; an empty string is not valid.
    emotion: 'neutral',
    english_normalization: false,
    sample_rate: 32000,
    bitrate: 128000,
    audio_format: 'mp3',
    channel: 'mono',
    // Must be one of the schema's enum values; an empty string is not valid.
    language_boost: 'auto',
    // Optional nested settings; every field inside them is optional per the input schema.
    voice_modify: {},
    normalization_setting: {},
    pronunciation_dict: {}
  },
  logs: true,
  onQueueUpdate: (update) => {
    console.log(`Status Update: ${update.status}, Request ID: ${update.request_id}`);
  },
});
console.log(result.data);
console.log(result.requestId);
```


### Python


```python
import sunra_client

# Call the text-to-speech endpoint through sunra_client.subscribe. `print` is
# passed as the enqueue and queue-update callback, and the result is printed.
result = sunra_client.subscribe(
    "minimax/speech-2.8-turbo/text-to-speech",
    arguments={
        # Required by the input schema: 1-10000 characters.
        "text": "Hello! Welcome to MiniMax Speech 2.8 Turbo.",
        "voice_id": "Wise_Woman",
        "speed": 1,
        "volume": 1,
        "pitch": 0,
        # Must be one of the schema's enum values; an empty string is not valid.
        "emotion": "neutral",
        "english_normalization": False,
        "sample_rate": 32000,
        "bitrate": 128000,
        "audio_format": "mp3",
        "channel": "mono",
        # Must be one of the schema's enum values; an empty string is not valid.
        "language_boost": "auto",
        # Optional nested settings; every field inside them is optional per the input schema.
        "voice_modify": {},
        "normalization_setting": {},
        "pronunciation_dict": {},
    },
    with_logs=True,
    on_enqueue=print,
    on_queue_update=print,
)
print(result)
```


### Java


```java
import ai.sunra.client.*;
import java.util.Map;
import com.google.gson.JsonObject;

// Create a client via SunraClient.withEnvCredentials().
var client = SunraClient.withEnvCredentials();

// Call the text-to-speech endpoint, printing each queue update.
var response = client.subscribe(
    "minimax/speech-2.8-turbo/text-to-speech",
    SubscribeOptions.<JsonObject>builder()
        // Map.of(...) only has overloads for up to 10 key/value pairs; this request
        // has 15 inputs, so the map is built with Map.ofEntries instead.
        .input(Map.<String, Object>ofEntries(
            // Required by the input schema: 1-10000 characters.
            Map.entry("text", "Hello! Welcome to MiniMax Speech 2.8 Turbo."),
            Map.entry("voice_id", "Wise_Woman"),
            Map.entry("speed", 1),
            Map.entry("volume", 1),
            Map.entry("pitch", 0),
            // Must be one of the schema's enum values; an empty string is not valid.
            Map.entry("emotion", "neutral"),
            Map.entry("english_normalization", false),
            Map.entry("sample_rate", 32000),
            Map.entry("bitrate", 128000),
            Map.entry("audio_format", "mp3"),
            Map.entry("channel", "mono"),
            // Must be one of the schema's enum values; an empty string is not valid.
            Map.entry("language_boost", "auto"),
            // Optional nested settings, sent as empty objects; every field inside
            // them is optional per the input schema.
            Map.entry("voice_modify", Map.of()),
            Map.entry("normalization_setting", Map.of()),
            Map.entry("pronunciation_dict", Map.of())))
        .resultType(JsonObject.class)
        .onQueueUpdate(update -> System.out.printf(
            "\nStatus Update: %s, Request ID: %s%n",
            update.getStatus(),
            update.getRequestId()
        ))
        .logs(true)
        .build()
);

System.out.println("Completed!");
System.out.println(response.getData());
  
```


### Kotlin


```kotlin
import ai.sunra.client.kt.*
import com.google.gson.JsonObject

// Create a client via createSunraClient().
val client = createSunraClient()

// Call the text-to-speech endpoint, printing each queue update.
val response = client.subscribe<JsonObject>(
    endpointId = "minimax/speech-2.8-turbo/text-to-speech",
    input = mapOf(
            // Required by the input schema: 1-10000 characters.
            "text" to "Hello! Welcome to MiniMax Speech 2.8 Turbo.",
            "voice_id" to "Wise_Woman",
            "speed" to 1,
            "volume" to 1,
            "pitch" to 0,
            // Must be one of the schema's enum values; an empty string is not valid.
            "emotion" to "neutral",
            "english_normalization" to false,
            "sample_rate" to 32000,
            "bitrate" to 128000,
            "audio_format" to "mp3",
            "channel" to "mono",
            // Must be one of the schema's enum values; an empty string is not valid.
            "language_boost" to "auto",
            // Optional nested settings, sent as empty objects; every field inside
            // them is optional per the input schema.
            "voice_modify" to emptyMap<String, Any>(),
            "normalization_setting" to emptyMap<String, Any>(),
            "pronunciation_dict" to emptyMap<String, Any>()),
    options = ai.sunra.client.kt.SubscribeOptions(logs = true),
    onUpdate = { update ->
        println("\nStatus Update: ${update.status}, Request ID: ${update.requestId}")
    }
)

println("Completed!")
println(response.data)
  
```


### Curl


```bash

# POST a text-to-speech request to the queue endpoint; the API key is read from $SUNRA_KEY.
# "text" must be 1-10000 characters, and "emotion" / "language_boost" must be enum values from the input schema.
curl --request POST \
  --url https://api.sunra.ai/v1/queue/minimax/speech-2.8-turbo/text-to-speech \
  --header "Authorization: Key $SUNRA_KEY" \
  --header "Content-Type: application/json" \
  --data '{"text":"Hello! Welcome to MiniMax Speech 2.8 Turbo.","voice_id":"Wise_Woman","speed":1,"volume":1,"pitch":0,"emotion":"neutral","english_normalization":false,"sample_rate":32000,"bitrate":128000,"audio_format":"mp3","channel":"mono","language_boost":"auto","voice_modify":{},"normalization_setting":{},"pronunciation_dict":{}}'
    
```


## Model readme

>