## Basic model info - Model name: minimax/minimax speech-2.8-turbo - Model description: Generate speech from text prompts and different voices using the MiniMax Speech-2.8 Turbo model, which leverages advanced AI techniques to create high-quality text-to-speech. - Endpoint name: text-to-speech ## Model schema The model schema is defined in the OpenAPI schema: [OpenAPI Schema](https://oapi.sunra.ai/main/minimax/speech-2.8-turbo/latest.json) ### Model input schema The model input schema is: ```json { "description": "Input model for Minimax Speech 2.8 Turbo text-to-speech generation.", "properties": { "audio_format": { "default": "mp3", "description": "Audio format of the generated speech", "enum": [ "mp3", "pcm", "flac" ], "title": "Audio Format", "type": "string", "x-sr-order": 408 }, "bitrate": { "default": 128000, "description": "Bitrate for the generated speech", "enum": [ 32000, 64000, 128000, 256000 ], "title": "Bitrate", "type": "integer", "x-sr-order": 407 }, "channel": { "default": "mono", "description": "Number of audio channels", "enum": [ "mono", "stereo" ], "title": "Channel", "type": "string", "x-sr-order": 409 }, "emotion": { "description": "Emotion of the generated speech", "enum": [ "happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral" ], "title": "Emotion", "type": "string", "x-sr-order": 404 }, "english_normalization": { "default": false, "description": "Enable English text normalization for better number reading (slightly increases latency)", "title": "English Normalization", "type": "boolean", "x-sr-order": 405 }, "language_boost": { "description": "Enhance recognition of specified languages and dialects", "enum": [ "Chinese", "Chinese,Yue", "English", "Arabic", "Russian", "Spanish", "French", "Portuguese", "German", "Turkish", "Dutch", "Ukrainian", "Vietnamese", "Indonesian", "Japanese", "Italian", "Korean", "Thai", "Polish", "Romanian", "Greek", "Czech", "Finnish", "Hindi", "Bulgarian", "Danish", "Hebrew", "Malay", "Slovak", 
"Swedish", "Croatian", "Hungarian", "Norwegian", "Slovenian", "Catalan", "Nynorsk", "Afrikaans", "auto" ], "title": "Language Boost", "type": "string", "x-sr-order": 410 }, "normalization_setting": { "description": "Loudness normalization settings for the audio", "properties": { "enabled": { "default": true, "description": "Enable loudness normalization for the audio", "title": "Enabled", "type": "boolean", "x-sr-order": 601 }, "target_loudness": { "default": -18, "description": "Target loudness in LUFS (default -18.0)", "maximum": -10, "minimum": -70, "title": "Target Loudness", "type": "number", "x-sr-order": 602 }, "target_peak": { "default": -0.5, "description": "Target peak level in dBTP (default -0.5)", "maximum": 0, "minimum": -3, "title": "Target Peak", "type": "number", "x-sr-order": 604 }, "target_range": { "default": 8, "description": "Target loudness range in LU (default 8.0)", "maximum": 20, "minimum": 0, "title": "Target Range", "type": "number", "x-sr-order": 603 } }, "title": "LoudnessNormalizationInput", "type": "object", "x-sr-order": 412 }, "pitch": { "default": 0, "description": "Voice pitch (-12 to 12)", "maximum": 12, "minimum": -12, "title": "Pitch", "type": "integer", "x-sr-order": 403 }, "pronunciation_dict": { "description": "Custom pronunciation dictionary for text replacement", "properties": { "tone_list": { "description": "List of pronunciation replacements in format ['text/(pronunciation)', ...]. 
For Chinese, tones are 1-5.", "items": { "type": "string" }, "title": "Tone List", "type": "array", "x-sr-order": 701 } }, "title": "PronunciationDictInput", "type": "object", "x-sr-order": 413 }, "sample_rate": { "default": 32000, "description": "Sample rate for the generated speech", "enum": [ 8000, 16000, 22050, 24000, 32000, 44100 ], "title": "Sample Rate", "type": "integer", "x-sr-order": 406 }, "speed": { "default": 1, "description": "Speech speed (0.5-2.0)", "maximum": 2, "minimum": 0.5, "title": "Speed", "type": "number", "x-sr-order": 401 }, "text": { "description": "Text to convert to speech. Use <#x#> for pauses (x = 0.01-99.99 seconds). Supports interjection tags: (laughs), (sighs), (coughs), (clears throat), (gasps), (sniffs), (groans), (yawns).", "maxLength": 10000, "minLength": 1, "title": "Text", "type": "string", "x-sr-order": 201 }, "voice_id": { "anyOf": [ { "enum": [ "Wise_Woman", "Friendly_Person", "Inspirational_girl", "Deep_Voice_Man", "Calm_Woman", "Casual_Guy", "Lively_Girl", "Patient_Man", "Young_Knight", "Determined_Man", "Lovely_Girl", "Decent_Boy", "Imposing_Manner", "Elegant_Man", "Abbess", "Sweet_Girl_2", "Exuberant_Girl" ], "type": "string" }, { "type": "string" } ], "default": "Wise_Woman", "description": "Desired voice ID. Use a voice ID you have trained (https://sunra.ai/models/minimax/voice-cloning), or one of the following system voice IDs: Wise_Woman, Friendly_Person, Inspirational_girl, Deep_Voice_Man, Calm_Woman, Casual_Guy, Lively_Girl, Patient_Man, Young_Knight, Determined_Man, Lovely_Girl, Decent_Boy, Imposing_Manner, Elegant_Man, Abbess, Sweet_Girl_2, Exuberant_Girl", "title": "Voice Id", "x-sr-order": 301 }, "voice_modify": { "description": "Voice modification settings to adjust pitch, intensity, and timbre", "properties": { "intensity": { "default": 0, "description": "Intensity/energy of the voice. Range: -100 to 100. 
Higher values create more energetic speech.", "maximum": 100, "minimum": -100, "title": "Intensity", "type": "integer", "x-sr-order": 502 }, "pitch": { "default": 0, "description": "Pitch adjustment in semitones. Range: -100 to 100. Positive values raise pitch, negative values lower it.", "maximum": 100, "minimum": -100, "title": "Pitch", "type": "integer", "x-sr-order": 501 }, "timbre": { "default": 0, "description": "Timbre adjustment. Range: -100 to 100. Affects the tonal quality of the voice.", "maximum": 100, "minimum": -100, "title": "Timbre", "type": "integer", "x-sr-order": 503 } }, "title": "VoiceModifyInput", "type": "object", "x-sr-order": 411 }, "volume": { "default": 1, "description": "Speech volume (0.01-10)", "maximum": 10, "minimum": 0.01, "title": "Volume", "type": "number", "x-sr-order": 402 } }, "required": [ "text" ], "title": "TextToSpeechInput", "type": "object" } ``` ### Model output schema The model output schema is: ```json { "properties": { "audio": { "properties": { "content_type": { "description": "The mime type of the file.", "title": "Content Type", "type": "string" }, "file_name": { "description": "The name of the file. 
It will be auto-generated if not provided.", "title": "File Name", "type": "string" }, "file_size": { "description": "The size of the file in bytes.", "title": "File Size", "type": "integer" }, "url": { "description": "The URL where the file can be downloaded from.", "title": "Url", "type": "string" } }, "required": [ "content_type", "file_name", "file_size", "url" ], "title": "SunraFile", "type": "object" }, "duration_ms": { "description": "Duration of the audio in milliseconds", "title": "Duration Ms", "type": "integer" }, "input_character_count": { "description": "Number of characters in the input text for billing", "title": "Input Character Count", "type": "integer" } }, "required": [ "audio", "duration_ms", "input_character_count" ], "title": "MinimaxSpeech28TurboOutput", "type": "object" } ``` ## Example inputs and outputs Use the following example inputs and outputs to understand the model. ### Input example ```json { "audio_format": "mp3", "bitrate": 128000, "channel": "mono", "emotion": "", "english_normalization": false, "language_boost": "", "normalization_setting": { }, "pitch": 0, "pronunciation_dict": { }, "sample_rate": 32000, "speed": 1, "text": "", "voice_id": "Wise_Woman", "voice_modify": { }, "volume": 1 } ``` ### Output example ```json { } ``` ## Model code examples ### JavaScript ```javascript import { sunra } from "@sunra/client"; const result = await sunra.subscribe("minimax/speech-2.8-turbo/text-to-speech", { input: { text: '', voice_id: 'Wise_Woman', speed: 1, volume: 1, pitch: 0, emotion: '', english_normalization: false, sample_rate: 32000, bitrate: 128000, audio_format: 'mp3', channel: 'mono', language_boost: '', voice_modify: {}, normalization_setting: {}, pronunciation_dict: {} }, logs: true, onQueueUpdate: (update) => { console.log(`Status Update: ${update.status}, Request ID: ${update.request_id}`); }, }); console.log(result.data); console.log(result.requestId); ``` ### Python ```python import sunra_client result = 
sunra_client.subscribe( "minimax/speech-2.8-turbo/text-to-speech", arguments={ "text": "", "voice_id": "Wise_Woman", "speed": 1, "volume": 1, "pitch": 0, "emotion": "", "english_normalization": False, "sample_rate": 32000, "bitrate": 128000, "audio_format": "mp3", "channel": "mono", "language_boost": "", "voice_modify": { }, "normalization_setting": { }, "pronunciation_dict": { } }, with_logs=True, on_enqueue=print, on_queue_update=print, ) print(result) ``` ### Java ```java import ai.sunra.client.*; import java.util.Map; import com.google.gson.JsonObject; var client = SunraClient.withEnvCredentials(); var response = client.subscribe( "minimax/speech-2.8-turbo/text-to-speech", SubscribeOptions.builder() .input(Map.ofEntries( Map.entry("text", ""), Map.entry("voice_id", "Wise_Woman"), Map.entry("speed", 1), Map.entry("volume", 1), Map.entry("pitch", 0), Map.entry("emotion", ""), Map.entry("english_normalization", false), Map.entry("sample_rate", 32000), Map.entry("bitrate", 128000), Map.entry("audio_format", "mp3"), Map.entry("channel", "mono"), Map.entry("language_boost", ""), Map.entry("voice_modify", Map.of()), Map.entry("normalization_setting", Map.of()), Map.entry("pronunciation_dict", Map.of()))) .resultType(JsonObject.class) .onQueueUpdate(update -> System.out.printf( "\nStatus Update: %s, Request ID: %s%n", update.getStatus(), update.getRequestId() )) .logs(true) .build() ); System.out.println("Completed!"); System.out.println(response.getData()); ``` ### Kotlin ```kotlin import ai.sunra.client.kt.* import com.google.gson.JsonObject val client = createSunraClient() val response = client.subscribe( endpointId = "minimax/speech-2.8-turbo/text-to-speech", input = mapOf( "text" to "", "voice_id" to "Wise_Woman", "speed" to 1, "volume" to 1, "pitch" to 0, "emotion" to "", "english_normalization" to false, "sample_rate" to 32000, "bitrate" to 128000, "audio_format" to "mp3", "channel" to "mono", "language_boost" to "", "voice_modify" to emptyMap<String, Any>(), "normalization_setting" to emptyMap<String, Any>(), "pronunciation_dict" to emptyMap<String, Any>()), options = ai.sunra.client.kt.SubscribeOptions(logs = true), onUpdate = { 
update -> println("\nStatus Update: ${update.status}, Request ID: ${update.requestId}") } ) println("Completed!") println(response.data) ``` ### Curl ```bash curl --request POST \ --url https://api.sunra.ai/v1/queue/minimax/speech-2.8-turbo/text-to-speech \ --header "Authorization: Key $SUNRA_KEY" \ --header "Content-Type: application/json" \ --data '{"text":"","voice_id":"Wise_Woman","speed":1,"volume":1,"pitch":0,"emotion":"","english_normalization":false,"sample_rate":32000,"bitrate":128000,"audio_format":"mp3","channel":"mono","language_boost":"","voice_modify":{},"normalization_setting":{},"pronunciation_dict":{}}' ``` ## Model readme >