MiniMax Speech 2.8 Turbo Sync Text-to-Speech

curl --request POST \
  --url https://api.novita.ai/v3/minimax-speech-2.8-turbo \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "stream": true,
  "voice_modify": {
    "pitch": 123,
    "timbre": 123,
    "intensity": 123,
    "sound_effects": "<string>"
  },
  "audio_setting": {
    "format": "<string>",
    "bitrate": 123,
    "channel": 123,
    "force_cbr": true,
    "sample_rate": 123
  },
  "output_format": "<string>",
  "voice_setting": {
    "vol": 123,
    "pitch": 123,
    "speed": 123,
    "emotion": "<string>",
    "voice_id": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "aigc_watermark": true,
  "language_boost": "<string>",
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "timber_weights": [
    {
      "weight": 123,
      "voice_id": "<string>"
    }
  ],
  "subtitle_enable": true,
  "pronunciation_dict": {
    "tone": [
      "<string>"
    ]
  }
}
'

import requests

url = "https://api.novita.ai/v3/minimax-speech-2.8-turbo"

payload = {
    "text": "<string>",
    "stream": True,
    "voice_modify": {
        "pitch": 123,
        "timbre": 123,
        "intensity": 123,
        "sound_effects": "<string>"
    },
    "audio_setting": {
        "format": "<string>",
        "bitrate": 123,
        "channel": 123,
        "force_cbr": True,
        "sample_rate": 123
    },
    "output_format": "<string>",
    "voice_setting": {
        "vol": 123,
        "pitch": 123,
        "speed": 123,
        "emotion": "<string>",
        "voice_id": "<string>",
        "latex_read": True,
        "text_normalization": True
    },
    "aigc_watermark": True,
    "language_boost": "<string>",
    "stream_options": { "exclude_aggregated_audio": True },
    "timber_weights": [
        {
            "weight": 123,
            "voice_id": "<string>"
        }
    ],
    "subtitle_enable": True,
    "pronunciation_dict": { "tone": ["<string>"] }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    stream: true,
    voice_modify: {pitch: 123, timbre: 123, intensity: 123, sound_effects: '<string>'},
    audio_setting: {
      format: '<string>',
      bitrate: 123,
      channel: 123,
      force_cbr: true,
      sample_rate: 123
    },
    output_format: '<string>',
    voice_setting: {
      vol: 123,
      pitch: 123,
      speed: 123,
      emotion: '<string>',
      voice_id: '<string>',
      latex_read: true,
      text_normalization: true
    },
    aigc_watermark: true,
    language_boost: '<string>',
    stream_options: {exclude_aggregated_audio: true},
    timber_weights: [{weight: 123, voice_id: '<string>'}],
    subtitle_enable: true,
    pronunciation_dict: {tone: ['<string>']}
  })
};

fetch('https://api.novita.ai/v3/minimax-speech-2.8-turbo', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.novita.ai/v3/minimax-speech-2.8-turbo",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'stream' => true,
    'voice_modify' => [
        'pitch' => 123,
        'timbre' => 123,
        'intensity' => 123,
        'sound_effects' => '<string>'
    ],
    'audio_setting' => [
        'format' => '<string>',
        'bitrate' => 123,
        'channel' => 123,
        'force_cbr' => true,
        'sample_rate' => 123
    ],
    'output_format' => '<string>',
    'voice_setting' => [
        'vol' => 123,
        'pitch' => 123,
        'speed' => 123,
        'emotion' => '<string>',
        'voice_id' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'aigc_watermark' => true,
    'language_boost' => '<string>',
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'timber_weights' => [
        [
                'weight' => 123,
                'voice_id' => '<string>'
        ]
    ],
    'subtitle_enable' => true,
    'pronunciation_dict' => [
        'tone' => [
                '<string>'
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample text-to-speech request to the
// minimax-speech-2.8-turbo endpoint and prints the raw response body.
//
// The header values and every payload field are placeholders; substitute
// real values before running. Any failure while building the request,
// sending it, or reading the response is reported and the program returns
// without touching a nil response.
func main() {

	url := "https://api.novita.ai/v3/minimax-speech-2.8-turbo"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	// Only defer Close once we know res is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.novita.ai/v3/minimax-speech-2.8-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.novita.ai/v3/minimax-speech-2.8-turbo")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "data": {
    "audio": "<string>",
    "status": 123,
    "subtitle_file": "<string>"
  },
  "trace_id": "<string>",
  "base_resp": {
    "status_msg": "<string>",
    "status_code": 123
  },
  "extra_info": {
    "bitrate": 123,
    "audio_size": 123,
    "word_count": 123,
    "audio_format": "<string>",
    "audio_length": 123,
    "audio_channel": 123,
    "usage_characters": 123,
    "audio_sample_rate": 123,
    "invisible_character_ratio": 123
  }
}

POST

minimax-speech-2.8-turbo

MiniMax Speech 2.8 Turbo Sync Text-to-Speech

curl --request POST \
  --url https://api.novita.ai/v3/minimax-speech-2.8-turbo \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "stream": true,
  "voice_modify": {
    "pitch": 123,
    "timbre": 123,
    "intensity": 123,
    "sound_effects": "<string>"
  },
  "audio_setting": {
    "format": "<string>",
    "bitrate": 123,
    "channel": 123,
    "force_cbr": true,
    "sample_rate": 123
  },
  "output_format": "<string>",
  "voice_setting": {
    "vol": 123,
    "pitch": 123,
    "speed": 123,
    "emotion": "<string>",
    "voice_id": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "aigc_watermark": true,
  "language_boost": "<string>",
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "timber_weights": [
    {
      "weight": 123,
      "voice_id": "<string>"
    }
  ],
  "subtitle_enable": true,
  "pronunciation_dict": {
    "tone": [
      "<string>"
    ]
  }
}
'

import requests

url = "https://api.novita.ai/v3/minimax-speech-2.8-turbo"

payload = {
    "text": "<string>",
    "stream": True,
    "voice_modify": {
        "pitch": 123,
        "timbre": 123,
        "intensity": 123,
        "sound_effects": "<string>"
    },
    "audio_setting": {
        "format": "<string>",
        "bitrate": 123,
        "channel": 123,
        "force_cbr": True,
        "sample_rate": 123
    },
    "output_format": "<string>",
    "voice_setting": {
        "vol": 123,
        "pitch": 123,
        "speed": 123,
        "emotion": "<string>",
        "voice_id": "<string>",
        "latex_read": True,
        "text_normalization": True
    },
    "aigc_watermark": True,
    "language_boost": "<string>",
    "stream_options": { "exclude_aggregated_audio": True },
    "timber_weights": [
        {
            "weight": 123,
            "voice_id": "<string>"
        }
    ],
    "subtitle_enable": True,
    "pronunciation_dict": { "tone": ["<string>"] }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    stream: true,
    voice_modify: {pitch: 123, timbre: 123, intensity: 123, sound_effects: '<string>'},
    audio_setting: {
      format: '<string>',
      bitrate: 123,
      channel: 123,
      force_cbr: true,
      sample_rate: 123
    },
    output_format: '<string>',
    voice_setting: {
      vol: 123,
      pitch: 123,
      speed: 123,
      emotion: '<string>',
      voice_id: '<string>',
      latex_read: true,
      text_normalization: true
    },
    aigc_watermark: true,
    language_boost: '<string>',
    stream_options: {exclude_aggregated_audio: true},
    timber_weights: [{weight: 123, voice_id: '<string>'}],
    subtitle_enable: true,
    pronunciation_dict: {tone: ['<string>']}
  })
};

fetch('https://api.novita.ai/v3/minimax-speech-2.8-turbo', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.novita.ai/v3/minimax-speech-2.8-turbo",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'stream' => true,
    'voice_modify' => [
        'pitch' => 123,
        'timbre' => 123,
        'intensity' => 123,
        'sound_effects' => '<string>'
    ],
    'audio_setting' => [
        'format' => '<string>',
        'bitrate' => 123,
        'channel' => 123,
        'force_cbr' => true,
        'sample_rate' => 123
    ],
    'output_format' => '<string>',
    'voice_setting' => [
        'vol' => 123,
        'pitch' => 123,
        'speed' => 123,
        'emotion' => '<string>',
        'voice_id' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'aigc_watermark' => true,
    'language_boost' => '<string>',
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'timber_weights' => [
        [
                'weight' => 123,
                'voice_id' => '<string>'
        ]
    ],
    'subtitle_enable' => true,
    'pronunciation_dict' => [
        'tone' => [
                '<string>'
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample text-to-speech request to the
// minimax-speech-2.8-turbo endpoint and prints the raw response body.
//
// The header values and every payload field are placeholders; substitute
// real values before running. Any failure while building the request,
// sending it, or reading the response is reported and the program returns
// without touching a nil response.
func main() {

	url := "https://api.novita.ai/v3/minimax-speech-2.8-turbo"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	// Only defer Close once we know res is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.novita.ai/v3/minimax-speech-2.8-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.novita.ai/v3/minimax-speech-2.8-turbo")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "data": {
    "audio": "<string>",
    "status": 123,
    "subtitle_file": "<string>"
  },
  "trace_id": "<string>",
  "base_resp": {
    "status_msg": "<string>",
    "status_code": 123
  },
  "extra_info": {
    "bitrate": 123,
    "audio_size": 123,
    "word_count": 123,
    "audio_format": "<string>",
    "audio_length": 123,
    "audio_channel": 123,
    "usage_characters": 123,
    "audio_sample_rate": 123,
    "invisible_character_ratio": 123
  }
}

MiniMax synchronous text-to-speech API using HTTP protocol. Supports various voice, emotion, speed and other parameter settings.

Request Headers

Content-Type

string

required

Supports: application/json

Authorization

string

required

Bearer authentication format, for example: Bearer {{API Key}}.

Request Body

text

string

required

Text to synthesize into speech. The length must be less than 10000 characters; if the text is longer than 3000 characters, streaming output is recommended. Supports paragraph breaks (newline), pause control (<#x#> tag), and interjection tags such as (laughs) and (coughs); interjection tags are only supported by speech-2.8-hd/turbo.

stream

boolean

default:false

Controls whether to enable streaming output. Default is false

voice_modify

object

Hide properties

pitch

integer

Pitch adjustment (deep/bright). Values closer to -100 produce a deeper voice; values closer to 100 produce a brighter voice. Value range: [-100, 100]

timbre

integer

Timbre adjustment (rich/crisp). Values closer to -100 produce a richer voice; values closer to 100 produce a crisper voice. Value range: [-100, 100]

intensity

integer

Intensity adjustment (powerful/soft). Values closer to -100 produce a more powerful voice; values closer to 100 produce a softer voice. Value range: [-100, 100]

sound_effects

string

Sound effect setting; only one can be selected at a time. Options: spacious_echo (spacious echo), auditorium_echo (auditorium broadcast), lofi_telephone (telephone distortion), robotic (electronic). Optional values: spacious_echo, auditorium_echo, lofi_telephone, robotic

audio_setting

object

Hide properties

format

string

default:"mp3"

Audio output format. wav is only supported in non-streaming output. Optional values: mp3, pcm, flac, wav

bitrate

integer

default:128000

Audio bitrate. Options: [32000, 64000, 128000, 256000], default is 128000. This parameter only applies to the mp3 format. Optional values: 32000, 64000, 128000, 256000

channel

integer

default:1

Number of audio channels. Options: [1, 2], where 1 is mono and 2 is stereo. Default is 1. Optional values: 1, 2

force_cbr

boolean

default:false

Controls constant bitrate (CBR) encoding. When set to true, audio will be encoded with constant bitrate. Note: This parameter only works when streaming output is enabled and audio format is mp3

sample_rate

integer

default:32000

Audio sample rate. Options: [8000, 16000, 22050, 24000, 32000, 44100], default is 32000. Optional values: 8000, 16000, 22050, 24000, 32000, 44100

output_format

string

default:"hex"

Controls the output format; options are url or hex, default is hex. This parameter is only valid in non-streaming scenarios. A returned URL is valid for 24 hours. Optional values: url, hex

voice_setting

object

Hide properties

vol

number

default:1

Audio volume; a higher value means louder. Range (0, 10], default is 1.0. Value range: [0, 10]

pitch

integer

default:0

Audio pitch, range [-12, 12], default is 0, where 0 is the original voice output. Value range: [-12, 12]

speed

number

default:1

Speech speed; a higher value means faster. Range [0.5, 2], default is 1.0. Value range: [0.5, 2]

emotion

string

Controls the emotion of the synthesized speech. Options correspond to 9 emotions: happy, sad, angry, fearful, disgusted, surprised, calm, fluent, whisper. The model will automatically match an appropriate emotion based on the input text. Optional values: happy, sad, angry, fearful, disgusted, surprised, calm, fluent, whisper

voice_id

string

required

Voice ID for audio synthesis. If mixed voice is needed, set timber_weights parameter and leave this empty. Supports system voice, cloned voice, and text-generated voice

latex_read

boolean

default:false

Controls whether to read LaTeX formulas, default is false. Only supports Chinese. When enabled, language_boost will be set to Chinese

text_normalization

boolean

default:false

Whether to enable Chinese/English text normalization, which can improve performance in number reading scenarios but slightly increases latency. Default is false

aigc_watermark

boolean

default:false

Controls whether to add audio rhythm identifier at the end of synthesized audio, default is false. This parameter is only valid for non-streaming synthesis

language_boost

string

Whether to enhance recognition of the specified minor languages and dialects. Default is null; can be set to auto to let the model decide automatically. Optional values: Chinese, Chinese,Yue, English, Arabic, Russian, Spanish, French, Portuguese, German, Turkish, Dutch, Ukrainian, Vietnamese, Indonesian, Japanese, Italian, Korean, Thai, Polish, Romanian, Greek, Czech, Finnish, Hindi, Bulgarian, Danish, Hebrew, Malay, Persian, Slovak, Swedish, Croatian, Filipino, Hungarian, Norwegian, Slovenian, Catalan, Nynorsk, Tamil, Afrikaans, auto

stream_options

object

Hide properties

exclude_aggregated_audio

boolean

default:false

Sets whether the last chunk contains the concatenated audio hex data. Default is false, meaning the last chunk contains the complete concatenated audio hex data

timber_weights

object[]

Mixed voice settings, supports up to 4 voice mixtures

Hide properties

weight

integer

required

Weight of each voice in the mix; must be set together with voice_id. Range [1, 100], supports up to 4 voice mixtures. A higher weight means more similarity to that voice. Value range: [1, 100]

voice_id

string

required

Voice ID for audio synthesis, must be set together with weight parameter. Supports system voice, cloned voice, and text-generated voice

subtitle_enable

boolean

default:false

Controls whether to enable subtitle service, default is false. This parameter is only valid in non-streaming output scenarios, and only valid for speech-2.6-hd, speech-2.6-turbo, speech-02-turbo, speech-02-hd, speech-01-turbo, speech-01-hd models

pronunciation_dict

object

Hide properties

tone

string[]

Defines pronunciation or replacement rules for special characters or symbols. For Chinese text, tones are represented by numbers: 1st tone = 1, 2nd tone = 2, 3rd tone = 3, 4th tone = 4, neutral tone = 5. Example: ["omg/oh my god"]

Response

data

object

Returned synthesis data object, may be null and needs null check

Hide properties

audio

string

Synthesized audio data in hex encoding, format matches the output format specified in request

status

integer

Current audio stream status: 1 means synthesizing, 2 means synthesis completed

subtitle_file

string

Subtitle download link. Subtitles for the audio file, accurate to sentence (no more than 50 characters), in milliseconds, JSON format

trace_id

string

Session ID for this request, used for troubleshooting

base_resp

object

Status code and details for this request

Hide properties

status_msg

string

Status details

status_code

integer

Status code. 0: success, 1000: unknown error, 1001: timeout, 1002: rate limit triggered, 1004: authentication failed, 1039: TPM rate limit triggered, 1042: invalid characters exceed 10%, 2013: invalid input parameters

extra_info

object

Additional audio information

Hide properties

bitrate

integer

Audio bitrate

audio_size

integer

Audio file size in bytes

word_count

integer

Word count of pronounced text, includes Chinese characters, numbers, letters, excludes punctuation

audio_format

string

Generated audio file format. Options: [mp3, pcm, flac]. Optional values: mp3, pcm, flac

audio_length

integer

Audio duration in milliseconds

audio_channel

integer

Generated audio channel count, 1: mono, 2: stereo

usage_characters

integer

Billable character count

audio_sample_rate

integer

Audio sample rate

invisible_character_ratio

number

Invalid character ratio. If invalid characters do not exceed 10% (inclusive), audio is generated normally and this ratio is returned; if the ratio exceeds 10%, an error is returned.

Last modified on July 8, 2026

MiniMax Speech 2.8 HD Async Text-to-Speech MiniMax Speech 2.8 HD Sync Text-to-Speech

⌘I

​Request Headers

​Request Body

​Response

Request Headers

Request Body

Response