We provide an OpenAI API-compatible interface. You only need to install the StackFlow package.
sudo apt install lib-llm llm-sys llm-cosy-voice llm-openai-api
sudo apt install llm-model-cosyvoice2-0.5b-axcl

curl http://127.0.0.1:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "CosyVoice2-0.5B-axcl",
"response_format": "wav",
"input": "But thy eternal summer shall not fade, Nor lose possession of that fair thou ow’st; Nor shall Death brag thou wander’st in his shade, When in eternal lines to time thou grow’st; So long as men can breathe or eyes can see, So long lives this, and this gives life to thee."
}' \
-o output.wav

from pathlib import Path
from openai import OpenAI
client = OpenAI(
api_key="sk-",
base_url="http://127.0.0.1:8000/v1"
)
speech_file_path = Path(__file__).parent / "output.wav"
with client.audio.speech.with_streaming_response.create(
model="CosyVoice2-0.5B-axcl",
voice="prompt_data",
response_format="wav",
input="But thy eternal summer shall not fade, Nor lose possession of that fair thou ow’st; Nor shall Death brag thou wander’st in his shade, When in eternal lines to time thou grow’st; So long as men can breathe or eyes can see, So long lives this, and this gives life to thee.",
) as response:
    response.stream_to_file(speech_file_path)

git clone --recurse-submodules https://huggingface.co/M5Stack/CosyVoice2-scripts

File Description
m5stack@raspberrypi:~/rsp/CosyVoice2-scripts $ ls -lh
total 28K
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:18 asset
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:18 CosyVoice-BlankEN
drwxrwxr-x 2 m5stack m5stack 4.0K Nov 6 15:19 frontend-onnx
drwxrwxr-x 3 m5stack m5stack 4.0K Nov 6 15:18 pengzhendong
-rw-rw-r-- 1 m5stack m5stack 24 Nov 6 15:18 README.md
-rw-rw-r-- 1 m5stack m5stack 103 Nov 6 15:18 requirements.txt
drwxrwxr-x 3 m5stack m5stack 4.0K Nov 6 15:18 scripts

python -m venv cosyvoice
source cosyvoice/bin/activate
pip install -r requirements.txt

python3 scripts/process_prompt.py --prompt_text asset/en_woman1.txt --prompt_speech asset/en_woman1.mp3 --output en_woman1

Successfully generated the audio feature file
(cosyvoice) m5stack@raspberrypi:~/rsp/CosyVoice2-scripts $ python3 scripts/process_prompt.py --prompt_text asset/en_woman1.txt --prompt_speech asset/en_woman1.mp3 --output en_woman1
2025-11-06 16:16:01.526554414 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
prompt_text But many of these southern girls have the same trouble, said Holly.
fmax 8000
prompt speech token size: torch.Size([1, 103])

cp -r en_woman1 /opt/m5stack/data/CosyVoice2-0.5B-axcl/
sudo systemctl restart llm-sys # Reset model configuration

curl http://127.0.0.1:8000/v1/audio/speech \
-H "Content-Type: application/json" \
-d '{
"model": "CosyVoice2-0.5B-axcl",
"voice": "en_woman1",
"response_format": "wav",
"input": "But thy eternal summer shall not fade, Nor lose possession of that fair thou ow’st; Nor shall Death brag thou wander’st in his shade, When in eternal lines to time thou grow’st; So long as men can breathe or eyes can see, So long lives this, and this gives life to thee."
}' \
-o output.wav

from pathlib import Path
from openai import OpenAI
client = OpenAI(
api_key="sk-",
base_url="http://127.0.0.1:8000/v1"
)
speech_file_path = Path(__file__).parent / "output.wav"
with client.audio.speech.with_streaming_response.create(
model="CosyVoice2-0.5B-axcl",
voice="en_woman1",
response_format="wav",
input="But thy eternal summer shall not fade, Nor lose possession of that fair thou ow’st; Nor shall Death brag thou wander’st in his shade, When in eternal lines to time thou grow’st; So long as men can breathe or eyes can see, So long lives this, and this gives life to thee.",
) as response:
    response.stream_to_file(speech_file_path)