Test :
$ curl -fsS http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "qwen2.5-1.5b",
"messages": [
{"role": "system", "content": "Tu es un assistant DevOps qui répond en français en une phrase."},
{"role": "user", "content": "Quelle commande systemd affiche les services actifs ?"}
],
"temperature": 0.2,
"max_tokens": 80
}' | jq
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": "systemctl list-units --active"
}
}
],
"created": 1781775300,
"model": "qwen2.5-1.5b",
"system_fingerprint": "b9687-b4024af6c",
"object": "chat.completion",
"usage": {
"completion_tokens": 8,
"prompt_tokens": 38,
"total_tokens": 46,
"prompt_tokens_details": {
"cached_tokens": 37
}
},
"id": "chatcmpl-mXP4t0noHYBKTaBSnZfxFdoTycVt42zC",
"timings": {
"cache_n": 37,
"prompt_n": 1,
"prompt_ms": 21.161,
"prompt_per_token_ms": 21.161,
"prompt_per_second": 47.25674590047729,
"predicted_n": 8,
"predicted_ms": 40.424,
"predicted_per_token_ms": 5.053,
"predicted_per_second": 197.90223629527014
}
}