はじめに
最近ChatGPTを英語の勉強のために英語で質問するようにしているのですが、「これって会話できないかな?」というのが気になったので作ってみました。
環境
.NET 6 コンソールアプリ
Microsoft.CognitiveServices.Speech 1.26.0
コード
OpenAIに質問を投げる
以下のようになります。
/// <summary>
/// Sends <paramref name="question"/> to the OpenAI Chat Completions API and
/// returns the assistant's reply flattened to a single line.
/// </summary>
/// <param name="question">The user's question text.</param>
/// <param name="client">A shared <see cref="HttpClient"/> instance (reused across calls).</param>
/// <param name="openai_api_key">OpenAI API key, sent as a Bearer token.</param>
/// <returns>The answer text, or a fixed English fallback message on failure.</returns>
static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
{
    const string endpoint = "https://api.openai.com/v1/chat/completions";
    var content = JsonContent.Create(new
    {
        model = "gpt-3.5-turbo",
        messages = new List<Message> { new Message("user", question) },
        // BUG FIX: the original requested stream = true but then read the body as a
        // single JSON document. With streaming enabled the API answers with SSE
        // chunks ("data: {...}" lines), so the choices[0].message.content parse
        // below could never succeed. Request a non-streaming completion instead.
        stream = false
    });
    var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint)
    {
        Content = content
    };
    requestMessage.Headers.Add("Authorization", $"Bearer {openai_api_key}");

    HttpResponseMessage response = await client.SendAsync(requestMessage);
    if (!response.IsSuccessStatusCode)
    {
        return "An error has occurred.";
    }

    var resultContent = await response.Content.ReadAsStringAsync();
    // Null-conditional (?) instead of null-forgiving (!): a malformed body now
    // falls through to the fallback message rather than throwing NullReferenceException.
    var obj = JsonNode.Parse(resultContent)?["choices"]?[0]?["message"]?["content"];
    if (obj == null)
    {
        return "Can not get an answer.";
    }
    // Strip newlines so the speech synthesizer reads the answer as one utterance.
    return obj.ToString().Replace("\n", "");
}
本文のJSONを作成するためのクラスを作っておきます。
/// <summary>
/// One element of the "messages" array in the chat-completions request body.
/// BUG FIX: the class was declared as lowercase <c>message</c> but is
/// instantiated as <c>new Message("user", question)</c> in AnswerQuestionAsync,
/// which would not compile; renamed to <c>Message</c> to match the usage.
/// Property names stay lowercase on purpose so the default System.Text.Json
/// serialization emits the exact JSON keys ("role", "content") the API expects.
/// </summary>
class Message
{
    public Message(string _role, string _content)
    {
        role = _role;
        content = _content;
    }

    // "user", "assistant" or "system".
    public string role { get; set; }

    // The message text itself.
    public string content { get; set; }
}
音声を文字に変換
/// <summary>
/// Captures one utterance from the default microphone and returns the text
/// recognized by the Azure Speech service (en-US). Returns an empty string
/// when nothing was recognized or recognition was canceled.
/// </summary>
static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
{
    var config = SpeechConfig.FromSubscription(speechKey, speechRegion);
    config.SpeechRecognitionLanguage = "en-US";

    using var microphone = AudioConfig.FromDefaultMicrophoneInput();
    using var recognizer = new SpeechRecognizer(config, microphone);

    Console.WriteLine("マイクに向けて話してください。");
    var result = await recognizer.RecognizeOnceAsync();

    string recognizedText = "";
    if (result.Reason == ResultReason.RecognizedSpeech)
    {
        Console.WriteLine($"RECOGNIZED: Text={result.Text}");
        recognizedText = result.Text;
    }
    else if (result.Reason == ResultReason.NoMatch)
    {
        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var details = CancellationDetails.FromResult(result);
        Console.WriteLine($"CANCELED: Reason={details.Reason}");
        if (details.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={details.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails={details.ErrorDetails}");
            Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
        }
    }
    return recognizedText;
}
以下のページのコードを参考にしました。
音声テキスト変換クイックスタート - Speech サービス - Azure Cognitive Services | Microsoft Learn
文字を音声に変換
/// <summary>
/// Speaks <paramref name="text"/> aloud through the Azure Speech service
/// using the "en-US-JennyNeural" voice, logging the outcome to the console.
/// </summary>
static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
{
    var config = SpeechConfig.FromSubscription(speechKey, speechRegion);
    config.SpeechSynthesisVoiceName = "en-US-JennyNeural";

    using var synthesizer = new SpeechSynthesizer(config);
    var result = await synthesizer.SpeakTextAsync(text);

    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        Console.WriteLine($"Speech synthesized for text: [{text}]");
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var details = SpeechSynthesisCancellationDetails.FromResult(result);
        Console.WriteLine($"CANCELED: Reason={details.Reason}");
        if (details.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={details.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails=[{details.ErrorDetails}]");
            Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
        }
    }
}
以下のページを参考にしました。
テキスト読み上げクイックスタート - Speech サービス - Azure Cognitive Services | Microsoft Learn
全体のコード
コード全体は以下のようになります。
using System.Net.Http.Json;
using System.Text.Json.Nodes;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
/// <summary>
/// Console app loop: microphone speech -> text (Azure Speech) -> OpenAI chat
/// completion -> spoken answer (Azure Speech synthesis). The loop ends after
/// an utterance containing "bye" (one last answer is still spoken).
/// </summary>
class Program
{
    static async Task Main(string[] args)
    {
        string speechKey = "[Azure Speech Serviceのキー]";
        string speechRegion = "[Azure Speech Serviceのリージョン]";
        string openai_api_key = "[OpenAIのAPIキー]";

        // One HttpClient for the whole session (per-request instances exhaust sockets).
        HttpClient client = new HttpClient();

        var inProcess = true;
        while (inProcess)
        {
            var questionText = await OutputSpeechRecognitionResultAsync(speechKey, speechRegion);

            // ROBUSTNESS FIX: recognition returns "" on NoMatch/Canceled; the
            // original still posted that empty question to OpenAI. Skip instead.
            if (string.IsNullOrWhiteSpace(questionText))
            {
                continue;
            }

            // Case-insensitive check without allocating a lowered copy.
            if (questionText.Contains("bye", StringComparison.OrdinalIgnoreCase))
            {
                inProcess = false;
            }

            var answer = await AnswerQuestionAsync(questionText, client, openai_api_key);
            await OutputSpeechSynthesisResultAsync(answer, speechKey, speechRegion);
        }
    }

    /// <summary>
    /// Sends <paramref name="question"/> to the OpenAI Chat Completions API and
    /// returns the assistant's reply flattened to a single line.
    /// </summary>
    /// <param name="question">The user's question text.</param>
    /// <param name="client">A shared <see cref="HttpClient"/> instance.</param>
    /// <param name="openai_api_key">OpenAI API key, sent as a Bearer token.</param>
    /// <returns>The answer text, or a fixed English fallback message on failure.</returns>
    static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
    {
        const string endpoint = "https://api.openai.com/v1/chat/completions";
        var content = JsonContent.Create(new
        {
            model = "gpt-3.5-turbo",
            messages = new List<Message> { new Message("user", question) },
            // BUG FIX: the original requested stream = true but then read the body
            // as a single JSON document. With streaming enabled the API answers
            // with SSE chunks ("data: {...}" lines), so the parse below could
            // never succeed. Request a non-streaming completion instead.
            stream = false
        });
        var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint)
        {
            Content = content
        };
        requestMessage.Headers.Add("Authorization", $"Bearer {openai_api_key}");

        HttpResponseMessage response = await client.SendAsync(requestMessage);
        if (!response.IsSuccessStatusCode)
        {
            return "An error has occurred.";
        }

        var resultContent = await response.Content.ReadAsStringAsync();
        // Null-conditional (?) instead of null-forgiving (!): a malformed body
        // falls through to the fallback message rather than throwing.
        var obj = JsonNode.Parse(resultContent)?["choices"]?[0]?["message"]?["content"];
        if (obj == null)
        {
            return "Can not get an answer.";
        }
        // Strip newlines so the synthesizer reads the answer as one utterance.
        return obj.ToString().Replace("\n", "");
    }

    /// <summary>
    /// Captures one utterance from the default microphone and returns the text
    /// recognized by the Azure Speech service (en-US). Returns an empty string
    /// when nothing was recognized or recognition was canceled.
    /// </summary>
    static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
        using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

        Console.WriteLine("マイクに向けて話してください。");
        var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

        string answer = "";
        switch (speechRecognitionResult.Reason)
        {
            case ResultReason.RecognizedSpeech:
                Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
                answer = speechRecognitionResult.Text;
                break;
            case ResultReason.NoMatch:
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                break;
            case ResultReason.Canceled:
                var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                }
                break;
        }
        return answer;
    }

    /// <summary>
    /// Speaks <paramref name="text"/> aloud through the Azure Speech service
    /// using the "en-US-JennyNeural" voice, logging the outcome to the console.
    /// </summary>
    static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SpeechSynthesisVoiceName = "en-US-JennyNeural";

        using var speechSynthesizer = new SpeechSynthesizer(speechConfig);
        var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text);

        switch (speechSynthesisResult.Reason)
        {
            case ResultReason.SynthesizingAudioCompleted:
                Console.WriteLine($"Speech synthesized for text: [{text}]");
                break;
            case ResultReason.Canceled:
                var cancellation = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                    Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                }
                break;
        }
    }
}
/// <summary>
/// One element of the "messages" array sent to the OpenAI chat API.
/// Lowercase property names produce exactly the JSON keys ("role", "content")
/// the API expects under default System.Text.Json serialization.
/// </summary>
class Message
{
    // "user", "assistant" or "system".
    public string role { get; set; }

    // The message text itself.
    public string content { get; set; }

    public Message(string _role, string _content)
    {
        this.role = _role;
        this.content = _content;
    }
}
「Bye」という単語が含まれるまで繰り返し会話ができます。ぜひ試してみてください。