Introduction
Lately I've been asking ChatGPT questions in English as part of my English studies, and I started wondering: couldn't I actually hold a spoken conversation with it? So I built one.
Environment
.NET 6 console app
Microsoft.CognitiveServices.Speech 1.26.0
Code
Sending a question to OpenAI
The code looks like this:
static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
{
    // OpenAI Chat Completions endpoint
    string endpoint = "https://api.openai.com/v1/chat/completions";

    // Build the request body to send to the endpoint.
    // Note: do not set stream = true here; the parsing below expects a
    // single complete JSON response, not a server-sent event stream.
    var content = JsonContent.Create(new
    {
        model = "gpt-3.5-turbo",
        messages = new List<Message> { new Message("user", question) }
    });

    // Send the request
    var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint);
    requestMessage.Content = content;
    requestMessage.Headers.Add("Authorization", "Bearer " + openai_api_key);
    HttpResponseMessage response = await client.SendAsync(requestMessage);

    if (response.IsSuccessStatusCode)
    {
        // Extract "content" from the first element of "choices"
        var resultContent = await response.Content.ReadAsStringAsync();
        var obj = JsonNode.Parse(resultContent)!["choices"]![0]!["message"]!["content"];
        if (obj != null)
        {
            var answer = obj.ToString().Replace("\n", "");
            return answer;
        }
        else
        {
            return "Cannot get an answer.";
        }
    }
    else
    {
        return "An error has occurred.";
    }
}
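When the request fails, the OpenAI API usually returns a JSON error body that tells you why (invalid key, rate limit, malformed request, and so on). A small helper for logging it is handy while debugging; this is my own addition, not part of the original code, and you would call it from the else branch above:

static async Task LogOpenAIErrorAsync(HttpResponseMessage response)
{
    // The OpenAI API returns a JSON error body describing the failure.
    var errorBody = await response.Content.ReadAsStringAsync();
    Console.WriteLine($"OpenAI request failed: {(int)response.StatusCode} {response.ReasonPhrase}");
    Console.WriteLine(errorBody);
}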
A small class is used to build the JSON body (note the name must be Message, with a capital M, to match the code above):
class Message
{
    public Message(string _role, string _content)
    {
        role = _role;
        content = _content;
    }
    public string role { get; set; }
    public string content { get; set; }
}
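For reference, JsonContent.Create serializes the anonymous object into the shape the Chat Completions API expects. A quick standalone snippet to check the output (it reuses the Message class above; the question text is just an example):

using System.Text.Json;

var body = new
{
    model = "gpt-3.5-turbo",
    messages = new List<Message> { new Message("user", "Hello!") }
};

Console.WriteLine(JsonSerializer.Serialize(body));
// Output: {"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"Hello!"}]}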
Converting speech to text
static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
{
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    speechConfig.SpeechRecognitionLanguage = "en-US";

    using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

    Console.WriteLine("Speak into your microphone.");
    var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

    string answer = "";
    switch (speechRecognitionResult.Reason)
    {
        case ResultReason.RecognizedSpeech:
            Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
            answer = speechRecognitionResult.Text;
            break;
        case ResultReason.NoMatch:
            Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            break;
        case ResultReason.Canceled:
            var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
            Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
            if (cancellation.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
            }
            break;
    }
    return answer;
}
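One thing to note: on NoMatch or Canceled this method returns an empty string, and the main loop further below would still send that empty question to OpenAI. A small guard inside the calling loop avoids the wasted API call; this is a sketch of my own, not part of the original code:

var questionText = await OutputSpeechRecognitionResultAsync(speechKey, speechRegion);
if (string.IsNullOrWhiteSpace(questionText))
{
    // Nothing was recognized; listen again instead of querying OpenAI.
    continue;
}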
The code is based on the following page:
Speech to text quickstart - Speech service - Azure Cognitive Services | Microsoft Learn
Converting text to speech
static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
{
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    speechConfig.SpeechSynthesisVoiceName = "en-US-JennyNeural";

    using (var speechSynthesizer = new SpeechSynthesizer(speechConfig))
    {
        var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text);
        switch (speechSynthesisResult.Reason)
        {
            case ResultReason.SynthesizingAudioCompleted:
                Console.WriteLine($"Speech synthesized for text: [{text}]");
                break;
            case ResultReason.Canceled:
                var cancellation = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                    Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                }
                break;
            default:
                break;
        }
    }
}
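By default the synthesized audio plays through the speaker. The same SDK can also write the audio to a WAV file via AudioConfig.FromWavFileOutput, which is handy if you want to keep the answers. A minimal sketch (the file name and sample text are my own choices):

var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
speechConfig.SpeechSynthesisVoiceName = "en-US-JennyNeural";

// Write the synthesized speech to answer.wav instead of the default speaker.
using var fileOutput = AudioConfig.FromWavFileOutput("answer.wav");
using var speechSynthesizer = new SpeechSynthesizer(speechConfig, fileOutput);
await speechSynthesizer.SpeakTextAsync("Hello! This is a test.");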
Based on the following page:
Text to speech quickstart - Speech service - Azure Cognitive Services | Microsoft Learn
Full code
The complete program looks like this:
using System.Net.Http.Json;
using System.Text.Json.Nodes;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

class Program
{
    async static Task Main(string[] args)
    {
        // Enter your Azure Speech Service "SPEECH_KEY" and "SPEECH_REGION"
        string speechKey = "[your Azure Speech Service key]";
        string speechRegion = "[your Azure Speech Service region]";
        // Enter your OpenAI API key
        string openai_api_key = "[your OpenAI API key]";

        HttpClient client = new HttpClient();
        var inProcess = true;
        while (inProcess)
        {
            // Convert the spoken question to text
            var questionText = await OutputSpeechRecognitionResultAsync(speechKey, speechRegion);
            // Finish after this turn if the user said "Bye"
            if (questionText.ToLower().Contains("bye"))
            {
                inProcess = false;
            }
            // Send the question to OpenAI
            var answer = await AnswerQuestionAsync(questionText, client, openai_api_key);
            // Speak the answer
            await OutputSpeechSynthesisResultAsync(answer, speechKey, speechRegion);
        }
    }

    static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
    {
        // OpenAI Chat Completions endpoint
        string endpoint = "https://api.openai.com/v1/chat/completions";

        // Build the request body to send to the endpoint
        var content = JsonContent.Create(new
        {
            model = "gpt-3.5-turbo",
            messages = new List<Message> { new Message("user", question) }
        });

        // Send the request
        var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint);
        requestMessage.Content = content;
        requestMessage.Headers.Add("Authorization", "Bearer " + openai_api_key);
        HttpResponseMessage response = await client.SendAsync(requestMessage);

        if (response.IsSuccessStatusCode)
        {
            // Extract "content" from the first element of "choices"
            var resultContent = await response.Content.ReadAsStringAsync();
            var obj = JsonNode.Parse(resultContent)!["choices"]![0]!["message"]!["content"];
            if (obj != null)
            {
                var answer = obj.ToString().Replace("\n", "");
                return answer;
            }
            else
            {
                return "Cannot get an answer.";
            }
        }
        else
        {
            return "An error has occurred.";
        }
    }

    static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
        using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

        Console.WriteLine("Speak into your microphone.");
        var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

        string answer = "";
        switch (speechRecognitionResult.Reason)
        {
            case ResultReason.RecognizedSpeech:
                Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
                answer = speechRecognitionResult.Text;
                break;
            case ResultReason.NoMatch:
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                break;
            case ResultReason.Canceled:
                var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                }
                break;
        }
        return answer;
    }

    static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SpeechSynthesisVoiceName = "en-US-JennyNeural";

        using (var speechSynthesizer = new SpeechSynthesizer(speechConfig))
        {
            var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text);
            switch (speechSynthesisResult.Reason)
            {
                case ResultReason.SynthesizingAudioCompleted:
                    Console.WriteLine($"Speech synthesized for text: [{text}]");
                    break;
                case ResultReason.Canceled:
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
                    Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                        Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                    }
                    break;
                default:
                    break;
            }
        }
    }
}

// Class used to build the JSON request body
class Message
{
    public Message(string _role, string _content)
    {
        role = _role;
        content = _content;
    }
    public string role { get; set; }
    public string content { get; set; }
}
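As written, each question is sent to OpenAI on its own, so the model has no memory of earlier turns. For a real back-and-forth you can accumulate the messages and send the whole history with every request. A rough sketch of the idea, as my own extension rather than part of the code above:

// Keep the whole conversation; declare this once, before the while loop.
var history = new List<Message>();

// Each turn: append the user's question before building the request body...
history.Add(new Message("user", questionText));
var content = JsonContent.Create(new
{
    model = "gpt-3.5-turbo",
    messages = history
});

// ...send the request as in AnswerQuestionAsync, then remember the reply
// so the next request includes it.
history.Add(new Message("assistant", answer));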
The conversation keeps looping until what you say contains the word "Bye". Give it a try!