takataka430’s blog

.NET系を中心に勉強したことのまとめを書きます

C#のコンソールでChatGPTと英会話するアプリを作ってみた

はじめに

最近ChatGPTを英語の勉強のために英語で質問するようにしているのですが、「これって会話できないかな?」というのが気になったので作ってみました。

環境

.NET6 コンソールアプリ
Microsoft.CognitiveServices.Speech 1.26.0

コード

OpenAIに質問を投げる

以下のようになります。

/// <summary>
/// Sends <paramref name="question"/> to the OpenAI chat-completions API and
/// returns the assistant's answer as a single line of text.
/// Returns a fixed English error string when the request or parsing fails.
/// </summary>
/// <param name="question">The user's question (plain text).</param>
/// <param name="client">Shared HttpClient instance used for the POST.</param>
/// <param name="openai_api_key">OpenAI API key used as a Bearer token.</param>
static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
{
    // OpenAI chat-completions endpoint.
    string endpoint = "https://api.openai.com/v1/chat/completions";

    // Build the JSON request body.
    // NOTE: "stream" must be omitted (defaults to false). With stream = true
    // the API answers with server-sent-event chunks ("data: {...}" lines),
    // which the single JsonNode.Parse below cannot read.
    var content = JsonContent.Create(new
    {
        model = "gpt-3.5-turbo",
        messages = new List<Message> { new Message("user", question) }
    });

    // Compose the POST request and attach the Bearer token.
    var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint)
    {
        Content = content
    };
    requestMessage.Headers.Add("Authorization", "Bearer " + openai_api_key);

    HttpResponseMessage response = await client.SendAsync(requestMessage);

    if (!response.IsSuccessStatusCode)
    {
        return "An error has occurred.";
    }

    // Extract choices[0].message.content from the JSON response.
    var resultContent = await response.Content.ReadAsStringAsync();
    var obj = JsonNode.Parse(resultContent)!["choices"]![0]!["message"]!["content"];
    if (obj != null)
    {
        // Strip newlines so the speech synthesizer reads one continuous answer.
        return obj.ToString().Replace("\n", "");
    }

    return "Can not get an answer.";
}

本文のJSONを作成するためのクラスを作っておきます。

// Request-body DTO for the OpenAI chat API.
// Named "Message" (PascalCase) to match the usage in AnswerQuestionAsync
// (new Message("user", question)); the lowercase "message" did not compile
// against that call site.
// Property names stay lowercase on purpose: System.Text.Json serializes
// them as-is, and the API expects the keys "role" and "content".
class Message
{
    public Message(string _role, string _content)
    {
        role = _role;
        content = _content;
    }
    public string role { get; set; }
    public string content { get; set; }
}

音声を文字に変換

/// <summary>
/// Captures one utterance from the default microphone and returns the
/// recognized English (en-US) text. Returns "" when nothing was recognized
/// or the recognition was canceled; diagnostics are written to the console.
/// </summary>
static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
{
    // Configure English (US) speech-to-text against the given Azure resource.
    var config = SpeechConfig.FromSubscription(speechKey, speechRegion);
    config.SpeechRecognitionLanguage = "en-US";

    using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    using var recognizer = new SpeechRecognizer(config, audioConfig);

    Console.WriteLine("マイクに向けて話してください。");
    var result = await recognizer.RecognizeOnceAsync();

    // Only a successful recognition yields text; every other outcome logs
    // a diagnostic and falls through to the empty string.
    if (result.Reason == ResultReason.RecognizedSpeech)
    {
        Console.WriteLine($"RECOGNIZED: Text={result.Text}");
        return result.Text;
    }

    if (result.Reason == ResultReason.NoMatch)
    {
        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var cancellation = CancellationDetails.FromResult(result);
        Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

        if (cancellation.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
            Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
        }
    }

    return "";
}

以下のページのコードを参考にしました。

音声テキスト変換クイックスタート - Speech サービス - Azure Cognitive Services | Microsoft Learn

文字を音声に変換

/// <summary>
/// Speaks <paramref name="text"/> aloud using the Azure "Jenny" en-US neural
/// voice. Outcomes (completed or canceled) are reported on the console.
/// </summary>
static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
{
    // Configure text-to-speech against the given Azure resource.
    var config = SpeechConfig.FromSubscription(speechKey, speechRegion);
    config.SpeechSynthesisVoiceName = "en-US-JennyNeural";

    using var synthesizer = new SpeechSynthesizer(config);
    var result = await synthesizer.SpeakTextAsync(text);

    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        Console.WriteLine($"Speech synthesized for text: [{text}]");
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

        if (cancellation.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
            Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
        }
    }
}

以下のページを参考にしました。

テキスト読み上げクイックスタート - Speech サービス - Azure Cognitive Services | Microsoft Learn

全体のコード

コード全体は以下のようになります。

using System.Net.Http.Json;
using System.Text.Json.Nodes;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

class Program
{
    /// <summary>
    /// Conversation loop: listen on the microphone, send the recognized text
    /// to OpenAI, and speak the answer aloud, until the user says "bye".
    /// </summary>
    static async Task Main(string[] args)
    {
        // Azure Speech Service credentials.
        string speechKey = "[Azure Speech Serviceのキー]";
        string speechRegion = "[Azure Speech Serviceのリージョン]";

        // OpenAI API key.
        string openai_api_key = "[OpenAIのAPIキー]";

        // One shared HttpClient for the whole session (never one per request).
        HttpClient client = new HttpClient();

        var inProcess = true;

        while (inProcess)
        {
            // Convert the spoken question to text.
            var questionText = await OutputSpeechRecognitionResultAsync(speechKey, speechRegion);

            // Nothing was recognized (NoMatch/Canceled returns "") — listen
            // again instead of sending an empty question to the API.
            if (string.IsNullOrWhiteSpace(questionText))
            {
                continue;
            }

            // Saying "bye" ends the session after one last answer.
            if (questionText.Contains("bye", StringComparison.OrdinalIgnoreCase))
            {
                inProcess = false;
            }

            // Ask OpenAI...
            var answer = await AnswerQuestionAsync(questionText, client, openai_api_key);

            // ...and read the answer aloud.
            await OutputSpeechSynthesisResultAsync(answer, speechKey, speechRegion);
        }
    }


    /// <summary>
    /// Sends <paramref name="question"/> to the OpenAI chat-completions API
    /// and returns the assistant's answer as a single line of text.
    /// Returns a fixed English error string when the request or parsing fails.
    /// </summary>
    static async Task<string> AnswerQuestionAsync(string question, HttpClient client, string openai_api_key)
    {
        // OpenAI chat-completions endpoint.
        string endpoint = "https://api.openai.com/v1/chat/completions";

        // Build the JSON request body.
        // NOTE: "stream" must be omitted (defaults to false). With stream = true
        // the API answers with server-sent-event chunks ("data: {...}" lines),
        // which the single JsonNode.Parse below cannot read.
        var content = JsonContent.Create(new
        {
            model = "gpt-3.5-turbo",
            messages = new List<Message> { new Message("user", question) }
        });

        // Compose the POST request and attach the Bearer token.
        var requestMessage = new HttpRequestMessage(HttpMethod.Post, endpoint)
        {
            Content = content
        };
        requestMessage.Headers.Add("Authorization", "Bearer " + openai_api_key);

        HttpResponseMessage response = await client.SendAsync(requestMessage);

        if (!response.IsSuccessStatusCode)
        {
            return "An error has occurred.";
        }

        // Extract choices[0].message.content from the JSON response.
        var resultContent = await response.Content.ReadAsStringAsync();
        var obj = JsonNode.Parse(resultContent)!["choices"]![0]!["message"]!["content"];
        if (obj != null)
        {
            // Strip newlines so the speech synthesizer reads one continuous answer.
            return obj.ToString().Replace("\n", "");
        }

        return "Can not get an answer.";
    }


    /// <summary>
    /// Captures one utterance from the default microphone and returns the
    /// recognized English (en-US) text. Returns "" when nothing was recognized
    /// or the recognition was canceled; diagnostics go to the console.
    /// </summary>
    static async Task<string> OutputSpeechRecognitionResultAsync(string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
        using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

        Console.WriteLine("マイクに向けて話してください。");
        var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

        string answer = "";

        switch (speechRecognitionResult.Reason)
        {
            case ResultReason.RecognizedSpeech:
                Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
                answer = speechRecognitionResult.Text;
                break;
            case ResultReason.NoMatch:
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                break;
            case ResultReason.Canceled:
                var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                }
                break;
        }
        return answer;
    }


    /// <summary>
    /// Speaks <paramref name="text"/> aloud using the Azure "Jenny" en-US
    /// neural voice. Outcomes (completed or canceled) go to the console.
    /// </summary>
    static async Task OutputSpeechSynthesisResultAsync(string text, string speechKey, string speechRegion)
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);

        speechConfig.SpeechSynthesisVoiceName = "en-US-JennyNeural";

        using (var speechSynthesizer = new SpeechSynthesizer(speechConfig))
        {
            var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text);

            switch (speechSynthesisResult.Reason)
            {
                case ResultReason.SynthesizingAudioCompleted:
                    Console.WriteLine($"Speech synthesized for text: [{text}]");
                    break;
                case ResultReason.Canceled:
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
                    Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                        Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                    }
                    break;
                default:
                    break;
            }
        }
    }
}

//JSON作成用のクラス
// Request-body DTO for the OpenAI chat API.
// Property names stay lowercase on purpose: System.Text.Json serializes
// them as-is, and the API expects the keys "role" and "content".
class Message
{
    public string role { get; set; }
    public string content { get; set; }

    public Message(string _role, string _content)
    {
        this.role = _role;
        this.content = _content;
    }
}

「Bye」という単語が含まれるまで繰り返し会話ができます。ぜひ試してみてください。