语音转文本 IBM Watson C#,长音频超过 100 MB


我正在用 C# 编写代码,尝试转录超过 100 MB 的音频,但服务不允许这样做。我需要开发一个能够在 C# 中发送超过 100 MB 音频的程序。

在下面的代码中,我使用的是 WebSocket,但我该如何以流式传输的方式发送音频呢?

// Opens a WebSocket to the Watson Speech to Text "recognize" endpoint, sends the
// JSON "start" action as soon as the socket opens, and then pushes the audio file
// to the service as binary data.
// NOTE(review): Notifier, NotificationMessage and WebSocket are not declared in this
// snippet; they presumably come from the websocket-sharp library and its samples — confirm.
public static void CallWatson()
{
    // NOTE(review): the token value is missing from the original post; a valid
    // token presumably has to be appended after "watson-token=" — confirm.
    using (var nf = new Notifier())
    using (var ws = new WebSocket("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?watson-token="))
    {
        string startActionjson = "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}";
        ws.OnOpen += (sender, e) => ws.Send(startActionjson);
        // Set the WebSocket events.
        string result = string.Empty;
        ws.OnMessage += Ws_OnMessage;

        ws.OnError += (sender, e) =>
          nf.Notify(
            new NotificationMessage
            {
                Summary = "WebSocket Error",
                Body = e.Message,
                Icon = "notification-message-im"
            });
        ws.OnClose += (sender, e) =>
          nf.Notify(
            new NotificationMessage
            {
                Summary = String.Format("WebSocket Close ({0})", e.Code),
                Body = e.Reason,
                Icon = "notification-message-im"
            });
        ws.Connect();
        //ws.SendAsync(startActionjson, b =>
        //{
        //    if (b == true)
        //    {
        //        //send the audio as binary
        //        string filePath = "E:\\test33.wav";
        //        byte[] bytes = System.IO.File.ReadAllBytes(filePath);
        //        ws.SendAsync(bytes, b1 =>
        //        {
        //            if (b1)
        //                ws.Close();
        //        });
        //        // result+=result+ws.
        //    }
        //});
        // Connect to the server asynchronously.
        //ws.ConnectAsync ();
        //Console.WriteLine("\nType 'exit' to exit.\n");
        string filePath = "E:\\Test3.wav";
        // The whole file is loaded into memory before anything is sent.
        byte[] bytes = System.IO.File.ReadAllBytes(filePath);
        // NOTE(review): this loop never terminates. Every 3 seconds it re-sends the
        // entire byte array, and the completion callback closes the socket as soon
        // as a send reports success.
        while (true)
        {
            Thread.Sleep(3000);
            ws.SendAsync(bytes, b1 =>
            {
                if (b1)
                    ws.Close();
            });
        }
        //for (int i = 0; i < bytes.Length; i += 1000000)
        //{
        //    Thread.Sleep(1000);
        //    byte[] buffer = new byte[1000000];
        //    Buffer.BlockCopy(bytes, i, buffer, 0, 128);
        //  //  ws.Send(buffer);
        //    ws.SendAsync(buffer, b1 =>
        //    {
        //        if (b1)
        //            ws.Close();
        //    });
        //}
    }
}
// Message callback registered on the socket: copies the text payload of the
// incoming message into a local and does nothing further with it.
private static void Ws_OnMessage(object sender, MessageEventArgs e)
{
    string payload = e.Data;
}

根据文档,无论采用哪种输入方式,都有 100 MB 的限制。因此,您必须将音频文件拆分为小于 100 MB 的块。

要以流的方式传输音频,我认为您需要创建一个 FileStream,而不是调用 System.IO.File.ReadAllBytes(filePath); 再遍历其结果。

此外,您不应该在到达文件末尾后立即关闭 WebSocket——这可能会导致您无法收到全部结果。正确的做法是发送字符串 {"action": "stop"},然后等待,直到收到 {"state": "listening"} 响应;该响应表示服务已处理完您的音频并返回了所有文本。

更新:我找到了一台 Windows 机器,安装了 Visual Studio,并整理出了一个可运行的示例。我一直没弄清楚您使用的是哪个 WebSocket API/库,但这个示例只用到了我能在 microsoft.com 上找到文档的内置功能,所以它应该对您有用。

我用几个不同的 .ogg 和 .wav 文件对其进行了测试,并确认按预期收到了多个中间结果(interim)和最终结果。

using System;
using System.Net.WebSockets;
using System.Net;
using System.Runtime.Serialization.Json;
using System.Threading;
using System.Threading.Tasks;
using System.Text;
using System.IO;
using System.Runtime.Serialization;

// Perform streaming transcription of an audio file using the IBM Watson Speech to Text service over a websocket
// http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text.html
// https://msdn.microsoft.com/en-us/library/system.net.websockets.clientwebsocket%28v=vs.110%29.aspx
namespace WatsonSTTWebsocketExample
{
    // Console sample: streams an audio file to the IBM Watson Speech to Text service
    // over a websocket and prints every JSON message the service sends back.
    class Program
    {
        static void Main(string[] args)
        {
            Transcribe();
            Console.WriteLine("Press any key to exit");
            Console.ReadLine();
        }
        // http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/doc/getting_started/gs-credentials.shtml
        static String username = "<username>";
        static String password = "<password>";
        // Path of the audio file to transcribe.
        static String file = @"c:\audio.wav";
        static Uri url = new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize");
        // JSON text message that starts a recognition request.
        static ArraySegment<byte> openingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}"
        ));
        // JSON text message that tells the service no more audio will follow.
        static ArraySegment<byte> closingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"stop\"}"
        ));

        // Connects, runs one complete transcription of `file`, and closes the websocket.
        // Blocks the calling thread until everything has finished.
        static void Transcribe()
        {
            using (var ws = new ClientWebSocket())
            {
                ws.Options.Credentials = new NetworkCredential(username, password);
                ws.ConnectAsync(url, CancellationToken.None).Wait();
                // send opening message and wait for initial delimiter
                Task.WaitAll(ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None), HandleResults(ws));
                // send all audio and then a closing message; simultaneously print all results until delimiter is received
                Task.WaitAll(SendAudio(ws), HandleResults(ws));
                // close down the websocket, unless it is already closed or aborted
                if (ws.State == WebSocketState.Open || ws.State == WebSocketState.CloseReceived)
                {
                    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).Wait();
                }
            }
        }

        // Streams the audio file to the service in chunks of up to 1024 bytes, then sends the "stop" message.
        static async Task SendAudio(ClientWebSocket ws)
        {
            using (FileStream fs = File.OpenRead(file))
            {
                byte[] chunk = new byte[1024];
                int read;
                while ((read = await fs.ReadAsync(chunk, 0, chunk.Length)) > 0)
                {
                    // Send only the bytes actually read: the final chunk is usually shorter
                    // than the buffer, and the remainder still holds data from the previous read.
                    await ws.SendAsync(new ArraySegment<byte>(chunk, 0, read), WebSocketMessageType.Binary, true, CancellationToken.None);
                }
                await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }

        // prints results until the connection closes or a delimiter message is received
        static async Task HandleResults(ClientWebSocket ws)
        {
            var buffer = new byte[1024];
            var segment = new ArraySegment<byte>(buffer);
            while (true)
            {
                string message;
                // A single message may arrive in several frames and may be larger than the
                // receive buffer, so the frames are accumulated until EndOfMessage is set.
                using (var payload = new MemoryStream())
                {
                    WebSocketReceiveResult result;
                    do
                    {
                        result = await ws.ReceiveAsync(segment, CancellationToken.None);
                        if (result.MessageType == WebSocketMessageType.Close)
                        {
                            return;
                        }
                        payload.Write(buffer, 0, result.Count);
                    }
                    while (!result.EndOfMessage);

                    // Decode only once the message is complete so that multi-byte UTF-8
                    // sequences split across frames are handled correctly.
                    message = Encoding.UTF8.GetString(payload.ToArray());
                }
                // you'll probably want to parse the JSON into a useful object here,
                // see ServiceState and IsDelimeter for a light-weight example of that.
                Console.WriteLine(message);
                if (IsDelimeter(message))
                {
                    return;
                }
            }
        }

        // the watson service sends a {"state": "listening"} message at both the beginning and the *end* of the results
        // this checks for that
        [DataContract]
        internal class ServiceState
        {
            [DataMember]
            public string state = "";
        }

        // Returns true when the JSON message has a "state" member equal to "listening".
        static bool IsDelimeter(String json)
        {
            using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
            {
                var ser = new DataContractJsonSerializer(typeof(ServiceState));
                var obj = (ServiceState)ser.ReadObject(stream);
                // ReadObject yields null for a JSON "null" payload; treat that as "not a delimiter".
                return obj != null && obj.state == "listening";
            }
        }
    }
}

相关内容

  • 没有找到相关文章

最新更新