需要在原生 C++ 中实现 .NET 的 Encoding.Unicode.GetBytes。
.NET 实现:
// Print the code page identifier reported by Encoding.Unicode.
Console.WriteLine("codePage number: " + Encoding.Unicode.CodePage.ToString());

// Dump the encoded bytes of an ASCII string as hex, each followed by '-'.
Console.Write("string: ");
byte[] latinBytes = Encoding.Unicode.GetBytes("string");
foreach (byte b in latinBytes)
{
    Console.Write(b.ToString("X") + "-");
}
Console.WriteLine();

// Same dump for a Cyrillic string.
Console.Write("строка: ");
byte[] cyrillicBytes = Encoding.Unicode.GetBytes("строка");
foreach (byte b in cyrillicBytes)
{
    Console.Write(b.ToString("X") + "-");
}

// Keep the console window open until Enter is pressed.
Console.ReadLine();
.NET 实现的输出:
codePage number: 1200
string: 73-0-74-0-72-0-69-0-6E-0-67-0
строка: 41-4-42-4-40-4-3E-4-3A-4-30-4
如何在 C++ 中实现这个方法(不使用 Boost、Qt 等)?
我找到了下面这个基于 Windows API 的方法:
#include <climits>
#include <cstdio>
#include <exception>
#include <iostream>
#include <ostream>
#include <stdexcept>
#include <string>

#include <Windows.h>
std::wstring ConvertToUTF16(const std::string & source, const UINT codePage)
{
// Fail if an invalid input character is encountered
static const DWORD conversionFlags = MB_ERR_INVALID_CHARS;
// Require size for destination string
int utf16Length = ::MultiByteToWideChar(
codePage, // code page for the conversion
conversionFlags, // flags
source.c_str(), // source string
source.length(), // length (in chars) of source string
NULL, // unused - no conversion done in this step
0 // request size of destination buffer, in wchar_t's
);
if (utf16Length == 0)
{
const DWORD error = ::GetLastError();
throw std::exception(
"MultiByteToWideChar() failed: Can't get length of destination UTF-16 string.",
error);
}
// Allocate room for destination string
std::wstring utf16Text;
utf16Text.resize(utf16Length);
// Convert to Unicode
if (!::MultiByteToWideChar(
codePage, // code page for conversion
0, // validation was done in previous call
source.c_str(), // source string
source.length(), // length (in chars) of source string
&utf16Text[0], // destination buffer
utf16Text.length() // size of destination buffer, in wchar_t's
))
{
const DWORD error = ::GetLastError();
throw std::exception(
"MultiByteToWideChar() failed: Can't convert to UTF-16 string.",
error);
}
return utf16Text;
}
/// Demo entry point: converts an ASCII string with ConvertToUTF16() and prints
/// each resulting UTF-16 code unit as hex followed by '-'. Any std::exception
/// is reported on stderr. Waits for a key press before exiting.
int main()
{
    try
    {
        // ASCII text
        const std::string inText("string");

        // NOTE(review): 1200 is the identifier of the UTF-16LE code page, but
        // MultiByteToWideChar() expects the code page of the *narrow input*.
        // This call is therefore expected to fail; the value is kept because
        // it is the behavior this sample demonstrates.
        static const UINT codePage = 1200;

        // Convert to Unicode
        const std::wstring utf16Text = ConvertToUTF16(inText, codePage);

        // Show result: %X expects an unsigned int, so convert each code unit explicitly.
        for (const wchar_t codeUnit : utf16Text)
            std::printf("%X-", static_cast<unsigned int>(codeUnit));
    }
    catch (const std::exception& e)
    {
        std::cerr << "*** ERROR:\n";
        std::cerr << e.what();
        std::cerr << std::endl;
    }

    // Keep the console window open until a key is pressed.
    std::getchar();
    return 0;
}
但是对于代码页 1200(Unicode),MultiByteToWideChar 没有返回目标字符串的大小。
MultiByteToWideChar() 的 codepage 参数指定的是输入 char 数据的编码,函数据此把数据从该编码转换为 UTF-16。在 Win32 编程中永远不要使用代码页 1200。
.NET 中的字符串采用 UTF-16 编码。Encoding.Unicode.GetBytes() 返回一个 UTF-16LE 编码的字节数组,因此字符数据是按原样返回的。
对于 Windows 上的 UTF-16,请使用基于 wchar_t 或 char16_t 的字符串(如 std::wstring 或 std::u16string)。如果需要一个 UTF-16 编码的字节数组,请分配 2 * length 字节(例如使用 std::vector),并按原样复制原始字符串的字符:
std::vector<BYTE> GetUnicodeBytes(const std::wstring &str)
{
std::vector<BYTE> result;
if (!str.empty())
{
result.resize(sizeof(wchar_t) * str.length());
CopyMemory(&result[0], str.c_str(), result.size());
}
return result;
}
// Dump the bytes of an ASCII wide literal as hex, each followed by '-'.
std::wcout << L"string: ";
for (const auto byteValue : GetUnicodeBytes(L"string"))
{
    std::wcout << std::hex << static_cast<int>(byteValue) << L"-";
}
std::wcout << std::endl;

// Same dump for a Cyrillic wide literal.
std::wcout << L"строка: ";
for (const auto byteValue : GetUnicodeBytes(L"строка"))
{
    std::wcout << std::hex << static_cast<int>(byteValue) << L"-";
}
std::wcout << std::endl;
或者(使用 std::u16string):
std::vector<BYTE> GetUnicodeBytes(const std::u16string &str)
{
std::vector<BYTE> result;
if (!str.empty())
{
result.resize(sizeof(char16_t) * str.length());
CopyMemory(&result[0], str.c_str(), result.size());
}
return result;
}
// Dump the bytes of an ASCII UTF-16 literal as hex, each followed by '-'.
std::wcout << L"string: ";
for (const auto byteValue : GetUnicodeBytes(u"string"))
{
    std::wcout << std::hex << static_cast<int>(byteValue) << L"-";
}
std::wcout << std::endl;

// Same dump for a Cyrillic UTF-16 literal.
std::wcout << L"строка: ";
for (const auto byteValue : GetUnicodeBytes(u"строка"))
{
    std::wcout << std::hex << static_cast<int>(byteValue) << L"-";
}
std::wcout << std::endl;