代码之家  ›  专栏  ›  技术社区  ›  Searene

使用C++将UTF-16字符串转换为UTF-8字符串 [重复]

c++
  •  0
  • Searene  · 技术社区  · 6 年前

    STDMETHODIMP CFileSystemAPI::setRRConfig( BSTR config_str, VARIANT* ret )
    {
    mReportReaderFactory.reset( new sbis::report_reader::ReportReaderFactory() );
    
    USES_CONVERSION;
    std::string configuration_str = W2A( config_str );
    

    0 回复  |  直到 8 年前
        1
  •  1
  •   Peter Mortensen icecrime    8 年前

    如果你使用的是C++11，可以参考：

    http://www.cplusplus.com/reference/codecvt/codecvt_utf8_utf16/

        2
  •  1
  •   AndersK    8 年前

    /// Converts a wide (UTF-16) string to a UTF-8 encoded std::string by
    /// calling the Win32 WideCharToMultiByte API with CP_UTF8.
    ///
    /// The conversion is driven by wstr.size() rather than by a terminating
    /// NUL, so embedded L'\0' characters are converted instead of truncating
    /// the result.
    ///
    /// @param wstr  Wide string to convert; may be empty.
    /// @return      The UTF-8 bytes for wstr; an empty string when wstr is empty.
    /// @throws std::runtime_error  if wstr is too long to describe with the
    ///         int length the API takes, or if either API call reports failure.
    std::string WstrToUtf8Str(const std::wstring& wstr)
    {
      std::string retStr;
      if (wstr.empty())
      {
        return retStr;
      }

      bool converted = false;

      // The API takes the input length as an int; the round-trip comparison
      // rejects sizes that do not fit without needing any extra headers.
      const int wideLength = static_cast<int>(wstr.size());
      if (wideLength > 0 &&
          static_cast<std::wstring::size_type>(wideLength) == wstr.size())
      {
        // First call: query how many bytes the UTF-8 form needs.
        const int sizeRequired = WideCharToMultiByte(CP_UTF8, 0, wstr.data(),
                                 wideLength, NULL, 0, NULL, NULL);
        if (sizeRequired > 0)
        {
          // Second call: convert straight into the result's storage.
          retStr.resize(static_cast<std::string::size_type>(sizeRequired));
          const int bytesConverted = WideCharToMultiByte(CP_UTF8, 0, wstr.data(),
                                     wideLength, &retStr[0], sizeRequired, NULL,
                                     NULL);
          if (bytesConverted > 0)
          {
            retStr.resize(static_cast<std::string::size_type>(bytesConverted));
            converted = true;
          }
        }
      }

      if (!converted)
      {
        // Only narrow data goes into the message: streaming wchar_t text into
        // a narrow stream would print a pointer value, not the characters.
        std::stringstream err;
        err << __FUNCTION__
            << " std::string WstrToUtf8Str failed to convert wstring of "
            << wstr.size() << " UTF-16 code units";
        throw std::runtime_error( err.str() );
      }
      return retStr;
    }
    

    可以将BSTR作为std::wstring传给该函数。

        3
  •  0
  •   kvv    11 年前
    /// Appends the UTF-8 style encoding of one character to a byte buffer.
    ///
    /// Values up to 0x7FFFFFFF are written using one to six bytes (the five-
    /// and six-byte forms are the legacy extended layout). Any other value,
    /// including a negative one on platforms where wchar_t is signed, is
    /// treated as invalid and nothing is written.
    ///
    /// @param buffer         Destination bytes. No bounds checking is done;
    ///                       the caller must leave room for up to six bytes
    ///                       starting at buffer[*offset].
    /// @param offset         In/out write position; advanced by the number of
    ///                       bytes written (unchanged for invalid input).
    /// @param ucs_character  The character value to encode.
    void encode_unicode_character(char* buffer, int* offset, wchar_t ucs_character)
    {
        // Compare on an unsigned value: with a signed wchar_t a negative input
        // would otherwise satisfy "<= 0x7F" and be emitted as a bogus byte.
        const unsigned long code_point = static_cast<unsigned long>(ucs_character);

        unsigned long lead_prefix = 0;
        int continuation_count = 0;

        if (code_point <= 0x7F)
        {
            // Plain single-byte ASCII.
            buffer[(*offset)++] = static_cast<char>(code_point);
            return;
        }
        else if (code_point <= 0x7FF)
        {
            lead_prefix = 0xC0;
            continuation_count = 1;
        }
        else if (code_point <= 0xFFFF)
        {
            lead_prefix = 0xE0;
            continuation_count = 2;
        }
        else if (code_point <= 0x1FFFFF)
        {
            lead_prefix = 0xF0;
            continuation_count = 3;
        }
        else if (code_point <= 0x3FFFFFF)
        {
            lead_prefix = 0xF8;
            continuation_count = 4;
        }
        else if (code_point <= 0x7FFFFFFF)
        {
            lead_prefix = 0xFC;
            continuation_count = 5;
        }
        else
        {
            // Invalid char; don't encode anything.
            return;
        }

        // Lead byte: length prefix plus the most significant payload bits.
        buffer[(*offset)++] =
            static_cast<char>(lead_prefix | (code_point >> (6 * continuation_count)));

        // Continuation bytes: 10xxxxxx, six payload bits each, high to low.
        for (int shift = 6 * (continuation_count - 1); shift >= 0; shift -= 6)
        {
            buffer[(*offset)++] =
                static_cast<char>(0x80 | ((code_point >> shift) & 0x3F));
        }
    }
    

    关于UCS，你需要了解的一切都在ISO 10646:2012中。