1

I have this code, which converts a UTF-8 string to Unicode:

#include <unicode/unistr.h>
//included other header files

int main(int argc, char** argv) {
    std::string s("some string");
        // convert std::string to ICU's UnicodeString
        UnicodeString ucs = UnicodeString::fromUTF8(StringPiece(s.c_str()));

        // convert UnicodeString to std::wstring
        std::wstring ws;
        for (int i = 0; i < ucs.length(); ++i)
            ws += static_cast<wchar_t>(ucs[i]);

        std::wcout << ws;
}

I cannot figure out how to convert this UnicodeString to windows-1251 (cp1251). Which function should I use to do this on Linux?

Vvnbm
  • 11
  • 3
  • On Windows just use: `MultiByteToWideChar` with the relevant code pages https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx to convert directly from UTF-8 to cp1251. – Richard Critten Jan 08 '18 at 17:17
  • hey, thanks for your answer. I need to do this in Linux, I corrected my question. – Vvnbm Jan 08 '18 at 17:20
  • Wikipedia [1251](https://en.wikipedia.org/wiki/Windows-1251) has the Unicode to/from 1251 mapping. – Eljay Jan 08 '18 at 17:32
  • I didn't understand your answer, sorry. How exactly can I convert this Unicode string to Windows-1251 ? – Vvnbm Jan 08 '18 at 18:50
  • @RichardCritten: you can't convert *directly* from UTF-8 to CP1251 using `MultiByteToWideChar()`. You have to convert from UTF-8 to UTF-16 first, and then convert UTF-16 to CP1251 using `WideCharToMultiByte()` – Remy Lebeau Jan 09 '18 at 21:36

1 Answer

3

Use ICU's conversion functions in ucnv.h (see Conversion > Using Converters in ICU's documentation):

#include <cstdio>
#include <memory>
#include <vector>

#include <unicode/ucnv.h>
bool convertTo1251(std::vector<UChar> const & input, std::vector<char> & output)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *pConvert = ucnv_open("windows-1251", &status);
    if (status)
    {
        printf("Failed to obtain char set converter: %d\r\n", status);
        return false;
    }
    std::shared_ptr<UConverter> cnv(pConvert, ucnv_close);

    UChar const * pwszBegin = &input[0], *pwszEnd = pwszBegin + input.size();
    output.resize(input.size());

    char *pszBegin = &output[0], *pszEnd = pszBegin + input.size();

    ucnv_fromUnicode(pConvert, &pszBegin, pszEnd, &pwszBegin, pwszEnd, nullptr, true, &status);
    if (status)
    {
        // deal with error
        return false;
    }
    return true;
}
Igor Bendrup
  • 2,317
  • 1
  • 14
  • 15
SoronelHaetir
  • 12,547
  • 1
  • 11
  • 21