參考
改成 C 版本
C++:
-
#include <stdio.h>
-
#include <string.h>
-
-
void EncodeToUTF8(char * szSource, char *szFinal);
-
void DecodeFromUTF8(char * szSource, char *szFinal);
-
-
/*
 * Demo driver: encodes a sample string with EncodeToUTF8, prints the
 * encoded text, decodes it again with DecodeFromUTF8 and prints the result.
 *
 * Returns 0.
 */
int main(int argc, char* argv[])
{
    char szEncodeFinal[256];
    char szDecodeFinal[256];

    /* The command-line arguments are not used. */
    (void)argc;
    (void)argv;

    EncodeToUTF8("123abc測試", szEncodeFinal);
    printf("Encode:%s\n", szEncodeFinal);

    DecodeFromUTF8(szEncodeFinal, szDecodeFinal);
    printf("Decode:%s\n", szDecodeFinal);

    return 0;
}
-
-
/* Writes one byte as "=XX" (uppercase hex) at out; returns the new end. */
static char *AppendEscapedByte(char *out, unsigned int byte)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    *out++ = '=';
    *out++ = hexDigits[(byte >> 4) & 0x0F];
    *out++ = hexDigits[byte & 0x0F];
    return out;
}

/*
 * EncodeToUTF8 - encode a string as "=XX"-escaped UTF-8 text.
 *
 * Every char of szSource is treated as one code point in the range 0..255:
 *   - '=' is written as "=3D" so the escape marker stays unambiguous;
 *   - any other value below 128 is copied through unchanged;
 *   - values 128..255 are written as their two-byte UTF-8 form, each byte
 *     rendered as "=XX" with uppercase hex digits.
 *
 * szSource: NUL-terminated input string.
 * szFinal:  receives the NUL-terminated result. No buffer size is passed
 *           in, so the caller must provide at least
 *           6 * strlen(szSource) + 1 bytes (worst case "=XX=XX" per char).
 */
void EncodeToUTF8(char * szSource, char *szFinal)
{
    size_t nMax = strlen(szSource);
    char *out = szFinal;
    size_t n;

    for (n = 0; n < nMax; ++n)
    {
        /*
         * Convert through unsigned char first: where plain char is signed,
         * widening a byte >= 0x80 directly would sign-extend it (0xE9 would
         * become 0xFFE9) and produce a bogus three-byte sequence.
         */
        unsigned int ch = (unsigned char)szSource[n];

        if (ch == '=')
        {
            out = AppendEscapedByte(out, ch);
        }
        else if (ch < 128)
        {
            *out++ = (char)ch;
        }
        else
        {
            /* 110xxxxx 10xxxxxx: lead byte carries the top bits, trail the low six. */
            out = AppendEscapedByte(out, 192 + (ch / 64));
            out = AppendEscapedByte(out, 128 + (ch % 64));
        }
    }

    *out = '\0';
}
-
-
unsigned char MakeByte(char ch1, char ch2);

/*
 * DecodeFromUTF8 - decode "=XX"-escaped UTF-8 text back into a char string.
 *
 * Characters other than '=' are copied through unchanged. A '=' starts an
 * escape: the two following characters are turned into a byte with
 * MakeByte, and that byte selects the sequence length:
 *   0x00-0x7F  single byte
 *   0xC0-0xDF  two bytes      0xE0-0xEF  three bytes
 *   0xF0-0xF7  four bytes     0xF8-0xFB  five bytes
 *   0xFC-0xFD  six bytes
 * Continuation bytes are read from the following "=XX" groups; the '='
 * separators between groups are not checked.
 *
 * The decoded code point is stored in a single char, so only its low eight
 * bits survive; values above 255 are truncated. A decoded value of 0 is
 * skipped, since it would terminate the output string.
 *
 * If the input ends in the middle of an escape sequence, decoding stops and
 * szFinal holds what was decoded so far.
 *
 * szSource: NUL-terminated encoded input.
 * szFinal:  receives the NUL-terminated result; it never needs more than
 *           strlen(szSource) + 1 bytes.
 */
void DecodeFromUTF8(char * szSource, char *szFinal)
{
    /* Signed on purpose: the "nMax - k" bounds checks must not wrap around. */
    int nMax = (int)strlen(szSource);
    char *out = szFinal;
    int n;

    for (n = 0; n < nMax; ++n)
    {
        unsigned char lead;
        unsigned long codePoint;
        int extra;      /* number of continuation bytes after the lead byte */
        int span;       /* characters consumed after the leading '=' */
        int k;
        char decoded;

        if (szSource[n] != '=')
        {
            *out++ = szSource[n];
            continue;
        }

        if (n >= nMax - 2) break; /* truncated escape */

        lead = MakeByte(szSource[n + 1], szSource[n + 2]);

        if (lead < 128)
        {
            /* Plain 7-bit value, including 0x7F. */
            extra = 0;
            codePoint = lead;
        }
        else if (lead >= 192 && lead <= 223)
        {
            extra = 1;
            codePoint = lead - 192u;
        }
        else if (lead >= 224 && lead <= 239)
        {
            extra = 2;
            codePoint = lead - 224u;
        }
        else if (lead >= 240 && lead <= 247)
        {
            extra = 3;
            codePoint = lead - 240u;
        }
        else if (lead >= 248 && lead <= 251)
        {
            extra = 4;
            codePoint = lead - 248u;
        }
        else if (lead >= 252 && lead <= 253)
        {
            extra = 5;
            codePoint = lead - 252u;
        }
        else
        {
            /*
             * 0x80-0xBF, 0xFE and 0xFF cannot start a sequence. The '=' is
             * dropped and the characters after it are handled as ordinary
             * input by the following iterations.
             */
            continue;
        }

        /* Each continuation byte occupies three more characters: "=XX". */
        span = 2 + 3 * extra;
        if (n >= nMax - span) break; /* truncated sequence */

        for (k = 1; k <= extra; ++k)
        {
            unsigned char cont = MakeByte(szSource[n + 3 * k + 1],
                                          szSource[n + 3 * k + 2]);

            /* Unsigned arithmetic: malformed bytes wrap instead of overflowing. */
            codePoint = codePoint * 64u + ((unsigned long)cont - 128u);
        }

        decoded = (char)(unsigned char)(codePoint & 0xFFu);
        if (decoded != '\0')
        {
            *out++ = decoded;
        }

        n += span;
    }

    *out = '\0';
}
-
-
/*
 * Value (0..15) of a single hexadecimal digit. Upper- and lowercase letters
 * are both accepted; any other character yields 0.
 */
static unsigned char HexNibble(char ch)
{
    if (ch >= '0' && ch <= '9')
    {
        return (unsigned char)(ch - '0');
    }
    if (ch >= 'A' && ch <= 'F')
    {
        return (unsigned char)(ch - 'A' + 10);
    }
    if (ch >= 'a' && ch <= 'f')
    {
        return (unsigned char)(ch - 'a' + 10);
    }
    return 0;
}

/*
 * MakeByte - helper for decoding: combine two hex digit characters into
 * one byte.
 *
 * ch1: digit for the high four bits.
 * ch2: digit for the low four bits.
 *
 * Returns the combined byte. A character that is not a hex digit
 * contributes 0 for its half; no error is reported.
 */
unsigned char MakeByte(char ch1, char ch2)
{
    unsigned char high = HexNibble(ch1);
    unsigned char low = HexNibble(ch2);

    return (unsigned char)((high << 4) | low);
}
[1]: http://www.codeproject.com/string/UTF8.asp
[2]: http://www1.tip.nl/~t876506/utf8tbl.html
Tags: utf8