11.10.2013, 08:54
UTF-8 encode/decode. Written using this as a reference.
pawn Код:
/*
 * Encodes the characters of `source` as UTF-8 and stores the resulting bytes
 * in `dest`, one byte per cell, followed by a terminating zero.
 *
 * dest       Output array; receives an unpacked, zero-terminated byte string.
 * source     Input string, packed or unpacked. Each character value is
 *            treated as a code point.
 * maxlength  Size of `dest` in cells, including the terminating zero.
 *
 * Code points are encoded with 1 to 6 bytes (the original 31-bit UTF-8
 * scheme). Characters whose value is zero or negative are skipped. If the
 * next sequence does not fit in `dest` together with the terminator, encoding
 * stops there, so the output never ends in a partial sequence.
 */
stock utf8encode(dest[], const source[], maxlength = sizeof(dest)) {
    if (maxlength <= 0) {
        return;
    }

    new len = strlen(source);
    new packed = ispacked(source);
    new idx = 0;

    for (new i = 0; i < len; i++) {
        new c = packed ? source{i} : source[i];
        new count; // total bytes in the encoded sequence
        new lead;  // marker bits of the first byte

        // Each form covers code points strictly below its upper bound, so
        // the boundary values 0x800, 0x10000, 0x200000 and 0x4000000 belong
        // to the longer form.
        if (c <= 0) {
            continue;
        } else if (c < 0x80) {
            count = 1;
            lead = 0b00000000;
        } else if (c < 0x800) {
            count = 2;
            lead = 0b11000000;
        } else if (c < 0x10000) {
            count = 3;
            lead = 0b11100000;
        } else if (c < 0x200000) {
            count = 4;
            lead = 0b11110000;
        } else if (c < 0x4000000) {
            count = 5;
            lead = 0b11111000;
        } else {
            count = 6;
            lead = 0b11111100;
        }

        // Keep one cell free for the terminating zero.
        if (idx + count >= maxlength) {
            break;
        }

        // The range checks above guarantee that the bits left after the
        // shift fit into the payload part of the lead byte.
        new shift = 6 * (count - 1);
        dest[idx++] = lead | (c >>> shift);

        // Remaining bits go out six at a time, most significant first.
        while (shift > 0) {
            shift -= 6;
            dest[idx++] = 0b10000000 | ((c >>> shift) & 0b00111111);
        }
    }

    dest[idx] = '\0';
}
/*
 * Decodes a UTF-8 byte string into code points, one code point per cell of
 * `dest`, followed by a terminating zero.
 *
 * dest       Output array; receives an unpacked, zero-terminated string.
 * source     UTF-8 input, packed or unpacked (one byte per character).
 * maxlength  Size of `dest` in cells, including the terminating zero.
 *
 * Sequences of 1 to 6 bytes are accepted. Bytes that cannot start a sequence
 * (stray continuation bytes, 0xFE, 0xFF) are dropped. So are sequences that
 * are cut short by the end of the input or by a byte that is not a
 * continuation byte. Decoding stops once `dest` is full.
 */
stock utf8decode(dest[], const source[], maxlength = sizeof(dest)) {
    if (maxlength <= 0) {
        return;
    }

    new len = strlen(source);
    new packed = ispacked(source);
    new idx = 0;

    // The loop condition keeps one cell free for the terminating zero.
    for (new i = 0; i < len && idx < maxlength - 1; i++) {
        new c = packed ? source{i} : source[i];

        // Values without bit 7 are copied through unchanged.
        if (!(c & 0b10000000)) {
            dest[idx++] = c;
            continue;
        }

        new extra; // continuation bytes that must follow the lead byte
        new value; // code point accumulated so far

        if ((c & 0b11100000) == 0b11000000) {
            extra = 1;
            value = c & 0b00011111;
        } else if ((c & 0b11110000) == 0b11100000) {
            extra = 2;
            value = c & 0b00001111;
        } else if ((c & 0b11111000) == 0b11110000) {
            extra = 3;
            value = c & 0b00000111;
        } else if ((c & 0b11111100) == 0b11111000) {
            extra = 4;
            value = c & 0b00000011;
        } else if ((c & 0b11111110) == 0b11111100) {
            extra = 5;
            value = c & 0b00000001;
        } else {
            // Not a valid lead byte.
            continue;
        }

        new bool:complete = true;
        while (extra > 0) {
            // Peek before consuming: a byte that breaks the sequence is left
            // in place so the outer loop decodes it on its own.
            if (i + 1 >= len) {
                complete = false;
                break;
            }
            new next = packed ? source{i + 1} : source[i + 1];
            if ((next & 0b11000000) != 0b10000000) {
                complete = false;
                break;
            }
            value = (value << 6) | (next & 0b00111111);
            i++;
            extra--;
        }

        if (complete) {
            dest[idx++] = value;
        }
    }

    dest[idx] = '\0';
}