-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutf8togb2312.hpp
158 lines (141 loc) · 3.26 KB
/
utf8togb2312.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
//#include <stdio.h>
//#include <string.h>
//#include "cp936.hpp"
#include <pgmspace.h>
#include "utf16.h"
/*
#define uint8_t unsigned char
#define uint16_t unsigned short
#define uint32_t unsigned int*/
/**
 * Count the consecutive 1 bits at the most-significant end of a byte.
 *
 * For the first byte of a UTF-8 sequence this is the sequence length
 * indicator: 0 for a plain 7-bit byte (0xxxxxxx), 1 for a continuation
 * byte (10xxxxxx), 2..6 for multi-byte lead bytes. 0xFE yields 7 and
 * 0xFF yields 8.
 *
 * @param firstCh byte to inspect
 * @return number of leading 1 bits, in the range 0..8
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
    int leadingOnes = 0;
    // Walk a single-bit mask from bit 7 downwards until a 0 bit is met
    // (or the mask has been shifted out entirely).
    for (uint8_t mask = 0x80; (mask & firstCh) != 0; mask = (mask >> 1))
    {
        ++leadingOnes;
    }
    return leadingOnes;
}
/*uint16_t SearchCodeTable(uint16_t unicodeKey)
{
int first = 0;
int end = CODE_TABLE_SIZE - 1;
int mid = 0;
while (first <= end)
{
mid = (first + end) / 2;
if (code_table[mid].unicode == unicodeKey)
{
return code_table[mid].gb;
}
else if (code_table[mid].unicode > unicodeKey)
{
end = mid - 1;
}
else
{
first = mid + 1;
}
}
return 0xA1F5;//□
}*/
/**
 * Look up the GB2312 code for a UTF-16 code unit (used to read utf16.bin).
 *
 * Performs a binary search over `code_table`, read through pgm_read_byte.
 * Each entry occupies 4 bytes:
 *   byte 0 = Unicode low byte,  byte 1 = Unicode high byte,
 *   byte 2 = GB code high byte, byte 3 = GB code low byte.
 * The search assumes the entries are sorted by ascending Unicode value.
 *
 * @param unicodeKey Unicode value to look up
 * @return the GB code stored for the key, or 0xA1F5 ("□") when the key is
 *         not in the table (placeholder for characters GB2312 lacks)
 */
uint16_t SearchCodeTable(uint16_t unicodeKey){
    int lo = 0;
    int hi = CODE_TABLE_SIZE - 1; // CODE_TABLE_SIZE for this file is 6963
    while (lo <= hi){
        const int probe = (lo + hi) / 2;
        const int offset = probe * 4; // byte offset of the probed entry
        const uint16_t entryUnicode =
            (pgm_read_byte(code_table + offset + 1) << 8) | pgm_read_byte(code_table + offset);
        if (entryUnicode < unicodeKey){
            lo = probe + 1;
            continue;
        }
        if (entryUnicode > unicodeKey){
            hi = probe - 1;
            continue;
        }
        // Exact match: assemble the GB code from bytes 2 (high) and 3 (low).
        return (pgm_read_byte(code_table + offset + 2) << 8) | pgm_read_byte(code_table + offset + 3);
    }
    return 0xA1F5; // "□", substituted for characters not included in GB2312
}
/**
 * Convert the three-byte UTF-8 sequences in a buffer to GB2312 codes.
 *
 * The input is scanned sequence by sequence, using the leading-1-bit count
 * of each lead byte (GetUtf8ByteNumForWord) as the sequence length:
 *   - 0 or 1 leading ones (7-bit byte / stray continuation byte): skipped,
 *     1 byte consumed, nothing written;
 *   - 2 leading ones: skipped, 2 bytes consumed, nothing written;
 *   - 3 leading ones: decoded to a 16-bit Unicode value, translated with
 *     SearchCodeTable, and appended to gbArray;
 *   - 4, 5 or 6 leading ones: skipped, that many bytes consumed;
 *   - more than 6 (0xFE / 0xFF): an error is logged and 0 is returned.
 *
 * A three-byte sequence that is cut off by the end of the buffer stops the
 * conversion; the entries converted so far are kept and counted.
 *
 * @param utf8    UTF-8 input bytes (need not be NUL-terminated)
 * @param len     number of bytes available in utf8
 * @param gbArray output array; one entry is written per three-byte sequence,
 *                so it must have room for at least len / 3 entries
 * @return number of entries written to gbArray; 0 for null/empty input or
 *         when an invalid lead byte (more than 6 leading ones) is met
 */
uint32_t Utf8ToGb2312(const char* utf8, int len,uint16_t* gbArray)
{
    if (utf8 == nullptr || gbArray == nullptr || len <= 0){
        return 0;
    }

    uint32_t outCount = 0;
    int i = 0;
    // Parse the input one UTF-8 sequence at a time.
    while (i < len){
        const int leadingOnes = GetUtf8ByteNumForWord(static_cast<uint8_t>(utf8[i]));
        int byteCount = 0;
        switch (leadingOnes){
        case 0:
        case 1:
            // 7-bit byte or stray continuation byte: not converted.
            byteCount = 1;
            break;
        case 2:
            // Two-byte sequence: not converted.
            byteCount = 2;
            break;
        case 3:
        {
            // Do not read past the end of the buffer on a truncated sequence.
            if (i + 2 >= len){
                return outCount;
            }
            const uint8_t b0 = static_cast<uint8_t>(utf8[i]);
            const uint8_t b1 = static_cast<uint8_t>(utf8[i + 1]);
            const uint8_t b2 = static_cast<uint8_t>(utf8[i + 2]);
            // UTF-8 -> Unicode: 4 bits from b0, 6 bits each from b1 and b2.
            // Built with shifts so the result does not depend on byte order.
            const uint16_t unicodeKey = static_cast<uint16_t>(
                (static_cast<uint32_t>(b0 & 0x0F) << 12) |
                (static_cast<uint32_t>(b1 & 0x3F) << 6) |
                static_cast<uint32_t>(b2 & 0x3F));
            // Look up the matching GB2312 value in the code table.
            gbArray[outCount++] = SearchCodeTable(unicodeKey);
            byteCount = 3;
            break;
        }
        case 4:
        case 5:
        case 6:
            // Longer sequences lie outside the 16-bit table: skipped.
            byteCount = leadingOnes;
            break;
        default:
            Serial.printf("the len is more than 6\n");
            return 0;
        }
        i += byteCount;
    }
    return outCount;
}
/**
 * Demo: convert a fixed UTF-8 string with Utf8ToGb2312 and write each
 * resulting GB code to Serial as two bytes, high byte first.
 *
 * NOTE(review): the function is named `mian`, not `main` — presumably on
 * purpose so it does not act as a program entry point; confirm before
 * renaming.
 *
 * @return always 0
 */
int mian(){
    char str[]="人之初,性本善,性相近,习相远。";
    // sizeof includes the terminating NUL, so the payload is one byte shorter.
    const int num = sizeof(str) - 1;
    // Compile-time sized buffer (no variable-length array); one slot per
    // input byte is more than the converter can ever fill.
    uint16_t gbArray[sizeof(str)];
    // Use the count reported by the converter instead of assuming that
    // every character is three bytes long, so no unwritten slot is printed.
    const uint32_t converted = Utf8ToGb2312(str,num,gbArray);
    for(uint32_t idx=0;idx<converted;idx++){
        Serial.print(static_cast<char>(gbArray[idx]>>8)); // high byte
        Serial.print(static_cast<char>(gbArray[idx]));    // low byte
    }
    return 0;
}