本文关于 VB、UTF-8 与 GB2312,据
亚洲金融智库 2021-07-20 讯:
,,治,:йUTF-8Щ20080613 08:17, ,UTF-8UnicodeUTF-8 UTF UCS Transformation Format. UTF-8 6 UTF-8 ASC II(0-127) UTF-8 ASC II ASC II ± 1λλ110xxxxx 10xxxxxx λ1110xxxx 10xxxxxx 10xxxxxx.λ1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx xxx λ磺 Unicode 00 A9 = 1010 1001 UTF-8 11000010 10101001 = 0x C2 0xA9; 22 60 () = 0010 0010 0110 0000 UTF-8 11100010 10001001 10100000 = 0xE2 0x89 0xA0,,Unicodeutf-8 The table below summarizes the format of these different octet types. The letter x indicates bits available for encoding bits of the character number.Char. number range | UTF-8 octet sequence (hexadecimal) | (binary) --------------------+--------------------------------------------- 0000 0000-0000 007F | 0xxxxxxx 0000 0080-0000 07FF | 110xxxxx 10xxxxxx 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx //////A///////// 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxxUnicodeutf-8UnicodeΧ0000 0800-0000 FFFF С, BOMUTF-8UTF-16UTF-16Unicode594EUnicode4E59UTF-16594E UnicodeBOMBOMBill Of MaterialBOMByte Order MarkBOMеС UCS"ZERO WIDTH NO-BREAK SPACE"FEFFFFFEUCSóСUCS"ZERO WIDTH NO-BREAK SPACE" FEFFBig-EndianFFFELittle-Endian"ZERO WIDTH NO-BREAK SPACE"BOM UTF-8BOMBOM"ZERO WIDTH NO-BREAK SPACE"UTF-8EF BB BFEF BB BFUTF-8, VBUTF-8Unicode1.API

' Utf8ToUnicode (pure VB variant)
' Decodes a UTF-8 byte array into a VB string without any API call.
'   Utf     - byte array holding the UTF-8 data; may be unallocated.
'   Returns - the decoded string; an empty string for an unallocated array.
' A sequence that is cut off at the end of the array is replaced by "?" and
' decoding stops there. Code points above U+FFFF are emitted as a UTF-16
' surrogate pair (ChrW cannot take values above 65535); values above
' U+10FFFF, which UTF-16 cannot represent, become "?".
Function Utf8ToUnicode(ByRef Utf() As Byte) As String
    Dim firstIdx As Long, lastIdx As Long
    Dim pos As Long, outIdx As Long, k As Long
    Dim codePoint As Long
    Dim lead As Byte
    Dim seqLen As Long
    Dim parts() As String

    ' LBound/UBound raise on an unallocated array; treat that as empty input.
    lastIdx = -1
    On Error Resume Next
    firstIdx = LBound(Utf)
    lastIdx = UBound(Utf)
    On Error GoTo 0
    If lastIdx < firstIdx Then Exit Function

    ' One output slot per input byte is always enough.
    ReDim parts(0 To lastIdx - firstIdx)

    pos = firstIdx
    outIdx = 0
    Do While pos <= lastIdx
        lead = Utf(pos)

        ' The number of leading 1 bits in the lead byte gives the sequence length.
        If (lead And &HFC) = &HFC Then
            seqLen = 6
        ElseIf (lead And &HF8) = &HF8 Then
            seqLen = 5
        ElseIf (lead And &HF0) = &HF0 Then
            seqLen = 4
        ElseIf (lead And &HE0) = &HE0 Then
            seqLen = 3
        ElseIf (lead And &HC0) = &HC0 Then
            seqLen = 2
        Else
            seqLen = 1
        End If

        ' Sequence runs past the end of the data: mark it and stop.
        If pos + seqLen - 1 > lastIdx Then
            parts(outIdx) = "?"
            Exit Do
        End If

        If seqLen = 1 Then
            parts(outIdx) = Chr(lead)
        Else
            ' Payload bits carried by the lead byte.
            Select Case seqLen
                Case 2: codePoint = lead And &H1F
                Case 3: codePoint = lead And &HF
                Case 4: codePoint = lead And &H7
                Case 5: codePoint = lead And &H3
                Case Else: codePoint = lead And &H1
            End Select

            ' Each following byte contributes its low six bits.
            For k = 1 To seqLen - 1
                codePoint = codePoint * &H40 + (Utf(pos + k) And &H3F)
            Next k

            If codePoint < &H10000 Then
                parts(outIdx) = ChrW(codePoint)
            ElseIf codePoint <= &H10FFFF Then
                ' Split into a high/low surrogate pair.
                codePoint = codePoint - &H10000
                parts(outIdx) = ChrW(&HD800& + (codePoint \ &H400)) & _
                                ChrW(&HDC00& + (codePoint And &H3FF))
            Else
                parts(outIdx) = "?"
            End If
        End If

        pos = pos + seqLen
        outIdx = outIdx + 1
    Loop

    Utf8ToUnicode = Join(parts, "")
End Function

2. API (UnicodeUTF-8)

' NOTE: this variant reuses the name Utf8ToUnicode; keep only one of the two
' Utf8ToUnicode implementations in a given module.
Private Declare Function WideCharToMultiByte Lib "kernel32" ( _
    ByVal CodePage As Long, _
    ByVal dwFlags As Long, _
    ByVal lpWideCharStr As Long, _
    ByVal cchWideChar As Long, _
    ByRef lpMultiByteStr As Any, _
    ByVal cchMultiByte As Long, _
    ByVal lpDefaultChar As String, _
    ByVal lpUsedDefaultChar As Long) As Long

Private Declare Function MultiByteToWideChar Lib "kernel32" ( _
    ByVal CodePage As Long, _
    ByVal dwFlags As Long, _
    ByVal lpMultiByteStr As Long, _
    ByVal cchMultiByte As Long, _
    ByVal lpWideCharStr As Long, _
    ByVal cchWideChar As Long) As Long

Private Const CP_UTF8 = 65001

' Utf8ToUnicode (API variant)
' Decodes a UTF-8 byte array through MultiByteToWideChar(CP_UTF8).
'   Utf     - byte array holding the UTF-8 data; may be unallocated and may
'             have any lower bound.
'   Returns - the decoded string, or an empty string when the array is empty
'             or the API call reports failure (returns 0).
Function Utf8ToUnicode(ByRef Utf() As Byte) As String
    Dim firstIdx As Long
    Dim byteCount As Long
    Dim charCount As Long
    Dim wideBuf As String

    ' LBound/UBound raise on an unallocated array; byteCount then stays 0.
    On Error Resume Next
    firstIdx = LBound(Utf)
    byteCount = UBound(Utf) - LBound(Utf) + 1
    On Error GoTo 0
    If byteCount <= 0 Then Exit Function

    ' A UTF-8 sequence never yields more UTF-16 units than it has bytes,
    ' so byteCount characters is a sufficient buffer.
    wideBuf = String$(byteCount, vbNullChar)
    charCount = MultiByteToWideChar(CP_UTF8, 0, VarPtr(Utf(firstIdx)), byteCount, _
                                    StrPtr(wideBuf), byteCount)

    ' Return only the part actually written; on failure return "" rather
    ' than the NUL-filled scratch buffer.
    If charCount <> 0 Then Utf8ToUnicode = Left$(wideBuf, charCount)
End Function

' UnicodeToUtf8
' Encodes a VB string as UTF-8 through WideCharToMultiByte(CP_UTF8).
'   UCS     - the string to encode.
'   Returns - a zero-based byte array with exactly the encoded bytes; the
'             array is left unallocated when UCS is empty or the API call
'             returns 0.
Function UnicodeToUtf8(ByVal UCS As String) As Byte()
    Dim charCount As Long
    Dim capacity As Long
    Dim written As Long
    Dim encoded() As Byte

    charCount = Len(UCS)
    If charCount = 0 Then Exit Function

    ' Same sizing as before: three bytes per UTF-16 unit plus one spare.
    capacity = charCount * 3 + 1
    ReDim encoded(capacity - 1)

    written = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), charCount, encoded(0), _
                                  capacity, vbNullString, 0)
    If written <> 0 Then
        ' written is a byte count; the upper bound is one less.
        ReDim Preserve encoded(written - 1)
        UnicodeToUtf8 = encoded
    End If
End Function

' Demo: encodes one character to UTF-8, prints the bytes in hex, then prints
' the decoded round-trip result.
Private Sub Command1_Click()
    Dim byt() As Byte
    Dim hexText As String
    Dim idx As Long

    ' NOTE(review): the sample literal is empty in this copy of the code, which
    ' made byt(0) fail with "Subscript out of range". A three-byte character
    ' (U+4E59) is built with ChrW so the sample does not depend on file encoding.
    byt = UnicodeToUtf8(ChrW(&H4E59))

    For idx = LBound(byt) To UBound(byt)
        hexText = hexText & Hex(byt(idx))
    Next idx
    Debug.Print hexText
    Debug.Print Utf8ToUnicode(byt())
End Sub
' Usage example:
''Text1.Text = UTF8_Decode(UTF8Zfc)
'***********************************
' UTF-8 to Unicode conversion
Public Declare Function MultiByteToWideChar Lib "kernel32" ( _
    ByVal CodePage As Long, _
    ByVal dwFlags As Long, _
    ByRef lpMultiByteStr As Any, _
    ByVal cchMultiByte As Long, _
    ByVal lpWideCharStr As Long, _
    ByVal cchWideChar As Long) As Long

Public Const CP_UTF8 = 65001

' Version record filled in by GetVersionExA.
Private Type OSVERSIONINFO
    dwOSVersionInfoSize As Long
    dwMajorVersion As Long
    dwMinorVersion As Long
    dwBuildNumber As Long
    dwPlatformId As Long
    szCSDVersion As String * 128
End Type

' GetVersionExA returns a 32-bit BOOL, so the result is declared As Long.
Private Declare Function GetVersionExA Lib "kernel32" ( _
    lpVersionInformation As OSVERSIONINFO) As Long

' GetVersion
' Returns a label for the running Windows version. The first character is
' the platform id reported by GetVersionExA ("1" = Windows 9x family,
' "2" = Windows NT family); UTF8_Decode keys off that first character.
' Returns "Failed" for any other platform id.
' NT versions not listed explicitly (including major version 6 and later)
' are reported as "2Windows NT <major>.<minor>" instead of an empty string,
' so callers still recognise them as NT-based.
Public Function GetVersion() As String
    Dim osinfo As OSVERSIONINFO
    Dim retvalue As Long

    osinfo.dwOSVersionInfoSize = 148    ' 5 Longs (20 bytes) + 128-byte szCSDVersion
    osinfo.szCSDVersion = Space$(128)
    retvalue = GetVersionExA(osinfo)

    With osinfo
        Select Case .dwPlatformId
            Case 1
                Select Case .dwMinorVersion
                    Case 0
                        GetVersion = "1Windows 95"
                    Case 10
                        GetVersion = "1Windows 98"
                    Case 90
                        GetVersion = "1Windows Mellinnium"
                End Select
            Case 2
                Select Case .dwMajorVersion
                    Case 3
                        GetVersion = "2Windows NT 3.51"
                    Case 4
                        GetVersion = "2Windows NT 4.0"
                    Case 5
                        If .dwMinorVersion = 0 Then
                            GetVersion = "2Windows 2000"
                        Else
                            GetVersion = "2Windows XP"
                        End If
                    Case Else
                        GetVersion = "2Windows NT " & .dwMajorVersion & "." & .dwMinorVersion
                End Select
            Case Else
                GetVersion = "Failed"
        End Select
    End With
End Function

' Converts one Unicode code point to its UTF-16 string form: a single
' character up to U+FFFF, a surrogate pair up to U+10FFFF, "?" beyond that.
Private Function Utf8CodePointToString(ByVal codePoint As Long) As String
    If codePoint < &H10000 Then
        Utf8CodePointToString = ChrW$(codePoint)
    ElseIf codePoint <= &H10FFFF Then
        codePoint = codePoint - &H10000
        Utf8CodePointToString = ChrW$(&HD800& + (codePoint \ &H400)) & _
                                ChrW$(&HDC00& + (codePoint And &H3FF))
    Else
        Utf8CodePointToString = "?"
    End If
End Function

' UTF8_Decode
' Converts UTF-8 data to a normal VB (Unicode) string.
'   sUTF8   - a string whose raw bytes are the UTF-8 data (the string is
'             assigned directly to a Byte array, not run through StrConv).
'   Returns - the decoded text; an empty string for empty input or when the
'             API conversion fails.
' On NT-family Windows (GetVersion starts with "2") the conversion is done
' by MultiByteToWideChar(CP_UTF8); otherwise a manual decoder is used.
Public Function UTF8_Decode(ByVal sUTF8 As String) As String
    Dim lngUtf8Size As Long
    Dim strBuffer As String
    Dim lngBufferSize As Long
    Dim lngResult As Long
    Dim bytUtf8() As Byte

    If LenB(sUTF8) = 0 Then Exit Function

    If Left$(GetVersion(), 1) = "2" Then
        On Error GoTo EndFunction
        ' Take the string's bytes as they are (no ANSI conversion).
        bytUtf8 = sUTF8
        lngUtf8Size = UBound(bytUtf8) + 1
        On Error GoTo 0

        ' Buffer for the longest possible result: size it generously at two
        ' characters per input byte.
        lngBufferSize = lngUtf8Size * 2
        strBuffer = String$(lngBufferSize, vbNullChar)

        ' Translate using code page 65001 (UTF-8).
        lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
                                        lngUtf8Size, StrPtr(strBuffer), lngBufferSize)

        ' Trim the result to the number of characters actually written.
        If lngResult Then
            UTF8_Decode = Left$(strBuffer, lngResult)
        End If
    Else
        Dim i As Long
        Dim k As Long
        Dim lastIdx As Long
        Dim seqLen As Long
        Dim codePoint As Long
        Dim lead As Byte
        Dim wellFormed As Boolean
        Dim TStr As String
        Dim BArray() As Byte

        BArray = sUTF8
        lastIdx = UBound(BArray)

        i = 0
        Do While i <= lastIdx
            lead = BArray(i)

            ' Classify the lead byte and keep its payload bits. codePoint is
            ' a Long so the shifts below cannot overflow Integer arithmetic.
            If lead >= &HF0 And lead <= &HF4 Then
                seqLen = 4: codePoint = lead And &H7
            ElseIf lead >= &HE0 And lead <= &HEF Then
                seqLen = 3: codePoint = lead And &HF
            ElseIf lead >= &HC2 And lead <= &HDF Then
                seqLen = 2: codePoint = lead And &H1F
            Else
                seqLen = 1
            End If

            ' A multi-byte sequence must fit in the data and every trailing
            ' byte must look like 10xxxxxx.
            wellFormed = (seqLen > 1) And (i + seqLen - 1 <= lastIdx)
            If wellFormed Then
                For k = 1 To seqLen - 1
                    If (BArray(i + k) And &HC0) <> &H80 Then
                        wellFormed = False
                        Exit For
                    End If
                    codePoint = codePoint * &H40 + (BArray(i + k) And &H3F)
                Next k
            End If

            If wellFormed Then
                TStr = TStr & Utf8CodePointToString(codePoint)
                i = i + seqLen
            Else
                ' Plain ASCII, or a byte that is not valid UTF-8 here (for
                ' example accented ANSI text that was never encoded as
                ' UTF-8): use it as is.
                TStr = TStr & Chr$(lead)
                i = i + 1
            End If
        Loop

        UTF8_Decode = TStr
        Erase BArray
    End If
EndFunction:
End Function
专题推荐:
gb2312(2)uft(2)vb(2)