UTF-8 encode and decode: Difference between revisions

(→‎{{header|Phix}}: added PureBasic)
Line 1,414:
</pre>
 
=={{header|VBA}}==
<lang VB>Private Function unicode_2_utf8(x As Long) As Byte()
Dim y() As Byte
Dim r As Long
Select Case x
Case 0 To &H7F
ReDim y(0)
y(0) = x
Case &H80 To &H7FF
ReDim y(1)
y(0) = 192 + x \ 64
y(1) = 128 + x Mod 64
Case &H800 To &H7FFF
ReDim y(2)
y(2) = 128 + x Mod 64
r = x \ 64
y(1) = 128 + r Mod 64
y(0) = 224 + r \ 64
Case 32768 To 65535 '&H8000 To &HFFFF equals in VBA as -32768 to -1
ReDim y(2)
y(2) = 128 + x Mod 64
r = x \ 64
y(1) = 128 + r Mod 64
y(0) = 224 + r \ 64
Case &H10000 To &H10FFFF
ReDim y(3)
y(3) = 128 + x Mod 64
r = x \ 64
y(2) = 128 + r Mod 64
r = r \ 64
y(1) = 128 + r Mod 64
y(0) = 240 + r \ 64
Case Else
MsgBox "what else?" & x & " " & Hex(x)
End Select
unicode_2_utf8 = y
End Function
Private Function utf8_2_unicode(x() As Byte) As Long
Dim first As Long, second As Long, third As Long, fourth As Long
Dim total As Long
Select Case UBound(x) - LBound(x)
Case 0 'one byte
If x(0) < 128 Then
total = x(0)
Else
MsgBox "highest bit set error"
End If
Case 1 'two bytes and assume first byte is leading byte
If x(0) \ 32 = 6 Then
first = x(0) Mod 32
If x(1) \ 64 = 2 Then
second = x(1) Mod 64
Else
MsgBox "mask error"
End If
Else
MsgBox "leading byte error"
End If
total = 64 * first + second
Case 2 'three bytes and assume first byte is leading byte
If x(0) \ 16 = 14 Then
first = x(0) Mod 16
If x(1) \ 64 = 2 Then
second = x(1) Mod 64
If x(2) \ 64 = 2 Then
third = x(2) Mod 64
Else
MsgBox "mask error last byte"
End If
Else
MsgBox "mask error middle byte"
End If
Else
MsgBox "leading byte error"
End If
total = 4096 * first + 64 * second + third
Case 3 'four bytes and assume first byte is leading byte
If x(0) \ 8 = 30 Then
first = x(0) Mod 8
If x(1) \ 64 = 2 Then
second = x(1) Mod 64
If x(2) \ 64 = 2 Then
third = x(2) Mod 64
If x(3) \ 64 = 2 Then
fourth = x(3) Mod 64
Else
MsgBox "mask error last byte"
End If
Else
MsgBox "mask error third byte"
End If
Else
MsgBox "mask error second byte"
End If
Else
MsgBox "mask error leading byte"
End If
total = CLng(262144 * first + 4096 * second + 64 * third + fourth)
Case Else
MsgBox "more bytes than expected"
End Select
utf8_2_unicode = total
End Function
Public Sub program()
Dim cp As Variant
Dim r() As Byte, s As String
cp = [{65, 246, 1046, 8364, 119070}] '[{&H0041,&H00F6,&H0416,&H20AC,&H1D11E}]
Debug.Print "ch unicode UTF-8 encoded decoded"
For Each cpi In cp
r = unicode_2_utf8(CLng(cpi))
On Error Resume Next
s = CStr(Hex(cpi))
Debug.Print ChrW(cpi); String$(10 - Len(s), " "); s,
If Err.Number = 5 Then Debug.Print "?"; String$(10 - Len(s), " "); s,
s = ""
For Each yz In r
s = s & CStr(Hex(yz)) & " "
Next yz
Debug.Print String$(13 - Len(s), " "); s;
s = CStr(Hex(utf8_2_unicode(r)))
Debug.Print String$(8 - Len(s), " "); s
Next cpi
End Sub</lang>{{out}}<pre>ch unicode UTF-8 encoded decoded
A 41 41 41
ö F6 C3 B6 F6
? 416 D0 96 416
€ 20AC E2 82 AC 20AC
? 1D11E F0 9D 84 9E 1D11E
</pre>
=={{header|zkl}}==
<lang zkl>println("Char Unicode UTF-8");
255

edits