-- My functions that I have gathered:
--- Encode a single Unicode code point as a UTF-8 byte string.
-- @param Unicode  code point as a non-negative integer (0 .. 0x10FFFF)
-- @return the 1- to 4-byte UTF-8 encoding, or "" when the code point lies
--         outside the encodable range
local function CodeToUTF8 (Unicode)
  -- Out-of-range values have no UTF-8 form; report them with the empty string.
  if Unicode < 0 or Unicode > 0x10FFFF then
    return ""
  end
  -- 1 byte: 0xxxxxxx
  if Unicode <= 0x7F then
    return string.char(Unicode)
  end
  -- 2 bytes: 110xxxxx 10xxxxxx
  if Unicode <= 0x7FF then
    local Byte0 = 0xC0 + math.floor(Unicode / 0x40)
    local Byte1 = 0x80 + (Unicode % 0x40)
    return string.char(Byte0, Byte1)
  end
  -- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if Unicode <= 0xFFFF then
    local Byte0 = 0xE0 + math.floor(Unicode / 0x1000)
    local Byte1 = 0x80 + (math.floor(Unicode / 0x40) % 0x40)
    local Byte2 = 0x80 + (Unicode % 0x40)
    return string.char(Byte0, Byte1, Byte2)
  end
  -- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  local Byte0 = 0xF0 + math.floor(Unicode / 0x40000)
  local Byte1 = 0x80 + (math.floor(Unicode / 0x1000) % 0x40)
  local Byte2 = 0x80 + (math.floor(Unicode / 0x40) % 0x40)
  local Byte3 = 0x80 + (Unicode % 0x40)
  return string.char(Byte0, Byte1, Byte2, Byte3)
end
-- Expose as string.CodeToUTF8 unless something already claimed that name.
if not string.CodeToUTF8 then string.CodeToUTF8 = CodeToUTF8 end
--- Decode the first UTF-8 encoded character of a string into its code point.
-- Only the lead byte decides the sequence length; continuation bytes are
-- reduced modulo 0x40 without being validated.
-- @param UTF8  string that starts with the encoded character
-- @return the code point as a number, or nil when the string is empty, the
--         sequence is cut short, or the first byte is not a recognised lead byte
local function CodeFromUTF8 (UTF8)
  local Byte0, Byte1, Byte2, Byte3 = string.byte(UTF8, 1, 4)
  if not Byte0 then
    return nil
  end
  -- 0xxxxxxx: plain ASCII
  if Byte0 < 0x80 then
    return Byte0
  end
  -- 110xxxxx: two-byte sequence
  if math.floor(Byte0 / 0x20) == 0x06 then
    if not Byte1 then return nil end
    return (Byte0 % 0x20) * 0x40 + (Byte1 % 0x40)
  end
  -- 1110xxxx: three-byte sequence
  if math.floor(Byte0 / 0x10) == 0x0E then
    if not (Byte1 and Byte2) then return nil end
    return (Byte0 % 0x10) * 0x1000 + (Byte1 % 0x40) * 0x40 + (Byte2 % 0x40)
  end
  -- 11110xxx: four-byte sequence
  if math.floor(Byte0 / 0x08) == 0x1E then
    if not (Byte1 and Byte2 and Byte3) then return nil end
    return (Byte0 % 0x08) * 0x40000 + (Byte1 % 0x40) * 0x1000
      + (Byte2 % 0x40) * 0x40 + (Byte3 % 0x40)
  end
  -- Continuation byte or other unrecognised lead byte.
  return nil
end
-- Expose as string.CodeFromUTF8 unless something already claimed that name.
if not string.CodeFromUTF8 then string.CodeFromUTF8 = CodeFromUTF8 end
--- Return the byte length of the UTF-8 character that starts at byte i of s.
-- Only the lead byte is inspected, using the lead-byte ranges of RFC 3629;
-- the continuation bytes are not checked.
-- @param s  the string to inspect
-- @param i  byte index of the character's first byte (defaults to 1)
-- @return 1, 2, 3 or 4, or nil when there is no byte at i or that byte
--         cannot start a UTF-8 character
local function utf8charbytes (s, i)
  -- argument defaults
  i = i or 1
  local c = string.byte(s, i)
  if c == nil then
    -- index lies outside the string
    return nil
  end
  -- determine bytes needed for character, based on RFC 3629
  if c <= 127 then
    -- UTF8-1 (includes the NUL byte)
    return 1
  elseif c >= 194 and c <= 223 then
    -- UTF8-2
    return 2
  elseif c >= 224 and c <= 239 then
    -- UTF8-3
    return 3
  elseif c >= 240 and c <= 244 then
    -- UTF8-4
    return 4
  end
  -- continuation byte (128-191) or a byte that never appears in UTF-8
  return nil
end
-- Expose as string.utf8charbytes unless something already claimed that name.
if not string.utf8charbytes then string.utf8charbytes = utf8charbytes end
--- Returns the number of characters in a UTF-8 string.
-- @param s  the string to measure
-- @return the character count (0 for the empty string)
local function utf8len(s)
  local pos = 1
  local bytes = string.len(s)
  local len = 0
  while pos <= bytes do
    len = len + 1
    -- A byte that cannot start a character is counted as one character so
    -- the scan always moves forward instead of failing on nil.
    pos = pos + (utf8charbytes(s, pos) or 1)
  end
  return len
end
-- Expose as string.utf8len unless something already claimed that name.
if not string.utf8len then string.utf8len = utf8len end
--- Functions like string.sub except that i and j count UTF-8 characters
-- instead of bytes.
-- @param s  the source string
-- @param i  index of the first character; negative values count from the end.
--           When nil the result is ""
-- @param j  index of the last character (defaults to -1, the last character)
-- @return the selected substring, or "" when the range selects nothing
local function utf8sub (s, i, j)
  j = j or -1
  if i == nil then
    return ""
  end
  local bytes = string.len(s)
  -- the character count is only needed to resolve negative indices
  local l = (i < 0 or j < 0) and utf8len(s) or 0
  local startChar = (i >= 0) and i or l + i + 1
  local endChar = (j >= 0) and j or l + j + 1
  -- can't have start before end, nor an end before the first character
  if startChar > endChar or endChar < 1 then
    return ""
  end
  -- byte offsets to pass to string.sub; a start that is never reached must
  -- select nothing, so it defaults to one past the last byte
  local startByte = (startChar <= 1) and 1 or bytes + 1
  local endByte = bytes
  local pos = 1
  local len = 0
  while pos <= bytes do
    len = len + 1
    if len == startChar then
      startByte = pos
    end
    -- step over a byte that cannot start a character one byte at a time
    pos = pos + (utf8charbytes(s, pos) or 1)
    if len == endChar then
      endByte = pos - 1
      break
    end
  end
  return string.sub(s, startByte, endByte)
end
-- Expose as string.utf8sub unless something already claimed that name.
if not string.utf8sub then string.utf8sub = utf8sub end
--- Replace UTF-8 characters based on a mapping table.
-- @param s        the source string
-- @param mapping  table keyed by single UTF-8 characters; a character that
--                 has no entry is copied through unchanged
-- @return a new string with every mapped character substituted
local function utf8replace (s, mapping)
  local pos = 1
  local bytes = string.len(s)
  -- collect the pieces and join once, rather than growing a string per character
  local pieces = {}
  while pos <= bytes do
    -- a byte that cannot start a character is handled as a one-byte unit
    local charbytes = utf8charbytes(s, pos) or 1
    local c = string.sub(s, pos, pos + charbytes - 1)
    pieces[#pieces + 1] = mapping[c] or c
    pos = pos + charbytes
  end
  return table.concat(pieces)
end
-- Expose as string.utf8replace unless something already claimed that name.
if not string.utf8replace then string.utf8replace = utf8replace end
--- Split a string into fields around a Lua-pattern separator.
-- An empty field before a leading separator is dropped; empty fields between
-- consecutive separators are kept; nothing is added after a trailing separator.
-- @param str  the string to split
-- @param pat  separator, interpreted as a Lua pattern (not plain text)
-- @return an array of the fields, in order
local function split(str, pat)
  local t = {}
  local fpat = "(.-)" .. pat
  local last_end = 1
  local s, e, cap = str:find(fpat, 1)
  while s do
    -- A zero-width match (empty separator, or a pattern that can match
    -- nothing) would never advance; stop and keep the rest as one field.
    if e < s then
      break
    end
    if s ~= 1 or cap ~= "" then
      table.insert(t, cap)
    end
    last_end = e + 1
    s, e, cap = str:find(fpat, last_end)
  end
  -- whatever follows the last separator is the final field
  if last_end <= #str then
    cap = str:sub(last_end)
    table.insert(t, cap)
  end
  return t
end
-- Expose as string.split unless something already claimed that name.
if not string.split then string.split = split end
-- [import]uid: 7177 topic_id: 31107 reply_id: 124408[/import]