The string.byte and string.char functions can be used to compute the length of, and extract substrings from, a string that contains characters outside the English alphabet:
--- Count the characters (not the bytes) of a UTF-8 encoded string.
-- NOTE: this replaces the standard string.len for the whole program, so
-- every caller (including s:len()) gets character counts from now on.
-- The # operator is unaffected and still yields the byte length.
-- @param s string to measure, interpreted as UTF-8
-- @return number of characters in s (0 for the empty string)
string.len=function(s)
  local len,k=0,1
  local nbytes=#s
  while k<=nbytes do
    local lead=string.byte(s,k)
    len=len+1
    -- The lead byte determines how many bytes the character occupies.
    if lead<192 then
      -- ASCII (0-127), or a stray continuation byte (128-191) that is
      -- counted on its own so malformed input cannot desynchronise the scan.
      k=k+1
    elseif lead<224 then
      k=k+2 -- 2-byte sequence (e.g. accented Latin letters)
    elseif lead<240 then
      k=k+3 -- 3-byte sequence (e.g. most CJK characters)
    else
      k=k+4 -- 4-byte sequence (characters beyond the basic plane)
    end
  end
  return len
end
--- Extract characters i..j (inclusive) of a UTF-8 encoded string.
-- Indices count characters, not bytes. As with the standard string.sub,
-- j defaults to -1 (the last character), negative indices count from the
-- end, and out-of-range indices are clamped instead of raising an error.
-- NOTE: this replaces the standard string.sub for the whole program, so
-- every caller (including s:sub(...)) gets character-based slicing.
-- @param s string to slice, interpreted as UTF-8
-- @param i index of the first character to keep
-- @param j index of the last character to keep (optional, default -1)
-- @return the selected characters, or "" when the range is empty
string.sub=function(s,i,j)
  -- Split s into a list holding one entry per character.
  local chars={}
  local k,nbytes=1,#s
  while k<=nbytes do
    local lead=string.byte(s,k)
    local width
    if lead<192 then
      width=1 -- ASCII, or a stray continuation byte kept on its own
    elseif lead<224 then
      width=2
    elseif lead<240 then
      width=3
    else
      width=4
    end
    -- string.byte(s,k,last) only returns bytes that exist, so a sequence
    -- truncated by the end of the string is kept as-is instead of failing.
    chars[#chars+1]=string.char(string.byte(s,k,k+width-1))
    k=k+width
  end
  -- Normalise the indices the same way the standard string.sub does.
  local n=#chars
  if j==nil then j=-1 end
  if i<0 then i=n+i+1 end
  if j<0 then j=n+j+1 end
  if i<1 then i=1 end
  if j>n then j=n end
  if i>j then return "" end
  -- Join once rather than concatenating inside a loop.
  return table.concat(chars,"",i,j)
end
Usage:
-- Demo: print how many characters string.len reports for the sample word,
-- then print each character returned by string.sub on its own line.
local word="a?r??jämnñ"
local glyph_count=string.len(word)
print(glyph_count)
for pos=1,glyph_count do
  local glyph=string.sub(word,pos,pos)
  print(glyph)
end
This works for European languages, but I think that with a couple of changes to the number of bytes used for each glyph, it can work for other languages too.
[import]uid: 6459 topic_id: 1633 reply_id: 5009[/import]