--- Return how many bytes the UTF-8 sequence introduced by a lead byte occupies.
-- The ranges follow the classic UTF-8 lead-byte layout, including the legacy
-- 5- and 6-byte forms. Bytes that cannot start a sequence (continuation bytes
-- 0x80-0xBF and 0xFE/0xFF) report 1 so that a scanner always advances.
-- @param ch  byte value in 0..255, e.g. the result of string.byte
-- @return sequence length in 1..6, or nil when ch is not a number in 0..255
local function GetUTF8CharLength(ch)
  if type(ch) ~= "number" or ch < 0 or ch > 255 then
    return nil
  end
  -- Range comparisons replace a 256-entry table that was indexed with the raw
  -- byte value; because Lua tables are 1-based, every boundary was off by one
  -- (0xE0 reported 2, 0xF0 reported 3, byte 0 reported nil).
  if ch < 0xC0 then return 1 end -- ASCII, or a stray continuation byte
  if ch < 0xE0 then return 2 end
  if ch < 0xF0 then return 3 end
  if ch < 0xF8 then return 4 end
  if ch < 0xFC then return 5 end -- legacy 5-byte form
  if ch < 0xFE then return 6 end -- legacy 6-byte form
  return 1 -- 0xFE / 0xFF never start a sequence; skip a single byte
end
--- Count the characters in a UTF-8 encoded string.
-- Example: GetUTF8Length("一二三四五六七") returns 7.
-- @param str  UTF-8 encoded string
-- @return number of characters; 0 for the empty string
local function GetUTF8Length(str)
  local total_bytes = #str
  local len = 0
  local ptr = 1
  -- The bound is checked before each read so an empty string yields 0
  -- instead of reading a byte that does not exist.
  while ptr <= total_bytes do
    -- Fall back to one byte when the helper cannot classify the byte, so the
    -- scan always moves forward.
    local char_len = GetUTF8CharLength(string.byte(str, ptr)) or 1
    len = len + 1
    ptr = ptr + char_len
  end
  return len
end
--- Extract a run of characters from a UTF-8 encoded string.
-- Example: SubUTF8String("一二三四五六七", 1, 3) returns "一二三".
-- @param str     UTF-8 encoded string
-- @param begin   1-based index of the first character to keep (default 1)
-- @param length  number of characters to keep; -1 (the default) means
--                "everything from begin to the end of the string"
-- @return the selected characters, or "" when nothing falls inside the window
local function SubUTF8String(str, begin, length)
  begin = begin or 1
  length = length or -1 -- -1 means no length limit
  local unlimited = (length == -1)
  local last_index = begin + length - 1 -- index of the last wanted character

  local total_bytes = #str
  local ptr = 1
  local index = 0
  local first_byte, last_byte

  while ptr <= total_bytes do
    -- Every later character lies past the window, so stop scanning.
    if not unlimited and index >= last_index then
      break
    end
    -- Fall back to one byte when the helper cannot classify the byte, so the
    -- scan always moves forward.
    local char_len = GetUTF8CharLength(string.byte(str, ptr)) or 1
    index = index + 1
    if index >= begin and (unlimited or index <= last_index) then
      first_byte = first_byte or ptr
      last_byte = ptr + char_len - 1
    end
    ptr = ptr + char_len
  end

  if not first_byte then
    return ""
  end
  -- The selected characters are contiguous, so a single slice suffices;
  -- string.sub clamps the end if the final sequence is truncated.
  return string.sub(str, first_byte, last_byte)
end
--- Split a UTF-8 string into consecutive chunks of `len` characters and
-- append `add` after every chunk (the last chunk may be shorter).
-- @param str  UTF-8 encoded string
-- @param len  number of characters per chunk
-- @param add  text appended after each chunk
-- @return the chunks joined together, each followed by `add`
local function test(str, len, add)
  local chunk_count = math.ceil(GetUTF8Length(str) / len)
  local pieces = {}
  for chunk = 1, chunk_count do
    local first_char = (chunk - 1) * len + 1
    pieces[#pieces + 1] = SubUTF8String(str, first_char, len) .. add
  end
  return table.concat(pieces)
end
-- Demo: break a mixed CJK/ASCII string into rows of 7 characters, ending each
-- row with "~~" and a newline, then print the result.
local str = "一二三四五六七一二三四五六七一二三四五六七一二三四五六七123"
local wrapped = test(str, 7, "~~\n")
print(wrapped)
--[[
Output:
一二三四五六七~~
一二三四五六七~~
一二三四五六七~~
一二三四五六七~~
123~~
]]