From f4847e9f8fc7897722c9a90de4728334e1531692 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 1 Jun 2022 12:05:53 +0300 Subject: [PATCH] add all libs from standard AIO Launcher kit --- lib/date.lua | 742 ++++++++++++++++++++++++++++++++++ lib/eval.lua | 10 + lib/fun.lua | 1058 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/html.lua | 264 ++++++++++++ lib/json.lua | 388 ++++++++++++++++++ lib/utf8.lua | 1049 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/utils.lua | 1 + lib/xml.lua | 6 +- 8 files changed, 3513 insertions(+), 5 deletions(-) create mode 100644 lib/date.lua create mode 100644 lib/eval.lua create mode 100644 lib/fun.lua create mode 100644 lib/html.lua create mode 100644 lib/json.lua create mode 100644 lib/utf8.lua diff --git a/lib/date.lua b/lib/date.lua new file mode 100644 index 0000000..f33b565 --- /dev/null +++ b/lib/date.lua @@ -0,0 +1,742 @@ +--------------------------------------------------------------------------------------- +-- Module for date and time calculations +-- +-- Version 2.2 +-- Copyright (C) 2005-2006, by Jas Latrix (jastejada@yahoo.com) +-- Copyright (C) 2013-2021, by Thijs Schreijer +-- Licensed under MIT, http://opensource.org/licenses/MIT + +--[[ CONSTANTS ]]-- + local HOURPERDAY = 24 + local MINPERHOUR = 60 + local MINPERDAY = 1440 -- 24*60 + local SECPERMIN = 60 + local SECPERHOUR = 3600 -- 60*60 + local SECPERDAY = 86400 -- 24*60*60 + local TICKSPERSEC = 1000000 + local TICKSPERDAY = 86400000000 + local TICKSPERHOUR = 3600000000 + local TICKSPERMIN = 60000000 + local DAYNUM_MAX = 365242500 -- Sat Jan 01 1000000 00:00:00 + local DAYNUM_MIN = -365242500 -- Mon Jan 01 1000000 BCE 00:00:00 + local DAYNUM_DEF = 0 -- Mon Jan 01 0001 00:00:00 + local _; +--[[ GLOBAL SETTINGS ]]-- + local centuryflip = 0 -- year >= centuryflip == 1900, < centuryflip == 2000 +--[[ LOCAL ARE FASTER ]]-- + local type = type + local pairs = pairs + local error = error + local assert = assert + local tonumber = tonumber + local tostring = tostring + local string = string + local math = math + local os = os + local unpack = unpack or table.unpack + local setmetatable = setmetatable + local getmetatable = getmetatable +--[[ EXTRA FUNCTIONS ]]-- + local fmt = string.format + local lwr = string.lower + local rep = string.rep + local len = string.len -- luacheck: ignore + local sub = string.sub + local gsub = string.gsub + local gmatch = string.gmatch or string.gfind + local find = string.find + local ostime = os.time + local osdate = os.date + local floor = math.floor + local ceil = math.ceil + local abs = math.abs + -- removes the decimal part of a number + local function fix(n) n = tonumber(n) return n and ((n > 0 and floor or ceil)(n)) end + -- returns the modulo n % d; + local function mod(n,d) return n - d*floor(n/d) end + -- is `str` in string list `tbl`, `ml` is the minimun len + local function inlist(str, tbl, ml, tn) + local sl = len(str) + if sl < (ml or 0) then return nil end + str = lwr(str) + for k, v in pairs(tbl) do + if str == lwr(sub(v, 1, sl)) then + if tn then tn[0] = k end + return k + end + end + end + local function fnil() end +--[[ DATE FUNCTIONS ]]-- + local DATE_EPOCH -- to be set later + local sl_weekdays = { + [0]="Sunday",[1]="Monday",[2]="Tuesday",[3]="Wednesday",[4]="Thursday",[5]="Friday",[6]="Saturday", + [7]="Sun",[8]="Mon",[9]="Tue",[10]="Wed",[11]="Thu",[12]="Fri",[13]="Sat", + } + local sl_meridian = {[-1]="AM", [1]="PM"} + local sl_months = { + [00]="January", [01]="February", [02]="March", + [03]="April", [04]="May", [05]="June", + [06]="July", [07]="August", [08]="September", + [09]="October", [10]="November", [11]="December", + [12]="Jan", [13]="Feb", [14]="Mar", + [15]="Apr", [16]="May", [17]="Jun", + [18]="Jul", [19]="Aug", [20]="Sep", + [21]="Oct", [22]="Nov", [23]="Dec", + } + -- added the '.2' to avoid collision, use `fix` to remove + local sl_timezone = { + [000]="utc", [0.2]="gmt", + [300]="est", [240]="edt", + [360]="cst", [300.2]="cdt", + [420]="mst", [360.2]="mdt", + [480]="pst", [420.2]="pdt", + } + -- set the day fraction resolution + local function setticks(t) + TICKSPERSEC = t; + TICKSPERDAY = SECPERDAY*TICKSPERSEC + TICKSPERHOUR= SECPERHOUR*TICKSPERSEC + TICKSPERMIN = SECPERMIN*TICKSPERSEC + end + -- is year y leap year? + local function isleapyear(y) -- y must be int! + return (mod(y, 4) == 0 and (mod(y, 100) ~= 0 or mod(y, 400) == 0)) + end + -- day since year 0 + local function dayfromyear(y) -- y must be int! + return 365*y + floor(y/4) - floor(y/100) + floor(y/400) + end + -- day number from date, month is zero base + local function makedaynum(y, m, d) + local mm = mod(mod(m,12) + 10, 12) + return dayfromyear(y + floor(m/12) - floor(mm/10)) + floor((mm*306 + 5)/10) + d - 307 + --local yy = y + floor(m/12) - floor(mm/10) + --return dayfromyear(yy) + floor((mm*306 + 5)/10) + (d - 1) + end + -- date from day number, month is zero base + local function breakdaynum(g) + local g = g + 306 + local y = floor((10000*g + 14780)/3652425) + local d = g - dayfromyear(y) + if d < 0 then y = y - 1; d = g - dayfromyear(y) end + local mi = floor((100*d + 52)/3060) + return (floor((mi + 2)/12) + y), mod(mi + 2,12), (d - floor((mi*306 + 5)/10) + 1) + end + --[[ for floats or int32 Lua Number data type + local function breakdaynum2(g) + local g, n = g + 306; + local n400 = floor(g/DI400Y);n = mod(g,DI400Y); + local n100 = floor(n/DI100Y);n = mod(n,DI100Y); + local n004 = floor(n/DI4Y); n = mod(n,DI4Y); + local n001 = floor(n/365); n = mod(n,365); + local y = (n400*400) + (n100*100) + (n004*4) + n001 - ((n001 == 4 or n100 == 4) and 1 or 0) + local d = g - dayfromyear(y) + local mi = floor((100*d + 52)/3060) + return (floor((mi + 2)/12) + y), mod(mi + 2,12), (d - floor((mi*306 + 5)/10) + 1) + end + ]] + -- day fraction from time + local function makedayfrc(h,r,s,t) + return ((h*60 + r)*60 + s)*TICKSPERSEC + t + end + -- time from day fraction + local function breakdayfrc(df) + return + mod(floor(df/TICKSPERHOUR),HOURPERDAY), + mod(floor(df/TICKSPERMIN ),MINPERHOUR), + mod(floor(df/TICKSPERSEC ),SECPERMIN), + mod(df,TICKSPERSEC) + end + -- weekday sunday = 0, monday = 1 ... + local function weekday(dn) return mod(dn + 1, 7) end + -- yearday 0 based ... + local function yearday(dn) + return dn - dayfromyear((breakdaynum(dn))-1) + end + -- parse v as a month + local function getmontharg(v) + local m = tonumber(v); + return (m and fix(m - 1)) or inlist(tostring(v) or "", sl_months, 2) + end + -- get daynum of isoweek one of year y + local function isow1(y) + local f = makedaynum(y, 0, 4) -- get the date for the 4-Jan of year `y` + local d = weekday(f) + d = d == 0 and 7 or d -- get the ISO day number, 1 == Monday, 7 == Sunday + return f + (1 - d) + end + local function isowy(dn) + local w1; + local y = (breakdaynum(dn)) + if dn >= makedaynum(y, 11, 29) then + w1 = isow1(y + 1); + if dn < w1 then + w1 = isow1(y); + else + y = y + 1; + end + else + w1 = isow1(y); + if dn < w1 then + w1 = isow1(y-1) + y = y - 1 + end + end + return floor((dn-w1)/7)+1, y + end + local function isoy(dn) + local y = (breakdaynum(dn)) + return y + (((dn >= makedaynum(y, 11, 29)) and (dn >= isow1(y + 1))) and 1 or (dn < isow1(y) and -1 or 0)) + end + local function makedaynum_isoywd(y,w,d) + return isow1(y) + 7*w + d - 8 -- simplified: isow1(y) + ((w-1)*7) + (d-1) + end +--[[ THE DATE MODULE ]]-- + local fmtstr = "%x %X"; +--#if not DATE_OBJECT_AFX then + local date = {} + setmetatable(date, date) +-- Version: VMMMRRRR; V-Major, M-Minor, R-Revision; e.g. 5.45.321 == 50450321 + do + local major = 2 + local minor = 2 + local revision = 0 + date.version = major * 10000000 + minor * 10000 + revision + end +--#end -- not DATE_OBJECT_AFX +--[[ THE DATE OBJECT ]]-- + local dobj = {} + dobj.__index = dobj + dobj.__metatable = dobj + -- shout invalid arg + local function date_error_arg() return error("invalid argument(s)",0) end + -- create new date object + local function date_new(dn, df) + return setmetatable({daynum=dn, dayfrc=df}, dobj) + end + +--#if not NO_LOCAL_TIME_SUPPORT then + -- magic year table + local date_epoch, yt; + local function getequivyear(y) + assert(not yt) + yt = {} + local de = date_epoch:copy() + local dw, dy + for _ = 0, 3000 do + de:setyear(de:getyear() + 1, 1, 1) + dy = de:getyear() + dw = de:getweekday() * (isleapyear(dy) and -1 or 1) + if not yt[dw] then yt[dw] = dy end --print(de) + if yt[1] and yt[2] and yt[3] and yt[4] and yt[5] and yt[6] and yt[7] and yt[-1] and yt[-2] and yt[-3] and yt[-4] and yt[-5] and yt[-6] and yt[-7] then + getequivyear = function(y) return yt[ (weekday(makedaynum(y, 0, 1)) + 1) * (isleapyear(y) and -1 or 1) ] end + return getequivyear(y) + end + end + end + -- TimeValue from date and time + local function totv(y,m,d,h,r,s) + return (makedaynum(y, m, d) - DATE_EPOCH) * SECPERDAY + ((h*60 + r)*60 + s) + end + -- TimeValue from TimeTable + local function tmtotv(tm) + return tm and totv(tm.year, tm.month - 1, tm.day, tm.hour, tm.min, tm.sec) + end + -- Returns the bias in seconds of utc time daynum and dayfrc + local function getbiasutc2(self) + local y,m,d = breakdaynum(self.daynum) + local h,r,s = breakdayfrc(self.dayfrc) + local tvu = totv(y,m,d,h,r,s) -- get the utc TimeValue of date and time + local tml = osdate("*t", tvu) -- get the local TimeTable of tvu + if (not tml) or (tml.year > (y+1) or tml.year < (y-1)) then -- failed try the magic + y = getequivyear(y) + tvu = totv(y,m,d,h,r,s) + tml = osdate("*t", tvu) + end + local tvl = tmtotv(tml) + if tvu and tvl then + return tvu - tvl, tvu, tvl + else + return error("failed to get bias from utc time") + end + end + -- Returns the bias in seconds of local time daynum and dayfrc + local function getbiasloc2(daynum, dayfrc) + local tvu + -- extract date and time + local y,m,d = breakdaynum(daynum) + local h,r,s = breakdayfrc(dayfrc) + -- get equivalent TimeTable + local tml = {year=y, month=m+1, day=d, hour=h, min=r, sec=s} + -- get equivalent TimeValue + local tvl = tmtotv(tml) + + local function chkutc() + tml.isdst = nil; local tvug = ostime(tml) if tvug and (tvl == tmtotv(osdate("*t", tvug))) then tvu = tvug return end + tml.isdst = true; local tvud = ostime(tml) if tvud and (tvl == tmtotv(osdate("*t", tvud))) then tvu = tvud return end + tvu = tvud or tvug + end + chkutc() + if not tvu then + tml.year = getequivyear(y) + tvl = tmtotv(tml) + chkutc() + end + return ((tvu and tvl) and (tvu - tvl)) or error("failed to get bias from local time"), tvu, tvl + end +--#end -- not NO_LOCAL_TIME_SUPPORT + +--#if not DATE_OBJECT_AFX then + -- the date parser + local strwalker = {} -- ^Lua regular expression is not as powerful as Perl$ + strwalker.__index = strwalker + local function newstrwalker(s)return setmetatable({s=s, i=1, e=1, c=len(s)}, strwalker) end + function strwalker:aimchr() return "\n" .. self.s .. "\n" .. rep(".",self.e-1) .. "^" end + function strwalker:finish() return self.i > self.c end + function strwalker:back() self.i = self.e return self end + function strwalker:restart() self.i, self.e = 1, 1 return self end + function strwalker:match(s) return (find(self.s, s, self.i)) end + function strwalker:__call(s, f)-- print("strwalker:__call "..s..self:aimchr()) + local is, ie; is, ie, self[1], self[2], self[3], self[4], self[5] = find(self.s, s, self.i) + if is then self.e, self.i = self.i, 1+ie; if f then f(unpack(self)) end return self end + end + local function date_parse(str) + local y,m,d, h,r,s, z, w,u, j, e, x,c, dn,df + local sw = newstrwalker(gsub(gsub(str, "(%b())", ""),"^(%s*)","")) -- remove comment, trim leading space + --local function error_out() print(y,m,d,h,r,s) end + local function error_dup(q) --[[error_out()]] error("duplicate value: " .. (q or "") .. sw:aimchr()) end + local function error_syn(q) --[[error_out()]] error("syntax error: " .. (q or "") .. sw:aimchr()) end + local function error_inv(q) --[[error_out()]] error("invalid date: " .. (q or "") .. sw:aimchr()) end + local function sety(q) y = y and error_dup() or tonumber(q); end + local function setm(q) m = (m or w or j) and error_dup(m or w or j) or tonumber(q) end + local function setd(q) d = d and error_dup() or tonumber(q) end + local function seth(q) h = h and error_dup() or tonumber(q) end + local function setr(q) r = r and error_dup() or tonumber(q) end + local function sets(q) s = s and error_dup() or tonumber(q) end + local function adds(q) s = s + tonumber(q) end + local function setj(q) j = (m or w or j) and error_dup() or tonumber(q); end + local function setz(q) z = (z ~= 0 and z) and error_dup() or q end + local function setzn(zs,zn) zn = tonumber(zn); setz( ((zn<24) and (zn*60) or (mod(zn,100) + floor(zn/100) * 60))*( zs=='+' and -1 or 1) ) end + local function setzc(zs,zh,zm) setz( ((tonumber(zh)*60) + tonumber(zm))*( zs=='+' and -1 or 1) ) end + + if not (sw("^(%d%d%d%d)",sety) and (sw("^(%-?)(%d%d)%1(%d%d)",function(_,a,b) setm(tonumber(a)); setd(tonumber(b)) end) or sw("^(%-?)[Ww](%d%d)%1(%d?)",function(_,a,b) w, u = tonumber(a), tonumber(b or 1) end) or sw("^%-?(%d%d%d)",setj) or sw("^%-?(%d%d)",function(a) setm(a);setd(1) end)) + and ((sw("^%s*[Tt]?(%d%d):?",seth) and sw("^(%d%d):?",setr) and sw("^(%d%d)",sets) and sw("^(%.%d+)",adds)) + or sw:finish() or (sw"^%s*$" or sw"^%s*[Zz]%s*$" or sw("^%s-([%+%-])(%d%d):?(%d%d)%s*$",setzc) or sw("^%s*([%+%-])(%d%d)%s*$",setzn)) + ) ) + then --print(y,m,d,h,r,s,z,w,u,j) + sw:restart(); y,m,d,h,r,s,z,w,u,j = nil,nil,nil,nil,nil,nil,nil,nil,nil,nil + repeat -- print(sw:aimchr()) + if sw("^[tT:]?%s*(%d%d?):",seth) then --print("$Time") + _ = sw("^%s*(%d%d?)",setr) and sw("^%s*:%s*(%d%d?)",sets) and sw("^(%.%d+)",adds) + elseif sw("^(%d+)[/\\%s,-]?%s*") then --print("$Digits") + x, c = tonumber(sw[1]), len(sw[1]) + if (x >= 70) or (m and d and (not y)) or (c > 3) then + sety( x + ((x >= 100 or c>3) and 0 or x 12) or (h < 0) then return error_inv() end + if x == 'a' and h == 12 then h = 0 end -- am + if x == 'p' and h ~= 12 then h = h + 12 end -- pm + else error_syn() end + end + elseif not(sw("^([+-])(%d%d?):(%d%d)",setzc) or sw("^([+-])(%d+)",setzn) or sw("^[Zz]%s*$")) then -- sw{"([+-])",{"(%d%d?):(%d%d)","(%d+)"}} + error_syn("?") + end + sw("^%s*") until sw:finish() + --else print("$Iso(Date|Time|Zone)") + end + -- if date is given, it must be complete year, month & day + if (not y and not h) or ((m and not d) or (d and not m)) or ((m and w) or (m and j) or (j and w)) then return error_inv("!") end + -- fix month + if m then m = m - 1 end + -- fix year if we are on BCE + if e and e < 0 and y > 0 then y = 1 - y end + -- create date object + dn = (y and ((w and makedaynum_isoywd(y,w,u)) or (j and makedaynum(y, 0, j)) or makedaynum(y, m, d))) or DAYNUM_DEF + df = makedayfrc(h or 0, r or 0, s or 0, 0) + ((z or 0)*TICKSPERMIN) + --print("Zone",h,r,s,z,m,d,y,df) + return date_new(dn, df) -- no need to :normalize(); + end + local function date_fromtable(v) + local y, m, d = fix(v.year), getmontharg(v.month), fix(v.day) + local h, r, s, t = tonumber(v.hour), tonumber(v.min), tonumber(v.sec), tonumber(v.ticks) + -- atleast there is time or complete date + if (y or m or d) and (not(y and m and d)) then return error("incomplete table") end + return (y or h or r or s or t) and date_new(y and makedaynum(y, m, d) or DAYNUM_DEF, makedayfrc(h or 0, r or 0, s or 0, t or 0)) + end + local tmap = { + ['number'] = function(v) return date_epoch:copy():addseconds(v) end, + ['string'] = function(v) return date_parse(v) end, + ['boolean']= function(v) return date_fromtable(osdate(v and "!*t" or "*t")) end, + ['table'] = function(v) local ref = getmetatable(v) == dobj; return ref and v or date_fromtable(v), ref end + } + local function date_getdobj(v) + local o, r = (tmap[type(v)] or fnil)(v); + return (o and o:normalize() or error"invalid date time value"), r -- if r is true then o is a reference to a date obj + end +--#end -- not DATE_OBJECT_AFX + local function date_from(arg1, arg2, arg3, arg4, arg5, arg6, arg7) + local y, m, d = fix(arg1), getmontharg(arg2), fix(arg3) + local h, r, s, t = tonumber(arg4 or 0), tonumber(arg5 or 0), tonumber(arg6 or 0), tonumber(arg7 or 0) + if y and m and d and h and r and s and t then + return date_new(makedaynum(y, m, d), makedayfrc(h, r, s, t)):normalize() + else + return date_error_arg() + end + end + + --[[ THE DATE OBJECT METHODS ]]-- + function dobj:normalize() + local dn, df = fix(self.daynum), self.dayfrc + self.daynum, self.dayfrc = dn + floor(df/TICKSPERDAY), mod(df, TICKSPERDAY) + return (dn >= DAYNUM_MIN and dn <= DAYNUM_MAX) and self or error("date beyond imposed limits:"..self) + end + + function dobj:getdate() local y, m, d = breakdaynum(self.daynum) return y, m+1, d end + function dobj:gettime() return breakdayfrc(self.dayfrc) end + + function dobj:getclockhour() local h = self:gethours() return h>12 and mod(h,12) or (h==0 and 12 or h) end + + function dobj:getyearday() return yearday(self.daynum) + 1 end + function dobj:getweekday() return weekday(self.daynum) + 1 end -- in lua weekday is sunday = 1, monday = 2 ... + + function dobj:getyear() local r,_,_ = breakdaynum(self.daynum) return r end + function dobj:getmonth() local _,r,_ = breakdaynum(self.daynum) return r+1 end-- in lua month is 1 base + function dobj:getday() local _,_,r = breakdaynum(self.daynum) return r end + function dobj:gethours() return mod(floor(self.dayfrc/TICKSPERHOUR),HOURPERDAY) end + function dobj:getminutes() return mod(floor(self.dayfrc/TICKSPERMIN), MINPERHOUR) end + function dobj:getseconds() return mod(floor(self.dayfrc/TICKSPERSEC ),SECPERMIN) end + function dobj:getfracsec() return mod(floor(self.dayfrc/TICKSPERSEC ),SECPERMIN)+(mod(self.dayfrc,TICKSPERSEC)/TICKSPERSEC) end + function dobj:getticks(u) local x = mod(self.dayfrc,TICKSPERSEC) return u and ((x*u)/TICKSPERSEC) or x end + + function dobj:getweeknumber(wdb) + local wd, yd = weekday(self.daynum), yearday(self.daynum) + if wdb then + wdb = tonumber(wdb) + if wdb then + wd = mod(wd-(wdb-1),7)-- shift the week day base + else + return date_error_arg() + end + end + return (yd < wd and 0) or (floor(yd/7) + ((mod(yd, 7)>=wd) and 1 or 0)) + end + + function dobj:getisoweekday() return mod(weekday(self.daynum)-1,7)+1 end -- sunday = 7, monday = 1 ... + function dobj:getisoweeknumber() return (isowy(self.daynum)) end + function dobj:getisoyear() return isoy(self.daynum) end + function dobj:getisodate() + local w, y = isowy(self.daynum) + return y, w, self:getisoweekday() + end + function dobj:setisoyear(y, w, d) + local cy, cw, cd = self:getisodate() + if y then cy = fix(tonumber(y))end + if w then cw = fix(tonumber(w))end + if d then cd = fix(tonumber(d))end + if cy and cw and cd then + self.daynum = makedaynum_isoywd(cy, cw, cd) + return self:normalize() + else + return date_error_arg() + end + end + + function dobj:setisoweekday(d) return self:setisoyear(nil, nil, d) end + function dobj:setisoweeknumber(w,d) return self:setisoyear(nil, w, d) end + + function dobj:setyear(y, m, d) + local cy, cm, cd = breakdaynum(self.daynum) + if y then cy = fix(tonumber(y))end + if m then cm = getmontharg(m) end + if d then cd = fix(tonumber(d))end + if cy and cm and cd then + self.daynum = makedaynum(cy, cm, cd) + return self:normalize() + else + return date_error_arg() + end + end + + function dobj:setmonth(m, d)return self:setyear(nil, m, d) end + function dobj:setday(d) return self:setyear(nil, nil, d) end + + function dobj:sethours(h, m, s, t) + local ch,cm,cs,ck = breakdayfrc(self.dayfrc) + ch, cm, cs, ck = tonumber(h or ch), tonumber(m or cm), tonumber(s or cs), tonumber(t or ck) + if ch and cm and cs and ck then + self.dayfrc = makedayfrc(ch, cm, cs, ck) + return self:normalize() + else + return date_error_arg() + end + end + + function dobj:setminutes(m,s,t) return self:sethours(nil, m, s, t) end + function dobj:setseconds(s, t) return self:sethours(nil, nil, s, t) end + function dobj:setticks(t) return self:sethours(nil, nil, nil, t) end + + function dobj:spanticks() return (self.daynum*TICKSPERDAY + self.dayfrc) end + function dobj:spanseconds() return (self.daynum*TICKSPERDAY + self.dayfrc)/TICKSPERSEC end + function dobj:spanminutes() return (self.daynum*TICKSPERDAY + self.dayfrc)/TICKSPERMIN end + function dobj:spanhours() return (self.daynum*TICKSPERDAY + self.dayfrc)/TICKSPERHOUR end + function dobj:spandays() return (self.daynum*TICKSPERDAY + self.dayfrc)/TICKSPERDAY end + + function dobj:addyears(y, m, d) + local cy, cm, cd = breakdaynum(self.daynum) + if y then y = fix(tonumber(y))else y = 0 end + if m then m = fix(tonumber(m))else m = 0 end + if d then d = fix(tonumber(d))else d = 0 end + if y and m and d then + self.daynum = makedaynum(cy+y, cm+m, cd+d) + return self:normalize() + else + return date_error_arg() + end + end + + function dobj:addmonths(m, d) + return self:addyears(nil, m, d) + end + + local function dobj_adddayfrc(self,n,pt,pd) + n = tonumber(n) + if n then + local x = floor(n/pd); + self.daynum = self.daynum + x; + self.dayfrc = self.dayfrc + (n-x*pd)*pt; + return self:normalize() + else + return date_error_arg() + end + end + function dobj:adddays(n) return dobj_adddayfrc(self,n,TICKSPERDAY,1) end + function dobj:addhours(n) return dobj_adddayfrc(self,n,TICKSPERHOUR,HOURPERDAY) end + function dobj:addminutes(n) return dobj_adddayfrc(self,n,TICKSPERMIN,MINPERDAY) end + function dobj:addseconds(n) return dobj_adddayfrc(self,n,TICKSPERSEC,SECPERDAY) end + function dobj:addticks(n) return dobj_adddayfrc(self,n,1,TICKSPERDAY) end + local tvspec = { + -- Abbreviated weekday name (Sun) + ['%a']=function(self) return sl_weekdays[weekday(self.daynum) + 7] end, + -- Full weekday name (Sunday) + ['%A']=function(self) return sl_weekdays[weekday(self.daynum)] end, + -- Abbreviated month name (Dec) + ['%b']=function(self) return sl_months[self:getmonth() - 1 + 12] end, + -- Full month name (December) + ['%B']=function(self) return sl_months[self:getmonth() - 1] end, + -- Year/100 (19, 20, 30) + ['%C']=function(self) return fmt("%.2d", fix(self:getyear()/100)) end, + -- The day of the month as a number (range 1 - 31) + ['%d']=function(self) return fmt("%.2d", self:getday()) end, + -- year for ISO 8601 week, from 00 (79) + ['%g']=function(self) return fmt("%.2d", mod(self:getisoyear() ,100)) end, + -- year for ISO 8601 week, from 0000 (1979) + ['%G']=function(self) return fmt("%.4d", self:getisoyear()) end, + -- same as %b + ['%h']=function(self) return self:fmt0("%b") end, + -- hour of the 24-hour day, from 00 (06) + ['%H']=function(self) return fmt("%.2d", self:gethours()) end, + -- The hour as a number using a 12-hour clock (01 - 12) + ['%I']=function(self) return fmt("%.2d", self:getclockhour()) end, + -- The day of the year as a number (001 - 366) + ['%j']=function(self) return fmt("%.3d", self:getyearday()) end, + -- Month of the year, from 01 to 12 + ['%m']=function(self) return fmt("%.2d", self:getmonth()) end, + -- Minutes after the hour 55 + ['%M']=function(self) return fmt("%.2d", self:getminutes())end, + -- AM/PM indicator (AM) + ['%p']=function(self) return sl_meridian[self:gethours() > 11 and 1 or -1] end, --AM/PM indicator (AM) + -- The second as a number (59, 20 , 01) + ['%S']=function(self) return fmt("%.2d", self:getseconds()) end, + -- ISO 8601 day of the week, to 7 for Sunday (7, 1) + ['%u']=function(self) return self:getisoweekday() end, + -- Sunday week of the year, from 00 (48) + ['%U']=function(self) return fmt("%.2d", self:getweeknumber()) end, + -- ISO 8601 week of the year, from 01 (48) + ['%V']=function(self) return fmt("%.2d", self:getisoweeknumber()) end, + -- The day of the week as a decimal, Sunday being 0 + ['%w']=function(self) return self:getweekday() - 1 end, + -- Monday week of the year, from 00 (48) + ['%W']=function(self) return fmt("%.2d", self:getweeknumber(2)) end, + -- The year as a number without a century (range 00 to 99) + ['%y']=function(self) return fmt("%.2d", mod(self:getyear() ,100)) end, + -- Year with century (2000, 1914, 0325, 0001) + ['%Y']=function(self) return fmt("%.4d", self:getyear()) end, + -- Time zone offset, the date object is assumed local time (+1000, -0230) + ['%z']=function(self) local b = -self:getbias(); local x = abs(b); return fmt("%s%.4d", b < 0 and "-" or "+", fix(x/60)*100 + floor(mod(x,60))) end, + -- Time zone name, the date object is assumed local time + ['%Z']=function(self) return self:gettzname() end, + -- Misc -- + -- Year, if year is in BCE, prints the BCE Year representation, otherwise result is similar to "%Y" (1 BCE, 40 BCE) + ['%\b']=function(self) local x = self:getyear() return fmt("%.4d%s", x>0 and x or (-x+1), x>0 and "" or " BCE") end, + -- Seconds including fraction (59.998, 01.123) + ['%\f']=function(self) local x = self:getfracsec() return fmt("%s%.9f",x >= 10 and "" or "0", x) end, + -- percent character % + ['%%']=function(self) return "%" end, + -- Group Spec -- + -- 12-hour time, from 01:00:00 AM (06:55:15 AM); same as "%I:%M:%S %p" + ['%r']=function(self) return self:fmt0("%I:%M:%S %p") end, + -- hour:minute, from 01:00 (06:55); same as "%I:%M" + ['%R']=function(self) return self:fmt0("%I:%M") end, + -- 24-hour time, from 00:00:00 (06:55:15); same as "%H:%M:%S" + ['%T']=function(self) return self:fmt0("%H:%M:%S") end, + -- month/day/year from 01/01/00 (12/02/79); same as "%m/%d/%y" + ['%D']=function(self) return self:fmt0("%m/%d/%y") end, + -- year-month-day (1979-12-02); same as "%Y-%m-%d" + ['%F']=function(self) return self:fmt0("%Y-%m-%d") end, + -- The preferred date and time representation; same as "%x %X" + ['%c']=function(self) return self:fmt0("%x %X") end, + -- The preferred date representation, same as "%a %b %d %\b" + ['%x']=function(self) return self:fmt0("%a %b %d %\b") end, + -- The preferred time representation, same as "%H:%M:%\f" + ['%X']=function(self) return self:fmt0("%H:%M:%\f") end, + -- GroupSpec -- + -- Iso format, same as "%Y-%m-%dT%T" + ['${iso}'] = function(self) return self:fmt0("%Y-%m-%dT%T") end, + -- http format, same as "%a, %d %b %Y %T GMT" + ['${http}'] = function(self) return self:fmt0("%a, %d %b %Y %T GMT") end, + -- ctime format, same as "%a %b %d %T GMT %Y" + ['${ctime}'] = function(self) return self:fmt0("%a %b %d %T GMT %Y") end, + -- RFC850 format, same as "%A, %d-%b-%y %T GMT" + ['${rfc850}'] = function(self) return self:fmt0("%A, %d-%b-%y %T GMT") end, + -- RFC1123 format, same as "%a, %d %b %Y %T GMT" + ['${rfc1123}'] = function(self) return self:fmt0("%a, %d %b %Y %T GMT") end, + -- asctime format, same as "%a %b %d %T %Y" + ['${asctime}'] = function(self) return self:fmt0("%a %b %d %T %Y") end, + } + function dobj:fmt0(str) return (gsub(str, "%%[%a%%\b\f]", function(x) local f = tvspec[x];return (f and f(self)) or x end)) end + function dobj:fmt(str) + str = str or self.fmtstr or fmtstr + return self:fmt0((gmatch(str, "${%w+}")) and (gsub(str, "${%w+}", function(x)local f=tvspec[x];return (f and f(self)) or x end)) or str) + end + + function dobj.__lt(a, b) if (a.daynum == b.daynum) then return (a.dayfrc < b.dayfrc) else return (a.daynum < b.daynum) end end + function dobj.__le(a, b) if (a.daynum == b.daynum) then return (a.dayfrc <= b.dayfrc) else return (a.daynum <= b.daynum) end end + function dobj.__eq(a, b)return (a.daynum == b.daynum) and (a.dayfrc == b.dayfrc) end + function dobj.__sub(a,b) + local d1, d2 = date_getdobj(a), date_getdobj(b) + local d0 = d1 and d2 and date_new(d1.daynum - d2.daynum, d1.dayfrc - d2.dayfrc) + return d0 and d0:normalize() + end + function dobj.__add(a,b) + local d1, d2 = date_getdobj(a), date_getdobj(b) + local d0 = d1 and d2 and date_new(d1.daynum + d2.daynum, d1.dayfrc + d2.dayfrc) + return d0 and d0:normalize() + end + function dobj.__concat(a, b) return tostring(a) .. tostring(b) end + function dobj:__tostring() return self:fmt() end + + function dobj:copy() return date_new(self.daynum, self.dayfrc) end + +--[[ THE LOCAL DATE OBJECT METHODS ]]-- + function dobj:tolocal() + local dn,df = self.daynum, self.dayfrc + local bias = getbiasutc2(self) + if bias then + -- utc = local + bias; local = utc - bias + self.daynum = dn + self.dayfrc = df - bias*TICKSPERSEC + return self:normalize() + else + return nil + end + end + + function dobj:toutc() + local dn,df = self.daynum, self.dayfrc + local bias = getbiasloc2(dn, df) + if bias then + -- utc = local + bias; + self.daynum = dn + self.dayfrc = df + bias*TICKSPERSEC + return self:normalize() + else + return nil + end + end + + function dobj:getbias() return (getbiasloc2(self.daynum, self.dayfrc))/SECPERMIN end + + function dobj:gettzname() + local _, tvu, _ = getbiasloc2(self.daynum, self.dayfrc) + return tvu and osdate("%Z",tvu) or "" + end + +--#if not DATE_OBJECT_AFX then + function date.time(h, r, s, t) + h, r, s, t = tonumber(h or 0), tonumber(r or 0), tonumber(s or 0), tonumber(t or 0) + if h and r and s and t then + return date_new(DAYNUM_DEF, makedayfrc(h, r, s, t)) + else + return date_error_arg() + end + end + + function date:__call(arg1, ...) + local arg_count = select("#", ...) + (arg1 == nil and 0 or 1) + if arg_count > 1 then return (date_from(arg1, ...)) + elseif arg_count == 0 then return (date_getdobj(false)) + else local o, r = date_getdobj(arg1); return r and o:copy() or o end + end + + date.diff = dobj.__sub + + function date.isleapyear(v) + local y = fix(v); + if not y then + y = date_getdobj(v) + y = y and y:getyear() + end + return isleapyear(y+0) + end + + function date.epoch() return date_epoch:copy() end + + function date.isodate(y,w,d) return date_new(makedaynum_isoywd(y + 0, w and (w+0) or 1, d and (d+0) or 1), 0) end + function date.setcenturyflip(y) + if y ~= floor(y) or y < 0 or y > 100 then date_error_arg() end + centuryflip = y + end + function date.getcenturyflip() return centuryflip end + +-- Internal functions + function date.fmt(str) if str then fmtstr = str end; return fmtstr end + function date.daynummin(n) DAYNUM_MIN = (n and n < DAYNUM_MAX) and n or DAYNUM_MIN return n and DAYNUM_MIN or date_new(DAYNUM_MIN, 0):normalize()end + function date.daynummax(n) DAYNUM_MAX = (n and n > DAYNUM_MIN) and n or DAYNUM_MAX return n and DAYNUM_MAX or date_new(DAYNUM_MAX, 0):normalize()end + function date.ticks(t) if t then setticks(t) end return TICKSPERSEC end +--#end -- not DATE_OBJECT_AFX + + local tm = osdate("*t", 0 - system:get_tz_offset()); + if tm then + date_epoch = date_new(makedaynum(tm.year, tm.month - 1, tm.day), makedayfrc(tm.hour, tm.min, tm.sec, 0)) + -- the distance from our epoch to os epoch in daynum + DATE_EPOCH = date_epoch and date_epoch:spandays() + else -- error will be raise only if called! + date_epoch = setmetatable({},{__index = function() error("failed to get the epoch date") end}) + end + +--#if not DATE_OBJECT_AFX then +return date +--#else +--$return date_from +--#end + diff --git a/lib/eval.lua b/lib/eval.lua new file mode 100644 index 0000000..ae90b63 --- /dev/null +++ b/lib/eval.lua @@ -0,0 +1,10 @@ +math = require "math" +use(math) + +function pprint(obj) + if type(obj) == "string" or type(obj) == "number" or type(obj) == "boolean" then + print(obj) + else + print("") + end +end \ No newline at end of file diff --git a/lib/fun.lua b/lib/fun.lua new file mode 100644 index 0000000..2427bb2 --- /dev/null +++ b/lib/fun.lua @@ -0,0 +1,1058 @@ +--- +--- Lua Fun - a high-performance functional programming library for LuaJIT +--- +--- Copyright (c) 2013-2017 Roman Tsisyk +--- +--- Distributed under the MIT/X11 License. See COPYING.md for more details. +--- + +local exports = {} +local methods = {} + +-- compatibility with Lua 5.1/5.2 +local unpack = rawget(table, "unpack") or unpack + +-------------------------------------------------------------------------------- +-- Tools +-------------------------------------------------------------------------------- + +local return_if_not_empty = function(state_x, ...) + if state_x == nil then + return nil + end + return ... +end + +local call_if_not_empty = function(fun, state_x, ...) + if state_x == nil then + return nil + end + return state_x, fun(...) +end + +local function deepcopy(orig) -- used by cycle() + local orig_type = type(orig) + local copy + if orig_type == 'table' then + copy = {} + for orig_key, orig_value in next, orig, nil do + copy[deepcopy(orig_key)] = deepcopy(orig_value) + end + else + copy = orig + end + return copy +end + +local iterator_mt = { + -- usually called by for-in loop + __call = function(self, param, state) + return self.gen(param, state) + end; + __tostring = function(self) + return '' + end; + -- add all exported methods + __index = methods; +} + +local wrap = function(gen, param, state) + return setmetatable({ + gen = gen, + param = param, + state = state + }, iterator_mt), param, state +end +exports.wrap = wrap + +local unwrap = function(self) + return self.gen, self.param, self.state +end +methods.unwrap = unwrap + +-------------------------------------------------------------------------------- +-- Basic Functions +-------------------------------------------------------------------------------- + +local nil_gen = function(_param, _state) + return nil +end + +local string_gen = function(param, state) + local state = state + 1 + if state > #param then + return nil + end + local r = string.sub(param, state, state) + return state, r +end + +local ipairs_gen = ipairs({}) -- get the generating function from ipairs + +local pairs_gen = pairs({ a = 0 }) -- get the generating function from pairs +local map_gen = function(tab, key) + local value + local key, value = pairs_gen(tab, key) + return key, key, value +end + +local rawiter = function(obj, param, state) + assert(obj ~= nil, "invalid iterator") + if type(obj) == "table" then + local mt = getmetatable(obj); + if mt ~= nil then + if mt == iterator_mt then + return obj.gen, obj.param, obj.state + elseif mt.__ipairs ~= nil then + return mt.__ipairs(obj) + elseif mt.__pairs ~= nil then + return mt.__pairs(obj) + end + end + if #obj > 0 then + -- array + return ipairs(obj) + else + -- hash + return map_gen, obj, nil + end + elseif (type(obj) == "function") then + return obj, param, state + elseif (type(obj) == "string") then + if #obj == 0 then + return nil_gen, nil, nil + end + return string_gen, obj, 0 + end + error(string.format('object %s of type "%s" is not iterable', + obj, type(obj))) +end + +local iter = function(obj, param, state) + return wrap(rawiter(obj, param, state)) +end +exports.iter = iter + +local method0 = function(fun) + return function(self) + return fun(self.gen, self.param, self.state) + end +end + +local method1 = function(fun) + return function(self, arg1) + return fun(arg1, self.gen, self.param, self.state) + end +end + +local method2 = function(fun) + return function(self, arg1, arg2) + return fun(arg1, arg2, self.gen, self.param, self.state) + end +end + +local export0 = function(fun) + return function(gen, param, state) + return fun(rawiter(gen, param, state)) + end +end + +local export1 = function(fun) + return function(arg1, gen, param, state) + return fun(arg1, rawiter(gen, param, state)) + end +end + +local export2 = function(fun) + return function(arg1, arg2, gen, param, state) + return fun(arg1, arg2, rawiter(gen, param, state)) + end +end + +local each = function(fun, gen, param, state) + repeat + state = call_if_not_empty(fun, gen(param, state)) + until state == nil +end +methods.each = method1(each) +exports.each = export1(each) +methods.for_each = methods.each +exports.for_each = exports.each +methods.foreach = methods.each +exports.foreach = exports.each + +-------------------------------------------------------------------------------- +-- Generators +-------------------------------------------------------------------------------- + +local range_gen = function(param, state) + local stop, step = param[1], param[2] + local state = state + step + if state > stop then + return nil + end + return state, state +end + +local range_rev_gen = function(param, state) + local stop, step = param[1], param[2] + local state = state + step + if state < stop then + return nil + end + return state, state +end + +local range = function(start, stop, step) + if step == nil then + if stop == nil then + if start == 0 then + return nil_gen, nil, nil + end + stop = start + start = stop > 0 and 1 or -1 + end + step = start <= stop and 1 or -1 + end + + assert(type(start) == "number", "start must be a number") + assert(type(stop) == "number", "stop must be a number") + assert(type(step) == "number", "step must be a number") + assert(step ~= 0, "step must not be zero") + + if (step > 0) then + return wrap(range_gen, {stop, step}, start - step) + elseif (step < 0) then + return wrap(range_rev_gen, {stop, step}, start - step) + end +end +exports.range = range + +local duplicate_table_gen = function(param_x, state_x) + return state_x + 1, unpack(param_x) +end + +local duplicate_fun_gen = function(param_x, state_x) + return state_x + 1, param_x(state_x) +end + +local duplicate_gen = function(param_x, state_x) + return state_x + 1, param_x +end + +local duplicate = function(...) + if select('#', ...) <= 1 then + return wrap(duplicate_gen, select(1, ...), 0) + else + return wrap(duplicate_table_gen, {...}, 0) + end +end +exports.duplicate = duplicate +exports.replicate = duplicate +exports.xrepeat = duplicate + +local tabulate = function(fun) + assert(type(fun) == "function") + return wrap(duplicate_fun_gen, fun, 0) +end +exports.tabulate = tabulate + +local zeros = function() + return wrap(duplicate_gen, 0, 0) +end +exports.zeros = zeros + +local ones = function() + return wrap(duplicate_gen, 1, 0) +end +exports.ones = ones + +local rands_gen = function(param_x, _state_x) + return 0, math.random(param_x[1], param_x[2]) +end + +local rands_nil_gen = function(_param_x, _state_x) + return 0, math.random() +end + +local rands = function(n, m) + if n == nil and m == nil then + return wrap(rands_nil_gen, 0, 0) + end + assert(type(n) == "number", "invalid first arg to rands") + if m == nil then + m = n + n = 0 + else + assert(type(m) == "number", "invalid second arg to rands") + end + assert(n < m, "empty interval") + return wrap(rands_gen, {n, m - 1}, 0) +end +exports.rands = rands + +-------------------------------------------------------------------------------- +-- Slicing +-------------------------------------------------------------------------------- + +local nth = function(n, gen_x, param_x, state_x) + assert(n > 0, "invalid first argument to nth") + -- An optimization for arrays and strings + if gen_x == ipairs_gen then + return param_x[n] + elseif gen_x == string_gen then + if n <= #param_x then + return string.sub(param_x, n, n) + else + return nil + end + end + for i=1,n-1,1 do + state_x = gen_x(param_x, state_x) + if state_x == nil then + return nil + end + end + return return_if_not_empty(gen_x(param_x, state_x)) +end +methods.nth = method1(nth) +exports.nth = export1(nth) + +local head_call = function(state, ...) + if state == nil then + error("head: iterator is empty") + end + return ... +end + +local head = function(gen, param, state) + return head_call(gen(param, state)) +end +methods.head = method0(head) +exports.head = export0(head) +exports.car = exports.head +methods.car = methods.head + +local tail = function(gen, param, state) + state = gen(param, state) + if state == nil then + return wrap(nil_gen, nil, nil) + end + return wrap(gen, param, state) +end +methods.tail = method0(tail) +exports.tail = export0(tail) +exports.cdr = exports.tail +methods.cdr = methods.tail + +local take_n_gen_x = function(i, state_x, ...) + if state_x == nil then + return nil + end + return {i, state_x}, ... +end + +local take_n_gen = function(param, state) + local n, gen_x, param_x = param[1], param[2], param[3] + local i, state_x = state[1], state[2] + if i >= n then + return nil + end + return take_n_gen_x(i + 1, gen_x(param_x, state_x)) +end + +local take_n = function(n, gen, param, state) + assert(n >= 0, "invalid first argument to take_n") + return wrap(take_n_gen, {n, gen, param}, {0, state}) +end +methods.take_n = method1(take_n) +exports.take_n = export1(take_n) + +local take_while_gen_x = function(fun, state_x, ...) + if state_x == nil or not fun(...) then + return nil + end + return state_x, ... +end + +local take_while_gen = function(param, state_x) + local fun, gen_x, param_x = param[1], param[2], param[3] + return take_while_gen_x(fun, gen_x(param_x, state_x)) +end + +local take_while = function(fun, gen, param, state) + assert(type(fun) == "function", "invalid first argument to take_while") + return wrap(take_while_gen, {fun, gen, param}, state) +end +methods.take_while = method1(take_while) +exports.take_while = export1(take_while) + +local take = function(n_or_fun, gen, param, state) + if type(n_or_fun) == "number" then + return take_n(n_or_fun, gen, param, state) + else + return take_while(n_or_fun, gen, param, state) + end +end +methods.take = method1(take) +exports.take = export1(take) + +local drop_n = function(n, gen, param, state) + assert(n >= 0, "invalid first argument to drop_n") + local i + for i=1,n,1 do + state = gen(param, state) + if state == nil then + return wrap(nil_gen, nil, nil) + end + end + return wrap(gen, param, state) +end +methods.drop_n = method1(drop_n) +exports.drop_n = export1(drop_n) + +local drop_while_x = function(fun, state_x, ...) + if state_x == nil or not fun(...) then + return state_x, false + end + return state_x, true, ... +end + +local drop_while = function(fun, gen_x, param_x, state_x) + assert(type(fun) == "function", "invalid first argument to drop_while") + local cont, state_x_prev + repeat + state_x_prev = deepcopy(state_x) + state_x, cont = drop_while_x(fun, gen_x(param_x, state_x)) + until not cont + if state_x == nil then + return wrap(nil_gen, nil, nil) + end + return wrap(gen_x, param_x, state_x_prev) +end +methods.drop_while = method1(drop_while) +exports.drop_while = export1(drop_while) + +local drop = function(n_or_fun, gen_x, param_x, state_x) + if type(n_or_fun) == "number" then + return drop_n(n_or_fun, gen_x, param_x, state_x) + else + return drop_while(n_or_fun, gen_x, param_x, state_x) + end +end +methods.drop = method1(drop) +exports.drop = export1(drop) + +local split = function(n_or_fun, gen_x, param_x, state_x) + return take(n_or_fun, gen_x, param_x, state_x), + drop(n_or_fun, gen_x, param_x, state_x) +end +methods.split = method1(split) +exports.split = export1(split) +methods.split_at = methods.split +exports.split_at = exports.split +methods.span = methods.split +exports.span = exports.split + +-------------------------------------------------------------------------------- +-- Indexing +-------------------------------------------------------------------------------- + +local index = function(x, gen, param, state) + local i = 1 + for _k, r in gen, param, state do + if r == x then + return i + end + i = i + 1 + end + return nil +end +methods.index = method1(index) +exports.index = export1(index) +methods.index_of = methods.index +exports.index_of = exports.index +methods.elem_index = methods.index +exports.elem_index = exports.index + +local indexes_gen = function(param, state) + local x, gen_x, param_x = param[1], param[2], param[3] + local i, state_x = state[1], state[2] + local r + while true do + state_x, r = gen_x(param_x, state_x) + if state_x == nil then + return nil + end + i = i + 1 + if r == x then + return {i, state_x}, i + end + end +end + +local indexes = function(x, gen, param, state) + return wrap(indexes_gen, {x, gen, param}, {0, state}) +end +methods.indexes = method1(indexes) +exports.indexes = export1(indexes) +methods.elem_indexes = methods.indexes +exports.elem_indexes = exports.indexes +methods.indices = methods.indexes +exports.indices = exports.indexes +methods.elem_indices = methods.indexes +exports.elem_indices = exports.indexes + +-------------------------------------------------------------------------------- +-- Filtering +-------------------------------------------------------------------------------- + +local filter1_gen = function(fun, gen_x, param_x, state_x, a) + while true do + if state_x == nil or fun(a) then break; end + state_x, a = gen_x(param_x, state_x) + end + return state_x, a +end + +-- call each other +local filterm_gen +local filterm_gen_shrink = function(fun, gen_x, param_x, state_x) + return filterm_gen(fun, gen_x, param_x, gen_x(param_x, state_x)) +end + +filterm_gen = function(fun, gen_x, param_x, state_x, ...) + if state_x == nil then + return nil + end + if fun(...) then + return state_x, ... + end + return filterm_gen_shrink(fun, gen_x, param_x, state_x) +end + +local filter_detect = function(fun, gen_x, param_x, state_x, ...) + if select('#', ...) < 2 then + return filter1_gen(fun, gen_x, param_x, state_x, ...) + else + return filterm_gen(fun, gen_x, param_x, state_x, ...) + end +end + +local filter_gen = function(param, state_x) + local fun, gen_x, param_x = param[1], param[2], param[3] + return filter_detect(fun, gen_x, param_x, gen_x(param_x, state_x)) +end + +local filter = function(fun, gen, param, state) + return wrap(filter_gen, {fun, gen, param}, state) +end +methods.filter = method1(filter) +exports.filter = export1(filter) +methods.remove_if = methods.filter +exports.remove_if = exports.filter + +local grep = function(fun_or_regexp, gen, param, state) + local fun = fun_or_regexp + if type(fun_or_regexp) == "string" then + fun = function(x) return string.find(x, fun_or_regexp) ~= nil end + end + return filter(fun, gen, param, state) +end +methods.grep = method1(grep) +exports.grep = export1(grep) + +local partition = function(fun, gen, param, state) + local neg_fun = function(...) + return not fun(...) + end + return filter(fun, gen, param, state), + filter(neg_fun, gen, param, state) +end +methods.partition = method1(partition) +exports.partition = export1(partition) + +-------------------------------------------------------------------------------- +-- Reducing +-------------------------------------------------------------------------------- + +local foldl_call = function(fun, start, state, ...) + if state == nil then + return nil, start + end + return state, fun(start, ...) +end + +local foldl = function(fun, start, gen_x, param_x, state_x) + while true do + state_x, start = foldl_call(fun, start, gen_x(param_x, state_x)) + if state_x == nil then + break; + end + end + return start +end +methods.foldl = method2(foldl) +exports.foldl = export2(foldl) +methods.reduce = methods.foldl +exports.reduce = exports.foldl + +local length = function(gen, param, state) + if gen == ipairs_gen or gen == string_gen then + return #param + end + local len = 0 + repeat + state = gen(param, state) + len = len + 1 + until state == nil + return len - 1 +end +methods.length = method0(length) +exports.length = export0(length) + +local is_null = function(gen, param, state) + return gen(param, deepcopy(state)) == nil +end +methods.is_null = method0(is_null) +exports.is_null = export0(is_null) + +local is_prefix_of = function(iter_x, iter_y) + local gen_x, param_x, state_x = iter(iter_x) + local gen_y, param_y, state_y = iter(iter_y) + + local r_x, r_y + for i=1,10,1 do + state_x, r_x = gen_x(param_x, state_x) + state_y, r_y = gen_y(param_y, state_y) + if state_x == nil then + return true + end + if state_y == nil or r_x ~= r_y then + return false + end + end +end +methods.is_prefix_of = is_prefix_of +exports.is_prefix_of = is_prefix_of + +local all = function(fun, gen_x, param_x, state_x) + local r + repeat + state_x, r = call_if_not_empty(fun, gen_x(param_x, state_x)) + until state_x == nil or not r + return state_x == nil +end +methods.all = method1(all) +exports.all = export1(all) +methods.every = methods.all +exports.every = exports.all + +local any = function(fun, gen_x, param_x, state_x) + local r + repeat + state_x, r = call_if_not_empty(fun, gen_x(param_x, state_x)) + until state_x == nil or r + return not not r +end +methods.any = method1(any) +exports.any = export1(any) +methods.some = methods.any +exports.some = exports.any + +local sum = function(gen, param, state) + local s = 0 + local r = 0 + repeat + s = s + r + state, r = gen(param, state) + until state == nil + return s +end +methods.sum = method0(sum) +exports.sum = export0(sum) + +local product = function(gen, param, state) + local p = 1 + local r = 1 + repeat + p = p * r + state, r = gen(param, state) + until state == nil + return p +end +methods.product = method0(product) +exports.product = export0(product) + +local min_cmp = function(m, n) + if n < m then return n else return m end +end + +local max_cmp = function(m, n) + if n > m then return n else return m end +end + +local min = function(gen, param, state) + local state, m = gen(param, state) + if state == nil then + error("min: iterator is empty") + end + + local cmp + if type(m) == "number" then + -- An optimization: use math.min for numbers + cmp = math.min + else + cmp = min_cmp + end + + for _, r in gen, param, state do + m = cmp(m, r) + end + return m +end +methods.min = method0(min) +exports.min = export0(min) +methods.minimum = methods.min +exports.minimum = exports.min + +local min_by = function(cmp, gen_x, param_x, state_x) + local state_x, m = gen_x(param_x, state_x) + if state_x == nil then + error("min: iterator is empty") + end + + for _, r in gen_x, param_x, state_x do + m = cmp(m, r) + end + return m +end +methods.min_by = method1(min_by) +exports.min_by = export1(min_by) +methods.minimum_by = methods.min_by +exports.minimum_by = exports.min_by + +local max = function(gen_x, param_x, state_x) + local state_x, m = gen_x(param_x, state_x) + if state_x == nil then + error("max: iterator is empty") + end + + local cmp + if type(m) == "number" then + -- An optimization: use math.max for numbers + cmp = math.max + else + cmp = max_cmp + end + + for _, r in gen_x, param_x, state_x do + m = cmp(m, r) + end + return m +end +methods.max = method0(max) +exports.max = export0(max) +methods.maximum = methods.max +exports.maximum = exports.max + +local max_by = function(cmp, gen_x, param_x, state_x) + local state_x, m = gen_x(param_x, state_x) + if state_x == nil then + error("max: iterator is empty") + end + + for _, r in gen_x, param_x, state_x do + m = cmp(m, r) + end + return m +end +methods.max_by = method1(max_by) +exports.max_by = export1(max_by) +methods.maximum_by = methods.maximum_by +exports.maximum_by = exports.maximum_by + +local totable = function(gen_x, param_x, state_x) + local tab, key, val = {} + while true do + state_x, val = gen_x(param_x, state_x) + if state_x == nil then + break + end + table.insert(tab, val) + end + return tab +end +methods.totable = method0(totable) +exports.totable = export0(totable) + +local tomap = function(gen_x, param_x, state_x) + local tab, key, val = {} + while true do + state_x, key, val = gen_x(param_x, state_x) + if state_x == nil then + break + end + tab[key] = val + end + return tab +end +methods.tomap = method0(tomap) +exports.tomap = export0(tomap) + +-------------------------------------------------------------------------------- +-- Transformations +-------------------------------------------------------------------------------- + +local map_gen = function(param, state) + local gen_x, param_x, fun = param[1], param[2], param[3] + return call_if_not_empty(fun, gen_x(param_x, state)) +end + +local map = function(fun, gen, param, state) + return wrap(map_gen, {gen, param, fun}, state) +end +methods.map = method1(map) +exports.map = export1(map) + +local enumerate_gen_call = function(state, i, state_x, ...) + if state_x == nil then + return nil + end + return {i + 1, state_x}, i, ... +end + +local enumerate_gen = function(param, state) + local gen_x, param_x = param[1], param[2] + local i, state_x = state[1], state[2] + return enumerate_gen_call(state, i, gen_x(param_x, state_x)) +end + +local enumerate = function(gen, param, state) + return wrap(enumerate_gen, {gen, param}, {1, state}) +end +methods.enumerate = method0(enumerate) +exports.enumerate = export0(enumerate) + +local intersperse_call = function(i, state_x, ...) + if state_x == nil then + return nil + end + return {i + 1, state_x}, ... +end + +local intersperse_gen = function(param, state) + local x, gen_x, param_x = param[1], param[2], param[3] + local i, state_x = state[1], state[2] + if i % 2 == 1 then + return {i + 1, state_x}, x + else + return intersperse_call(i, gen_x(param_x, state_x)) + end +end + +-- TODO: interperse must not add x to the tail +local intersperse = function(x, gen, param, state) + return wrap(intersperse_gen, {x, gen, param}, {0, state}) +end +methods.intersperse = method1(intersperse) +exports.intersperse = export1(intersperse) + +-------------------------------------------------------------------------------- +-- Compositions +-------------------------------------------------------------------------------- + +local function zip_gen_r(param, state, state_new, ...) + if #state_new == #param / 2 then + return state_new, ... + end + + local i = #state_new + 1 + local gen_x, param_x = param[2 * i - 1], param[2 * i] + local state_x, r = gen_x(param_x, state[i]) + if state_x == nil then + return nil + end + table.insert(state_new, state_x) + return zip_gen_r(param, state, state_new, r, ...) +end + +local zip_gen = function(param, state) + return zip_gen_r(param, state, {}) +end + +-- A special hack for zip/chain to skip last two state, if a wrapped iterator +-- has been passed +local numargs = function(...) + local n = select('#', ...) + if n >= 3 then + -- Fix last argument + local it = select(n - 2, ...) + if type(it) == 'table' and getmetatable(it) == iterator_mt and + it.param == select(n - 1, ...) and it.state == select(n, ...) then + return n - 2 + end + end + return n +end + +local zip = function(...) + local n = numargs(...) + if n == 0 then + return wrap(nil_gen, nil, nil) + end + local param = { [2 * n] = 0 } + local state = { [n] = 0 } + + local i, gen_x, param_x, state_x + for i=1,n,1 do + local it = select(n - i + 1, ...) + gen_x, param_x, state_x = rawiter(it) + param[2 * i - 1] = gen_x + param[2 * i] = param_x + state[i] = state_x + end + + return wrap(zip_gen, param, state) +end +methods.zip = zip +exports.zip = zip + +local cycle_gen_call = function(param, state_x, ...) + if state_x == nil then + local gen_x, param_x, state_x0 = param[1], param[2], param[3] + return gen_x(param_x, deepcopy(state_x0)) + end + return state_x, ... +end + +local cycle_gen = function(param, state_x) + local gen_x, param_x, state_x0 = param[1], param[2], param[3] + return cycle_gen_call(param, gen_x(param_x, state_x)) +end + +local cycle = function(gen, param, state) + return wrap(cycle_gen, {gen, param, state}, deepcopy(state)) +end +methods.cycle = method0(cycle) +exports.cycle = export0(cycle) + +-- call each other +local chain_gen_r1 +local chain_gen_r2 = function(param, state, state_x, ...) + if state_x == nil then + local i = state[1] + i = i + 1 + if param[3 * i - 1] == nil then + return nil + end + local state_x = param[3 * i] + return chain_gen_r1(param, {i, state_x}) + end + return {state[1], state_x}, ... +end + +chain_gen_r1 = function(param, state) + local i, state_x = state[1], state[2] + local gen_x, param_x = param[3 * i - 2], param[3 * i - 1] + return chain_gen_r2(param, state, gen_x(param_x, state[2])) +end + +local chain = function(...) + local n = numargs(...) + if n == 0 then + return wrap(nil_gen, nil, nil) + end + + local param = { [3 * n] = 0 } + local i, gen_x, param_x, state_x + for i=1,n,1 do + local elem = select(i, ...) + gen_x, param_x, state_x = iter(elem) + param[3 * i - 2] = gen_x + param[3 * i - 1] = param_x + param[3 * i] = state_x + end + + return wrap(chain_gen_r1, param, {1, param[3]}) +end +methods.chain = chain +exports.chain = chain + +-------------------------------------------------------------------------------- +-- Operators +-------------------------------------------------------------------------------- + +local operator = { + ---------------------------------------------------------------------------- + -- Comparison operators + ---------------------------------------------------------------------------- + lt = function(a, b) return a < b end, + le = function(a, b) return a <= b end, + eq = function(a, b) return a == b end, + ne = function(a, b) return a ~= b end, + ge = function(a, b) return a >= b end, + gt = function(a, b) return a > b end, + + ---------------------------------------------------------------------------- + -- Arithmetic operators + ---------------------------------------------------------------------------- + add = function(a, b) return a + b end, + div = function(a, b) return a / b end, + floordiv = function(a, b) return math.floor(a/b) end, + intdiv = function(a, b) + local q = a / b + if a >= 0 then return math.floor(q) else return math.ceil(q) end + end, + mod = function(a, b) return a % b end, + mul = function(a, b) return a * b end, + neq = function(a) return -a end, + unm = function(a) return -a end, -- an alias + pow = function(a, b) return a ^ b end, + sub = function(a, b) return a - b end, + truediv = function(a, b) return a / b end, + + ---------------------------------------------------------------------------- + -- String operators + ---------------------------------------------------------------------------- + concat = function(a, b) return a..b end, + len = function(a) return #a end, + length = function(a) return #a end, -- an alias + + ---------------------------------------------------------------------------- + -- Logical operators + ---------------------------------------------------------------------------- + land = function(a, b) return a and b end, + lor = function(a, b) return a or b end, + lnot = function(a) return not a end, + truth = function(a) return not not a end, +} +exports.operator = operator +methods.operator = operator +exports.op = operator +methods.op = operator + +-------------------------------------------------------------------------------- +-- module definitions +-------------------------------------------------------------------------------- + +-- a special syntax sugar to export all functions to the global table +setmetatable(exports, { + __call = function(t, override) + for k, v in pairs(t) do + if rawget(_G, k) ~= nil then + local msg = 'function ' .. k .. ' already exists in global scope.' + if override then + rawset(_G, k, v) + print('WARNING: ' .. msg .. ' Overwritten.') + else + print('NOTICE: ' .. msg .. ' Skipped.') + end + else + rawset(_G, k, v) + end + end + end, +}) + +return exports diff --git a/lib/html.lua b/lib/html.lua new file mode 100644 index 0000000..ea53355 --- /dev/null +++ b/lib/html.lua @@ -0,0 +1,264 @@ +-- LUA HTML parser +-- Original: https://gist.github.com/exebetche/6126573 +-- Adapted by Andrey Gavrilov + +local parser = {} + +local empty_tags = { + br = true, + hr = true, + img = true, + embed = true, + param = true, + area = true, + col = true, + input = true, + meta = true, + link = true, + base = true, + basefont = true, + frame = true, + isindex = true +} + +-- omittable tags siblings +-- if an open tag from the primary entry follow +-- an unclosed tag of the secondary, +-- the secondary is automatically closed +-- See http://www.w3.org/TR/html5/syntax.html#optional-tags +local omittable_tags = { + tbody = { + thead = true, + tbody = true, + tfoot = true + }, + thead = { + thead = true, + tbody = true, + tfoot = true + }, + tfoot = { + thead = true, + tbody = true, + tfoot = true + }, + td = { + td = true, + th = true + }, + th = { + td = true, + th = true + }, + tr = { + tr = true + }, + dd = { + dd = true, + dt = true + }, + dt = { + dd = true, + dt = true + }, + optgroup = { + optgroup = true, + option = true + }, + optgroup = { + optgroup = true, + option = true + }, + address = { p = true}, + article = { p = true}, + aside = { p = true}, + blockquote = { p = true}, + dir = { p = true}, + div = { p = true}, + dl = { p = true}, + fieldset = { p = true}, + footer = { p = true}, + form = { p = true}, + h1 = { p = true}, + h2 = { p = true}, + h3 = { p = true}, + h4 = { p = true}, + h5 = { p = true}, + h6 = { p = true}, + header = { p = true}, + hgroup = { p = true}, + hr = { p = true}, + menu = { p = true}, + nav = { p = true}, + ol = { p = true}, + p = { p = true}, + pre = { p = true}, + section = { p = true}, + table = { p = true}, + ul= { p = true} +} + +-- omittable tags children +local omittable_tags2 = { + table = { + tr = true, + td = true, + p = true, + }, + tr = { + td = true, + p = true + }, + td = { + p = true + } +} + +function parser.parse(data, lazy) + local tree = {} + local stack = {} + local level = 0 + local new_level = 0 + table.insert(stack, tree) + local node + local lower_tag + local script_open = false + local script_val = "" + local script_node = nil + local tag_match = "" + lazy = lazy or false + + for b, op, tag, attr, op2, bl1, val, bl2 in string.gmatch( + data, + "(<)(%/?!?)([%w:_-'\\\"%[]+)(.-)(%/?%-?)>".. + "([%s\r\n\t]*)([^<]*)([%s\r\n\t]*)" + ) do + lower_tag = string.lower(tag) + + if script_open then + if lower_tag == "script" and op == "/" then + node.childNodes[1].value = string.gsub(script_val, "^ 0 + and omittable_tags2[lower_tag][stack[level][#stack[level]].tagName] + do + print("Auto closing ".. + stack[level][#stack[level]].tagName.. + " followed by ending "..lower_tag) + + level = level - 1 + table.remove(stack) + end + if level==0 then return tree end + + if lower_tag ~= stack[level][#stack[level]].tagName + then + print("Mismatch: "..lower_tag.. + ", (has "..stack[level][#stack[level]].tagName..")") + end + + level = level - 1 + table.remove(stack) + else + + level = level + 1 + node = nil + node = {} + node.tagName = lower_tag + node.childNodes = {} + + if attr ~= "" then + node.attr = {} + + for n, v in string.gmatch( + attr, + "%s([^%s=]+)=\"([^\"]+)\"" + ) do + node.attr[n] = string.gsub(v, '"', '[^\\]\\"') + end + + for n, v in string.gmatch( + attr, + "%s([^%s=]+)='([^']+)'" + ) do + node.attr[n] = string.gsub(v, '"', '[^\\]\\"') + end + end + + if lower_tag == "script" + and node.attr + and not node.attr["src"] + then + script_val = bl1..val..bl2 + table.insert(node.childNodes, { + tagName = "textNode", + value = "" + }) + + table.insert(stack[level], node) + script_open = true + else + -- Check if the previous sibling element end tag has been omitted + -- and should be close automatically + + if not lazy + and omittable_tags[lower_tag] + and level > 1 + and stack[level-1] + and #stack[level-1] > 0 + and omittable_tags[lower_tag][stack[level-1][#stack[level-1]].tagName] == true + then + print("Auto closing ".. + stack[level-1][#stack[level-1]].tagName.. + " followed by "..lower_tag) + + level = level - 1 + table.remove(stack) + if level==0 then return tree end + end + + table.insert(stack[level], node) + + if empty_tags[lower_tag] then + if val ~= "" then + table.insert(stack[level], { + tagName = "textNode", + value = val + }) + end + node.childNodes = nil + level = level - 1 + else + if val ~= "" then + table.insert(node.childNodes, { + tagName = "textNode", + value = val + }) + end + table.insert(stack, node.childNodes) + end + + end + end + end + return tree +end + +return parser diff --git a/lib/json.lua b/lib/json.lua new file mode 100644 index 0000000..711ef78 --- /dev/null +++ b/lib/json.lua @@ -0,0 +1,388 @@ +-- +-- json.lua +-- +-- Copyright (c) 2020 rxi +-- +-- Permission is hereby granted, free of charge, to any person obtaining a copy of +-- this software and associated documentation files (the "Software"), to deal in +-- the Software without restriction, including without limitation the rights to +-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +-- of the Software, and to permit persons to whom the Software is furnished to do +-- so, subject to the following conditions: +-- +-- The above copyright notice and this permission notice shall be included in all +-- copies or substantial portions of the Software. +-- +-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +-- SOFTWARE. +-- + +local json = { _version = "0.1.2" } + +------------------------------------------------------------------------------- +-- Encode +------------------------------------------------------------------------------- + +local encode + +local escape_char_map = { + [ "\\" ] = "\\", + [ "\"" ] = "\"", + [ "\b" ] = "b", + [ "\f" ] = "f", + [ "\n" ] = "n", + [ "\r" ] = "r", + [ "\t" ] = "t", +} + +local escape_char_map_inv = { [ "/" ] = "/" } +for k, v in pairs(escape_char_map) do + escape_char_map_inv[v] = k +end + + +local function escape_char(c) + return "\\" .. (escape_char_map[c] or string.format("u%04x", c:byte())) +end + + +local function encode_nil(val) + return "null" +end + + +local function encode_table(val, stack) + local res = {} + stack = stack or {} + + -- Circular reference? + if stack[val] then error("circular reference") end + + stack[val] = true + + if rawget(val, 1) ~= nil or next(val) == nil then + -- Treat as array -- check keys are valid and it is not sparse + local n = 0 + for k in pairs(val) do + if type(k) ~= "number" then + error("invalid table: mixed or invalid key types") + end + n = n + 1 + end + if n ~= #val then + error("invalid table: sparse array") + end + -- Encode + for i, v in ipairs(val) do + table.insert(res, encode(v, stack)) + end + stack[val] = nil + return "[" .. table.concat(res, ",") .. "]" + + else + -- Treat as an object + for k, v in pairs(val) do + if type(k) ~= "string" then + error("invalid table: mixed or invalid key types") + end + table.insert(res, encode(k, stack) .. ":" .. encode(v, stack)) + end + stack[val] = nil + return "{" .. table.concat(res, ",") .. "}" + end +end + + +local function encode_string(val) + return '"' .. val:gsub('[%z\1-\31\\"]', escape_char) .. '"' +end + + +local function encode_number(val) + -- Check for NaN, -inf and inf + if val ~= val or val <= -math.huge or val >= math.huge then + error("unexpected number value '" .. tostring(val) .. "'") + end + return string.format("%.14g", val) +end + + +local type_func_map = { + [ "nil" ] = encode_nil, + [ "table" ] = encode_table, + [ "string" ] = encode_string, + [ "number" ] = encode_number, + [ "boolean" ] = tostring, +} + + +encode = function(val, stack) + local t = type(val) + local f = type_func_map[t] + if f then + return f(val, stack) + end + error("unexpected type '" .. t .. "'") +end + + +function json.encode(val) + return ( encode(val) ) +end + + +------------------------------------------------------------------------------- +-- Decode +------------------------------------------------------------------------------- + +local parse + +local function create_set(...) + local res = {} + for i = 1, select("#", ...) do + res[ select(i, ...) ] = true + end + return res +end + +local space_chars = create_set(" ", "\t", "\r", "\n") +local delim_chars = create_set(" ", "\t", "\r", "\n", "]", "}", ",") +local escape_chars = create_set("\\", "/", '"', "b", "f", "n", "r", "t", "u") +local literals = create_set("true", "false", "null") + +local literal_map = { + [ "true" ] = true, + [ "false" ] = false, + [ "null" ] = nil, +} + + +local function next_char(str, idx, set, negate) + for i = idx, #str do + if set[str:sub(i, i)] ~= negate then + return i + end + end + return #str + 1 +end + + +local function decode_error(str, idx, msg) + local line_count = 1 + local col_count = 1 + for i = 1, idx - 1 do + col_count = col_count + 1 + if str:sub(i, i) == "\n" then + line_count = line_count + 1 + col_count = 1 + end + end + error( string.format("%s at line %d col %d", msg, line_count, col_count) ) +end + + +local function codepoint_to_utf8(n) + -- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa + local f = math.floor + if n <= 0x7f then + return string.char(n) + elseif n <= 0x7ff then + return string.char(f(n / 64) + 192, n % 64 + 128) + elseif n <= 0xffff then + return string.char(f(n / 4096) + 224, f(n % 4096 / 64) + 128, n % 64 + 128) + elseif n <= 0x10ffff then + return string.char(f(n / 262144) + 240, f(n % 262144 / 4096) + 128, + f(n % 4096 / 64) + 128, n % 64 + 128) + end + error( string.format("invalid unicode codepoint '%x'", n) ) +end + + +local function parse_unicode_escape(s) + local n1 = tonumber( s:sub(1, 4), 16 ) + local n2 = tonumber( s:sub(7, 10), 16 ) + -- Surrogate pair? + if n2 then + return codepoint_to_utf8((n1 - 0xd800) * 0x400 + (n2 - 0xdc00) + 0x10000) + else + return codepoint_to_utf8(n1) + end +end + + +local function parse_string(str, i) + local res = "" + local j = i + 1 + local k = j + + while j <= #str do + local x = str:byte(j) + + if x < 32 then + decode_error(str, j, "control character in string") + + elseif x == 92 then -- `\`: Escape + res = res .. str:sub(k, j - 1) + j = j + 1 + local c = str:sub(j, j) + if c == "u" then + local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1) + or str:match("^%x%x%x%x", j + 1) + or decode_error(str, j - 1, "invalid unicode escape in string") + res = res .. parse_unicode_escape(hex) + j = j + #hex + else + if not escape_chars[c] then + decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string") + end + res = res .. escape_char_map_inv[c] + end + k = j + 1 + + elseif x == 34 then -- `"`: End of string + res = res .. str:sub(k, j - 1) + return res, j + 1 + end + + j = j + 1 + end + + decode_error(str, i, "expected closing quote for string") +end + + +local function parse_number(str, i) + local x = next_char(str, i, delim_chars) + local s = str:sub(i, x - 1) + local n = tonumber(s) + if not n then + decode_error(str, i, "invalid number '" .. s .. "'") + end + return n, x +end + + +local function parse_literal(str, i) + local x = next_char(str, i, delim_chars) + local word = str:sub(i, x - 1) + if not literals[word] then + decode_error(str, i, "invalid literal '" .. word .. "'") + end + return literal_map[word], x +end + + +local function parse_array(str, i) + local res = {} + local n = 1 + i = i + 1 + while 1 do + local x + i = next_char(str, i, space_chars, true) + -- Empty / end of array? + if str:sub(i, i) == "]" then + i = i + 1 + break + end + -- Read token + x, i = parse(str, i) + res[n] = x + n = n + 1 + -- Next token + i = next_char(str, i, space_chars, true) + local chr = str:sub(i, i) + i = i + 1 + if chr == "]" then break end + if chr ~= "," then decode_error(str, i, "expected ']' or ','") end + end + return res, i +end + + +local function parse_object(str, i) + local res = {} + i = i + 1 + while 1 do + local key, val + i = next_char(str, i, space_chars, true) + -- Empty / end of object? + if str:sub(i, i) == "}" then + i = i + 1 + break + end + -- Read key + if str:sub(i, i) ~= '"' then + decode_error(str, i, "expected string for key") + end + key, i = parse(str, i) + -- Read ':' delimiter + i = next_char(str, i, space_chars, true) + if str:sub(i, i) ~= ":" then + decode_error(str, i, "expected ':' after key") + end + i = next_char(str, i + 1, space_chars, true) + -- Read value + val, i = parse(str, i) + -- Set + res[key] = val + -- Next token + i = next_char(str, i, space_chars, true) + local chr = str:sub(i, i) + i = i + 1 + if chr == "}" then break end + if chr ~= "," then decode_error(str, i, "expected '}' or ','") end + end + return res, i +end + + +local char_func_map = { + [ '"' ] = parse_string, + [ "0" ] = parse_number, + [ "1" ] = parse_number, + [ "2" ] = parse_number, + [ "3" ] = parse_number, + [ "4" ] = parse_number, + [ "5" ] = parse_number, + [ "6" ] = parse_number, + [ "7" ] = parse_number, + [ "8" ] = parse_number, + [ "9" ] = parse_number, + [ "-" ] = parse_number, + [ "t" ] = parse_literal, + [ "f" ] = parse_literal, + [ "n" ] = parse_literal, + [ "[" ] = parse_array, + [ "{" ] = parse_object, +} + + +parse = function(str, idx) + local chr = str:sub(idx, idx) + local f = char_func_map[chr] + if f then + return f(str, idx) + end + decode_error(str, idx, "unexpected character '" .. chr .. "'") +end + + +function json.decode(str) + if type(str) ~= "string" then + error("expected argument of type string, got " .. type(str)) + end + local res, idx = parse(str, next_char(str, 1, space_chars, true)) + idx = next_char(str, idx, space_chars, true) + if idx <= #str then + decode_error(str, idx, "trailing garbage") + end + return res +end + + +return json diff --git a/lib/utf8.lua b/lib/utf8.lua new file mode 100644 index 0000000..f11814c --- /dev/null +++ b/lib/utf8.lua @@ -0,0 +1,1049 @@ +-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $ +-- +-- Provides UTF-8 aware string functions implemented in pure lua: +-- * utf8len(s) +-- * utf8sub(s, i, j) +-- * utf8reverse(s) +-- * utf8char(unicode) +-- * utf8unicode(s, i, j) +-- * utf8gensub(s, sub_len) +-- * utf8find(str, regex, init, plain) +-- * utf8match(str, regex, init) +-- * utf8gmatch(str, regex, all) +-- * utf8gsub(str, regex, repl, limit) +-- +-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these +-- additional functions are available: +-- * utf8upper(s) +-- * utf8lower(s) +-- +-- All functions behave as their non UTF-8 aware counterparts with the exception +-- that UTF-8 characters are used instead of bytes for all units. + +--[[ +Copyright (c) 2006-2007, Kyle Smith +All rights reserved. + +Contributors: + Alimov Stepan + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +--]] + +-- ABNF from RFC 3629 +-- +-- UTF8-octets = *( UTF8-char ) +-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 +-- UTF8-1 = %x00-7F +-- UTF8-2 = %xC2-DF UTF8-tail +-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / +-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) +-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / +-- %xF4 %x80-8F 2( UTF8-tail ) +-- UTF8-tail = %x80-BF +-- + +local byte = string.byte +local char = string.char +local dump = string.dump +local find = string.find +local format = string.format +local gmatch = string.gmatch +local gsub = string.gsub +local len = string.len +local lower = string.lower +local match = string.match +local rep = string.rep +local reverse = string.reverse +local sub = string.sub +local upper = string.upper + +-- returns the number of bytes used by the UTF-8 character at byte i in s +-- also doubles as a UTF-8 character validator +local function utf8charbytes (s, i) + -- argument defaults + i = i or 1 + + -- argument checking + if type(s) ~= "string" then + error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")") + end + if type(i) ~= "number" then + error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")") + end + + local c = byte(s, i) + + -- determine bytes needed for character, based on RFC 3629 + -- validate byte 1 + if c > 0 and c <= 127 then + -- UTF8-1 + return 1 + + elseif c >= 194 and c <= 223 then + -- UTF8-2 + local c2 = byte(s, i + 1) + + if not c2 then + error("UTF-8 string terminated early") + end + + -- validate byte 2 + if c2 < 128 or c2 > 191 then + error("Invalid UTF-8 character") + end + + return 2 + + elseif c >= 224 and c <= 239 then + -- UTF8-3 + local c2 = byte(s, i + 1) + local c3 = byte(s, i + 2) + + if not c2 or not c3 then + error("UTF-8 string terminated early") + end + + -- validate byte 2 + if c == 224 and (c2 < 160 or c2 > 191) then + error("Invalid UTF-8 character") + elseif c == 237 and (c2 < 128 or c2 > 159) then + error("Invalid UTF-8 character") + elseif c2 < 128 or c2 > 191 then + error("Invalid UTF-8 character") + end + + -- validate byte 3 + if c3 < 128 or c3 > 191 then + error("Invalid UTF-8 character") + end + + return 3 + + elseif c >= 240 and c <= 244 then + -- UTF8-4 + local c2 = byte(s, i + 1) + local c3 = byte(s, i + 2) + local c4 = byte(s, i + 3) + + if not c2 or not c3 or not c4 then + error("UTF-8 string terminated early") + end + + -- validate byte 2 + if c == 240 and (c2 < 144 or c2 > 191) then + error("Invalid UTF-8 character") + elseif c == 244 and (c2 < 128 or c2 > 143) then + error("Invalid UTF-8 character") + elseif c2 < 128 or c2 > 191 then + error("Invalid UTF-8 character") + end + + -- validate byte 3 + if c3 < 128 or c3 > 191 then + error("Invalid UTF-8 character") + end + + -- validate byte 4 + if c4 < 128 or c4 > 191 then + error("Invalid UTF-8 character") + end + + return 4 + + else + error("Invalid UTF-8 character") + end +end + +-- returns the number of characters in a UTF-8 string +local function utf8len (s) + -- argument checking + if type(s) ~= "string" then + for k,v in pairs(s) do print('"',tostring(k),'"',tostring(v),'"') end + error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")") + end + + local pos = 1 + local bytes = len(s) + local len = 0 + + while pos <= bytes do + len = len + 1 + pos = pos + utf8charbytes(s, pos) + end + + return len +end + +-- functions identically to string.sub except that i and j are UTF-8 characters +-- instead of bytes +local function utf8sub (s, i, j) + -- argument defaults + j = j or -1 + + local pos = 1 + local bytes = len(s) + local len = 0 + + -- only set l if i or j is negative + local l = (i >= 0 and j >= 0) or utf8len(s) + local startChar = (i >= 0) and i or l + i + 1 + local endChar = (j >= 0) and j or l + j + 1 + + -- can't have start before end! + if startChar > endChar then + return "" + end + + -- byte offsets to pass to string.sub + local startByte,endByte = 1,bytes + + while pos <= bytes do + len = len + 1 + + if len == startChar then + startByte = pos + end + + pos = pos + utf8charbytes(s, pos) + + if len == endChar then + endByte = pos - 1 + break + end + end + + if startChar > len then startByte = bytes+1 end + if endChar < 1 then endByte = 0 end + + return sub(s, startByte, endByte) +end + + +-- replace UTF-8 characters based on a mapping table +local function utf8replace (s, mapping) + -- argument checking + if type(s) ~= "string" then + error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")") + end + if type(mapping) ~= "table" then + error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")") + end + + local pos = 1 + local bytes = len(s) + local charbytes + local newstr = "" + + while pos <= bytes do + charbytes = utf8charbytes(s, pos) + local c = sub(s, pos, pos + charbytes - 1) + + newstr = newstr .. (mapping[c] or c) + + pos = pos + charbytes + end + + return newstr +end + + +-- identical to string.upper except it knows about unicode simple case conversions +local function utf8upper (s) + return utf8replace(s, utf8_lc_uc) +end + +-- identical to string.lower except it knows about unicode simple case conversions +local function utf8lower (s) + return utf8replace(s, utf8_uc_lc) +end + +-- identical to string.reverse except that it supports UTF-8 +local function utf8reverse (s) + -- argument checking + if type(s) ~= "string" then + error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")") + end + + local bytes = len(s) + local pos = bytes + local charbytes + local newstr = "" + + while pos > 0 do + c = byte(s, pos) + while c >= 128 and c <= 191 do + pos = pos - 1 + c = byte(s, pos) + end + + charbytes = utf8charbytes(s, pos) + + newstr = newstr .. sub(s, pos, pos + charbytes - 1) + + pos = pos - 1 + end + + return newstr +end + +-- http://en.wikipedia.org/wiki/Utf8 +-- http://developer.coronalabs.com/code/utf-8-conversion-utility +local function utf8char(unicode) + if unicode <= 0x7F then return char(unicode) end + + if (unicode <= 0x7FF) then + local Byte0 = 0xC0 + math.floor(unicode / 0x40); + local Byte1 = 0x80 + (unicode % 0x40); + return char(Byte0, Byte1); + end; + + if (unicode <= 0xFFFF) then + local Byte0 = 0xE0 + math.floor(unicode / 0x1000); + local Byte1 = 0x80 + (math.floor(unicode / 0x40) % 0x40); + local Byte2 = 0x80 + (unicode % 0x40); + return char(Byte0, Byte1, Byte2); + end; + + if (unicode <= 0x10FFFF) then + local code = unicode + local Byte3= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local Byte2= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local Byte1= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local Byte0= 0xF0 + code; + + return char(Byte0, Byte1, Byte2, Byte3); + end; + + error 'Unicode cannot be greater than U+10FFFF!' +end + +local shift_6 = 2^6 +local shift_12 = 2^12 +local shift_18 = 2^18 + +local utf8unicode +utf8unicode = function(str, i, j, byte_pos) + i = i or 1 + j = j or i + + if i > j then return end + + local char,bytes + + if byte_pos then + bytes = utf8charbytes(str,byte_pos) + char = sub(str,byte_pos,byte_pos-1+bytes) + else + char,byte_pos = utf8sub(str,i,i), 0 + bytes = #char + end + + local unicode + + if bytes == 1 then unicode = byte(char) end + if bytes == 2 then + local byte0,byte1 = byte(char,1,2) + local code0,code1 = byte0-0xC0,byte1-0x80 + unicode = code0*shift_6 + code1 + end + if bytes == 3 then + local byte0,byte1,byte2 = byte(char,1,3) + local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80 + unicode = code0*shift_12 + code1*shift_6 + code2 + end + if bytes == 4 then + local byte0,byte1,byte2,byte3 = byte(char,1,4) + local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80 + unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3 + end + + return unicode,utf8unicode(str, i+1, j, byte_pos+bytes) +end + +-- Returns an iterator which returns the next substring and its byte interval +local function utf8gensub(str, sub_len) + sub_len = sub_len or 1 + local byte_pos = 1 + local len = #str + return function(skip) + if skip then byte_pos = byte_pos + skip end + local char_count = 0 + local start = byte_pos + repeat + if byte_pos > len then return end + char_count = char_count + 1 + local bytes = utf8charbytes(str,byte_pos) + byte_pos = byte_pos+bytes + + until char_count == sub_len + + local last = byte_pos-1 + local sub = sub(str,start,last) + return sub, start, last + end +end + +local function binsearch(sortedTable, item, comp) + local head, tail = 1, #sortedTable + local mid = math.floor((head + tail)/2) + if not comp then + while (tail - head) > 1 do + if sortedTable[tonumber(mid)] > item then + tail = mid + else + head = mid + end + mid = math.floor((head + tail)/2) + end + else + end + if sortedTable[tonumber(head)] == item then + return true, tonumber(head) + elseif sortedTable[tonumber(tail)] == item then + return true, tonumber(tail) + else + return false + end +end +local function classMatchGenerator(class, plain) + local codes = {} + local ranges = {} + local ignore = false + local range = false + local firstletter = true + local unmatch = false + + local it = utf8gensub(class) + + local skip + for c,bs,be in it do + skip = be + if not ignore and not plain then + if c == "%" then + ignore = true + elseif c == "-" then + table.insert(codes, utf8unicode(c)) + range = true + elseif c == "^" then + if not firstletter then + error('!!!') + else + unmatch = true + end + elseif c == ']' then + break + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + end + elseif ignore and not plain then + if c == 'a' then -- %a: represents all letters. (ONLY ASCII) + table.insert(ranges, {65, 90}) -- A - Z + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'c' then -- %c: represents all control characters. + table.insert(ranges, {0, 31}) + table.insert(codes, 127) + elseif c == 'd' then -- %d: represents all digits. + table.insert(ranges, {48, 57}) -- 0 - 9 + elseif c == 'g' then -- %g: represents all printable characters except space. + table.insert(ranges, {1, 8}) + table.insert(ranges, {14, 31}) + table.insert(ranges, {33, 132}) + table.insert(ranges, {134, 159}) + table.insert(ranges, {161, 5759}) + table.insert(ranges, {5761, 8191}) + table.insert(ranges, {8203, 8231}) + table.insert(ranges, {8234, 8238}) + table.insert(ranges, {8240, 8286}) + table.insert(ranges, {8288, 12287}) + elseif c == 'l' then -- %l: represents all lowercase letters. (ONLY ASCII) + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'p' then -- %p: represents all punctuation characters. (ONLY ASCII) + table.insert(ranges, {33, 47}) + table.insert(ranges, {58, 64}) + table.insert(ranges, {91, 96}) + table.insert(ranges, {123, 126}) + elseif c == 's' then -- %s: represents all space characters. + table.insert(ranges, {9, 13}) + table.insert(codes, 32) + table.insert(codes, 133) + table.insert(codes, 160) + table.insert(codes, 5760) + table.insert(ranges, {8192, 8202}) + table.insert(codes, 8232) + table.insert(codes, 8233) + table.insert(codes, 8239) + table.insert(codes, 8287) + table.insert(codes, 12288) + elseif c == 'u' then -- %u: represents all uppercase letters. (ONLY ASCII) + table.insert(ranges, {65, 90}) -- A - Z + elseif c == 'w' then -- %w: represents all alphanumeric characters. (ONLY ASCII) + table.insert(ranges, {48, 57}) -- 0 - 9 + table.insert(ranges, {65, 90}) -- A - Z + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'x' then -- %x: represents all hexadecimal digits. + table.insert(ranges, {48, 57}) -- 0 - 9 + table.insert(ranges, {65, 70}) -- A - F + table.insert(ranges, {97, 102}) -- a - f + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + end + ignore = false + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + ignore = false + end + + firstletter = false + end + + table.sort(codes) + + local function inRanges(charCode) + for _,r in ipairs(ranges) do + if r[1] <= charCode and charCode <= r[2] then + return true + end + end + return false + end + if not unmatch then + return function(charCode) + return binsearch(codes, charCode) or inRanges(charCode) + end, skip + else + return function(charCode) + return charCode ~= -1 and not (binsearch(codes, charCode) or inRanges(charCode)) + end, skip + end +end + +-- utf8sub with extra argument, and extra result value +local function utf8subWithBytes (s, i, j, sb) + -- argument defaults + j = j or -1 + + local pos = sb or 1 + local bytes = len(s) + local len = 0 + + -- only set l if i or j is negative + local l = (i >= 0 and j >= 0) or utf8len(s) + local startChar = (i >= 0) and i or l + i + 1 + local endChar = (j >= 0) and j or l + j + 1 + + -- can't have start before end! + if startChar > endChar then + return "" + end + + -- byte offsets to pass to string.sub + local startByte,endByte = 1,bytes + + while pos <= bytes do + len = len + 1 + + if len == startChar then + startByte = pos + end + + pos = pos + utf8charbytes(s, pos) + + if len == endChar then + endByte = pos - 1 + break + end + end + + if startChar > len then startByte = bytes+1 end + if endChar < 1 then endByte = 0 end + + return sub(s, startByte, endByte), endByte + 1 +end + +local cache = setmetatable({},{ + __mode = 'kv' +}) +local cachePlain = setmetatable({},{ + __mode = 'kv' +}) +local function matcherGenerator(regex, plain) + local matcher = { + functions = {}, + captures = {} + } + if not plain then + cache[regex] = matcher + else + cachePlain[regex] = matcher + end + local function simple(func) + return function(cC) + if func(cC) then + matcher:nextFunc() + matcher:nextStr() + else + matcher:reset() + end + end + end + local function star(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextFunc() + matcher:nextStr() + else + matcher:nextFunc() + end + end + end + local function minus(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextStr() + end + matcher:nextFunc() + end + end + local function question(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextFunc() + matcher:nextStr() + end + matcher:nextFunc() + end + end + + local function capture(id) + return function(cC) + local l = matcher.captures[id][2] - matcher.captures[id][1] + local captured = utf8sub(matcher.string, matcher.captures[id][1], matcher.captures[id][2]) + local check = utf8sub(matcher.string, matcher.str, matcher.str + l) + if captured == check then + for i = 0, l do + matcher:nextStr() + end + matcher:nextFunc() + else + matcher:reset() + end + end + end + local function captureStart(id) + return function(cC) + matcher.captures[id][1] = matcher.str + matcher:nextFunc() + end + end + local function captureStop(id) + return function(cC) + matcher.captures[id][2] = matcher.str - 1 + matcher:nextFunc() + end + end + + local function balancer(str) + local sum = 0 + local bc, ec = utf8sub(str, 1, 1), utf8sub(str, 2, 2) + local skip = len(bc) + len(ec) + bc, ec = utf8unicode(bc), utf8unicode(ec) + return function(cC) + if cC == ec and sum > 0 then + sum = sum - 1 + if sum == 0 then + matcher:nextFunc() + end + matcher:nextStr() + elseif cC == bc then + sum = sum + 1 + matcher:nextStr() + else + if sum == 0 or cC == -1 then + sum = 0 + matcher:reset() + else + matcher:nextStr() + end + end + end, skip + end + + matcher.functions[1] = function(cC) + matcher:fullResetOnNextStr() + matcher.seqStart = matcher.str + matcher:nextFunc() + if (matcher.str > matcher.startStr and matcher.fromStart) or matcher.str >= matcher.stringLen then + matcher.stop = true + matcher.seqStart = nil + end + end + + local lastFunc + local ignore = false + local skip = nil + local it = (function() + local gen = utf8gensub(regex) + return function() + return gen(skip) + end + end)() + local cs = {} + for c, bs, be in it do + skip = nil + if plain then + table.insert(matcher.functions, simple(classMatchGenerator(c, plain))) + else + if ignore then + if find('123456789', c, 1, true) then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + table.insert(matcher.functions, capture(tonumber(c))) + elseif c == 'b' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + local b + b, skip = balancer(sub(regex, be + 1, be + 9)) + table.insert(matcher.functions, b) + else + lastFunc = classMatchGenerator('%' .. c) + end + ignore = false + else + if c == '*' then + if lastFunc then + table.insert(matcher.functions, star(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '+' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + table.insert(matcher.functions, star(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '-' then + if lastFunc then + table.insert(matcher.functions, minus(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '?' then + if lastFunc then + table.insert(matcher.functions, question(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '^' then + if bs == 1 then + matcher.fromStart = true + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '$' then + if be == len(regex) then + matcher.toEnd = true + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '[' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc, skip = classMatchGenerator(sub(regex, be + 1)) + elseif c == '(' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + table.insert(matcher.captures, {}) + table.insert(cs, #matcher.captures) + table.insert(matcher.functions, captureStart(cs[#cs])) + if sub(regex, be + 1, be + 1) == ')' then matcher.captures[#matcher.captures].empty = true end + elseif c == ')' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + local cap = table.remove(cs) + if not cap then + error('invalid capture: "(" missing') + end + table.insert(matcher.functions, captureStop(cap)) + elseif c == '.' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = function(cC) return cC ~= -1 end + elseif c == '%' then + ignore = true + else + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = classMatchGenerator(c) + end + end + end + end + if #cs > 0 then + error('invalid capture: ")" missing') + end + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = nil + ignore = nil + + table.insert(matcher.functions, function() + if matcher.toEnd and matcher.str ~= matcher.stringLen then + matcher:reset() + else + matcher.stop = true + end + end) + + matcher.nextFunc = function(self) + self.func = self.func + 1 + end + matcher.nextStr = function(self) + self.str = self.str + 1 + end + matcher.strReset = function(self) + local oldReset = self.reset + local str = self.str + self.reset = function(s) + s.str = str + s.reset = oldReset + end + end + matcher.fullResetOnNextFunc = function(self) + local oldReset = self.reset + local func = self.func +1 + local str = self.str + self.reset = function(s) + s.func = func + s.str = str + s.reset = oldReset + end + end + matcher.fullResetOnNextStr = function(self) + local oldReset = self.reset + local str = self.str + 1 + local func = self.func + self.reset = function(s) + s.func = func + s.str = str + s.reset = oldReset + end + end + + matcher.process = function(self, str, start) + + self.func = 1 + start = start or 1 + self.startStr = (start >= 0) and start or utf8len(str) + start + 1 + self.seqStart = self.startStr + self.str = self.startStr + self.stringLen = utf8len(str) + 1 + self.string = str + self.stop = false + + self.reset = function(s) + s.func = 1 + end + + local lastPos = self.str + local lastByte + local char + while not self.stop do + if self.str < self.stringLen then + --[[ if lastPos < self.str then + print('last byte', lastByte) + char, lastByte = utf8subWithBytes(str, 1, self.str - lastPos - 1, lastByte) + char, lastByte = utf8subWithBytes(str, 1, 1, lastByte) + lastByte = lastByte - 1 + else + char, lastByte = utf8subWithBytes(str, self.str, self.str) + end + lastPos = self.str ]] + char = utf8sub(str, self.str,self.str) + --print('char', char, utf8unicode(char)) + self.functions[self.func](utf8unicode(char)) + else + self.functions[self.func](-1) + end + end + + if self.seqStart then + local captures = {} + for _,pair in pairs(self.captures) do + if pair.empty then + table.insert(captures, pair[1]) + else + table.insert(captures, utf8sub(str, pair[1], pair[2])) + end + end + return self.seqStart, self.str - 1, unpack(captures) + end + end + + return matcher +end + +-- string.find +local function utf8find(str, regex, init, plain) + local matcher = cache[regex] or matcherGenerator(regex, plain) + return matcher:process(str, init) +end + +-- string.match +local function utf8match(str, regex, init) + init = init or 1 + local found = {utf8find(str, regex, init)} + if found[1] then + if found[3] then + return unpack(found, 3) + end + return utf8sub(str, found[1], found[2]) + end +end + +-- string.gmatch +local function utf8gmatch(str, regex, all) + regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex + local lastChar = 1 + return function() + local found = {utf8find(str, regex, lastChar)} + if found[1] then + lastChar = found[2] + 1 + if found[all and 1 or 3] then + return unpack(found, all and 1 or 3) + end + return utf8sub(str, found[1], found[2]) + end + end +end + +local function replace(repl, args) + local ret = '' + if type(repl) == 'string' then + local ignore = false + local num = 0 + for c in utf8gensub(repl) do + if not ignore then + if c == '%' then + ignore = true + else + ret = ret .. c + end + else + num = tonumber(c) + if num then + ret = ret .. args[num] + else + ret = ret .. c + end + ignore = false + end + end + elseif type(repl) == 'table' then + ret = repl[args[1] or args[0]] or '' + elseif type(repl) == 'function' then + if #args > 0 then + ret = repl(unpack(args, 1)) or '' + else + ret = repl(args[0]) or '' + end + end + return ret +end +-- string.gsub +local function utf8gsub(str, regex, repl, limit) + limit = limit or -1 + local ret = '' + local prevEnd = 1 + local it = utf8gmatch(str, regex, true) + local found = {it()} + local n = 0 + while #found > 0 and limit ~= n do + local args = {[0] = utf8sub(str, found[1], found[2]), unpack(found, 3)} + ret = ret .. utf8sub(str, prevEnd, found[1] - 1) + .. replace(repl, args) + prevEnd = found[2] + 1 + n = n + 1 + found = {it()} + end + return ret .. utf8sub(str, prevEnd), n +end + +local utf8 = {} +utf8.len = utf8len +utf8.sub = utf8sub +utf8.reverse = utf8reverse +utf8.char = utf8char +utf8.unicode = utf8unicode +utf8.gensub = utf8gensub +utf8.byte = utf8unicode +utf8.find = utf8find +utf8.match = utf8match +utf8.gmatch = utf8gmatch +utf8.gsub = utf8gsub +utf8.dump = dump +utf8.format = format +utf8.lower = lower +utf8.upper = upper +utf8.rep = rep +return utf8 \ No newline at end of file diff --git a/lib/utils.lua b/lib/utils.lua index 814ee56..38c5b52 100644 --- a/lib/utils.lua +++ b/lib/utils.lua @@ -65,3 +65,4 @@ function use(module, ...) end end end + diff --git a/lib/xml.lua b/lib/xml.lua index b467b54..9af2df6 100644 --- a/lib/xml.lua +++ b/lib/xml.lua @@ -128,11 +128,7 @@ function XmlParser:ParseXmlText(xmlText) end function XmlParser:parse(text) - return ParseXmlText(text) -end - -function XmlParser:decode(text) - return ParseXmlText(text) + return self:ParseXmlText(text) end return XmlParser