Module:HTMLParser/ElementNode
Documentation for this module may be created at Module:HTMLParser/ElementNode/doc
-- Derivative work of: --[[ (The MIT license) Copyright (c) 2013, Wouter Scherphof ([email protected]) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--]]
-- Source: https://github.com/wscherphof/lua-htmlparser/tree/master/src/htmlparser

local Set = require("Module:Set")

--- A node of the parsed HTML tree: either the synthetic "root" node, which
-- owns the full source text, or one element below it.
local ElementNode = {}
ElementNode.mt = {__index = ElementNode}

--- Create a node and link it into the tree.
-- @param nameortext the node name; when `node` is nil this is instead the
--   full source text, and the node is named "root"
-- @param node an existing node to attach relative to, or nil to create the root
-- @param descend truthy: the new node becomes a child of `node`;
--   falsy: it becomes a sibling of `node` (appended to `node.parent.nodes`)
-- @param openstart start index into the root text (used by gettext)
-- @param openend end index into the root text (used by getcontent);
--   presumably the span of the opening tag - confirm against the parser
-- @return the new ElementNode
function ElementNode:new(nameortext, node, descend, openstart, openend)
  local instance = {
    name = nameortext,
    level = 0,
    parent = nil,
    root = nil,
    nodes = {},
    -- until close() is given explicit positions, the closing span
    -- coincides with the opening span
    _openstart = openstart, _openend = openend,
    _closestart = openstart, _closeend = openend,
    attributes = {},
    id = nil,
    classes = {},
    -- indexes of everything below this node, filled in by close()
    deepernodes = Set:new(),
    deeperelements = {},
    deeperattributes = {},
    deeperids = {},
    deeperclasses = {}
  }
  if not node then
    -- root node: spans the whole text
    instance.name = "root"
    instance.root = instance
    instance._text = nameortext
    local length = string.len(nameortext)
    instance._openstart, instance._openend = 1, length
    instance._closestart, instance._closeend = 1, length
  elseif descend then
    instance.root = node.root
    instance.parent = node
    instance.level = node.level + 1
    table.insert(node.nodes, instance)
  else
    instance.root = node.root
    instance.parent = node.parent
    instance.level = node.level
    table.insert(node.parent.nodes, instance)
  end
  return setmetatable(instance, ElementNode.mt)
end

--- Return the slice of the root text from this node's open start up to and
-- including its close end.
function ElementNode:gettext()
  return string.sub(self.root._text, self._openstart, self._closeend)
end

--- Return the slice of the root text strictly between this node's open end
-- and its close start.
function ElementNode:getcontent()
  return string.sub(self.root._text, self._openend + 1, self._closestart - 1)
end

--- Record attribute `k` with value `v` on this node.
-- "id" and "class" (compared case-insensitively) additionally populate
-- `self.id` and `self.classes`.
function ElementNode:addattribute(k, v)
  self.attributes[k] = v
  local key = string.lower(k)
  if key == "id" then
    self.id = v
  elseif key == "class" then
    -- class attribute contains "space-separated tokens", each of which
    -- we'd like quick access to
    for class in string.gmatch(v, "%S+") do
      table.insert(self.classes, class)
    end
  end
end

-- Add `node` to the Set stored at index[name], creating the Set on first use.
local function insert(index, name, node)
  index[name] = index[name] or Set:new()
  index[name]:add(node)
end

--- Finish this node and register it with every ancestor.
-- @param closestart optional start index of the closing span
-- @param closeend optional end index of the closing span; the stored
--   positions are only overwritten when both are given
function ElementNode:close(closestart, closeend)
  if closestart and closeend then
    self._closestart, self._closeend = closestart, closeend
  end
  -- inform higher level nodes about this element's existence in their branches
  local node = self.parent
  while node do
    node.deepernodes:add(self)
    insert(node.deeperelements, self.name, self)
    for k in pairs(self.attributes) do
      insert(node.deeperattributes, k, self)
    end
    if self.id then
      insert(node.deeperids, self.id, self)
    end
    for _, v in ipairs(self.classes) do
      insert(node.deeperclasses, v, self)
    end
    node = node.parent
  end
end

-- Escape all ^, $, (, ), %, ., [, ], *, +, -, and ? with a % prefix so that
-- `s` can be embedded in a Lua pattern as literal text.
local function escape(s)
  -- The parentheses truncate gsub's results to the string alone; without
  -- them the substitution count leaks into the caller's argument list
  -- (e.g. as the `init` argument of string.match).
  return (string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1"))
end

-- Evaluate selector string `s` against the descendants of `self`.
-- Returns a Set of matching nodes; the Set is empty when `s` is not a
-- non-empty string or when nothing matches.
local function selectnodes(self, s)
  if not s or type(s) ~= "string" or s == "" then return Set:new() end
  -- lookup index per selector prefix
  local sets = {
    [""] = self.deeperelements,
    ["["] = self.deeperattributes,
    ["#"] = self.deeperids,
    ["."] = self.deeperclasses
  }

  -- Return the Set of descendants of `self` matching one simple selector,
  -- where `t` is its prefix ("", "[", "#" or ".") and `w` the text after it.
  local function match(t, w)
    local m, e, v
    if t == "[" then
      w, m, e, v = string.match(w,
        "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
        "([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
        "(=?)" .. -- e = the optional "="
        "(.*)" -- v = anything following the "=", or else ""
      )
    end
    local matched = Set:new(sets[t][w])
    -- attribute value selectors
    if e == "=" then
      if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted
      v = string.sub(v, 2, #v - 1) -- strip quotes
      if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute
      for node in pairs(matched) do
        local a = node.attributes[w]
        if m == "" then
          -- equals
          if a ~= v then matched:remove(node) end
        elseif m == "!" then
          -- not equals
          if a == v then matched:remove(node) end
        elseif m == "|" then
          -- prefix: everything before the first "-"
          if string.match(a, "^[^-]*") ~= v then matched:remove(node) end
        elseif m == "*" then
          -- contains
          if string.match(a, escape(v)) ~= v then matched:remove(node) end
        elseif m == "~" then
          -- word: only remove when no whitespace-separated token matches,
          -- so no key is ever (re-)added to the Set while it is traversed
          local found = false
          for word in string.gmatch(a, "%S+") do
            if word == v then
              found = true
              break
            end
          end
          if not found then matched:remove(node) end
        elseif m == "^" then
          -- starts with
          if string.match(a, "^" .. escape(v)) ~= v then matched:remove(node) end
        elseif m == "$" then
          -- ends with
          if string.match(a, escape(v) .. "$") ~= v then matched:remove(node) end
        end
      end
    end
    return matched
  end

  local subjects, resultset, childrenonly = Set:new({self})
  for part in string.gmatch(s, "%S+") do
    if part == ">" then
      -- the next part only considers direct children of the subjects
      childrenonly = true
    else
      -- candidates: all descendants (or only the children) of every subject
      resultset = Set:new()
      for subject in pairs(subjects) do
        local star = subject.deepernodes
        if childrenonly then star = Set:new(subject.nodes) end
        resultset = resultset + star
      end
      childrenonly = false
      if part ~= "*" then
        local excludes, filter = Set:new()
        for t, w in string.gmatch(part,
          "([:%[#.]?)" .. -- t = an optional :, [, #, or .
          "([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
          "%]?%)?" -- followed by an uncaptured optional ] and/or )
        ) do
          if t == ":" then
            -- remember the pseudo-class; it applies to the next simple selector
            filter = w
          else
            local matched = match(t, w)
            if filter == "not" then
              excludes = excludes + matched
            else
              resultset = resultset * matched
            end
            filter = nil
          end
        end
        resultset = resultset - excludes
      end
      -- Every evaluated part, "*" included, becomes the subject set of the
      -- next part; otherwise "a > * > b" would be evaluated as "a > b".
      subjects = Set:new(resultset)
    end
  end
  -- a selector consisting only of whitespace and/or ">" evaluates no part
  return resultset or Set:new()
end

--- Select descendants of this node with selector string `s`.
-- @return a Set of matching nodes
function ElementNode:select(s)
  return selectnodes(self, s)
end

-- calling a node directly, node(s), is shorthand for node:select(s)
ElementNode.mt.__call = selectnodes

return ElementNode