{"id":243,"date":"2016-07-04T23:05:04","date_gmt":"2016-07-04T22:05:04","guid":{"rendered":"https:\/\/solidt.eu\/blog\/?p=243"},"modified":"2023-05-12T10:26:56","modified_gmt":"2023-05-12T09:26:56","slug":"lua-crawl-host","status":"publish","type":"post","link":"https:\/\/solidt.eu\/site\/lua-crawl-host\/","title":{"rendered":"Lua Crawl Host"},"content":{"rendered":"\n<div style=\"height: 250px; position:relative; margin-bottom: 50px;\" class=\"wp-block-simple-code-block-ace\"><pre class=\"wp-block-simple-code-block-ace\" style=\"position:absolute;top:0;right:0;bottom:0;left:0\" data-mode=\"lua\" data-theme=\"monokai\" data-fontsize=\"14\" data-lines=\"Infinity\" data-showlines=\"true\" data-copy=\"false\">module(... or 'crawl', package.seeall)\n\nlocal httputils = require('httputils')\n\nfunction scanHost(host, startURL, useProxy)\n\t\n\tlocal curl = 'curl -s -H \"Accept: text\/html\" --max-filesize 100000 \"%s\"'\n\t\n\tif (useProxy) then\n\t\tcurl = 'curl -s -H \"Accept: text\/html\" -x http:\/\/fw.net.local:8081 --proxy-user proxyuser --max-filesize 100000 \"%s\"'\n\tend\n\t\n\tlocal urls = {}\n\tlocal urls_queued = {}\n\t\n\tlocal abshost = \"http:\/\/\"..host\n\t\n\tfunction addUrl(url)\t\n\t\tif (urls_queued[url] == nil and #url &lt; 255) then\n\t\t\ttable.insert(urls, url)\n\t\t\turls_queued[url] = true\n\t\tend\n\tend\n\t\n\taddUrl(startURL)\n\n\twhile #urls > 0 do\n\t\n\t\tlocal url = urls[1];\n\t\tlocal cmd = string.format(curl, url)\t\t\t\n\t\ttable.remove(urls, 1)\n\t\t\n\t\tprint(url)\t\t\t\t\t\n\t\tlocal cd = io.popen (cmd , 'r')\t\n\t\tcontent = cd:read(\"*all\")\t\t\n\t\tio.close(cd)\n\t\t\n\t\t--print('content:')\n\t\t--print(content)\n\t\t--os.exit(1)\t\t\n\t\tlocal absURL = \"[=(]['\\\"]([^'\\\"]-:\/\/[^'\\\"]-)['\\\"]\"\n\t\tlocal relURL = \"[=(]['\\\"]([%s]-\/[^'\\\"]-)['\\\"]\"\n\t\t\n\t\tfor word in string.gmatch(content, absURL) do \t\t\n\t\t\tlocal u = httputils.HtmlDecode(word)\n\t\t\tif string.find(u, host) then\n\t\t\t\taddUrl(u)\n\t\t\tend\n\t\tend\n\t\t\n\t\tfor word in string.gmatch(content, relURL) \n\t\tdo \n\t\t\tlocal u = httputils.HtmlDecode(word)\n\t\t\tif (string.sub(u, 1, 4) ~= \"\/wEP\") and (string.find(u, '\/\/') == nil) then\n\t\t\t\tu = abshost..u\t\t\n\t\t\t\taddUrl(u)\n\t\t\tend\n\t\tend\n\tend\nend\n\n--scanHost(\"localhost:1467\", \"http:\/\/localhost:1467\/\", false);\nscanHost(\"localhost:1470\", \"http:\/\/localhost:1470\/nl-NL\/Home.aspx\", false);\n\nmodule(... or 'httputils', package.seeall)\n\nlocal htmlEncodeEntities = { \n\t['&amp;'] = 'amp',  \n\t[' '] = 'nbsp',  \n\t['\"'] = 'quot',  \n\t['&lt;'] = 'lt',  \n\t['>'] = 'gt'\n\t\t}\n\nlocal htmlDecodeEntities = { }\n\nlocal function fillHtmlDecodeEntities()\n\tfor k,v in pairs(htmlEncodeEntities) do\n\t\thtmlDecodeEntities[v] = k\n\tend\nend\nfillHtmlDecodeEntities();\n\nlocal function EncodeHtmlEntity(char)\n\tif (htmlEncodeEntities[char]) then\n\t\treturn '&amp;'..htmlEncodeEntities[char]..';'\n\telse\n\t\treturn char\n\tend\nend\n\nlocal function DecodeHtmlEntity(entity)\n\tif (htmlDecodeEntities[entity]) then\n\t\treturn htmlDecodeEntities[entity]\n\telse\n\t\treturn entity\n\tend\nend\n\nfunction HtmlEncode(text)\n\treturn string.gsub(text, \"(.)\", EncodeHtmlEntity)\nend\n\nfunction HtmlDecode(html)\n\treturn string.gsub(html, \"&amp;(.-);\", DecodeHtmlEntity)\nend\n\nfunction UrlEncode()\n\nend\n\nfunction UrlDecode()\n\nend<\/pre><\/div>\n\n\n\n<pre class=\"wp-block-preformatted\">\n<\/pre>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"inline_featured_image":false,"footnotes":""},"categories":[9],"tags":[],"class_list":["post-243","post","type-post","status-publish","format-standard","hentry","category-lua"],"_links":{"self":[{"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/posts\/243","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/comments?post=243"}],"version-history":[{"count":3,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/posts\/243\/revisions"}],"predecessor-version":[{"id":7778,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/posts\/243\/revisions\/7778"}],"wp:attachment":[{"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/media?parent=243"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/categories?post=243"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/solidt.eu\/site\/wp-json\/wp\/v2\/tags?post=243"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}