如何使用luacurl / libcurl / curl和Lua获取HTML代码



我的代码(代码来自 @Michal Kottman)中还缺少什么,才能获取网站的 HTML 源代码?也就是在 Chrome 中右键单击并选择"查看页面源代码"时看到的内容。

-- Load the luacurl binding.
local curl = require "luacurl"
-- Create one curl handle at file scope; GET below configures and uses it.
local c = curl.new()
--- Request `url` through the file-level curl handle `c` and return the body.
-- The request is sent via the configured proxy with two extra headers and a
-- connect timeout of 30; progress reporting is switched on.
-- @param url  address to request
-- @return all received chunks joined into one string
-- Raises (through assert) when c:perform() returns a falsy value.
function GET(url)
    local chunks = {} -- pieces of the response, in arrival order

    -- Write callback: remember the chunk and return its byte length.
    local function on_write(_, data)
        chunks[#chunks + 1] = data
        return #data
    end

    -- Progress callback: print the url with the total/current download values.
    local function on_progress(_, total, now)
        print('%', url, total, now)
    end

    -- Request target and transport settings.
    c:setopt(curl.OPT_URL, url)
    c:setopt(curl.OPT_PROXY, "http://myproxy.bla.com:8080")
    c:setopt(curl.OPT_HTTPHEADER, "Connection: Keep-Alive", "Accept-Language: en-us")
    c:setopt(curl.OPT_CONNECTTIMEOUT, 30)

    -- Callbacks; NOPROGRESS=false is what enables the progress callback.
    c:setopt(curl.OPT_WRITEFUNCTION, on_write)
    c:setopt(curl.OPT_PROGRESSFUNCTION, on_progress)
    c:setopt(curl.OPT_NOPROGRESS, false)

    assert(c:perform())
    return table.concat(chunks)
end
-- Fetch the page, echo it to stdout, and save a copy to text.html.
--local s = GET 'http://www.lua.org/'
local s = GET 'https://www.youtube.com/watch?v=dT_fkwX4fRM'
print(s)
-- `local` keeps the handle out of the global table; assert reports the
-- io.open / write error message instead of failing later on a nil handle.
local file = assert(io.open("text.html", "wb"))
assert(file:write(s))
file:close()

不幸的是,我必须使用 Lua,并且只能使用 luacurl 绑定,因为 luasocket 在设置代理后无法工作(至少对我来说是这样)。我下载到的文件是空的。而在命令行(cmd)中,我可以毫无问题地获取页面源代码:curl http://mypage.com

这段代码对 lua.org 可以正常工作,但对 YouTube 链接却不行。我遗漏了什么?

-- Load the luacurl binding.
local curl = require "luacurl"
-- Create one curl handle at file scope; GET below configures and uses it.
local c = curl.new()
--- Request `url` through the file-level curl handle `c` and return the body.
-- The request goes through the configured proxy, follows redirects, sends a
-- browser-like User-Agent, and has progress reporting switched on.
-- @param url  address to request
-- @return all received chunks concatenated into one string
-- Raises (through assert) when c:perform() returns a falsy value.
function GET(url)
    c:setopt(curl.OPT_URL, url)
    c:setopt(curl.OPT_PROXY, "http://myproxy.com:8080")
    c:setopt(curl.OPT_HTTPHEADER, "Connection: Keep-Alive", "Accept-Language: en-us")
    c:setopt(curl.OPT_CONNECTTIMEOUT, 30)
    -- Follow HTTP redirects; the author reports the request fails without it.
    c:setopt(curl.OPT_FOLLOWLOCATION, true)
    c:setopt(curl.OPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36")
    -- NOTE(review): peer verification is disabled, so the server certificate
    -- is not checked. The author reports nothing is returned otherwise;
    -- prefer supplying a CA bundle instead if one is available.
    c:setopt(curl.OPT_SSL_VERIFYPEER, false)
    -- This option names HTTP content codings (gzip, deflate, ...), not a
    -- character set, so "utf8" was not a meaningful value. An empty string
    -- asks libcurl to offer every coding it supports and decode the reply.
    c:setopt(curl.OPT_ENCODING, "")
    local chunks = {} -- pieces of the response, in arrival order
    c:setopt(curl.OPT_WRITEFUNCTION, function(param, buf)
        chunks[#chunks + 1] = buf -- keep the received chunk
        return #buf
    end)
    c:setopt(curl.OPT_PROGRESSFUNCTION, function(param, dltotal, dlnow)
        print('%', url, dltotal, dlnow) -- simple progress report
    end)
    c:setopt(curl.OPT_NOPROGRESS, false) -- enables the progress callback
    assert(c:perform())
    return table.concat(chunks) -- whole body as a single string
end

相关内容

  • 没有找到相关文章

最新更新