可能重复:
Rails 的 delayed_job 不工作
如何在heroku上创建每15分钟运行一次的后台任务?
如何使用 clockwork 和 delayed_job 进行周期性的网页抓取?还有哪些其他插件/gem 可用?
我的rake文件:
desc "Importer statistikker"
namespace :reklamer do
# Aggregate task: lists the four importer tasks as prerequisites, so invoking
# it runs each of them first. It performs no work of its own.
importers = [:iqmedier, :euroads, :mikkelsen, :orville]
task :runall => importers do
  # Intentionally empty -- this body only runs once every prerequisite has run.
end
# Imports statistics from Iqmedier: opens the site, follows the
# "/Publisher/Stats" link, submits the first form on that page and reads the
# second-to-last table row. The row is matched against Reklamer by date
# (first cell) and a record is created or updated accordingly.
task :iqmedier => :environment do
  # The library file is lowercase; 'Mechanize' only resolves on
  # case-insensitive file systems.
  require 'mechanize'
  # Use the top-level Mechanize class, as the euroads and mikkelsen tasks do,
  # instead of the legacy WWW:: namespace.
  agent = Mechanize.new
  agent.get("http://www.iqmedier.dk")
  agent.page.link_with(:href => "/Publisher/Stats").click
  stats_form = agent.page.forms.first
  stats_form.submit
  # Split the row into the text of its cells. Indexing the <tr> node itself
  # with an integer looks up an attribute rather than a cell, so every value
  # was nil before. Same extraction as the mikkelsen task.
  @stats = agent.page.search('//tr')[-2].search('td').map { |cell| cell.text }
  @existing = Reklamer.find(:first, :conditions => {:dato => @stats[0]})
  if @existing.nil?
    # NOTE(review): no :dato is stored here although the lookup above is by
    # :dato -- confirm whether the column is populated elsewhere.
    Reklamer.create!(:virksomhed => 'Iqmedier', :vis => @stats[4], :leads => @stats[5], :ordre => @stats[6], :cpc => @stats[7], :earn => @stats[8])
  elsif @existing.dato != Date.today
    # Matching row is not from today: start a zeroed record for today.
    Reklamer.create!(:virksomhed => 'Iqmedier', :dato => Date.today, :unik_klik => 0, :klik => 0, :unik_vis => 0, :vis => 0, :leads => 0, :ordre => 0, :cpc => 0, :earn => 0)
  else
    @existing.update_attributes!(:leads => @stats[5], :ordre => @stats[6], :cpc => @stats[7], :earn => @stats[8])
  end
end
# Imports statistics from Euroads: loads the front page with a Safari
# user-agent alias, selects the 'table.ea' elements and creates or updates a
# Reklamer record from them.
task :euroads => :environment do
  # The library file is lowercase; 'Mechanize' only resolves on
  # case-insensitive file systems.
  require 'mechanize'
  require 'pp'
  # Block parameter renamed so it no longer shadows the outer `agent` local.
  agent = Mechanize.new do |browser|
    browser.user_agent_alias = 'Mac Safari'
  end
  agent.get("http://www.euroads.dk")
  # NOTE(review): this selects whole tables, not cells, so the indexes below
  # address tables -- confirm the intended selector.
  @stats = agent.page.search('table.ea')
  # NOTE(review): no conditions, so this is the first Reklamer row of any
  # company or date -- confirm whether it should filter like the other tasks.
  @existing = Reklamer.find(:first)
  if @existing.nil?
    Reklamer.create!(:klik => @stats[2].to_i, :unik_vis => @stats[3], :vis => @stats[4], :leads => @stats[5], :ordre => @stats[6], :cpc => @stats[7], :earn => @stats[8])
  elsif @existing.dato != Date.today
    # Existing row is not from today: start a zeroed record for today.
    Reklamer.create!(:virksomhed => 'Euroads', :dato => Date.today, :unik_klik => 0, :klik => 0, :unik_vis => 0, :vis => 0, :leads => 0, :ordre => 0, :cpc => 0, :earn => 0)
  else
    # The original call contained `:unik_klik => :leads => ...`, a syntax error
    # that stopped the whole rake file from loading. The value meant for
    # :unik_klik is not recoverable, so the dangling key is dropped.
    @existing.update_attributes(:leads => @stats[5], :ordre => @stats[6], :cpc => @stats[7], :earn => @stats[8])
  end
end
# Imports statistics from Mikkelsen Media: loads the page, takes the fifth row
# from the end of the table, converts its cells to text and creates or updates
# the Reklamer record whose date equals the first cell.
task :mikkelsen => :environment do
  # The library file is lowercase; 'Mechanize' only resolves on
  # case-insensitive file systems.
  require 'mechanize'
  agent = Mechanize.new
  agent.get("http://affilate.mikkelsenmedia.dk")
  @stats = agent.page.search('//tr')[-5].search('td').map { |cell| cell.text }

  # Create and update write exactly the same columns, so build the hash once.
  attributes = {
    :virksomhed => 'Mikkelsen',
    :dato       => @stats[0],
    :unik_klik  => @stats[3],
    :klik       => @stats[3].to_i,
    :unik_vis   => @stats[1],
    :vis        => @stats[1],
    :leads      => @stats[4],
    :ordre      => @stats[9],
    :cpc        => @stats[3],
    :earn       => @stats[5]
  }

  @existing = Reklamer.find(:first, :conditions => {:dato => @stats[0]})
  if @existing.nil?
    Reklamer.create!(attributes)
  else
    @existing.update_attributes(attributes)
  end
end
# Imports statistics from Orville Media: opens the site, requests the
# publisher stats page for today's day and month, reads the table cells and
# creates or updates the Reklamer record dated today.
task :orville => :environment do
  # The library file is lowercase; 'Mechanize' only resolves on
  # case-insensitive file systems.
  require 'mechanize'
  # Use the top-level Mechanize class, as the euroads and mikkelsen tasks do,
  # instead of the legacy WWW:: namespace.
  agent = Mechanize.new
  agent.get("https://dk.orvillemedia.com")
  # Read the date once so all three values describe the same day even if the
  # task runs across midnight.
  today = Date.today
  @dato = today.strftime("%Y-%m-%d")
  @month = today.strftime("%m")
  @day = today.strftime("%d")
  agent.get("https://dk.orvillemedia.com/publisher/stats.php?sd=#{@day}&sm=#{@month}")
  # Work with cell text rather than nodes: the '-' comparison below can never
  # be true for a node object.
  @stats = agent.page.search('td').map { |cell| cell.text }
  @existing = Reklamer.find(:first, :conditions => {:dato => @dato})
  if @existing.nil?
    # NOTE(review): no :dato or :virksomhed is stored here although the lookup
    # above is by :dato -- confirm whether that is intended.
    Reklamer.create!(:vis => @stats[1], :leads => @stats[3], :ordre => '0')
  else
    # NOTE(review): the check is on cell 5 but the fallback value is cell 3 --
    # confirm which column holds the CPC figure.
    @existing.update_attributes(:ordre => '0', :cpc => (@stats[5] == '-' ? 0 : @stats[3]), :earn => @stats[6])
  end
end
end
在您的 Gemfile 中:
# Provides Delayed::Job, which the Scraper job uses to enqueue its next run.
gem 'delayed_job'
在 lib/scraper.rb 中:
# Self-rescheduling background job: each successful run enqueues a fresh
# Scraper to run 15 minutes later via Delayed::Job.
class Scraper
  # Minutes between a successful run and the next scheduled one.
  RESCHEDULE_AFTER_MINUTES = 15

  # The job's work. Placeholder: put all the scraping code here.
  def perform
    # All the scraping code you need to run goes here.
  end

  # Failure hook. Placeholder: e.g. send yourself a warning email.
  # The scraping is retried automatically, so nothing is rescheduled here.
  #
  # @param job the job that failed
  # @param exception the error that was raised
  def error(job, exception)
    # Send a warning email to yourself, or whatever.
  end

  # Success hook: queues the next scraping run.
  #
  # @param job the job that just completed
  def success(job)
    next_run_at = RESCHEDULE_AFTER_MINUTES.minutes.from_now
    Delayed::Job.enqueue(Scraper.new, :run_at => next_run_at)
  end
end
然后启动第一个 Scraper 任务,之后它会自行安排后续的运行。在 heroku 控制台中,运行:
# Enqueue the first run rather than calling #perform directly: a direct call
# bypasses the job queue, so the success hook that schedules the next run
# would never fire.
Delayed::Job.enqueue(Scraper.new)
你需要有一个 worker 一直在运行,才能执行这些抓取任务。
这会为您提供每小时的任务:http://addons.heroku.com/cron