scheduler.rb 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
# Copyright (C) 2012-2014 Zammad Foundation, http://zammad-foundation.org/
  2. class Scheduler < ApplicationModel
  3. def self.run( runner, runner_count )
  4. Thread.abort_on_exception = true
  5. jobs_started = {}
  6. while true
  7. logger.info "Scheduler running (runner #{runner} of #{runner_count})..."
  8. # reconnect in case db connection is lost
  9. begin
  10. ActiveRecord::Base.connection.reconnect!
  11. rescue => e
  12. puts "Can't reconnect to database #{ e.inspect }"
  13. end
  14. # read/load jobs and check if it is alredy started
  15. jobs = Scheduler.where( 'active = ? AND prio = ?', true, runner )
  16. jobs.each {|job|
  17. next if jobs_started[ job.id ]
  18. jobs_started[ job.id ] = true
  19. self.start_job( job, runner, runner_count )
  20. }
  21. sleep 90
  22. end
  23. end
  24. def self.start_job( job, runner, runner_count )
  25. logger.info "started job thread for '#{job.name}' (#{job.method})..."
  26. sleep 4
  27. Thread.new {
  28. if job.period
  29. while true
  30. self._start_job( job, runner, runner_count )
  31. job = Scheduler.lookup( :id => job.id )
  32. # exit is job got deleted
  33. break if !job
  34. # exit if job is not active anymore
  35. break if !job.active
  36. # exit if there is no loop period defined
  37. break if !job.period
  38. # wait until next run
  39. sleep job.period
  40. end
  41. else
  42. self._start_job( job, runner, runner_count )
  43. end
  44. # raise "Exception from thread"
  45. job.pid = ''
  46. job.save
  47. logger.info " ...stopped thread for '#{job.method}'"
  48. }
  49. end
  50. def self._start_job( job, runner, runner_count, try_count = 0, try_run_time = Time.now )
  51. sleep 5
  52. begin
  53. job.last_run = Time.now
  54. job.pid = Thread.current.object_id
  55. job.save
  56. logger.info "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count})..."
  57. eval job.method()
  58. rescue => e
  59. puts "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count}) exited with error #{ e.inspect }"
  60. # reconnect in case db connection is lost
  61. begin
  62. ActiveRecord::Base.connection.reconnect!
  63. rescue => e
  64. puts "Can't reconnect to database #{ e.inspect }"
  65. end
  66. try_run_max = 10
  67. try_count += 1
  68. # reset error counter if to old
  69. if try_run_time + ( 60 * 5 ) < Time.now
  70. try_count = 0
  71. end
  72. try_run_time = Time.now
  73. # restart job again
  74. if try_run_max > try_count
  75. self._start_job( job, runner, runner_count, try_count, try_run_time)
  76. else
  77. raise "STOP thread for #{job.method} (runner #{runner} of #{runner_count} after #{try_count} tries"
  78. end
  79. end
  80. end
  81. def self.worker
  82. wait = 10
  83. logger.info "*** Starting worker #{Delayed::Job.to_s}"
  84. loop do
  85. result = nil
  86. realtime = Benchmark.realtime do
  87. result = Delayed::Worker.new.work_off
  88. end
  89. count = result.sum
  90. break if $exit
  91. if count.zero?
  92. sleep(wait)
  93. logger.info "*** worker loop"
  94. else
  95. printf "*** #{count} jobs processed at %.4f j/s, %d failed ...\n" % [count / realtime, result.last]
  96. end
  97. end
  98. end
  99. def self.check( name, time_warning = 10, time_critical = 20 )
  100. time_warning_time = Time.now - time_warning.minutes
  101. time_critical_time = Time.now - time_critical.minutes
  102. scheduler = Scheduler.where( :name => name ).first
  103. if !scheduler
  104. puts "CRITICAL - no such scheduler jobs '#{name}'"
  105. return true
  106. end
  107. #puts "S " + scheduler.inspect
  108. if !scheduler.last_run
  109. puts "CRITICAL - scheduler jobs never started '#{name}'"
  110. exit 2
  111. end
  112. if scheduler.last_run < time_critical_time
  113. puts "CRITICAL - scheduler jobs was not running in last '#{time_critical.to_s}' minutes - last run at '#{scheduler.last_run.to_s}' '#{name}'"
  114. exit 2
  115. end
  116. if scheduler.last_run < time_warning_time
  117. puts "CRITICAL - scheduler jobs was not running in last '#{time_warning.to_s}' minutes - last run at '#{scheduler.last_run.to_s}' '#{name}'"
  118. exit 2
  119. end
  120. puts "ok - scheduler jobs was running at '#{scheduler.last_run.to_s}' '#{name}'"
  121. exit 0
  122. end
  123. end