scheduler.rb 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # Copyright (C) 2012-2014 Zammad Foundation, http://zammad-foundation.org/
  2. # rubocop:disable Rails/Output
  3. class Scheduler < ApplicationModel
  # Main loop of one scheduler runner. Never returns.
  #
  # Every 90 seconds it re-establishes the database connection, loads all
  # active jobs whose +prio+ equals +runner+ and hands every job that is not
  # being served yet to +start_job+.
  #
  # runner       - value the jobs' +prio+ column is matched against
  # runner_count - only used for log output and passed through to +start_job+
  def self.run(runner, runner_count)
    # an unhandled exception in any thread terminates the whole process
    Thread.abort_on_exception = true

    # job id => return value of start_job (the thread serving that job)
    jobs_started = {}

    loop do
      logger.info "Scheduler running (runner #{runner} of #{runner_count})..."

      # reconnect in case db connection is lost
      begin
        ActiveRecord::Base.connection.reconnect!
      rescue => e
        logger.error "Can't reconnect to database #{ e.inspect }"
      end

      # read/load jobs and check if they are already started
      Scheduler.where('active = ? AND prio = ?', true, runner).each do |job|
        if jobs_started.key?(job.id)
          # jobs without a period are executed only once per process
          next if !job.period

          # A periodic job is left alone while its thread is alive. A dead
          # thread for a job that is listed as active again means the job
          # was re-activated, so it has to be started again.
          handle = jobs_started[job.id]
          next if !handle.respond_to?(:alive?) || handle.alive?
        end

        jobs_started[job.id] = start_job(job, runner, runner_count)
      end

      sleep 90
    end
  end
  # Starts a thread that executes +job+ and returns that Thread.
  #
  # A job with a +period+ is executed repeatedly: after each run the record
  # is re-read with Scheduler.lookup and the thread goes on until the record
  # is gone, not active anymore or has no period anymore. A job without a
  # period is executed once.
  #
  # When the thread is done, the +pid+ of the job record is cleared (unless
  # the record got deleted) and the database connection is closed.
  #
  # job          - Scheduler record to execute
  # runner       - passed through to +_start_job+
  # runner_count - passed through to +_start_job+
  def self.start_job(job, runner, runner_count)
    logger.info "started job thread for '#{job.name}' (#{job.method})..."
    sleep 4

    Thread.new do
      begin
        # most recent copy of the job record that is known to exist
        record  = job
        deleted = false

        if record.period
          loop do
            _start_job(record, runner, runner_count)

            reloaded = Scheduler.lookup(id: record.id)

            # exit if job got deleted; keep the last record for logging
            if !reloaded
              deleted = true
              break
            end
            record = reloaded

            # exit if job is not active anymore
            break if !record.active

            # exit if there is no loop period defined
            break if !record.period

            # wait until next run
            sleep record.period
          end
        else
          _start_job(record, runner, runner_count)
        end

        # a deleted job has no record left that could be updated
        if !deleted
          record.pid = ''
          record.save
        end
        logger.info " ...stopped thread for '#{record.method}'"
      ensure
        # hand the connection back even if the job thread fails
        ActiveRecord::Base.connection.close
      end
    end
  end
  # Executes +job+ in the current thread and repeats the attempt on failure.
  #
  # Each attempt waits 5 seconds, stores +last_run+ and +pid+ on the record,
  # saves it and then evaluates the Ruby code returned by +job.method+.
  # If the attempt raises a StandardError, the error is logged, the database
  # connection is re-established and the attempt is repeated.
  #
  # job          - Scheduler record to execute
  # runner       - only used for log output
  # runner_count - only used for log output
  # try_count    - number of failed attempts so far
  # try_run_time - time of the previous failure (start time for the first attempt)
  #
  # Returns the value of the evaluated job code.
  # Raises RuntimeError after 10 failed attempts; the counter starts over
  # whenever the previous failure is more than 5 minutes ago.
  def self._start_job(job, runner, runner_count, try_count = 0, try_run_time = Time.zone.now)
    try_run_max = 10

    loop do
      sleep 5

      begin
        job.last_run = Time.zone.now
        job.pid      = Thread.current.object_id
        job.save
        logger.info "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count})..."

        # SECURITY: the string stored in the job record is executed as Ruby
        # code, so whoever can write scheduler records can run arbitrary code.
        return eval(job.method) # rubocop:disable Lint/Eval
      rescue => e
        logger.error "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count}) exited with error #{ e.inspect }"

        # reconnect in case db connection is lost
        begin
          ActiveRecord::Base.connection.reconnect!
        rescue => reconnect_error
          logger.error "Can't reconnect to database #{ reconnect_error.inspect }"
        end

        try_count += 1

        # reset error counter if the previous failure is too old
        try_count = 0 if try_run_time + (60 * 5) < Time.zone.now
        try_run_time = Time.zone.now

        # give up after too many failures, otherwise the loop tries again
        if try_count >= try_run_max
          raise "STOP thread for #{job.method} (runner #{runner} of #{runner_count}) after #{try_count} tries"
        end
      end
    end
  end
  # Works off Delayed::Job jobs in a loop until the global +$exit+ flag is set.
  #
  # Each pass calls Delayed::Worker#work_off. If the pass handled no job, the
  # loop sleeps for 10 seconds; otherwise the throughput of the pass is logged.
  def self.worker
    wait = 10
    logger.info "*** Starting worker #{Delayed::Job}"

    loop do
      result = nil

      realtime = Benchmark.realtime do
        result = Delayed::Worker.new.work_off
      end

      # the elements of the result are summed up to the number of handled
      # jobs; the last element is reported as the number of failed jobs
      count = result.sum

      break if $exit

      if count.zero?
        sleep(wait)
        logger.info '*** worker loop'
      else
        # the formatted statistics have to be handed to the logger, a bare
        # +format+ call only builds the string and throws it away
        logger.info format('*** %d jobs processed at %.4f j/s, %d failed ...', count, count / realtime, result.last)
      end
    end
  end
  # Monitoring check for the scheduler job called +name+.
  #
  # Prints a one-line status to stdout and terminates the process with the
  # matching exit code:
  #
  #   0 - the job ran within the last +time_warning+ minutes
  #   1 - the last run is older than +time_warning+ minutes
  #   2 - the job never ran or the last run is older than +time_critical+ minutes
  #
  # name          - name of the scheduler job
  # time_warning  - warning threshold in minutes
  # time_critical - critical threshold in minutes
  #
  # Returns +true+ (without exiting) if no job with that name exists.
  def self.check(name, time_warning = 10, time_critical = 20)
    time_warning_time  = Time.zone.now - time_warning.minutes
    time_critical_time = Time.zone.now - time_critical.minutes

    scheduler = Scheduler.find_by(name: name)
    if !scheduler
      puts "CRITICAL - no such scheduler jobs '#{name}'"
      # NOTE(review): this is the only CRITICAL branch that returns instead
      # of exiting with 2 - kept as is, confirm whether callers rely on it
      return true
    end
    logger.debug scheduler.inspect

    last_run = scheduler.last_run
    if !last_run
      puts "CRITICAL - scheduler jobs never started '#{name}'"
      exit 2
    end

    if last_run < time_critical_time
      puts "CRITICAL - scheduler jobs was not running in last '#{time_critical}' minutes - last run at '#{last_run}' '#{name}'"
      exit 2
    end

    # the warning threshold is reported as WARNING (exit code 1), not as CRITICAL
    if last_run < time_warning_time
      puts "WARNING - scheduler jobs was not running in last '#{time_warning}' minutes - last run at '#{last_run}' '#{name}'"
      exit 1
    end

    puts "ok - scheduler jobs was running at '#{last_run}' '#{name}'"
    exit 0
  end
  125. end