# Copyright (C) 2012-2016 Zammad Foundation, http://zammad-foundation.org/
# Periodic job scheduler.
#
# Each record describes one scheduler job. The attributes used by this class
# are `method` (Ruby code that gets evaluated), `period` (seconds between
# runs), `prio`, `active`, `last_run`, `pid`, `status` and `error_message`.
#
# Scheduler.threads is the entry point of the scheduler process: it starts a
# worker thread for delayed (background) jobs and one thread per due
# scheduler job.
class Scheduler < ApplicationModel

  # Tracks which scheduler jobs currently have a running thread
  # (job id => true/false). Written by the scheduler loop and the job threads.
  # rubocop:disable Style/ClassVars
  @@jobs_started = {}
  # rubocop:enable Style/ClassVars

  # Starts the scheduler: cleans up stale delayed jobs, starts the background
  # worker thread and then loops forever, starting a thread for every active
  # job that is due.
  #
  # This method does not return (endless loop).
  #
  # @example
  #   Scheduler.threads
  def self.threads

    # an unhandled exception in any thread terminates the whole process
    Thread.abort_on_exception = true

    # reconnect in case db connection is lost
    reconnect_database

    # cleanup old background jobs
    # NOTE: has to be called directly from this method, cleanup verifies its caller
    cleanup

    # start worker for background jobs
    worker

    # start loop to execute scheduler jobs
    loop do
      logger.info 'Scheduler running...'

      # reconnect in case db connection is lost
      reconnect_database

      # read/load jobs and check if they are already started
      jobs = Scheduler.where('active = ?', true).order('prio ASC')
      jobs.each do |job|

        # ignore job if it is still running
        next if @@jobs_started[ job.id ]

        # skip job if the last run is less than one period ago
        next if job.last_run && job.period && job.last_run > (Time.zone.now - job.period)

        # run job as own thread
        @@jobs_started[ job.id ] = true
        start_job(job)
        sleep 10
      end
      sleep 60
    end
  end

  # Checks all delayed jobs that are locked and cleans them up.
  # Should only get called when the Scheduler gets started.
  #
  # @see Scheduler#cleanup_delayed
  #
  # @param [Boolean] force forces the cleanup if not called in Scheduler starting context.
  #
  # @example
  #   Scheduler.cleanup
  #
  # @raise [RuntimeError] If called without force and not when Scheduler gets started.
  #
  # @return [void]
  def self.cleanup(force: false)

    # base_label is the bare method name; label may additionally carry a
    # class qualifier (e.g. 'Scheduler.threads') on newer Ruby versions.
    if !force && caller_locations(1..1).first&.base_label != 'threads'
      raise 'This method should only get called when Scheduler.threads are initialized. Use `force: true` to start anyway.'
    end

    Delayed::Job.all.each do |job|
      cleanup_delayed(job)
    end
  end

  # Checks if the given job can be rescheduled or destroys it. Logs the action as warn.
  # Works only for locked jobs. Jobs that are not locked are ignored and
  # should get destroyed directly.
  # Checks the delayed job object for a method called .reschedule?. The method is called
  # with the delayed job as a parameter. The result value is expected as a Boolean. If the
  # result is true the lock gets removed and the delayed job gets rescheduled. If the return
  # value is false it will get destroyed which is the default behaviour.
  #
  # @param [Delayed::Job] job the job that should get checked for destroying/rescheduling.
  #
  # @example
  #   Scheduler.cleanup_delayed(job)
  #
  # @return [void]
  def self.cleanup_delayed(job)
    return if job.locked_at.blank?

    job_name       = job.name
    payload_object = job.payload_object
    reschedule     = false

    if payload_object.present?
      if payload_object.respond_to?(:object)
        object = payload_object.object

        # add the object id to the log output if available
        if object.respond_to?(:id)
          job_name += " (id: #{object.id})"
        end

        # the payload decides on its own if it wants to be rescheduled
        if object.respond_to?(:reschedule?) && object.reschedule?(job)
          reschedule = true
        end
      end

      if payload_object.respond_to?(:args)
        job_name += " - ARGS: #{payload_object.args.inspect}"
      end
    end

    if reschedule
      action = 'Rescheduling'
      job.unlock
      job.save
    else
      action = 'Destroyed'
      job.destroy
    end

    Rails.logger.warn "#{action} locked delayed job: #{job_name}"
  end

  # Runs the given scheduler job in its own thread.
  #
  # Jobs with a period of up to 300 seconds are executed in a loop inside the
  # thread (re-reading the job before every wait) until the job gets deleted,
  # deactivated or has no period anymore. All other jobs are executed once.
  # When the thread finishes, the entry of the job in the started jobs
  # registry is reset.
  #
  # @param [Scheduler] job the scheduler job to run
  #
  # @return [Thread] the started thread
  def self.start_job(job)

    # remember the id, `job` gets re-read inside the loop and is nil if the
    # job got deleted in the meantime
    job_id = job.id

    Thread.new do
      ApplicationHandleInfo.current = 'scheduler'

      logger.info "Started job thread for '#{job.name}' (#{job.method})..."

      # start loop for periods of up to 5 minutes
      if job.period && job.period <= 300
        loop do
          _start_job(job)
          job = Scheduler.lookup(id: job.id)

          # exit if job got deleted
          break if !job

          # exit if job is not active anymore
          break if !job.active

          # exit if there is no loop period defined
          break if !job.period

          # wait until next run
          sleep job.period
        end
      else
        _start_job(job)
      end

      if job
        job.pid = ''
        job.save
        logger.info " ...stopped thread for '#{job.method}'"
      else

        # job got deleted while running, there is no record left to update
        logger.warn " ...stopped thread for deleted job (id: #{job_id})"
      end

      ActiveRecord::Base.connection.close

      # release thread lock
      @@jobs_started[ job_id ] = false
    end
  end

  # Executes the code of the given scheduler job and retries on errors.
  #
  # Before the execution last_run, pid, status and error_message of the job
  # get updated. If the execution raises a StandardError it gets retried.
  # The error counter is reset if the previous try started more than
  # 5 minutes ago. Once the counter reaches 10 the job gets deactivated
  # and marked with status 'error'.
  #
  # @param [Scheduler] job          the scheduler job to execute
  # @param [Integer]   try_count    number of failed tries so far
  # @param [Time]      try_run_time start time of the previous try
  #
  # @raise [Exception] non StandardError exceptions are logged and re-raised
  def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
    job.update!(
      last_run:      Time.zone.now,
      pid:           Thread.current.object_id,
      status:        'ok',
      error_message: '',
    )

    logger.info "execute #{job.method} (try_count #{try_count})..."

    # SECURITY: job.method is evaluated as Ruby code, it must only ever
    # contain trusted content.
    eval job.method() # rubocop:disable Security/Eval
  rescue => e
    logger.error "execute #{job.method} (try_count #{try_count}) exited with error #{e.inspect}"

    # reconnect in case db connection is lost
    # (done in a helper so a reconnect error does not replace `e`)
    reconnect_database

    try_run_max = 10
    try_count += 1

    # reset error counter if too old
    if try_run_time + (60 * 5) < Time.zone.now
      try_count = 0
    end
    try_run_time = Time.zone.now

    # restart job again
    if try_run_max > try_count
      _start_job(job, try_count, try_run_time)
    else
      @@jobs_started[ job.id ] = false
      error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
      logger.error error

      job.update!(
        error_message: error,
        status:        'error',
        active:        false,
      )
    end

  # rescue any other Exceptions that are not StandardError or children of it
  # https://stackoverflow.com/questions/10048173/why-is-it-bad-style-to-rescue-exception-e-in-ruby
  # http://rubylearning.com/satishtalim/ruby_exceptions.html
  rescue Exception => e # rubocop:disable Lint/RescueException
    logger.error "execute #{job.method} (try_count #{try_count}) exited with a non standard-error #{e.inspect}"
    raise
  end

  # Processes delayed (background) jobs.
  #
  # In foreground mode (used for tests) jobs get worked off in the current
  # thread until a run processes no more jobs.
  # Otherwise (used for production) a thread is started that works off the
  # delayed jobs in an endless loop and waits 8 seconds whenever a run
  # processed no jobs.
  #
  # @param [Boolean] foreground work off the jobs in the current thread
  #
  # @example
  #   Scheduler.worker(true)
  #
  # @raise [RuntimeError] in foreground mode if background jobs failed
  #
  # @return [nil, Thread] nil in foreground mode, otherwise the started thread
  def self.worker(foreground = false)

    # used for tests
    if foreground
      original_interface_handle = ApplicationHandleInfo.current
      ApplicationHandleInfo.current = 'scheduler'

      original_user_id = UserInfo.current_user_id
      UserInfo.current_user_id = nil

      begin
        loop do
          success, failure = Delayed::Worker.new.work_off
          if failure.nonzero?
            raise "ERROR: #{failure} failed background jobs: #{Delayed::Job.where('last_error IS NOT NULL').inspect}"
          end
          break if success.zero?
        end
      ensure

        # restore the previous context, also if background jobs failed
        UserInfo.current_user_id = original_user_id
        ApplicationHandleInfo.current = original_interface_handle
      end
      return
    end

    # used for production
    wait = 8
    Thread.new do
      sleep wait

      logger.info "Starting worker thread #{Delayed::Job}"

      loop do
        ApplicationHandleInfo.current = 'scheduler'

        # result is used as [success count, failure count] of the run
        result = nil

        realtime = Benchmark.realtime do
          logger.debug "*** worker thread, #{Delayed::Job.all.count} in queue"
          result = Delayed::Worker.new.work_off
        end

        count = result.sum

        if count.zero?
          sleep wait
          logger.debug '*** worker thread loop'
        else
          logger.debug format("*** #{count} jobs processed at %.4f j/s, %d failed ...\n", count / realtime, result.last)
        end
      end

      # only reached if the loop above gets terminated
      logger.info ' ...stopped worker thread'
      ActiveRecord::Base.connection.close
    end
  end

  # This function returns the failed jobs, which are all jobs with
  # status 'error' that are not active.
  #
  # @example
  #   Scheduler.failed_jobs
  #
  # @return the result of the `where` query for the failed jobs
  def self.failed_jobs
    where(status: 'error', active: false)
  end

  # This function restarts failed jobs to retry them
  #
  # @example
  #   Scheduler.restart_failed_jobs
  #
  # @raise if one of the failed jobs can't get updated (see update!)
  #
  # @return [true]
  def self.restart_failed_jobs
    failed_jobs.each do |job|
      job.update!(active: true)
    end
    true
  end

  # Re-establishes the database connection in case it got lost.
  # Errors are only logged, the callers continue in any case.
  def self.reconnect_database
    ActiveRecord::Base.connection.reconnect!
  rescue => e
    logger.error "Can't reconnect to database #{e.inspect}"
  end
  private_class_method :reconnect_database
end
|