From 1aa17c9f9a2cba213af2a4883555c7705cd87243 Mon Sep 17 00:00:00 2001 From: xuxueli <931591021@qq.com> Date: Sat, 10 Nov 2018 15:21:30 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E5=91=8A=E8=AD=A6=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E8=B0=83=E6=95=B4=EF=BC=8C=E6=94=B9=E4=B8=BA=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E6=89=AB=E6=8F=8F=E5=A4=B1=E8=B4=A5=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=E8=A7=A6=E5=8F=91=E3=80=82=E4=B8=80=E6=96=B9?= =?UTF-8?q?=E9=9D=A2=E7=B2=BE=E7=A1=AE=E6=89=AB=E6=8F=8F=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=EF=BC=8C=E9=99=8D=E4=BD=8E=E6=89=AB=E6=8F=8F?= =?UTF-8?q?=E8=8C=83=E5=9B=B4=EF=BC=9B=E5=8F=A6=E4=B8=80=E6=96=B9=E9=9D=A2?= =?UTF-8?q?=E5=8F=96=E6=B6=88=E5=86=85=E5=AD=98=E9=98=9F=E5=88=97=EF=BC=8C?= =?UTF-8?q?=E9=99=8D=E4=BD=8E=E7=BA=BF=E7=A8=8B=E5=86=85=E5=AD=98=E6=B6=88?= =?UTF-8?q?=E8=80=97=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/XXL-JOB官方文档.md | 2 +- doc/db/tables_xxl_job.sql | 1 + .../xxl/job/admin/core/model/XxlJobLog.java | 12 ++ .../core/thread/JobFailMonitorHelper.java | 107 +++++++----------- .../job/admin/core/trigger/XxlJobTrigger.java | 3 - .../com/xxl/job/admin/dao/XxlJobLogDao.java | 6 + .../mybatis-mapper/XxlJobLogMapper.xml | 26 ++++- 7 files changed, 86 insertions(+), 71 deletions(-) diff --git a/doc/XXL-JOB官方文档.md b/doc/XXL-JOB官方文档.md index e9a5c270..b9aac981 100644 --- a/doc/XXL-JOB官方文档.md +++ b/doc/XXL-JOB官方文档.md @@ -1389,7 +1389,7 @@ Tips: 历史版本(V1.3.x)目前已经Release至稳定版本, 进入维护阶段 ### 6.24 版本 v2.0.2 Release Notes[迭代中] - 1、调度中心告警邮件发送组件改为 “spring-boot-starter-mail”; -- 2、[迭代中]任务告警逻辑调整:任务调度,以及任务回调失败时,均推送监控队列。考虑通过任务Log字段控制告警状态; +- 2、任务告警逻辑调整,改为通过扫描失败日志方式触发。一方面精确扫描失败任务,降低扫描范围;另一方面取消内存队列,降低线程内存消耗; ### TODO LIST diff --git a/doc/db/tables_xxl_job.sql b/doc/db/tables_xxl_job.sql index 0add92ca..d597d034 100644 --- a/doc/db/tables_xxl_job.sql +++ b/doc/db/tables_xxl_job.sql @@ -187,6 +187,7 @@ CREATE TABLE `XXL_JOB_QRTZ_TRIGGER_LOG` ( `handle_time` datetime DEFAULT NULL COMMENT '执行-时间', `handle_code` int(11) NOT NULL COMMENT '执行-状态', `handle_msg` text COMMENT '执行-日志', + `alarm_status` tinyint(4) NOT NULL DEFAULT '0' COMMENT '告警状态:0-默认、1-无需告警、2-告警成功、3-告警失败', PRIMARY KEY (`id`), KEY `I_trigger_time` (`trigger_time`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/model/XxlJobLog.java b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/model/XxlJobLog.java index ee029242..c4ba0b0c 100644 --- a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/model/XxlJobLog.java +++ b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/model/XxlJobLog.java @@ -31,6 +31,9 @@ public class XxlJobLog { private int handleCode; private String handleMsg; + // alarm info + private int alarmStatus; + public int getId() { return id; } @@ -142,4 +145,13 @@ public class XxlJobLog { public void setHandleMsg(String handleMsg) { this.handleMsg = handleMsg; } + + public int getAlarmStatus() { + return alarmStatus; + } + + public void setAlarmStatus(int alarmStatus) { + this.alarmStatus = alarmStatus; + } + } diff --git a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/thread/JobFailMonitorHelper.java b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/thread/JobFailMonitorHelper.java index 03c614e2..2e59d633 100644 --- a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/thread/JobFailMonitorHelper.java +++ b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/thread/JobFailMonitorHelper.java @@ -7,7 +7,6 @@ import com.xxl.job.admin.core.model.XxlJobLog; import com.xxl.job.admin.core.trigger.TriggerTypeEnum; import com.xxl.job.admin.core.util.I18nUtil; import com.xxl.job.core.biz.model.ReturnT; -import com.xxl.job.core.handler.IJobHandler; import org.apache.commons.collections4.CollectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,12 +16,15 @@ import javax.mail.MessagingException; import javax.mail.internet.MimeMessage; import java.io.UnsupportedEncodingException; import java.text.MessageFormat; -import java.util.*; -import java.util.concurrent.LinkedBlockingQueue; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import java.util.concurrent.TimeUnit; /** * job monitor instance + * * @author xuxueli 2015-9-1 18:05:56 */ public class JobFailMonitorHelper { @@ -35,8 +37,6 @@ public class JobFailMonitorHelper { // ---------------------- monitor ---------------------- - private LinkedBlockingQueue queue = new LinkedBlockingQueue(0xfff8); - private Thread monitorThread; private volatile boolean toStop = false; public void start(){ @@ -44,52 +44,47 @@ public class JobFailMonitorHelper { @Override public void run() { + // monitor while (!toStop) { try { - List jobLogIdList = new ArrayList(); - int drainToNum = JobFailMonitorHelper.instance.queue.drainTo(jobLogIdList); - if (CollectionUtils.isNotEmpty(jobLogIdList)) { - for (Integer jobLogId : jobLogIdList) { - if (jobLogId==null || jobLogId==0) { + List failLogIds = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().findFailJobLogIds(1000); + if (CollectionUtils.isNotEmpty(failLogIds)) { + for (int failLogId: failLogIds) { + + // lock log + int lockRet = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, 0, -1); + if (lockRet < 1) { continue; } - XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(jobLogId); - if (log == null) { - continue; + XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(failLogId); + XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId()); + + // 1、fail retry monitor + if (log.getExecutorFailRetryCount() > 0) { + JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam(), null); + String retryMsg = "

>>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<<
"; + log.setTriggerMsg(log.getTriggerMsg() + retryMsg); + XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(log); } - if (IJobHandler.SUCCESS.getCode() == log.getTriggerCode() && log.getHandleCode() == 0) { - // job running - JobFailMonitorHelper.monitor(jobLogId); - logger.debug(">>>>>>>>>>> job monitor, job running, JobLogId:{}", jobLogId); - } else if (IJobHandler.SUCCESS.getCode() == log.getHandleCode()) { - // job success, pass - logger.info(">>>>>>>>>>> job monitor, job success, JobLogId:{}", jobLogId); - } else /*if (IJobHandler.FAIL.getCode() == log.getTriggerCode() - || IJobHandler.FAIL.getCode() == log.getHandleCode() - || IJobHandler.FAIL_RETRY.getCode() == log.getHandleCode() )*/ { - // job fail, - - // 1、fail retry - XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId()); - - if (log.getExecutorFailRetryCount() > 0) { - JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam(), null); - String retryMsg = "

>>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<<
"; - log.setTriggerMsg(log.getTriggerMsg() + retryMsg); - XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(log); + // 2、fail alarm monitor + int newAlarmStatus = 0; // 告警状态:0-默认、-1=锁定状态、1-无需告警、2-告警成功、3-告警失败 + if (info!=null && info.getAlarmEmail()!=null && info.getAlarmEmail().trim().length()>0) { + boolean alarmResult = true; + try { + alarmResult = failAlarm(info, log); + } catch (Exception e) { + alarmResult = false; + logger.error(e.getMessage(), e); } + newAlarmStatus = alarmResult?2:3; + } else { + newAlarmStatus = 1; + } - // 2、fail alarm - failAlarm(info, log); - - logger.info(">>>>>>>>>>> job monitor, job fail, JobLogId:{}", jobLogId); - }/* else { - JobFailMonitorHelper.monitor(jobLogId); - logger.info(">>>>>>>>>>> job monitor, job status unknown, JobLogId:{}", jobLogId); - }*/ + XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, -1, newAlarmStatus); } } @@ -99,22 +94,6 @@ public class JobFailMonitorHelper { } } - // monitor all clear - List jobLogIdList = new ArrayList(); - int drainToNum = getInstance().queue.drainTo(jobLogIdList); - if (jobLogIdList!=null && jobLogIdList.size()>0) { - for (Integer jobLogId: jobLogIdList) { - XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(jobLogId); - if (ReturnT.FAIL_CODE == log.getTriggerCode()|| ReturnT.FAIL_CODE==log.getHandleCode()) { - // job fail, - XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId()); - - failAlarm(info, log); - logger.info(">>>>>>>>>>> job monitor last, job fail, JobLogId:{}", jobLogId); - } - } - } - } }); monitorThread.setDaemon(true); @@ -131,11 +110,6 @@ public class JobFailMonitorHelper { logger.error(e.getMessage(), e); } } - - // producer - public static void monitor(int jobLogId){ - getInstance().queue.offer(jobLogId); - } // ---------------------- alarm ---------------------- @@ -168,7 +142,8 @@ public class JobFailMonitorHelper { * * @param jobLog */ - private void failAlarm(XxlJobInfo info, XxlJobLog jobLog){ + private boolean failAlarm(XxlJobInfo info, XxlJobLog jobLog){ + boolean alarmResult = true; // send monitor email if (info!=null && info.getAlarmEmail()!=null && info.getAlarmEmail().trim().length()>0) { @@ -205,8 +180,10 @@ public class JobFailMonitorHelper { helper.setText(content, true); XxlJobAdminConfig.getAdminConfig().getMailSender().send(mimeMessage); - } catch (UnsupportedEncodingException | MessagingException e) { + } catch (Exception e) { logger.error(">>>>>>>>>>> job monitor alarm email send error, JobLogId:{}", jobLog.getId(), e); + + alarmResult = false; } } @@ -214,6 +191,8 @@ public class JobFailMonitorHelper { // TODO, custom alarm strategy, such as sms + + return alarmResult; } } diff --git a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/trigger/XxlJobTrigger.java b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/trigger/XxlJobTrigger.java index f6115ea9..7a60eeed 100644 --- a/xxl-job-admin/src/main/java/com/xxl/job/admin/core/trigger/XxlJobTrigger.java +++ b/xxl-job-admin/src/main/java/com/xxl/job/admin/core/trigger/XxlJobTrigger.java @@ -6,7 +6,6 @@ import com.xxl.job.admin.core.model.XxlJobInfo; import com.xxl.job.admin.core.model.XxlJobLog; import com.xxl.job.admin.core.route.ExecutorRouteStrategyEnum; import com.xxl.job.admin.core.schedule.XxlJobDynamicScheduler; -import com.xxl.job.admin.core.thread.JobFailMonitorHelper; import com.xxl.job.admin.core.util.I18nUtil; import com.xxl.job.core.biz.ExecutorBiz; import com.xxl.job.core.biz.model.ReturnT; @@ -173,8 +172,6 @@ public class XxlJobTrigger { jobLog.setTriggerMsg(triggerMsgSb.toString()); XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(jobLog); - // 7、monitor trigger - JobFailMonitorHelper.monitor(jobLog.getId()); logger.debug(">>>>>>>>>>> xxl-job trigger end, jobId:{}", jobLog.getId()); } diff --git a/xxl-job-admin/src/main/java/com/xxl/job/admin/dao/XxlJobLogDao.java b/xxl-job-admin/src/main/java/com/xxl/job/admin/dao/XxlJobLogDao.java index a5489d59..cb5848e3 100644 --- a/xxl-job-admin/src/main/java/com/xxl/job/admin/dao/XxlJobLogDao.java +++ b/xxl-job-admin/src/main/java/com/xxl/job/admin/dao/XxlJobLogDao.java @@ -50,4 +50,10 @@ public interface XxlJobLogDao { @Param("clearBeforeTime") Date clearBeforeTime, @Param("clearBeforeNum") int clearBeforeNum); + public List findFailJobLogIds(@Param("pagesize") int pagesize); + + public int updateAlarmStatus(@Param("logId") int logId, + @Param("oldAlarmStatus") int oldAlarmStatus, + @Param("newAlarmStatus") int newAlarmStatus); + } diff --git a/xxl-job-admin/src/main/resources/mybatis-mapper/XxlJobLogMapper.xml b/xxl-job-admin/src/main/resources/mybatis-mapper/XxlJobLogMapper.xml index d51cbf8d..9d69eb11 100644 --- a/xxl-job-admin/src/main/resources/mybatis-mapper/XxlJobLogMapper.xml +++ b/xxl-job-admin/src/main/resources/mybatis-mapper/XxlJobLogMapper.xml @@ -22,7 +22,8 @@ - + + @@ -39,7 +40,8 @@ t.trigger_msg, t.handle_time, t.handle_code, - t.handle_msg + t.handle_msg, + t.alarm_status + SELECT id FROM `XXL_JOB_QRTZ_TRIGGER_LOG` + WHERE !( + (trigger_code in (0, 200) and handle_code = 0) + OR + (handle_code = 200) + ) + AND `alarm_status` = 0 + ORDER BY id ASC + + + + UPDATE XXL_JOB_QRTZ_TRIGGER_LOG + SET + `alarm_status` = #{newAlarmStatus} + WHERE `id`= #{logId} AND `alarm_status` = #{oldAlarmStatus} + \ No newline at end of file