告警升级监控仪表盘

当前告警

警告 API 响应时间过长
严重 数据库连接失败

告警统计

告警统计图表

告警升级规则配置

告警级别 持续时间 (分钟) 升级到 操作
警告 10 严重

测试场景: 自动升级未处理的告警

{
  "scenario": "自动升级未处理的告警",
  "description": "系统自动检测到某个中危告警持续未被处理超过10分钟,自动将其升级为高危告警,并发送邮件给高级运维人员。",
  "SQL操作": [
    {
      "type": "INSERT",
      "table": "alert_events",
      "data": {
        "model_name": "模型A",
        "alert_time": "2024-11-16 10:00:00",
        "alert_level": "warning",
        "alert_message": "数据库连接池利用率超过85%",
        "metric_name": "db_connection_pool_usage",
        "metric_value": 88.0,
        "threshold_value": 85.0,
        "channel": "email",
        "is_resolved": false
      },
      "comment": "初始化告警事件"
    },
    {
      "type": "SELECT",
      "table": "configuration",
      "where": "config_key='auto_upgrade_rule_warning'",
      "comment": "查询告警升级规则"
    }
  ],
  "告警升级规则": {
    "alert_level": "warning",
    "unhandled_duration": 10,
    "unhandled_duration_unit": "minutes",
    "upgrade_to": "critical",
    "notification_channel": "email"
  },
  "通知配置": {
    "critical": {
      "email": "senior_ops@example.com"
    }
  },
  "验证目标": {
    "数据库验证": [
      {
        "table": "alert_events",
        "condition": "id = (SELECT id FROM alert_events WHERE alert_message='数据库连接池利用率超过85%' ORDER BY id DESC LIMIT 1)",
        "assert": {
          "alert_level": "critical",
          "updated_at": "CHECK_NOT_NULL"
        },
        "comment": "确认数据库中该告警的级别已更新为'高危' (critical),更新时间已更新"
      }
    ],
    "邮件验证": "确认 senior_ops@example.com 收到了告警升级的邮件通知。",
    "告警列表UI验证": "在告警列表中,确认该告警的级别显示为'高危',并有升级记录。",
    "日志记录验证": "查看告警升级日志,确认记录了升级操作。"
  },
  "数据清理": [
    {
      "type": "DELETE",
      "table": "alert_events",
      "condition": "alert_message='数据库连接池利用率超过85%'",
      "comment": "删除本次测试数据,恢复环境"
    }
  ]
}
            

测试场景: 人工手动升级告警

{
  "scenario": "人工手动升级告警",
  "description": "运维人员判断某个中危告警虽然未达到自动升级条件,但需要立即处理,手动将其升级为高危告警。",
  "SQL操作": [
    {
      "type": "INSERT",
      "table": "alert_events",
      "data": {
        "model_name": "模型B",
        "alert_time": "2024-11-16 10:15:00",
        "alert_level": "warning",
        "alert_message": "API 响应时间超过 2 秒",
        "metric_name": "api_response_time",
        "metric_value": 2.5,
        "threshold_value": 2.0,
        "channel": "email",
        "is_resolved": false
      },
      "comment": "插入需要手动升级的告警数据"
    }
  ],
  "用户权限": {
    "升级告警": true
  },
  "验证目标": {
    "数据库验证": [
      {
        "table": "alert_events",
        "condition": "id = (SELECT id FROM alert_events WHERE alert_message='API 响应时间超过 2 秒' ORDER BY id DESC LIMIT 1)",
        "assert": {
          "alert_level": "critical",
          "updated_at": "CHECK_NOT_NULL"
        },
        "comment": "确认数据库中该告警的级别已更新为'高危' (critical),更新时间已更新"
      }
    ],
    "审计日志验证": "确认审计日志中记录了该告警被手动升级的操作,包含操作者、时间等信息。",
    "告警列表UI验证": "在告警列表中,确认该告警的级别显示为'高危',并显示由谁在何时升级的信息。"
  },
  "数据清理": [
    {
      "type": "DELETE",
      "table": "alert_events",
      "condition": "alert_message='API 响应时间超过 2 秒'",
      "comment": "删除本次测试数据,恢复环境"
    }
  ]
}
            

测试场景: 告警升级失败

{
  "scenario": "告警升级失败",
  "description": "模拟由于数据库连接失败导致告警升级操作失败的场景,并验证系统是否能正确处理并提供错误提示。",
  "SQL操作": [
    {
      "type": "INSERT",
      "table": "alert_events",
      "data": {
        "model_name": "模型C",
        "alert_time": "2024-11-16 10:30:00",
        "alert_level": "warning",
        "alert_message": "磁盘空间即将耗尽",
        "metric_name": "disk_space_usage",
        "metric_value": 95.0,
        "threshold_value": 90.0,
        "channel": "email",
        "is_resolved": false
      },
      "comment": "插入需要升级的告警"
    }
  ],
  "模拟故障": {
    "database_connection_failure": true
  },
  "验证目标": {
    "UI错误提示": "UI应显示友好的错误提示信息,说明升级失败的原因(例如:无法连接数据库)。",
    "日志记录": "系统日志应记录详细的错误信息,方便后续排查。",
    "数据库验证": [
      {
        "table": "alert_events",
        "condition": "id = (SELECT id FROM alert_events WHERE alert_message='磁盘空间即将耗尽' ORDER BY id DESC LIMIT 1)",
        "assert": {
          "alert_level": "warning"
        },
        "comment": "确认数据库中告警级别没有改变"
      }
    ]
  },
  "数据清理": [
    {
      "type": "DELETE",
      "table": "alert_events",
      "condition": "alert_message='磁盘空间即将耗尽'",
      "comment": "删除本次测试数据,恢复环境"
    }
  ]
}