模拟从金融实体关系数据库接入公司信息数据,验证数据清洗、转换和标准化流程。
假设 MySQL 数据库中公司信息表中的一条数据:
{
"company_name": "ABC公司",
"industry": "金融",
"reg_capital": "1000万",
"est_date": "2010-01-01",
"credit_rating": "AA",
"address": "北京",
"is_listed": "1",
"website": "www.abc.com",
"电话": "13800000000",
"fax": null
}
-- 从金融实体关系数据库 (MySQL) 接入公司信息数据
INSERT INTO raw_data (source_id, data_type, data_content, ingest_time)
VALUES (
1, -- 假设数据来源ID为1
'company_info',
'{"company_name": "ABC公司", "industry": "金融", "reg_capital": "1000万", "est_date": "2010-01-01", "credit_rating": "AA", "address": "北京", "is_listed": "1", "website": "www.abc.com", "电话": "13800000000", "fax": null}', -- 模拟公司信息
NOW()
);
-- 数据清洗:去除注册资本中的“万”字,并将注册资本转换为数值类型
UPDATE raw_data
SET data_content = JSON_REPLACE(data_content, '$.reg_capital', REPLACE(JSON_EXTRACT(data_content, '$.reg_capital'), '万', ''))
WHERE source_id = 1 AND data_type = 'company_info';
-- 数据标准化:将是否上市转换为数值类型 (1: 是, 0: 否)
UPDATE raw_data
SET data_content = JSON_REPLACE(data_content, '$.is_listed', CASE WHEN JSON_EXTRACT(data_content, '$.is_listed') = '1' THEN 1 ELSE 0 END)
WHERE source_id = 1 AND data_type = 'company_info';
数据清洗、转换和标准化后的结果:
{
"company_name": "ABC公司",
"industry": "金融",
"reg_capital": 1000,
"est_date": "2010-01-01",
"credit_rating": "AA",
"address": "北京",
"is_listed": 1,
"website": "www.abc.com",
"电话": "13800000000",
"fax": null
}
公司名称 | 行业 | 注册资本 | 成立日期 | 信用评级 | 地址 | 是否上市 | 电话 | 传真 |
---|---|---|---|---|---|---|---|---|
ABC公司 | 金融 | 1000 | 2010-01-01 | AA | 北京 | 1 | 13800000000 | null |
模拟从新闻资讯数据中提取实体和关系,更新知识图谱。
模拟新闻报道,内容为 "ABC公司在金融行业取得了新的进展。"
-- 从新闻资讯数据中提取实体和关系
INSERT INTO knowledge_entity (entity_name, entity_type, properties)
VALUES
('ABC公司', '公司', '{"industry": "金融", "reg_capital": "1000万"}')
ON DUPLICATE KEY UPDATE properties = '{"industry": "金融", "reg_capital": "1000万"}'; -- 如果实体已存在,则更新属性
INSERT INTO knowledge_entity (entity_name, entity_type, properties)
VALUES
('金融行业', '行业', NULL)
ON DUPLICATE KEY UPDATE properties = NULL;
INSERT INTO knowledge_relation (source_entity_id, target_entity_id, relation_type, properties)
SELECT
(SELECT entity_id FROM knowledge_entity WHERE entity_name = 'ABC公司'),
(SELECT entity_id FROM knowledge_entity WHERE entity_name = '金融行业'),
'所属行业',
'{"confidence": 0.9}'
WHERE NOT EXISTS (
SELECT 1
FROM knowledge_relation
WHERE source_entity_id = (SELECT entity_id FROM knowledge_entity WHERE entity_name = 'ABC公司')
AND target_entity_id = (SELECT entity_id FROM knowledge_entity WHERE entity_name = '金融行业')
AND relation_type = '所属行业'
);
知识图谱在此处展示 (简化显示)
实体:ABC公司, 金融行业
关系:ABC公司 -> 所属行业 -> 金融行业
模拟检测公司财务数据是否出现异常,触发风险预警。
模拟公司财务数据:
[
{"company_name": "ABC公司", "date": "2023-01-01", "profit": 1000000, "revenue": 5000000, "cost": 4000000},
{"company_name": "ABC公司", "date": "2023-02-01", "profit": 800000, "revenue": 4500000, "cost": 3700000}
]
-- 检测公司财务数据是否出现异常
-- 假设正常情况下,公司的利润增长率在 -0.1 到 0.2 之间
-- 如果利润增长率低于 -0.2 或高于 0.3,则触发风险预警
INSERT INTO anomaly_event (entity_id, event_type, event_time, description, properties, risk_level)
SELECT
(SELECT entity_id FROM knowledge_entity WHERE entity_name = 'ABC公司'),
'财务数据异常',
NOW(),
'公司利润增长率异常',
JSON_OBJECT('profit_growth_rate', profit_growth_rate),
CASE
WHEN profit_growth_rate < -0.2 THEN '高'
WHEN profit_growth_rate > 0.3 THEN '高'
ELSE '中'
END
FROM (
-- 计算公司利润增长率
SELECT
(current_profit - last_profit) / last_profit AS profit_growth_rate
FROM (
-- 获取公司当前利润和上期利润
SELECT
(SELECT properties->>'$.profit' FROM financial_data WHERE company_name = 'ABC公司' ORDER BY date DESC LIMIT 1) AS current_profit,
(SELECT properties->>'$.profit' FROM financial_data WHERE company_name = 'ABC公司' ORDER BY date DESC LIMIT 1 OFFSET 1) AS last_profit
) AS t
) AS t2
WHERE profit_growth_rate < -0.2 OR profit_growth_rate > 0.3;
-- 如果存在风险预警,则发送通知
SELECT event_id, entity_name, event_type, event_time, description, risk_level
FROM anomaly_event
JOIN knowledge_entity ON anomaly_event.entity_id = knowledge_entity.entity_id
WHERE entity_name = 'ABC公司' AND event_type = '财务数据异常';
检测到财务数据异常,利润增长率为 -0.2,低于阈值 -0.2,触发高风险预警!