From 12b1dbd91e4be3bfe056074771e2eed0b81bc6af Mon Sep 17 00:00:00 2001 From: firstdata-dev Date: Wed, 29 Apr 2026 10:24:45 +0800 Subject: [PATCH 1/4] feat: add 4 new data sources - china-cdc: Chinese Center for Disease Control and Prevention - china-cnpc: China National Petroleum Corporation - china-sinopec: China Petrochemical Corporation (Sinopec Group) - china-cnooc: China National Offshore Oil Corporation --- firstdata/sources/china/health/china-cdc.json | 59 +++++++++++++++++++ .../sources/china/resources/china-cnooc.json | 58 ++++++++++++++++++ .../sources/china/resources/china-cnpc.json | 59 +++++++++++++++++++ .../china/resources/china-sinopec.json | 57 ++++++++++++++++++ 4 files changed, 233 insertions(+) create mode 100644 firstdata/sources/china/health/china-cdc.json create mode 100644 firstdata/sources/china/resources/china-cnooc.json create mode 100644 firstdata/sources/china/resources/china-cnpc.json create mode 100644 firstdata/sources/china/resources/china-sinopec.json diff --git a/firstdata/sources/china/health/china-cdc.json b/firstdata/sources/china/health/china-cdc.json new file mode 100644 index 0000000..1c705cf --- /dev/null +++ b/firstdata/sources/china/health/china-cdc.json @@ -0,0 +1,59 @@ +{ + "id": "china-cdc", + "name": { + "en": "Chinese Center for Disease Control and Prevention", + "zh": "中国疾病预防控制中心" + }, + "description": { + "en": "The Chinese Center for Disease Control and Prevention (China CDC) is the national public health institute responsible for disease prevention, surveillance, and health emergency response. 
It publishes infectious disease surveillance reports, chronic disease data, vaccination coverage statistics, environmental health assessments, and public health emergency bulletins.", + "zh": "中国疾病预防控制中心是国家级疾病预防控制与公共卫生技术管理和服务机构,负责疾病预防控制、公共卫生监测和卫生应急处置。发布传染病疫情监测报告、慢性病数据、疫苗接种统计、环境卫生评估及突发公共卫生事件公报。" + }, + "website": "https://www.chinacdc.cn", + "data_url": "https://www.chinacdc.cn/jkzt/", + "api_url": null, + "country": "CN", + "domains": [ + "health", + "epidemiology", + "public-health" + ], + "geographic_scope": "national", + "update_frequency": "weekly", + "tags": [ + "china", + "cdc", + "disease control", + "epidemiology", + "infectious disease", + "public health", + "vaccination", + "surveillance", + "疾控中心", + "传染病", + "疫情监测", + "公共卫生", + "疫苗接种", + "慢性病" + ], + "data_content": { + "en": [ + "Infectious Disease Surveillance - Weekly and monthly reports on notifiable infectious diseases", + "Chronic Disease Data - Prevalence and incidence of major chronic diseases", + "Vaccination Coverage - National immunization program statistics and coverage rates", + "Environmental Health - Drinking water quality, air pollution health impact assessments", + "Health Emergency Reports - Public health emergency response and outbreak investigation reports", + "Disease Burden Studies - National disease burden and risk factor analysis", + "Nutrition and Food Safety - National nutrition surveys and food safety monitoring" + ], + "zh": [ + "传染病疫情监测 - 法定传染病周报和月报", + "慢性病数据 - 主要慢性病患病率和发病率", + "疫苗接种覆盖率 - 国家免疫规划统计和接种率", + "环境卫生 - 饮用水水质监测、空气污染健康影响评估", + "卫生应急报告 - 突发公共卫生事件应急处置和疫情调查报告", + "疾病负担研究 - 全国疾病负担和危险因素分析", + "营养与食品安全 - 全国营养调查和食品安全监测" + ] + }, + "authority_level": "government" +} diff --git a/firstdata/sources/china/resources/china-cnooc.json b/firstdata/sources/china/resources/china-cnooc.json new file mode 100644 index 0000000..650f60f --- /dev/null +++ b/firstdata/sources/china/resources/china-cnooc.json @@ -0,0 +1,58 @@ +{ + "id": "china-cnooc", + "name": { + "en": "China National 
Offshore Oil Corporation", + "zh": "中国海洋石油集团有限公司" + }, + "description": { + "en": "China National Offshore Oil Corporation (CNOOC) is China's largest offshore oil and gas producer and a central state-owned enterprise. It publishes annual reports, sustainability reports, offshore oil and gas production data, deepwater exploration results, LNG import and terminal operation statistics, and marine environmental protection reports.", + "zh": "中国海洋石油集团有限公司(中国海油)是中国最大的海上油气生产商,中央企业。发布年度报告、可持续发展报告、海上油气产量数据、深水勘探成果、LNG进口及接收站运营统计和海洋环境保护报告。" + }, + "website": "https://www.cnooc.com.cn", + "data_url": "https://www.cnooc.com.cn/col/col6264/index.html", + "api_url": null, + "country": "CN", + "domains": [ + "energy", + "petroleum", + "maritime" + ], + "geographic_scope": "national", + "update_frequency": "annual", + "tags": [ + "china", + "cnooc", + "offshore oil", + "natural gas", + "lng", + "deepwater", + "marine energy", + "state-owned enterprise", + "中国海油", + "中海油", + "海上石油", + "天然气", + "液化天然气", + "深水", + "央企" + ], + "data_content": { + "en": [ + "Annual Reports - Financial performance, production, and reserves data", + "Offshore Production - Crude oil and natural gas production from offshore fields", + "Deepwater Exploration - Deepwater and ultra-deepwater exploration discoveries and results", + "LNG Operations - LNG import volumes, receiving terminal throughput, and trade data", + "Reserves Reports - Proven and probable offshore oil and gas reserves", + "Sustainability Reports - Marine environmental protection, carbon emissions, and ESG metrics" + ], + "zh": [ + "年度报告 - 财务业绩、产量和储量数据", + "海上生产 - 海上油田原油和天然气产量", + "深水勘探 - 深水和超深水勘探发现及成果", + "LNG业务 - LNG进口量、接收站吞吐量和贸易数据", + "储量报告 - 探明和概算海上油气储量", + "可持续发展报告 - 海洋环境保护、碳排放和ESG指标" + ] + }, + "authority_level": "government" +} diff --git a/firstdata/sources/china/resources/china-cnpc.json b/firstdata/sources/china/resources/china-cnpc.json new file mode 100644 index 0000000..9a75821 --- /dev/null +++ 
b/firstdata/sources/china/resources/china-cnpc.json @@ -0,0 +1,59 @@ +{ + "id": "china-cnpc", + "name": { + "en": "China National Petroleum Corporation", + "zh": "中国石油天然气集团有限公司" + }, + "description": { + "en": "China National Petroleum Corporation (CNPC) is the largest state-owned oil and gas company in China and one of the world's largest energy companies. It publishes annual reports, sustainability reports, oil and gas production statistics, reserves data, and energy market analyses covering crude oil, natural gas, refining, and petrochemical operations.", + "zh": "中国石油天然气集团有限公司(中国石油)是中国最大的国有油气企业,也是全球最大的能源企业之一。发布年度报告、可持续发展报告、油气产量统计、储量数据及能源市场分析,涵盖原油、天然气、炼化及石化业务。" + }, + "website": "https://www.cnpc.com.cn", + "data_url": "https://www.cnpc.com.cn/cnpc/nybg/common_index.shtml", + "api_url": null, + "country": "CN", + "domains": [ + "energy", + "petroleum", + "natural-gas" + ], + "geographic_scope": "national", + "update_frequency": "annual", + "tags": [ + "china", + "cnpc", + "petrochina", + "oil", + "gas", + "petroleum", + "crude oil", + "natural gas", + "energy", + "state-owned enterprise", + "中国石油", + "中石油", + "原油", + "天然气", + "油气", + "央企" + ], + "data_content": { + "en": [ + "Annual Reports - Financial performance, production, and operational statistics", + "Oil and Gas Production - Crude oil and natural gas output by region and field", + "Reserves Data - Proven and probable oil and gas reserves", + "Sustainability Reports - Environmental, social, and governance (ESG) metrics", + "Refining and Petrochemical Output - Processed crude oil volume and chemical product yields", + "Energy Market Analysis - Domestic and international oil and gas market trends" + ], + "zh": [ + "年度报告 - 财务业绩、产量及运营统计", + "油气产量 - 按地区和油田分类的原油和天然气产出", + "储量数据 - 探明和概算油气储量", + "可持续发展报告 - 环境、社会和治理(ESG)指标", + "炼化产出 - 原油加工量和化工产品产量", + "能源市场分析 - 国内外油气市场趋势" + ] + }, + "authority_level": "government" +} diff --git a/firstdata/sources/china/resources/china-sinopec.json 
b/firstdata/sources/china/resources/china-sinopec.json new file mode 100644 index 0000000..8be5e8f --- /dev/null +++ b/firstdata/sources/china/resources/china-sinopec.json @@ -0,0 +1,57 @@ +{ + "id": "china-sinopec", + "name": { + "en": "China Petrochemical Corporation (Sinopec Group)", + "zh": "中国石油化工集团有限公司" + }, + "description": { + "en": "China Petrochemical Corporation (Sinopec Group) is one of the world's largest integrated energy and chemical companies and a central state-owned enterprise of China. It publishes annual reports, sustainability reports, refining and chemical production statistics, oil and gas exploration data, and energy transition progress reports covering upstream, midstream, and downstream operations.", + "zh": "中国石油化工集团有限公司(中国石化)是全球最大的综合性能源化工企业之一,中国中央企业。发布年度报告、可持续发展报告、炼化生产统计、油气勘探数据及能源转型进展报告,涵盖上游、中游和下游业务。" + }, + "website": "https://www.sinopecgroup.com", + "data_url": "https://www.sinopecgroup.com/group/xhtml/shzr/", + "api_url": null, + "country": "CN", + "domains": [ + "energy", + "petroleum", + "petrochemical" + ], + "geographic_scope": "national", + "update_frequency": "annual", + "tags": [ + "china", + "sinopec", + "petrochemical", + "oil", + "refining", + "chemical", + "energy", + "state-owned enterprise", + "中国石化", + "中石化", + "石油化工", + "炼化", + "化工", + "央企" + ], + "data_content": { + "en": [ + "Annual Reports - Financial and operational performance data", + "Refining Output - Crude oil processing volume and refined product output", + "Chemical Production - Ethylene, synthetic resin, synthetic fiber, and other chemical product yields", + "Oil and Gas Exploration - Upstream exploration and production statistics", + "Sustainability Reports - Carbon emissions, environmental protection, and ESG data", + "Retail Network Statistics - Gas station network and fuel sales volume" + ], + "zh": [ + "年度报告 - 财务和运营业绩数据", + "炼油产出 - 原油加工量和成品油产量", + "化工生产 - 乙烯、合成树脂、合成纤维及其他化工产品产量", + "油气勘探 - 上游勘探开发生产统计", + "可持续发展报告 - 碳排放、环境保护和ESG数据", + "零售网络统计 - 
加油站网络和成品油销售量" + ] + }, + "authority_level": "government" +} From 86f6d3578b22fe2f2911bee905762bcfc3f032de Mon Sep 17 00:00:00 2001 From: firstdata-dev Date: Thu, 30 Apr 2026 10:50:15 +0800 Subject: [PATCH 2/4] fix: remove Chinese tags and convert spaces to hyphens Response to review: tags must be lowercase English with hyphens only. No Chinese characters, no spaces. Schema rule (PR #175/#176/#178 lineage). --- firstdata/sources/china/health/china-cdc.json | 14 ++++---------- .../sources/china/resources/china-cnooc.json | 15 ++++----------- firstdata/sources/china/resources/china-cnpc.json | 12 +++--------- .../sources/china/resources/china-sinopec.json | 8 +------- 4 files changed, 12 insertions(+), 37 deletions(-) diff --git a/firstdata/sources/china/health/china-cdc.json b/firstdata/sources/china/health/china-cdc.json index 1c705cf..09d12ea 100644 --- a/firstdata/sources/china/health/china-cdc.json +++ b/firstdata/sources/china/health/china-cdc.json @@ -22,18 +22,12 @@ "tags": [ "china", "cdc", - "disease control", + "disease-control", "epidemiology", - "infectious disease", - "public health", + "infectious-disease", + "public-health", "vaccination", - "surveillance", - "疾控中心", - "传染病", - "疫情监测", - "公共卫生", - "疫苗接种", - "慢性病" + "surveillance" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-cnooc.json b/firstdata/sources/china/resources/china-cnooc.json index 650f60f..f4cc21c 100644 --- a/firstdata/sources/china/resources/china-cnooc.json +++ b/firstdata/sources/china/resources/china-cnooc.json @@ -22,19 +22,12 @@ "tags": [ "china", "cnooc", - "offshore oil", - "natural gas", + "offshore-oil", + "natural-gas", "lng", "deepwater", - "marine energy", - "state-owned enterprise", - "中国海油", - "中海油", - "海上石油", - "天然气", - "液化天然气", - "深水", - "央企" + "marine-energy", + "state-owned-enterprise" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-cnpc.json b/firstdata/sources/china/resources/china-cnpc.json index 
9a75821..232d8a0 100644 --- a/firstdata/sources/china/resources/china-cnpc.json +++ b/firstdata/sources/china/resources/china-cnpc.json @@ -26,16 +26,10 @@ "oil", "gas", "petroleum", - "crude oil", - "natural gas", + "crude-oil", + "natural-gas", "energy", - "state-owned enterprise", - "中国石油", - "中石油", - "原油", - "天然气", - "油气", - "央企" + "state-owned-enterprise" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-sinopec.json b/firstdata/sources/china/resources/china-sinopec.json index 8be5e8f..b9ee32f 100644 --- a/firstdata/sources/china/resources/china-sinopec.json +++ b/firstdata/sources/china/resources/china-sinopec.json @@ -27,13 +27,7 @@ "refining", "chemical", "energy", - "state-owned enterprise", - "中国石化", - "中石化", - "石油化工", - "炼化", - "化工", - "央企" + "state-owned-enterprise" ], "data_content": { "en": [ From b4b13072fd876eb8ae82649819e976dec24be39c Mon Sep 17 00:00:00 2001 From: firstdata-dev Date: Thu, 30 Apr 2026 10:58:54 +0800 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20URL?= =?UTF-8?q?=20accessibility=20notes=20for=20CDC/CNPC/Sinopec/CNOOC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - china-cdc: data_url → /gzdt/ (stable), note about /jkzt/ reorganization - china-cnpc: note about WAF returning 412 to automated probes - china-sinopec: data_url switched to http (https endpoint unstable from some networks) - china-cnooc: data_url simplified to root landing (col/col6264 server-side redirect loop for non-browser clients) All 4 files still pass schema validation. 
--- firstdata/sources/china/health/china-cdc.json | 4 ++-- firstdata/sources/china/resources/china-cnooc.json | 4 ++-- firstdata/sources/china/resources/china-cnpc.json | 2 +- firstdata/sources/china/resources/china-sinopec.json | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/firstdata/sources/china/health/china-cdc.json b/firstdata/sources/china/health/china-cdc.json index 09d12ea..1f57ef9 100644 --- a/firstdata/sources/china/health/china-cdc.json +++ b/firstdata/sources/china/health/china-cdc.json @@ -5,11 +5,11 @@ "zh": "中国疾病预防控制中心" }, "description": { - "en": "The Chinese Center for Disease Control and Prevention (China CDC) is the national public health institute responsible for disease prevention, surveillance, and health emergency response. It publishes infectious disease surveillance reports, chronic disease data, vaccination coverage statistics, environmental health assessments, and public health emergency bulletins.", + "en": "The Chinese Center for Disease Control and Prevention (China CDC) is the national public health institute responsible for disease prevention, surveillance, and health emergency response. It publishes infectious disease surveillance reports, chronic disease data, vaccination coverage statistics, environmental health assessments, and public health emergency bulletins. Some deep-link sub-sections (e.g. 
/jkzt/) have been reorganized; use /gzdt/ for published work announcements or en.chinacdc.cn for the English portal.", "zh": "中国疾病预防控制中心是国家级疾病预防控制与公共卫生技术管理和服务机构,负责疾病预防控制、公共卫生监测和卫生应急处置。发布传染病疫情监测报告、慢性病数据、疫苗接种统计、环境卫生评估及突发公共卫生事件公报。" }, "website": "https://www.chinacdc.cn", - "data_url": "https://www.chinacdc.cn/jkzt/", + "data_url": "https://www.chinacdc.cn/gzdt/", "api_url": null, "country": "CN", "domains": [ diff --git a/firstdata/sources/china/resources/china-cnooc.json b/firstdata/sources/china/resources/china-cnooc.json index f4cc21c..ab28452 100644 --- a/firstdata/sources/china/resources/china-cnooc.json +++ b/firstdata/sources/china/resources/china-cnooc.json @@ -5,11 +5,11 @@ "zh": "中国海洋石油集团有限公司" }, "description": { - "en": "China National Offshore Oil Corporation (CNOOC) is China's largest offshore oil and gas producer and a central state-owned enterprise. It publishes annual reports, sustainability reports, offshore oil and gas production data, deepwater exploration results, LNG import and terminal operation statistics, and marine environmental protection reports.", + "en": "China National Offshore Oil Corporation (CNOOC) is China's largest offshore oil and gas producer and a central state-owned enterprise. It publishes annual reports, sustainability reports, offshore oil and gas production data, deepwater exploration results, LNG import and terminal operation statistics, and marine environmental protection reports. 
Note: some sub-sections use server-side redirects that loop for non-browser clients; the root landing page is stable and lists all reports.", "zh": "中国海洋石油集团有限公司(中国海油)是中国最大的海上油气生产商,中央企业。发布年度报告、可持续发展报告、海上油气产量数据、深水勘探成果、LNG进口及接收站运营统计和海洋环境保护报告。" }, "website": "https://www.cnooc.com.cn", - "data_url": "https://www.cnooc.com.cn/col/col6264/index.html", + "data_url": "https://www.cnooc.com.cn", "api_url": null, "country": "CN", "domains": [ diff --git a/firstdata/sources/china/resources/china-cnpc.json b/firstdata/sources/china/resources/china-cnpc.json index 232d8a0..750a957 100644 --- a/firstdata/sources/china/resources/china-cnpc.json +++ b/firstdata/sources/china/resources/china-cnpc.json @@ -5,7 +5,7 @@ "zh": "中国石油天然气集团有限公司" }, "description": { - "en": "China National Petroleum Corporation (CNPC) is the largest state-owned oil and gas company in China and one of the world's largest energy companies. It publishes annual reports, sustainability reports, oil and gas production statistics, reserves data, and energy market analyses covering crude oil, natural gas, refining, and petrochemical operations.", + "en": "China National Petroleum Corporation (CNPC) is the largest state-owned oil and gas company in China and one of the world's largest energy companies. It publishes annual reports, sustainability reports, oil and gas production statistics, reserves data, and energy market analyses covering crude oil, natural gas, refining, and petrochemical operations. 
Note: the site is behind a WAF and may return HTTP 412 to automated requests; content is accessible via standard browsers.", "zh": "中国石油天然气集团有限公司(中国石油)是中国最大的国有油气企业,也是全球最大的能源企业之一。发布年度报告、可持续发展报告、油气产量统计、储量数据及能源市场分析,涵盖原油、天然气、炼化及石化业务。" }, "website": "https://www.cnpc.com.cn", diff --git a/firstdata/sources/china/resources/china-sinopec.json b/firstdata/sources/china/resources/china-sinopec.json index b9ee32f..ae2b316 100644 --- a/firstdata/sources/china/resources/china-sinopec.json +++ b/firstdata/sources/china/resources/china-sinopec.json @@ -5,11 +5,11 @@ "zh": "中国石油化工集团有限公司" }, "description": { - "en": "China Petrochemical Corporation (Sinopec Group) is one of the world's largest integrated energy and chemical companies and a central state-owned enterprise of China. It publishes annual reports, sustainability reports, refining and chemical production statistics, oil and gas exploration data, and energy transition progress reports covering upstream, midstream, and downstream operations.", + "en": "China Petrochemical Corporation (Sinopec Group) is one of the world's largest integrated energy and chemical companies and a central state-owned enterprise of China. It publishes annual reports, sustainability reports, refining and chemical production statistics, oil and gas exploration data, and energy transition progress reports covering upstream, midstream, and downstream operations. 
Note: HTTPS endpoint is not reliably reachable from some networks; HTTP works.", "zh": "中国石油化工集团有限公司(中国石化)是全球最大的综合性能源化工企业之一,中国中央企业。发布年度报告、可持续发展报告、炼化生产统计、油气勘探数据及能源转型进展报告,涵盖上游、中游和下游业务。" }, - "website": "https://www.sinopecgroup.com", - "data_url": "https://www.sinopecgroup.com/group/xhtml/shzr/", + "website": "http://www.sinopecgroup.com", + "data_url": "http://www.sinopecgroup.com/group/xhtml/shzr/", "api_url": null, "country": "CN", "domains": [ From e92fdfb307f895abc286900d4efda67e63feb104 Mon Sep 17 00:00:00 2001 From: firstdata-dev Date: Thu, 30 Apr 2026 11:06:56 +0800 Subject: [PATCH 4/4] fix: restore Chinese tags (my earlier removal was over-correction) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Schema explicitly allows 'mixed Chinese/English keywords' for discoverability. Earlier commit 86f6d35 wrongly stripped Chinese tags based on a misremembered review rule from PR #175/#176/#178 (which were actually about space→hyphen, not CN removal). Chinese tags restored to match original feat commit, with space→hyphen applied only to English multi-word tags. No lowercase changes. 
--- firstdata/sources/china/health/china-cdc.json | 8 +++++++- firstdata/sources/china/resources/china-cnooc.json | 9 ++++++++- firstdata/sources/china/resources/china-cnpc.json | 8 +++++++- firstdata/sources/china/resources/china-sinopec.json | 8 +++++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/firstdata/sources/china/health/china-cdc.json b/firstdata/sources/china/health/china-cdc.json index 1f57ef9..c38689f 100644 --- a/firstdata/sources/china/health/china-cdc.json +++ b/firstdata/sources/china/health/china-cdc.json @@ -27,7 +27,13 @@ "infectious-disease", "public-health", "vaccination", - "surveillance" + "surveillance", + "疾控中心", + "传染病", + "疫情监测", + "公共卫生", + "疫苗接种", + "慢性病" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-cnooc.json b/firstdata/sources/china/resources/china-cnooc.json index ab28452..c12fff7 100644 --- a/firstdata/sources/china/resources/china-cnooc.json +++ b/firstdata/sources/china/resources/china-cnooc.json @@ -27,7 +27,14 @@ "lng", "deepwater", "marine-energy", - "state-owned-enterprise" + "state-owned-enterprise", + "中国海油", + "中海油", + "海上石油", + "天然气", + "液化天然气", + "深水", + "央企" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-cnpc.json b/firstdata/sources/china/resources/china-cnpc.json index 750a957..240e5dc 100644 --- a/firstdata/sources/china/resources/china-cnpc.json +++ b/firstdata/sources/china/resources/china-cnpc.json @@ -29,7 +29,13 @@ "crude-oil", "natural-gas", "energy", - "state-owned-enterprise" + "state-owned-enterprise", + "中国石油", + "中石油", + "原油", + "天然气", + "油气", + "央企" ], "data_content": { "en": [ diff --git a/firstdata/sources/china/resources/china-sinopec.json b/firstdata/sources/china/resources/china-sinopec.json index ae2b316..a3a5652 100644 --- a/firstdata/sources/china/resources/china-sinopec.json +++ b/firstdata/sources/china/resources/china-sinopec.json @@ -27,7 +27,13 @@ "refining", "chemical", "energy", - 
"state-owned-enterprise" + "state-owned-enterprise", + "中国石化", + "中石化", + "石油化工", + "炼化", + "化工", + "央企" ], "data_content": { "en": [