From 4d462c0e7fb34001dbb6d486bdfd933e3c0af3b1 Mon Sep 17 00:00:00 2001
From: skishchampi <996985+skishchampi@users.noreply.github.com>
Date: Tue, 12 May 2026 10:58:00 -0400
Subject: [PATCH] feat(parliament): surface sansad-semantic-crawler v1.0.0 discourse layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reframes the data-page parliament section from "what MPs ask" to "how the
State responds when asked." Consumes the v1.0.0 analytical pipeline
(extract-answers → analyse-discourse → analyse-ministry) and joins it into
the public dataset.

- Makefile: corpus-extract-answers, corpus-analyse-discourse,
  corpus-analyse-ministry, corpus-analyse, corpus-enrich. corpus-refresh
  now chains the full pipeline.
- scripts/build_parliament_libraries.py: joins manifest +
  analysis_discourse + ministry_summary_qa into
  assets/parliament_libraries.js. Emits new top-level keys:
  discourseSummary, ministryDiscourse, discourseExcerpts, rrrlfDeflections.
- data/index.html, assets/main.js, assets/styles.css: new section
  structure — headline evasion rate, "Library is a state subject" cascade
  (FEDERAL_DEFLECTION on RRRLF-tagged questions, sorted by date),
  per-ministry evasion bars with classified-N denominators, taxonomy of
  evasion (one verbatim phrase per label, with collapsible full passage),
  method note. Old keyQuestions / topTags grid demoted to a collapsible
  details element.
- Cache-bust v=43 → v=44 across index.html, data/index.html,
  inequality/index.html.
--- Makefile | 69 ++-- assets/main.js | 151 ++++++-- assets/parliament_libraries.js | 506 +++++++++++++++++++++++++- assets/styles.css | 177 +++++++++ data/index.html | 42 ++- index.html | 10 +- inequality/index.html | 10 +- scripts/build_parliament_libraries.py | 215 +++++++++++ 8 files changed, 1099 insertions(+), 81 deletions(-) create mode 100644 scripts/build_parliament_libraries.py diff --git a/Makefile b/Makefile index 20cb8f8..3776256 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,21 @@ # theright2read — corpus refresh entry points. # -# As of 2026-05-06 the LS + RS crawler that builds the parliamentary -# library corpus is the public package `sansad-semantic-crawler` -# (PolyForm-NC), pinned in requirements.txt at v0.2.0. The host project -# supplies the topic profile (`topics/libraries.json`, vendored from -# the upstream `examples/topics/libraries.json` because the package -# install does not include the `examples/` directory) and the output -# directory (`data/_parliament_libraries/`, gitignored). +# The LS + RS crawler is the public package `sansad-semantic-crawler` +# (PolyForm-NC), pinned in requirements.txt. The host project supplies +# the topic profile (`topics/libraries.json`, vendored from the upstream +# `examples/topics/libraries.json` because the package install does not +# include the `examples/` directory) and the output directory +# (`data/_parliament_libraries/`, gitignored). # -# Two legacy scripts (`scripts/sansad_library_crawl.py`, -# `scripts/sansad_library_parse.py`) were retired in the same commit; -# their LS-side schema variations are now harmonised by the package. +# As of 2026-05-12 the pipeline runs against sansad-semantic-crawler +# v1.0.0, which adds an analytical layer on top of the crawl/parse/export +# basics: extract-answers → analyse-discourse → analyse-ministry. 
The +# `corpus-enrich` step joins those analytical outputs into +# assets/parliament_libraries.js via scripts/build_parliament_libraries.py +# (the upstream `export` only emits the manifest-derived summary). # -# After regenerating `assets/parliament_libraries.js`, BUMP THE -# `?v=N` cache-bust suffix everywhere it is referenced — see AGENTS.md -# section 5 for the one-pass `find ... sed` command. +# After regenerating assets/parliament_libraries.js, BUMP THE `?v=N` +# cache-bust suffix everywhere it is referenced. VENV := .venv PYTHON := $(VENV)/bin/python @@ -24,7 +25,9 @@ TOPIC_PROFILE := topics/libraries.json CORPUS_OUT := data/_parliament_libraries EXPORT_PATH := assets/parliament_libraries.js -.PHONY: deps corpus-crawl corpus-parse corpus-export corpus-refresh sync-agents help +.PHONY: deps corpus-crawl corpus-parse corpus-export corpus-extract-answers \ + corpus-analyse-discourse corpus-analyse-ministry corpus-analyse \ + corpus-enrich corpus-refresh sync-agents help $(PYTHON): python3 -m venv $(VENV) @@ -53,21 +56,43 @@ corpus-export: $(PYTHON) --js-global PARLIAMENT_LIBRARY_DATA \ --export-path $(EXPORT_PATH) -# Full pipeline: crawl, parse, export. After this finishes, manually -# bump the `?v=N` cache-bust everywhere index.html / data/index.html / -# inequality/index.html load assets/parliament_libraries.js. See -# AGENTS.md section 5 for the canonical sed command. -corpus-refresh: corpus-crawl corpus-parse corpus-export +# v1.0.0 analytical layer. 
+corpus-extract-answers: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler extract-answers \ + --out $(CORPUS_OUT) + +corpus-analyse-discourse: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler analyse-discourse \ + --out $(CORPUS_OUT) + +corpus-analyse-ministry: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler analyse-ministry \ + --topic $(TOPIC_PROFILE) \ + --out $(CORPUS_OUT) + +corpus-analyse: corpus-extract-answers corpus-analyse-discourse corpus-analyse-ministry + +# Join the upstream manifest export with the v1.0.0 analytical outputs +# into a single enriched assets/parliament_libraries.js. +corpus-enrich: corpus-export + $(PYTHON) scripts/build_parliament_libraries.py + +# Full pipeline: crawl → parse → analyse → export → enrich. After this +# finishes, manually bump the `?v=N` cache-bust everywhere index.html / +# data/index.html / inequality/index.html load assets/parliament_libraries.js. +corpus-refresh: corpus-crawl corpus-parse corpus-analyse corpus-enrich sync-agents: python3 scripts/sync_agents.py help: - @echo "Corpus refresh (sansad-semantic-crawler):" - @echo " make corpus-refresh — full pipeline (crawl + parse + export)" + @echo "Corpus refresh (sansad-semantic-crawler v1.0.0):" + @echo " make corpus-refresh — full pipeline (crawl + parse + analyse + enrich)" @echo " make corpus-crawl ARGS='--max-records 5 --no-download' — smoke-test" @echo " make corpus-parse — re-extract text from cached PDFs" - @echo " make corpus-export — regenerate assets/parliament_libraries.js" + @echo " make corpus-analyse — extract-answers + analyse-discourse + analyse-ministry" + @echo " make corpus-export — upstream manifest-only export" + @echo " make corpus-enrich — export + join analytical files (the public artefact)" @echo "Setup:" @echo " make deps — install pinned deps into .venv" @echo "Agent rules:" diff --git a/assets/main.js b/assets/main.js index 28450e4..7679d4b 100644 --- a/assets/main.js +++ b/assets/main.js @@ -193,23 +193,123 @@ 
$$("#actions-grid").innerHTML = ACTIONS.map((a) => `

${esc(a.body)}

`).join(""); -// Parliament library corpus. The data is generated by -// scripts/sansad_library_parse.py into assets/parliament_libraries.js. +// Parliament library corpus. Generated by +// scripts/build_parliament_libraries.py into assets/parliament_libraries.js, +// joining the upstream sansad-semantic-crawler manifest with the v1.0.0 +// discourse-analysis outputs. (function renderParliamentLibraries() { - const grid = $("#parl-grid"); - if (!grid) return; const data = window.PARLIAMENT_LIBRARY_DATA || {}; - const stats = data.summaryStats || []; - const tags = data.topTags || []; - const questions = data.keyQuestions || []; + if (!$("#parl-headline-stat")) return; + + const ds = data.discourseSummary || {}; + const ministries = data.ministryDiscourse || []; + const excerpts = data.discourseExcerpts || []; + const rrrlf = data.rrrlfDeflections || []; + + // ── Headline stat ───────────────────────────────────────────────── + const headlineEl = $("#parl-headline-stat"); + if (headlineEl && ds.responsesClassified) { + const pct = ds.evasionRateClassified != null + ? Math.round(ds.evasionRateClassified * 100) + "%" + : "—"; + headlineEl.innerHTML = ` +
${esc(pct)}
+
+
${esc(ds.evasiveCount)} of ${esc(ds.responsesClassified)} classified responses to library questions in Parliament were evasive.
+
+ Across labels: REJECTED · SUBSTITUTED · FEDERAL_DEFLECTION · DEFLECTED · DATA_WITHHELD · STRUCTURAL_REFUSAL · CONSTITUTIONAL_DEFAULT · REPRESENTATIONAL_SILENCE. +
+
`; + } - grid.innerHTML = stats.map((p) => ` -
-
${esc(p.label)}
-
${esc(p.value)}
-
${esc(p.sub)}
-
`).join(""); + // ── RRRLF "State subject" cascade ───────────────────────────────── + const cascadeEl = $("#parl-rrrlf-cascade"); + if (cascadeEl) { + cascadeEl.innerHTML = rrrlf.length + ? rrrlf.map((r) => { + const asker = (r.askers || []).filter(Boolean).slice(0, 2).join(", "); + return ` +
+
${esc(r.date || "undated")}
+
+
“${esc(r.matchedPattern)}”
+ +
${esc(r.ministry)} · response to ${esc(asker || "MP")}
+
+
`; + }).join("") + : `
No RRRLF-tagged FEDERAL_DEFLECTION responses in the current corpus.
`; + } + + // ── Per-ministry evasion bars ──────────────────────────────────── + const minRowsEl = $("#parl-ministry-rows"); + if (minRowsEl) { + const shown = ministries.filter((m) => (m.recordsClassified || 0) > 0); + minRowsEl.innerHTML = shown.length + ? shown.map((m) => { + const rate = m.evasionRateClassified != null + ? Math.round(m.evasionRateClassified * 100) + : 0; + const ratePct = `${rate}%`; + return ` +
+
${esc(m.ministry)}
+
+
+
${ratePct}
+
+
N = ${esc(m.recordsClassified)} classified of ${esc(m.recordsTotal)} total
+
`; + }).join("") + : ""; + } + + // ── Verbatim evasion cards (one per label) ─────────────────────── + const gridEl = $("#parl-evasion-grid"); + if (gridEl) { + const seen = new Set(); + const oneEach = []; + for (const e of excerpts) { + if (seen.has(e.label)) continue; + seen.add(e.label); + oneEach.push(e); + } + gridEl.innerHTML = oneEach.map((e) => { + const pattern = (e.matchedPattern || "").trim() || e.excerpt; + const citation = `${esc(e.ministry)} · ${esc(e.date)} · ${esc(e.house)} ${esc(e.qtype)} Q${esc(e.qno)} — ${esc(e.title)}`; + const cite = e.uri + ? `${citation}` + : citation; + return ` +
+
${esc(e.label.replace(/_/g, " "))}
+
+
“${esc(pattern)}”
+
${cite}
+
+ Read the passage +
${esc(e.excerpt)}
+ ${e.politicalFunction ? `
Classifier note: ${esc(e.politicalFunction)}
` : ""} +
+
+
`; + }).join(""); + } + // ── Collapsible: corpus stats + top tags (kept for completeness) ─ + const stats = data.summaryStats || []; + const tags = data.topTags || []; + const grid = $("#parl-grid"); + if (grid) { + grid.innerHTML = stats.map((p) => ` +
+
${esc(p.label)}
+
${esc(p.value)}
+
${esc(p.sub)}
+
`).join(""); + } const tagEl = $("#parl-tags"); if (tagEl) { tagEl.innerHTML = tags.length @@ -217,32 +317,11 @@ $$("#actions-grid").innerHTML = ACTIONS.map((a) => ` : `No topic tags yet`; } - const questionsEl = $("#parl-questions"); - if (questionsEl) { - questionsEl.innerHTML = questions.length - ? questions.map((q) => { - const asker = (q.askers || []).filter(Boolean).slice(0, 3).join(", "); - const tagLine = (q.tags || []).join(", "); - const title = q.href - ? `${esc(q.title)}` - : esc(q.title); - return ` -
-
${esc(q.label)} · ${esc(q.date || "undated")}${q.ministry ? ` · ${esc(q.ministry)}` : ""}
-

${title}

- ${asker ? `
${esc(asker)}
` : ""} - ${q.stat ? `
${esc(q.stat)}
` : ""} -

${esc(q.excerpt || "")}

- ${tagLine ? `
${esc(tagLine)}
` : ""} -
`; - }).join("") - : `
No local Parliament library crawl has been exported yet. Run python scripts/sansad_library_crawl.py, then python scripts/sansad_library_parse.py.
`; - } - + // ── Method note ────────────────────────────────────────────────── const sourceEl = $("#parl-source-note"); if (sourceEl) { const generated = data.generatedAt ? `Generated ${data.generatedAt}. ` : ""; - sourceEl.textContent = `${generated}Sources: ${data.sourceManifest || "data/_parliament_libraries/manifest.jsonl"}; Lok Sabha elibrary.sansad.in; Rajya Sabha rsdoc.nic.in.`; + sourceEl.innerHTML = `${esc(generated)}Corpus: ${esc(ds.questionsTotal || 0)} library questions, ${esc(ds.responsesExtracted || 0)} parseable responses, ${esc(ds.responsesClassified || 0)} classified by sansad-semantic-crawler regex_v2 + LLM ensemble. Sources: Lok Sabha elibrary.sansad.in; Rajya Sabha rsdoc.nic.in. Classifier: CommonerLLP/sansad-semantic-crawler v1.0.0.`; } })(); diff --git a/assets/parliament_libraries.js b/assets/parliament_libraries.js index ea34174..da72b64 100644 --- a/assets/parliament_libraries.js +++ b/assets/parliament_libraries.js @@ -1,7 +1,7 @@ window.PARLIAMENT_LIBRARY_DATA = { "topic": "libraries", "description": "Public, institutional, digital, and access-oriented library questions in Parliament.", - "generatedAt": "2026-05-10T09:40:44", + "generatedAt": "2026-05-12T05:36:06", "sourceManifest": "data/_parliament_libraries/manifest.jsonl", "summaryStats": [ { @@ -520,5 +520,509 @@ window.PARLIAMENT_LIBRARY_DATA = { "source": "rsdoc.nic.in", "href": "https://sansad.in/getFile/annex/249/Au320.doc?source=pqars" } + ], + "discourseSummary": { + "questionsTotal": 341, + "responsesExtracted": 105, + "responsesClassified": 105, + "evasiveCount": 64, + "substantiveCount": 41, + "evasionRateClassified": 0.6095, + "labelDistribution": { + "REJECTED": 23, + "FACTUAL_DISCLOSURE": 21, + "ACCEPTED": 20, + "SUBSTITUTED": 12, + "FEDERAL_DEFLECTION": 12, + "DEFLECTED": 8, + "DATA_WITHHELD": 4, + "STRUCTURAL_REFUSAL": 2, + "CONSTITUTIONAL_DEFAULT": 2, + "REPRESENTATIONAL_SILENCE": 1 + } + }, + "ministryDiscourse": [ + { + "ministry": "HUMAN RESOURCE DEVELOPMENT", + 
"recordsTotal": 83, + "recordsClassified": 15, + "recordsUnclassified": 63, + "evasionRateClassified": 0.6, + "labelDistribution": { + "DEFLECTED": 3, + "UNCLASSIFIED": 63, + "SUBSTITUTED": 4, + "STRUCTURAL_REFUSAL": 2, + "DATA_WITHHELD": 2, + "FEDERAL_DEFLECTION": 1, + "REJECTED": 3, + "CONSTITUTIONAL_DEFAULT": 2, + "FACTUAL_DISCLOSURE": 3 + }, + "perEvasionShare": { + "SUBSTITUTED": 0.4444, + "DATA_WITHHELD": 0.2222, + "DEFLECTED": 0.3333 + } + }, + { + "ministry": "CULTURE", + "recordsTotal": 67, + "recordsClassified": 15, + "recordsUnclassified": 41, + "evasionRateClassified": 0.4, + "labelDistribution": { + "UNCLASSIFIED": 41, + "REJECTED": 5, + "DEFLECTED": 1, + "SUBSTITUTED": 4, + "ACCEPTED": 1, + "FACTUAL_DISCLOSURE": 3, + "FEDERAL_DEFLECTION": 11, + "DATA_WITHHELD": 1 + }, + "perEvasionShare": { + "SUBSTITUTED": 0.6667, + "DATA_WITHHELD": 0.1667, + "DEFLECTED": 0.1667 + } + }, + { + "ministry": "SOCIAL JUSTICE AND EMPOWERMENT", + "recordsTotal": 11, + "recordsClassified": 4, + "recordsUnclassified": 7, + "evasionRateClassified": 0.0, + "labelDistribution": { + "FACTUAL_DISCLOSURE": 2, + "UNCLASSIFIED": 7, + "REJECTED": 1, + "ACCEPTED": 1 + }, + "perEvasionShare": {} + }, + { + "ministry": "Education", + "recordsTotal": 8, + "recordsClassified": 2, + "recordsUnclassified": 6, + "evasionRateClassified": 0.5, + "labelDistribution": { + "UNCLASSIFIED": 6, + "SUBSTITUTED": 1, + "FACTUAL_DISCLOSURE": 1 + }, + "perEvasionShare": { + "SUBSTITUTED": 1.0 + } + }, + { + "ministry": "TRIBAL AFFAIRS", + "recordsTotal": 5, + "recordsClassified": 3, + "recordsUnclassified": 1, + "evasionRateClassified": 1.0, + "labelDistribution": { + "SUBSTITUTED": 3, + "UNCLASSIFIED": 1, + "REPRESENTATIONAL_SILENCE": 1 + }, + "perEvasionShare": { + "SUBSTITUTED": 1.0 + } + }, + { + "ministry": "RURAL DEVELOPMENT", + "recordsTotal": 4, + "recordsClassified": 1, + "recordsUnclassified": 3, + "evasionRateClassified": 1.0, + "labelDistribution": { + "UNCLASSIFIED": 3, + "DATA_WITHHELD": 1 
+ }, + "perEvasionShare": { + "DATA_WITHHELD": 1.0 + } + }, + { + "ministry": "CULTURE, YOUTH AFFAIRS AND SPORTS", + "recordsTotal": 3, + "recordsClassified": 0, + "recordsUnclassified": 3, + "evasionRateClassified": null, + "labelDistribution": { + "UNCLASSIFIED": 3 + }, + "perEvasionShare": {} + } + ], + "discourseExcerpts": [ + { + "label": "CONSTITUTIONAL_DEFAULT", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "3367", + "date": "2007-05-07", + "title": "CONFERENCE OF EDUCATION MINISTERS OF STATES .", + "askers": [ + "Motilal Vora" + ], + "tags": [ + "digital_library" + ], + "excerpt": "MN1STER OF STATE IN THE MINISTRY OF HUMAN RESOURCE DEVELOPMENT (SMT. D. PURANDESWARI) a)A Conference all State Education Ministers was held on 10th and 11th April 2007 at New Delhi. Record of the Co", + "matchedPattern": "mission mode", + "politicalFunction": "Institutional default of Article 16 representation. Erases Bahujan (SC/ST/OBC) presence via aggregate totals or substitution.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "CONSTITUTIONAL_DEFAULT", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "STARRED", + "qno": "400", + "date": "2007-04-30", + "title": "CONFERENCE OF EDUCATION MINISTERS OF STATES .", + "askers": [ + "Rajeev Chandrasekhar" + ], + "tags": [ + "digital_library" + ], + "excerpt": "ED ON 30.4.2007. (a): Yes, Sir.A Conference of the State Education Ministers was held on 10th and 11th April, 2007. (b): Points on which a consensus was reached in the said Conference are given in t", + "matchedPattern": "mission mode", + "politicalFunction": "Institutional default of Article 16 representation. 
Erases Bahujan (SC/ST/OBC) presence via aggregate totals or substitution.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "DATA_WITHHELD", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "1147", + "date": "1998-06-05", + "title": "APPOINTMENT OF THE KINS OF DELHI UNIVERSITY EMPLOYEES ON COMPASSIONATE GROUNDS .", + "askers": [ + "RAHASBIHARI BARIK" + ], + "tags": [ + "university_library" + ], + "excerpt": "MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) to (b) According to the information furnished by the University of Delhi, seventeen cases of kins of the deceased employees of the", + "matchedPattern": "information is being collected", + "politicalFunction": "Data exists but is withheld or deliberately not collected. The absence of data is itself a political choice.", + "confidence": 0.85, + "uri": "" + }, + { + "label": "DATA_WITHHELD", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "728", + "date": "2000-07-28", + "title": "HIGH COST OF HIGHER EDUCATION .", + "askers": [ + "JANESHWAR MISHRA" + ], + "tags": [ + "public_library" + ], + "excerpt": "THE MINSITER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) to (e): The information will be laid on the Table of the House in due course.", + "matchedPattern": "information will be laid on the Table", + "politicalFunction": "Data exists but is withheld or deliberately not collected. 
The absence of data is itself a political choice.", + "confidence": 0.85, + "uri": "" + }, + { + "label": "DEFLECTED", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "2143", + "date": "2000-12-08", + "title": "BOOKS PUBLISHED IN INDIAN LANGUAGES", + "askers": [ + "KARTAR SINGH DUGGAL" + ], + "tags": [ + "public_library" + ], + "excerpt": "THE MINISTER OF STATE IN THE MINISTRY OF HUMAN RESOURCE DEVELOPMENT (SHRI SYED SHAHNAWAZ HUSSAIN) (a) & (b): Two statements, one language-wise and the other state-wise, of the publications received", + "matchedPattern": "The response defers responsibility by stating that concerted efforts have been made and such efforts are a continuous pr", + "politicalFunction": "Indefinite deferral with no timeline or accountability trigger: 'under consideration', 'steps are being taken', 'will be examined', 'in due course'.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "DEFLECTED", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "4373", + "date": "2001-08-31", + "title": "LIBRARY SCIENCE EDUCATION .", + "askers": [ + "KUM KUM RAI" + ], + "tags": [ + "public_library", + "reading_room" + ], + "excerpt": "THE MINISTER OF HUMAN RESOURCE DEVELOPMENT, SCIENCE AND TECHNOLOGY AND OCEAN DEVELOPMENT (DR. 
MURLI MANOHAR JOSHI) (a) According to the information furnished by University Grants Commission, the Com", + "matchedPattern": "The response deflects by stating that books, furniture and fixtures are not allowed to be purchased for public libraries", + "politicalFunction": "Indefinite deferral with no timeline or accountability trigger: 'under consideration', 'steps are being taken', 'will be examined', 'in due course'.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "FEDERAL_DEFLECTION", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "1121", + "date": "1998-06-05", + "title": "RAJA RAMMOHUN ROY LIBRARY FOUNDATION .", + "askers": [ + "KARTAR SINGH DUGGAL" + ], + "tags": [ + "public_library", + "rrrlf" + ], + "excerpt": "MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) Since library is a State subject, the main objective of Raja Rammohan Roy Library foundation is to support the library movement in", + "matchedPattern": "library is a State subject", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "confidence": 0.92, + "uri": "" + }, + { + "label": "FEDERAL_DEFLECTION", + "ministry": "CULTURE", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "4193", + "date": "2018-04-04", + "title": "Libraries and museums", + "askers": [ + "Derek O' Brien" + ], + "tags": [ + "digital_library", + "public_library", + "rrrlf" + ], + "excerpt": "04.04.2018 LIBRARIES AND MUSEUMS 4193 . 
SHRI DEREK O’BRIEN Will the Minister of CULTURE be pleased to state: (a) the total number of libraries and museums in the country which are maintained by t", + "matchedPattern": "Library is a state subject", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "confidence": 0.92, + "uri": "" + }, + { + "label": "REJECTED", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "2059", + "date": "1998-07-03", + "title": "TITLES PUBLISHED IN INDIA DURING .", + "askers": [ + "KARTAR SINGH DUGGAL" + ], + "tags": [ + "public_library" + ], + "excerpt": "THE MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI): (a) Under the Delivery of Book Act, 1954 there are 4 Depository Libraries 1. National Library, Calcutta 2. Delhi Public Library,", + "matchedPattern": "The text does not provide any concrete commitment or specific action related to the query. Instead, it provides factual ", + "politicalFunction": "Flat refusal: 'does not agree', 'not feasible', 'does not arise', 'resource constraints'. The recommendation or question is dead.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "REJECTED", + "ministry": "SOCIAL JUSTICE AND EMPOWERMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "471", + "date": "2001-02-26", + "title": "NGOS RECEIVING FINANCIAL ASSISTANCE IN MAHARASHTRA .", + "askers": [ + "VEDPRAKASH P. GOYAL" + ], + "tags": [ + "accessibility" + ], + "excerpt": "MINISTER OF STATE OF THE MINISTRY OF SOCIAL JUSTICE AND EMPOWERMENT (SMT. MANEKA GANDHI) (a)The names of Non-Governmental Organisations in Maharashtra Which are receiving financial assistance from Go", + "matchedPattern": "The text clearly states that the NGOs have been blacklisted and action taken against them (such as warning, suspending f", + "politicalFunction": "Flat refusal: 'does not agree', 'not feasible', 'does not arise', 'resource constraints'. 
The recommendation or question is dead.", + "confidence": 0.95, + "uri": "" + }, + { + "label": "REPRESENTATIONAL_SILENCE", + "ministry": "TRIBAL AFFAIRS", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "2154", + "date": "2020-03-12", + "title": "Funds for NGOs working with Ministry", + "askers": [ + "Parimal Nathwani" + ], + "tags": [ + "public_library" + ], + "excerpt": "12.03.2020 FUNDS FOR NGOs WORKING WITH MINISTRY 2154. SHRI PARIMAL NATHWANI: Will the Minister of TRIBAL AFFAIRS be pleased to state: (a) whether any grant has been released for Non-Governmental", + "matchedPattern": "objective of the scheme is", + "politicalFunction": "Factual recitation that deliberately ignores Article 16/representation mandates.", + "confidence": 0.88, + "uri": "" + }, + { + "label": "STRUCTURAL_REFUSAL", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "2688", + "date": "1998-07-10", + "title": "SCHEME FOR ESTABLISHING RURAL LIBRARIES .", + "askers": [ + "NAGENDRA NATH OJHA" + ], + "tags": [ + "public_library" + ], + "excerpt": "MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) Govt. has not approved the scheme namely Financial Assistance for Establishment Rural Library. (b) and (c) Do not arise.", + "matchedPattern": "Govt. has not approved the scheme", + "politicalFunction": "Blunt refusal to establish the 'Form of Administration' required for social democracy.", + "confidence": 0.9, + "uri": "" + }, + { + "label": "STRUCTURAL_REFUSAL", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "1115", + "date": "1998-06-05", + "title": "AFFAIRS OF ADI SANKARACHARYA SANSKRIT UNIVERSITY .", + "askers": [ + "NARAYANASWAMY YERRA" + ], + "tags": [ + "public_library" + ], + "excerpt": "MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) There is no scheme for establishment of rural libraries by the Union Government. 
However, for supporting the library Foundation ha", + "matchedPattern": "no scheme for establishment", + "politicalFunction": "Blunt refusal to establish the 'Form of Administration' required for social democracy.", + "confidence": 0.9, + "uri": "" + }, + { + "label": "SUBSTITUTED", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "2173", + "date": "1997-03-14", + "title": "FINANCIAL ASSISTANCE BEING OFFERED TO LIBRARIES .", + "askers": [ + "NAGENDRA NATH OJHA" + ], + "tags": [ + "public_library" + ], + "excerpt": "THE MINISTER OF STATE IN THE DEPARTMENT OF EDUCATION IN THE MINISTRY OF EDUCATION IN THE MINISTRY OF HUMAN RESOURCE DEVELOPMENT (SHRI MUHI RAM SAIKIA) (a) Yes, Sir. (b) Government of India have reco", + "matchedPattern": "Under the\nScheme of", + "politicalFunction": "Replaced the question's framing with the ministry's preferred metric. The original question is unanswered.", + "confidence": 0.75, + "uri": "" + }, + { + "label": "SUBSTITUTED", + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "4669", + "date": "2001-04-27", + "title": "PROMOTION OF HINDI IN NON HINDI SPEAKING REGIONS", + "askers": [ + "VANGA GEETHA" + ], + "tags": [ + "reading_room" + ], + "excerpt": "ra) and the Central Institute of Indian Languages (Mysore), all Central Government institutions, implement several schemes and programmes for this purpose such as : i) Teaching of Hindi to the people", + "matchedPattern": "Under the Scheme of", + "politicalFunction": "Replaced the question's framing with the ministry's preferred metric. 
The original question is unanswered.", + "confidence": 0.75, + "uri": "" + } + ], + "rrrlfDeflections": [ + { + "ministry": "HUMAN RESOURCE DEVELOPMENT", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "1121", + "date": "1998-06-05", + "title": "RAJA RAMMOHUN ROY LIBRARY FOUNDATION .", + "askers": [ + "KARTAR SINGH DUGGAL" + ], + "matchedPattern": "library is a State subject", + "excerpt": "MINISTER OF HUMAN RESOURCE DEVELOPMENT (DR. MURLI MANOHAR JOSHI) (a) Since library is a State subject, the main objective of Raja Rammohan Roy Library foundation is to support the library movement in", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "uri": "" + }, + { + "ministry": "CULTURE", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "3708", + "date": "2018-03-28", + "title": "Opening of more public libraries in rural and semi urban areas", + "askers": [ + "Anil Desai" + ], + "matchedPattern": "Library is a state subject", + "excerpt": "(a) Public Library provides knowledge based services to all the cross-section of the society including Students, Women, Senior Citizen, Children, Youth, Neo-literates and Specially abled persons. (", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "uri": "" + }, + { + "ministry": "CULTURE", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "4193", + "date": "2018-04-04", + "title": "Libraries and museums", + "askers": [ + "Derek O' Brien" + ], + "matchedPattern": "Library is a state subject", + "excerpt": "04.04.2018 LIBRARIES AND MUSEUMS 4193 . 
SHRI DEREK O’BRIEN Will the Minister of CULTURE be pleased to state: (a) the total number of libraries and museums in the country which are maintained by t", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "uri": "" + }, + { + "ministry": "CULTURE", + "house": "Rajya Sabha", + "qtype": "UNSTARRED", + "qno": "4190", + "date": "2018-04-04", + "title": "Public Library Systems", + "askers": [ + "Mahesh Poddar" + ], + "matchedPattern": "Library is a state subject", + "excerpt": "(a) & (b) Currently nineteen (19) states have Library Legislation. Name of the states having Library legislation is at Annexure. (c) Library is a state subject and opening of new library comes under", + "politicalFunction": "Uses federalism/State Subjects to dodge Central responsibility for national standards.", + "uri": "" + } ] }; diff --git a/assets/styles.css b/assets/styles.css index bbd8bd9..a52c460 100644 --- a/assets/styles.css +++ b/assets/styles.css @@ -1476,6 +1476,174 @@ font-family: var(--f-mono); color: var(--red); font-size: 0.9em; } + /* PARLIAMENT · discourse analysis (v1.0.0 of sansad-semantic-crawler) */ + .t-cream-soft { color: var(--cream-deep); font-weight: 500; } + + .parl-headline-stat { + margin-top: var(--space-8); + display: grid; grid-template-columns: auto 1fr; gap: var(--space-8); + align-items: start; + padding: var(--space-7) var(--space-7); + border: 2px solid var(--red); + background: rgba(229, 62, 62, 0.08); + } + .parl-headline-num { + font-family: var(--f-display); font-size: 120px; font-weight: 900; + line-height: 0.85; letter-spacing: -4px; color: var(--red); + font-variant-numeric: tabular-nums; + } + .parl-headline-lede { + font-family: var(--f-slab); font-size: 22px; line-height: 1.3; + color: var(--cream); + } + .parl-headline-sub { + margin-top: var(--space-3); + font-family: var(--f-mono); font-size: 11px; letter-spacing: 1.2px; + color: var(--cream-deep); line-height: 1.55; + } + + 
.parl-subhead { + margin: var(--space-12) 0 var(--space-2); + font-family: var(--f-display); font-size: 34px; font-weight: 900; + letter-spacing: -1px; line-height: 1.05; color: var(--cream); + text-transform: none; + } + .parl-subhead-note { + margin: 0 0 var(--space-5); + font-family: var(--f-slab); font-size: 15px; line-height: 1.5; + color: var(--cream-deep); + } + + .parl-rrrlf-cascade { + display: grid; grid-template-columns: 1fr; gap: 0; + border-left: 4px solid var(--red); + } + .parl-cascade-card { + display: grid; grid-template-columns: 130px 1fr; + gap: var(--space-5); + padding: var(--space-5) var(--space-5); + border-bottom: 1px solid rgba(255, 248, 226, 0.15); + } + .parl-cascade-card:last-child { border-bottom: none; } + .parl-cascade-date { + font-family: var(--f-mono); font-size: 13px; font-weight: 700; + letter-spacing: 0.5px; color: var(--red); + padding-top: 6px; + } + .parl-cascade-quote { + font-family: var(--f-slab); font-size: 22px; font-weight: 700; + line-height: 1.25; color: var(--cream); font-style: italic; + } + .parl-cascade-meta { + margin-top: var(--space-2); + font-family: var(--f-mono); font-size: 11px; letter-spacing: 1px; + color: var(--cream-deep); text-transform: uppercase; + } + .parl-cascade-meta a { color: var(--cream); text-decoration-color: var(--red); } + .parl-cascade-min { + margin-top: 4px; + font-family: var(--f-mono); font-size: 11px; letter-spacing: 1px; + color: var(--cream-deep); + } + + .parl-ministry-rows { + display: flex; flex-direction: column; gap: var(--space-3); + } + .parl-ministry-row { + display: grid; grid-template-columns: 240px 1fr 220px; + gap: var(--space-4); align-items: center; + padding: var(--space-3) 0; + border-bottom: 1px solid rgba(255, 248, 226, 0.12); + } + .parl-ministry-name { + font-family: var(--f-mono); font-size: 12px; font-weight: 700; + letter-spacing: 1px; text-transform: uppercase; color: var(--cream); + } + .parl-ministry-bar-wrap { + position: relative; height: 28px; + 
background: rgba(255, 248, 226, 0.08); + border: 1px solid rgba(255, 248, 226, 0.15); + } + .parl-ministry-bar { + position: absolute; left: 0; top: 0; bottom: 0; + background: var(--red); + } + .parl-ministry-rate { + position: absolute; right: 10px; top: 50%; transform: translateY(-50%); + font-family: var(--f-display); font-size: 18px; font-weight: 900; + color: var(--cream); font-variant-numeric: tabular-nums; + } + .parl-ministry-n { + font-family: var(--f-mono); font-size: 11px; letter-spacing: 0.5px; + color: var(--cream-deep); + } + + .parl-evasion-grid { + display: grid; grid-template-columns: 1fr; gap: 0; + border-left: 4px solid var(--red); + } + .parl-evasion-row { + display: grid; grid-template-columns: 200px 1fr; + gap: var(--space-5); + padding: var(--space-5) var(--space-5); + border-bottom: 1px solid rgba(255, 248, 226, 0.15); + } + .parl-evasion-row:last-child { border-bottom: none; } + .parl-evasion-tag { + font-family: var(--f-mono); font-size: 11px; font-weight: 800; + letter-spacing: 2px; color: var(--red); text-transform: uppercase; + padding-top: 8px; + } + .parl-evasion-pattern { + font-family: var(--f-slab); font-size: 22px; font-weight: 700; + line-height: 1.25; color: var(--cream); font-style: italic; + } + .parl-evasion-cite { + margin-top: var(--space-2); + font-family: var(--f-mono); font-size: 11px; letter-spacing: 1px; + color: var(--cream-deep); text-transform: uppercase; + } + .parl-evasion-cite a { color: var(--cream); text-decoration-color: var(--red); } + .parl-evasion-more { + margin-top: var(--space-3); + } + .parl-evasion-more summary { + cursor: pointer; list-style: none; + font-family: var(--f-mono); font-size: 10.5px; font-weight: 700; + letter-spacing: 1.2px; text-transform: uppercase; + color: var(--cream-deep); + } + .parl-evasion-more summary::-webkit-details-marker { display: none; } + .parl-evasion-more summary::before { content: "▸ "; color: var(--red); } + .parl-evasion-more[open] summary::before { content: "▾ "; 
} + .parl-evasion-more blockquote { + margin: var(--space-3) 0 0; + padding: var(--space-3) var(--space-4); + border-left: 2px solid rgba(229, 62, 62, 0.5); + font-family: var(--f-slab); font-size: 14px; line-height: 1.45; + color: var(--cream-deep); font-style: normal; + } + .parl-evasion-function { + margin-top: var(--space-2); + font-family: var(--f-mono); font-size: 10px; letter-spacing: 0.5px; + color: var(--cream-deep); font-style: italic; + } + + .parl-corpus-details { + margin-top: var(--space-10); + border-top: 1px solid rgba(255, 248, 226, 0.15); + padding-top: var(--space-5); + } + .parl-corpus-details summary { + cursor: pointer; list-style: none; + font-family: var(--f-mono); font-size: 11px; font-weight: 800; + letter-spacing: 1.5px; text-transform: uppercase; + color: var(--cream-deep); + } + .parl-corpus-details summary::-webkit-details-marker { display: none; } + .parl-corpus-details summary::before { content: "▸ "; color: var(--red); } + .parl-corpus-details[open] summary::before { content: "▾ "; } + /* QUOTES */ .quotes h2 { font-size: clamp(44px, 8vw, 96px); letter-spacing: -2px; } .quote-grid { margin-top: var(--space-12); display: grid; grid-template-columns: 1fr 1fr; gap: var(--space-10) 48px; } @@ -1932,6 +2100,15 @@ .parl-questions { grid-template-columns: 1fr; } .parl-question h3 { font-size: 24px; } .parl-question-stat { font-size: 28px; } + .parl-headline-stat { grid-template-columns: 1fr; gap: var(--space-3); } + .parl-headline-num { font-size: 88px; } + .parl-headline-lede { font-size: 18px; } + .parl-subhead { font-size: 26px; } + .parl-cascade-card { grid-template-columns: 1fr; gap: var(--space-2); } + .parl-cascade-quote { font-size: 18px; } + .parl-ministry-row { grid-template-columns: 1fr; gap: var(--space-2); } + .parl-evasion-row { grid-template-columns: 1fr; gap: var(--space-2); } + .parl-evasion-pattern { font-size: 18px; } .quote-grid { grid-template-columns: 1fr; gap: var(--space-6); } .quote .text, .quote.deva .text, 
.quote.tamil .text { font-size: 20px; } .cta { padding: var(--space-12) var(--space-6); } diff --git a/data/index.html b/data/index.html index fdcaa22..5a522d5 100644 --- a/data/index.html +++ b/data/index.html @@ -39,7 +39,7 @@ - + @@ -152,12 +152,30 @@

HOW DOES YOUR STATE GRADE?

-
Parliament · library questions
-

WHAT MPs ASK WHEN THEY ASK ABOUT LIBRARIES.

-

A local crawler now tracks Lok Sabha and Rajya Sabha questions on public libraries, RRRLF, the National Mission on Libraries, Library Acts, digital libraries, school and university libraries, reading rooms, and access. The cards below are generated from the local Parliament corpus.

-
-
-
+
Parliament · how the State responds
+

MPs ASK ABOUT LIBRARIES. THE STATE EVADES.

+

The chart above shows what the Centre spent. This section shows what the Centre says when MPs ask about that spending. 341 library questions in Lok Sabha and Rajya Sabha. 105 had a parseable ministry response. Of those, 64 (61%) were classified as evasive by the open-source sansad-semantic-crawler — not an opinion about the Government, but the Government's own words set against a public taxonomy of parliamentary evasion.

+ +
+ +

"Library is a state subject." Same five words. Two decades. Both major parties.

+

The Centre's library-funding arm is RRRLF. Its smallness — ₹197 crore over twenty years — is justified, in Parliament, by the same five-word deflection. Below: every time the Centre invoked it in response to an RRRLF-tagged question, in the order it happened.

+
+ +

Per-ministry evasion rate on classified responses

+

Of the 105 responses our classifier could read, here is the share each ministry spent on deflection, refusal, redirection, withheld data, or constitutional default. N is small for every ministry, so the classified count is shown beside each bar.

+
+ +

A taxonomy of evasion in the State's own words

+

One verbatim example per category. The classifier's label and political function are shown above the excerpt; the clickable line below it is the original Parliament URI.

+
+ +
+ Corpus stats and top tags +
+
+
+
@@ -385,10 +403,10 @@

EDUCATE.
AGITATE.
ORGANISE.

- - - - - + + + + + diff --git a/index.html b/index.html index b406374..854c247 100644 --- a/index.html +++ b/index.html @@ -39,7 +39,7 @@ - + @@ -588,9 +588,9 @@

EDUCATE.
AGITATE.
ORGANISE.

- - - - + + + + diff --git a/inequality/index.html b/inequality/index.html index 8367a5b..89fcf1c 100644 --- a/inequality/index.html +++ b/inequality/index.html @@ -39,7 +39,7 @@ - + @@ -204,9 +204,9 @@

EDUCATE.
AGITATE.
ORGANISE.

- - - - + + + + diff --git a/scripts/build_parliament_libraries.py b/scripts/build_parliament_libraries.py new file mode 100644 index 0000000..551332b --- /dev/null +++ b/scripts/build_parliament_libraries.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 +# DO NOT HAND-EDIT consumers of this script's output. +# Generated by: scripts/build_parliament_libraries.py +# +# Joins sansad-semantic-crawler outputs (manifest + analysis_discourse + +# ministry_summary_qa) into assets/parliament_libraries.js, the dataset +# the public data page consumes. +# +# Runs after `make corpus-refresh` and `make corpus-analyse`. The upstream +# package's `export` subcommand emits the same manifest-derived summary +# but does not merge the v1.0.0 discourse-analysis files, so this adapter +# fills that gap. + +from __future__ import annotations + +import datetime as dt +import json +from collections import Counter, defaultdict +from html import escape +from pathlib import Path + +REPO = Path(__file__).resolve().parent.parent +CORPUS = REPO / "data" / "_parliament_libraries" +EXPORT = REPO / "assets" / "parliament_libraries.js" + +MANIFEST = CORPUS / "manifest.jsonl" +DISCOURSE = CORPUS / "analysis_discourse.jsonl" +ANSWERS = CORPUS / "answers.jsonl" +MINISTRY = CORPUS / "ministry_summary_qa.jsonl" + +# Labels the classifier emits, grouped by political function. +EVASIVE_LABELS = { + "DEFLECTED", + "ABSORBED", + "REJECTED", + "SUBSTITUTED", + "DATA_WITHHELD", + "SCOPE_NARROWED", + "CIRCULAR_REFERENCE", + "FEDERAL_DEFLECTION", + "STRUCTURAL_REFUSAL", + "CONSTITUTIONAL_DEFAULT", + "REPRESENTATIONAL_SILENCE", +} +SUBSTANTIVE_LABELS = {"ACCEPTED", "FACTUAL_DISCLOSURE"} + +# Representative excerpts to surface on the site: pick at most this many +# per evasion label, keep the highest-confidence ones. 
EXCERPTS_PER_LABEL = 2
EXCERPT_MAX_CHARS = 320  # cap enforced by shorten(); the ellipsis counts toward it


def load_jsonl(path: Path) -> list[dict]:
    """Parse a JSON-Lines file into a list of records.

    Blank lines are skipped. A missing file yields an empty list, so a
    pipeline stage that has not produced its output yet does not abort
    the build.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not path.exists():
        return []
    # Decode as UTF-8 explicitly: the corpus is non-ASCII and the locale
    # default is not UTF-8 everywhere. Iterate the file rather than using
    # str.splitlines(), which would also split on U+2028/U+2029/U+0085 --
    # characters that may legally appear raw inside a JSON string.
    with path.open(encoding="utf-8") as fh:
        return [json.loads(line) for line in fh if line.strip()]


def existing_export() -> dict:
    """Read the manifest-derived base export the package produces.

    The export is a JS file of the form ``<prefix> { ...json... };``.
    Everything before the first ``{`` and the trailing ``;`` are dropped
    and the remainder is parsed as JSON.

    Raises:
        FileNotFoundError: if the export has not been generated yet.
        ValueError: if the file contains no ``{`` or the object is not
            valid JSON.
    """
    text = EXPORT.read_text(encoding="utf-8")
    start = text.index("{")
    return json.loads(text[start:].rstrip().rstrip(";"))


def shorten(s: str, n: int = EXCERPT_MAX_CHARS) -> str:
    """Collapse whitespace runs in *s* and truncate to at most *n* characters.

    A truncated result ends in a single ellipsis character, which counts
    toward *n*. ``None`` or an empty string yields ``""``.
    """
    s = " ".join((s or "").split())
    return s if len(s) <= n else s[: n - 1].rstrip() + "…"


def build_discourse_summary(discourse: list[dict], questions_total: int) -> dict:
    """Aggregate classifier labels into the headline discourse numbers.

    Args:
        discourse: one record per extracted response; ``label`` is read.
        questions_total: number of questions in the manifest.

    Returns:
        A dict with the totals, the evasive/substantive counts, the label
        distribution (most common first) and ``evasionRateClassified``,
        which is evasive / classified rounded to four places, or ``None``
        when nothing was classified.
    """
    # A missing *or null* label means "unclassified"; a bare .get() default
    # would let null through and count it as a classified response.
    labels = Counter(r.get("label") or "UNCLASSIFIED" for r in discourse)
    classified = sum(c for lab, c in labels.items() if lab != "UNCLASSIFIED")
    evasive = sum(c for lab, c in labels.items() if lab in EVASIVE_LABELS)
    substantive = sum(c for lab, c in labels.items() if lab in SUBSTANTIVE_LABELS)
    rate = round(evasive / classified, 4) if classified else None
    return {
        "questionsTotal": questions_total,
        "responsesExtracted": len(discourse),
        "responsesClassified": classified,
        "evasiveCount": evasive,
        "substantiveCount": substantive,
        "evasionRateClassified": rate,
        "labelDistribution": dict(labels.most_common()),
    }


def build_ministry_discourse(ministry_rows: list[dict]) -> list[dict]:
    """Re-key the per-ministry summary rows for the site, largest first.

    Rows are ordered by ``records_total`` descending; ties keep their
    input order. A row without ``records_total`` is treated as 0 both for
    ordering and in the output.

    Raises:
        KeyError: if a row has no ``ministry``.
    """
    ordered = sorted(ministry_rows, key=lambda r: -(r.get("records_total") or 0))
    return [
        {
            "ministry": row["ministry"],
            "recordsTotal": row.get("records_total") or 0,
            "recordsClassified": row.get("records_classified", 0),
            "recordsUnclassified": row.get("records_unclassified", 0),
            "evasionRateClassified": row.get("evasion_rate_classified"),
            "labelDistribution": row.get("label_distribution", {}),
            "perEvasionShare": row.get("per_evasion_label_share", {}),
        }
        for row in ordered
    ]


def _question_fields(m: dict) -> dict:
    """Return the citation fields shared by every excerpt, from manifest row *m*."""
    # NOTE(review): only the title is HTML-escaped here; excerpt and pattern
    # text are emitted raw. Presumably the front end escapes those itself --
    # confirm before inserting any of these fields as markup.
    return {
        "ministry": m.get("ministry", ""),
        "house": m.get("house", ""),
        "qtype": m.get("qtype", ""),
        "qno": m.get("qno", ""),
        "date": m.get("date", ""),
        "title": escape(m.get("title") or ""),
        "askers": m.get("askers", []),
    }


def build_excerpts(
    discourse: list[dict],
    by_key: dict[str, dict],
) -> list[dict]:
    """Pick representative excerpts for each evasion label.

    Only records whose label is in ``EVASIVE_LABELS`` and whose ``key``
    resolves to a manifest row are considered. For each label (in sorted
    order) the ``EXCERPTS_PER_LABEL`` highest-confidence records are kept;
    a missing or null confidence ranks as 0.
    """
    by_label: dict[str, list[tuple[dict, dict]]] = defaultdict(list)
    for rec in discourse:
        label = rec.get("label")
        if label not in EVASIVE_LABELS:
            continue
        # .get(): a record without a key cannot be joined, so it is skipped
        # exactly like a record whose key is absent from the manifest.
        manifest = by_key.get(rec.get("key"))
        if not manifest:
            continue
        by_label[label].append((rec, manifest))

    out: list[dict] = []
    for label in sorted(by_label):
        picks = sorted(
            by_label[label],
            key=lambda pair: -float(pair[0].get("confidence") or 0),
        )[:EXCERPTS_PER_LABEL]
        for rec, m in picks:
            out.append(
                {
                    "label": label,
                    **_question_fields(m),
                    "tags": m.get("tags", []),
                    "excerpt": shorten(rec.get("text_excerpt") or ""),
                    "matchedPattern": rec.get("matched_pattern", ""),
                    "politicalFunction": rec.get("political_function", ""),
                    "confidence": rec.get("confidence"),
                    "uri": m.get("uri", ""),
                }
            )
    return out


def build_rrrlf_deflections(
    discourse: list[dict],
    by_key: dict[str, dict],
) -> list[dict]:
    """The 'Library is a State subject' cascade: FEDERAL_DEFLECTION on RRRLF-tagged questions.

    This is the structural keystone of the page's pair argument: the
    Centre's library-funding arm exists, and the Centre justifies its
    smallness by deflecting library policy to the States — in the same
    five words, across decades and dispensations.

    Returns the matching records ordered by date string, ascending;
    records without a date sort first.
    """
    out: list[dict] = []
    for rec in discourse:
        if rec.get("label") != "FEDERAL_DEFLECTION":
            continue
        m = by_key.get(rec.get("key"))
        if not m or "rrrlf" not in (m.get("tags") or []):
            continue
        out.append(
            {
                **_question_fields(m),
                "matchedPattern": rec.get("matched_pattern", ""),
                "excerpt": shorten(rec.get("text_excerpt") or ""),
                "politicalFunction": rec.get("political_function", ""),
                "uri": m.get("uri", ""),
            }
        )
    # `or ""` keeps a null date comparable with the ISO date strings.
    out.sort(key=lambda x: x.get("date") or "")
    return out


def main() -> None:
    """Merge the discourse-analysis outputs into the existing JS export.

    Reads the manifest, discourse and ministry JSONL files, adds the four
    discourse keys to the export object, refreshes ``generatedAt`` and
    rewrites the export file in place, then prints a short summary.
    """
    manifest = load_jsonl(MANIFEST)
    discourse = load_jsonl(DISCOURSE)
    ministry_rows = load_jsonl(MINISTRY)

    by_key = {m["key"]: m for m in manifest}

    base = existing_export()
    # Naive local time, to the second; the string carries no UTC offset.
    base["generatedAt"] = dt.datetime.now().replace(microsecond=0).isoformat()
    base["discourseSummary"] = build_discourse_summary(discourse, len(manifest))
    base["ministryDiscourse"] = build_ministry_discourse(ministry_rows)
    base["discourseExcerpts"] = build_excerpts(discourse, by_key)
    base["rrrlfDeflections"] = build_rrrlf_deflections(discourse, by_key)

    payload = json.dumps(base, indent=2, ensure_ascii=False)
    # ensure_ascii=False emits raw non-ASCII text, so the file encoding must
    # be pinned rather than left to the locale.
    EXPORT.write_text(f"window.PARLIAMENT_LIBRARY_DATA = {payload};\n", encoding="utf-8")

    summary = base["discourseSummary"]
    print(f"wrote {EXPORT.relative_to(REPO)}")
    print(
        f" responses extracted: {summary['responsesExtracted']}"
        f" · classified: {summary['responsesClassified']}"
        f" · evasion rate: {summary['evasionRateClassified']}"
    )
    print(f" ministries: {len(base['ministryDiscourse'])}")
    print(f" excerpts: {len(base['discourseExcerpts'])}")
    print(f" rrrlf deflections: {len(base['rrrlfDeflections'])}")


if __name__ == "__main__":
    main()