diff --git a/Makefile b/Makefile index 20cb8f8..3776256 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,21 @@ # theright2read — corpus refresh entry points. # -# As of 2026-05-06 the LS + RS crawler that builds the parliamentary -# library corpus is the public package `sansad-semantic-crawler` -# (PolyForm-NC), pinned in requirements.txt at v0.2.0. The host project -# supplies the topic profile (`topics/libraries.json`, vendored from -# the upstream `examples/topics/libraries.json` because the package -# install does not include the `examples/` directory) and the output -# directory (`data/_parliament_libraries/`, gitignored). +# The LS + RS crawler is the public package `sansad-semantic-crawler` +# (PolyForm-NC), pinned in requirements.txt. The host project supplies +# the topic profile (`topics/libraries.json`, vendored from the upstream +# `examples/topics/libraries.json` because the package install does not +# include the `examples/` directory) and the output directory +# (`data/_parliament_libraries/`, gitignored). # -# Two legacy scripts (`scripts/sansad_library_crawl.py`, -# `scripts/sansad_library_parse.py`) were retired in the same commit; -# their LS-side schema variations are now harmonised by the package. +# As of 2026-05-12 the pipeline runs against sansad-semantic-crawler +# v1.0.0, which adds an analytical layer on top of the crawl/parse/export +# basics: extract-answers → analyse-discourse → analyse-ministry. The +# `corpus-enrich` step joins those analytical outputs into +# assets/parliament_libraries.js via scripts/build_parliament_libraries.py +# (the upstream `export` only emits the manifest-derived summary). # -# After regenerating `assets/parliament_libraries.js`, BUMP THE -# `?v=N` cache-bust suffix everywhere it is referenced — see AGENTS.md -# section 5 for the one-pass `find ... sed` command. +# After regenerating assets/parliament_libraries.js, BUMP THE `?v=N` +# cache-bust suffix everywhere it is referenced. 
VENV := .venv PYTHON := $(VENV)/bin/python @@ -24,7 +25,9 @@ TOPIC_PROFILE := topics/libraries.json CORPUS_OUT := data/_parliament_libraries EXPORT_PATH := assets/parliament_libraries.js -.PHONY: deps corpus-crawl corpus-parse corpus-export corpus-refresh sync-agents help +.PHONY: deps corpus-crawl corpus-parse corpus-export corpus-extract-answers \ + corpus-analyse-discourse corpus-analyse-ministry corpus-analyse \ + corpus-enrich corpus-refresh sync-agents help $(PYTHON): python3 -m venv $(VENV) @@ -53,21 +56,43 @@ corpus-export: $(PYTHON) --js-global PARLIAMENT_LIBRARY_DATA \ --export-path $(EXPORT_PATH) -# Full pipeline: crawl, parse, export. After this finishes, manually -# bump the `?v=N` cache-bust everywhere index.html / data/index.html / -# inequality/index.html load assets/parliament_libraries.js. See -# AGENTS.md section 5 for the canonical sed command. -corpus-refresh: corpus-crawl corpus-parse corpus-export +# v1.0.0 analytical layer. +corpus-extract-answers: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler extract-answers \ + --out $(CORPUS_OUT) + +corpus-analyse-discourse: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler analyse-discourse \ + --out $(CORPUS_OUT) + +corpus-analyse-ministry: $(PYTHON) + $(PYTHON) -m sansad_semantic_crawler analyse-ministry \ + --topic $(TOPIC_PROFILE) \ + --out $(CORPUS_OUT) + +corpus-analyse: corpus-extract-answers corpus-analyse-discourse corpus-analyse-ministry + +# Join the upstream manifest export with the v1.0.0 analytical outputs +# into a single enriched assets/parliament_libraries.js. +corpus-enrich: corpus-export + $(PYTHON) scripts/build_parliament_libraries.py + +# Full pipeline: crawl → parse → analyse → export → enrich. After this +# finishes, manually bump the `?v=N` cache-bust everywhere index.html / +# data/index.html / inequality/index.html load assets/parliament_libraries.js. 
+corpus-refresh: corpus-crawl corpus-parse corpus-analyse corpus-enrich sync-agents: python3 scripts/sync_agents.py help: - @echo "Corpus refresh (sansad-semantic-crawler):" - @echo " make corpus-refresh — full pipeline (crawl + parse + export)" + @echo "Corpus refresh (sansad-semantic-crawler v1.0.0):" + @echo " make corpus-refresh — full pipeline (crawl + parse + analyse + enrich)" @echo " make corpus-crawl ARGS='--max-records 5 --no-download' — smoke-test" @echo " make corpus-parse — re-extract text from cached PDFs" - @echo " make corpus-export — regenerate assets/parliament_libraries.js" + @echo " make corpus-analyse — extract-answers + analyse-discourse + analyse-ministry" + @echo " make corpus-export — upstream manifest-only export" + @echo " make corpus-enrich — export + join analytical files (the public artefact)" @echo "Setup:" @echo " make deps — install pinned deps into .venv" @echo "Agent rules:" diff --git a/assets/main.js b/assets/main.js index 28450e4..7679d4b 100644 --- a/assets/main.js +++ b/assets/main.js @@ -193,23 +193,123 @@ $$("#actions-grid").innerHTML = ACTIONS.map((a) => `
${esc(a.body)}
`).join(""); -// Parliament library corpus. The data is generated by -// scripts/sansad_library_parse.py into assets/parliament_libraries.js. +// Parliament library corpus. Generated by +// scripts/build_parliament_libraries.py into assets/parliament_libraries.js, +// joining the upstream sansad-semantic-crawler manifest with the v1.0.0 +// discourse-analysis outputs. (function renderParliamentLibraries() { - const grid = $("#parl-grid"); - if (!grid) return; const data = window.PARLIAMENT_LIBRARY_DATA || {}; - const stats = data.summaryStats || []; - const tags = data.topTags || []; - const questions = data.keyQuestions || []; + if (!$("#parl-headline-stat")) return; + + const ds = data.discourseSummary || {}; + const ministries = data.ministryDiscourse || []; + const excerpts = data.discourseExcerpts || []; + const rrrlf = data.rrrlfDeflections || []; + + // ── Headline stat ───────────────────────────────────────────────── + const headlineEl = $("#parl-headline-stat"); + if (headlineEl && ds.responsesClassified) { + const pct = ds.evasionRateClassified != null + ? Math.round(ds.evasionRateClassified * 100) + "%" + : "—"; + headlineEl.innerHTML = ` +${esc(e.excerpt)}+ ${e.politicalFunction ? `
${esc(q.excerpt || "")}
- ${tagLine ? `` : ""} -python scripts/sansad_library_crawl.py, then python scripts/sansad_library_parse.py.A local crawler now tracks Lok Sabha and Rajya Sabha questions on public libraries, RRRLF, the National Mission on Libraries, Library Acts, digital libraries, school and university libraries, reading rooms, and access. The cards below are generated from the local Parliament corpus.
- - - +The chart above shows what the Centre spent. This section shows what the Centre says when MPs ask about that spending. 341 library questions in Lok Sabha and Rajya Sabha. 105 had a parseable ministry response. Of those, 64 (61%) were classified as evasive by the open-source sansad-semantic-crawler — not Government opinion, but the Government's own words against a public taxonomy of parliamentary evasion.
+ + + +The Centre's library-funding arm is RRRLF. Its smallness — ₹197 crore over twenty years — is justified, in Parliament, by the same five-word deflection. Below: every time the Centre invoked it in response to an RRRLF-tagged question, in the order it happened.
+ + +Of the 105 responses our classifier could read, here is the share of its responses each ministry spent on deflection, refusal, redirection, withheld data, or constitutional default. N is small — show it.
+ + +One verbatim example per category. The classifier's label and political function are shown above the excerpt; the clickable line below it is the original Parliament URI.
+ + +