From c95a3bd502b923e0c8386440c90a130c627010a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C5=82awek=20Staworko?= <slawek.staworko@relational.ai>
Date: Thu, 16 Apr 2026 16:08:12 +0200
Subject: [PATCH] Add documentation comments to LQP protobuf definitions

Document all messages, fields, and enums across logic.proto, fragments.proto,
and transactions.proto with comments explaining purpose and semantics. Extend
docs/lqp.md with sections on execution model, write/read operations, types,
and external data. Information sourced from the design doc, usage patterns in
raicode and relationalai-python, and test examples in tests/lqp.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docs/lqp.md                                  |  55 +++-
 proto/relationalai/lqp/v1/fragments.proto    |  26 +-
 proto/relationalai/lqp/v1/logic.proto        | 328 ++++++++++++++-----
 proto/relationalai/lqp/v1/transactions.proto | 118 +++++--
 4 files changed, 411 insertions(+), 116 deletions(-)

diff --git a/docs/lqp.md b/docs/lqp.md
index 2bc4fdaa..d4527ab1 100644
--- a/docs/lqp.md
+++ b/docs/lqp.md
@@ -48,15 +48,64 @@ LQP clients send `Transaction`s that the engine executes.
 
     Write := Define(fragment::Fragment)
            | Undefine(fragment_id::FragmentId)
+           | Context(relations::RelationId[])
+           | Snapshot(mappings::SnapshotMapping[], prefix::String[])
 
     Read := Demand(relation_id::RelationId)
           | Output(name::String, relation_id::RelationId)
-          | Export(config::ExportCSVConfig)
-          | WhatIf(branch::String, epochs::Epoch[])
+          | Export(config::ExportConfig)
+          | WhatIf(branch::String, epoch::Epoch)
           | Abort(name::String, relation_id::RelationId)
 
 Transactions are structured into one or more epochs, which correspond to observable states
 of the installed program. This allows users to execute a sequence of steps in a single
 transaction. Within an epoch writes execute before reads. Multiple writes or multiple reads
-can be performed concurrently and in any order. Of special note are the WhatIf operations,
+can be performed concurrently and in any order. Of special note are the `WhatIf` operations,
 which allow executing an epoch in a throwaway clone of the runtime state.
+
+## Execution Model
+
+Transaction execution proceeds in two passes. First, the _simulator_ runs the transaction
+against a transient copy of the runtime state to validate it and minimize it (e.g. dropping
+writes whose effects are clobbered by later writes). Then the _driver_ executes the
+validated, minimized transaction against the actual runtime. If the simulator detects invalid
+state at any point, the transaction is aborted and errors are returned.
+
+## Write Operations
+
+`Define` installs a fragment and its declarations into the execution graph. `Undefine`
+removes a fragment. `Context` declares which relations should be jointly optimized — more
+context gives the optimizer more reuse opportunities but increases planning time. `Snapshot`
+materializes derived relations into durable EDB (base) relations, associating new relation
+values with stable identities over time.
+
+## Read Operations
+
+`Demand` triggers computation of a relation without returning its contents — useful for
+warming caches. `Output` computes and returns a relation's contents under a human-readable
+name. `Export` writes data to external storage (CSV or Iceberg). `WhatIf` runs a speculative
+epoch on a transient fork; writes don't persist, reads observe the modified state. `Abort`
+enforces integrity constraints: the transaction fails if the referenced relation is non-empty.
+
+## Types
+
+All types are primitive and aligned with the Apache Iceberg type system. The engine uses
+type information for equality, ordering, promotion, and algebraic properties of operations.
+Overloading must be handled by higher-level compilers. See the `Type` message in
+`logic.proto` for the full list.
+
+## External Data
+
+`Data` declarations describe external sources (CSV, Iceberg, BeTree) without eagerly
+ingesting them — data is loaded lazily when first demanded. `EDB` declares durable
+engine-managed base relations (the result of `Snapshot` operations). `CSVData` and
+`IcebergData` describe how to read from those respective formats, with column-to-relation
+mappings via `GNFColumn`.
+
+## Protobuf Specification
+
+The proto files in `../proto/relationalai/lqp/v1/` are the authoritative specification:
+
+- `logic.proto` — Declarations, formulas, types, values, and external data sources
+- `fragments.proto` — Content-addressable compilation units and debug info
+- `transactions.proto` — Transaction structure, write/read operations, and export config
diff --git a/proto/relationalai/lqp/v1/fragments.proto b/proto/relationalai/lqp/v1/fragments.proto
index 05fbde8d..47100058 100644
--- a/proto/relationalai/lqp/v1/fragments.proto
+++ b/proto/relationalai/lqp/v1/fragments.proto
@@ -1,3 +1,17 @@
+// Logical Query Protocol — Fragments
+//
+// Fragments are the unit of incremental compilation and installation. The full
+// execution graph is partitioned into content-addressable fragments that can be
+// defined, redefined, and undefined independently.
+//
+// Instead of resending the entire program on every transaction, a client can
+// use the Sync mechanism (see transactions.proto) to reconcile its expected
+// set of fragments with the engine's installed state. The engine identifies
+// fragments by their content hash, so unchanged fragments are never resent.
+//
+// The granularity of fragments is chosen by the compiler: one per source file,
+// per module, per definition, or even a single fragment for the entire program.
+
 syntax = "proto3";
 
 option go_package = "github.com/RelationalAI/logical-query-protocol/sdks/go/src/lqp/v1";
@@ -6,17 +20,27 @@ package relationalai.lqp.v1;
 
 import "relationalai/lqp/v1/logic.proto";
 
+// A content-addressable unit of the execution graph containing one or more
+// declarations. Each declaration can only belong to a single fragment.
+// Fragments are installed/removed via Define/Undefine write operations.
 message Fragment {
   FragmentId id = 1;
   repeated Declaration declarations = 2;
+  // Optional human-readable name mappings for debugging and logging.
   DebugInfo debug_info = 3;
 }
 
+// Maps opaque RelationIds to human-readable names for use in logs,
+// error messages, and debugging tools. The ids and orig_names arrays
+// are parallel (same length, matched by index).
 message DebugInfo {
   repeated RelationId ids = 1;
   repeated string orig_names = 2;
 }
 
+// Content-based identifier for a fragment. Typically a hash of the
+// fragment's declarations, enabling deduplication and cache-friendly
+// synchronization.
 message FragmentId {
-  bytes id = 1; // Variable-length identifier, up to 256 bits (32 bytes) or less
+  bytes id = 1; // Variable-length identifier, up to 256 bits (32 bytes)
 }
diff --git a/proto/relationalai/lqp/v1/logic.proto b/proto/relationalai/lqp/v1/logic.proto
index 918561b1..94ad7527 100644
--- a/proto/relationalai/lqp/v1/logic.proto
+++ b/proto/relationalai/lqp/v1/logic.proto
@@ -1,9 +1,21 @@
+// Logical Query Protocol — Logic Definitions
+//
+// This file defines the core logical constructs of the LQP: relation definitions
+// via first-order logic with negation and aggregation (Def), iterative computations
+// (Algorithm), semantic constraints (Constraint), and external data sources (Data).
+//
+// All relations are globally identified by opaque UUIDs (RelationId). Namespacing,
+// scoping, and abstractions are the responsibility of higher-level compilers targeting
+// this protocol.
+
 syntax = "proto3";
 
 option go_package = "github.com/RelationalAI/logical-query-protocol/sdks/go/src/lqp/v1";
 
 package relationalai.lqp.v1;
 
+// A Declaration introduces one or more relations into the execution graph.
+// Declarations are grouped into Fragments for incremental installation.
 message Declaration {
   oneof declaration_type {
     Def def = 1;
@@ -13,12 +25,19 @@ message Declaration {
   }
 }
 
+// Defines a single derived relation via first-order logic. The body is an
+// Abstraction whose bound variables form the relation's columns and whose
+// formula describes which tuples belong to the relation.
+// Supports negation, aggregation (via Reduce), and recursion.
 message Def {
   RelationId name = 1;
   Abstraction body = 2;
   repeated Attribute attrs = 3;
 }
 
+// Declares a semantic constraint on relations, used for validation and
+// optimization. The engine may use constraints to prune search space or
+// verify data integrity.
 message Constraint {
   RelationId name = 2;
   oneof constraint_type {
@@ -26,22 +45,36 @@ message Constraint {
   }
 }
 
+// Declares that within tuples satisfying the guard, the key variables
+// functionally determine the value variables (keys → values).
+// Used for semantic optimization and data validation.
 message FunctionalDependency {
+  // Restricts the scope of the dependency to tuples matching this formula.
   Abstraction guard = 1;
+  // Variables that form the determinant (left-hand side).
   repeated Var keys = 2;
+  // Variables that are determined (right-hand side).
   repeated Var values = 3;
 }
 
+// Derives one or more relations via nested iterative computation.
+// Unlike Def (which is purely declarative), Algorithm provides imperative
+// control over execution order, enabling fixed-point loops, incremental
+// updates, and stateful accumulation.
 message Algorithm {
+  // Relations exported by this algorithm, visible outside its scope.
   repeated RelationId global = 1;
   Script body = 2;
   repeated Attribute attrs = 3;
 }
 
+// An ordered sequence of constructs (loops and instructions) that execute
+// top-to-bottom within an Algorithm or Loop body.
 message Script {
   repeated Construct constructs = 1;
 }
 
+// A single step in a Script: either a Loop or a flat Instruction.
 message Construct {
   oneof construct_type {
     Loop loop = 1;
@@ -49,58 +82,83 @@ message Construct {
   }
 }
 
+// Fixed-point iteration. The init instructions execute once before iteration
+// begins. The body executes repeatedly until a Break condition fires or a
+// fixed point is reached. Loops can be arbitrarily nested.
 message Loop {
+  // Instructions executed once to initialize loop-local relations.
   repeated Instruction init = 1;
+  // Body executed on each iteration.
   Script body = 2;
   repeated Attribute attrs = 3;
 }
 
+// A single imperative step within a Loop. Each instruction type defines a
+// different strategy for updating a relation during iterative computation.
 message Instruction {
-  reserved 4; // Copy rule
+  reserved 4; // Copy rule (removed)
   oneof instr_type {
-    Assign assign = 1; // Work like Defs, but only within a loop
-    Upsert upsert = 2; // In-place update of relation by another by swapping
-    Break break = 3; // Nullary relation that exits the loop when fired
-    MonoidDef monoid_def = 5; // In-place update of relation by another by a monoid operation
-    MonusDef monus_def = 6; // In-place update of relation by another by "subtraction" operation, if it exists
+    Assign assign = 1;
+    Upsert upsert = 2;
+    Break break = 3;
+    MonoidDef monoid_def = 5;
+    MonusDef monus_def = 6;
   }
 }
 
+// Replaces a relation's contents with the result of evaluating the body.
+// Works like a Def, but scoped to a single loop iteration.
 message Assign {
   RelationId name = 1;
   Abstraction body = 2;
   repeated Attribute attrs = 3;
 }
 
+// Merges tuples from the body into the target relation. For tuples sharing
+// the same key, the new value replaces the old one. The value_arity controls
+// how tuple columns are partitioned into keys and values.
 message Upsert {
   RelationId name = 1;
   Abstraction body = 2;
   repeated Attribute attrs = 3;
-  int64 value_arity = 4; // dictates how to partition tuples into key-values pairs
+  // Number of trailing columns that form the value; the rest are keys.
+  int64 value_arity = 4;
 }
 
+// Terminates the enclosing loop when the body evaluates to non-empty.
 message Break {
   RelationId name = 1;
   Abstraction body = 2;
   repeated Attribute attrs = 3;
 }
 
+// Accumulates tuples into a relation using an associative-commutative monoid
+// (e.g. sum, min, max, or). For tuples sharing the same key, values are
+// combined via the monoid operation.
 message MonoidDef {
   Monoid monoid = 1;
   RelationId name = 2;
   Abstraction body = 3;
   repeated Attribute attrs = 4;
-  int64 value_arity = 5; // dictates how to partition tuples into key-values pairs
+  // Number of trailing columns that form the value; the rest are keys.
+  int64 value_arity = 5;
 }
 
+// Removes tuples from a relation using the monus (subtraction) operation
+// dual to the given monoid. For example, sum-monus subtracts values for
+// matching keys.
 message MonusDef {
   Monoid monoid = 1;
   RelationId name = 2;
   Abstraction body = 3;
   repeated Attribute attrs = 4;
-  int64 value_arity = 5; // dictates how to partition tuples into key-values pairs
+  // Number of trailing columns that form the value; the rest are keys.
+  int64 value_arity = 5;
 }
 
+// An associative-commutative aggregation operation used with MonoidDef and
+// MonusDef. The engine leverages the algebraic properties of monoids for
+// parallel and incremental evaluation.
 message Monoid {
   oneof value {
     OrMonoid or_monoid = 1;
@@ -110,33 +168,47 @@ message Monoid {
   }
 }
 
-message OrMonoid {} // Only over Booleans
+// Logical disjunction (OR). Only valid over Boolean values.
+message OrMonoid {}
 
-message MinMonoid { // Parametrized by a type T
+// Minimum over a totally ordered type.
+message MinMonoid {
   Type type = 1;
 }
 
-message MaxMonoid { // Parametrized by a type T
+// Maximum over a totally ordered type.
+message MaxMonoid {
   Type type = 1;
 }
 
-message SumMonoid { // Parametrized by a type T
+// Summation over a numeric type.
+message SumMonoid {
   Type type = 1;
 }
 
 
+// A typed variable binding within an Abstraction.
 message Binding {
   Var var = 1;
   Type type = 2;
 }
 
-// Abstraction and related types
+// A parameterized formula: the bound variables define the relation's columns,
+// and the formula body defines which tuples satisfy the relation. Abstractions
+// appear in Defs, Exists, Reduce, FFI, and loop instructions.
 message Abstraction {
   repeated Binding vars = 1;
   Formula value = 2;
 }
 
-// Formula variants
+// A logical formula in the LQP. Formulas are the building blocks of relation
+// definitions: they combine atoms, logical connectives, quantifiers,
+// aggregations, and built-in operations to describe which tuples a relation
+// contains.
+//
+// Key semantics:
+//   Conjunction([]) = true  (the empty conjunction is always satisfied)
+//   Disjunction([]) = false (the empty disjunction is never satisfied)
 message Formula {
   oneof formula_type {
     Exists exists = 1;
@@ -153,59 +225,95 @@ message Formula {
   }
 }
 
+// Existential quantification. True when at least one binding of the
+// abstraction's variables satisfies the body formula.
 message Exists {
   Abstraction body = 3;
 }
 
+// Aggregation over tuples. The body produces candidate tuples, and the op
+// defines how to combine them (e.g. sum, count). The terms receive the
+// aggregated result. Grouping is implicit via free variables in the terms.
 message Reduce {
+  // The aggregation operator (e.g. sum, min, count).
   Abstraction op = 1;
+  // The relation being aggregated over.
   Abstraction body = 2;
+  // Output terms receiving the aggregated result.
   repeated Term terms = 3;
 }
 
+// Logical AND. All args must be satisfied. An empty Conjunction is true.
 message Conjunction {
   repeated Formula args = 1;
 }
 
+// Logical OR. At least one arg must be satisfied. An empty Disjunction is false.
 message Disjunction {
   repeated Formula args = 1;
 }
 
+// Logical negation. True when the inner formula is not satisfied.
+// Must be used in a stratified manner: the negated formula cannot
+// depend on the relation currently being defined.
 message Not {
   Formula arg = 1;
 }
 
+// Foreign Function Interface call. Invokes an engine-provided function
+// (e.g. rel_primitive_sort, rel_primitive_top) that cannot be expressed
+// in pure first-order logic.
 message FFI {
+  // Function identifier (e.g. "rel_primitive_sort").
   string name = 1;
+  // Input abstractions passed to the function.
   repeated Abstraction args = 2;
+  // Output terms bound by the function.
   repeated Term terms = 3;
 }
 
+// A relation reference (predicate application). Asserts that the given terms
+// form a tuple in the named relation. This is the primary way to reference
+// other Defs, Algorithms, and Data declarations.
 message Atom {
   RelationId name = 1;
   repeated Term terms = 2;
 }
 
+// A compiler directive that influences execution without changing logical
+// semantics (e.g. variable ordering hints, optimization flags).
 message Pragma {
   string name = 1;
   repeated Term terms = 2;
 }
 
+// A built-in primitive operation (arithmetic, comparison, string ops, etc.)
+// hardwired into the engine. Uses RelTerms to allow specialized (constant)
+// arguments alongside variables.
 message Primitive {
+  // Operation identifier (e.g. "rel_primitive_add", "rel_primitive_eq").
   string name = 1;
   repeated RelTerm terms = 2;
 }
 
+// A relation atom identified by a symbolic name (string) rather than a
+// RelationId. Uses RelTerms, allowing mixed constant and variable arguments.
+// Used for engine-internal relations and symbolic tuple construction.
 message RelAtom {
   string name = 3;
   repeated RelTerm terms = 2;
 }
 
+// Type cast from one primitive type to another. The input term is converted
+// and bound to the result term's variable with the target type.
 message Cast {
   Term input = 2;
   Term result = 3;
 }
 
+// A term in a Primitive or RelAtom that can be either a compile-time constant
+// (specialized_value) or a runtime Term. Specialized values allow the engine
+// to optimize for known constants.
 message RelTerm {
   oneof rel_term_type {
     Value specialized_value = 1;
@@ -213,6 +321,7 @@ message RelTerm {
   }
 }
 
+// A term is either a variable reference or a literal constant value.
 message Term {
   oneof term_type {
     Var var = 1;
@@ -220,18 +329,30 @@ message Term {
   }
 }
 
+// A named variable. Variable names are scoped to their enclosing Abstraction.
+// Variables must not shadow bindings from outer Abstractions.
 message Var {
   string name = 1;
 }
 
+// A key-value metadata annotation on a Def, Algorithm, Loop, or Instruction.
+// Used for optimization hints, tagging, and engine directives.
+// Example: @tag(:human_readable_name) for debug/log output.
 message Attribute {
   string name = 1;
   repeated Value args = 2;
 }
 
 //
-// Input data (base relations, CSVs, Iceberg)
+// External data sources
 //
+// Data declarations make external data available as relations in the execution
+// graph. The data is ingested lazily — describing a source does not trigger
+// loading until the relation is actually demanded.
+
+// Declares an external data source. Each variant corresponds to a different
+// storage format. The declared relations can be referenced by Atoms in Defs
+// and Algorithms just like any other relation.
 message Data {
   oneof data_type {
     EDB edb = 1;
@@ -241,25 +362,35 @@ message Data {
   }
 }
 
+// An Extensional Database relation — durable engine-managed data identified
+// by a hierarchical path. EDBs are the result of Snapshot operations and
+// persist across transactions. They are the LQP equivalent of base tables.
 message EDB {
   RelationId target_id = 1;
+  // Hierarchical name path (e.g. ["my_model", "person", "age"]).
   repeated string path = 2;
+  // Column types for the relation.
   repeated Type types = 3;
 }
 
+// A relation backed by a BeTree (B-epsilon tree) storage structure.
+// BeTrees are the engine's native on-disk format for persistent relations.
 message BeTreeRelation {
   RelationId name = 1;
   BeTreeInfo relation_info = 2;
 }
 
+// Physical metadata describing a BeTree-backed relation: its schema,
+// storage configuration, and locator for finding it on disk.
 message BeTreeInfo {
-  reserved 3; // potentially needed for relation_identifier
+  reserved 3;
   repeated Type key_types = 1;
   repeated Type value_types = 2;
   BeTreeConfig storage_config = 4;
   BeTreeLocator relation_locator = 5;
 }
 
+// Tuning parameters for BeTree storage.
 message BeTreeConfig {
   double epsilon = 1;
   int64 max_pivots = 2;
@@ -267,91 +398,114 @@ message BeTreeConfig {
   int64 max_leaf = 4;
 }
 
+// Locates a BeTree on disk, either by page ID or inline data.
 message BeTreeLocator {
   oneof location {
+    // Page ID of the tree's root node.
     UInt128Value root_pageid = 1;
+    // Small relations can be embedded directly as inline bytes.
     bytes inline_data = 4;
   }
   int64 element_count = 2;
   int64 tree_height = 3;
 }
 
+// Loads one or more relations from CSV files. Columns are mapped to
+// relations via GNFColumn entries. The asof timestamp controls cache
+// freshness — changing it triggers re-ingestion.
 message CSVData {
   CSVLocator locator = 1;
   CSVConfig config = 2;
+  // Each column maps a CSV column to a target relation with typed schema.
   repeated GNFColumn columns = 3;
-  string asof = 4; // Blob storage timestamp for freshness requirements
+  // Blob storage timestamp for cache invalidation.
+  string asof = 4;
 }
 
+// Identifies the CSV source: either remote/local file paths or inline data.
 message CSVLocator {
-  repeated string paths = 1; // URL(s) or filesystem path(s) for partitioned loading
-  bytes inline_data = 2; // Inline CSV content (mutually exclusive with paths)
+  // URL(s) or filesystem path(s). Multiple paths for partitioned loading.
+  repeated string paths = 1;
+  // Inline CSV content (mutually exclusive with paths).
+  bytes inline_data = 2;
 }
 
+// Parsing and formatting options for CSV data, used for both import and export.
 message CSVConfig {
-  // Header and structure
-  int32 header_row = 1; // Row number for headers (< 1 means no header)
-  int64 skip = 2; // Lines to skip at start (default: 0)
-  string new_line = 3; // Newline char(s) (default: auto-detect)
-
-  // Delimiters and quotes
-  string delimiter = 4; // Column delimiter (default: ",")
-  string quotechar = 5; // Quote character (default: "\"")
-  string escapechar = 6; // Escape character (default: "\"")
-  string comment = 7; // Comment initiator (default: none)
-
-  // NULL handling
-  repeated string missing_strings = 8; // Strings to treat as NULL/missing
-
-  // Numeric formatting
-  string decimal_separator = 9; // Decimal point (default: ".")
-
-  // Encoding
-  string encoding = 10; // Character encoding (default: "utf-8")
-
-  // Compression
-  string compression = 11; // "none", "gzip", "zstd", "auto" (default: "auto")
-
-  // Partitioning (for export)
-  int64 partition_size_mb = 12;
-}
-
+  int32 header_row = 1;           // Row number for headers (< 1 means no header)
+  int64 skip = 2;                 // Lines to skip at start (default: 0)
+  string new_line = 3;            // Newline char(s) (default: auto-detect)
+  string delimiter = 4;           // Column delimiter (default: ",")
+  string quotechar = 5;           // Quote character (default: "\"")
+  string escapechar = 6;          // Escape character (default: "\"")
+  string comment = 7;             // Comment line prefix (default: none)
+  repeated string missing_strings = 8; // Strings to interpret as NULL/missing
+  string decimal_separator = 9;   // Decimal point character (default: ".")
+  string encoding = 10;           // Character encoding (default: "utf-8")
+  string compression = 11;        // "none", "gzip", "zstd", "auto" (default: "auto")
+  int64 partition_size_mb = 12;   // Target partition size in MB (for export)
+}
+
+// Loads relations from an Apache Iceberg table. Supports full snapshots
+// and incremental delta reads between two snapshot versions.
 message IcebergData {
   IcebergLocator locator = 1;
   IcebergCatalogConfig config = 2;
+  // Column-to-relation mappings.
   repeated GNFColumn columns = 3;
+  // Start snapshot for incremental reads (omit for full table scan).
   optional string from_snapshot = 4;
+  // End snapshot (omit for latest).
   optional string to_snapshot = 5;
+  // When true, returns insert/delete deltas rather than full state.
   bool returns_delta = 6;
 }
 
+// Identifies an Iceberg table within a catalog.
 message IcebergLocator {
   string table_name = 1;
   repeated string namespace = 2;
   string warehouse = 3;
 }
 
+// Connection and authentication settings for an Iceberg catalog.
 message IcebergCatalogConfig {
   string catalog_uri = 1;
   optional string scope = 2;
+  // Catalog-specific properties (e.g. s3.region).
   map<string, string> properties = 3;
+  // Authentication credentials.
   map<string, string> auth_properties = 4;
 }
 
+// Maps a column from an external data source (CSV or Iceberg) to a relation
+// in the execution graph. The column_path identifies the source column, and
+// target_id + types define the destination relation and its schema.
 message GNFColumn {
-  repeated string column_path = 1; // Column identifier path (was: string column_name)
-  optional RelationId target_id = 2; // Target relation (now explicit optional)
-  repeated Type types = 3; // Relation signature (key types + value types)
+  // Hierarchical column identifier (e.g. ["address", "city"] for nested data).
+  repeated string column_path = 1;
+  // Target relation to populate with this column's data.
+  optional RelationId target_id = 2;
+  // Column schema: key types followed by value types.
+  repeated Type types = 3;
 }
 
 //
 // Relation identifiers and types
 //
+
+// Globally unique, opaque identifier for a relation. All relations in the
+// execution graph are identified by UUIDs. Higher-level naming and namespacing
+// is the responsibility of compilers targeting this protocol.
 message RelationId {
-  fixed64 id_low = 1; // Lower 64 bits of UInt128
+  fixed64 id_low = 1;  // Lower 64 bits of UInt128
   fixed64 id_high = 2; // Upper 64 bits of UInt128
 }
 
+// Primitive data type for a column in a relation. LQP only supports primitive
+// (scalar) types — there are no composite, nested, or user-defined types.
+// These types are aligned with Apache Iceberg's type system.
+// The engine uses type information for equality, ordering, and promotion rules.
 message Type {
   oneof type {
     UnspecifiedType unspecified_type = 1;
@@ -371,37 +525,30 @@ message Type {
   }
 }
 
-message UnspecifiedType {}
-
-message StringType {}
-
-message IntType {}
-
-message FloatType {}
-
-message UInt128Type {}
-
-message Int128Type {}
-
-message DateType {}
-
-message DateTimeType {}
-
-message MissingType {}
-
+message UnspecifiedType {}          // Type not yet determined
+message StringType {}               // Arbitrary-length UTF-8 string
+message IntType {}                  // 64-bit signed integer (Iceberg LONG)
+message FloatType {}                // 64-bit IEEE 754 float (Iceberg DOUBLE)
+message UInt128Type {}              // 128-bit unsigned integer
+message Int128Type {}               // 128-bit signed integer
+message DateType {}                 // Calendar date without time or timezone
+message DateTimeType {}             // Timestamp with microsecond precision (UTC)
+message MissingType {}              // Represents absence of a value (NULL)
+message BooleanType {}              // True or false
+message Int32Type {}                // 32-bit signed integer (promotes to IntType)
+message Float32Type {}              // 32-bit IEEE 754 float (promotes to FloatType)
+message UInt32Type {}               // 32-bit unsigned integer
+
+// Fixed-point decimal with configurable precision and scale.
+// Precision <= 38. Scale is fixed for a given column.
 message DecimalType {
-  int32 precision = 1;
-  int32 scale = 2;
+  int32 precision = 1; // Total number of digits
+  int32 scale = 2;     // Digits after the decimal point
 }
 
-message BooleanType {}
-
-message Int32Type {}
-
-message Float32Type {}
-
-message UInt32Type {}
-
+// A literal constant value. Each variant corresponds to a primitive Type.
+// Values appear in Terms (as constants), Attributes (as arguments), and
+// RelTerms (as specialized compile-time constants).
 message Value {
   oneof value {
     string string_value = 1;
@@ -420,27 +567,31 @@ message Value {
   }
 }
 
+// 128-bit unsigned integer, split into two 64-bit halves for protobuf encoding.
 message UInt128Value {
-  fixed64 low = 1; // Lower 64 bits of UInt128
-  fixed64 high = 2; // Upper 64 bits of UInt128
+  fixed64 low = 1;
+  fixed64 high = 2;
 }
 
+// 128-bit signed integer, split into two 64-bit halves for protobuf encoding.
 message Int128Value {
-  fixed64 low = 1; // Lower 64 bits of Int128
-  fixed64 high = 2; // Upper 64 bits of Int128
+  fixed64 low = 1;
+  fixed64 high = 2;
 }
 
+// The missing/NULL value.
 message MissingValue {}
 
+// Calendar date without time or timezone.
 message DateValue {
   int32 year = 1;
   int32 month = 2;
   int32 day = 3;
 }
 
+// Timestamp with microsecond precision. All values are UTC, consistent
+// with the Iceberg spec for timestamps.
 message DateTimeValue {
-  // DateTimeValue does not have a timezone - all values are assumed to be UTC. This is in
-  // line with the Iceberg spec for timestamp
   int32 year = 1;
   int32 month = 2;
   int32 day = 3;
@@ -450,8 +601,9 @@ message DateTimeValue {
   int32 microsecond = 7;
 }
 
+// Fixed-point decimal value with explicit precision and scale.
 message DecimalValue {
-  int32 precision = 1; // Total number of digits
-  int32 scale = 2; // Number of digits after decimal point
-  Int128Value value = 3; // Physical representation of decimal value
+  int32 precision = 1;    // Total number of digits
+  int32 scale = 2;        // Digits after the decimal point
+  Int128Value value = 3;  // Unscaled integer representation
 }
diff --git a/proto/relationalai/lqp/v1/transactions.proto b/proto/relationalai/lqp/v1/transactions.proto
index 8c9ce432..3bf35ff7 100644
--- a/proto/relationalai/lqp/v1/transactions.proto
+++ b/proto/relationalai/lqp/v1/transactions.proto
@@ -1,3 +1,17 @@
+// Logical Query Protocol — Transactions
+//
+// A Transaction is the top-level unit of work sent by a client to the engine.
+// It contains configuration, an optional sync directive, and one or more epochs
+// that execute sequentially. Each epoch groups writes (which modify the execution
+// graph) and reads (which query it).
+//
+// Execution model:
+//   - Epochs execute in order. Within an epoch, all writes execute before reads.
+//   - Multiple writes within an epoch are unordered and may execute concurrently.
+//   - Multiple reads within an epoch are unordered and may execute concurrently.
+//   - The engine first runs a "simulator" pass to validate and minimize the
+//     transaction, then a "driver" pass to execute against the real runtime state.
+
 syntax = "proto3";
 
 option go_package = "github.com/RelationalAI/logical-query-protocol/sdks/go/src/lqp/v1";
@@ -7,41 +21,62 @@ package relationalai.lqp.v1;
 import "relationalai/lqp/v1/fragments.proto";
 import "relationalai/lqp/v1/logic.proto";
 
+// The top-level unit of work. A client sends a Transaction to the engine,
+// which executes its epochs sequentially and returns results for all reads.
 message Transaction {
   repeated Epoch epochs = 1;
   Configure configure = 2;
+  // Optional fragment synchronization. When present, the engine verifies that
+  // exactly these fragments are installed, uninstalling extras and reporting
+  // missing ones as errors.
   optional Sync sync = 3;
 }
 
+// Transaction-wide configuration. All parameters must be set explicitly by
+// the client to ensure deterministic behavior across engine versions.
+// Changing a default in the engine has no effect until the client updates.
 message Configure {
+  // Protocol semantics version. Bumped when user-visible semantics change
+  // (e.g. how empty relations aggregate).
   int64 semantics_version = 1;
   IVMConfig ivm_config = 2;
 }
 
+// Controls incremental view maintenance (IVM) behavior.
 message IVMConfig {
   MaintenanceLevel level = 1;
 }
 
+// How aggressively the engine maintains derived relations incrementally
+// rather than recomputing from scratch.
 enum MaintenanceLevel {
-  MAINTENANCE_LEVEL_UNSPECIFIED = 0;
-  MAINTENANCE_LEVEL_OFF = 1;
-  MAINTENANCE_LEVEL_AUTO = 2;
-  MAINTENANCE_LEVEL_ALL = 3;
+  MAINTENANCE_LEVEL_UNSPECIFIED = 0; // Use engine default
+  MAINTENANCE_LEVEL_OFF = 1;         // No incremental maintenance; full recomputation
+  MAINTENANCE_LEVEL_AUTO = 2;        // Incrementally maintain relations the engine deems beneficial
+  MAINTENANCE_LEVEL_ALL = 3;         // Maintain all derived relations incrementally
 }
 
+// Declares the set of fragment IDs that the client expects to be installed.
+// The engine reconciles its state: extra fragments are uninstalled, missing
+// fragments cause a desync error with the offending IDs so the client can
+// retry with the missing definitions included.
+// Sending Sync([]) uninstalls all fragments, allowing a clean reinstall.
 message Sync {
   repeated FragmentId fragments = 1;
 }
 
+// An observable state boundary within a transaction. Epochs execute
+// sequentially; each epoch's writes take effect before its reads execute.
 message Epoch {
   repeated Write writes = 1;
   repeated Read reads = 2;
 }
 
 //
-// Write operations
+// Write operations — modify the installed execution graph
 //
 
+// A write operation that changes the state of installed fragments or relations.
 message Write {
   reserved 4; // Sync is now at transaction level
   oneof write_type {
@@ -52,47 +87,58 @@ message Write {
   }
 }
 
+// Installs a fragment (and all its declarations) into the execution graph.
+// If a fragment with the same ID already exists, it is replaced.
 message Define {
   Fragment fragment = 1;
 }
 
+// Removes a previously installed fragment and all its declarations.
 message Undefine {
   FragmentId fragment_id = 1;
 }
 
+// Declares which relations should be optimized together within this epoch.
+// More context gives the optimizer more opportunities for reuse, but
+// increases planning time.
 message Context {
   repeated RelationId relations = 1;
 }
 
-// A single (destination, source) pair within a Snapshot action. The destination_path is
-// relative to the Snapshot's prefix.
+// A single (destination, source) pair within a Snapshot action. Maps a
+// derived relation to a durable EDB path.
 message SnapshotMapping {
+  // Hierarchical destination path for the resulting EDB.
   repeated string destination_path = 1;
+  // The derived relation whose current state is captured.
   RelationId source_relation = 2;
 }
 
-// Demand the source IDBs, take immutable snapshots, and turn them into EDBs under the
-// given paths (specified as sequences of strings, see EDB). If a prefix is specified,
-// the engine should treat this as a complete snapshot of all EDBs under that prefix,
-// removing any pre-existing EDBs for the same prefix that are not in the mappings.
+// Materializes derived relations as durable EDB (base) relations. Demands
+// the source relations, takes immutable snapshots, and stores them under the
+// given paths. When a prefix is specified, any pre-existing EDBs under that
+// prefix not listed in the mappings are removed — enabling atomic replacement
+// of an entire namespace.
 message Snapshot {
   repeated SnapshotMapping mappings = 1;
+  // Optional namespace prefix for atomic replacement of all EDBs under it.
   repeated string prefix = 2;
 }
 
 //
-// Export config
+// Export configuration
 //
 
+// Configuration for exporting relation data to CSV files.
 message ExportCSVConfig {
+  // Destination path (URL or filesystem path).
   string path = 1;
 
   ExportCSVSource csv_source = 10;
   CSVConfig csv_config = 11;
 
-  // Below are all deprecated in favour of the `csv_config` above.
+  // Deprecated fields — use csv_config and csv_source instead.
   repeated ExportCSVColumn data_columns = 2;
-
   optional int64 partition_size = 3;
   optional string compression = 4;
   optional bool             syntax_header_row = 5;
@@ -100,43 +146,51 @@ message ExportCSVConfig {
   optional string           syntax_delim = 7;
   optional string           syntax_quotechar = 8;
   optional string           syntax_escapechar = 9;
-
-  // TODO: support data integration options, e.g., private tokens for private buckets etc.
 }
 
+// Maps a named column to a relation for CSV export.
 message ExportCSVColumn {
   string column_name = 1;
   RelationId column_data = 2;
 }
 
+// A list of CSV export column mappings.
 message ExportCSVColumns {
   repeated ExportCSVColumn columns = 1;
 }
 
+// Defines the data source for CSV export: either individual column mappings
+// (GNF-style) or a single table definition.
 message ExportCSVSource {
   oneof csv_source {
     ExportCSVColumns gnf_columns = 1;
+    // A single relation defining the entire table structure.
     RelationId table_def = 2;
   }
 }
 
-// Iceberg Export config
-
+// Configuration for exporting relation data to an Apache Iceberg table.
 message ExportIcebergConfig {
   IcebergLocator locator = 1;
   IcebergCatalogConfig config = 2;
-  RelationId table_def = 3;              // Which definition to export as a table.
-  reserved 4;                            // was: repeated ExportColumn columns
-  optional string prefix = 5;            // File name prefix for parquet files that are produced.
+  // The relation to export as an Iceberg table.
+  RelationId table_def = 3;
+  reserved 4;
+  // File name prefix for generated Parquet files.
+  optional string prefix = 5;
   optional int64 target_file_size_bytes = 6;
+  // Parquet compression codec (e.g. "snappy", "zstd", "gzip").
   string compression = 7;
+  // Iceberg table properties (e.g. write.format.default).
   map<string, string> table_properties = 8;
 }
 
 //
-// Read operations
+// Read operations — query the execution graph without modifying it
 //
 
+// A read operation that observes the current state of the execution graph.
+// All reads within an epoch execute after all writes in that epoch complete.
 message Read {
   oneof read_type {
     Demand demand = 1;
@@ -147,30 +201,46 @@ message Read {
   }
 }
 
+// Triggers computation of a relation without returning its contents.
+// Used to warm caches or force evaluation of side-effecting rules
+// before subsequent epochs reference them.
 message Demand {
   RelationId relation_id = 1;
 }
 
+// Computes a relation and returns its contents to the client, identified
+// by a human-readable name. Multiple outputs in the same epoch may share
+// a relation_id but must have distinct names.
 message Output {
   string name = 1;
   RelationId relation_id = 2;
 }
 
+// Writes relation data to an external storage system (CSV or Iceberg).
 message Export {
   oneof export_config {
     ExportCSVConfig csv_config = 1;
     ExportIcebergConfig iceberg_config = 2;
-
-    // TODO: support JSON export
   }
 }
 
+// Executes a speculative epoch on a transient fork of the database.
+// Writes within the WhatIf do not persist; reads observe the modified
+// state. Multiple WhatIfs in the same epoch execute independently.
+// WhatIfs can be nested.
 message WhatIf {
+  // Human-readable label for the branch.
   string branch = 1;
+  // The speculative epoch to execute.
   Epoch epoch = 2;
 }
 
+// Conditional transaction abort. If the referenced relation is non-empty
+// after all writes in this epoch, the entire transaction fails. Used to
+// enforce integrity constraints.
 message Abort {
+  // Human-readable error identifier included in the abort message.
   string name = 1;
+  // The "guard" relation: transaction aborts if this is non-empty.
   RelationId relation_id = 2;
 }