Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ Prose references a version as `v0.X.Y`; headings stay bare `[0.X.Y]`.

## [Unreleased]

### Added

- Type-system: `type_is_copy(const Type *)` classification (RFC 0001 Phase 1). Primitives, unit, function types, and unknown are `Copy`; generic-head aggregates (`Result<T>`, `Vec<T>`, etc.) are NOT `Copy`. Public API in `typecheck.h`.
- Type-system: move tracking for non-`Copy` bindings. Each scope entry carries a `moved` flag; the checker marks bindings as moved when a non-Copy identifier is consumed via `let y = x`, a function call argument, or a return value, then errors `"use of moved value 'x'"` on subsequent reads. Copy types (the entire current language surface in production examples) keep this flag at 0 throughout and see no diagnostic.
- 6 new typecheck tests covering the move-tracking surface: Copy repeated-use OK, non-Copy use-after-move-in-let errors, non-Copy use-after-move-via-call errors, non-Copy single-use OK, Copy-after-move irrelevant (no false positives on `u64`), direct `type_is_copy` API check.
- Sets up Phase 1 of the RFC 0001 implementation roadmap. Phases 2-5 (grammar extensions, codegen for heap, stdlib types, `extern host` ABI) are still ahead.

### Changed

- `spec/rfcs/0001-memory-model.md` expanded from open-design to decision-ready proposal. Concrete syntax examples (move semantics, borrows, RAII drop). Resolutions to 7 of 10 originally-open questions (stack vs heap, string layout, collection placement, move-vs-copy default, references, drop, persistent state). 3 questions remain genuinely open (lifetime annotations, custom allocators, async interaction). Counterargument section steelmanning GC / `unsafe` / "C++ with extra steps". Migration path documented (existing examples compile unchanged; impact is on future heap-allocated types). Implementation roadmap split into 5 phases totaling ~3,700 LoC across 5 PRs. Decision criteria added so the RFC has a clear bar for moving from `draft` to `accepted`.
Expand Down
15 changes: 15 additions & 0 deletions compiler/include/typecheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,19 @@ const Type *type_unknown(void);
* reused across calls; copy out if you need to keep the string. */
const char *type_describe(const Type *t);

/* Is this type `Copy`? Copy types can be used freely without losing the
 * source binding (no move semantics). Per RFC 0001:
 * - All primitives (u8..u256, i8..i64, bool, char, str) are Copy.
 * - Unit `()` is Copy.
 * - Function types are Copy (the reference, not the body).
 * - Generic-head aggregates (`Result<u64>`, `Vec<T>`, etc.) are NOT Copy
 * by default; they move on use.
 * - Unknown is permissively treated as Copy so the checker does not
 * spam errors on values it cannot resolve.
 *
 * Returns 1 when `t` is Copy and 0 when it is not. A NULL `t` is
 * accepted and classified like Unknown, i.e. as Copy.
 *
 * Used by the move-tracker to decide whether a binding's use marks the
 * binding as moved. Once `&T` references land, this also informs how
 * borrows compose with moves. */
int type_is_copy(const Type *t);

#endif /* CLEAVE_TYPECHECK_H */
96 changes: 96 additions & 0 deletions compiler/src/typecheck.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,24 @@ const char *type_describe(const Type *t) {
return buf;
}

int type_is_copy(const Type *t) {
/* Permissive default for missing / unresolved types. Lets the
* checker stay quiet on identifiers we cannot place yet (e.g.
* stdlib constructors that the typecheck pass does not resolve).
* This is a soundness gap that will tighten as the type system
* grows; for v0 it deliberately errs on the side of "no spurious
* diagnostics on code the user did not get to write yet." */
if (!t) return 1;
switch (t->kind) {
case TY_UNKNOWN: return 1;
case TY_UNIT: return 1;
case TY_PRIM: return 1;
case TY_FN: return 1; /* function references are Copy */
case TY_GENERIC: return 0; /* aggregates: Result<T>, Vec<T>, etc. */
}
return 1;
}

/* ============== diagnostics ============== */

static void report_at(TypeChecker *tc, const Span *span, const char *fmt, ...) {
Expand Down Expand Up @@ -308,6 +326,12 @@ typedef struct {
StrRef name;
const Type *type;
int depth;
/* Move-tracking flag (RFC 0001 Phase 1). When a non-Copy binding is
* consumed (passed by value to a fn, assigned to another binding,
* returned), this is set; subsequent reads error out as "use of
* moved value". Copy types keep this flag at 0 throughout their
* lifetime since every use is a value copy. */
int moved;
} ScopeEntry;

typedef struct {
Expand Down Expand Up @@ -343,6 +367,7 @@ static void scope_bind(Scope *s, StrRef name, const Type *type) {
s->entries[s->n].name = name;
s->entries[s->n].type = type;
s->entries[s->n].depth = s->depth;
s->entries[s->n].moved = 0;
s->n++;
}

Expand All @@ -357,6 +382,46 @@ static const Type *scope_lookup(const Scope *s, StrRef name) {
return NULL;
}

/* Writable counterpart of scope_lookup.
 *
 * Walks the entries from the newest to the oldest and returns a pointer
 * to the first one whose name matches `name` byte-for-byte, so that a
 * shadowing binding wins over the one it shadows. Callers use the
 * result to read or update the entry's `moved` flag. Returns NULL when
 * no entry carries that name. */
static ScopeEntry *scope_lookup_mut(Scope *s, StrRef name) {
    size_t remaining = s->n;

    while (remaining > 0) {
        ScopeEntry *candidate = &s->entries[--remaining];

        /* Cheap length check first; only equal-length names are compared. */
        if (candidate->name.length != name.length) {
            continue;
        }
        if (memcmp(candidate->name.start, name.start, name.length) == 0) {
            return candidate;
        }
    }
    return NULL;
}

/* Flag the most recent binding called `name` as moved.
 *
 * Nothing happens when there is no such binding, when the binding's
 * type is Copy (Copy values are never moved), or when the binding has
 * already been moved.
 *
 * Returns 1 only if this call changed the flag from 0 to 1; returns 0
 * in every no-op case. */
static int scope_mark_moved(Scope *s, StrRef name) {
    ScopeEntry *entry = scope_lookup_mut(s, name);

    /* Short-circuit order matters: `entry` must be non-NULL before its
     * fields are read. */
    if (!entry || type_is_copy(entry->type) || entry->moved) {
        return 0;
    }

    entry->moved = 1;
    return 1;
}

/* Report whether the most recent binding called `name` has been moved.
 *
 * Scans from the newest entry to the oldest and returns the `moved`
 * flag of the first name match. Returns 0 when no binding matches;
 * unresolved names are not an error here, since check_expr's
 * identifier path deals with them through the normal
 * scope_lookup -> Unknown fallthrough. */
static int scope_is_moved(const Scope *s, StrRef name) {
    size_t idx = s->n;

    while (idx > 0) {
        const ScopeEntry *entry = &s->entries[--idx];
        int same_name =
            entry->name.length == name.length &&
            memcmp(entry->name.start, name.start, name.length) == 0;

        if (same_name) {
            return entry->moved;
        }
    }
    return 0;
}

/* ============== expression checker ============== */

static const Type *check_expr(TypeChecker *tc, Scope *scope, AstNode *node);
Expand Down Expand Up @@ -429,11 +494,26 @@ static const Type *check_binary(TypeChecker *tc, Scope *scope, AstNode *node) {
return type_unknown();
}

/* Apply RFC 0001 move semantics to the binding that `expr` names.
 *
 * Acts only when `expr` is a bare identifier; scope_mark_moved then
 * decides whether the binding is non-Copy and actually needs to be
 * flagged. A NULL `expr` and every other expression kind (method
 * chains, struct field access, indexing, ...) are ignored: v0 covers
 * just the common case of handing a let-binding over directly, and
 * compound expressions are left to a richer follow-up. */
static void consume_if_movable(Scope *scope, AstNode *expr) {
    if (expr && expr->kind == AST_EXPR_IDENT) {
        scope_mark_moved(scope, expr->as.ident.name);
    }
}

static const Type *check_call(TypeChecker *tc, Scope *scope, AstNode *node) {
ExprCall *c = &node->as.call;
const Type *callee_t = check_expr(tc, scope, c->callee);
for (size_t i = 0; i < c->n_args; ++i) {
check_expr(tc, scope, c->args[i]);
/* Passing a non-Copy value by name moves it into the callee.
* Mark after the read so the use-of-moved diagnostic fires on
* subsequent reads, not on this one. */
consume_if_movable(scope, c->args[i]);
}
if (callee_t && callee_t->kind == TY_FN) {
if (c->n_args != callee_t->as.fn.n_params) {
Expand Down Expand Up @@ -479,6 +559,13 @@ static const Type *check_expr(TypeChecker *tc, Scope *scope, AstNode *node) {
break;
case AST_EXPR_IDENT: {
const Type *bound = scope_lookup(scope, node->as.ident.name);
if (bound && !type_is_copy(bound) &&
scope_is_moved(scope, node->as.ident.name)) {
report_at(tc, &node->span,
"use of moved value '%.*s'",
(int)node->as.ident.name.length,
node->as.ident.name.start);
}
t = bound ? bound : type_unknown();
break;
}
Expand Down Expand Up @@ -537,6 +624,12 @@ static void check_let(TypeChecker *tc, Scope *scope, AstNode *node) {
StmtLet *l = &node->as.let_stmt;
const Type *value_t = check_expr(tc, scope, l->value);

/* `let y = x` where x is non-Copy moves x into y. The new binding
* `y` starts fresh (moved=false); the source `x` is marked moved
* by consume_if_movable. Order matters: check_expr above already
* fired the use-of-moved diagnostic if x was already moved. */
consume_if_movable(scope, l->value);

const Type *declared_t = NULL;
if (l->type) {
declared_t = resolve_type_expr(tc, l->type);
Expand All @@ -562,6 +655,9 @@ static void check_return(TypeChecker *tc, Scope *scope, AstNode *node,
return;
}
const Type *value_t = check_expr(tc, scope, r->value);
/* Returning a non-Copy value moves it out of the function; the
* caller now owns it. */
consume_if_movable(scope, r->value);
if (expected_ret && !type_equal(expected_ret, value_t)) {
report_mismatch(tc, &node->span, expected_ret, value_t,
"return value");
Expand Down
115 changes: 115 additions & 0 deletions compiler/tests/typecheck_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,113 @@ TEST(test_continues_after_first_error) {
free(err);
}

/* ============== move tracking (RFC 0001 Phase 1) ============== */

TEST(test_copy_type_can_be_used_repeatedly) {
/* Primitives are Copy; using `x` multiple times is fine. Here the
 * `u64` binding `x` initializes both `y` and `z`; the check must
 * succeed and report zero errors. */
const char *src =
"module M {\n"
" fn f() -> u64 { let x: u64 = 42; let y: u64 = x; let z: u64 = x; z }\n"
"}";
int errors = 0;
char *err = NULL;
ASSERT(run_check(src, &errors, &err));
ASSERT_EQ_INT(errors, 0);
free(err);
}

TEST(test_non_copy_use_after_move_in_let_errors) {
/* `make` returns Result<u64> which is TY_GENERIC -> non-Copy.
 * `let y = r` moves r; the subsequent `let z = r` reads a moved
 * value and should error. The test accepts any error count >= 1 as
 * long as the collected text contains "use of moved value 'r'". */
const char *src =
"module M {\n"
" fn make() -> Result<u64> { make() }\n"
" fn use_twice() -> Result<u64> {\n"
" let r: Result<u64> = make()\n"
" let y: Result<u64> = r\n"
" let z: Result<u64> = r\n"
" z\n"
" }\n"
"}";
int errors = 0;
char *err = NULL;
ASSERT(run_check(src, &errors, &err));
ASSERT(errors >= 1);
ASSERT(err && strstr(err, "use of moved value 'r'") != NULL);
free(err);
}

TEST(test_non_copy_use_after_move_via_call_errors) {
/* Passing r to take(r) moves it; subsequent reference errors.
 * `r` is a Result<u64> (non-Copy) handed to `take` as a bare
 * identifier argument, and the trailing `r` expression is the read
 * that must be diagnosed. Any error count >= 1 is accepted as long as
 * the collected text contains "use of moved value 'r'". */
const char *src =
"module M {\n"
" fn make() -> Result<u64> { make() }\n"
" fn take(r: Result<u64>) -> u64 { 0 }\n"
" fn use_then_call() -> Result<u64> {\n"
" let r: Result<u64> = make()\n"
" let _: u64 = take(r)\n"
" r\n"
" }\n"
"}";
int errors = 0;
char *err = NULL;
ASSERT(run_check(src, &errors, &err));
ASSERT(errors >= 1);
ASSERT(err && strstr(err, "use of moved value 'r'") != NULL);
free(err);
}

TEST(test_non_copy_single_use_is_fine) {
/* Moving once is the happy path; only the second use should error.
 * The non-Copy `r` is read exactly once, as the function's trailing
 * expression, so the check must report zero errors. */
const char *src =
"module M {\n"
" fn make() -> Result<u64> { make() }\n"
" fn ok() -> Result<u64> {\n"
" let r: Result<u64> = make()\n"
" r\n"
" }\n"
"}";
int errors = 0;
char *err = NULL;
ASSERT(run_check(src, &errors, &err));
ASSERT_EQ_INT(errors, 0);
free(err);
}

TEST(test_copy_after_move_irrelevant) {
/* `x: u64` is Copy. Both `let y = x` and the later read of `x` are
 * value copies; nothing moves. No diagnostic should fire even
 * though syntactically this looks like a use-after-move pattern.
 * The source also passes `x` as a call argument (`read(x)`) after the
 * two lets, so the call-argument path is exercised for a Copy type
 * as well. Expects zero errors. */
const char *src =
"module M {\n"
" fn read(x: u64) -> u64 { x }\n"
" fn caller() -> u64 {\n"
" let x: u64 = 5\n"
" let y: u64 = x\n"
" let z: u64 = x\n"
" read(x)\n"
" }\n"
"}";
int errors = 0;
char *err = NULL;
ASSERT(run_check(src, &errors, &err));
ASSERT_EQ_INT(errors, 0);
free(err);
}

TEST(test_type_is_copy_classification) {
/* Direct API test of the Copy classification: three primitives,
 * unit, unknown and a NULL type pointer must all classify as Copy
 * (return value 1). */
ASSERT_EQ_INT(type_is_copy(type_prim(PRIM_U64)), 1);
ASSERT_EQ_INT(type_is_copy(type_prim(PRIM_BOOL)), 1);
ASSERT_EQ_INT(type_is_copy(type_prim(PRIM_STR)), 1);
ASSERT_EQ_INT(type_is_copy(type_unit()), 1);
ASSERT_EQ_INT(type_is_copy(type_unknown()), 1);
ASSERT_EQ_INT(type_is_copy(NULL), 1);
/* No public API for constructing a generic type; covered by the
 * end-to-end use-after-move tests above. Function types (TY_FN) are
 * not asserted directly here either. */
}

int main(void) {
/* type singletons */
RUN(test_prim_interning);
Expand All @@ -267,5 +374,13 @@ int main(void) {
RUN(test_call_arg_type_mismatch);
RUN(test_continues_after_first_error);

/* move tracking (RFC 0001 Phase 1) */
RUN(test_copy_type_can_be_used_repeatedly);
RUN(test_non_copy_use_after_move_in_let_errors);
RUN(test_non_copy_use_after_move_via_call_errors);
RUN(test_non_copy_single_use_is_fine);
RUN(test_copy_after_move_irrelevant);
RUN(test_type_is_copy_classification);

REPORT();
}
Loading