diff --git a/README.md b/README.md index da6cb36..ebebf9c 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,84 @@ exe.root_module.addImport("zig_csv", zig_csv.module("zig_csv")); ## Usage +The library provides two primary types for working with CSV data: +`StructuredTable` and `Table`. + +The differences between them are as follows: +- `StructuredTable` requires a predefined schema, + allowing for type-safe parsing and manipulation of CSV data. +- `Table` offers a more flexible approach, enabling dynamic + handling of CSV data without a predefined schema. + +### StructuredTable + +The `StructuredTable` allows you to define a schema for your CSV data, +enabling type-safe parsing and manipulation. + +```zig +const std = @import("std"); +const csv = @import("zig_csv"); +const allocator = std.heap.page_allocator; + +// Define a schema for the CSV data +const Animal = struct { + id: i32, + name: []const u8, + happy: ?bool, +}; + +// Parse CSV data into a StructuredTable +var table = csv.StructuredTable(Animal).init( + allocator, + csv.Settings.default() +); +defer table.deinit(); +try table.parse( + \\id,name,happy + \\1,dog, + \\2,cat, + \\3,bird, +); + +// Modify the name of the animal with id 2 +for (0..table.getRowCount()) |index| { + // Retrieve the row at the current index. + const row = try table.getRow(index); + if (row == .@"error") { + // If the row structure doesn't match the schema, handle the error. + break; + } + // Access the parsed Animal struct from the row. + var animal = row.ok.value; + // Look for the animal with id == 2. + if (animal.id != 2) continue; + + // Change the animal's name to "mouse". + animal.name = "mouse"; + // Attempt to write the modified struct back to the table. + const result = try table.editRow(index, animal); + if (result == .@"error") { + // If the new struct doesn't match the schema, handle the error. + } + // Stop after editing the first matching row. 
+ break; +} + +// Export the table back to CSV +const exported_csv = try table.exportCSV(allocator); +defer allocator.free(exported_csv); +std.debug.print("Exported CSV:\n{s}\n", .{exported_csv}); +// id,name,happy +// 1,dog, +// 2,mouse, +// 3,bird, + +``` + +### Table + +The `Table` type provides a flexible way to work with CSV data without a predefined schema. + ```zig const std = @import("std"); const csv = @import("zig_csv"); diff --git a/build.zig b/build.zig index 79fbe97..a6ee2c5 100644 --- a/build.zig +++ b/build.zig @@ -15,13 +15,14 @@ pub fn build(b: *std.Build) void { .root_module = module_root, }); const module_tests = b.addModule("tests", .{ - .root_source_file = b.path("src/tests.zig"), + .root_source_file = b.path("src/tests/root.zig"), .optimize = mode, .target = target, }); const lib_tests = b.addTest(.{ .root_module = module_tests, }); + lib_tests.root_module.addImport("zig_csv", module_root); const install_docs = b.addInstallDirectory(.{ .source_dir = lib.getEmittedDocs(), diff --git a/src/root.zig b/src/root.zig index 8c4c933..adf1b14 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,270 +1,12 @@ -//! This module provides structs for parsing and manipulating CSV data -//! [Released under GNU LGPLv3] -//! -const std = @import("std"); -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; - -/// A structure for storing settings for use with struct Table -pub const Settings = struct { - /// The delimiter that separates the values (aka. 
separator) - delimiter: []const u8, - /// The terminator that defines when a row of delimiter-separated values is terminated - terminator: []const u8, - - /// A function that returns the default settings that are most commonly used for CSV data - /// { .delimiter = ",", .terminator = "\n" } - pub fn default() Settings { - return Settings{ - .delimiter = ",", - .terminator = "\n", - }; - } -}; - -/// Errors that may return from struct Table -pub const TableError = error{ - /// The requested column was not found - ColumnNotFound, - /// The requested value contains a delimiter or terminator character - IllegalCharacter, - /// A row is inconsistent with the number of values previously parsed - InconsistentRowLength, - /// Data must be loaded first to perform the requested operation - NoData, - /// Could not allocate required memory - OutOfMemory, - /// The requested row was not found - RowNotFound, - /// The requested value was not found - ValueNotFound, -}; - -/// A structure for parsing and manipulating CSV data -pub const Table = struct { - /// The settings that should be used when parsing the CSV data - settings: Settings, - // allocator used for temporary allocations - allocator: Allocator, - // amount of columns expected in each row, used for validation - expected_column_count: ?usize, - // array of rows, each row is an array of subsequent column values - data: ArrayList(ArrayList([]const u8)), - - /// Initialize struct Table - pub fn init(allocator: Allocator, settings: Settings) Table { - return Table{ - .settings = settings, - .allocator = allocator, - .expected_column_count = null, - .data = .empty, - }; - } - - /// Deinitializes the internal arena allocator and parsed data - pub fn deinit(self: *Table) void { - for (self.data.items) |*row| { - row.deinit(self.allocator); - } - self.data.deinit(self.allocator); - } - - /// Load and append CSV data to the struct Table - pub fn parse(self: *Table, csv_data: []const u8) TableError!void { - const 
csv_data_sanatized = std.mem.trimRight(u8, csv_data, self.settings.terminator); - var rows = std.mem.splitSequence(u8, csv_data_sanatized, self.settings.terminator); - while (rows.next()) |row| { - const value_count = try self.parseRow(row); - if (self.expected_column_count == null) { - self.expected_column_count = value_count; - } else if (value_count != self.expected_column_count) { - return TableError.InconsistentRowLength; - } - } - } - - /// Parse a single row of CSV data and append it to the struct Table - /// - /// Returns the number of values parsed in the row. - fn parseRow(self: *Table, row: []const u8) TableError!usize { - var values: ArrayList([]const u8) = .empty; - var columns = std.mem.splitSequence(u8, row, self.settings.delimiter); - while (columns.next()) |value| { - try values.append(self.allocator, value); - } - try self.data.append(self.allocator, values); - return values.items.len; - } - - /// Returns the number of rows in the table - pub fn getRowCount(self: Table) usize { - return self.data.items.len; - } - - /// Returns the number of rows in the table - pub fn getColumnCount(self: Table) TableError!usize { - if (self.expected_column_count == null) return TableError.NoData; - return self.expected_column_count orelse unreachable; - } - - /// Returns all columns indexes that match a given value in a specific row - /// - /// Arguments: - /// - `allocator`: The allocator to use for the returned slice. - /// - `row_index`: The index of the row to search in. - /// - `searched_value`: The value to search for in the row. - /// - /// Raises `TableError.ValueNotFound` if no matching values are found. 
- /// - /// This function may be used for retrieving columns by their header key: - /// ```zig - /// try table.parse( - /// \\id,name - /// \\1,John - /// ); - /// const indexes = try table.findColumnIndexesByValue(allocator, 0, "id"); - /// assert(indexes == &.{0}); - /// ``` - pub fn findColumnIndexesByValue(self: Table, allocator: Allocator, row_index: usize, searched_value: []const u8) TableError![]usize { - if (self.data.items.len < row_index) return TableError.RowNotFound; - var column_indexes: ArrayList(usize) = .empty; - for (self.data.items[row_index].items, 0..) |column_value, column_index| { - if (std.mem.eql(u8, column_value, searched_value)) { - try column_indexes.append(allocator, column_index); - } - } - if (column_indexes.items.len <= 0) { - column_indexes.deinit(allocator); - return TableError.ValueNotFound; - } - return column_indexes.toOwnedSlice(allocator); - } - - /// Returns all row indexes that match a given value in a specific column - /// - /// Arguments: - /// - `allocator`: The allocator to use for the returned slice. - /// - `column_index`: The index of the column to search in. - /// - `searched_value`: The value to search for in the column. - /// - /// Raises `TableError.ValueNotFound` if no matching values are found. - /// - /// This function may be used for retrieving columns by their header key: - /// ```zig - /// try table.parse( - /// \\id,name - /// \\1,John - /// ); - /// const indexes = try table.findRowIndexesByValue(allocator, 0, "1"); - /// assert(indexes == &.{1}); - /// ``` - pub fn findRowIndexesByValue(self: Table, allocator: Allocator, column_index: usize, searched_value: []const u8) TableError![]usize { - if (column_index >= self.expected_column_count orelse 0) return TableError.ColumnNotFound; - var row_indexes: ArrayList(usize) = .empty; - for (self.data.items, 0..) 
|row, row_index| { - if (std.mem.eql(u8, row.items[column_index], searched_value)) { - try row_indexes.append(allocator, row_index); - } - } - if (row_indexes.items.len <= 0) { - row_indexes.deinit(allocator); - return TableError.ValueNotFound; - } - return row_indexes.toOwnedSlice(allocator); - } - - /// Return the column at the provided index as a slice of values - pub fn getColumnByIndex(self: Table, allocator: Allocator, column_index: usize) TableError![]const []const u8 { - if (column_index > self.expected_column_count orelse 0) return TableError.ColumnNotFound; - var column_values: ArrayList([]const u8) = .empty; - for (self.data.items) |row| { - try column_values.append(allocator, row.items[column_index]); - } - return column_values.toOwnedSlice(allocator); - } - - /// Return the row at the provided index as a slice of values - pub fn getRowByIndex(self: Table, row_index: usize) TableError![]const []const u8 { - if (row_index >= self.data.items.len) return TableError.RowNotFound; - return self.data.items[row_index].items; - } - - /// Insert an empty row at the provided index and shift all subsequent rows - /// - /// Arguments: - /// - `row_index`: The index at which to insert the empty row. If `null`, the row will be appended to the end. - /// - /// Returns the index of the newly inserted row. 
- pub fn insertEmptyRow(self: *Table, row_index: ?usize) TableError!usize { - const target_index = row_index orelse self.data.items.len; - if (self.expected_column_count == null) return TableError.NoData; - if (target_index > self.data.items.len) return TableError.RowNotFound; - var empty_row: ArrayList([]const u8) = .empty; - for (0..self.expected_column_count orelse unreachable) |_| try empty_row.append(self.allocator, ""); - try self.data.insert(self.allocator, target_index, empty_row); - return target_index; - } - - /// Insert an empty column at the provided index and shift all subsequent columns - /// - /// Arguments: - /// - `column_index`: The index at which to insert the empty column. If `null`, the column will be appended to the end. - /// - /// Returns the index of the newly inserted column. - pub fn insertEmptyColumn(self: *Table, column_index: ?usize) TableError!usize { - const target_index = column_index orelse self.expected_column_count orelse return TableError.NoData; - if (target_index > self.expected_column_count orelse unreachable) return TableError.ColumnNotFound; - for (self.data.items) |*row| { - try row.insert(self.allocator, target_index, ""); - } - self.expected_column_count = (self.expected_column_count orelse unreachable) + 1; - return target_index; - } - - /// Replace a value by a given new value, row index, and column index - pub fn replaceValue(self: *Table, row_index: usize, column_index: usize, new_value: []const u8) TableError!void { - if (row_index >= self.data.items.len) return TableError.RowNotFound; - if (column_index >= self.expected_column_count orelse 0) return TableError.ColumnNotFound; - if (std.mem.count(u8, new_value, self.settings.delimiter) != 0) return TableError.IllegalCharacter; - if (std.mem.count(u8, new_value, self.settings.terminator) != 0) return TableError.IllegalCharacter; - self.data.items[row_index].items[column_index] = new_value; - } - - /// Remove a column by its index - /// - /// All prior column indexes 
will be invalidated. - pub fn deleteColumnByIndex(self: *Table, column_index: usize) TableError!void { - if (self.expected_column_count == null) return TableError.NoData; - if (column_index >= self.expected_column_count orelse unreachable) return TableError.ColumnNotFound; - for (self.data.items) |*row| { - _ = row.orderedRemove(column_index); - } - self.expected_column_count = (self.expected_column_count orelse unreachable) - 1; - } - - /// Remove a row by its index - /// - /// All prior row indexes will be invalidated. - pub fn deleteRowByIndex(self: *Table, row_index: usize) TableError!void { - if (row_index >= self.data.items.len) return TableError.RowNotFound; - self.data.items[row_index].deinit(self.allocator); - _ = self.data.orderedRemove(row_index); - } - - /// Returns a slice of bytes containing the CSV data stored in the struct Table. - pub fn exportCSV(self: *Table, allocator: Allocator) TableError![]const u8 { - var csv: ArrayList(u8) = .empty; - for (self.data.items, 0..) |row, row_index| { - if (row_index > 0) { - try csv.appendSlice(allocator, self.settings.terminator); - } - for (row.items, 0..) |column, column_index| { - if (column_index > 0) { - try csv.appendSlice(allocator, self.settings.delimiter); - } - try csv.appendSlice(allocator, column); - } - } - return csv.toOwnedSlice(allocator); - } -}; +const table = @import("table.zig"); +const schema = @import("schema.zig"); + +/// Thin root module that re-exports the core Table implementation and the schema +/// module. This avoids circular import issues by keeping the core implementation +/// in `table.zig` while allowing consumers to import this single entrypoint. 
+pub const Table = table.Table; +pub const Settings = table.Settings; +pub const TableError = table.TableError; +pub const StructureError = schema.StructureError; +pub const ParseResult = schema.ParseResult; +pub const StructuredTable = schema.StructuredTable; diff --git a/src/schema.zig b/src/schema.zig new file mode 100644 index 0000000..39611ab --- /dev/null +++ b/src/schema.zig @@ -0,0 +1,344 @@ +const std = @import("std"); +const table = @import("table.zig"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Table = table.Table; +const TableError = table.TableError; +const Settings = table.Settings; + +/// Errors that can occur when mapping CSV data to a structured type +pub const StructureError = error{ + /// Multiple columns found with the same name + AmbiguousColumn, + /// Number of columns does not correspond to number of fields expected + InvalidColumnCount, + /// No column found for a given expected field name + MissingColumn, + /// Column value could not be converted to expected field type + UnexpectedType, +}; + +/// Result of parsing a row into a structured type +/// Used to provide detailed error information when parsing fails +pub fn ParseResult(table_schema: type) type { + return union(enum) { + /// Successfully parsed structured value + ok: struct { + /// The parsed structured value + value: table_schema, + }, + /// Error occurred while parsing structured value + @"error": struct { + /// The kind of structure error that occurred + kind: (StructureError || TableError), + /// The name of the field that caused the error + field_name: ?[]const u8, + /// The expected type of the field that caused the error + field_type: ?[]const u8, + /// The CSV value that caused the error + csv_value: ?[]const u8, + }, + }; +} + +/// A high-level table that maps CSV data to a struct type +pub fn StructuredTable(table_schema: type) type { + const schema_info = @typeInfo(table_schema); + if (schema_info != .@"struct") { + 
@compileError("table_schema must be a struct type"); + } + return struct { + /// The underlying CSV table + table: Table, + /// The settings that should be used when parsing the CSV data + settings: Settings, + /// The allocator used for memory management + allocator: Allocator, + /// Arena owning the strings that `serializeCsvValue` formats for int/float fields; released in `deinit` + arena_allocator: std.heap.ArenaAllocator, + + const Self = @This(); + + /// Initialize a new StructuredTable + pub fn init(allocator: Allocator, settings: Settings) Self { + return Self{ + .table = Table.init(allocator, settings), + .settings = settings, + .allocator = allocator, + .arena_allocator = std.heap.ArenaAllocator.init(allocator), + }; + } + + /// Deinitialize the StructuredTable and free its resources + pub fn deinit(self: *Self) void { + self.arena_allocator.deinit(); + self.table.deinit(); + } + + /// Parse CSV data into the StructuredTable + pub fn parse(self: *Self, csv_data: []const u8) (TableError || StructureError)!void { + try self.table.parse(csv_data); + if (self.table.getColumnCount() != schema_info.@"struct".fields.len) return StructureError.InvalidColumnCount; + } + + /// Get the number of data rows in the StructuredTable + /// + /// StructuredTable exposes only data rows (the header row at table index 0 + /// is excluded). This returns the count of data rows, guarding against + /// unsigned underflow when the table is empty. + pub fn getRowCount(self: Self) usize { + const count = self.table.getRowCount(); + if (count == 0) return 0; + return count - 1; + } + + /// Convert a data-row index to the corresponding underlying table index. + /// + /// The underlying `Table` stores the header row at table index 0, while + /// data rows start at 1. This helper maps a data-row index to the `Table` insert index. + fn headerAwareToTableIndex(data_index: usize) usize { + return data_index + 1; + } + + /// Convert an underlying table index to a data-row index. 
+ /// + /// Returns `null` when the provided table index refers to the header row (0). + fn headerAwareToDataIndex(table_index: usize) ?usize { + if (table_index == 0) return null; + return table_index - 1; + } + + /// Deserialize a CSV value into the appropriate field type + fn deserializeCsvValue(self: Self, comptime T: type, value: []const u8) (TableError || StructureError)!T { + const type_info = @typeInfo(T); + if (type_info == .pointer and + type_info.pointer.size == .slice and + type_info.pointer.child == u8) + { + return value; + } + switch (type_info) { + .optional => { + const child_type = type_info.optional.child; + if (value.len == 0) { + return null; + } else { + return try self.deserializeCsvValue(child_type, value); + } + }, + .bool => { + // Match case-insensitively in place: no temporary lowercase copy, + // so this branch allocates nothing and cannot fail with OutOfMemory. + for ([_][]const u8{ "true", "1", "yes", "y" }) |true_word| { + if (std.ascii.eqlIgnoreCase(true_word, value)) { + return true; + } + } + for ([_][]const u8{ "false", "0", "no", "n" }) |false_word| { + if (std.ascii.eqlIgnoreCase(false_word, value)) { + return false; + } + } + return StructureError.UnexpectedType; + }, + .int => { + return std.fmt.parseInt(T, value, 0) catch StructureError.UnexpectedType; + }, + .float => { + return std.fmt.parseFloat(T, value) catch StructureError.UnexpectedType; + }, + else => { + @compileError("unsupported field type for '" ++ @typeName(T) ++ "'"); + }, + } + } + + /// Serialize a field value into a CSV-compatible string + fn serializeCsvValue(self: *Self, comptime T: type, value: T) TableError![]const u8 { + const type_info = @typeInfo(T); + if (type_info == .pointer and + type_info.pointer.size == .slice and + type_info.pointer.child == u8) + { + return value; + } + switch (type_info) { + .optional => { + const child_type = type_info.optional.child; + if (value == null) { + return ""; + } else { + return try 
self.serializeCsvValue(child_type, value.?); + } + }, + .bool => { + if (value) { + return "true"; + } else { + return "false"; + } + }, + .int, .float => { + return std.fmt.allocPrint(self.arena_allocator.allocator(), "{d}", .{value}) catch TableError.OutOfMemory; + }, + else => { + @compileError("unsupported field type for '" ++ @typeName(T) ++ "'"); + }, + } + } + + /// Get a structured row from the StructuredTable by index + /// + /// Example looping through all rows: + /// ```zig + /// var table = StructuredTable(MyStruct).init(allocator, settings); + /// defer table.deinit(); + /// try table.parse(csv_data); + /// for (0..table.getRowCount()) |index| { + /// const row_result = try table.getRow(index); + /// if (row_result == .@"error") { + /// // Handle error + /// break; + /// } + /// const row = row_result.ok.value; + /// } + /// ``` + pub fn getRow(self: Self, row_index: usize) TableError!ParseResult(table_schema) { + if (row_index >= self.getRowCount()) return TableError.RowNotFound; + var out: table_schema = undefined; + inline for (schema_info.@"struct".fields) |field| { + const field_name = field.name; + const column_indexes = self.table.findColumnIndexesByValue(self.allocator, 0, field_name) catch return ParseResult(table_schema){ + .@"error" = .{ + .kind = StructureError.MissingColumn, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = null, + }, + }; + defer self.allocator.free(column_indexes); + if (column_indexes.len > 1) return ParseResult(table_schema){ + .@"error" = .{ + .kind = StructureError.AmbiguousColumn, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = null, + }, + }; + const rows = self.table.getColumnByIndex(self.allocator, column_indexes[0]) catch return ParseResult(table_schema){ + .@"error" = .{ + .kind = StructureError.MissingColumn, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = null, + }, + }; + defer 
self.allocator.free(rows); + const value = rows[row_index + 1]; + const parsed = (&self).deserializeCsvValue(field.type, value) catch |err| return ParseResult(table_schema){ + .@"error" = .{ + .kind = err, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = value, + }, + }; + @field(out, field_name) = parsed; + } + return ParseResult(table_schema){ + .ok = .{ + .value = out, + }, + }; + } + + /// Edit a structured row in the StructuredTable by index + /// + /// Example: + /// ```zig + /// var table = StructuredTable(MyStruct).init(allocator, settings); + /// defer table.deinit(); + /// try table.parse(csv_data); + /// const row = try table.getRow(0); + /// var value = row.ok.value; + /// value.my_field = 42; + /// try table.editRow(0, value); + /// ``` + pub fn editRow(self: *Self, row_index: usize, row: table_schema) TableError!ParseResult(table_schema) { + if (row_index >= self.getRowCount()) return TableError.RowNotFound; + inline for (schema_info.@"struct".fields) |field| { + const field_name = field.name; + const column_indexes = self.table.findColumnIndexesByValue(self.allocator, 0, field_name) catch return ParseResult(table_schema){ + .@"error" = .{ + .kind = StructureError.MissingColumn, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = null, + }, + }; + defer self.allocator.free(column_indexes); + if (column_indexes.len > 1) return ParseResult(table_schema){ + .@"error" = .{ + .kind = StructureError.AmbiguousColumn, + .field_name = field_name, + .field_type = @typeName(field.type), + .csv_value = null, + }, + }; + const column_index = column_indexes[0]; + const table_index = headerAwareToTableIndex(row_index); + const value = try self.serializeCsvValue(field.type, @field(row, field_name)); + try self.table.replaceValue(table_index, column_index, value); + } + return ParseResult(table_schema){ + .ok = .{ + .value = row, + }, + }; + } + + /// Insert a structured row into the StructuredTable at the 
specified index + /// + /// If `row_index` is `null`, the row is appended to the end of the table. + /// + /// Notes on indexing: + /// - The underlying `Table` stores a header row at table index 0. + /// - StructuredTable's `row_index` values are 0-based and refer only to data rows + /// (so structured `0` corresponds to table index `1`). + pub fn insertRow(self: *Self, row_index: ?usize, row: table_schema) TableError!void { + if (self.table.getRowCount() == 0) { + _ = try self.table.insertEmptyRow(null); + inline for (schema_info.@"struct".fields) |field| { + const header_row_index = try self.table.insertEmptyColumn(null); + try self.table.replaceValue(0, header_row_index, field.name); + } + } + const table_index = if (row_index) |index| headerAwareToTableIndex(index) else null; + const index = try self.table.insertEmptyRow(table_index); + const data_index = headerAwareToDataIndex(index) orelse return TableError.RowNotFound; + _ = try self.editRow(data_index, row); + } + + /// Delete a structured row from the StructuredTable by index + pub fn deleteRow(self: *Self, row_index: usize) TableError!void { + if (row_index >= self.getRowCount()) return TableError.RowNotFound; + try self.table.deleteRowByIndex(row_index + 1); + } + + /// Export the StructuredTable to CSV format + /// + /// Returns the CSV data as a byte slice + /// + /// Example: + /// ```zig + /// var table = StructuredTable(MyStruct).init(allocator, settings); + /// defer table.deinit(); + /// try table.parse(csv_data); + /// const csv_output = try table.exportCSV(allocator); + /// defer allocator.free(csv_output); + /// ``` + pub fn exportCSV(self: *Self, allocator: Allocator) TableError![]const u8 { + return self.table.exportCSV(allocator); + } + }; +} diff --git a/src/table.zig b/src/table.zig new file mode 100644 index 0000000..1ad4f55 --- /dev/null +++ b/src/table.zig @@ -0,0 +1,201 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; + +/// A 
structure for storing settings for use with struct Table +pub const Settings = struct { + /// The delimiter that separates the values (aka. separator) + delimiter: []const u8, + /// The terminator that defines when a row of delimiter-separated values is terminated + terminator: []const u8, + + pub fn default() Settings { + return Settings{ + .delimiter = ",", + .terminator = "\n", + }; + } +}; + +/// Errors that may return from struct Table +pub const TableError = error{ + ColumnNotFound, + IllegalCharacter, + InconsistentRowLength, + OutOfMemory, + RowNotFound, + ValueNotFound, +}; + +/// A structure for parsing and manipulating CSV data +pub const Table = struct { + settings: Settings, + allocator: Allocator, + expected_column_count: ?usize, + data: ArrayList(ArrayList([]const u8)), + + pub fn init(allocator: Allocator, settings: Settings) Table { + return Table{ + .settings = settings, + .allocator = allocator, + .expected_column_count = null, + .data = .empty, + }; + } + + pub fn deinit(self: *Table) void { + for (self.data.items) |*row| { + row.deinit(self.allocator); + } + self.data.deinit(self.allocator); + } + + pub fn parse(self: *Table, csv_data: []const u8) TableError!void { + const csv_data_sanitized = std.mem.trimRight(u8, csv_data, self.settings.terminator); + var rows = std.mem.splitSequence(u8, csv_data_sanitized, self.settings.terminator); + while (rows.next()) |row| { + const value_count = try self.parseRow(row); + if (self.expected_column_count == null) { + self.expected_column_count = value_count; + } else if (value_count != self.expected_column_count) { + return TableError.InconsistentRowLength; + } + } + } + + /// Parse a single row of CSV data and append it to the table + /// + /// Returns the number of values parsed in the row. + fn parseRow(self: *Table, row: []const u8) TableError!usize { + var values: ArrayList([]const u8) = .empty; + // On any failure below, free the partially built row so it does not leak. 
+ errdefer values.deinit(self.allocator); + var columns = std.mem.splitSequence(u8, row, self.settings.delimiter); + while (columns.next()) |value| { + try values.append(self.allocator, value); + } + try self.data.append(self.allocator, values); + return values.items.len; 
+ } + + pub fn getRowCount(self: Table) usize { + return self.data.items.len; + } + + pub fn getColumnCount(self: Table) usize { + return self.expected_column_count orelse 0; + } + + pub fn findColumnIndexesByValue(self: Table, allocator: Allocator, row_index: usize, searched_value: []const u8) TableError![]usize { + if (row_index >= self.data.items.len) return TableError.RowNotFound; + var column_indexes: ArrayList(usize) = .empty; + const row = self.data.items[row_index]; + for (row.items, 0..) |column_value, column_index| { + if (std.mem.eql(u8, column_value, searched_value)) { + try column_indexes.append(allocator, column_index); + } + } + if (column_indexes.items.len == 0) { + column_indexes.deinit(allocator); + return TableError.ValueNotFound; + } + return column_indexes.toOwnedSlice(allocator); + } + + pub fn findRowIndexesByValue(self: Table, allocator: Allocator, column_index: usize, searched_value: []const u8) TableError![]usize { + if (self.expected_column_count == null) return TableError.ColumnNotFound; + const col_count = self.expected_column_count orelse 0; + if (column_index >= col_count) return TableError.ColumnNotFound; + var row_indexes: ArrayList(usize) = .empty; + for (self.data.items, 0..) 
|row, row_index| {
+            if (row.items.len <= column_index) continue; // skip inconsistent rows
+            if (std.mem.eql(u8, row.items[column_index], searched_value)) {
+                try row_indexes.append(allocator, row_index);
+            }
+        }
+        if (row_indexes.items.len == 0) {
+            row_indexes.deinit(allocator);
+            return TableError.ValueNotFound;
+        }
+        return row_indexes.toOwnedSlice(allocator);
+    }
+
+    /// Returns the value of every row at `column_index`, in row order.
+    ///
+    /// Rows too short to contain the column contribute an empty string.
+    /// The outer slice is allocated with `allocator` and must be freed by the
+    /// caller; the inner strings are borrowed from the table, not copied.
+    /// Returns `TableError.ColumnNotFound` when no column count is set or
+    /// `column_index` is out of range.
+    pub fn getColumnByIndex(self: Table, allocator: Allocator, column_index: usize) TableError![]const []const u8 {
+        const col_count = self.expected_column_count orelse return TableError.ColumnNotFound;
+        if (column_index >= col_count) return TableError.ColumnNotFound;
+        var column_values: ArrayList([]const u8) = .empty;
+        // Release the partially built list if an append below fails.
+        errdefer column_values.deinit(allocator);
+        for (self.data.items) |row| {
+            const value: []const u8 = if (column_index < row.items.len) row.items[column_index] else "";
+            try column_values.append(allocator, value);
+        }
+        return column_values.toOwnedSlice(allocator);
+    }
+
+    /// Returns the cells of the row at `row_index`.
+    ///
+    /// The slice is a view into the table's own row storage; nothing is copied.
+    /// Returns `TableError.RowNotFound` when `row_index` is out of range.
+    pub fn getRowByIndex(self: Table, row_index: usize) TableError![]const []const u8 {
+        if (row_index >= self.data.items.len) return TableError.RowNotFound;
+        return self.data.items[row_index].items;
+    }
+
+    /// Inserts a row of empty strings at `row_index`, or appends it when
+    /// `row_index` is null.
+    ///
+    /// The new row has `expected_column_count` cells (none if the count is unset).
+    /// Returns the index the row was inserted at, or `TableError.RowNotFound`
+    /// when `row_index` is greater than the current row count.
+    pub fn insertEmptyRow(self: *Table, row_index: ?usize) TableError!usize {
+        const target_index = row_index orelse self.data.items.len;
+        if (target_index > self.data.items.len) return TableError.RowNotFound;
+        var empty_row: ArrayList([]const u8) = .empty;
+        // The table only owns the row once the insert succeeds.
+        errdefer empty_row.deinit(self.allocator);
+        try empty_row.appendNTimes(self.allocator, "", self.expected_column_count orelse 0);
+        try self.data.insert(self.allocator, target_index, empty_row);
+        return target_index;
+    }
+
+    /// Inserts an empty cell into every row at `column_index`, or after the last
+    /// column when `column_index` is null, and increments the column count.
+    ///
+    /// Returns the index the column was inserted at, or
+    /// `TableError.ColumnNotFound` when `column_index` is greater than the
+    /// current column count.
+    pub fn insertEmptyColumn(self: *Table, column_index: ?usize) TableError!usize {
+        const col_count = self.expected_column_count orelse 0;
+        const target_index = column_index orelse col_count;
+        if (target_index > col_count) return TableError.ColumnNotFound;
+        // Reserve room in every row first so a failed allocation leaves the
+        // table unchanged instead of widening only some of the rows.
+        for (self.data.items) |*row| try row.ensureUnusedCapacity(self.allocator, 1);
+        for (self.data.items) |*row| {
+            // Rows shorter than the column count (inconsistent rows) receive
+            // the new cell at their end rather than indexing out of bounds.
+            row.insertAssumeCapacity(@min(target_index, row.items.len), "");
+        }
+        self.expected_column_count = col_count + 1;
+        return target_index;
+    }
+
+    /// Replaces the cell at (`row_index`, `column_index`) with `new_value`.
+    ///
+    /// `new_value` is stored by reference, not copied, so it must outlive the
+    /// table's use of it. A row that is too short to contain the column is
+    /// padded with empty strings first.
+    /// Returns `TableError.RowNotFound` / `TableError.ColumnNotFound` for
+    /// out-of-range indexes and `TableError.IllegalCharacter` when `new_value`
+    /// contains the configured delimiter or terminator.
+    pub fn replaceValue(self: *Table, row_index: usize, column_index: usize, new_value: []const u8) TableError!void {
+        if (row_index >= self.data.items.len) return TableError.RowNotFound;
+        const col_count = self.expected_column_count orelse return TableError.ColumnNotFound;
+        if (column_index >= col_count) return TableError.ColumnNotFound;
+        if (std.mem.indexOf(u8, new_value, self.settings.delimiter) != null) return TableError.IllegalCharacter;
+        if (std.mem.indexOf(u8, new_value, self.settings.terminator) != null) return TableError.IllegalCharacter;
+        const row = &self.data.items[row_index];
+        if (row.items.len <= column_index) {
+            // Inconsistent (short) row: grow it so the target cell exists.
+            try row.appendNTimes(self.allocator, "", column_index + 1 - row.items.len);
+        }
+        row.items[column_index] = new_value;
+    }
+
+    /// Removes the cell at `column_index` from every row and decrements the
+    /// column count.
+    ///
+    /// Returns `TableError.ColumnNotFound` when no column count is set or
+    /// `column_index` is out of range.
+    pub fn deleteColumnByIndex(self: *Table, column_index: usize) TableError!void {
+        const col_count = self.expected_column_count orelse return TableError.ColumnNotFound;
+        if (column_index >= col_count) return TableError.ColumnNotFound;
+        for (self.data.items) |*row| {
+            // Inconsistent (short) rows have no cell at this position to remove.
+            if (column_index < row.items.len) _ = row.orderedRemove(column_index);
+        }
+        self.expected_column_count = col_count - 1;
+    }
+
+    /// Removes the row at `row_index` and frees its cell list with the table's
+    /// allocator.
+    ///
+    /// Returns `TableError.RowNotFound` when `row_index` is out of range.
+    pub fn deleteRowByIndex(self: *Table, row_index: usize) TableError!void {
+        if (row_index >= self.data.items.len) return TableError.RowNotFound;
+        var removed_row = self.data.orderedRemove(row_index);
+        removed_row.deinit(self.allocator);
+    }
+
+    /// Serializes the table: cells are joined with the configured delimiter and
+    /// rows with the configured terminator (no terminator after the last row).
+    ///
+    /// The returned buffer is allocated with `allocator`; the caller frees it.
+    pub fn exportCSV(self: *Table, allocator: Allocator) TableError![]const u8 {
+        var csv: ArrayList(u8) = .empty;
+        // Do not leak the partially written buffer when an allocation fails.
+        errdefer csv.deinit(allocator);
+        for (self.data.items, 0..) |row, row_index| {
+            if (row_index > 0) try csv.appendSlice(allocator, self.settings.terminator);
+            for (row.items, 0..) |column, column_index| {
+                if (column_index > 0) try csv.appendSlice(allocator, self.settings.delimiter);
+                try csv.appendSlice(allocator, column);
+            }
+        }
+        return csv.toOwnedSlice(allocator);
+    }
+};
diff --git a/src/tests/root.zig b/src/tests/root.zig
new file mode 100644
index 0000000..6b3cb13
--- /dev/null
+++ b/src/tests/root.zig
@@ -0,0 +1,10 @@
+// Test harness root file: imports individual test files located under src/tests/
+// This file is the root source of the tests module; the test files reach the
+// library through the "zig_csv" import that build.zig adds to this module.
+
+// Import test files as anonymous comptime blocks so they don't create duplicate
+// top-level symbols in this module.
+comptime {
+    _ = @import("schema.zig");
+    _ = @import("table.zig");
+}
diff --git a/src/tests/schema.zig b/src/tests/schema.zig
new file mode 100644
index 0000000..c15198d
--- /dev/null
+++ b/src/tests/schema.zig
@@ -0,0 +1,268 @@
+const std = @import("std");
+const csv = @import("zig_csv");
+const expect = std.testing.expect;
+const allocator = std.testing.allocator;
+const StructuredTable = csv.StructuredTable;
+
+test "StructuredTable: Parse CSV into struct and access rows" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,Yes,0.3
+        \\Rex,7,0,0.11
+    );
+
+    try expect(table.getRowCount() == 2);
+
+    const row_1 = try table.getRow(0);
+    const row_1_value = row_1.ok.value;
+    const row_2 = try table.getRow(1);
+    const row_2_value = row_2.ok.value;
+    try expect(table.getRow(2) == csv.TableError.RowNotFound);
+
+    try expect(std.mem.eql(u8, row_1_value.name, "Fido"));
+    try expect(std.mem.eql(u8, row_2_value.name, "Rex"));
+    try expect(row_1_value.age == 4);
+    try expect(row_2_value.age == 7);
+    try expect(row_1_value.alive);
+    try expect(!row_2_value.alive);
+    try expect(row_1_value.foo == 0.3);
+    try expect(row_2_value.foo == 0.11);
+}
+
+test "StructuredTable: Edit struct row and export to CSV" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,true,0.3
+        \\Rex,7,false,0.11
+    );
+
+    try expect(table.getRowCount() == 2);
+
+    const row = try table.getRow(0);
+    var value = row.ok.value;
+    try expect(std.mem.eql(u8, value.name, "Fido"));
+
+    value.name = "Berta";
+    _ = try table.editRow(0, value);
+
+    const exported_csv = try table.exportCSV(allocator);
+    defer allocator.free(exported_csv);
+    const expected_csv =
+        \\name,age,alive,foo
+        \\Berta,4,true,0.3
+        \\Rex,7,false,0.11
+    ;
+    try expect(std.mem.eql(u8, exported_csv, expected_csv));
+}
+
+test "StructuredTable: Delete struct row" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,true,0.3
+        \\Rex,7,false,0.11
+    );
+
+    try expect(table.getRowCount() == 2);
+
+    try table.deleteRow(0);
+    try expect(table.getRowCount() == 1);
+
+    const exported_csv = try table.exportCSV(allocator);
+    defer allocator.free(exported_csv);
+    const expected_csv =
+        \\name,age,alive,foo
+        \\Rex,7,false,0.11
+    ;
+    try expect(std.mem.eql(u8, exported_csv, expected_csv));
+}
+
+test "StructuredTable: Create empty struct table and insert rows" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+
+    const new_row_1 = DogTable{
+        .name = "Buddy",
+        .age = 3,
+        .alive = true,
+        .foo = 0.5,
+    };
+    _ = try table.insertRow(null, new_row_1);
+
+    const new_row_2 = DogTable{
+        .name = "Max",
+        .age = 5,
+        .alive = false,
+        .foo = 0.2,
+    };
+    _ = try table.insertRow(null, new_row_2);
+
+    try expect(table.getRowCount() == 2);
+
+    const exported_csv = try table.exportCSV(allocator);
+    defer allocator.free(exported_csv);
+    const expected_csv =
+        \\name,age,alive,foo
+        \\Buddy,3,true,0.5
+        \\Max,5,false,0.2
+    ;
+    try expect(std.mem.eql(u8, exported_csv, expected_csv));
+}
+
+test "StructuredTable: Insert row at specific index" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,true,0.3
+        \\Rex,7,false,0.11
+    );
+
+    const new_row = DogTable{
+        .name = "Buddy",
+        .age = 3,
+        .alive = true,
+        .foo = 0.5,
+    };
+    _ = try table.insertRow(1, new_row);
+
+    try expect(table.getRowCount() == 3);
+
+    const exported_csv = try table.exportCSV(allocator);
+    defer allocator.free(exported_csv);
+    const expected_csv =
+        \\name,age,alive,foo
+        \\Fido,4,true,0.3
+        \\Buddy,3,true,0.5
+        \\Rex,7,false,0.11
+    ;
+    try expect(std.mem.eql(u8, exported_csv, expected_csv));
+}
+
+test "StructuredTable: Handle parsing error due to invalid csv type" {
+    const DogTable = struct {
+        name: []const u8,
+        age: u8,
+        alive: bool,
+        foo: f32,
+    };
+
+    var table = StructuredTable(DogTable).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,invalid_age,true,0.3
+    );
+    const result = try table.getRow(0);
+    const err = result.@"error";
+    try expect(err.kind == csv.StructureError.UnexpectedType);
+    try expect(std.mem.eql(u8, err.csv_value.?, "invalid_age"));
+    try expect(std.mem.eql(u8, err.field_name.?, "age"));
+    try expect(std.mem.eql(u8, err.field_type.?, "u8"));
+}
+
+test "StructuredTable: Optional fields parse and null behavior" {
+    const DogTableOpt = struct {
+        name: ?[]const u8,
+        age: ?u8,
+        alive: ?bool,
+        foo: ?f32,
+    };
+
+    var table = StructuredTable(DogTableOpt).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,Yes,0.3
+        \\,,,
+    );
+
+    try expect(table.getRowCount() == 2);
+
+    const row_0 = try table.getRow(0);
+    const value_0 = row_0.ok.value;
+    try expect(std.mem.eql(u8, value_0.name.?, "Fido"));
+    try expect(value_0.age.? == 4);
+    try expect(value_0.alive.?);
+    try expect(value_0.foo.? == 0.3);
+
+    const row_1 = try table.getRow(1);
+    const value_1 = row_1.ok.value;
+    try expect(value_1.name == null);
+    try expect(value_1.age == null);
+    try expect(value_1.alive == null);
+    try expect(value_1.foo == null);
+}
+
+test "StructuredTable: Optional fields edit writes empty when null" {
+    const DogTableOpt = struct {
+        name: ?[]const u8,
+        age: ?u8,
+        alive: ?bool,
+        foo: ?f32,
+    };
+
+    var table = StructuredTable(DogTableOpt).init(allocator, csv.Settings.default());
+    defer table.deinit();
+    try table.parse(
+        \\name,age,alive,foo
+        \\Fido,4,true,0.3
+    );
+
+    const row = try table.getRow(0);
+    var value = row.ok.value;
+    value.name = null;
+    value.age = null;
+    value.alive = null;
+    value.foo = null;
+
+    _ = try table.editRow(0, value);
+
+    const exported = try table.exportCSV(allocator);
+    defer allocator.free(exported);
+    const expected_csv =
+        \\name,age,alive,foo
+        \\,,,
+    ;
+    try expect(std.mem.eql(u8, exported, expected_csv));
+}
diff --git a/src/tests.zig b/src/tests/table.zig
similarity index 98%
rename from src/tests.zig
rename to src/tests/table.zig
index e6af172..9243cfe 100644
--- a/src/tests.zig
+++ b/src/tests/table.zig
@@ -1,9 +1,8 @@
-//! Unit and Integration tests for the module scope `src/*.zig`
-//! [Released under GNU LGPLv3]
 const std = @import("std");
-const csv = @import("root.zig");
+const csv = @import("zig_csv");
 const expect = std.testing.expect;
 const allocator = std.testing.allocator;
+const StructuredTable = csv.StructuredTable;
 
 test "Initialize Table using Table.parse and export to CSV via Table.exportCSV" {
     var table = csv.Table.init(allocator, csv.Settings.default());
@@ -72,7 +71,7 @@ test "Get number of columns using Table.getColumnCount" {
         \\id,animal name,scientific name
         \\0,rat,rattus rattus
     );
-    const column_count = try table.getColumnCount();
+    const column_count = table.getColumnCount();
     try expect(column_count == 3);
 }