diff --git a/build.py b/build.py index 9b82104b..e8ae6e80 100644 --- a/build.py +++ b/build.py @@ -372,6 +372,13 @@ def clean_module(module: Module, verbose: bool = False) -> bool: def verify_binary(module: Module) -> Optional[str]: if module.build_dir is None: return None + + def report_path(path: Path) -> str: + try: + return str(path.relative_to(ROOT)) + except ValueError: + return str(path) + path = module.build_dir if module.name == "backend": @@ -379,9 +386,9 @@ def verify_binary(module: Module) -> Optional[str]: if not target.exists(): target = path / "release" / module.name if target.exists(): - return str(target) + return report_path(target) if path.exists(): - return str(path) + return report_path(path) return None def run_cmd(cmd: list[str], **kwargs) -> tuple[bool, str]: @@ -577,7 +584,17 @@ def generate_logd( timeout=300, ) if sr.returncode != 0: - error = sr.stderr.strip() or sr.stdout.strip() or "encryptly pack failed" + stderr = sr.stderr.strip() + stdout = sr.stdout.strip() + if stderr: + error = stderr + elif stdout: + error = ( + f"encryptly pack exited {sr.returncode} without creating " + f"{display_logd}; stdout was {len(stdout)} bytes" + ) + else: + error = f"encryptly pack exited {sr.returncode}" print( f" {color('✗', Colors.RED)} {logd_path.relative_to(ROOT)} creation failed: " f"{error}" @@ -720,7 +737,7 @@ def main(): print(f"\n {color('⚠ Some tools missing - will try anyway:', Colors.YELLOW)}") for m in missing: print(f" {m}") - print(f" {color('Not all modules will build. That\'s fine.', Colors.GRAY)}") + print(" " + color("Not all modules will build. That's fine.", Colors.GRAY)) else: print(f" {color('✓ All prerequisites found', Colors.GREEN)}") diff --git a/diagnostic/build-81f3e49f.json b/diagnostic/build-81f3e49f.json new file mode 100644 index 00000000..ca1d6583 --- /dev/null +++ b/diagnostic/build-81f3e49f.json @@ -0,0 +1,23 @@ +{ + "generated_at": "2026-06-21T04:38:18.625587+00:00", + "commit": "81f3e49f", + "diagnostic_logd": null, + "diagnostic_logd_error": "encryptly pack exited 1 without creating diagnostic/build-81f3e49f.logd; stdout was 20 bytes", + "chunked": false, + "chunk_size_bytes": null, + "password": null, + "decrypt_command": null, + "total_modules": 1, + "passed": 1, + "failed": 0, + "modules": [ + { + "name": "frailbox", + "status": "PASS", + "elapsed_seconds": 0.012, + "artifact": "frailbox/frailbox", + "output": "make: Nothing to be done for `all'." + } + ], + "pr_note": "Encrypted diagnostic logd artifact was not created; include this JSON report showing why. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging." +} diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md index 58642e7b..2dd004bf 100644 --- a/docs/OPERATIONS.md +++ b/docs/OPERATIONS.md @@ -286,6 +286,11 @@ Audit logs are retained for 365 days and include: 3. Check database connectivity: `kubectl exec -n tent-production deploy/backend-api -- nc -zv postgresql 5432` 4. Check resource limits: `kubectl describe pod -n tent-production -l app=backend-api` +**Frailbox legacy logger cannot write to its configured file** +1. Check stderr for a `frailbox logger` diagnostic that includes the failing path and OS error. +2. Fix parent directory permissions, disk capacity, or the configured `LOG_FILE` path. +3. Expect logs to continue on stderr until the process is restarted with a writable `LOG_FILE`. + **High latency** 1. Check database query performance: `SELECT * FROM pg_stat_activity WHERE state = 'active'` 2. Check connection pool utilization diff --git a/frailbox/Makefile b/frailbox/Makefile index d4383d85..90db7b3c 100644 --- a/frailbox/Makefile +++ b/frailbox/Makefile @@ -3,7 +3,10 @@ CFLAGS = -Wall -Wextra -Wpedantic -std=c2x -O2 -g CFLAGS += -D_FORTIFY_SOURCE=3 CFLAGS += -fstack-protector-strong CFLAGS += -fPIE -LDFLAGS = -pie -z relro -z now +LDFLAGS = -pie +ifeq ($(shell uname -s),Linux) +LDFLAGS += -z relro -z now +endif SRCDIR = src INCDIR = include @@ -12,10 +15,11 @@ BUILDDIR = build SRCS = $(wildcard $(SRCDIR)/*.c) main.c OBJS = $(patsubst %.c, $(BUILDDIR)/%.o, $(SRCS)) DEPS = $(OBJS:.o=.d) +LOGGER_ERROR_TEST = $(BUILDDIR)/tests/test_logger_error_handling TARGET = frailbox -.PHONY: all clean +.PHONY: all clean test test-logger-error all: $(TARGET) @@ -37,7 +41,14 @@ distclean: clean test: $(TARGET) ./$(TARGET) --sandbox-type seccomp --memory-limit 64 --verbose +$(LOGGER_ERROR_TEST): tests/test_logger_error_handling.c src/logger.c include/logger.h + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) -I$(INCDIR) tests/test_logger_error_handling.c src/logger.c -o $@ $(LDFLAGS) -lpthread + +test-logger-error: $(LOGGER_ERROR_TEST) + ./$(LOGGER_ERROR_TEST) + valgrind: $(TARGET) valgrind --leak-check=full --show-leak-kinds=all ./$(TARGET) -.PHONY: all clean distclean test valgrind +.PHONY: all clean distclean test valgrind test-logger-error diff --git a/frailbox/include/logger.h b/frailbox/include/logger.h index b9e2248c..24d2d9ae 100644 --- a/frailbox/include/logger.h +++ b/frailbox/include/logger.h @@ -236,6 +236,9 @@ int log_get_level(void); * The log message is written to the configured output (file or stderr). * Messages are truncated at 4096 bytes. Truncated messages get a * "[TRUNCATED]" suffix. The truncation is silent - no error is returned. + * If the configured log file cannot be written or flushed, the logger + * emits a diagnostic to stderr and falls back to stderr for subsequent + * messages. * * This function is thread-safe. It acquires a global mutex before * formatting and writing the message. For high-throughput logging, diff --git a/frailbox/src/arena.c b/frailbox/src/arena.c index d4753b53..839026ac 100644 --- a/frailbox/src/arena.c +++ b/frailbox/src/arena.c @@ -6,6 +6,10 @@ #include #include +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0 +#endif + #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1)) #define DEFAULT_ALIGNMENT 16 diff --git a/frailbox/src/logger.c b/frailbox/src/logger.c index f1a7aa1a..5f2b7361 100644 --- a/frailbox/src/logger.c +++ b/frailbox/src/logger.c @@ -144,6 +144,12 @@ static int g_include_timestamps = 1; */ static int g_include_source_info = 0; +/** + * Configured log file path. Kept only for diagnostics when a later write or + * flush fails after the file has already been opened. + */ +static char g_log_file_path[1024] = ""; + /** * Ring buffer for crash reporter. Stores the last N log entries. * This is a circular buffer. When full, old entries are overwritten. @@ -207,6 +213,72 @@ static void get_current_time(struct tm *result, struct timeval *tv) } } +static void remember_log_file_path(const char *path) +{ + if (path == NULL || path[0] == '\0') { + g_log_file_path[0] = '\0'; + return; + } + + int written = snprintf(g_log_file_path, sizeof(g_log_file_path), "%s", path); + if (written < 0 || (size_t)written >= sizeof(g_log_file_path)) { + snprintf(g_log_file_path, sizeof(g_log_file_path), "%.*s...", + (int)sizeof(g_log_file_path) - 4, path); + } +} + +static const char *configured_log_file_path(void) +{ + return g_log_file_path[0] != '\0' ? g_log_file_path : "(unknown)"; +} + +static void report_log_sink_error(const char *action, const char *path, int errnum) +{ + if (errnum == 0) { + errnum = EIO; + } + + fprintf(stderr, + "frailbox logger: failed to %s '%s': %s; using stderr fallback\n", + action, + (path != NULL && path[0] != '\0') ? path : "(unknown)", + strerror(errnum)); + fflush(stderr); +} + +static FILE *open_log_file_or_stderr(const char *path) +{ + FILE *file = fopen(path, "a"); + if (file == NULL) { + report_log_sink_error("open log file", path, errno); + return stderr; + } + + return file; +} + +static int write_log_stream(FILE *stream, const char *buffer) +{ + if (fputs(buffer, stream) == EOF) { + return -1; + } + if (fflush(stream) == EOF) { + return -1; + } + return 0; +} + +static void fallback_log_file_to_stderr(int errnum) +{ + report_log_sink_error("write to log file", configured_log_file_path(), errnum); + + if (g_log_file != NULL && g_log_file != stderr) { + clearerr(g_log_file); + (void)fclose(g_log_file); + } + g_log_file = stderr; +} + /** * Formats the log prefix (timestamp, level, source location). * Writes directly into the buffer to avoid extra string copies. @@ -377,14 +449,10 @@ int log_init(void) const char *env_log_file = getenv("LOG_FILE"); if (env_log_file != NULL && strlen(env_log_file) > 0) { - g_log_file = fopen(env_log_file, "a"); - if (g_log_file == NULL) { - fprintf(stderr, "Failed to open log file '%s': %s\n", - env_log_file, strerror(errno)); - /* Fall back to stderr */ - g_log_file = stderr; - } + remember_log_file_path(env_log_file); + g_log_file = open_log_file_or_stderr(env_log_file); } else { + remember_log_file_path(NULL); g_log_file = stderr; } @@ -527,11 +595,15 @@ void log_message(int level, const char *file, int line, const char *fmt, ...) /* Write to output */ if (g_log_file != NULL) { - fputs(buffer, g_log_file); - fflush(g_log_file); + if (write_log_stream(g_log_file, buffer) != 0) { + int write_errno = errno; + if (g_log_file != stderr) { + fallback_log_file_to_stderr(write_errno); + (void)write_log_stream(stderr, buffer); + } + } } else { - fputs(buffer, stderr); - fflush(stderr); + (void)write_log_stream(stderr, buffer); } pthread_mutex_unlock(&log_mutex); @@ -551,11 +623,19 @@ void log_shutdown(void) pthread_mutex_lock(&log_mutex); if (g_log_file != NULL && g_log_file != stderr) { - fflush(g_log_file); - fclose(g_log_file); + if (fflush(g_log_file) == EOF) { + report_log_sink_error("flush log file during shutdown", + configured_log_file_path(), errno); + } + if (fclose(g_log_file) == EOF) { + report_log_sink_error("close log file during shutdown", + configured_log_file_path(), errno); + } g_log_file = NULL; } + remember_log_file_path(NULL); + g_log_level = LOG_LEVEL_NONE; pthread_mutex_unlock(&log_mutex); diff --git a/frailbox/src/sandbox.c b/frailbox/src/sandbox.c index d9ceb645..cecd33f5 100644 --- a/frailbox/src/sandbox.c +++ b/frailbox/src/sandbox.c @@ -8,7 +8,9 @@ #include #include #include +#ifdef __linux__ #include +#endif #include #ifndef PR_SET_NO_NEW_PRIVS @@ -42,10 +44,12 @@ int sandbox_apply(sandbox_t *sandbox) { return 0; } +#ifdef __linux__ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { fprintf(stderr, "warning: PR_SET_NO_NEW_PRIVS failed: %s\n", strerror(errno)); } +#endif if (sandbox->config.memory_limit_bytes > 0) { struct rlimit rl = { diff --git a/frailbox/tests/test_logger_error_handling.c b/frailbox/tests/test_logger_error_handling.c new file mode 100644 index 00000000..0f277f53 --- /dev/null +++ b/frailbox/tests/test_logger_error_handling.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include +#include + +#include "../include/logger.h" + +#define CAPTURE_SIZE 8192 + +static int failures = 0; + +static void fail(const char *message) +{ + fprintf(stderr, "FAIL: %s\n", message); + failures++; +} + +static int read_capture(const char *path, char *buffer, size_t buffer_size) +{ + FILE *file = fopen(path, "r"); + if (file == NULL) { + return -1; + } + + size_t bytes = fread(buffer, 1, buffer_size - 1, file); + buffer[bytes] = '\0'; + fclose(file); + return 0; +} + +static void expect_contains(const char *text, const char *needle, const char *context) +{ + if (strstr(text, needle) == NULL) { + fprintf(stderr, "Missing '%s' in %s\n", needle, context); + failures++; + } +} + +static int redirect_stderr_to(const char *path, int *saved_stderr) +{ + fflush(stderr); + + *saved_stderr = dup(STDERR_FILENO); + if (*saved_stderr < 0) { + return -1; + } + + int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + close(*saved_stderr); + *saved_stderr = -1; + return -1; + } + + if (dup2(fd, STDERR_FILENO) < 0) { + close(fd); + close(*saved_stderr); + *saved_stderr = -1; + return -1; + } + + close(fd); + clearerr(stderr); + return 0; +} + +static void restore_stderr(int saved_stderr) +{ + fflush(stderr); + dup2(saved_stderr, STDERR_FILENO); + close(saved_stderr); + clearerr(stderr); +} + +static void clear_logger_env(void) +{ + unsetenv("LOG_FILE"); + unsetenv("LOG_LEVEL"); + unsetenv("LOG_MODULE"); + unsetenv("LOG_SOURCE_INFO"); + unsetenv("LOG_NO_TIMESTAMPS"); +} + +static int test_open_failure_uses_stderr(void) +{ + char template[] = "/tmp/frailbox-logger-open-XXXXXX"; + char *dir = mkdtemp(template); + if (dir == NULL) { + fail("mkdtemp failed"); + return -1; + } + + char capture_path[512]; + char log_path[512]; + snprintf(capture_path, sizeof(capture_path), "%s/stderr.txt", dir); + snprintf(log_path, sizeof(log_path), "%s/missing/log.txt", dir); + + clear_logger_env(); + setenv("LOG_FILE", log_path, 1); + setenv("LOG_LEVEL", "info", 1); + setenv("LOG_NO_TIMESTAMPS", "1", 1); + + int saved_stderr = -1; + if (redirect_stderr_to(capture_path, &saved_stderr) != 0) { + fail("redirect_stderr_to failed"); + return -1; + } + + log_init(); + LOG_INFO("fallback-open-check"); + log_shutdown(); + restore_stderr(saved_stderr); + + char capture[CAPTURE_SIZE]; + if (read_capture(capture_path, capture, sizeof(capture)) != 0) { + fail("failed to read open-failure capture"); + } else { + expect_contains(capture, "failed to open log file", "open failure capture"); + expect_contains(capture, "using stderr fallback", "open failure capture"); + expect_contains(capture, "fallback-open-check", "open failure capture"); + } + + unlink(capture_path); + rmdir(dir); + clear_logger_env(); + return 0; +} + +static int test_write_failure_uses_stderr(void) +{ + if (access("/dev/full", W_OK) != 0) { + printf("SKIP: /dev/full is unavailable; write failure fallback not exercised\n"); + return 0; + } + + char template[] = "/tmp/frailbox-logger-write-XXXXXX"; + char *dir = mkdtemp(template); + if (dir == NULL) { + fail("mkdtemp failed"); + return -1; + } + + char capture_path[512]; + snprintf(capture_path, sizeof(capture_path), "%s/stderr.txt", dir); + + clear_logger_env(); + setenv("LOG_FILE", "/dev/full", 1); + setenv("LOG_LEVEL", "info", 1); + setenv("LOG_NO_TIMESTAMPS", "1", 1); + + int saved_stderr = -1; + if (redirect_stderr_to(capture_path, &saved_stderr) != 0) { + fail("redirect_stderr_to failed"); + return -1; + } + + log_init(); + LOG_INFO("fallback-write-check"); + log_shutdown(); + restore_stderr(saved_stderr); + + char capture[CAPTURE_SIZE]; + if (read_capture(capture_path, capture, sizeof(capture)) != 0) { + fail("failed to read write-failure capture"); + } else { + expect_contains(capture, "failed to write to log file", "write failure capture"); + expect_contains(capture, "using stderr fallback", "write failure capture"); + expect_contains(capture, "fallback-write-check", "write failure capture"); + } + + unlink(capture_path); + rmdir(dir); + clear_logger_env(); + return 0; +} + +int main(void) +{ + test_open_failure_uses_stderr(); + test_write_failure_uses_stderr(); + + if (failures != 0) { + fprintf(stderr, "%d logger error-handling checks failed\n", failures); + return 1; + } + + printf("logger error-handling checks passed\n"); + return 0; +}