diff --git a/db/core/main.c b/db/core/main.c index b1e5a96c48..09dac4e309 100644 --- a/db/core/main.c +++ b/db/core/main.c @@ -355,11 +355,17 @@ tdb_rec_get_alloc(TDB *db, unsigned long key, TdbGetAllocCtx *ctx) } ctx->is_new = true; r = tdb_entry_alloc(db, key, &ctx->len); - if (!r) { + if (unlikely(!r)) { spin_unlock(&db->ga_lock); - return r; + return NULL; + } + if (unlikely(ctx->init_rec(r, ctx->ctx))) { + tdb_rec_put(db, r); + tdb_entry_remove(db, key, NULL, NULL, true); + spin_unlock(&db->ga_lock); + return NULL; } - ctx->init_rec(r, ctx->ctx); + tdb_entry_mark_complete(r); spin_unlock(&db->ga_lock); diff --git a/db/core/tdb.h b/db/core/tdb.h index 4473ecc0be..74b72e4106 100644 --- a/db/core/tdb.h +++ b/db/core/tdb.h @@ -160,7 +160,7 @@ typedef struct { typedef struct { bool (*eq_rec)(TdbRec *rec, void *ctx); int (*precreate_rec)(void *ctx); - void (*init_rec)(TdbRec *rec, void *ctx); + int (*init_rec)(TdbRec *rec, void *ctx); void *ctx; size_t len; bool is_new; diff --git a/fw/apm.c b/fw/apm.c index 9de85c5a72..c6b63951f4 100644 --- a/fw/apm.c +++ b/fw/apm.c @@ -1,7 +1,7 @@ /* * Tempesta FW * - * Copyright (C) 2016-2025 Tempesta Technologies, Inc. + * Copyright (C) 2016-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1659,8 +1659,8 @@ tfw_cfgop_apm_add_hm_req(const char *req_cstr, TfwApmHM *hm_entry) unsigned long size; size = strlen(req_cstr); - hm_entry->req = (char *)__get_free_pages(GFP_KERNEL, - get_order(size)); + hm_entry->req = (char *)tfw__get_free_pages(GFP_KERNEL, + get_order(size)); if (!hm_entry->req) { T_ERR_NL("Can't allocate memory for health monitoring request" "\n"); diff --git a/fw/cache.c b/fw/cache.c index 502ccb81b7..79c69acb24 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -4,7 +4,7 @@ * HTTP cache (RFC 7234). * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). 
- * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -812,14 +812,14 @@ tfw_cache_h2_write(TDB *db, TdbVRec **trec, TfwHttpResp *resp, char **data, TfwStr c = { 0 }; TdbVRec *tr = *trec; TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *it = &resp->iter; int r = 0, copied = 0; while (1) { c.data = *data; c.len = min(tr->data + tr->len - *data, (long)(len - copied)); if (!dc_iter->skip) { - r = tfw_http_msg_expand_data(it, &resp->msg.skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, + &resp->msg.skb_head, &c, &mit->start_off); if (unlikely(r)) break; @@ -887,11 +887,10 @@ static int tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, TdbVRec **trec, char **p, unsigned long *acc_len) { - int r; - TfwMsgIter *it = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; bool h2_mode = TFW_MSG_H2(resp->req); TfwDecodeCacheIter dc_iter = {}; + int r; if (h2_mode) resp->mit.start_off = FRAME_HEADER_SIZE; @@ -920,7 +919,8 @@ tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, H2_STAT_VAL_LEN)) return -E2BIG; - r = tfw_http_msg_expand_data(it, skb_head, &s_line, NULL); + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &s_line, NULL); if (unlikely(r)) return r; @@ -936,7 +936,8 @@ tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, *acc_len += dc_iter.acc_len; if (!h2_mode) { - r = tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL); + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &g_crlf, NULL); if (unlikely(r)) return r; @@ -1095,6 +1096,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) int r, i; TfwMsgIter *it; TfwHttpResp *resp; + TfwHttpMsg *hm; struct sk_buff **skb_head; unsigned long h_len = 0; TdbVRec *trec = &ce->trec; @@ -1106,11 +1108,12 @@ 
tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) if (!(resp = tfw_http_msg_alloc_resp_light(req))) goto err_create; + hm = (TfwHttpMsg *)resp; it = &resp->iter; skb_head = &resp->msg.skb_head; if (!TFW_MSG_H2(req)) { - r = tfw_http_prep_304(req, skb_head, it); + r = tfw_http_prep_304(req, skb_head, hm); if (unlikely(r)) goto err_setup; } else { @@ -1149,7 +1152,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) } if (!TFW_MSG_H2(req)) { - if (tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + if (tfw_http_msg_expand_data(hm, skb_head, &g_crlf, NULL)) goto err_setup; tfw_http_resp_fwd(resp); @@ -1166,7 +1169,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) return; err_setup: T_WARN("Can't build 304 response, key=%lx\n", ce->key); - tfw_http_msg_free((TfwHttpMsg *)resp); + tfw_http_msg_free(hm); err_create: tfw_http_resp_build_error(req); } @@ -2817,6 +2820,8 @@ tfw_cache_add_body_page(TfwMsgIter *it, char *p, int sz, bool h2) if (!h2) skb_frag_ref(it->skb, it->frag); ss_skb_adjust_data_len(it->skb, sz); + if (!h2) + ss_skb_adjust_client_mem(it->skb, -sz); return 0; } @@ -2901,7 +2906,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, || (sh_frag && tfw_cache_should_append_body_skb(it, body_sz, chunked_body))) { - if ((r = tfw_msg_iter_append_skb(it))) + if ((r = tfw_http_msg_append_skb((TfwHttpMsg *)resp))) return r; } if (sh_frag) @@ -2935,7 +2940,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, __TFW_STR_CH(&b_len, 0)->len = digs; b_len.len += digs; - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &b_len, NULL); if (unlikely(r)) return r; @@ -2965,7 +2970,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, p = trec->data; if (it->frag + 1 == MAX_SKB_FRAGS - && (r = tfw_msg_iter_append_skb(it))) + && (r = tfw_http_msg_append_skb((TfwHttpMsg *)resp))) { return r; } @@ 
-2977,13 +2982,13 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, .len = SLEN(S_ZERO S_CRLF) }; - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &g_crlf, NULL); if (unlikely(r)) return r; add_zero_chunk: - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &b_len, NULL); if (unlikely(r)) return r; @@ -3027,11 +3032,11 @@ tfw_cache_set_hdr_age(TfwHttpResp *resp, TfwCacheEntry *ce, long age) if ((r = tfw_hpack_encode(resp, &h_age, false, false))) goto err; } else { - if ((r = tfw_http_msg_expand_data(&resp->iter, skb_head, + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &h_age, NULL))) goto err; - if ((r = tfw_http_msg_expand_data(&resp->iter, skb_head, + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &g_crlf, NULL))) goto err; } @@ -3160,7 +3165,8 @@ tfw_cache_build_resp(TfwHttpReq *req, TfwCacheEntry *ce, long age) && tfw_http_expand_stale_warn(resp)) || (!test_bit(TFW_HTTP_B_HDR_DATE, resp->flags) && tfw_http_expand_hdr_date(resp)) - || tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + || tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &g_crlf, NULL)) { goto free; } @@ -3238,7 +3244,8 @@ tfw_cache_build_resp(TfwHttpReq *req, TfwCacheEntry *ce, long age) * trailer or chunked body. 
*/ if (chunked_body && req->method != TFW_HTTP_METH_HEAD - && tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + && tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &g_crlf, NULL)) goto free; return resp; diff --git a/fw/client.c b/fw/client.c index 9c6524ea93..e6b9eb9ce6 100644 --- a/fw/client.c +++ b/fw/client.c @@ -30,6 +30,7 @@ #include "log.h" #include "procfs.h" #include "tdb.h" +#include "lib/fault_injection_alloc.h" #include "lib/str.h" #include "lib/common.h" @@ -37,9 +38,9 @@ #define UA_CMP_LEN 256 static struct { - const char *db_path; - unsigned long db_size; - unsigned int lru_size; + const char *db_path; + unsigned long db_size; + unsigned int lru_size; } client_cfg __read_mostly; /** @@ -51,19 +52,185 @@ static struct { * @user_agent - UA_CMP_LEN first characters of User-Agent */ typedef struct { - TfwClient cli; - TfwAddr xff_addr; - unsigned long user_agent_len; - char user_agent[UA_CMP_LEN]; + TfwClient cli; + TfwAddr xff_addr; + unsigned long user_agent_len; + char user_agent[UA_CMP_LEN]; } TfwClientEntry; static struct { struct list_head head; - unsigned int lru_size; -} client_lru; + unsigned int lru_size; +} client_lru = { + .head = LIST_HEAD_INIT(client_lru.head), + .lru_size = 0, +}; static TDB *client_db; +static atomic_t shutdown_pending = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(shutdown_wq); + +static struct kmem_cache *cli_mem_cache; +static struct { + TfwClientMem *mem; + struct list_head free_list; + unsigned int size; + unsigned int order; +} cli_mem_pool = { + .mem = NULL, + .free_list = LIST_HEAD_INIT(cli_mem_pool.free_list), + .size = 0, + .order = 0, +}; + +static inline bool +tfw_cli_mem_belongs_to_pool(TfwClientMem *cli_mem) +{ + return cli_mem >= cli_mem_pool.mem + && cli_mem < cli_mem_pool.mem + cli_mem_pool.size; +} + +static void +__cli_mem_release(TfwClientMem *cli_mem) +{ + percpu_ref_exit(&cli_mem->refcnt); + free_percpu(cli_mem->mem); + if (!tfw_cli_mem_belongs_to_pool(cli_mem)) + 
kmem_cache_free(cli_mem_cache, cli_mem); +} + +static inline void +tfw_cli_mem_pool_free(TfwClientMem *cli_mem) +{ + int cpu; + + assert_spin_locked(&client_db->ga_lock); + + for_each_online_cpu(cpu) + *per_cpu_ptr(cli_mem->mem, cpu) = 0; + percpu_ref_reinit(&cli_mem->refcnt); + list_add_tail(&cli_mem->in_free_list, &cli_mem_pool.free_list); +} + +static inline TfwClientMem * +tfw_cli_mem_pool_alloc(void) +{ + TfwClientMem *cli_mem; + + assert_spin_locked(&client_db->ga_lock); + + cli_mem = list_first_entry_or_null(&cli_mem_pool.free_list, + TfwClientMem, in_free_list); + if (!cli_mem) + return NULL; + + list_del_init(&cli_mem->in_free_list); + + return cli_mem; +} + +static void +cli_mem_release(struct percpu_ref *ref) +{ + TfwClientMem *cli_mem = container_of(ref, TfwClientMem, refcnt); + + spin_lock_bh(&client_db->ga_lock); + + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + if (tfw_cli_mem_belongs_to_pool(cli_mem)) + tfw_cli_mem_pool_free(cli_mem); + else + __cli_mem_release(cli_mem); + + spin_unlock_bh(&client_db->ga_lock); + + if (atomic_dec_and_test(&shutdown_pending)) + wake_up(&shutdown_wq); +} + +static void +tfw_cli_mem_kill_work_fn(struct work_struct *work) +{ + TfwClientMem *cli_mem = container_of(work, TfwClientMem, kill_work); + + percpu_ref_kill(&cli_mem->refcnt); + percpu_ref_put(&cli_mem->refcnt); +} + +static inline int +tfw_cli_mem_init(TfwClientMem *cli_mem, gfp_t flags) +{ + int r; + + cli_mem->mem = tfw_alloc_percpu_gfp(long, flags | __GFP_ZERO); + if (unlikely(!cli_mem->mem)) + return -ENOMEM; + + r = tfw_percpu_ref_init(&cli_mem->refcnt, cli_mem_release, + PERCPU_REF_ALLOW_REINIT, flags); + if (unlikely(r)) + goto free_per_cpu_mem; + + INIT_LIST_HEAD(&cli_mem->in_free_list); + INIT_WORK(&cli_mem->kill_work, tfw_cli_mem_kill_work_fn); + + return 0; + +free_per_cpu_mem: + free_percpu(cli_mem->mem); + + return r; +} + +static inline void +tfw_cli_mem_pool_exit(void) +{ + TfwClientMem *curr, *tmp; + + list_for_each_entry_safe(curr, tmp, 
&cli_mem_pool.free_list, + in_free_list) + { + list_del_init(&curr->in_free_list); + __cli_mem_release(curr); + } + + free_pages((unsigned long)cli_mem_pool.mem, cli_mem_pool.order); + cli_mem_pool.mem = NULL; +} + +static inline int +tfw_cli_mem_pool_init(void) +{ + TfwClientMem *block; + unsigned int order; + int i, r; + + if (WARN_ON_ONCE(!client_cfg.lru_size)) + return -EINVAL; + + order = get_order(sizeof(TfwClientMem) * client_cfg.lru_size); + if (order > MAX_PAGE_ORDER) + order = MAX_PAGE_ORDER; + + cli_mem_pool.order = order; + cli_mem_pool.mem = (TfwClientMem *)tfw__get_free_pages(GFP_KERNEL, + order); + if (unlikely(!cli_mem_pool.mem)) + return -ENOMEM; + + block = cli_mem_pool.mem; + for (i = 0; i < client_cfg.lru_size; i++) { + r = tfw_cli_mem_init(&block[i], GFP_KERNEL); + if (unlikely(r)) + return r; + list_add(&block[i].in_free_list, &cli_mem_pool.free_list); + cli_mem_pool.size++; + } + + return 0; +} + /* * Called only under db->ga_lock. * @@ -103,13 +270,11 @@ tfw_client_free(TdbRec *rec) * Tempesta FW shut down from `tfw_client_free_lru` */ WARN_ON(!list_empty(&cli->list)); -} - -static void -tfw_client_init_lru(void) -{ - INIT_LIST_HEAD(&client_lru.head); - client_lru.lru_size = 0; + if (likely(cli->cli_mem)) { + atomic_inc(&shutdown_pending); + if (!schedule_work(&cli->cli_mem->kill_work)) + atomic_dec(&shutdown_pending); + } } static void @@ -185,16 +350,57 @@ tfw_client_addr_eq(TdbRec *rec, void *data) return true; } -static void +static inline TfwClientMem * +tfw_cli_mem_alloc_from_cache(void) +{ + TfwClientMem *cli_mem; + + cli_mem = kmem_cache_alloc(cli_mem_cache, GFP_ATOMIC); + if (unlikely(!cli_mem)) + return NULL; + + if (unlikely(tfw_cli_mem_init(cli_mem, GFP_ATOMIC))) + goto free_cli_mem; + + return cli_mem; + +free_cli_mem: + kmem_cache_free(cli_mem_cache, cli_mem); + + return NULL; +} + +static inline TfwClientMem * +tfw_cli_mem_alloc(void) +{ + TfwClientMem *cli_mem; + + cli_mem = tfw_cli_mem_pool_alloc(); + if (!cli_mem) + cli_mem 
= tfw_cli_mem_alloc_from_cache(); + if (unlikely(!cli_mem)) + return NULL; + + percpu_ref_get(&cli_mem->refcnt); + + return cli_mem; +} + +static int tfw_client_ent_init(TdbRec *rec, void *data) { TfwClientEntry *ent = (TfwClientEntry *)rec->data; TfwClient *cli = &ent->cli; TfwClientEqCtx *ctx = (TfwClientEqCtx *)data; + INIT_LIST_HEAD(&cli->list); + + cli->cli_mem = tfw_cli_mem_alloc(); + if (unlikely(!cli->cli_mem)) + return -ENOMEM; + assert_spin_locked(&client_db->ga_lock); - INIT_LIST_HEAD(&cli->list); tfw_client_update_lru(cli); bzero_fast(&cli->class_prvt, sizeof(cli->class_prvt)); @@ -210,6 +416,8 @@ tfw_client_ent_init(TdbRec *rec, void *data) T_DBG("new client: cli=%p\n", cli); T_DBG_ADDR("client address", &cli->addr, TFW_NO_PORT); T_DBG2("client %p, users=%d\n", cli, 1); + + return 0; } /** @@ -300,6 +508,8 @@ tfw_client_for_each(int (*fn)(void *)) static int tfw_client_start(void) { + int r; + if (tfw_runstate_is_reconfig()) return 0; /* @@ -313,8 +523,11 @@ tfw_client_start(void) if (!client_db) return -EINVAL; + r = tfw_cli_mem_pool_init(); + if (unlikely(r)) + return r; + client_db->hdr->before_free = tfw_client_free; - tfw_client_init_lru(); return 0; } @@ -324,8 +537,11 @@ tfw_client_stop(void) { if (tfw_runstate_is_reconfig()) return; + if (client_db) { tfw_client_free_lru(); + wait_event(shutdown_wq, !atomic_read(&shutdown_pending)); + tfw_cli_mem_pool_exit(); tdb_close(client_db); client_db = NULL; } @@ -373,6 +589,11 @@ TfwMod tfw_client_mod = { int __init tfw_client_init(void) { + cli_mem_cache = kmem_cache_create("cli_mem_cache", + sizeof(TfwClientMem), + 0, 0, NULL); + if (!cli_mem_cache) + return -ENOMEM; tfw_mod_register(&tfw_client_mod); return 0; @@ -381,5 +602,6 @@ tfw_client_init(void) void tfw_client_exit(void) { + kmem_cache_destroy(cli_mem_cache); tfw_mod_unregister(&tfw_client_mod); } diff --git a/fw/client.h b/fw/client.h index 4487cb8cf0..d14409ba29 100644 --- a/fw/client.h +++ b/fw/client.h @@ -24,18 +24,27 @@ #include 
"http_limits.h" #include "connection.h" +typedef struct tfw_client_mem_t { + struct percpu_ref refcnt; + struct work_struct kill_work; + long __percpu *mem; + struct list_head in_free_list; +} TfwClientMem; + /** * Client descriptor. * * @class_prvt - private client accounting data for classifier module. * Typically it's large and wastes memory in vain if * no any classification logic is used; - * list_head - entry in the lru list; + * @list_head - entry in the lru list; + * @cli_mem - memory used by current client; */ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; + TfwClientMem *cli_mem; } TfwClient; int tfw_client_init(void); @@ -51,4 +60,37 @@ void tfw_cli_abort_all(void); void tfw_tls_connection_lost(TfwConn *conn); +#define CLIENT_MEM_FROM_CONN(conn) \ + ((TfwClient *)((TfwConn *)conn)->peer)->cli_mem + +static inline void +tfw_client_adjust_mem(TfwClientMem *cli_mem, int delta) +{ + this_cpu_add(*cli_mem->mem, delta); +} + +static inline bool +tfw_client_mem_get(TfwClientMem *cli_mem) +{ + return percpu_ref_tryget(&cli_mem->refcnt); +} + +static inline void +tfw_client_mem_put(TfwClientMem *cli_mem) +{ + percpu_ref_put(&cli_mem->refcnt); +} + +static inline long +tfw_client_mem(TfwClient *cli) +{ + long mem = 0; + int cpu; + + for_each_online_cpu(cpu) + mem += *(per_cpu_ptr(cli->cli_mem->mem, cpu)); + + return mem; +} + #endif /* __TFW_CLIENT_H__ */ diff --git a/fw/connection.c b/fw/connection.c index 1f7e3d1eda..d876d4b3a5 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -26,6 +26,7 @@ #include "sync_socket.h" #include "http.h" #include "websocket.h" +#include "tcp.h" TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS]; @@ -186,28 +187,27 @@ tfw_connection_recv(TfwConn *conn, struct sk_buff *skb) next = split; } } else { - __kfree_skb(skb); + __ss_kfree_skb(skb); } } /* - * T_BLOCK is error code for high level modules (like frang), - * here we should deal with error code, which accurately + * Here we should deal with 
error code, which accurately * determine further closing behavior. * When error occurs during response processing * we should close connection with backend immediatly * and try to reastablish it later, so we should not * return T_DROP for server connections. */ - BUG_ON(r == T_BLOCK || + BUG_ON(is_tfw_internal_error_code(r) || (r == T_DROP && TFW_CONN_TYPE(conn) & Conn_Srv)); - return r <= T_BAD || r == T_OK ? r : T_BAD; + return (r == T_OK || is_tfw_common_error_code(r)) ? r : T_BAD; } -void +int tfw_connection_recv_finish(TfwConn *conn) { - TFW_CONN_HOOK_CALL(conn, conn_recv_finish); + return TFW_CONN_HOOK_CALL(conn, conn_recv_finish); } void @@ -244,3 +244,80 @@ tfw_connection_unlink_to_sk(TfwConn *conn) conn->sk = NULL; ss_sock_put(sk); } + +static inline int +tfw_connection_shutdown(TfwConn *conn) +{ + struct sock *sk = conn->sk; + + SS_IN_USE_PROTECT({ + tcp_shutdown(sk, SEND_SHUTDOWN); + }); + if (unlikely(sk->sk_state == TCP_CLOSE)) + return -ENOMEM; + return 0; +} + +int +tfw_connection_fill_sk_write_queue(TfwConn *conn, unsigned int mss_now) +{ + struct sock *sk = conn->sk; + TfwH2Ctx *h2; + unsigned long snd_wnd; + int r; + + assert_spin_locked(&sk->sk_lock.slock); + WARN_ON(SS_CONN_TYPE(sk) & Conn_Closing); + + /* + * Update snd_cwnd if nedeed, to correct caclulation + * of count of bytes to send. + */ + tcp_slow_start_after_idle_check(sk); + + /* + * First of all Tempesta FW entails skb from connection write queue + * (all http1 data, control frames, tls alerts and so on for http2), + * then if `snd_wnd` is not exceeded make frames for http2. + */ + r = ss_skb_tcp_entail_list(sk, &conn->write_queue, + mss_now, &snd_wnd); + if (unlikely(r)) + return r; + + /* + * This function can be called both for HTTP1 and HTTP2 connections. + * Moreover this function can be called when HTTP2 connection is + * shutdowned before TLS hadshake was finished. + */ + h2 = TFW_CONN_PROTO(conn) == TFW_FSM_H2 ? 
+ tfw_h2_context_safe(conn) : NULL; + if (!h2) { + if (unlikely(!conn->write_queue)) { + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); + if (unlikely(SS_CONN_TYPE(sk) & Conn_Shutdown)) + r = tfw_connection_shutdown(conn); + } + return r; + } + + r = tfw_h2_make_frames(sk, h2, mss_now, snd_wnd); + if (unlikely(r)) + return r; + + if (unlikely(!conn->write_queue)) { + /* + * If connection is shutdowned and error responce was sent + * shutdown the whole connection. + */ + if (unlikely(SS_CONN_TYPE(sk) & Conn_Shutdown) + && (!h2->error + || tfw_h2_conn_or_stream_wnd_is_exceeded(h2, + h2->error))) + r = tfw_connection_shutdown(conn); + if (!tfw_h2_is_ready_to_send(h2)) + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); + } + + return r; +} diff --git a/fw/connection.h b/fw/connection.h index 769d027660..ee0d6d3783 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -98,8 +98,9 @@ enum { * @list - member in the list of connections with @peer; * @refcnt - number of users of the connection structure instance; * @stream - instance for control messages processing; + * @write_queue - queue of skb to push to socket write queue; * @peer - TfwClient or TfwServer handler. Hop-by-hop peer; - * @pair - Paired TfwCliConn or TfwSrvConn for websocket connections; + * @pair - paired TfwCliConn or TfwSrvConn for websocket connections; * @sk - an appropriate sock handler; * @destructor - called when a connection is destroyed; */ @@ -110,6 +111,7 @@ typedef struct tfw_conn_t TfwConn; struct list_head list; \ atomic_t refcnt; \ TfwStream stream; \ + struct sk_buff *write_queue; \ TfwPeer *peer; \ TfwConn *pair; \ struct sock *sk; \ @@ -168,7 +170,7 @@ typedef struct tfw_conn_t { * timestamp, low 16 bits are count of misses; * */ -typedef struct { +typedef struct tfw_cli_conn_t { TFW_CONN_COMMON; struct timer_list timer; struct list_head seq_queue; @@ -358,7 +360,7 @@ typedef struct { /* * Called after processing all socket received queue. 
*/ - void (*conn_recv_finish)(TfwConn *conn); + int (*conn_recv_finish)(TfwConn *conn); } TfwConnHooks; #define TFW_CONN_MAX_PROTOS TFW_GFSM_FSM_N @@ -579,6 +581,7 @@ tfw_connection_validate_cleanup(TfwConn *conn) BUG_ON(!conn); BUG_ON(!list_empty(&conn->list)); BUG_ON(conn->stream.msg); + BUG_ON(conn->write_queue); rc = atomic_read(&conn->refcnt); BUG_ON(rc && rc != TFW_CONN_DEATHCNT); @@ -612,13 +615,15 @@ tfw_peer_for_each_conn(TfwPeer *p, int (*cb)(TfwConn *)) } extern unsigned int tfw_cli_max_concurrent_streams; +extern u64 tfw_cli_soft_mem_limit; +extern u64 tfw_cli_hard_mem_limit; void tfw_connection_unlink_to_sk(TfwConn *conn); void tfw_connection_hooks_register(TfwConnHooks *hooks, int type); void tfw_connection_hooks_unregister(int type); int tfw_connection_send(TfwConn *conn, TfwMsg *msg); int tfw_connection_recv(TfwConn *conn, struct sk_buff *skb); -void tfw_connection_recv_finish(TfwConn *conn); +int tfw_connection_recv_finish(TfwConn *conn); /* Generic helpers, used for both client and server connections. */ void tfw_connection_init(TfwConn *conn); @@ -630,5 +635,6 @@ int tfw_connection_close(TfwConn *conn, bool sync); void tfw_connection_abort(TfwConn *conn); void tfw_connection_drop(TfwConn *conn); void tfw_connection_release(TfwConn *conn); +int tfw_connection_fill_sk_write_queue(TfwConn *conn, unsigned int mss_now); #endif /* __TFW_CONNECTION_H__ */ diff --git a/fw/hpack.c b/fw/hpack.c index 93909763ef..999e7529f5 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -1155,7 +1155,8 @@ tfw_huffman_init(TfwHPack *__restrict hp) } int -tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz) +tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, + unsigned int htbl_sz) { bool np; TfwHPackETbl *et = &hp->enc_tbl; @@ -1169,14 +1170,14 @@ tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz) tfw_huffman_init(hp); dt->window = hp->max_window = htbl_sz; - if (!(dt->pool = __tfw_pool_new(0))) + if (!(dt->pool = __tfw_pool_new(0, owner))) return -ENOMEM; - if (!(dt->h_pool = __tfw_pool_new(0))) + if (!(dt->h_pool = __tfw_pool_new(0, owner))) goto err_dt; et->window = htbl_sz; et->rb_size = HPACK_ENC_TABLE_MAX_SIZE; - if (!(et->pool = __tfw_pool_new(HPACK_ENC_TABLE_MAX_SIZE))) + if (!(et->pool = __tfw_pool_new(HPACK_ENC_TABLE_MAX_SIZE, owner))) goto err_et; et->rbuf = tfw_pool_alloc_np(et->pool, HPACK_ENC_TABLE_MAX_SIZE, &np); @@ -1935,7 +1936,6 @@ tfw_hpack_cache_decode_expand(TfwHPack *__restrict hp, unsigned int state; int r = T_OK; TfwStr exp_str = {}; - TfwMsgIter *it = &resp->iter; const unsigned char *last = src + n; unsigned char *prev = src; struct sk_buff **skb_head = &resp->msg.skb_head; @@ -1955,7 +1955,8 @@ do { \ #define EXPAND_STR_DATA(str) \ do { \ - if ((r = tfw_http_msg_expand_data(it, skb_head, str, NULL))) \ + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, \ + str, NULL))) \ goto out; \ dc_iter->acc_len += (str)->len; \ } while (0) @@ -2362,11 +2363,11 @@ do { \ * Compare split header/value against values stored inside * node and return positive/negative/zero depending on their * relation. - * + * * The order geven by this function is the following: * (hdr_name_len, total_hdr_len, special_strcmp) * where hdr_name_len and total_hdr_len are compared as integers. 
- * + * * Where special_strcmp is case-insensitive for header names, * case-sensitive for header values and in both cases it compares * multiple characters per instruction, so don't expect strict @@ -2381,7 +2382,7 @@ tfw_hpack_node_compare(const TfwStr *__restrict h_name, unsigned len; const char *np, *p; const TfwStr *c, *end; - + if (h_name->len != node->name_len) return (int)h_name->len - (int)node->name_len; @@ -2411,7 +2412,7 @@ tfw_hpack_node_compare(const TfwStr *__restrict h_name, if (nm_node) *nm_node = node; - + len = h_name->len + h_val->len; if (len != node->hdr_len) return (int)len - (int)node->hdr_len; @@ -3378,25 +3379,25 @@ tfw_huffman_encode_string(TfwStr *str, TfwPool *pool) } static int -tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwHttpMsg *hm, struct sk_buff **skb_head, TfwStr *str, bool in_huffman) { - int r; - TfwHPackInt len; TfwStr len_str = { 0 }; unsigned short mask = in_huffman ? 0x80 : 0x0; + TfwHPackInt len; + int r; write_int(str->len, 0x7F, mask, &len); len_str.data = len.buf; len_str.len = len.sz; - r = tfw_http_msg_expand_data(it, skb_head, &len_str, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &len_str, NULL); if (unlikely(r)) return r; mit->acc_len += len_str.len; - r = tfw_http_msg_expand_data(it, skb_head, str, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, str, NULL); if (unlikely(r)) return r; mit->acc_len += str->len; @@ -3426,7 +3427,7 @@ tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwMsgIter *it, * thus avoiding Huffman encodings is completely RFC-compliant behaviour. 
*/ static inline int -tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwHttpMsg *hm, struct sk_buff **skb_head, TfwStr *str, TfwPool *pool) { @@ -3440,7 +3441,7 @@ tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwMsgIter *it, in_huffman = true; } - return tfw_hpack_str_expand_raw(mit, it, skb_head, str, in_huffman); + return tfw_hpack_str_expand_raw(mit, hm, skb_head, str, in_huffman); } static inline int @@ -3448,7 +3449,6 @@ tfw_hpack_write_idx(TfwHttpResp *__restrict resp, TfwHPackInt *__restrict idx, bool use_pool) { TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *iter = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; const TfwStr s_idx = { .data = idx->buf, @@ -3463,7 +3463,7 @@ tfw_hpack_write_idx(TfwHttpResp *__restrict resp, TfwHPackInt *__restrict idx, return tfw_h2_msg_expand_from_pool((TfwHttpMsg *)resp, &s_idx, &resp->mit); - return tfw_http_msg_expand_data(iter, skb_head, &s_idx, + return tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &s_idx, &mit->start_off); } @@ -3532,7 +3532,6 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, int ret; TfwStr *c, *end; TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *iter = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; TfwStr s_val; @@ -3547,7 +3546,7 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, mit->acc_len += idx->sz; if (unlikely(!name_indexed)) { - ret = tfw_hpack_str_expand(mit, iter, skb_head, + ret = tfw_hpack_str_expand(mit, (TfwHttpMsg *)resp, skb_head, TFW_STR_CHUNK(hdr, 0), NULL); if (unlikely(ret)) return ret; @@ -3579,7 +3578,8 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, c = tfw_str_collect_cmp(c, end, &s_val, NULL); BUG_ON(c != end); - return tfw_hpack_str_expand(mit, iter, skb_head, &s_val, NULL); + return tfw_hpack_str_expand(mit, (TfwHttpMsg *)resp, skb_head, + &s_val, NULL); } static inline int @@ -3760,30 
+3760,26 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) { - TfwMsgIter it = { - .skb = stream->xmit.skb_head, - .skb_head = stream->xmit.skb_head, - .frag = -1 - }; - TfwStr new_size = {}; TfwHPackInt tmp = {}; + TfwStr dst = {}; char *data; + unsigned int _; int r = 0; WARN_ON_ONCE(!tbl->wnd_changed); - write_int(tbl->window, 0x1F, 0x20, &tmp); - new_size.data = tmp.buf; - new_size.len = tmp.sz; data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, FRAME_HEADER_SIZE); BUG_ON(!data); - r = tfw_http_msg_insert(&it, &data, &new_size); + r = ss_skb_get_room_w_frag(stream->xmit.skb_head, + stream->xmit.skb_head, + data, tmp.sz, &dst, &_); if (unlikely(r)) return r; + memcpy_fast(dst.data, tmp.buf, tmp.sz); stream->xmit.h_len += tmp.sz; tbl->wnd_changed = false; diff --git a/fw/hpack.h b/fw/hpack.h index 732e924ecb..1a139b5bb2 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -299,7 +299,8 @@ typedef struct { void write_int(unsigned long index, unsigned short max, unsigned short mask, TfwHPackInt *__restrict res_idx); -int tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz); +int tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, + unsigned int htbl_sz); void tfw_hpack_clean(TfwHPack *__restrict hp); int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, diff --git a/fw/http.c b/fw/http.c index 0742811247..d7960ddac1 100644 --- a/fw/http.c +++ b/fw/http.c @@ -658,7 +658,7 @@ tfw_h2_prep_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) __TFW_STR_CH(&hdr, 0)->len = name->len - SLEN(S_CRLF) - 2; if (__TFW_STR_CH(msg, i + 1)->nchunks) { - TfwMsgIter *iter = &resp->iter; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; struct sk_buff **skb_head = &resp->msg.skb_head; TfwHPackInt vlen; TfwStr s_vlen = {}; @@ -673,12 +673,12 @@ tfw_h2_prep_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) s_vlen.data = vlen.buf; s_vlen.len = vlen.sz; - r = tfw_http_msg_expand_data(iter, skb_head, &s_vlen, + r = tfw_http_msg_expand_data(hm, skb_head, &s_vlen, NULL); if (unlikely(r)) goto out; - r = tfw_http_msg_expand_data(iter, skb_head, val, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, val, NULL); if (unlikely(r)) goto out; @@ -728,7 +728,9 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - if ((r = tfw_http_msg_setup((TfwHttpMsg *)resp, &it, msg->len))) + r = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem((TfwHttpMsg *)resp), + &resp->msg.skb_head, msg->len); + if (unlikely(r)) return r; body = TFW_STR_BODY_CH(msg); @@ -738,7 +740,7 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr 
*msg) TFW_STR_FOR_EACH_CHUNK(c, msg, end) { if (c->data) { TFW_STR_FOR_EACH_CHUNK(field_c, c, field_end) { - if ((r = tfw_msg_write(&it, field_c))) + if ((r = tfw_msg_iter_write(&it, field_c))) return r; } } @@ -921,7 +923,7 @@ do { \ * Preparing 304 response (Not Modified) for HTTP/1.1-client. */ int -tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwMsgIter *it) +tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwHttpMsg *hm) { int ret = 0; static TfwStr rh = { @@ -938,12 +940,12 @@ tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwMsgIter *it) else if (test_bit(TFW_HTTP_B_CONN_KA, req->flags)) end = &crlf_keep; - ret = tfw_http_msg_expand_data(it, skb_head, &rh, NULL); + ret = tfw_http_msg_expand_data(hm, skb_head, &rh, NULL); if (unlikely(ret)) return ret; if (end) { - ret = tfw_http_msg_expand_data(it, skb_head, end, NULL); + ret = tfw_http_msg_expand_data(hm, skb_head, end, NULL); if (unlikely(ret)) return ret; } @@ -1147,21 +1149,37 @@ tfw_h2_resp_status_write(TfwHttpResp *resp, unsigned short status, void tfw_h2_resp_fwd(TfwHttpResp *resp) { - bool resp_in_xmit = - (TFW_SKB_CB(resp->msg.skb_head)->opaque_data == resp); + bool resp_in_xmit = !!TFW_SKB_CB(resp->msg.skb_head)->stream_id; TfwHttpReq *req = resp->req; TfwConn *conn = req->conn; int status = READ_ONCE(resp->status); + bool need_extra_put = false; tfw_connection_get(conn); + /* + * We need this extra get, because if send fails, connection + * will be put during freeing skbs of sending response (in + * skb destructor). 
+ */ + if (resp_in_xmit) { + TfwClientMem *owner = + TFW_SKB_CB(resp->msg.skb_head)->opaque_data; + + WARN_ON(owner != CLIENT_MEM_FROM_CONN(resp->req->conn)); + TFW_SKB_CB(resp->msg.skb_head)->opaque_data = resp; + TFW_SKB_CB(resp->msg.skb_head)->destructor = + tfw_h2_stream_skb_destructor; + need_extra_put = true; + tfw_connection_get(conn); + } do_access_log(resp); if (tfw_cli_conn_send((TfwCliConn *)conn, (TfwMsg *)resp)) { T_DBG("%s: cannot send data to client via HTTP/2\n", __func__); TFW_INC_STAT_BH(serv.msgs_otherr); - tfw_connection_close(conn, true); /* We can't send response, so we should free it here. */ - resp_in_xmit = false; + tfw_connection_close(conn, true); + resp_in_xmit = !resp_in_xmit || !resp->msg.skb_head; } else { TFW_INC_STAT_BH(serv.msgs_forwarded); tfw_inc_global_hm_stats(status); @@ -1169,6 +1187,8 @@ tfw_h2_resp_fwd(TfwHttpResp *resp) if (!resp_in_xmit) tfw_http_resp_pair_free_and_put_conn(resp); + if (need_extra_put) + tfw_connection_put(conn); } /* @@ -1805,7 +1825,7 @@ __tfw_http_free_cleanup(TfwHttpMsgCleanup *cleanup) struct sk_buff *skb; while ((skb = ss_skb_dequeue(&cleanup->skb_head))) - __kfree_skb(skb); + __ss_kfree_skb(skb); for (i = 0; i < cleanup->pages_sz; i++) /* @@ -2845,7 +2865,11 @@ static TfwMsg * tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) { int type = TFW_CONN_TYPE(conn); - TfwHttpMsg *hm = __tfw_http_msg_alloc(type, true); + TfwClientMem *owner = type & Conn_Clnt ? 
+ CLIENT_MEM_FROM_CONN(conn) : NULL; + TfwHttpMsg *hm; + + hm = __tfw_http_msg_alloc(owner, type, true); if (unlikely(!hm)) return NULL; @@ -2862,7 +2886,7 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) if (TFW_FSM_TYPE(conn->proto.type) == TFW_FSM_H2) { TfwHttpReq *req = (TfwHttpReq *)hm; - if(!(req->pit.pool = __tfw_pool_new(0))) + if(!(req->pit.pool = __tfw_pool_new(0, owner))) goto clean; req->pit.parsed_hdr = &req->stream->parser.hdr; __set_bit(TFW_HTTP_B_H2, req->flags); @@ -2876,6 +2900,17 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) if (unlikely(tfw_http_resp_pair(hm))) goto clean; + /* Can be equal to zero for health monitor requests. */ + if (likely(hm->req->conn)) { + TfwClient *cli = (TfwClient *)hm->req->conn->peer; + TfwClientMem *cli_mem = cli->cli_mem; + int delta = PAGE_SIZE << hm->pool->order; + + hm->pool->owner = cli_mem; + BUG_ON(!tfw_client_mem_get(cli_mem)); + tfw_client_adjust_mem(cli_mem, delta); + } + if (TFW_MSG_H2(hm->req)) { size_t sz = TFW_HDR_MAP_SZ(TFW_HDR_MAP_INIT_CNT); TfwHttpTransIter *mit = &((TfwHttpResp *)hm)->mit; @@ -3098,7 +3133,7 @@ tfw_http_conn_cli_drop(TfwCliConn *cli_conn) smp_mb__before_atomic(); set_bit(TFW_HTTP_B_REQ_DROP, req->flags); if (unused) { - tfw_http_free_req_carefully(req, &resp_del_queue); + tfw_http_free_req_carefully(req, &resp_del_queue); TFW_INC_STAT_BH(serv.msgs_otherr); } } @@ -3152,11 +3187,21 @@ tfw_http_conn_send(TfwConn *conn, TfwMsg *msg) return ss_send(conn->sk, &msg->skb_head, msg->ss_flags); } -static void +static int tfw_http_conn_recv_finish(TfwConn *conn) { if (TFW_FSM_TYPE(conn->proto.type) == TFW_FSM_H2) tfw_h2_conn_recv_finish(conn); + + /* + * SoftIRQ shot is very short, 0.001 to 0.01 sec, so we can account + * client memory once per softirq shot - this is a quite fast response + * to a DDoS attack. 
+ */ + if (unlikely(frang_client_mem_limit((TfwCliConn *)conn, true))) + return T_BLOCK_WITH_RST; + + return 0; } /** @@ -3279,7 +3324,8 @@ tfw_http_expand_hbh(TfwHttpResp *resp, unsigned short status) tfw_http_req_set_conn_close(req); return add_h_conn - ? tfw_http_msg_expand_data(&resp->iter, skb_head, &h_conn, NULL) + ? tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &h_conn, NULL) : 0; } @@ -3333,6 +3379,7 @@ int tfw_http_expand_stale_warn(TfwHttpResp *resp) { /* TODO: adjust for #865 */ + TfwHttpMsg *hm = (TfwHttpMsg *)resp; struct sk_buff **skb_head = &resp->msg.skb_head; TfwStr wh = { .chunks = (TfwStr []){ @@ -3345,13 +3392,12 @@ tfw_http_expand_stale_warn(TfwHttpResp *resp) .nchunks = 4, }; - return tfw_http_msg_expand_data(&resp->iter, skb_head, &wh, NULL); + return tfw_http_msg_expand_data(hm, skb_head, &wh, NULL); } static __always_inline int __tfw_http_add_hdr_date(TfwHttpResp *resp, bool cache) { - int r; struct sk_buff **skb_head = &resp->msg.skb_head; char *date = *this_cpu_ptr(&g_buf); TfwStr h_date = { @@ -3363,16 +3409,17 @@ __tfw_http_add_hdr_date(TfwHttpResp *resp, bool cache) .len = SLEN(S_F_DATE) + SLEN(S_V_DATE) + SLEN(S_CRLF), .nchunks = 3 }; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; + int r; tfw_http_prep_date_from(date, resp->date); BUILD_BUG_ON(!__builtin_constant_p(cache)); if (!cache) - r = tfw_http_msg_expand_from_pool((TfwHttpMsg *)resp, &h_date); + r = tfw_http_msg_expand_from_pool(hm, &h_date); else - r = tfw_http_msg_expand_data(&resp->iter, skb_head, &h_date, - NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &h_date, NULL); if (unlikely(r)) T_ERR("Unable to add Date: header to resp [%p]\n", resp); @@ -3403,15 +3450,16 @@ tfw_http_expand_hdr_date(TfwHttpResp *resp) static int __tfw_http_add_hdr_server(TfwHttpResp *resp, bool cache) { - int r; struct sk_buff **skb_head = &resp->msg.skb_head; static char s_server[] = S_F_SERVER TFW_NAME "/" TFW_VERSION S_CRLF; TfwStr hdr = { .data = s_server, .len = SLEN(s_server) }; + 
TfwHttpMsg *hm = (TfwHttpMsg *)resp; + int r; if (!cache) - r = tfw_http_msg_expand_from_pool((TfwHttpMsg *)resp, &hdr); + r = tfw_http_msg_expand_from_pool(hm, &hdr); else - r = tfw_http_msg_expand_data(&resp->iter, skb_head, &hdr, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &hdr, NULL); if (unlikely(r)) T_ERR("Unable to add Server: header to resp [%p]\n", resp); @@ -3469,12 +3517,11 @@ __tfw_http_add_hdr_via(TfwHttpMsg *hm, int http_version, bool from_cache) &TFW_STR_STRING(S_CRLF)); } else { struct sk_buff **skb_head = &hm->msg.skb_head; - TfwMsgIter *it = &hm->iter; - r = tfw_http_msg_expand_data(it, skb_head, &rh, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &rh, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &STR_CRLF, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &STR_CRLF, NULL); } if (unlikely(r)) @@ -3702,7 +3749,7 @@ tfw_h1_add_loc_hdrs(TfwHttpMsg *hm, const TfwHdrMods *h_mods, bool from_cache) */ if (from_cache) { struct sk_buff **skb_head = &hm->msg.skb_head; - TfwMsgIter *it = &hm->iter; + /* * Skip the configured header if the header is * configured for deletion (without value chunk). @@ -3710,11 +3757,11 @@ tfw_h1_add_loc_hdrs(TfwHttpMsg *hm, const TfwHdrMods *h_mods, bool from_cache) if (h_mdf.nchunks < 3) continue; /* h_mdf->eolen is ignored, add explicit CRLF. 
*/ - r = tfw_http_msg_expand_data(it, skb_head, &h_mdf, + r = tfw_http_msg_expand_data(hm, skb_head, &h_mdf, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &STR_CRLF, + r = tfw_http_msg_expand_data(hm, skb_head, &STR_CRLF, NULL); } else { r = tfw_http_msg_expand_from_pool(hm, &h_mdf); @@ -4114,18 +4161,18 @@ write_merged_cookie_headers(TfwStr *hdr, TfwMsgIter *it) hval.nchunks--; hval.len -= chunk->len; } - r = tfw_msg_write(it, cookie_dlm); + r = tfw_msg_iter_write(it, cookie_dlm); if (unlikely(r)) return r; - r = tfw_msg_write(it, &hval); + r = tfw_msg_iter_write(it, &hval); if (unlikely(r)) return r; cookie_dlm = &val_dlm; } - return tfw_msg_write(it, &STR_CRLF); + return tfw_msg_iter_write(it, &STR_CRLF); } static int @@ -4136,12 +4183,12 @@ __h2_write_method(TfwHttpReq *req, TfwMsgIter *it) if (test_bit(TFW_HTTP_B_REQ_HEAD_TO_GET, req->flags)) { static const DEFINE_TFW_STR(meth_get, "GET"); - return tfw_msg_write(it, &meth_get); + return tfw_msg_iter_write(it, &meth_get); } else { TfwStr meth = {}; __h2_msg_hdr_val(&ht->tbl[TFW_HTTP_HDR_H2_METHOD], &meth); - return tfw_msg_write(it, &meth); + return tfw_msg_iter_write(it, &meth); } } ALLOW_ERROR_INJECTION(__h2_write_method, ERRNO); @@ -4171,7 +4218,6 @@ tfw_h2_adjust_req(TfwHttpReq *req) size_t pseudo_num; TfwStr tmp_host = {}, *host_val, *field, *end; struct sk_buff *new_head = NULL, *old_head = NULL; - TfwMsgIter it; TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, TFW_VHOST_HDRMOD_REQ); static const DEFINE_TFW_STR(sp, " "); @@ -4217,6 +4263,8 @@ tfw_h2_adjust_req(TfwHttpReq *req) char cl_data[TFW_ULTOA_BUF_SIZ] = {0}; size_t cl_data_len = 0; size_t cl_len = 0; + TfwMsgIter it; + /* * The Transfer-Encoding header field cannot be in the h2 request, because * requests with Transfer-Encoding are blocked. 
@@ -4340,7 +4388,8 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (WARN_ON_ONCE(h1_hdrs_sz < 0)) return -EINVAL; - r = tfw_msg_iter_setup(&it, &new_head, h1_hdrs_sz); + r = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem((TfwHttpMsg *)req), + &new_head, h1_hdrs_sz); if (unlikely(r)) return r; @@ -4349,13 +4398,13 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &sp); + r = tfw_msg_iter_write(&it, &sp); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &req->uri_path); + r = tfw_msg_iter_write(&it, &req->uri_path); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &fl_end); /* start of Host: header */ + r = tfw_msg_iter_write(&it, &fl_end); /* start of Host: header */ if (unlikely(r)) goto err; if (h_mods && test_bit(TFW_HTTP_HDR_HOST, h_mods->spec_hdrs)) { @@ -4370,10 +4419,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) __h2_msg_hdr_val(&ht->tbl[TFW_HTTP_HDR_HOST], &tmp_host); host_val = &tmp_host; } - r = tfw_msg_write(&it, host_val); + r = tfw_msg_iter_write(&it, host_val); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; @@ -4386,13 +4435,13 @@ tfw_h2_adjust_req(TfwHttpReq *req) case TFW_HTTP_HDR_HOST: continue; /* Already written. 
*/ case TFW_HTTP_HDR_X_FORWARDED_FOR: - r = tfw_msg_write(&it, &h_xff); + r = tfw_msg_iter_write(&it, &h_xff); if (unlikely(r)) goto err; continue; case TFW_HTTP_HDR_CONTENT_TYPE: if (h_ct_replace) { - r = tfw_msg_write(&it, &h_ct); + r = tfw_msg_iter_write(&it, &h_ct); if (unlikely(r)) goto err; continue; @@ -4427,10 +4476,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) hval.nchunks++; hval.len += chunk->len; } - r = tfw_msg_write(&it, &hval); + r = tfw_msg_iter_write(&it, &hval); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &dlm); + r = tfw_msg_iter_write(&it, &dlm); if (unlikely(r)) goto err; @@ -4438,11 +4487,11 @@ tfw_h2_adjust_req(TfwHttpReq *req) hval.nchunks = dup->nchunks - hval.nchunks; hval.len = dup->len - hval.len; - r = tfw_msg_write(&it, &hval); + r = tfw_msg_iter_write(&it, &hval); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; } @@ -4450,7 +4499,7 @@ tfw_h2_adjust_req(TfwHttpReq *req) goto err; } - r = tfw_msg_write(&it, &h_via); + r = tfw_msg_iter_write(&it, &h_via); if (unlikely(r)) goto err; @@ -4465,12 +4514,12 @@ tfw_h2_adjust_req(TfwHttpReq *req) .len = cl_len, .nchunks = 4 }; - r = tfw_msg_write(&it, &h_cl); + r = tfw_msg_iter_write(&it, &h_cl); if (unlikely(r)) goto err; } /* Finally close headers. 
*/ - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; @@ -4568,13 +4617,16 @@ tfw_http_resp_get_conn_flags(TfwHttpResp *resp) static int tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) { - struct sk_buff *nskb; + void *opaque_data = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; TfwMsgIter *iter = &resp->iter; + struct sk_buff *nskb; nskb = ss_skb_alloc(0); if (unlikely(!nskb)) return -ENOMEM; + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + opaque_data, nskb->truesize); nskb->mark = resp->msg.skb_head->mark; cleanup->skb_head = resp->msg.skb_head; resp->msg.skb_head = NULL; @@ -5353,6 +5405,7 @@ tfw_h2_hpack_encode_headers(TfwHttpResp *resp, const TfwHdrMods *h_mods) static int tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) { + TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwMsgIter *it = &resp->iter; size_t len, max_copy = PAGE_SIZE; char *data; @@ -5369,7 +5422,7 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) it->frag = skb_shinfo(it->skb)->nr_frags - 1; if (it->frag + 1 >= MAX_SKB_FRAGS) { - if ((r = tfw_msg_iter_append_skb(it))) + if ((r = tfw_http_msg_append_skb(hm))) return r; } @@ -5393,7 +5446,7 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) ss_skb_adjust_data_len(it->skb, copy); if (it->frag + 1 == MAX_SKB_FRAGS - && (r = tfw_msg_iter_append_skb(it))) + && (r = tfw_http_msg_append_skb(hm))) { return r; } @@ -5404,13 +5457,14 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) ALLOW_ERROR_INJECTION(tfw_h2_append_predefined_body, ERRNO); int -tfw_http_on_send_resp(void *conn, struct sk_buff **skb_head) +tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head) { TfwH2Ctx *ctx = tfw_h2_context_unsafe((TfwConn *)conn); - struct tfw_skb_cb *tfw_cb = TFW_SKB_CB(*skb_head); + TfwHttpResp *resp = TFW_SKB_CB(*skb_head)->opaque_data; + unsigned int stream_id = TFW_SKB_CB(*skb_head)->stream_id; 
TfwStream *stream; - stream = tfw_h2_find_not_closed_stream(ctx, tfw_cb->stream_id, false); + stream = tfw_h2_find_not_closed_stream(ctx, stream_id, false); /* * Very unlikely case. We check that stream is active, before * calling ss_send, but there is a very small chance, that @@ -5420,8 +5474,12 @@ tfw_http_on_send_resp(void *conn, struct sk_buff **skb_head) if (unlikely(!stream)) return -EPIPE; - BUG_ON(stream->xmit.skb_head); - stream->xmit.resp = (TfwHttpResp *)tfw_cb->opaque_data; + BUG_ON(stream->xmit.skb_head || stream->xmit.resp); + TFW_SKB_CB(*skb_head)->opaque_data = + CLIENT_MEM_FROM_CONN(resp->req->conn); + TFW_SKB_CB(*skb_head)->destructor = ss_skb_dflt_destructor; + stream->xmit.resp = resp; + if (test_bit(TFW_HTTP_B_CLOSE_ERROR_RESPONSE, stream->xmit.resp->flags)) ctx->error = stream; swap(stream->xmit.skb_head, *skb_head); @@ -5823,6 +5881,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) { int r = -EINVAL; TfwHttpReq *req = resp->req; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpTransIter *mit = &resp->mit; TfwHttpMsgCleanup cleanup = {}; TfwStr codings = {}; @@ -5874,7 +5933,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) * Alloc room for frame header. After this call resp->pool * must be used only as skb paged data. 
*/ - r = tfw_http_msg_setup_transform_pool(mit, &resp->iter, resp->pool); + r = tfw_http_msg_setup_transform_pool(mit, hm, resp->pool); if (unlikely(r)) return r; @@ -6245,7 +6304,7 @@ tfw_h1_req_process(TfwStream *stream, struct sk_buff *skb) if (test_bit(TFW_HTTP_B_CONN_CLOSE, req->flags)) { TFW_CONN_TYPE(req->conn) |= Conn_Stop; if (unlikely(skb)) { - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = NULL; } } @@ -6264,7 +6323,7 @@ tfw_h1_req_process(TfwStream *stream, struct sk_buff *skb) TFW_CONN_TYPE(req->conn) |= Conn_Stop; tfw_http_conn_error_log(req->conn, "Can't create" " pipelined request"); - __kfree_skb(skb); + __ss_kfree_skb(skb); } } @@ -6542,6 +6601,15 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, actor = tfw_http_parse_req; req->tfh.version = TFW_HTTP_TFH_HTTP_REQ; } + /* + * For tls connections we already set `skb->owner` before + * tls decryption. + */ + if (!TFW_SKB_CB(skb)->opaque_data) { + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + CLIENT_MEM_FROM_CONN(conn), + skb->truesize); + } r = ss_skb_process(skb, actor, req, &req->chunk_cnt, &parsed); req->msg.len += parsed; @@ -6939,7 +7007,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, tfw_http_send_err_resp(req, 500, "request dropped:" " processing error"); TFW_INC_STAT_BH(clnt.msgs_otherr); - } + } /* * According to RFC 7230 6.3.2, connection with a client * must be dropped after a response is sent to that client, @@ -7074,6 +7142,18 @@ tfw_http_popreq(TfwHttpMsg *hmresp, bool fwd_unsent) tfw_http_req_zap_error(&eq); } +static inline int +tfw_http_resp_filtout(TfwHttpMsg *hmresp) +{ + TfwHttpReq *req = hmresp->req; + + tfw_http_popreq(hmresp, false); + TFW_INC_STAT_BH(serv.msgs_filtout); + /* The response is freed by tfw_http_req_block(). */ + return tfw_http_req_block(req, 403, "response blocked: filtered out", + HTTP2_ECODE_PROTO); +} + /* * Post-process the response. 
Pass it to modules registered with GFSM * for further processing. Finish the request/response exchange properly @@ -7083,7 +7163,6 @@ static int tfw_http_resp_gfsm(TfwHttpMsg *hmresp, TfwFsmData *data) { int r; - TfwHttpReq *req = hmresp->req; BUG_ON(!hmresp->conn); @@ -7103,11 +7182,7 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, TfwFsmData *data) BUG_ON(r != T_BLOCK); error: - tfw_http_popreq(hmresp, false); - TFW_INC_STAT_BH(serv.msgs_filtout); - /* The response is freed by tfw_http_req_block(). */ - return tfw_http_req_block(req, 403, "response blocked: filtered out", - HTTP2_ECODE_PROTO); + return tfw_http_resp_filtout(hmresp); } /* @@ -7370,12 +7445,23 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, hmsib = NULL; hmresp = (TfwHttpMsg *)stream->msg; cli_conn = (TfwCliConn *)hmresp->req->conn; + /* `cli_conn` is equal to zero for health monitor requests. */ if (likely(cli_conn)) { if (TFW_FSM_TYPE(cli_conn->proto.type) == TFW_FSM_H2) conn_stop = !hmresp->req->stream; else - conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); + conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, + hmresp->req->flags); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + CLIENT_MEM_FROM_CONN(cli_conn), + skb->truesize); + + r = frang_client_mem_limit(cli_conn, false); + if (unlikely(r)) { + BUG_ON(r != T_BLOCK); + return tfw_http_resp_filtout(hmresp); + } } else { conn_stop = false; } @@ -7544,7 +7630,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, TFW_INC_STAT_BH(serv.msgs_otherr); tfw_http_conn_error_log(conn, "Can't create pipelined" " response"); - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = NULL; conn_stop = true; } @@ -7738,7 +7824,7 @@ tfw_http_msg_process_generic(TfwConn *conn, TfwStream *stream, return r; err: - __kfree_skb(skb); + __ss_kfree_skb(skb); return r; } @@ -7785,12 +7871,12 @@ tfw_http_hm_srv_send(TfwServer *srv, char *data, unsigned long len) TfwHttpActionResult res; int r; - if (!(req = 
tfw_http_msg_alloc_req_light())) + if (!(req = tfw_http_msg_alloc_req_light(NULL))) return; hmreq = (TfwHttpMsg *)req; - if (tfw_http_msg_setup(hmreq, &it, msg.len)) + if (tfw_msg_iter_setup(&it, NULL, &hmreq->msg.skb_head, msg.len)) goto cleanup; - if (tfw_msg_write(&it, &msg)) + if (tfw_msg_iter_write(&it, &msg)) goto cleanup; __set_bit(TFW_HTTP_B_HMONITOR, req->flags); @@ -8123,7 +8209,8 @@ __tfw_http_msg_body_dup(const char *filename, TfwStr *c_len, size_t *len, } t_sz += b_sz; - b_start = res = (char *)__get_free_pages(GFP_KERNEL, get_order(t_sz)); + b_start = res = (char *)tfw__get_free_pages(GFP_KERNEL, + get_order(t_sz)); if (!res) { T_ERR_NL("Can't allocate memory storing file %s as response " "body\n", filename); diff --git a/fw/http.h b/fw/http.h index cc586ddb7f..c77100f839 100644 --- a/fw/http.h +++ b/fw/http.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -588,6 +588,25 @@ void tfw_http_exit(void); T_WARN("%s, status %d: %s\n", \ msg, status, addr_str)) +static inline bool +tfw_http_msg_is_req(TfwHttpMsg *msg) +{ + /* + * msg->conn can be equal to zero only for response + * which is served from cache or error response. + */ + return msg->conn && TFW_CONN_TYPE(msg->conn) & Conn_Clnt; +} + +static inline TfwClientMem * +tfw_http_msg_client_mem(TfwHttpMsg *msg) +{ + TfwCliConn *conn = (TfwCliConn *)(tfw_http_msg_is_req(msg) ? 
+ msg->conn : msg->pair->conn); + + return ((TfwClient *)conn->peer)->cli_mem; +} + static inline int tfw_http_resp_code_range(const int n) { @@ -777,7 +796,7 @@ int tfw_h2_resp_encode_headers(TfwHttpResp *resp); int tfw_http_prep_redir(TfwHttpResp *resp, unsigned short status, TfwStr *cookie, TfwStr *body); int tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, - TfwMsgIter *it); + TfwHttpMsg *hm); void tfw_http_conn_msg_free(TfwHttpMsg *hm); void tfw_http_resp_pair_free_and_put_conn(void *opaque_data); void tfw_http_send_err_resp(TfwHttpReq *req, int status, const char *reason); @@ -795,6 +814,6 @@ int tfw_http_resp_copy_encodings(TfwHttpResp *resp, TfwStr* dst, void tfw_http_extract_request_authority(TfwHttpReq *req); bool tfw_http_mark_is_in_whitlist(unsigned int mark); char *tfw_http_resp_status_line(int status, size_t *len); -int tfw_http_on_send_resp(void *conn, struct sk_buff **skb_head); +int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head); #endif /* __TFW_HTTP_H__ */ diff --git a/fw/http2.c b/fw/http2.c index f67c3e755a..21c1ab1f6e 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2024-2025 Tempesta Technologies, Inc. + * Copyright (C) 2024-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -320,7 +320,8 @@ tfw_h2_context_init(TfwH2Ctx *ctx, TfwH2Conn *conn) rset->wnd_sz = DEF_WND_SIZE; ctx->conn = conn; - return tfw_hpack_init(&ctx->hpack, HPACK_TABLE_DEF_SIZE); + return tfw_hpack_init(&ctx->hpack, CLIENT_MEM_FROM_CONN(conn), + HPACK_TABLE_DEF_SIZE); } void @@ -351,7 +352,7 @@ tfw_h2_alloc_stream_sched_entry(TfwH2Ctx *ctx) * 15 - 20 streams in parallel even if there are much * more resourses to request). TfwStreamSchedEntry is * small (64 bytes), so use special cache for allocation. 
- */ + */ entry = kmem_cache_alloc(stream_sched_cache, GFP_ATOMIC | __GFP_ZERO); } else { @@ -603,7 +604,7 @@ tfw_h2_hpack_encode_trailer_headers(TfwHttpResp *resp) /* * TODO #2136: Remove this flag during reworking - * `tfw_http_msg_expand_from_pool` function. + * `tfw_http_msg_expand_from_pool` function. */ __set_bit(TFW_HTTP_B_RESP_ENCODE_TRAILERS, resp->flags); @@ -676,7 +677,7 @@ tfw_h2_stream_xmit_prepare_resp(TfwStream *stream) resp->iter.skb = resp->msg.skb_head->prev; resp->iter.frag = skb_shinfo(resp->iter.skb)->nr_frags - 1; - tfw_http_msg_setup_transform_pool(mit, &resp->iter, + tfw_http_msg_setup_transform_pool(mit, (TfwHttpMsg *)resp, resp->pool); r = tfw_h2_hpack_encode_trailer_headers(resp); @@ -729,7 +730,7 @@ tfw_h2_entail_stream_skb(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, T_DBG3("[%d]: %s: drop skb=%px data_len=%u len=%u\n", smp_processor_id(), __func__, skb, skb->data_len, skb->len); - kfree_skb(skb); + ss_kfree_skb(skb); continue; } @@ -759,7 +760,7 @@ tfw_h2_entail_stream_skb(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, } } *len -= skb->len; - ss_skb_tcp_entail(sk, skb, mark, tls_type); + ss_skb_tcp_entail(sk, skb, mark, tls_type); } /* diff --git a/fw/http2.h b/fw/http2.h index 80dc782dab..e95e97d9d3 100644 --- a/fw/http2.h +++ b/fw/http2.h @@ -59,7 +59,7 @@ typedef struct { */ typedef struct { unsigned int ping_cnt; - unsigned int settings_cnt; + unsigned int settings_cnt; unsigned int rst_cnt; unsigned int priority_cnt; unsigned int ts; @@ -204,4 +204,10 @@ tfw_h2_is_ready_to_send(TfwH2Ctx *ctx) return ctx->sched.root.active_cnt && ctx->rem_wnd; } +static inline bool +tfw_h2_conn_or_stream_wnd_is_exceeded(TfwH2Ctx *ctx, TfwStream *stream) +{ + return ctx->rem_wnd <= 0 || stream->rem_wnd <= 0; +} + #endif /* __HTTP2__ */ diff --git a/fw/http_frame.c b/fw/http_frame.c index 0e7b76e7f2..e7d1bf5a8f 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -312,7 +312,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr 
*data, TfwMsg msg = {}; unsigned char buf[FRAME_HEADER_SIZE]; TfwStr *hdr_str = TFW_STR_CHUNK(data, 0); - TfwH2Conn *conn = ctx->conn; + TfwConn *conn = (TfwConn *)ctx->conn; BUG_ON(hdr_str->data); hdr_str->data = buf; @@ -326,10 +326,11 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, T_DBG2("Preparing HTTP/2 message with %lu bytes data\n", data->len); msg.len = data->len; - if ((r = tfw_msg_iter_setup(&it, &msg.skb_head, msg.len))) + if ((r = tfw_msg_iter_setup(&it, CLIENT_MEM_FROM_CONN(conn), + &msg.skb_head, msg.len))) goto err; - if ((r = tfw_msg_write(&it, data))) + if ((r = tfw_msg_iter_write(&it, data))) goto err; switch (type) { @@ -357,7 +358,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, tfw_h2_on_tcp_entail_ack; } - if ((r = tfw_connection_send((TfwConn *)conn, &msg))) + if ((r = tfw_connection_send(conn, &msg))) goto err; /* * We do not close client connection automatically here in case @@ -366,7 +367,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, * was successful - to avoid hanged unclosed client connection. 
*/ if (type == TFW_FRAME_CLOSE || type == TFW_FRAME_SHUTDOWN) - TFW_CONN_TYPE((TfwConn *)conn) |= Conn_Stop; + TFW_CONN_TYPE(conn) |= Conn_Stop; return 0; @@ -1134,7 +1135,7 @@ tfw_h2_ctrl_frame_limit(TfwH2Ctx *ctx, TfwFrameType hdr_type) break; } - return true; + return true; } /* @@ -1179,7 +1180,7 @@ do { \ goto conn_term; if (unlikely(!tfw_h2_ctrl_frame_limit(ctx, hdr_type))) - return T_BLOCK_WITH_RST; + return T_BLOCK_WITH_RST; /* * TODO: RFC 7540 Section 6.2: @@ -1926,7 +1927,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) while (unlikely(h2->skb_head->len <= h2->data_off)) { struct sk_buff *skb = ss_skb_dequeue(&h2->skb_head); h2->data_off -= skb->len; - kfree_skb(skb); + ss_kfree_skb(skb); /* * Special case when the frame is postponed just * in the beginning of the app data, after all @@ -1946,16 +1947,14 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) pskb = h2->skb_head; if ((r = ss_skb_chop_head_tail(NULL, pskb, h2->data_off, 0))) { - kfree_skb(nskb); + ss_kfree_skb(nskb); goto out; } h2->data_off = 0; h2->skb_head = pskb->next = pskb->prev = NULL; r = tfw_http_msg_process_generic(c, h2->cur_stream, pskb, next); - /* TODO #1490: Check this place, when working on the task. */ - if (r && r != T_DROP) { - WARN_ON_ONCE(r == T_POSTPONE); - kfree_skb(nskb); + if (tfw_error_code_is_crucial(r)) { + ss_kfree_skb(nskb); goto out; } } @@ -1972,7 +1971,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) while (unlikely(h2->skb_head != end)) { pskb = ss_skb_dequeue(&h2->skb_head); h2->data_off -= pskb->len; - kfree_skb(pskb); + ss_kfree_skb(pskb); } pskb = h2->skb_head; @@ -1980,10 +1979,8 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) h2->data_off = 0; /* The skb will not be parsed, just flags will be checked. */ r = tfw_http_msg_process_generic(c, h2->cur_stream, pskb, next); - /* TODO #1490: Check this place, when working on the task. 
*/ - if (r && r != T_DROP) { - WARN_ON_ONCE(r == T_POSTPONE); - kfree_skb(nskb); + if (tfw_error_code_is_crucial(r)) { + ss_kfree_skb(nskb); goto out; } } @@ -2003,7 +2000,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) out: ss_skb_queue_purge(&h2->skb_head); - if (r && r != T_POSTPONE && r != T_DROP) + if (tfw_error_code_is_crucial(r)) tfw_h2_context_reinit(h2, false); return r; @@ -2068,7 +2065,7 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, while (skb && unlikely(!skb->len)) { ss_skb_unlink(&stream->xmit.skb_head, skb); - kfree_skb(skb); + ss_kfree_skb(skb); skb = stream->xmit.skb_head; } } @@ -2139,6 +2136,24 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, return r; } +static int +tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) +{ + TfwConn *conn = (TfwConn *)sk->sk_user_data; + int r; + + BUG_ON(conn->write_queue); + r = ss_skb_tcp_entail_list(sk, skb_head, mss_now, snd_wnd); + if (unlikely(r)) + return r; + + ss_skb_queue_splice(&conn->write_queue, skb_head); + sock_set_flag(sk, SOCK_TEMPESTA_HAS_DATA); + + return 0; +} + static int tfw_h2_stream_xmit_process(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, bool stream_is_exclusive, unsigned int mss_now, @@ -2190,7 +2205,7 @@ do { \ #define FRAME_XMIT_FSM_NEXT(frame_length, state) \ do { \ - *snd_wnd -= frame_length; \ + *snd_wnd -= frame_length + FRAME_HEADER_SIZE; \ T_FSM_JMP(state); \ } while(0) @@ -2250,7 +2265,7 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_DATA_FRAMES) { - if (unlikely(ctx->rem_wnd <= 0 || stream->rem_wnd <= 0)) + if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) ADJUST_BLOCKED_STREAMS_AND_EXIT(0, HTTP2_DATA); CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_DATA, @@ -2308,13 +2323,15 @@ do { \ if (stream->xmit.h_len) { T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); } else { - if (stream->xmit.postponed + if 
(unlikely(stream->xmit.postponed) && !stream->xmit.frame_length - && !ctx->cur_send_headers) { - struct sk_buff **postponed = - &stream->xmit.postponed; + && !ctx->cur_send_headers) + { + struct sk_buff **head = &stream->xmit.postponed; - r = ss_skb_tcp_entail_list(sk, postponed); + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2343,7 +2360,10 @@ do { \ * response is sent. */ if (unlikely(stream->xmit.skb_head)) { - r = ss_skb_tcp_entail_list(sk, &stream->xmit.skb_head); + struct sk_buff **head = &stream->xmit.skb_head; + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2373,11 +2393,14 @@ do { \ T_WARN("Failed to send frame %d", r); return r; } - if (stream->xmit.postponed && !ctx->cur_send_headers) { - struct sk_buff **postponed = - &stream->xmit.postponed; + if (unlikely(stream->xmit.postponed) + && !ctx->cur_send_headers) + { + struct sk_buff **head = &stream->xmit.postponed; - r = ss_skb_tcp_entail_list(sk, postponed); + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2394,10 +2417,10 @@ do { \ } int -tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now) +tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, + unsigned long snd_wnd) { TfwStreamSched *sched = &ctx->sched; - unsigned long snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now); bool stop = false; int r = 0; TfwStream *stream; @@ -2434,7 +2457,6 @@ tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now) r = tfw_h2_stream_xmit_process(sk, ctx, stream, stream_is_exclusive, mss_now, &snd_wnd, &stop); - if (!tfw_h2_stream_is_active(stream)) { tfw_h2_sched_deactivate_stream(sched, stream); if (!stream->xmit.skb_head) { diff --git a/fw/http_frame.h b/fw/http_frame.h index 
dfac951beb..fed94c2e9a 100644 --- a/fw/http_frame.h +++ b/fw/http_frame.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2022-2025 Tempesta Technologies, Inc. + * Copyright (C) 2022-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -159,7 +159,8 @@ int tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next); int tfw_h2_send_rst_stream(TfwH2Ctx *ctx, unsigned int id, TfwH2Err err_code); int tfw_h2_send_goaway(TfwH2Ctx *ctx, TfwH2Err err_code, bool attack); -int tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now); +int tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, + unsigned long snd_wnd); static inline void tfw_h2_pack_frame_header(unsigned char *p, const TfwFrameHdr *hdr) diff --git a/fw/http_limits.c b/fw/http_limits.c index 147e1e5533..566de32fdd 100644 --- a/fw/http_limits.c +++ b/fw/http_limits.c @@ -1671,6 +1671,34 @@ frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len) } +int +frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) +{ + TfwClient *cli = (TfwClient *)conn->peer; + TfwVhost *dflt_vh; + + if (likely(!tfw_cli_hard_mem_limit + || tfw_client_mem(cli) <= tfw_cli_hard_mem_limit)) + return 0; + + if (!block_if_exceeded) + return T_BLOCK; + + dflt_vh = tfw_vhost_lookup_default(); + if (WARN_ON_ONCE(!dflt_vh)) + return T_BLOCK; + + if (dflt_vh->frang_gconf->ip_block) { + unsigned int duration = dflt_vh->frang_gconf->ip_block_duration; + + tfw_filter_block_ip(cli, duration); + } + tfw_vhost_put(dflt_vh); + + return T_BLOCK; +} + + static int frang_sticky_cookie_limit(FrangAcc *ra, TfwCliConn *conn, unsigned int max_misses) diff --git a/fw/http_limits.h b/fw/http_limits.h index 47a9083095..fc79f12585 100644 --- a/fw/http_limits.h +++ b/fw/http_limits.h @@ -204,6 +204,7 @@ int frang_tls_handler(TlsCtx *tls, int state); int 
frang_sticky_cookie_handler(TfwHttpReq *req); bool frang_req_is_whitelisted(TfwHttpReq *req); int frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len); +int frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded); static inline int frang_time_in_frame(const unsigned long tcur, const unsigned long tprev) diff --git a/fw/http_msg.c b/fw/http_msg.c index 498bd0a626..09440a53d7 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -4,7 +4,7 @@ * HTTP message manipulation helpers for the protocol processing. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -807,106 +807,27 @@ tfw_http_msg_cutoff_body_chunks(TfwHttpResp *resp) } /** - * Set up @hm with empty SKB space of size @data_len for data writing. - * Set up the iterator @it to support consecutive writes. - * - * This function is intended to work together with tfw_msg_write() - * or tfw_http_msg_add_data() which use the @it iterator. - * - * @hm must be allocated dynamically (NOT statically) as it may have - * to sit in a queue long after the caller has finished. It's assumed - * that @hm is properly initialized. - * - * It's essential to understand, that "properly initialized" for @hm - * may mean different things depending on the intended use. Currently - * this function is called to send a response from cache, or to send - * an error response. An error response is not parsed or adjusted, so - * a shorter/faster version of message allocation and initialization - * may be used. (See __tfw_http_msg_alloc(full=False)). + * Allocate and add a single empty skb (with a place for TCP headers though) + * to the @hm iterator. The allocated skb has no space for the data, user is + * expected to add new paged fragments. 
*/ int -tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len) +tfw_http_msg_append_skb(TfwHttpMsg *hm) { + TfwMsgIter *it = &hm->iter; int r; - if ((r = tfw_msg_iter_setup(it, &hm->msg.skb_head, data_len))) + r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_client_mem(hm), 0); + if (unlikely(r)) return r; - T_DBG2("Set up HTTP message %pK with %lu bytes data\n", hm, data_len); - - return 0; -} -EXPORT_SYMBOL(tfw_http_msg_setup); - -/** - * Fill up an HTTP message by iterator @it with data from string @data. - * Properly maintain @hm header @field, so that @hm can be used in regular - * transformations. However, the header name and the value are not split into - * different chunks, so advanced headers matching is not available for @hm. - */ -int -tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr *field, - const TfwStr *data) -{ - const TfwStr *c, *end; - - BUG_ON(TFW_STR_DUP(data)); - if (WARN_ON_ONCE(it->frag >= skb_shinfo(it->skb)->nr_frags)) - return -E2BIG; - - TFW_STR_FOR_EACH_CHUNK(c, data, end) { - char *p; - unsigned int c_off = 0, c_size, f_room, n_copy; -this_chunk: - c_size = c->len - c_off; - if (it->frag >= 0) { - unsigned int f_size; - skb_frag_t *frag = &skb_shinfo(it->skb)->frags[it->frag]; - - f_size = skb_frag_size(frag); - f_room = PAGE_SIZE - skb_frag_off(frag) - f_size; - p = (char *)skb_frag_address(frag) + f_size; - n_copy = min(c_size, f_room); - skb_frag_size_add(frag, n_copy); - ss_skb_adjust_data_len(it->skb, n_copy); - } else { - f_room = skb_tailroom(it->skb); - n_copy = min(c_size, f_room); - p = skb_put(it->skb, n_copy); - } - - memcpy_fast(p, c->data + c_off, n_copy); - if (field && n_copy - && __tfw_http_msg_add_str_data(hm, field, p, n_copy, - it->skb)) - { - return -ENOMEM; - } - /* - * The chunk occupied all the spare space in the SKB fragment, - * switch to the next fragment. 
- */ - if (c_size >= f_room) { - if (WARN_ON_ONCE(tfw_msg_iter_next_data_frag(it) - && ((c_size != f_room) - || (c + 1 < end)))) - { - return -E2BIG; - } - /* - * Not all data from the chunk has been copied, - * stay in the current chunk and copy the rest to the - * next fragment. - */ - if (c_size != f_room) { - c_off += n_copy; - goto this_chunk; - } - } - } + it->skb = ss_skb_peek_tail(&it->skb_head); + it->frag = -1; + skb_shinfo(it->skb)->flags = skb_shinfo(it->skb->prev)->flags; return 0; } +EXPORT_SYMBOL(tfw_http_msg_append_skb); void tfw_http_msg_pair(TfwHttpResp *resp, TfwHttpReq *req) @@ -954,12 +875,12 @@ tfw_http_msg_free(TfwHttpMsg *m) * for parsing and subsequent adjustment. */ TfwHttpMsg * -__tfw_http_msg_alloc(int type, bool full) +__tfw_http_msg_alloc(TfwClientMem *owner, int type, bool full) { TfwHttpMsg *hm = (type & Conn_Clnt) - ? (TfwHttpMsg *)tfw_pool_new(TfwHttpReq, + ? (TfwHttpMsg *)tfw_pool_new(TfwHttpReq, owner, TFW_POOL_ZERO) - : (TfwHttpMsg *)tfw_pool_new(TfwHttpResp, + : (TfwHttpMsg *)tfw_pool_new(TfwHttpResp, owner, TFW_POOL_ZERO); if (!hm) { T_WARN("Insufficient memory to create %s message\n", @@ -1003,9 +924,10 @@ __tfw_http_msg_alloc(int type, bool full) * MUST be used only for messages from cache or messages constructed locally. 
*/ int -tfw_http_msg_expand_data(TfwMsgIter *it, struct sk_buff **skb_head, +tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, const TfwStr *src, unsigned int *start_off) { + TfwMsgIter *it = &hm->iter; const TfwStr *c, *end; TFW_STR_FOR_EACH_CHUNK(c, src, end) { @@ -1015,6 +937,10 @@ tfw_http_msg_expand_data(TfwMsgIter *it, struct sk_buff **skb_head, if (!it->skb) { if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; + + ss_skb_set_owner(it->skb, ss_skb_dflt_destructor, + tfw_http_msg_client_mem(hm), + it->skb->truesize); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; if (!it->skb_head) @@ -1138,13 +1064,14 @@ tfw_http_msg_alloc_from_pool(TfwMsgIter *it, TfwPool* pool, size_t size) * data, which will split the paged fragment. */ int -tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwHttpMsg *msg, TfwPool* pool) { - int r; + TfwMsgIter *it = &msg->iter; + unsigned int room = TFW_POOL_CHUNK_ROOM(pool); char* addr; bool np; - unsigned int room = TFW_POOL_CHUNK_ROOM(pool); + int r; BUG_ON(room < 0); @@ -1283,6 +1210,9 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + tfw_http_msg_client_mem(hm), + nskb->truesize); /* * TODO #2136: Remove this flag during reworking * this function. Try to process headers and @@ -1512,27 +1442,3 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) return r; } - -/** - * Insert data from string @data to message at offset defined by message - * iterator @it and @off. This function doesn't maintain message structure. - * After insertion message iterator and @data will point at the start of - * inserted data fragment. 
- */ -int -tfw_http_msg_insert(TfwMsgIter *it, char **off, const TfwStr *data) -{ - int r; - TfwStr dst = {}; - - if ((r = ss_skb_get_room_w_frag(it->skb_head, it->skb, *off, data->len, - &dst, &it->frag))) - { - return r; - } - - *off = dst.data; - it->skb = dst.skb; - - return tfw_strcpy(&dst, data); -} diff --git a/fw/http_msg.h b/fw/http_msg.h index fd987938bc..b255f25c5d 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -76,18 +76,20 @@ tfw_http_msg_srvhdr_val(TfwStr *hdr, unsigned id, TfwStr *val) void tfw_http_msg_pair(TfwHttpResp *resp, TfwHttpReq *req); void tfw_http_msg_unpair(TfwHttpMsg *msg); -TfwHttpMsg *__tfw_http_msg_alloc(int type, bool full); +TfwHttpMsg *__tfw_http_msg_alloc(TfwClientMem *owner, int type, bool full); static inline TfwHttpReq * -tfw_http_msg_alloc_req_light(void) +tfw_http_msg_alloc_req_light(TfwClientMem *owner) { - return (TfwHttpReq *)__tfw_http_msg_alloc(Conn_Clnt, false); + return (TfwHttpReq *)__tfw_http_msg_alloc(owner, Conn_Clnt, false); } static inline TfwHttpResp * __tfw_http_msg_alloc_resp(TfwHttpReq *req, bool full) { - TfwHttpResp *resp = (TfwHttpResp *)__tfw_http_msg_alloc(Conn_Srv, full); + TfwHttpResp *resp = (TfwHttpResp *) + __tfw_http_msg_alloc(CLIENT_MEM_FROM_CONN(req->conn), + Conn_Srv, full); if (resp) tfw_http_msg_pair(resp, req); @@ -145,17 +147,14 @@ unsigned int tfw_http_msg_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); int tfw_http_msg_del_str(TfwHttpMsg *hm, TfwStr *str); int tfw_http_msg_cutoff_body_chunks(TfwHttpResp *resp); - -int tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len); -int tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr 
*field, - const TfwStr *data); +int tfw_http_msg_append_skb(TfwHttpMsg *hm); void tfw_http_msg_hdr_open(TfwHttpMsg *hm, unsigned char *hdr_start); int tfw_http_msg_hdr_close(TfwHttpMsg *hm); int tfw_http_msg_grow_hdr_tbl(TfwHttpMsg *hm); void tfw_http_msg_free(TfwHttpMsg *m); -int tfw_http_msg_expand_data(TfwMsgIter *it, struct sk_buff **skb_head, +int tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, const TfwStr *src, unsigned int *start_off); -int tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwMsgIter *it, +int tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwHttpMsg *hm, TfwPool* pool); int tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str); int tfw_h2_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, @@ -165,7 +164,6 @@ int tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, int __hdr_name_cmp(const TfwStr *hdr, const TfwStr *cmp_hdr); int __http_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup); -int tfw_http_msg_insert(TfwMsgIter *it, char **off, const TfwStr *data); #define TFW_H2_MSG_HDR_ADD(hm, name, val, idx) \ tfw_h2_msg_hdr_add(hm, name, sizeof(name) - 1, val, \ diff --git a/fw/http_parser.c b/fw/http_parser.c index c5864aab7a..3642b87ba4 100644 --- a/fw/http_parser.c +++ b/fw/http_parser.c @@ -5046,7 +5046,7 @@ tfw_http_parse_req(void *req_data, unsigned char *data, unsigned int len, /* HTTP method. */ __FSM_STATE(Req_Method, hot) { parser->_hdr_tag = TFW_HTTP_METHOD; - /* + /* * Open header manually. HTTP method is not a header, storing * it in @msg->h_tbl it's only optimization to not introduce * new field into TfwHttpReq. 
Using @tfw_http_msg_hdr_open @@ -10515,7 +10515,7 @@ tfw_h2_parse_req_hdr_val(unsigned char *data, unsigned long len, TfwHttpReq *req } __FSM_STATE(Req_Mark, hot) { - __FSM_H2_PSHDR_MOVE_FIN_fixup(Req_Mark, 1, Req_Path); + __FSM_H2_PSHDR_MOVE_FIN_fixup(Req_Mark, 1, Req_Path); } __FSM_STATE(Req_Path) { diff --git a/fw/http_sess.c b/fw/http_sess.c index 02c3d2cff4..6806a82fdd 100644 --- a/fw/http_sess.c +++ b/fw/http_sess.c @@ -29,7 +29,7 @@ * JS challenge client should execute it and send new request with * appropriate cookie just in time. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -365,13 +365,12 @@ tfw_http_sticky_add(TfwHttpResp *resp, bool cache) set_cookie.hpack_idx = 55; r = tfw_hpack_encode(resp, &set_cookie, !cache, !cache); } else if (cache) { - TfwMsgIter *it = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; - r = tfw_http_msg_expand_data(it, skb_head, &set_cookie, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &set_cookie, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &crlf, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &crlf, NULL); } else { r = tfw_http_msg_expand_from_pool(hm, &set_cookie); @@ -772,7 +771,7 @@ tfw_http_sess_precreate(void *data) return 0; } -static void +static int tfw_sess_ent_init(TdbRec *rec, void *data) { TfwSessEntry *ent = (TfwSessEntry *)rec->data; @@ -803,6 +802,8 @@ tfw_sess_ent_init(TdbRec *rec, void *data) rwlock_init(&sess->lock); T_DBG("http_sess was newly created, %pK\n", sess); + + return 0; } /** diff --git a/fw/http_stream.c b/fw/http_stream.c index bc47c85782..923ea20e0f 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. 
+ * Copyright (C) 2019-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -149,7 +149,7 @@ tfw_h2_stream_purge_send_queue(TfwStream *stream) BUG_ON(!skb); len -= skb->len; - kfree_skb(skb); + ss_kfree_skb(skb); } stream->xmit.h_len = stream->xmit.b_len = stream->xmit.t_len = stream->xmit.frame_length = 0; @@ -249,7 +249,7 @@ tfw_h2_stream_create(TfwH2Ctx *ctx, unsigned int id) void tfw_h2_stream_clean(TfwH2Ctx *ctx, TfwStream *stream) { - T_DBG3("Stop and delete stream (id %u state %d(%s) weight %u)," + T_DBG3("Stop and delete stream (id %u state %d(%s) weight %u)," " ctx %px streams num %lu\n", stream->id, tfw_h2_get_stream_state(stream), __h2_strm_st_n(stream), stream->weight, ctx, ctx->streams_num); @@ -812,6 +812,16 @@ tfw_h2_delete_stream(TfwH2Ctx *ctx, TfwStream *stream) kmem_cache_free(stream_cache, stream); } +void +tfw_h2_stream_skb_destructor(struct sk_buff *skb) +{ + TfwHttpResp *resp = (TfwHttpResp *)TFW_SKB_CB(skb)->opaque_data; + + TFW_SKB_CB(skb)->opaque_data = CLIENT_MEM_FROM_CONN(resp->req->conn); + ss_skb_dflt_destructor(skb); + tfw_http_resp_pair_free_and_put_conn(resp); +} + int tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, unsigned long h_len, unsigned long b_len) @@ -828,9 +838,7 @@ tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, return -EPIPE; } - ss_skb_setup_opaque_data(skb_head, resp, - tfw_http_resp_pair_free_and_put_conn); - TFW_SKB_CB(skb_head)->on_send = tfw_http_on_send_resp; + TFW_SKB_CB(skb_head)->on_send = tfw_h2_on_send_resp; TFW_SKB_CB(skb_head)->stream_id = stream->id; stream->xmit.resp = NULL; diff --git a/fw/http_stream.h b/fw/http_stream.h index 165becdef0..4e9b2c5d43 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. 
+ * Copyright (C) 2019-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -220,6 +220,7 @@ void tfw_h2_stream_add_closed(TfwH2Ctx *ctx, TfwStream *stream); void tfw_h2_stream_add_idle(TfwH2Ctx *ctx, TfwStream *idle); void tfw_h2_stream_purge_send_queue(TfwStream *stream); void tfw_h2_stream_purge_all_and_free_response(TfwStream *stream); +void tfw_h2_stream_skb_destructor(struct sk_buff *skb); static inline TfwStreamState tfw_h2_get_stream_state(TfwStream *stream) diff --git a/fw/http_stream_sched.c b/fw/http_stream_sched.c index 083adbfd57..7eeb8afd52 100644 --- a/fw/http_stream_sched.c +++ b/fw/http_stream_sched.c @@ -27,7 +27,7 @@ * are requested sequentially. Progressive JPEGS (only for Firefox browser) * is a very rare case, so we decide to process requests from streams from * larger to smaller weight. - * + * * When we search for the most priority stream we iterate over the levels of * the priority tree. For exanple: * 1 (256) @@ -234,7 +234,7 @@ static void __tfw_h2_stream_sched_remove(TfwStreamSched *sched, TfwStream *stream) { TfwStreamSchedEntry *parent = stream->sched->parent; - + tfw_h2_stream_sched_spin_lock_assert(sched); list_del_init(&stream->sched_node); stream->sched_state = HTTP2_STREAM_SCHED_STATE_UNKNOWN; @@ -453,7 +453,7 @@ tfw_h2_remove_stream_dep(TfwStreamSched *sched, TfwStream *stream) * Here we move children of the removed stream to the parent * scheduler. If parent scheduler has no children we move * current removed stream children as is (saving their weight) - * Otherwise we recalculate their weight according RFC. + * Otherwise we recalculate their weight according RFC. 
*/ parent_has_children = tfw_h2_stream_sched_has_children(parent); diff --git a/fw/http_stream_sched.h b/fw/http_stream_sched.h index ceee369890..5fcabff52a 100644 --- a/fw/http_stream_sched.h +++ b/fw/http_stream_sched.h @@ -34,7 +34,7 @@ * free list; * @active - head of the active streams scheduler list; * @blocked - head of the blocked streams scheduler list; - */ + */ typedef struct tfw_stream_sched_entry_t { u64 total_weight; long int active_cnt; diff --git a/fw/http_tbl.c b/fw/http_tbl.c index b6d37120ec..f3d4dde9f9 100644 --- a/fw/http_tbl.c +++ b/fw/http_tbl.c @@ -83,7 +83,7 @@ * - Extended string matching operators: "regex", "substring". * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -281,7 +281,7 @@ tfw_http_tbl_cfgstart(void) { BUG_ON(tfw_table_reconfig); - tfw_table_reconfig = tfw_pool_new(TfwHttpTable, TFW_POOL_ZERO); + tfw_table_reconfig = tfw_pool_new(TfwHttpTable, NULL, TFW_POOL_ZERO); if (!tfw_table_reconfig) { T_ERR_NL("Can't create a memory pool\n"); return -ENOMEM; diff --git a/fw/http_types.h b/fw/http_types.h index e487d413f4..ee8f055d06 100644 --- a/fw/http_types.h +++ b/fw/http_types.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2018-2025 Tempesta Technologies, Inc. + * Copyright (C) 2018-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -21,101 +21,100 @@ #define __TFW_HTTP_TYPES_H__ enum { - /* Common flags for requests and responses. */ - TFW_HTTP_FLAGS_COMMON = 0, - /* - * Connection management flags. - * - * CONN_CLOSE: the connection is to be closed after response is - * forwarded to the client. 
Set if: - * - 'Connection:' header contains 'close' term; - * - there is no possibility to serve further requests from the same - * connection due to errors or protocol restrictions. - * - * CONN_KA: 'Connection:' header contains 'keep-alive' term. The flag - * is not set for HTTP/1.1 connections which are persistent by default. - * CONN_EXTRA: 'Connection:' header contains additional terms.(NOT used) - * - * CONN_CLOSE and CONN_KA flags are mutual exclusive. - */ - TFW_HTTP_B_CONN_CLOSE = TFW_HTTP_FLAGS_COMMON, - /* - * This flag is set only together with previos one. - * Typically we close connection gracefully with - * TCP shutdown, but in case of attack, we should - * do it immediately using tcp_close. - */ - TFW_HTTP_B_CONN_CLOSE_FORCE, - TFW_HTTP_B_CONN_KA, - TFW_HTTP_B_CONN_UPGRADE, - TFW_HTTP_B_CONN_EXTRA, - /* Message is a websocket upgrade request */ - TFW_HTTP_B_UPGRADE_WEBSOCKET, - /* Message upgrade header contains extra fields */ - TFW_HTTP_B_UPGRADE_EXTRA, - /* - * Chunked is last transfer encoding. - * It is important to notice that there is a valid case - * when we receive chunked encoded response with empty - * body on HEAD request. - */ - TFW_HTTP_B_CHUNKED, - /* Chunked in the middle of applied transfer encodings. */ - TFW_HTTP_B_CHUNKED_APPLIED, - /* Message has chunked trailer headers part. */ - TFW_HTTP_B_CHUNKED_TRAILER, - /* Message has transfer encodings other than chunked. */ - TFW_HTTP_B_TE_EXTRA, - /* The message body is limited by the connection closing. */ - TFW_HTTP_B_UNLIMITED, - /* Media type is multipart/form-data. */ - TFW_HTTP_B_CT_MULTIPART, - /* Multipart/form-data request has a boundary parameter. */ - TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, - /* Content-length header was parsed. */ - TFW_HTTP_B_REQ_CONTENT_LENGTH_PARSED, - /* Singular header presents more than once. 
*/ - TFW_HTTP_B_FIELD_DUPENTRY, - /* Message headers are fully parsed */ - TFW_HTTP_B_HEADERS_PARSED, - /* Message is fully parsed */ - TFW_HTTP_B_FULLY_PARSED, - /* Message has HTTP/2 format. */ - TFW_HTTP_B_H2, - /* - * Message has all mandatory pseudo-headers - * (applicable for HTTP/2 mode only). - */ - TFW_HTTP_B_H2_HDRS_FULL, - - /* Request flags. */ - TFW_HTTP_FLAGS_REQ, - /* Sticky cookie is found and verified. */ - TFW_HTTP_B_HAS_STICKY = TFW_HTTP_FLAGS_REQ, - /* Request fitted no cache cookie rule */ - TFW_HTTP_B_CHAIN_NO_CACHE, - /* Request is non-idempotent. */ - TFW_HTTP_B_NON_IDEMP, - /* Request stated 'Accept: text/html' header */ - TFW_HTTP_B_ACCEPT_HTML, - /* Request is created by HTTP health monitor. */ - TFW_HTTP_B_HMONITOR, - /* Client was disconnected, drop the request. */ - TFW_HTTP_B_REQ_DROP, - /* Request is PURGE with an 'X-Tempesta-Cache: get' header. */ - TFW_HTTP_B_PURGE_GET, - /* - * Request should be challenged, but requested resourse - * is non-challengeable. Try to service such request - * from cache. - */ - TFW_HTTP_B_JS_NOT_SUPPORTED, - /* - * Response is fully processed and ready to be - * forwarded to the client. - */ - TFW_HTTP_B_REQ_RESP_READY, + /* Common flags for requests and responses. */ + TFW_HTTP_FLAGS_COMMON = 0, + /* + * Connection management flags. + * + * CONN_CLOSE: the connection is to be closed after response is + * forwarded to the client. Set if: + * - 'Connection:' header contains 'close' term; + * - there is no possibility to serve further requests from the same + * connection due to errors or protocol restrictions. + * + * CONN_KA: 'Connection:' header contains 'keep-alive' term. The flag + * is not set for HTTP/1.1 connections which are persistent by default. + * CONN_EXTRA: 'Connection:' header contains additional terms.(NOT used) + * + * CONN_CLOSE and CONN_KA flags are mutual exclusive. + */ + TFW_HTTP_B_CONN_CLOSE = TFW_HTTP_FLAGS_COMMON, + /* + * This flag is set only together with previos one. 
+ * Typically we close connection gracefully with + * TCP shutdown, but in case of attack, we should + * do it immediately using tcp_close. + */ + TFW_HTTP_B_CONN_CLOSE_FORCE, + TFW_HTTP_B_CONN_KA, + TFW_HTTP_B_CONN_UPGRADE, + TFW_HTTP_B_CONN_EXTRA, + /* Message is a websocket upgrade request */ + TFW_HTTP_B_UPGRADE_WEBSOCKET, + /* Message upgrade header contains extra fields */ + TFW_HTTP_B_UPGRADE_EXTRA, + /* + * Chunked is last transfer encoding. + * It is important to notice that there is a valid case + * when we receive chunked encoded response with empty + * body on HEAD request. + */ + TFW_HTTP_B_CHUNKED, + /* Chunked in the middle of applied transfer encodings. */ + TFW_HTTP_B_CHUNKED_APPLIED, + /* Message has chunked trailer headers part. */ + TFW_HTTP_B_CHUNKED_TRAILER, + /* Message has transfer encodings other than chunked. */ + TFW_HTTP_B_TE_EXTRA, + /* The message body is limited by the connection closing. */ + TFW_HTTP_B_UNLIMITED, + /* Media type is multipart/form-data. */ + TFW_HTTP_B_CT_MULTIPART, + /* Multipart/form-data request has a boundary parameter. */ + TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + /* Content-length header was parsed. */ + TFW_HTTP_B_REQ_CONTENT_LENGTH_PARSED, + /* Singular header presents more than once. */ + TFW_HTTP_B_FIELD_DUPENTRY, + /* Message headers are fully parsed */ + TFW_HTTP_B_HEADERS_PARSED, + /* Message is fully parsed */ + TFW_HTTP_B_FULLY_PARSED, + /* Message has HTTP/2 format. */ + TFW_HTTP_B_H2, + /* + * Message has all mandatory pseudo-headers + * (applicable for HTTP/2 mode only). + */ + TFW_HTTP_B_H2_HDRS_FULL, + /* Request flags. */ + TFW_HTTP_FLAGS_REQ, + /* Sticky cookie is found and verified. */ + TFW_HTTP_B_HAS_STICKY = TFW_HTTP_FLAGS_REQ, + /* Request fitted no cache cookie rule */ + TFW_HTTP_B_CHAIN_NO_CACHE, + /* Request is non-idempotent. */ + TFW_HTTP_B_NON_IDEMP, + /* Request stated 'Accept: text/html' header */ + TFW_HTTP_B_ACCEPT_HTML, + /* Request is created by HTTP health monitor. 
*/ + TFW_HTTP_B_HMONITOR, + /* Client was disconnected, drop the request. */ + TFW_HTTP_B_REQ_DROP, + /* Request is PURGE with an 'X-Tempesta-Cache: get' header. */ + TFW_HTTP_B_PURGE_GET, + /* + * Request should be challenged, but requested resourse + * is non-challengeable. Try to service such request + * from cache. + */ + TFW_HTTP_B_JS_NOT_SUPPORTED, + /* + * Response is fully processed and ready to be + * forwarded to the client. + */ + TFW_HTTP_B_REQ_RESP_READY, /* * Rewrite method from HEAD to GET. Applicable only to request that can * be employed from cache. @@ -127,34 +126,34 @@ enum { /* 100-continue response has been queued. */ TFW_HTTP_B_CONTINUE_QUEUED, - /* Response flags */ - TFW_HTTP_FLAGS_RESP, - /* Response has no body. */ - TFW_HTTP_B_VOID_BODY = TFW_HTTP_FLAGS_RESP, - /* Response has header 'Date:'. */ - TFW_HTTP_B_HDR_DATE, - /* Response has header 'Last-Modified:'. */ - TFW_HTTP_B_HDR_LMODIFIED, - /* - * Response has header 'Etag: ' and this header is - * not enclosed in double quotes. - */ - TFW_HTTP_B_HDR_ETAG_HAS_NO_QOUTES, - /* Request URI is absolute (HTTP/1.x only) */ - TFW_HTTP_B_ABSOLUTE_URI, - /* - * This is the error response, connection - * will be closed after sending it. - */ - TFW_HTTP_B_CLOSE_ERROR_RESPONSE, + /* Response flags */ + TFW_HTTP_FLAGS_RESP, + /* Response has no body. */ + TFW_HTTP_B_VOID_BODY = TFW_HTTP_FLAGS_RESP, + /* Response has header 'Date:'. */ + TFW_HTTP_B_HDR_DATE, + /* Response has header 'Last-Modified:'. */ + TFW_HTTP_B_HDR_LMODIFIED, + /* + * Response has header 'Etag: ' and this header is + * not enclosed in double quotes. + */ + TFW_HTTP_B_HDR_ETAG_HAS_NO_QOUTES, + /* Request URI is absolute (HTTP/1.x only) */ + TFW_HTTP_B_ABSOLUTE_URI, + /* + * This is the error response, connection + * will be closed after sending it. + */ + TFW_HTTP_B_CLOSE_ERROR_RESPONSE, /* This is 100-continue response. */ TFW_HTTP_B_CONTINUE_RESP, - /* This response is during trailers encoding. 
*/ - TFW_HTTP_B_RESP_ENCODE_TRAILERS, + /* This response is during trailers encoding. */ + TFW_HTTP_B_RESP_ENCODE_TRAILERS, - _TFW_HTTP_FLAGS_NUM + _TFW_HTTP_FLAGS_NUM }; /* Forward declaration of common HTTP types. */ @@ -168,6 +167,8 @@ typedef struct tfw_hdr_mods_t TfwHdrMods; typedef struct frang_global_cfg_t FrangGlobCfg; typedef struct frang_vhost_cfg_t FrangVhostCfg; typedef struct tfw_http_cookie_t TfwStickyCookie; -typedef struct tfw_http_stream_t TfwStream; +typedef struct tfw_http_stream_t TfwStream; +typedef struct tfw_cli_conn_t TfwCliConn; +typedef struct tfw_client_mem_t TfwClientMem; #endif /* __TFW_HTTP_TYPES_H__ */ diff --git a/fw/msg.c b/fw/msg.c index b10c0b4442..db73ba4c2a 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2018-2023 Tempesta Technologies, Inc. + * Copyright (C) 2018-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -22,35 +22,18 @@ #include "http_msg.h" #include "ss_skb.h" -/** - * Fill up an HTTP message by iterator @it with data from string @data. - * This is a quick message creator which doesn't maintain properly - * parts of the message structure like headers table. So the HTTP message - * cannot be used where HTTP message transformations are required. - * - * An iterator @it is used to support multiple calls to this function - * after the set up. This function can only be called after a call to - * tfw_http_msg_setup(). It works only with empty SKB space prepared - * by the function. - */ -int -tfw_msg_write(TfwMsgIter *it, const TfwStr *data) -{ - return tfw_http_msg_add_data(it, NULL, NULL, data); -} -EXPORT_SYMBOL(tfw_msg_write); - /** * Allocate list of skbs to store data with given length @data_len and * initialise the iterator it. Shouldn't be called against previously used * iterator, since its current state is to be rewritten. 
*/ int -tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, size_t data_len) +tfw_msg_iter_setup(TfwMsgIter *it, TfwClientMem *owner, + struct sk_buff **skb_head, size_t data_len) { int r; - if ((r = ss_skb_alloc_data(skb_head, data_len))) + if ((r = ss_skb_alloc_data(skb_head, owner, data_len))) return r; it->skb = it->skb_head = *skb_head; it->frag = -1; @@ -60,41 +43,105 @@ tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, size_t data_len) return 0; } -/** - * Allocate and add a single empty skb (with a place for TCP headers though) - * to the iterator. The allocated skb has no space for the data, user is - * expected to add new paged fragments. - */ -int -tfw_msg_iter_append_skb(TfwMsgIter *it) +static inline int +tfw_msg_iter_next_data_frag(TfwMsgIter *it) { - int r; + if (skb_shinfo(it->skb)->nr_frags > it->frag + 1) { + ++it->frag; + return 0; + } - if ((r = ss_skb_alloc_data(&it->skb_head, 0))) - return r; - it->skb = ss_skb_peek_tail(&it->skb_head); + it->skb = it->skb->next; + if (it->skb == it->skb_head || !skb_shinfo(it->skb)->nr_frags) { + it->frag = MAX_SKB_FRAGS; + return -EINVAL; + } it->frag = -1; - skb_shinfo(it->skb)->flags = skb_shinfo(it->skb->prev)->flags; return 0; } /** - * Find origin fragment of data @off and set it as active message iterator - * fragment. + * Fill up an HTTP message by iterator @it with data from string @data. + * Properly maintain @hm header @field, so that @hm can be used in regular + * transformations. However, the header name and the value are not split into + * different chunks, so advanced headers matching is not available for @hm. 
*/ -int tfw_http_iter_set_at(TfwMsgIter *it, char *off) +static int +tfw_msg_iter_add_data(TfwMsgIter *it, const TfwStr *data) { - do { - if (!ss_skb_find_frag_by_offset(it->skb, off, &it->frag)) - return 0; - it->skb = it->skb->next; + const TfwStr *c, *end; + + BUG_ON(TFW_STR_DUP(data)); + if (WARN_ON_ONCE(it->frag >= skb_shinfo(it->skb)->nr_frags)) + return -E2BIG; + + TFW_STR_FOR_EACH_CHUNK(c, data, end) { + char *p; + unsigned int c_off = 0, c_size, f_room, n_copy; +this_chunk: + c_size = c->len - c_off; + if (it->frag >= 0) { + unsigned int f_size; + skb_frag_t *frag = &skb_shinfo(it->skb)->frags[it->frag]; + + f_size = skb_frag_size(frag); + f_room = PAGE_SIZE - skb_frag_off(frag) - f_size; + p = (char *)skb_frag_address(frag) + f_size; + n_copy = min(c_size, f_room); + skb_frag_size_add(frag, n_copy); + ss_skb_adjust_data_len(it->skb, n_copy); + } else { + f_room = skb_tailroom(it->skb); + n_copy = min(c_size, f_room); + p = skb_put(it->skb, n_copy); + } + + memcpy_fast(p, c->data + c_off, n_copy); + /* + * The chunk occupied all the spare space in the SKB fragment, + * switch to the next fragment. + */ + if (c_size >= f_room) { + if (WARN_ON_ONCE(tfw_msg_iter_next_data_frag(it) + && ((c_size != f_room) + || (c + 1 < end)))) + { + return -E2BIG; + } + /* + * Not all data from the chunk has been copied, + * stay in the current chunk and copy the rest to the + * next fragment. + */ + if (c_size != f_room) { + c_off += n_copy; + goto this_chunk; + } + } + } - } while (it->skb != it->skb_head); + return 0; +} - return -E2BIG; +/** + * Fill up an HTTP message by iterator @it with data from string @data. + * This is a quick message creator which doesn't maintain properly + * parts of the message structure like headers table. So the HTTP message + * cannot be used where HTTP message transformations are required. + * + * An iterator @it is used to support multiple calls to this function + * after the set up. 
This function can only be called after a call to + * tfw_http_msg_setup(). It works only with empty SKB space prepared + * by the function. + */ +int +tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data) +{ + return tfw_msg_iter_add_data(it, data); } +EXPORT_SYMBOL(tfw_msg_iter_write); /** * Move message iterator from @data pointer by @sz symbols right. diff --git a/fw/msg.h b/fw/msg.h index 6917489d43..2460b76227 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -4,7 +4,7 @@ * Generic protocol message. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2023 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -97,29 +97,9 @@ typedef struct { TfwStr hdr; } TfwMsgParseIter; -int tfw_msg_write(TfwMsgIter *it, const TfwStr *data); -int tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, - size_t data_len); -int tfw_msg_iter_append_skb(TfwMsgIter *it); -int tfw_http_iter_set_at(TfwMsgIter *it, char *off); +int tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data); +int tfw_msg_iter_setup(TfwMsgIter *it, TfwClientMem *owner, + struct sk_buff **skb_head, size_t data_len); int tfw_msg_iter_move(TfwMsgIter *it, unsigned char **data, unsigned long sz); -static inline int -tfw_msg_iter_next_data_frag(TfwMsgIter *it) -{ - if (skb_shinfo(it->skb)->nr_frags > it->frag + 1) { - ++it->frag; - return 0; - } - - it->skb = it->skb->next; - if (it->skb == it->skb_head || !skb_shinfo(it->skb)->nr_frags) { - it->frag = MAX_SKB_FRAGS; - return -EINVAL; - } - it->frag = -1; - - return 0; -} - #endif /* __TFW_MSG_H__ */ diff --git a/fw/pool.c b/fw/pool.c index e16225a096..ec920fab20 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -25,7 +25,7 @@ * be immediately freed to keep stack-like memory management. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). 
- * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ #include "lib/str.h" #include "lib/fault_injection_alloc.h" +#include "fw/client.h" #include "pool.h" #define TFW_POOL_HEAD_OFF (TFW_POOL_ALIGN_SZ(sizeof(TfwPool)) \ @@ -70,10 +71,10 @@ static unsigned long __percpu (*pg_cache)[TFW_POOL_PGCACHE_SZ]; * through buddies coalescing). So we never cache multi-pages. */ static unsigned long -tfw_pool_alloc_pages(unsigned int order) +tfw_pool_alloc_pages(TfwClientMem *cli_mem, unsigned int order) { + unsigned long pg_res = 0; unsigned int *pgn; - unsigned long pg_res; gfp_t flags; local_bh_disable(); @@ -83,20 +84,24 @@ tfw_pool_alloc_pages(unsigned int order) if (likely(*pgn && !order)) { --*pgn; pg_res = ((unsigned long *)this_cpu_ptr(pg_cache))[*pgn]; - - local_bh_enable(); - - return pg_res; } local_bh_enable(); - flags = order > 0 ? GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; - return __get_free_pages(flags, order); + if (!pg_res) { + flags = order > 0 ? 
GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; + pg_res = tfw__get_free_pages(flags, order); + } + if (likely(pg_res) && cli_mem) + tfw_client_adjust_mem(cli_mem, PAGE_SIZE << order); + + return pg_res; + } ALLOW_ERROR_INJECTION(tfw_pool_alloc_pages, NULL); static void -tfw_pool_free_pages(unsigned long addr, unsigned int order) +tfw_pool_free_pages(TfwClientMem *cli_mem, unsigned long addr, + unsigned int order) { unsigned int *pgn; int refcnt; @@ -106,6 +111,9 @@ tfw_pool_free_pages(unsigned long addr, unsigned int order) pgn = this_cpu_ptr(&pg_next); refcnt = page_count(virt_to_page(addr)); + if (cli_mem) + tfw_client_adjust_mem(cli_mem, -(PAGE_SIZE << order)); + if (likely(*pgn < TFW_POOL_PGCACHE_SZ && !order && refcnt == 1)) { ((unsigned long *)this_cpu_ptr(pg_cache))[*pgn] = addr; ++*pgn; @@ -129,7 +137,7 @@ __tfw_pool_alloc_page(TfwPool *p, size_t n, bool align) unsigned int off = desc_size + n; unsigned int order = get_order(off); - c = (TfwPoolChunk *)tfw_pool_alloc_pages(order); + c = (TfwPoolChunk *)tfw_pool_alloc_pages(p->owner, order); if (!c) return NULL; c->next = curr; @@ -186,7 +194,9 @@ tfw_pool_free(TfwPool *p, void *ptr, size_t n) /* Free empty chunk which doesn't contain the pool header. 
*/ if (unlikely(p->off == TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))) { TfwPoolChunk *next = p->curr->next; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(p->curr), p->order); + + tfw_pool_free_pages(p->owner, TFW_POOL_CHUNK_BASE(p->curr), + p->order); p->curr = next; p->order = next->order; p->off = next->off; @@ -211,7 +221,8 @@ tfw_pool_clean_single(TfwPool *pool, void *ptr) if ((char *)ptr >= (char *)TFW_POOL_CHUNK_BASE(c) && (char *)ptr < (char *)TFW_POOL_CHUNK_BASE(c) + c->off) { - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(pool->owner, TFW_POOL_CHUNK_BASE(c), + c->order); prev->next = next; return; } @@ -236,7 +247,8 @@ tfw_pool_clean(TfwPool *pool) if (!(next = c->next)) break; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(pool->owner, TFW_POOL_CHUNK_BASE(c), + c->order); pool->curr->next = next; } } @@ -245,22 +257,27 @@ tfw_pool_clean(TfwPool *pool) * Allocate bit more pages than we need. */ TfwPool * -__tfw_pool_new(size_t n) +__tfw_pool_new(size_t n, TfwClientMem *owner) { + TfwClientMem *cli_mem = (TfwClientMem *)owner; TfwPool *p; TfwPoolChunk *c; unsigned int order; order = get_order(TFW_POOL_ALIGN_SZ(n) + TFW_POOL_HEAD_OFF); - c = (TfwPoolChunk *)tfw_pool_alloc_pages(order); + c = (TfwPoolChunk *)tfw_pool_alloc_pages(cli_mem, order); if (unlikely(!c)) return NULL; + if (cli_mem) + BUG_ON(!tfw_client_mem_get(cli_mem)); + p = (TfwPool *)((char *)c + TFW_POOL_ALIGN_SZ(sizeof(*c))); c->next = NULL; p->order = c->order = order; + p->owner = cli_mem; p->off = c->off = TFW_POOL_HEAD_OFF; p->curr = c; @@ -271,14 +288,19 @@ void tfw_pool_destroy(TfwPool *p) { TfwPoolChunk *c, *next; + TfwClientMem *cli_mem; if (!p) return; + cli_mem = p->owner; for (c = p->curr; c; c = next) { next = c->next; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(p->owner, TFW_POOL_CHUNK_BASE(c), + c->order); } + if (cli_mem) + tfw_client_mem_put(cli_mem); } int diff --git a/fw/pool.h 
b/fw/pool.h index 2fb623fc28..7fa305e0d4 100644 --- a/fw/pool.h +++ b/fw/pool.h @@ -4,7 +4,7 @@ * Memory pool. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ #define TFW_POOL_ALIGN_SZ(n) (((n) + 7) & ~7UL) #define TFW_POOL_ALIGN_PTR(p) ((void *)TFW_POOL_ALIGN_SZ((unsigned long)p)) +typedef struct tfw_client_mem_t TfwClientMem; + /** * Memory pool chunk descriptor. * @@ -53,18 +55,20 @@ typedef struct tfw_pool_chunk_t { * Memory pool descriptor. * * @curr - current chunk to allocate memory from; + * @owner - owner for memory accounting; * @order,@off - cached members of @curr; */ typedef struct { TfwPoolChunk *curr; + TfwClientMem *owner; unsigned int order; unsigned int off; } TfwPool; -#define tfw_pool_new(struct_name, mask) \ +#define tfw_pool_new(struct_name, owner, mask) \ ({ \ struct_name *s = NULL; \ - TfwPool *p = __tfw_pool_new(sizeof(struct_name)); \ + TfwPool *p = __tfw_pool_new(sizeof(struct_name), owner); \ if (likely(p)) { \ s = tfw_pool_alloc(p, sizeof(struct_name)); \ BUG_ON(!s); \ @@ -79,7 +83,7 @@ typedef struct { int tfw_pool_init(void); void tfw_pool_exit(void); -TfwPool *__tfw_pool_new(size_t n); +TfwPool *__tfw_pool_new(size_t n, TfwClientMem *owner); void *__tfw_pool_alloc_page(TfwPool *p, size_t n, bool align); void tfw_pool_free(TfwPool *p, void *ptr, size_t n); void tfw_pool_clean(TfwPool *p); diff --git a/fw/sock.c b/fw/sock.c index 10ff818adf..43e860ac50 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -2,7 +2,7 @@ * Synchronous Socket API. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -41,6 +41,7 @@ #include "tempesta_fw.h" #include "work_queue.h" #include "http_limits.h" +#include "tcp.h" typedef struct { struct sock *sk; @@ -170,7 +171,7 @@ ss_sk_incoming_cpu_update(struct sock *sk) * to shutdown. The only exception is closing activity - this is the only * activity allowed in progress of shutdown process. * - * Returns zero (SS_OK) if we're in critical section and SS_BAD if shutdown + * Returns zero (T_OK) if we're in critical section and T_BAD if shutdown * process in progress and we can't enter the section. */ static int @@ -183,7 +184,7 @@ ss_active_guard_enter(unsigned long val) * if we commited to shutdown. */ if (unlikely(!READ_ONCE(__ss_active))) - return SS_BAD; + return T_BAD; atomic64_add(val, acnt); @@ -194,10 +195,10 @@ ss_active_guard_enter(unsigned long val) */ if (unlikely(!READ_ONCE(__ss_active))) { atomic64_sub(val, acnt); - return SS_BAD; + return T_BAD; } - return SS_OK; + return T_OK; } ALLOW_ERROR_INJECTION(ss_active_guard_enter, ERRNO); @@ -224,7 +225,7 @@ ss_conn_drop_guard_exit(struct sock *sk) if (!sk->sk_user_data) return; - SS_CONN_TYPE(sk) &= ~Conn_Closing; + SS_CONN_TYPE(sk) &= ~(Conn_Closing | Conn_Shutdown | Conn_Stop); SS_CALL(connection_drop, sk); ss_active_guard_exit(SS_V_ACT_LIVECONN); } @@ -454,6 +455,7 @@ ss_skb_try_collapse(struct sock *sk, struct sk_buff *skb, tp->write_seq += skb->len; sk_wmem_queued_add(sk, delta); sk_mem_charge(sk, delta); + ss_skb_orphan(skb); kfree_skb_partial(skb, stolen); return true; @@ -476,6 +478,7 @@ ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, skb->mark = mark; if (tls_type) skb_set_tfw_tls_type(skb, tls_type); + skb_tfw_set_in_socket_write_queue(skb); ss_forced_mem_schedule(sk, skb->truesize); tcp_skb_entail(sk, skb); tp->write_seq += skb->len; @@ -484,16 +487,19 @@ ss_skb_tcp_entail(struct sock *sk, 
struct sk_buff *skb, unsigned int mark, } int -ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) +ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) { - struct sk_buff *skb, *tail, *next, *to_destroy; + struct sk_buff *tail, *next, *to_destroy; unsigned char tls_type = 0; unsigned int mark = 0; - void *opaque_data = NULL; - void (*destructor)(void *) = NULL; int r; - while ((skb = ss_skb_dequeue(skb_head))) { + while ((*snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now))) { + struct sk_buff *skb = ss_skb_dequeue(skb_head); + + if (!skb) + break; /* * @skb_head can be the head of several different skb * lists. We set tls type for the head of each new @@ -504,8 +510,6 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) if (TFW_SKB_CB(skb)->is_head) { tls_type = skb_tfw_tls_type(skb); mark = skb->mark; - opaque_data = TFW_SKB_CB(skb)->opaque_data; - destructor = TFW_SKB_CB(skb)->destructor; tail = tcp_write_queue_tail(sk); } /* @@ -517,7 +521,7 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) T_DBG3("[%d]: %s: drop skb=%pK data_len=%u len=%u\n", smp_processor_id(), __func__, skb, skb->data_len, skb->len); - kfree_skb(skb); + ss_kfree_skb(skb); continue; } @@ -530,6 +534,9 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) ss_skb_tcp_entail(sk, skb, mark, tls_type); } + if (*skb_head && !TFW_SKB_CB(*skb_head)->is_head) + ss_skb_setup_head_of_list(*skb_head, mark, tls_type); + return 0; restore_sk_write_queue: @@ -540,7 +547,9 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) tcp_wmem_free_skb(sk, to_destroy); } } - ss_skb_setup_opaque_data(*skb_head, opaque_data, destructor); + if (*skb_head && !TFW_SKB_CB(*skb_head)->is_head) + ss_skb_setup_head_of_list(*skb_head, mark, tls_type); + return r; } @@ -550,16 +559,15 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) static void ss_do_send(struct sock 
*sk, struct sk_buff **skb_head, int flags) { - int size, mss = tcp_send_mss(sk, &size, MSG_DONTWAIT); void *conn = sk->sk_user_data; unsigned char tls_type = flags & SS_F_ENCRYPT ? SS_SKB_F2TYPE(flags) : 0; T_DBG3("[%d]: %s: sk=%pK queue_empty=%d send_head=%pK" - " sk_state=%d mss=%d size=%d\n", + " sk_state=%d\n", smp_processor_id(), __func__, sk, tcp_write_queue_empty(sk), tcp_send_head(sk), - sk->sk_state, mss, size); + sk->sk_state); /* If the socket is inactive, there's no recourse. Drop the data. */ if (unlikely(!conn || !ss_sock_active(sk))) @@ -570,50 +578,30 @@ ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) if (ss_skb_on_send(conn, skb_head)) goto cleanup; - /* - * If skbs were pushed to scheuler tree, @skb_head is - * empty and `ss_skb_tcp_entail_list` doesn't make - * any job. - */ - if (ss_skb_tcp_entail_list(sk, skb_head)) { - ss_linkerror(sk, SS_F_ABORT); - goto cleanup; - } - - T_DBG3("[%d]: %s: sk=%p send_head=%p sk_state=%d flags=%x\n", - smp_processor_id(), __func__, - sk, tcp_send_head(sk), sk->sk_state, flags); - - /* - * If connection close flag is specified, then @ss_do_close is used to - * set FIN on final SKB and push all pending frames to the stack. - */ if (flags & SS_F_CONN_CLOSE) return; /* * We set SOCK_TEMPESTA_HAS_DATA when we add some skb in our - * scheduler tree. - * So there are two cases here: - * - packets out is equal to zero and sock flag is set, - * this means that we should call `tcp_push_pending_frames`. - * In this function our scheduler choose the most priority - * stream, make frames for this stream and push them to the - * socket write queue. - * - socket flag is not set, this means that we push skb directly - * to the socket write queue so we call `tcp_push` and don't - * run scheduler. - * If packets_out is not equal to zero `tcp_push_pending_frames` - * will be called later from `tcp_data_snd_check` when we receive - * ack from the peer. + * scheduler tree or connection write queue. 
+ * So there are three cases here: + * - TCP window is not equal to zero. In this case Tempesta FW pushes + * skbs from connection write queue to socket write queue according + * TCP window and then (if there is a still available TCP window and + * this is http2 client connection) calls our scheduler to choose the + * most priority stream, make frames for this stream and push them to + * the socket write queue. + * - TCP window is equal to zero. In this case `tcp_push_pending_frames` + * doesn't do anything. It will be called later, when we receive ack + * from the peer. + * - SOCK_TEMPESTA_HAS_DATA flag is not set. This is a rare case, when + * we send goaway/tls alert after error response, but this error + * response exceeded http2 window. In this case SOCK_TEMPESTA_HAS_DATA + * will be set during WINDOW_UPDATE processing and this function + * (`tcp_push_pending_frames`) will be called again. */ SS_IN_USE_PROTECT({ - if (sock_flag(sk, SOCK_TEMPESTA_HAS_DATA)) { - tcp_push_pending_frames(sk); - } else { - tcp_push(sk, MSG_DONTWAIT, mss, - TCP_NAGLE_OFF | TCP_NAGLE_PUSH, size); - } + tcp_push_pending_frames(sk); }); SS_STATE_PROCESS_RETURN(sk); @@ -621,7 +609,6 @@ ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) return; cleanup: - ss_skb_destroy_opaque_data(*skb_head); ss_skb_queue_purge(skb_head); } @@ -671,6 +658,8 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) * and after the transmission. */ if (flags & SS_F_KEEP_SKB) { + unsigned int head_data, copied_truesize; + skb = *skb_head; do { /* tcp_transmit_skb() will clone the skb. 
*/ @@ -681,6 +670,15 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) r = -ENOMEM; goto err; } + memset(twin_skb->cb, 0, sizeof(twin_skb->cb)); + head_data = MAX_TCP_HEADER + skb_headlen(twin_skb); + copied_truesize = + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + + SKB_DATA_ALIGN(head_data + + sizeof(struct skb_shared_info)); + ss_skb_set_owner(twin_skb, ss_skb_dflt_destructor, + TFW_SKB_CB(skb)->opaque_data, + copied_truesize); ss_skb_queue_tail(&sw.skb_head, twin_skb); skb = skb->next; } while (skb != *skb_head); @@ -900,7 +898,7 @@ ss_close(struct sock *sk, int flags) }; if (unlikely(!sk)) - return SS_OK; + return T_OK; ss_sk_incoming_cpu_update(sk); cpu = sk->sk_incoming_cpu; @@ -908,7 +906,7 @@ ss_close(struct sock *sk, int flags) sock_hold(sk); ticket = ss_wq_push(&sw, cpu); if (!ticket) - return SS_OK; + return T_OK; if (!(flags & SS_F_SYNC)) goto err; @@ -921,10 +919,10 @@ ss_close(struct sock *sk, int flags) goto err; } - return SS_OK; + return T_OK; err: sock_put(sk); - return SS_BAD; + return T_BAD; } /* @@ -958,7 +956,7 @@ do { \ tp->copied_seq += tcp_fin; ADJUST_PROCESSED_SKB(skb, tp, count, offset, processed); __kfree_skb(skb); - return SS_BAD; + return T_BAD; } while ((skb = ss_skb_dequeue(&skb_head))) { @@ -988,7 +986,7 @@ do { \ ss_skb_chop_head_tail(NULL, skb, offset, 0) != 0)) { __kfree_skb(skb); - r = SS_BAD; + r = T_BAD; goto out; } offset = 0; @@ -1022,7 +1020,7 @@ do { \ sk, smp_processor_id()); ++tp->copied_seq; if (!r) - r = SS_BAD; + r = T_BAD; } while ((skb = ss_skb_dequeue(&skb_head))) { if (unlikely(offset >= skb->len)) { @@ -1061,7 +1059,7 @@ do { \ static int ss_tcp_process_data(struct sock *sk) { - int r = 0, count, processed = 0; + int tmp_r, r = 0, count, processed = 0; unsigned int skb_len, skb_seq; struct sk_buff *skb, *tmp; struct tcp_sock *tp = tcp_sk(sk); @@ -1096,7 +1094,9 @@ ss_tcp_process_data(struct sock *sk) skb_len); } out: - SS_CALL(connection_recv_finish, sk->sk_user_data); + tmp_r = 
SS_CALL(connection_recv_finish, sk->sk_user_data); + if (unlikely(tfw_error_code_more_crucial(tmp_r, r))) + r = tmp_r; /* * Recalculate an appropriate TCP receive buffer space @@ -1156,16 +1156,16 @@ ss_tcp_data_ready(struct sock *sk) } switch (ss_tcp_process_data(sk)) { - case SS_OK: - case SS_POSTPONE: - case SS_DROP: + case T_OK: + case T_POSTPONE: + case T_DROP: SS_STATE_PROCESS_RETURN(sk); return; - case SS_BAD: - case SS_BLOCK_WITH_FIN: + case T_BAD: + case T_BLOCK_WITH_FIN: flags = SS_F_SYNC; break; - case SS_BLOCK_WITH_RST: + case T_BLOCK_WITH_RST: flags = SS_F_ABORT_FORCE; break; default: @@ -1622,12 +1622,24 @@ EXPORT_SYMBOL(ss_getpeername); static void __sk_close_locked(struct sock *sk, int flags) { + int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); + + if (sk->sk_fill_write_queue(sk, mss_now)) { + ss_linkerror(sk, 0); + bh_unlock_sock(sk); + return; + } ss_do_close(sk, flags); if (!sk_stream_closing(sk)) { ss_conn_drop_guard_exit(sk); } else { BUG_ON(!sock_flag(sk, SOCK_DEAD) || ((flags & SS_F_ABORT) == SS_F_ABORT)); + /* + * Tempesta FW sends all pending data in socket + * write queue and doesn't push anymore. + */ + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); SS_CONN_TYPE(sk) |= Conn_Closing; } bh_unlock_sock(sk); @@ -1637,16 +1649,16 @@ __sk_close_locked(struct sock *sk, int flags) static inline void ss_do_shutdown(struct sock *sk) { + int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); /* - * Prevent calling `tcp_done` from `tcp_shutdown` if error - * occurs to prevent double free. + * `tcp_shutdown` will be called from `sk->sk_fill_write_queue` + * after sending all pending data.
*/ - SS_IN_USE_PROTECT({ - tcp_shutdown(sk, SEND_SHUTDOWN); - }); - SS_STATE_PROCESS_RETURN(sk); SS_CONN_TYPE(sk) |= Conn_Shutdown; - SS_CALL(connection_on_shutdown, sk->sk_user_data); + if (sk->sk_fill_write_queue(sk, mss_now)) + ss_linkerror(sk, 0); + else + SS_CALL(connection_on_shutdown, sk->sk_user_data); } static inline bool @@ -1765,11 +1777,9 @@ ss_tx_action(void) } dead_sock: sock_put(sk); /* paired with push() calls */ - if (sw.skb_head) - ss_skb_destroy_opaque_data(sw.skb_head); while ((skb = ss_skb_dequeue(&sw.skb_head))) - kfree_skb(skb); + ss_kfree_skb(skb); } /* diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 45190d122b..3beb82ddbc 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -35,7 +35,6 @@ #include "server.h" #include "sync_socket.h" #include "tls.h" -#include "tcp.h" /* * ------------------------------------------------------------------------ @@ -49,6 +48,8 @@ static struct kmem_cache *tfw_h2_conn_cache; static int tfw_cli_cfg_ka_timeout = -1; unsigned int tfw_cli_max_concurrent_streams; +u64 tfw_cli_soft_mem_limit; +u64 tfw_cli_hard_mem_limit; static inline struct kmem_cache * tfw_cli_cache(int type) @@ -84,7 +85,7 @@ tfw_sock_cli_keepalive_timer_cb(struct timer_list *t) T_DBG("Client timeout end\n"); - if (TFW_CONN_TYPE(conn) & Conn_Closing) { + if (TFW_CONN_TYPE(conn) & Conn_Shutdown) { /* * If socket was shut down it is in TCP_FIN_WAIT1 or * TCP_FIN_WAIT2 state depends on receiving ack from @@ -110,6 +111,7 @@ tfw_cli_conn_alloc(int type) return NULL; tfw_connection_init((TfwConn *)cli_conn); + cli_conn->write_queue = NULL; INIT_LIST_HEAD(&cli_conn->seq_queue); spin_lock_init(&cli_conn->seq_qlock); spin_lock_init(&cli_conn->ret_qlock); @@ -147,6 +149,7 @@ tfw_cli_conn_free(TfwCliConn *cli_conn) void tfw_cli_conn_release(TfwCliConn *cli_conn) { + ss_skb_queue_purge(&cli_conn->write_queue); /* Paired with @frang_conn_new client obtain. 
*/ if (likely(cli_conn->sk)) tfw_connection_unlink_to_sk((TfwConn *)cli_conn); @@ -194,40 +197,9 @@ tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg) } static int -tfw_sk_fill_write_queue(struct sock *sk, unsigned int mss_now) +tfw_sock_clnt_fill_write_queue(struct sock *sk, unsigned int mss_now) { - TfwConn *conn = sk->sk_user_data; - TfwH2Ctx *h2; - int r; - - assert_spin_locked(&sk->sk_lock.slock); - /* - * This function is called under the socket lock, same as dropping a - * connection. Moreover this function is never called when socket - * state is TCP_CLOSE. When client closes the connection, we drop it - * from tcp_done() -> ss_conn_drop_guard_exit(), and socket state is - * set to TCP_CLOSE, so this function will never be called after it. - */ - BUG_ON(!conn); - - /* - * This function can be called both for HTTP1 and HTTP2 connections. - * Moreover this function can be called when HTTP2 connection is - * shut down before TLS hadshake was finished. - */ - h2 = TFW_CONN_PROTO(conn) == TFW_FSM_H2 ? - tfw_h2_context_safe(conn) : NULL; - if (!h2) - return 0; - - r = tfw_h2_make_frames(sk, h2, mss_now); - if (unlikely(r < 0)) - return r; - - if (!tfw_h2_is_ready_to_send(h2)) - sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); - - return r; + return tfw_connection_fill_sk_write_queue(sk->sk_user_data, mss_now); } /** @@ -289,8 +261,8 @@ tfw_sock_clnt_new(struct sock *sk) * find a simple and better solution. */ sk->sk_write_xmit = tfw_tls_encrypt; - sk->sk_fill_write_queue = tfw_sk_fill_write_queue; } + sk->sk_fill_write_queue = tfw_sock_clnt_fill_write_queue; /* Activate keepalive timer. */ mod_timer(&((TfwCliConn *)conn)->timer, @@ -379,7 +351,7 @@ tfw_cli_conn_on_shutdown(TfwConn *conn) * connection keep alive timer. Since we decide to patch and * reuse TCP keep alive timer, we don't need this callback and * should directly modify TCP keep alive timer from sock.c. 
- */ + */ tfw_cli_conn_mod_timer((TfwCliConn *)conn, tcp_fin_timeout); } @@ -387,6 +359,7 @@ static const SsHooks tfw_sock_http_clnt_ss_hooks = { .connection_new = tfw_sock_clnt_new, .connection_drop = tfw_sock_clnt_drop, .connection_recv = tfw_connection_recv, + .connection_recv_finish = tfw_connection_recv_finish, .connection_on_shutdown = tfw_cli_conn_on_shutdown, }; @@ -724,6 +697,57 @@ tfw_cfgop_keepalive_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce) return 0; } +static int +tfw_parse_client_mem(const char *val, unsigned long long *mem) +{ + size_t len = strlen(val); + char *p; + + *mem = memparse(val, &p); + if (p != val + len) + return -EINVAL; + + return 0; +} + +static int +tfw_cfgop_client_mem(TfwCfgSpec *cs, TfwCfgEntry *ce) +{ + int r; + + TFW_CFG_CHECK_NO_ATTRS(cs, ce); + TFW_CFG_CHECK_VAL_N(>=, 1, cs, ce); + TFW_CFG_CHECK_VAL_N(<, 3, cs, ce); + + r = tfw_parse_client_mem(ce->vals[0], &tfw_cli_soft_mem_limit); + if (unlikely(r)) { + T_ERR_NL("Invalid 'client_mem' value: '%s'", + ce->vals[0]); + return r; + } + + if (ce->val_n > 1) { + r = tfw_parse_client_mem(ce->vals[1], &tfw_cli_hard_mem_limit); + if (unlikely(r)) { + T_ERR_NL("Invalid 'client_mem' value: '%s'", + ce->vals[1]); + return r; + } + } else { + tfw_cli_hard_mem_limit = (tfw_cli_soft_mem_limit < U64_MAX / 2) ? 
+ tfw_cli_soft_mem_limit * 2 : U64_MAX; + } + + if (tfw_cli_hard_mem_limit < tfw_cli_soft_mem_limit) { + T_ERR_NL("Invalid 'client_mem' value: hard limit (%llu) is" + " less than soft (%llu)", tfw_cli_hard_mem_limit, + tfw_cli_soft_mem_limit); + return -EINVAL; + } + + return 0; +} + static void tfw_cfgop_cleanup_sock_clnt(TfwCfgSpec *cs) { @@ -966,6 +990,15 @@ static TfwCfgSpec tfw_sock_clnt_specs[] = { .allow_repeat = false, .allow_reconfig = true, }, + { + .name = "client_mem", + .deflt = "0 0", + .handler = tfw_cfgop_client_mem, + .cleanup = tfw_cfgop_cleanup_sock_clnt, + .allow_none = true, + .allow_repeat = false, + .allow_reconfig = true, + }, { 0 } }; diff --git a/fw/sock_srv.c b/fw/sock_srv.c index f20ab3cea5..b48d12e10b 100644 --- a/fw/sock_srv.c +++ b/fw/sock_srv.c @@ -421,6 +421,7 @@ tfw_srv_conn_release(TfwSrvConn *srv_conn) { TfwServer *srv = (TfwServer *)srv_conn->peer; + ss_skb_queue_purge(&srv_conn->write_queue); tfw_connection_release((TfwConn *)srv_conn); /* * conn->sk may be zeroed if we get here after a failed @@ -441,6 +442,12 @@ tfw_srv_conn_release(TfwSrvConn *srv_conn) tfw_srv_conn_stop(srv_conn); } +static int +tfw_sock_srv_fill_write_queue(struct sock *sk, unsigned int mss_now) +{ + return tfw_connection_fill_sk_write_queue(sk->sk_user_data, mss_now); +} + /** * Initiate a non-blocking connect attempt. * Returns immediately without waiting until a connection is established. @@ -492,6 +499,7 @@ tfw_sock_srv_connect_try(TfwSrvConn *srv_conn) tfw_srv_conn_init_as_dead(srv_conn); sk->sk_uid.val = SS_SRV_USER; ss_set_callbacks(sk); + sk->sk_fill_write_queue = tfw_sock_srv_fill_write_queue; /* * Set connection destructor such that connection failover can * take place if the connection attempt fails. diff --git a/fw/ss_skb.c b/fw/ss_skb.c index e1d9fa7de4..e2596fdd92 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -7,7 +7,7 @@ * on top on native Linux socket buffers. The helpers provide common and * convenient wrappers for skb processing.
* - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -91,7 +91,7 @@ ss_skb_alloc_pages(size_t len) for (i = 0; i < nr_frags; ++i) { struct page *page = alloc_page(GFP_ATOMIC); if (!page) { - kfree_skb(skb); + ss_kfree_skb(skb); return NULL; } skb_fill_page_desc(skb, i, page, 0, 0); @@ -109,7 +109,7 @@ ss_skb_alloc_pages(size_t len) * segmentation. The allocated payload space will be filled with data. */ int -ss_skb_alloc_data(struct sk_buff **skb_head, size_t len) +ss_skb_alloc_data(struct sk_buff **skb_head, TfwClientMem *owner, size_t len) { int i_skb, nr_skbs = len ? DIV_ROUND_UP(len, SS_SKB_MAX_DATA_LEN) : 1; size_t n = 0; @@ -120,6 +120,8 @@ ss_skb_alloc_data(struct sk_buff **skb_head, size_t len) skb = ss_skb_alloc_pages(n); if (!skb) return -ENOMEM; + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + owner, skb->truesize); ss_skb_queue_tail(skb_head, skb); } @@ -217,6 +219,12 @@ __extend_pgfrags(struct sk_buff *skb_head, struct sk_buff *skb, int from, int n) nskb = ss_skb_alloc(0); if (nskb == NULL) return -ENOMEM; + + if (!skb_tfw_is_in_socket_write_queue(skb)) { + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + TFW_SKB_CB(skb)->opaque_data, + nskb->truesize); + } skb_shinfo(nskb)->flags = skb_shinfo(skb)->flags; ss_skb_insert_after(skb, nskb); skb_shinfo(nskb)->nr_frags = n_excess; @@ -392,6 +400,7 @@ __split_linear_data(struct sk_buff *skb_head, struct sk_buff *skb, char *pspt, skb->tail -= tail_len; skb->data_len += tail_len; skb->truesize += tail_len; + ss_skb_adjust_client_mem(skb, tail_len); /* Make the fragment with the tail part. 
*/ __skb_fill_page_desc(skb, alloc, page, tail_off, tail_len); @@ -950,7 +959,7 @@ ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, skb->next->prev = skb->prev; skb->prev->next = skb->next; *skb_list_head = skb_hd = skb->next; - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = skb_hd; if (unlikely(skb->next == skb)) goto single_buff; @@ -969,7 +978,7 @@ ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, trail -= skb->len; skb_hd->prev = skb->prev; skb->prev->next = skb_hd; - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = skb_hd->prev; if (unlikely(skb == skb_hd)) goto single_buff; @@ -1007,11 +1016,11 @@ __ss_skb_free_empty(struct sk_buff **skb_head, struct sk_buff *skb, TfwStr *it) it->skb = it->skb->next; it->data = __skb_data_address(it->skb, &fragn); ss_skb_unlink(skb_head, it->skb); - kfree_skb(to_delete); + ss_kfree_skb(to_delete); } if (unlikely(!is_same && !skb->len)) { ss_skb_unlink(skb_head, skb); - kfree_skb(skb); + ss_kfree_skb(skb); } return was_updated; @@ -1212,7 +1221,7 @@ int ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, unsigned int *chunks, unsigned int *processed) { - int i, r = SS_OK; + int i, r = T_OK; unsigned int headlen = skb_headlen(skb); unsigned int _processed; struct skb_shared_info *si = skb_shinfo(skb); @@ -1226,7 +1235,7 @@ ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, _processed = 0; r = actor(objdata, skb->data, headlen, &_processed); *processed += _processed; - if (r != SS_POSTPONE) + if (r != T_POSTPONE) return r; } @@ -1242,7 +1251,7 @@ ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, r = actor(objdata, skb_frag_address(frag), skb_frag_size(frag), &_processed); *processed += _processed; - if (r != SS_POSTPONE) + if (r != T_POSTPONE) return r; } @@ -1298,6 +1307,7 @@ ss_skb_split(struct sk_buff *skb, int len) if (!buff) return NULL; + memset(buff->cb, 0, sizeof(buff->cb)); skb_reserve(buff, MAX_TCP_HEADER); /* @buff already accounts @n 
in truesize. */ @@ -1306,6 +1316,8 @@ ss_skb_split(struct sk_buff *skb, int len) skb->truesize -= nlen; buff->mark = skb->mark; + ss_skb_adjust_client_mem(skb, -nlen); + /* * These are orphaned SKBs that are taken out of the TCP/IP * stack and are completely owned by Tempesta. There is no @@ -1330,10 +1342,16 @@ ss_skb_init_for_xmit(struct sk_buff *skb) struct skb_shared_info *shinfo = skb_shinfo(skb); __u8 pfmemalloc = skb->pfmemalloc; - WARN_ON_ONCE(skb->sk); + ss_skb_orphan(skb); skb_dst_drop(skb); INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + + /* + * dev is used to save connection for memory accounting + * clear it before pass skb to the kernel. + */ + skb->dev = NULL; /* * Since we use skb->sb for our purpose we should * zeroed it before pass skb to the kernel. @@ -1453,7 +1471,8 @@ ss_skb_unroll_slow(struct sk_buff **skb_head, struct sk_buff *skb) return 0; cleanup: - ss_skb_queue_purge(skb_head); + while ((skb = ss_skb_dequeue(skb_head)) != NULL) + kfree_skb(skb); return -ENOMEM; } @@ -1700,10 +1719,72 @@ int ss_skb_realloc_headroom(struct sk_buff *skb) { int delta = MAX_TCP_HEADER - skb_headroom(skb); + unsigned int old_truesize; + int r; if (likely(delta <= 0)) return 0; - return pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); + if (TFW_SKB_CB(skb)->opaque_data) + old_truesize = skb->truesize; + + r = pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); + if (unlikely(r)) + return r; + + if (TFW_SKB_CB(skb)->opaque_data) + ss_skb_adjust_client_mem(skb, skb->truesize - old_truesize); + + return 0; } ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); + +void +ss_skb_dflt_destructor(struct sk_buff *skb) +{ + TfwClientMem *cli_mem = + (TfwClientMem *)TFW_SKB_CB(skb)->opaque_data; + + BUG_ON(skb_tfw_is_in_socket_write_queue(skb)); + ss_skb_adjust_client_mem(skb, -TFW_SKB_CB(skb)->mem); + tfw_client_mem_put(cli_mem); +} + +void +ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head) +{ + ss_skb_queue_splice(&((TfwConn 
*)conn)->write_queue, skb_head); + sock_set_flag(((TfwConn *)conn)->sk, SOCK_TEMPESTA_HAS_DATA); +} + +void +ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), + TfwClientMem *owner, unsigned int mem) +{ + TfwClientMem *cli_mem = (TfwClientMem *)owner; + + if (!cli_mem || !tfw_client_mem_get(cli_mem)) + return; + + WARN_ON(TFW_SKB_CB(skb)->opaque_data); + WARN_ON(TFW_SKB_CB(skb)->mem != 0); + TFW_SKB_CB(skb)->opaque_data = cli_mem; + TFW_SKB_CB(skb)->destructor = destructor; + ss_skb_adjust_client_mem(skb, mem); +} + +void +ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) +{ + TfwClientMem *cli_mem; + + if (skb_tfw_is_in_socket_write_queue(skb)) + return; + + cli_mem = (TfwClientMem *)TFW_SKB_CB(skb)->opaque_data; + if (cli_mem) { + TFW_SKB_CB(skb)->mem += delta; + WARN_ON(TFW_SKB_CB(skb)->mem < 0); + tfw_client_adjust_mem(cli_mem, delta); + } +} diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 1d359b1172..b0febb0c0a 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -3,7 +3,7 @@ * * Synchronous Sockets API for Linux socket buffers manipulation. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -26,34 +26,11 @@ #include #include "str.h" -#include "lib/log.h" - -/** - * Responses from socket hook functions. - */ -enum { - /* Generic socket error. */ - SS_BAD = T_BAD, - /* The packet must be dropped, but connection should be alive. */ - SS_DROP = T_DROP, - /* - * The packet must be blocked with TCP FIN (typically on a - * security event, when we reply to client). - */ - SS_BLOCK_WITH_FIN = T_BLOCK_WITH_FIN, - /* - * The packet must be blocked with TCP RST (typically on a - * security event). - */ - SS_BLOCK_WITH_RST = T_BLOCK_WITH_RST, - /* The packet should be stashed (made by callback). 
*/ - SS_POSTPONE = T_POSTPONE, - /* The packet looks good and we can safely pass it. */ - SS_OK = T_OK, -}; typedef int (*on_send_cb_t)(void *conn, struct sk_buff **skb_head); typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); +typedef void (*on_send_fail_cb_t)(void *conn, struct sk_buff *skb_head); +typedef struct tfw_client_mem_t TfwClientMem; /* * Tempesta FW sk_buff private data. @@ -63,20 +40,31 @@ typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); * @on_send - callback to special handling this skb before sending; * @on_tcp_entail - callback to special handling this skb before pushing * to socket write queue; + * @mem - memory used for this skb, used to account appropriate + * client memory; * @stream_id - id of sender stream; + * @tls_type - tls type of current skb, if it's data should be + * encrypted; * @is_head - flag indicates that this is a head of skb list; */ struct tfw_skb_cb { void *opaque_data; - void (*destructor)(void *opaque_data); + void (*destructor)(struct sk_buff *); on_send_cb_t on_send; on_tcp_entail_t on_tcp_entail; + long int mem; unsigned int stream_id; bool is_head; }; #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) +void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), + TfwClientMem *owner, unsigned int delta); +void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); +void ss_skb_dflt_destructor(struct sk_buff *skb); +void ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head); + static inline bool ss_skb_is_within_fragment(char *begin_fragment, char *position, char *end_fragment) @@ -88,35 +76,15 @@ static inline void ss_skb_setup_head_of_list(struct sk_buff *skb_head, unsigned int mark, unsigned char tls_type) { + BUILD_BUG_ON(sizeof(struct tfw_skb_cb) > + sizeof(((struct sk_buff *)(0))->cb)); + if (tls_type) skb_set_tfw_tls_type(skb_head, tls_type); skb_head->mark = mark; TFW_SKB_CB(skb_head)->is_head = true; } -static inline void 
-ss_skb_setup_opaque_data(struct sk_buff *skb_head, void *opaque_data, - void (*destructor)(void *)) -{ - TFW_SKB_CB(skb_head)->opaque_data = opaque_data; - TFW_SKB_CB(skb_head)->destructor = destructor; -} - -static inline void -ss_skb_destroy_opaque_data(struct sk_buff *skb_head) -{ - void *opaque_data = TFW_SKB_CB(skb_head)->opaque_data; - void (*destructor)(void *) = TFW_SKB_CB(skb_head)->destructor; - - BUILD_BUG_ON(sizeof(struct tfw_skb_cb) > - sizeof(((struct sk_buff *)(0))->cb)); - - if (opaque_data) { - BUG_ON(!destructor); - destructor(opaque_data); - } -} - static inline int ss_skb_on_send(void *conn, struct sk_buff **skb_head) { @@ -125,6 +93,8 @@ ss_skb_on_send(void *conn, struct sk_buff **skb_head) if (on_send) r = on_send(conn, skb_head); + if (!r && *skb_head) + ss_skb_on_send_dflt(conn, skb_head); return r; } @@ -198,6 +168,43 @@ ss_skb_queue_splice(struct sk_buff **skb_head, struct sk_buff **skb) *skb = NULL; } +static inline void +ss_skb_orphan(struct sk_buff *skb) +{ + void (*destructor)(struct sk_buff *); + + if (skb_tfw_is_in_socket_write_queue(skb)) + return; + + destructor = TFW_SKB_CB(skb)->destructor; + if (destructor) { + BUG_ON(!TFW_SKB_CB(skb)->opaque_data); + destructor(skb); + TFW_SKB_CB(skb)->destructor = NULL; + TFW_SKB_CB(skb)->opaque_data = NULL; + } else { + BUG_ON(TFW_SKB_CB(skb)->opaque_data); + } +} + +static inline void +__ss_kfree_skb(struct sk_buff *skb) +{ + if (!skb) + return; + ss_skb_orphan(skb); + __kfree_skb(skb); +} + +static inline void +ss_kfree_skb(struct sk_buff *skb) +{ + if (!skb) + return; + ss_skb_orphan(skb); + kfree_skb(skb); +} + static inline void ss_skb_remove(struct sk_buff *skb) { @@ -307,7 +314,7 @@ ss_skb_queue_purge(struct sk_buff **skb_head) { struct sk_buff *skb; while ((skb = ss_skb_dequeue(skb_head)) != NULL) - kfree_skb(skb); + ss_kfree_skb(skb); } static inline void @@ -316,6 +323,7 @@ ss_skb_adjust_data_len(struct sk_buff *skb, int delta) skb->len += delta; skb->data_len += delta; 
skb->truesize += delta; + ss_skb_adjust_client_mem(skb, delta); } /* @@ -359,6 +367,7 @@ ss_skb_alloc(size_t n) if (!skb) return NULL; skb_reserve(skb, MAX_TCP_HEADER); + memset(skb->cb, 0, sizeof(skb->cb)); return skb; } @@ -448,17 +457,18 @@ ss_skb_data_ptr_by_offset(struct sk_buff *skb, unsigned int off) char *ss_skb_fmt_src_addr(const struct sk_buff *skb, char *out_buf); -int ss_skb_alloc_data(struct sk_buff **skb_head, size_t len); +int ss_skb_alloc_data(struct sk_buff **skb_head, TfwClientMem *owner, + size_t len); struct sk_buff *ss_skb_split(struct sk_buff *skb, int len); int ss_skb_get_room_w_frag(struct sk_buff *skb_head, struct sk_buff *skb, - char *pspt, unsigned int len, TfwStr *it, int *fragn); + char *pspt, unsigned int len, TfwStr *it, + int *fragn); int ss_skb_expand_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, size_t head, size_t tail); int ss_skb_chop_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, size_t head, size_t tail); -int -ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, - size_t head, size_t trail); +int ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, + size_t head, size_t trail); int ss_skb_cutoff_data(struct sk_buff *skb_head, TfwStr *hdr, int skip, int tail); int skb_next_data(struct sk_buff *skb, char *last_ptr, TfwStr *it); @@ -473,9 +483,8 @@ int ss_skb_to_sgvec_with_new_pages(struct sk_buff *skb, struct scatterlist *sgl, struct page ***old_pages); int ss_skb_add_frag(struct sk_buff *skb_head, struct sk_buff **skb, char* addr, int *frag_idx, size_t frag_sz); -int -ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, - unsigned char *split_point); +int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, + unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); #if defined(DEBUG) && (DEBUG >= 4) diff --git a/fw/sync_socket.h b/fw/sync_socket.h index 84b85ed150..8fcbd9ccca 100644 --- a/fw/sync_socket.h +++ b/fw/sync_socket.h @@ -2,7 
+2,7 @@ * Synchronous Socket API. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -65,7 +65,7 @@ enum { * Connection is in special state: it socket is DEAD * and wait until ACK to our FIN is come. */ - Conn_Closing = (0x3 << __Flag_Bits), + Conn_Closing = (0x4 << __Flag_Bits), }; typedef struct tfw_conn_t TfwConn; @@ -91,7 +91,7 @@ typedef struct ss_hooks { int (*connection_recv)(TfwConn *conn, struct sk_buff *skb); /* Callback to make some job after processing received data. */ - void (*connection_recv_finish)(TfwConn *conn); + int (*connection_recv_finish)(TfwConn *conn); /* Callback to make some job on connection shutdown. */ void (*connection_on_shutdown)(TfwConn *conn); @@ -194,7 +194,8 @@ bool ss_active(void); void ss_get_stat(SsStat *stat); void ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, unsigned char tls_type); -int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head); +int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd); /* * We should all linux kernel functions like `tcp_push` or diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 27f269efb6..5aa125b44a 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -33,6 +33,7 @@ */ #include "helpers.h" #include "http_msg.h" + #include "pool.c" #include "apm.h" #include "filter.h" @@ -43,9 +44,15 @@ #include "tf_conf.h" #include "tf_filter.h" -static TfwConn conn_req, conn_resp; - +static DEFINE_PER_CPU(long, mem); unsigned int tfw_cli_max_concurrent_streams; +TfwConn conn_req, conn_resp; +TfwClientMem cli_mem = { + .mem = &mem, +}; +TfwClient client = { + .cli_mem = &cli_mem, +}; TfwHttpReq * test_req_alloc(size_t 
data_len) @@ -58,17 +65,19 @@ test_req_alloc(size_t data_len) * tfw_http_msg_alloc(). It is removed because we need to test how it * initializes the message and we would not like to test the copy-paste. */ - hmreq = __tfw_http_msg_alloc(Conn_HttpClnt, true); + hmreq = __tfw_http_msg_alloc(&cli_mem, Conn_HttpClnt, true); BUG_ON(!hmreq); - ret = tfw_http_msg_setup(hmreq, &it, data_len); - BUG_ON(ret); - - memset(&conn_req, 0, sizeof(TfwConn)); tfw_connection_init(&conn_req); + conn_req.peer = (TfwPeer *)&client; conn_req.proto.type = Conn_HttpClnt; hmreq->conn = &conn_req; hmreq->stream = &conn_req.stream; + + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem(hmreq), + &hmreq->msg.skb_head, data_len); + BUG_ON(ret); + tfw_http_init_parser_req((TfwHttpReq *)hmreq); return (TfwHttpReq *)hmreq; @@ -85,31 +94,32 @@ test_req_free(TfwHttpReq *req) } TfwHttpResp * -test_resp_alloc(size_t data_len) +test_resp_alloc(size_t data_len, TfwHttpReq *req) { TfwMsgIter it; int ret; - TfwHttpResp *hmresp = test_resp_alloc_no_data(); + TfwHttpMsg *hmresp = (TfwHttpMsg *)test_resp_alloc_no_data(req); - ret = tfw_http_msg_setup((TfwHttpMsg *)hmresp, &it, data_len); + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem(hmresp), + &hmresp->msg.skb_head, data_len); BUG_ON(ret); return (TfwHttpResp *)hmresp; } TfwHttpResp * -test_resp_alloc_no_data() +test_resp_alloc_no_data(TfwHttpReq *req) { TfwHttpMsg *hmresp; - hmresp = __tfw_http_msg_alloc(Conn_HttpSrv, true); + hmresp = __tfw_http_msg_alloc(&cli_mem, Conn_HttpSrv, true); BUG_ON(!hmresp); - memset(&conn_resp, 0, sizeof(TfwConn)); tfw_connection_init(&conn_resp); conn_resp.proto.type = Conn_HttpSrv; hmresp->conn = &conn_resp; hmresp->stream = &conn_resp.stream; + tfw_http_msg_pair((TfwHttpResp *)hmresp, req); tfw_http_init_parser_resp((TfwHttpResp *)hmresp); return (TfwHttpResp *)hmresp; @@ -189,7 +199,8 @@ void ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, { } -int ss_skb_tcp_entail_list(struct sock 
*sk, struct sk_buff **skb) +int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) { return 0; } @@ -436,6 +447,12 @@ frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len) return T_OK; } +int +frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) +{ + return T_OK; +} + int frang_sticky_cookie_handler(TfwHttpReq *req) { diff --git a/fw/t/unit/helpers.h b/fw/t/unit/helpers.h index 6b98d08abd..107f2142b6 100644 --- a/fw/t/unit/helpers.h +++ b/fw/t/unit/helpers.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2021 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -29,8 +29,11 @@ */ TfwHttpReq *test_req_alloc(size_t data_len); void test_req_free(TfwHttpReq *req); -TfwHttpResp *test_resp_alloc(size_t data_len); -TfwHttpResp *test_resp_alloc_no_data(void); +TfwHttpResp *test_resp_alloc(size_t data_len, TfwHttpReq *req); +TfwHttpResp *test_resp_alloc_no_data(TfwHttpReq *req); void test_resp_free(TfwHttpResp *req); +void test_req_resp_cleanup(void); + +extern TfwConn conn_req, conn_resp; #endif /* __TFW_TEST_HELPER_H__ */ diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index f30c55ba19..a2e444ffb0 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include "test.h" #include "test_http_parser_defs.h" #include "test_http_parser_common.h" +#include "helpers.h" int test_fail_counter; test_fixture_fn_t test_setup_fn; @@ -118,16 +119,20 @@ test_run_all(void) __fpu_schedule(); TEST_SUITE_MPART_RUN(http1_parser); + test_req_resp_cleanup(); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); - test_case_alloc_h2(); + TEST_SETUP(test_http2_parser_setup_fn); + TEST_TEARDOWN(test_http2_parser_teardown_fn); TEST_SUITE_MPART_RUN(http2_parser); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); - test_case_cleanup_h2(); TEST_SUITE_RUN(http2_parser_hpack); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SUITE_RUN(http_cache); @@ -137,6 +142,8 @@ test_run_all(void) __fpu_schedule(); TEST_SUITE_RUN(http_msg); + test_req_resp_cleanup(); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SUITE_RUN(hash); diff --git a/fw/t/unit/test_hpack.c b/fw/t/unit/test_hpack.c index 63ccf09fb8..c71a8a60a2 100644 --- a/fw/t/unit/test_hpack.c +++ b/fw/t/unit/test_hpack.c @@ -75,6 +75,13 @@ do { \ HDR_COMPOUND_STR(hdr_res, name, value); \ } while (0) +static DEFINE_PER_CPU(long, mem); +static TfwClientMem cli_mem = { + .mem = &mem, +}; +static TfwClient client = { + .cli_mem = &cli_mem, +}; static TfwH2Conn conn; static TfwH2Ctx *ctx; static TfwHttpReq *test_req; @@ -84,9 +91,10 @@ static inline TfwHttpReq * test_hpack_req_alloc(void) { TfwHttpReq *req = test_req_alloc(0); + TfwHttpMsg *hmreq = (TfwHttpMsg *)req; BUG_ON(!req); - req->pit.pool = __tfw_pool_new(0); + req->pit.pool = __tfw_pool_new(0, tfw_http_msg_client_mem(hmreq)); BUG_ON(!req->pit.pool); req->pit.parsed_hdr = &req->stream->parser.hdr; __set_bit(TFW_HTTP_B_H2, req->flags); @@ -102,6 +110,7 @@ 
test_h2_setup(void) create_str_pool(); conn.h2 = ctx = tfw_h2_context_alloc(); BUG_ON(!ctx); + ((TfwConn *)&conn)->peer = (TfwPeer *)&client; r = tfw_h2_context_init(ctx, &conn); BUG_ON(r); test_req = test_hpack_req_alloc(); diff --git a/fw/t/unit/test_http1_parser.c b/fw/t/unit/test_http1_parser.c index 5b10bb7fdd..b19e895209 100644 --- a/fw/t/unit/test_http1_parser.c +++ b/fw/t/unit/test_http1_parser.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -4638,8 +4638,7 @@ do { \ test_case_parse_prepare_http(str); \ if (resp) \ test_resp_free(resp); \ - resp = test_resp_alloc(sizeof(str) - 1); \ - tfw_http_msg_pair(resp, sample_req); \ + resp = test_resp_alloc(sizeof(str) - 1, req); \ tfw_http_parse_resp(resp, str, sizeof(str) - 1, &parsed); \ } while (0) diff --git a/fw/t/unit/test_http2_parser.c b/fw/t/unit/test_http2_parser.c index b3e1df5880..cecbd16e14 100644 --- a/fw/t/unit/test_http2_parser.c +++ b/fw/t/unit/test_http2_parser.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2022-2025 Tempesta Technologies, Inc. + * Copyright (C) 2022-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -2756,6 +2756,8 @@ TEST(http2_parser, perf) #define REQ_PERF(frames_buf) \ do { \ + test_req_resp_cleanup(); \ + tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ if (req) \ test_req_free(req); \ @@ -2819,7 +2821,9 @@ TEST(http2_parser, fuzzer) tfw_init_frames(); ADD_HEADERS_FRAME(str, headers_len); ADD_DATA_FRAME(str + headers_len, body_len); - test_case_parse_prepare_h2(); + test_req_resp_cleanup(); + tfw_h2_context_clear(conn.h2); + test_case_parse_prepare_h2(); switch (ret) { case FUZZ_VALID: TRY_PARSE_EXPECT_PASS(FUZZ_REQ_H2, CHUNK_ON); diff --git a/fw/t/unit/test_http2_parser_hpack.c b/fw/t/unit/test_http2_parser_hpack.c index 02fa9e9288..4bde6da7dd 100644 --- a/fw/t/unit/test_http2_parser_hpack.c +++ b/fw/t/unit/test_http2_parser_hpack.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2024 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -1698,9 +1698,8 @@ TEST(http2_parser_hpack, erased_indexes_not_come_back) TEST_SUITE(http2_parser_hpack) { - test_case_alloc_h2(); - - TEST_SETUP(test_case_parse_prepare_h2); + TEST_SETUP(test_http2_parser_setup_fn); + TEST_TEARDOWN(test_http2_parser_teardown_fn); TEST_RUN(http2_parser_hpack, literal_header_field_with_incremental_indexing); TEST_RUN(http2_parser_hpack, literal_header_field_without_indexing); @@ -1714,6 +1713,4 @@ TEST_SUITE(http2_parser_hpack) TEST_RUN(http2_parser_hpack, dup_with_equal_values_in_indexes); TEST_RUN(http2_parser_hpack, dup_with_diff_values_in_indexes); TEST_RUN(http2_parser_hpack, erased_indexes_not_come_back); - - test_case_cleanup_h2(); } diff --git a/fw/t/unit/test_http_match.c b/fw/t/unit/test_http_match.c index c84a6d172c..bcb9b576ae 100644 --- a/fw/t/unit/test_http_match.c +++ b/fw/t/unit/test_http_match.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2022 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -96,9 +96,14 @@ http_match_suite_rule_release(TfwHttpMatchRule *rule) static void http_match_suite_setup(void) { + TfwHttpMsg *hm; + test_req = test_req_alloc(1); + hm = (TfwHttpMsg *)test_req; - test_table = tfw_pool_new(TfwHttpTable, TFW_POOL_ZERO); + test_table = tfw_pool_new(TfwHttpTable, + tfw_http_msg_client_mem(hm), + TFW_POOL_ZERO); BUG_ON(!test_table); INIT_LIST_HEAD(&test_table->head); diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 544f6916f5..6a9e5a3ccc 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2023-2025 Tempesta Technologies, Inc. + * Copyright (C) 2023-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -22,19 +22,25 @@ #include "test.h" #include "helpers.h" #include "http_msg.h" +#include "helpers.h" static TfwHttpResp *resp; +static TfwHttpReq *req; static void http_msg_suite_setup(void) { - resp = test_resp_alloc_no_data(); + req = test_req_alloc(0); + BUG_ON(!req); + resp = test_resp_alloc_no_data(req); + BUG_ON(!resp); } static void http_msg_suite_teardown(void) { test_resp_free(resp); + test_req_free(req); } TEST(http_msg, hdr_in_array) @@ -103,6 +109,7 @@ static bool __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, unsigned short nr_frags) { + TfwHttpMsg *hmresp = (TfwHttpMsg*)resp; TfwMsgIter *it; struct sk_buff *skb; struct page *page; @@ -113,6 +120,9 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + tfw_http_msg_client_mem(hmresp), + skb->truesize); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; @@ -235,7 +245,7 @@ 
TEST(http_msg, expand_from_pool_for_headers) resp->body_start_skb = it->skb; resp->body.len = (MAX_SKB_FRAGS - 1) * SLEN("paged_body"); - tfw_http_msg_setup_transform_pool(&resp->mit, it, resp->pool); + tfw_http_msg_setup_transform_pool(&resp->mit, msg, resp->pool); EXPECT_EQ(tfw_http_msg_cutoff_headers(msg, &cleanup), 0); @@ -297,7 +307,7 @@ TEST(http_msg, expand_from_pool_for_trailers) EXPECT_NULL(cleanup.skb_head); it->frag = skb_shinfo(it->skb)->nr_frags - 1; - tfw_http_msg_setup_transform_pool(&resp->mit, it, resp->pool); + tfw_http_msg_setup_transform_pool(&resp->mit, msg, resp->pool); __set_bit(TFW_HTTP_B_RESP_ENCODE_TRAILERS, resp->flags); diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index 4ef01cb2cd..df32b5f3b3 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -391,7 +391,7 @@ split_and_parse_n(unsigned char *str, uint32_t type, uint32_t len, * See comments for @do_split_and_parse()/__TRY_PARSE_EXPECT_* */ *fchunks = chunks; - return r <= T_BAD || r == T_OK ? r : T_BAD; + return (r == T_OK || is_tfw_common_error_code(r)) ? 
r : T_BAD; } /** @@ -424,6 +424,8 @@ test_case_alloc_h2(void) { conn.h2 = tfw_h2_context_alloc(); BUG_ON(!conn.h2); + ((TfwConn *)&conn)->peer = (TfwPeer *)&client; + ((TfwConn *)&conn)->proto.type = Conn_H2Clnt; } void @@ -431,6 +433,7 @@ test_case_cleanup_h2(void) { BUG_ON(!conn.h2); + tfw_h2_context_clear(conn.h2); tfw_h2_context_free(conn.h2); conn.h2 = NULL; } @@ -494,6 +497,7 @@ do_split_and_parse(int type, int chunk_mode) req = test_req_alloc(frames_total_sz); } else if (type == FUZZ_REQ_H2) { + TfwHttpMsg *hmreq; /* * During the processing of a request, the HPACK dynamic table * is modified. The same query is used for each chunk size. @@ -520,15 +524,16 @@ do_split_and_parse(int type, int chunk_mode) req->stream = &stream; tfw_http_init_parser_req(req); stream.msg = (TfwMsg*)req; - req->pit.pool = __tfw_pool_new(0); + hmreq = (TfwHttpMsg *)req; + req->pit.pool = + __tfw_pool_new(0, tfw_http_msg_client_mem(hmreq)); BUG_ON(!req->pit.pool); __set_bit(TFW_HTTP_B_H2, req->flags); } else if (type == FUZZ_RESP) { if (resp) test_resp_free(resp); - resp = test_resp_alloc(frames_total_sz); - tfw_http_msg_pair(resp, sample_req); + resp = test_resp_alloc(frames_total_sz, sample_req); } else { BUG(); } @@ -688,3 +693,22 @@ get_next_str_val(TfwStr *str) return v; } + +void +test_req_resp_cleanup(void) +{ + if (sample_req) { + test_req_free(sample_req); + sample_req = NULL; + } + + if (req) { + test_req_free(req); + req = NULL; + } + + if (resp) { + test_resp_free(resp); + resp = NULL; + } +} diff --git a/fw/t/unit/test_http_parser_common.h b/fw/t/unit/test_http_parser_common.h index 455f8932f2..65a0946555 100644 --- a/fw/t/unit/test_http_parser_common.h +++ b/fw/t/unit/test_http_parser_common.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -552,6 +552,7 @@ extern TfwHttpReq *req, *sample_req; extern TfwHttpResp *resp; extern TfwH2Conn conn; extern TfwStream stream; +extern TfwClient client; int set_sample_req(unsigned char *str); @@ -563,6 +564,20 @@ int do_split_and_parse(int type, int chunk_mode); int validate_data_fully_parsed(int type, size_t sz_diff); +static inline void +test_http2_parser_setup_fn(void) +{ + test_case_alloc_h2(); + test_case_parse_prepare_h2(); +} + +static inline void +test_http2_parser_teardown_fn(void) +{ + test_req_resp_cleanup(); + test_case_cleanup_h2(); +} + #define __TRY_PARSE_EXPECT_PASS(type, sz_diff, chunk_mode) \ chunk_size_index = 0; \ while (({ \ @@ -619,6 +634,8 @@ do { \ #define FOR_REQ_H2(frames_definition) \ ASSIGN_FRAMES_FOR_H2(frames_definition); \ PRINT_REQ_H2(); \ + test_req_resp_cleanup(); \ + tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ TRY_PARSE_EXPECT_PASS(FUZZ_REQ_H2, CHUNK_ON) @@ -635,6 +652,8 @@ do { \ #define EXPECT_BLOCK_REQ_H2(frames_definition) \ ASSIGN_FRAMES_FOR_H2(frames_definition); \ PRINT_REQ_H2(); \ + test_req_resp_cleanup(); \ + tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ TRY_PARSE_EXPECT_BLOCK(FUZZ_REQ_H2, CHUNK_ON) diff --git a/fw/t/unit/test_pool.c b/fw/t/unit/test_pool.c index 0908ac5cd5..deb4f0973a 100644 --- a/fw/t/unit/test_pool.c +++ b/fw/t/unit/test_pool.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -28,8 +28,8 @@ TEST(pool, alignment) void *a, *b, *c, *d; bool np; - /* this should give us a single page minus the 32 byte pool headers */ - p = __tfw_pool_new(1001); + /* this should give us a single page minus the 40 byte pool headers */ + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -42,7 +42,7 @@ TEST(pool, alignment) EXPECT_TRUE(c == b + 1); /* 'c' must be tightly packed */ /* 'd' should still fit into the same page */ - d = tfw_pool_alloc_not_align_np(p, PAGE_SIZE - (32 + 10), &np); + d = tfw_pool_alloc_not_align_np(p, PAGE_SIZE - (40 + 10), &np); EXPECT_TRUE(d == c + 1); EXPECT_FALSE(np); @@ -57,7 +57,7 @@ TEST(pool, realloc) TfwPool *p; void *a, *b, *c, *d; - p = __tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -73,11 +73,11 @@ TEST(pool, realloc) EXPECT_TRUE(d == c); /* allocate enough memory to use the entire chunk */ - d = tfw_pool_realloc(p, c, PAGE_SIZE - 300, PAGE_SIZE - 40); + d = tfw_pool_realloc(p, c, PAGE_SIZE - 300, PAGE_SIZE - 48); EXPECT_TRUE(d == c); /* the pool chunk must be exhausted now */ - d = tfw_pool_realloc(p, c, PAGE_SIZE - 40, PAGE_SIZE - 39); + d = tfw_pool_realloc(p, c, PAGE_SIZE - 48, PAGE_SIZE - 47); EXPECT_TRUE(d != c); } @@ -87,7 +87,7 @@ TEST(pool, clean_single) void *root, *curr, *first_ptr, *last_ptr; struct tfw_pool_chunk_t *head, *tail; - p = __tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -146,7 +146,7 @@ TEST(pool, clean) TfwPool *p; struct tfw_pool_chunk_t *head, *tail; - p = __tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); diff --git a/fw/t/unit/tfw_str_helper.c 
b/fw/t/unit/tfw_str_helper.c index 67ba025f97..a5e15d28a0 100644 --- a/fw/t/unit/tfw_str_helper.c +++ b/fw/t/unit/tfw_str_helper.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2015-2019 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ void create_str_pool(void) { BUG_ON(str_pool); - str_pool = __tfw_pool_new(1); + str_pool = __tfw_pool_new(1, NULL); BUG_ON(!str_pool); } diff --git a/fw/tcp.h b/fw/tcp.h index 5f50e347ce..63e39bfbe7 100644 --- a/fw/tcp.h +++ b/fw/tcp.h @@ -1,7 +1,7 @@ /** * TCP Socket API. * - * Copyright (C) 2015-2023 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -44,12 +44,6 @@ tfw_tcp_calc_snd_wnd(struct sock *sk, unsigned int mss_now) unsigned int qlen = skb_queue_len(&sk->sk_write_queue); unsigned int send_win, cong_win; - /* - * Update snd_cwnd if nedeed, to correct caclulation - * of count of bytes to send. - */ - tcp_slow_start_after_idle_check(sk); - if (in_flight + qlen >= tp->snd_cwnd) return 0; diff --git a/fw/tf_filter.c b/fw/tf_filter.c index 26c3814568..c8b214f8af 100644 --- a/fw/tf_filter.c +++ b/fw/tf_filter.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -89,7 +89,7 @@ put_fingerprint_rates(Storage *storage, Rates *rates) tdb_rec_put(storage->tdb, (char *)rates - sizeof(TdbRec)); } -static void +static int get_alloc_ctx_init_rec(TdbRec *rec, void *) { Rates *rates = (Rates *)rec->data; @@ -99,6 +99,8 @@ get_alloc_ctx_init_rec(TdbRec *rec, void *) spin_lock_init(&rates->conns_lock); spin_lock_init(&rates->recs_lock); tdb_rec_keep(rec); + + return 0; } /** diff --git a/fw/tls.c b/fw/tls.c index 0df47eb400..e7e160511b 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -3,7 +3,7 @@ * * Transport Layer Security (TLS) interfaces to Tempesta TLS. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -54,7 +54,7 @@ tfw_tls_purge_io_ctx(TlsIOCtx *io) struct sk_buff *skb; while ((skb = ss_skb_dequeue(&io->skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); ttls_reset_io_ctx(io); } @@ -89,6 +89,8 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) next_msg: spin_lock(&tls->lock); ss_skb_queue_tail(&tls->io_in.skb_list, skb); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + CLIENT_MEM_FROM_CONN(conn), skb->truesize); /* Call TLS layer to place skb into a TLS record on top of skb_list. 
*/ parsed = 0; @@ -169,7 +171,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) TTLS_TAG_LEN); if (r) { tfw_tls_purge_io_ctx(&tls->io_in); - kfree_skb(nskb); + ss_kfree_skb(nskb); spin_unlock(&tls->lock); return T_BAD; } @@ -184,8 +186,8 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) /* Do upcall to http or websocket */ r = tfw_connection_recv(conn, data_up.skb); - if (r && r != T_POSTPONE && r != T_DROP) { - kfree_skb(nskb); + if (tfw_error_code_is_crucial(r)) { + ss_kfree_skb(nskb); return r; } } else { @@ -537,6 +539,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) { int r, flags = 0; TfwTlsConn *conn = container_of(tls, TfwTlsConn, tls); + TfwCliConn *cli_conn = &conn->cli_conn; TlsIOCtx *io = &tls->io_out; TfwMsgIter it; TfwStr str = {}; @@ -564,11 +567,12 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) T_DBG("TLS %lu bytes +%u segments (%u bytes, last msgtype %#x)" " are to be sent on conn=%pK/sk_write_xmit=%pK ready=%d\n", str.len, sgt ? sgt->nents : 0, io->msglen, io->msgtype, conn, - conn->cli_conn.sk->sk_write_xmit, ttls_xfrm_ready(tls)); + cli_conn->sk->sk_write_xmit, ttls_xfrm_ready(tls)); - if ((r = tfw_msg_iter_setup(&it, &io->skb_list, str.len))) + if ((r = tfw_msg_iter_setup(&it, CLIENT_MEM_FROM_CONN(cli_conn), + &io->skb_list, str.len))) goto out; - if ((r = tfw_msg_write(&it, &str))) + if ((r = tfw_msg_iter_write(&it, &str))) goto out; /* Only one skb should has been allocated. 
*/ WARN_ON_ONCE(it.skb->next != io->skb_list @@ -584,6 +588,9 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + CLIENT_MEM_FROM_CONN(cli_conn), + skb->truesize); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } @@ -642,9 +649,9 @@ tfw_tls_conn_dtor(void *c) if (tls) { while ((skb = ss_skb_dequeue(&tls->io_in.skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); while ((skb = ss_skb_dequeue(&tls->io_out.skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); if (tls->peer_conf) tfw_vhost_put(tfw_vhost_from_tls_conf(tls->peer_conf)); @@ -802,10 +809,10 @@ tfw_tls_conn_send(TfwConn *c, TfwMsg *msg) return r; } -static void +static int tfw_tls_conn_recv_finish(TfwConn *c) { - tfw_conn_hook_call(TFW_FSM_HTTP, c, conn_recv_finish); + return tfw_conn_hook_call(TFW_FSM_HTTP, c, conn_recv_finish); } static TfwConnHooks tls_conn_hooks = { diff --git a/fw/token_tables.h b/fw/token_tables.h index 7cbf8cd356..120926179b 100644 --- a/fw/token_tables.h +++ b/fw/token_tables.h @@ -47,7 +47,7 @@ /* * ASCII table column bitmaps for HTTP token, e.g. header name (RFC 7230 3.2.6). - * + * * ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz * !#$%&'*+-.^_`|~0123456789 */ @@ -94,7 +94,7 @@ /* * ASCII codes to accept HTTP header values - * + * * RFC 7230, Appendix B; RFC 5234, Appendix B.1.: * field-value OWS = VCHAR SP HTAB = %x9 %x20-7E * diff --git a/fw/vhost.c b/fw/vhost.c index bdce492764..8daeceb046 100644 --- a/fw/vhost.c +++ b/fw/vhost.c @@ -994,7 +994,7 @@ tfw_cfgop_cache_use_stale(TfwCfgSpec *cs, TfwCfgEntry *ce, TfwLocation *loc) TFW_CFG_CHECK_NO_ATTRS(cs, ce); TFW_CFG_CHECK_VAL_N(>=, 1, cs, ce); - /* + /* * TODO: Revise and remove after #2123. 
*/ if (tfw_vhost_is_default_reconfig(tfw_vhost_entry)) { @@ -1965,7 +1965,7 @@ tfw_vhost_create(const char *name) + sizeof(TfwStickyCookie) + sizeof(FrangGlobCfg) + tfw_tls_vhost_priv_data_sz(); - if (!(pool = __tfw_pool_new(0))) + if (!(pool = __tfw_pool_new(0, NULL))) return NULL; if (!(vhost = tfw_kzalloc(size, GFP_KERNEL))) { diff --git a/fw/websocket.c b/fw/websocket.c index 8c1d525122..3c813b3a2f 100644 --- a/fw/websocket.c +++ b/fw/websocket.c @@ -231,7 +231,7 @@ tfw_ws_msg_process(TfwConn *conn, struct sk_buff *skb) * which is wrong - please fix this if you see the warning. */ if (WARN_ON_ONCE(sock_flag(conn->sk, SOCK_DEAD))) { - kfree_skb(skb); + ss_kfree_skb(skb); return 0; } @@ -357,18 +357,27 @@ tfw_ws_conn_send(TfwConn *conn, TfwMsg *msg) return r; } +static int +tfw_ws_conn_recv_finish(TfwConn *conn) +{ + return tfw_conn_hook_call(TFW_CONN_HTTP_TYPE(conn), conn, + conn_recv_finish); +} + static TfwConnHooks ws_conn_hooks = { - .conn_close = tfw_ws_conn_close, - .conn_abort = tfw_ws_conn_abort, - .conn_drop = tfw_ws_conn_drop, - .conn_send = tfw_ws_conn_send, + .conn_close = tfw_ws_conn_close, + .conn_abort = tfw_ws_conn_abort, + .conn_drop = tfw_ws_conn_drop, + .conn_send = tfw_ws_conn_send, + .conn_recv_finish = tfw_ws_conn_recv_finish, }; static TfwConnHooks wss_conn_hooks = { - .conn_close = tfw_ws_conn_close, - .conn_abort = tfw_ws_conn_abort, - .conn_drop = tfw_ws_conn_drop, - .conn_send = tfw_ws_conn_send, + .conn_close = tfw_ws_conn_close, + .conn_abort = tfw_ws_conn_abort, + .conn_drop = tfw_ws_conn_drop, + .conn_send = tfw_ws_conn_send, + .conn_recv_finish = tfw_ws_conn_recv_finish, }; /* diff --git a/lib/fault_injection_alloc.c b/lib/fault_injection_alloc.c index 011dab6bb6..7240b85d61 100644 --- a/lib/fault_injection_alloc.c +++ b/lib/fault_injection_alloc.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -71,4 +71,29 @@ tfw__alloc_percpu(size_t size, size_t align) ALLOW_ERROR_INJECTION(tfw__alloc_percpu, NULL); EXPORT_SYMBOL(tfw__alloc_percpu); +void * +tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) +{ + return __alloc_percpu_gfp(size, align, gfp); +} +ALLOW_ERROR_INJECTION(tfw__alloc_percpu_gfp, NULL); +EXPORT_SYMBOL(tfw__alloc_percpu_gfp); + +unsigned long +tfw__get_free_pages(gfp_t gfp_mask, unsigned int order) +{ + return __get_free_pages(gfp_mask, order); +} +ALLOW_ERROR_INJECTION(tfw__get_free_pages, NULL); +EXPORT_SYMBOL(tfw__get_free_pages); + +int +tfw_percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + unsigned int flags, gfp_t gfp) +{ + return percpu_ref_init(ref, release, flags, gfp); +} +ALLOW_ERROR_INJECTION(tfw_percpu_ref_init, ERRNO); +EXPORT_SYMBOL(tfw_percpu_ref_init); + #endif diff --git a/lib/fault_injection_alloc.h b/lib/fault_injection_alloc.h index 440ae9f83f..b75f7345f8 100644 --- a/lib/fault_injection_alloc.h +++ b/lib/fault_injection_alloc.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -28,9 +28,15 @@ #define tfw_kzalloc(size, flags) kzalloc(size, flags) #define tfw_kcalloc(n, size, flags) kcalloc(n, size, flags) #define tfw_kmalloc_node(size, flags, node) kmalloc_node(size, flags, node) -#define tfw_kvmalloc_node(size, flags, node) kvmalloc_node(size, flags, node) +#define tfw_kvmalloc_node(size, flags, node) \ + kvmalloc_node(size, flags, node) #define tfw__alloc_percpu(size, align) __alloc_percpu(size, align) #define tfw_alloc_percpu(t) alloc_percpu(t) +#define tfw_alloc_percpu_gfp(t, gfp) alloc_percpu_gfp(t, gfp) +#define tfw__get_free_pages(gfp_mask, order) \ + __get_free_pages(gfp_mask, order) +#define tfw_percpu_ref_init(ref, release, flags, gfp) \ + percpu_ref_init(ref, release, flags, gfp) #else @@ -40,8 +46,17 @@ void *tfw_kcalloc(size_t n, size_t size, gfp_t flags); void *tfw_kmalloc_node(size_t size, gfp_t flags, int node); void *tfw_kvmalloc_node(size_t size, gfp_t flags, int node); void *tfw__alloc_percpu(size_t size, size_t align); +void *tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); +int __must_check tfw_percpu_ref_init(struct percpu_ref *ref, + percpu_ref_func_t *release, + unsigned int flags, + gfp_t gfp); +unsigned long tfw__get_free_pages(gfp_t gfp_mask, unsigned int order); #define tfw_alloc_percpu(t) \ (typeof(t) __percpu *) tfw__alloc_percpu(sizeof(t), __alignof__(t)) +#define tfw_alloc_percpu_gfp(t, gfp) \ + (typeof(t) __percpu *) tfw__alloc_percpu_gfp(sizeof(t), \ + __alignof__(t), gfp) #endif diff --git a/lib/log.h b/lib/log.h index 3ad5471d23..e11e0e6231 100644 --- a/lib/log.h +++ b/lib/log.h @@ -1,7 +1,7 @@ /** * Tempesta kernel library * - * Copyright (C) 2015-2025 Tempesta Technologies, INC. + * Copyright (C) 2015-2026 Tempesta Technologies, INC. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -22,47 +22,112 @@ #ifdef __KERNEL__ #include +#include #else #define MAX_ERRNO 4095 #endif /* - * Return codes. + * Tempesta FW common return codes. */ -enum { - /* Compression error during hpack decoding. */ - T_COMPRESSION = -MAX_ERRNO + 7, +typedef enum { + /* The message looks good and we can safely pass it. */ + T_OK = 0, + /* - * Generic error. Connection should be shutdown gracefully - * with TCP_FIN. + * Common error codes boundary. All common error codes should be + * greater than this boundary. Error codes should be listed in this + * enum from the most crucial to the least crucial. */ - T_BAD = -MAX_ERRNO + 6, + __T_COMMON_ERROR_CODE_START = -MAX_ERRNO, + /* - * The message must be dropped. Connection should be alive or closed - * with TCP FIN depending on whether we can communicate with this - * client or not. + * The message must be blocked (typically on a security event). + * Tempesta send TCP RST in this case. */ - T_DROP = -MAX_ERRNO + 5, + T_BLOCK_WITH_RST = __T_COMMON_ERROR_CODE_START + 1, + /* * The message must be blocked (typically on a security event). * Tempesta send TCP FIN in this case. */ - T_BLOCK_WITH_FIN = -MAX_ERRNO + 4, + T_BLOCK_WITH_FIN = __T_COMMON_ERROR_CODE_START + 2, + /* - * The message must be blocked (typically on a security event). - * Tempesta send TCP RST in this case. + * Generic error. Connection should be shutdown gracefully + * with TCP_FIN. + */ + T_BAD = __T_COMMON_ERROR_CODE_START + 3, + + /* + * The message must be dropped. Connection should be alive or closed + * with TCP FIN depending on whether we can communicate with this + * client or not. */ - T_BLOCK_WITH_RST = -MAX_ERRNO + 3, + T_DROP = __T_COMMON_ERROR_CODE_START + 4, + + /* The message should be stashed (made by callback). 
*/ + T_POSTPONE = __T_COMMON_ERROR_CODE_START + 5, + + /* Last common error code + 1 */ + __T_COMMON_ERROR_CODE_END, +} TfwRcCommon; + +/* + * Tempesta FW internal error codes. Can be returned from different + * modules (e.g. hpack, frang). Should be converted to common return + * code before use on low level (connection, socket) layer. + */ +typedef enum { + __T_INTERNAL_ERROR_CODE_START = __T_COMMON_ERROR_CODE_END + 1, + + /* Compression error during hpack decoding. */ + T_COMPRESSION = __T_INTERNAL_ERROR_CODE_START + 1, + /* * The message must be blocked (typically on a security event). * Sending TCP RST or TCP FIN depends on block action setting. */ - T_BLOCK = -MAX_ERRNO + 2, - /* The message should be stashed (made by callback). */ - T_POSTPONE = -MAX_ERRNO + 1, - /* The message looks good and we can safely pass it. */ - T_OK = 0, -}; + T_BLOCK = __T_INTERNAL_ERROR_CODE_START + 2, + + /* Last internal error code + 1 */ + __T_INTERNAL_ERROR_CODE_END, +} TfwInternalErrCodes; + +static inline bool +is_tfw_common_error_code(int err_code) +{ + return err_code > __T_COMMON_ERROR_CODE_START + && err_code < __T_COMMON_ERROR_CODE_END; +} + +static inline bool +is_tfw_internal_error_code(int err_code) +{ + return err_code > __T_INTERNAL_ERROR_CODE_START + && err_code < __T_INTERNAL_ERROR_CODE_END; +} + +static inline bool +tfw_error_code_more_crucial(int err_code1, int err_code2) +{ + WARN_ON_ONCE(err_code1 && !is_tfw_common_error_code(err_code1) + && !is_tfw_internal_error_code(err_code1)); + WARN_ON_ONCE(err_code2 && !is_tfw_common_error_code(err_code2) + && !is_tfw_internal_error_code(err_code2)); + + return err_code1 < err_code2; +} + +static inline bool +tfw_error_code_is_crucial(int err_code) +{ + /* + * Also works with system error codes, not only Tempesta FW + * error codes. + */ + return err_code && err_code != T_POSTPONE && err_code != T_DROP; +} /* * BANNER variable must be defined before including the file! 
diff --git a/linux-6.12.12.patch b/linux-6.12.12.patch index c121a863e4..dee42bd37c 100644 --- a/linux-6.12.12.patch +++ b/linux-6.12.12.patch @@ -863,7 +863,7 @@ index 8896705cc..29e0d0428 100644 /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 39f1d16f3..5457e0826 100644 +index 39f1d16f3..db9523d1f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -267,6 +267,12 @@ @@ -879,20 +879,21 @@ index 39f1d16f3..5457e0826 100644 /* return minimum truesize of one skb containing X bytes of data */ #define SKB_TRUESIZE(X) ((X) + \ -@@ -877,6 +883,12 @@ struct sk_buff { +@@ -877,6 +883,13 @@ struct sk_buff { * UDP receive path is one user. */ unsigned long dev_scratch; +#ifdef CONFIG_SECURITY_TEMPESTA -+ struct { -+ __u8 present : 1; -+ __u8 tls_type : 7; -+ } tfw_cb; ++ struct { ++ __u16 present : 1; ++ __u16 in_socket_write_queue : 1; ++ __u16 tls_type : 7; ++ } tfw_cb; +#endif }; }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ -@@ -938,11 +950,17 @@ struct sk_buff { +@@ -938,11 +951,17 @@ struct sk_buff { fclone:2, peeked:1, head_frag:1, @@ -910,11 +911,12 @@ index 39f1d16f3..5457e0826 100644 /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() -@@ -1113,6 +1131,42 @@ struct sk_buff { +@@ -1113,6 +1132,56 @@ struct sk_buff { #define SKB_ALLOC_RX 0x02 #define SKB_ALLOC_NAPI 0x04 +#ifdef CONFIG_SECURITY_TEMPESTA ++ +static inline unsigned long +skb_tfw_is_present(struct sk_buff *skb) +{ @@ -936,6 +938,19 @@ index 39f1d16f3..5457e0826 100644 +} + +static inline void ++skb_tfw_set_in_socket_write_queue(struct sk_buff *skb) ++{ ++ skb->tfw_cb.present = 1; ++ skb->tfw_cb.in_socket_write_queue = 1; ++} ++ ++static inline bool ++skb_tfw_is_in_socket_write_queue(struct sk_buff *skb) ++{ ++ return skb->tfw_cb.present ? 
skb->tfw_cb.in_socket_write_queue : false; ++} ++ ++static inline void +skb_copy_tfw_cb(struct sk_buff *dst, struct sk_buff *src) +{ + dst->dev = src->dev; @@ -953,7 +968,7 @@ index 39f1d16f3..5457e0826 100644 /** * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves * @skb: buffer -@@ -1298,6 +1352,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); +@@ -1298,6 +1367,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); @@ -961,7 +976,7 @@ index 39f1d16f3..5457e0826 100644 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); -@@ -2465,7 +2520,11 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); +@@ -2465,7 +2535,11 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) { @@ -973,7 +988,7 @@ index 39f1d16f3..5457e0826 100644 } static inline unsigned int skb_headlen(const struct sk_buff *skb) -@@ -2821,6 +2880,20 @@ static inline unsigned int skb_headroom(const struct sk_buff *skb) +@@ -2821,6 +2895,20 @@ static inline unsigned int skb_headroom(const struct sk_buff *skb) return skb->data - skb->head; } @@ -994,6 +1009,15 @@ index 39f1d16f3..5457e0826 100644 /** * skb_tailroom - bytes at buffer end * @skb: buffer to check +@@ -3806,7 +3894,7 @@ static inline int skb_add_data(struct sk_buff *skb, + if (skb->ip_summed == CHECKSUM_NONE) { + __wsum csum = 0; + if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, +- &csum, from)) { ++ &csum, from)) { + skb->csum = csum_block_add(skb->csum, csum, off); + return 0; + } diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 0f3c58007..e2576c604 100644 --- a/include/linux/skbuff_ref.h diff --git a/tls/mpool.c b/tls/mpool.c index 337d9a8cdd..98bdcb464f 100644 --- 
a/tls/mpool.c +++ b/tls/mpool.c @@ -19,7 +19,7 @@ * implicitly for MPI math. Dynamically allocated pages are used instead of * static per-cpu ones. * - * Copyright (C) 2019-2024 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ #include "dhm.h" #include "ecp.h" #include "mpool.h" +#include "lib/fault_injection_alloc.h" #define MPI_POOL_DATA(mp) ((void *)((char *)(mp) + sizeof(TlsMpiPool))) #define MPI_POOL_FREE_PTR(mp) ((void *)((char *)(mp) + (mp)->curr)) @@ -208,7 +209,7 @@ ttls_mpi_pool_create(size_t order, gfp_t gfp_mask) TlsMpiPool *mp; unsigned long addr; - if (!(addr = __get_free_pages(gfp_mask | __GFP_ZERO, order))) + if (!(addr = tfw__get_free_pages(gfp_mask | __GFP_ZERO, order))) return NULL; WARN_ON_ONCE(addr & ((PAGE_SIZE << order) - 1)); @@ -336,7 +337,7 @@ __mpi_profile_clone(TlsCtx *tls, int ec) return -ENOMEM; } - ptr = (char *)__get_free_pages(GFP_ATOMIC, __MPOOL_HS_ORDER); + ptr = (char *)tfw__get_free_pages(GFP_ATOMIC, __MPOOL_HS_ORDER); if (unlikely(!ptr)) return -ENOMEM; @@ -399,8 +400,11 @@ ttls_mpool_exit(void) for_each_online_cpu(i) { mp = per_cpu(g_tmp_mpool, i); - ttls_bzero_safe(MPI_POOL_DATA(mp), mp->curr - sizeof(*mp)); - free_pages((unsigned long)mp, mp->order); + if (mp) { + ttls_bzero_safe(MPI_POOL_DATA(mp), + mp->curr - sizeof(*mp)); + free_pages((unsigned long)mp, mp->order); + } } } diff --git a/tls/tls_ticket.c b/tls/tls_ticket.c index 773224ccb3..6a90419aee 100644 --- a/tls/tls_ticket.c +++ b/tls/tls_ticket.c @@ -6,7 +6,7 @@ * Based on mbed TLS, https://tls.mbed.org. * * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "tls_ticket.h" #include "tls_internal.h" #include "lib/common.h" +#include "lib/fault_injection_alloc.h" ttls_cli_id_t *ttls_cli_id_cb; @@ -583,7 +584,8 @@ ttls_ticket_sess_load(TlsState *state, size_t len, unsigned long lifetime) * address it. */ sess->peer_cert->raw.order = get_order(state->cert_len + TTLS_CERT_LEN_LEN); - pg = __get_free_pages(GFP_ATOMIC | __GFP_COMP, sess->peer_cert->raw.order); + pg = tfw__get_free_pages(GFP_ATOMIC | __GFP_COMP, + sess->peer_cert->raw.order); if (!pg) { ttls_x509_crt_destroy(&sess->peer_cert); return TTLS_ERR_ALLOC_FAILED; diff --git a/tls/x509_crt.c b/tls/x509_crt.c index cb89806d55..c91c886286 100644 --- a/tls/x509_crt.c +++ b/tls/x509_crt.c @@ -15,7 +15,7 @@ * Based on mbed TLS, https://tls.mbed.org. * * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -910,8 +910,9 @@ ttls_x509_crt_parse(TlsX509Crt *crt, unsigned char *buf, size_t buflen) * to the multi-byte structures inside the raw data. */ crt->raw.order = get_order(buflen + crt_len_len); - crt->raw.pages = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_COMP, - crt->raw.order); + crt->raw.pages = + (unsigned char *)tfw__get_free_pages(GFP_KERNEL | __GFP_COMP, + crt->raw.order); if (!crt->raw.pages) return -ENOMEM; crt->raw.tot_len = 0;