From 975764fc326604e6ff2368888ffa2afc5fbbf13d Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Sat, 14 Jun 2025 05:06:57 +0300 Subject: [PATCH 01/23] Implement new config option `client_mem`. client_mem - controls how much memory is used to store unanswered client requests and requests with linked responses which cannot be forwarded to a client. --- fw/connection.h | 2 ++ fw/sock_clnt.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/fw/connection.h b/fw/connection.h index 769d02766..6be98cc63 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -612,6 +612,8 @@ tfw_peer_for_each_conn(TfwPeer *p, int (*cb)(TfwConn *)) } extern unsigned int tfw_cli_max_concurrent_streams; +extern u64 tfw_cli_soft_mem_limit; +extern u64 tfw_cli_hard_mem_limit; void tfw_connection_unlink_to_sk(TfwConn *conn); void tfw_connection_hooks_register(TfwConnHooks *hooks, int type); diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 45190d122..0f64f27eb 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -49,6 +49,8 @@ static struct kmem_cache *tfw_h2_conn_cache; static int tfw_cli_cfg_ka_timeout = -1; unsigned int tfw_cli_max_concurrent_streams; +u64 tfw_cli_soft_mem_limit; +u64 tfw_cli_hard_mem_limit; static inline struct kmem_cache * tfw_cli_cache(int type) @@ -724,6 +726,57 @@ tfw_cfgop_keepalive_timeout(TfwCfgSpec *cs, TfwCfgEntry *ce) return 0; } +static int +tfw_parse_client_mem(const char *val, unsigned long long *mem) +{ + size_t len = strlen(val); + char *p; + + *mem = memparse(val, &p); + if (p != val + len) + return -EINVAL; + + return 0; +} + +static int +tfw_cfgop_client_mem(TfwCfgSpec *cs, TfwCfgEntry *ce) +{ + int r; + + TFW_CFG_CHECK_NO_ATTRS(cs, ce); + TFW_CFG_CHECK_VAL_N(>=, 1, cs, ce); + TFW_CFG_CHECK_VAL_N(<, 3, cs, ce); + + r = tfw_parse_client_mem(ce->vals[0], &tfw_cli_soft_mem_limit); + if (unlikely(r)) { + T_ERR_NL("Invalid 'client_mem' value: '%s'", + ce->vals[0]); + return r; + } + + if (ce->val_n > 1) { 
r = tfw_parse_client_mem(ce->vals[1], &tfw_cli_hard_mem_limit); + if (unlikely(r)) { + T_ERR_NL("Invalid 'client_mem' value: '%s'", + ce->vals[1]); + return r; + } + } else { + tfw_cli_hard_mem_limit = (tfw_cli_soft_mem_limit < U64_MAX / 2) ? + tfw_cli_soft_mem_limit * 2 : U64_MAX; + } + + if (tfw_cli_hard_mem_limit < tfw_cli_soft_mem_limit) { + T_ERR_NL("Invalid 'client_mem' value: hard limit (%llu) is" + " less then soft (%llu)", tfw_cli_hard_mem_limit, + tfw_cli_soft_mem_limit); + return -EINVAL; + } + + return 0; +} + static void tfw_cfgop_cleanup_sock_clnt(TfwCfgSpec *cs) { @@ -966,6 +1019,15 @@ static TfwCfgSpec tfw_sock_clnt_specs[] = { .allow_repeat = false, .allow_reconfig = true, }, + { + .name = "client_mem", + .deflt = "0 0", + .handler = tfw_cfgop_client_mem, + .cleanup = tfw_cfgop_cleanup_sock_clnt, + .allow_none = true, + .allow_repeat = false, + .allow_reconfig = true, + }, { 0 } }; From fb5cf83d7431b2a294e064d01572b6ac70213cc8 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Sat, 21 Jun 2025 03:14:31 +0300 Subject: [PATCH 02/23] Codestyle fixes --- fw/http2.c | 2 +- fw/ss_skb.h | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fw/http2.c b/fw/http2.c index f67c3e755..2c42c0546 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -759,7 +759,7 @@ tfw_h2_entail_stream_skb(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, } } *len -= skb->len; - ss_skb_tcp_entail(sk, skb, mark, tls_type); + ss_skb_tcp_entail(sk, skb, mark, tls_type); } /* diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 1d359b117..b7710f6be 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -456,9 +456,8 @@ int ss_skb_expand_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, size_t head, size_t tail); int ss_skb_chop_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, size_t head, size_t tail); -int -ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, - size_t head, size_t trail); +int ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, + size_t 
head, size_t trail); int ss_skb_cutoff_data(struct sk_buff *skb_head, TfwStr *hdr, int skip, int tail); int skb_next_data(struct sk_buff *skb, char *last_ptr, TfwStr *it); @@ -473,9 +472,8 @@ int ss_skb_to_sgvec_with_new_pages(struct sk_buff *skb, struct scatterlist *sgl, struct page ***old_pages); int ss_skb_add_frag(struct sk_buff *skb_head, struct sk_buff **skb, char* addr, int *frag_idx, size_t frag_sz); -int -ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, - unsigned char *split_point); +int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, + unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); #if defined(DEBUG) && (DEBUG >= 4) From 04f54b24b692c0bd1becd1e28580ba3396c3afc9 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Tue, 28 Apr 2026 11:35:56 +0300 Subject: [PATCH 03/23] Adjust frame size during `snd_wnd` calculation Account for FRAME_HEADER_SIZE when calculating the send window while making frames. (The send window calculation was inaccurate: we did not take into account that each frame also contains a frame header.) --- fw/http_frame.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fw/http_frame.c b/fw/http_frame.c index 0e7b76e7f..5a429bd89 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2190,7 +2190,7 @@ do { \ #define FRAME_XMIT_FSM_NEXT(frame_length, state) \ do { \ - *snd_wnd -= frame_length; \ + *snd_wnd -= frame_length + FRAME_HEADER_SIZE; \ T_FSM_JMP(state); \ } while(0) From 4f6b09fefde82c72edbb62bd7d1695f6bdefacc8 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Tue, 24 Jun 2025 07:09:47 +0300 Subject: [PATCH 04/23] Change API of some functions. To track socket memory we should pass TfwHttpMsg *, not TfwMsgIter *, to most of the http_msg_* functions, because TfwHttpMsg has a pointer to the connection and socket. 
--- fw/cache.c | 41 +++++++----- fw/hpack.c | 43 ++++++------ fw/http.c | 102 +++++++++++++++-------------- fw/http.h | 2 +- fw/http2.c | 2 +- fw/http_frame.c | 2 +- fw/http_msg.c | 134 +++++--------------------------------- fw/http_msg.h | 10 +-- fw/http_sess.c | 5 +- fw/msg.c | 124 ++++++++++++++++++++++++----------- fw/msg.h | 22 +------ fw/t/unit/helpers.c | 4 +- fw/t/unit/test_http_msg.c | 4 +- fw/tls.c | 2 +- 14 files changed, 211 insertions(+), 286 deletions(-) diff --git a/fw/cache.c b/fw/cache.c index 502ccb81b..827613700 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -812,14 +812,14 @@ tfw_cache_h2_write(TDB *db, TdbVRec **trec, TfwHttpResp *resp, char **data, TfwStr c = { 0 }; TdbVRec *tr = *trec; TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *it = &resp->iter; int r = 0, copied = 0; while (1) { c.data = *data; c.len = min(tr->data + tr->len - *data, (long)(len - copied)); if (!dc_iter->skip) { - r = tfw_http_msg_expand_data(it, &resp->msg.skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, + &resp->msg.skb_head, &c, &mit->start_off); if (unlikely(r)) break; @@ -887,11 +887,10 @@ static int tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, TdbVRec **trec, char **p, unsigned long *acc_len) { - int r; - TfwMsgIter *it = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; bool h2_mode = TFW_MSG_H2(resp->req); TfwDecodeCacheIter dc_iter = {}; + int r; if (h2_mode) resp->mit.start_off = FRAME_HEADER_SIZE; @@ -920,7 +919,8 @@ tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, H2_STAT_VAL_LEN)) return -E2BIG; - r = tfw_http_msg_expand_data(it, skb_head, &s_line, NULL); + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &s_line, NULL); if (unlikely(r)) return r; @@ -936,7 +936,8 @@ tfw_cache_set_status(TDB *db, TfwCacheEntry *ce, TfwHttpResp *resp, *acc_len += dc_iter.acc_len; if (!h2_mode) { - r = tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL); + r = tfw_http_msg_expand_data((TfwHttpMsg 
*)resp, skb_head, + &g_crlf, NULL); if (unlikely(r)) return r; @@ -1095,6 +1096,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) int r, i; TfwMsgIter *it; TfwHttpResp *resp; + TfwHttpMsg *hm; struct sk_buff **skb_head; unsigned long h_len = 0; TdbVRec *trec = &ce->trec; @@ -1106,11 +1108,12 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) if (!(resp = tfw_http_msg_alloc_resp_light(req))) goto err_create; + hm = (TfwHttpMsg *)resp; it = &resp->iter; skb_head = &resp->msg.skb_head; if (!TFW_MSG_H2(req)) { - r = tfw_http_prep_304(req, skb_head, it); + r = tfw_http_prep_304(req, skb_head, hm); if (unlikely(r)) goto err_setup; } else { @@ -1149,7 +1152,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) } if (!TFW_MSG_H2(req)) { - if (tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + if (tfw_http_msg_expand_data(hm, skb_head, &g_crlf, NULL)) goto err_setup; tfw_http_resp_fwd(resp); @@ -1166,7 +1169,7 @@ tfw_cache_send_304(TfwHttpReq *req, TfwCacheEntry *ce) return; err_setup: T_WARN("Can't build 304 response, key=%lx\n", ce->key); - tfw_http_msg_free((TfwHttpMsg *)resp); + tfw_http_msg_free(hm); err_create: tfw_http_resp_build_error(req); } @@ -2901,7 +2904,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, || (sh_frag && tfw_cache_should_append_body_skb(it, body_sz, chunked_body))) { - if ((r = tfw_msg_iter_append_skb(it))) + if ((r = tfw_http_msg_append_skb((TfwHttpMsg *)resp))) return r; } if (sh_frag) @@ -2935,7 +2938,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, __TFW_STR_CH(&b_len, 0)->len = digs; b_len.len += digs; - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &b_len, NULL); if (unlikely(r)) return r; @@ -2965,7 +2968,7 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, p = trec->data; if (it->frag + 1 == MAX_SKB_FRAGS - && (r = tfw_msg_iter_append_skb(it))) + && 
(r = tfw_http_msg_append_skb((TfwHttpMsg *)resp))) { return r; } @@ -2977,13 +2980,13 @@ tfw_cache_build_resp_body(TDB *db, TdbVRec *trec, TfwHttpResp *resp, char *p, .len = SLEN(S_ZERO S_CRLF) }; - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &g_crlf, NULL); if (unlikely(r)) return r; add_zero_chunk: - r = tfw_http_msg_expand_data(it, &it->skb_head, + r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, &it->skb_head, &b_len, NULL); if (unlikely(r)) return r; @@ -3027,11 +3030,11 @@ tfw_cache_set_hdr_age(TfwHttpResp *resp, TfwCacheEntry *ce, long age) if ((r = tfw_hpack_encode(resp, &h_age, false, false))) goto err; } else { - if ((r = tfw_http_msg_expand_data(&resp->iter, skb_head, + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &h_age, NULL))) goto err; - if ((r = tfw_http_msg_expand_data(&resp->iter, skb_head, + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &g_crlf, NULL))) goto err; } @@ -3160,7 +3163,8 @@ tfw_cache_build_resp(TfwHttpReq *req, TfwCacheEntry *ce, long age) && tfw_http_expand_stale_warn(resp)) || (!test_bit(TFW_HTTP_B_HDR_DATE, resp->flags) && tfw_http_expand_hdr_date(resp)) - || tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + || tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &g_crlf, NULL)) { goto free; } @@ -3238,7 +3242,8 @@ tfw_cache_build_resp(TfwHttpReq *req, TfwCacheEntry *ce, long age) * trailer or chunked body. 
*/ if (chunked_body && req->method != TFW_HTTP_METH_HEAD - && tfw_http_msg_expand_data(it, skb_head, &g_crlf, NULL)) + && tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &g_crlf, NULL)) goto free; return resp; diff --git a/fw/hpack.c b/fw/hpack.c index 93909763e..c67594232 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -1935,7 +1935,6 @@ tfw_hpack_cache_decode_expand(TfwHPack *__restrict hp, unsigned int state; int r = T_OK; TfwStr exp_str = {}; - TfwMsgIter *it = &resp->iter; const unsigned char *last = src + n; unsigned char *prev = src; struct sk_buff **skb_head = &resp->msg.skb_head; @@ -1955,7 +1954,8 @@ do { \ #define EXPAND_STR_DATA(str) \ do { \ - if ((r = tfw_http_msg_expand_data(it, skb_head, str, NULL))) \ + if ((r = tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, \ + str, NULL))) \ goto out; \ dc_iter->acc_len += (str)->len; \ } while (0) @@ -3378,25 +3378,25 @@ tfw_huffman_encode_string(TfwStr *str, TfwPool *pool) } static int -tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwHttpMsg *hm, struct sk_buff **skb_head, TfwStr *str, bool in_huffman) { - int r; - TfwHPackInt len; TfwStr len_str = { 0 }; unsigned short mask = in_huffman ? 0x80 : 0x0; + TfwHPackInt len; + int r; write_int(str->len, 0x7F, mask, &len); len_str.data = len.buf; len_str.len = len.sz; - r = tfw_http_msg_expand_data(it, skb_head, &len_str, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &len_str, NULL); if (unlikely(r)) return r; mit->acc_len += len_str.len; - r = tfw_http_msg_expand_data(it, skb_head, str, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, str, NULL); if (unlikely(r)) return r; mit->acc_len += str->len; @@ -3426,7 +3426,7 @@ tfw_hpack_str_expand_raw(TfwHttpTransIter *mit, TfwMsgIter *it, * thus avoiding Huffman encodings is completely RFC-compliant behaviour. 
*/ static inline int -tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwHttpMsg *hm, struct sk_buff **skb_head, TfwStr *str, TfwPool *pool) { @@ -3440,7 +3440,7 @@ tfw_hpack_str_expand(TfwHttpTransIter *mit, TfwMsgIter *it, in_huffman = true; } - return tfw_hpack_str_expand_raw(mit, it, skb_head, str, in_huffman); + return tfw_hpack_str_expand_raw(mit, hm, skb_head, str, in_huffman); } static inline int @@ -3448,7 +3448,6 @@ tfw_hpack_write_idx(TfwHttpResp *__restrict resp, TfwHPackInt *__restrict idx, bool use_pool) { TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *iter = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; const TfwStr s_idx = { .data = idx->buf, @@ -3463,7 +3462,7 @@ tfw_hpack_write_idx(TfwHttpResp *__restrict resp, TfwHPackInt *__restrict idx, return tfw_h2_msg_expand_from_pool((TfwHttpMsg *)resp, &s_idx, &resp->mit); - return tfw_http_msg_expand_data(iter, skb_head, &s_idx, + return tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, &s_idx, &mit->start_off); } @@ -3532,7 +3531,6 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, int ret; TfwStr *c, *end; TfwHttpTransIter *mit = &resp->mit; - TfwMsgIter *iter = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; TfwStr s_val; @@ -3547,7 +3545,7 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, mit->acc_len += idx->sz; if (unlikely(!name_indexed)) { - ret = tfw_hpack_str_expand(mit, iter, skb_head, + ret = tfw_hpack_str_expand(mit, (TfwHttpMsg *)resp, skb_head, TFW_STR_CHUNK(hdr, 0), NULL); if (unlikely(ret)) return ret; @@ -3579,7 +3577,8 @@ tfw_hpack_hdr_expand(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, c = tfw_str_collect_cmp(c, end, &s_val, NULL); BUG_ON(c != end); - return tfw_hpack_str_expand(mit, iter, skb_head, &s_val, NULL); + return tfw_hpack_str_expand(mit, (TfwHttpMsg *)resp, skb_head, + &s_val, NULL); } static inline int @@ -3760,30 
+3759,26 @@ tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) { - TfwMsgIter it = { - .skb = stream->xmit.skb_head, - .skb_head = stream->xmit.skb_head, - .frag = -1 - }; - TfwStr new_size = {}; TfwHPackInt tmp = {}; + TfwStr dst = {}; char *data; + unsigned int _; int r = 0; WARN_ON_ONCE(!tbl->wnd_changed); - write_int(tbl->window, 0x1F, 0x20, &tmp); - new_size.data = tmp.buf; - new_size.len = tmp.sz; data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, FRAME_HEADER_SIZE); BUG_ON(!data); - r = tfw_http_msg_insert(&it, &data, &new_size); + r = ss_skb_get_room_w_frag(stream->xmit.skb_head, + stream->xmit.skb_head, + data, tmp.sz, &dst, &_); if (unlikely(r)) return r; + memcpy_fast(dst.data, tmp.buf, tmp.sz); stream->xmit.h_len += tmp.sz; tbl->wnd_changed = false; diff --git a/fw/http.c b/fw/http.c index 074281124..943278032 100644 --- a/fw/http.c +++ b/fw/http.c @@ -658,7 +658,7 @@ tfw_h2_prep_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) __TFW_STR_CH(&hdr, 0)->len = name->len - SLEN(S_CRLF) - 2; if (__TFW_STR_CH(msg, i + 1)->nchunks) { - TfwMsgIter *iter = &resp->iter; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; struct sk_buff **skb_head = &resp->msg.skb_head; TfwHPackInt vlen; TfwStr s_vlen = {}; @@ -673,12 +673,12 @@ tfw_h2_prep_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) s_vlen.data = vlen.buf; s_vlen.len = vlen.sz; - r = tfw_http_msg_expand_data(iter, skb_head, &s_vlen, + r = tfw_http_msg_expand_data(hm, skb_head, &s_vlen, NULL); if (unlikely(r)) goto out; - r = tfw_http_msg_expand_data(iter, skb_head, val, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, val, NULL); if (unlikely(r)) goto out; @@ -728,7 +728,8 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - if ((r = tfw_http_msg_setup((TfwHttpMsg *)resp, &it, msg->len))) + r = 
tfw_msg_iter_setup(&it, &resp->msg.skb_head, msg->len); + if (unlikely(r)) return r; body = TFW_STR_BODY_CH(msg); @@ -738,7 +739,7 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) TFW_STR_FOR_EACH_CHUNK(c, msg, end) { if (c->data) { TFW_STR_FOR_EACH_CHUNK(field_c, c, field_end) { - if ((r = tfw_msg_write(&it, field_c))) + if ((r = tfw_msg_iter_write(&it, field_c))) return r; } } @@ -921,7 +922,7 @@ do { \ * Preparing 304 response (Not Modified) for HTTP/1.1-client. */ int -tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwMsgIter *it) +tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwHttpMsg *hm) { int ret = 0; static TfwStr rh = { @@ -938,12 +939,12 @@ tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, TfwMsgIter *it) else if (test_bit(TFW_HTTP_B_CONN_KA, req->flags)) end = &crlf_keep; - ret = tfw_http_msg_expand_data(it, skb_head, &rh, NULL); + ret = tfw_http_msg_expand_data(hm, skb_head, &rh, NULL); if (unlikely(ret)) return ret; if (end) { - ret = tfw_http_msg_expand_data(it, skb_head, end, NULL); + ret = tfw_http_msg_expand_data(hm, skb_head, end, NULL); if (unlikely(ret)) return ret; } @@ -3279,7 +3280,8 @@ tfw_http_expand_hbh(TfwHttpResp *resp, unsigned short status) tfw_http_req_set_conn_close(req); return add_h_conn - ? tfw_http_msg_expand_data(&resp->iter, skb_head, &h_conn, NULL) + ? 
tfw_http_msg_expand_data((TfwHttpMsg *)resp, skb_head, + &h_conn, NULL) : 0; } @@ -3333,6 +3335,7 @@ int tfw_http_expand_stale_warn(TfwHttpResp *resp) { /* TODO: adjust for #865 */ + TfwHttpMsg *hm = (TfwHttpMsg *)resp; struct sk_buff **skb_head = &resp->msg.skb_head; TfwStr wh = { .chunks = (TfwStr []){ @@ -3345,13 +3348,12 @@ tfw_http_expand_stale_warn(TfwHttpResp *resp) .nchunks = 4, }; - return tfw_http_msg_expand_data(&resp->iter, skb_head, &wh, NULL); + return tfw_http_msg_expand_data(hm, skb_head, &wh, NULL); } static __always_inline int __tfw_http_add_hdr_date(TfwHttpResp *resp, bool cache) { - int r; struct sk_buff **skb_head = &resp->msg.skb_head; char *date = *this_cpu_ptr(&g_buf); TfwStr h_date = { @@ -3363,16 +3365,17 @@ __tfw_http_add_hdr_date(TfwHttpResp *resp, bool cache) .len = SLEN(S_F_DATE) + SLEN(S_V_DATE) + SLEN(S_CRLF), .nchunks = 3 }; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; + int r; tfw_http_prep_date_from(date, resp->date); BUILD_BUG_ON(!__builtin_constant_p(cache)); if (!cache) - r = tfw_http_msg_expand_from_pool((TfwHttpMsg *)resp, &h_date); + r = tfw_http_msg_expand_from_pool(hm, &h_date); else - r = tfw_http_msg_expand_data(&resp->iter, skb_head, &h_date, - NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &h_date, NULL); if (unlikely(r)) T_ERR("Unable to add Date: header to resp [%p]\n", resp); @@ -3403,15 +3406,16 @@ tfw_http_expand_hdr_date(TfwHttpResp *resp) static int __tfw_http_add_hdr_server(TfwHttpResp *resp, bool cache) { - int r; struct sk_buff **skb_head = &resp->msg.skb_head; static char s_server[] = S_F_SERVER TFW_NAME "/" TFW_VERSION S_CRLF; TfwStr hdr = { .data = s_server, .len = SLEN(s_server) }; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; + int r; if (!cache) - r = tfw_http_msg_expand_from_pool((TfwHttpMsg *)resp, &hdr); + r = tfw_http_msg_expand_from_pool(hm, &hdr); else - r = tfw_http_msg_expand_data(&resp->iter, skb_head, &hdr, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &hdr, NULL); if (unlikely(r)) T_ERR("Unable 
to add Server: header to resp [%p]\n", resp); @@ -3469,12 +3473,11 @@ __tfw_http_add_hdr_via(TfwHttpMsg *hm, int http_version, bool from_cache) &TFW_STR_STRING(S_CRLF)); } else { struct sk_buff **skb_head = &hm->msg.skb_head; - TfwMsgIter *it = &hm->iter; - r = tfw_http_msg_expand_data(it, skb_head, &rh, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &rh, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &STR_CRLF, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &STR_CRLF, NULL); } if (unlikely(r)) @@ -3702,7 +3705,7 @@ tfw_h1_add_loc_hdrs(TfwHttpMsg *hm, const TfwHdrMods *h_mods, bool from_cache) */ if (from_cache) { struct sk_buff **skb_head = &hm->msg.skb_head; - TfwMsgIter *it = &hm->iter; + /* * Skip the configured header if the header is * configured for deletion (without value chunk). @@ -3710,11 +3713,11 @@ tfw_h1_add_loc_hdrs(TfwHttpMsg *hm, const TfwHdrMods *h_mods, bool from_cache) if (h_mdf.nchunks < 3) continue; /* h_mdf->eolen is ignored, add explicit CRLF. 
*/ - r = tfw_http_msg_expand_data(it, skb_head, &h_mdf, + r = tfw_http_msg_expand_data(hm, skb_head, &h_mdf, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &STR_CRLF, + r = tfw_http_msg_expand_data(hm, skb_head, &STR_CRLF, NULL); } else { r = tfw_http_msg_expand_from_pool(hm, &h_mdf); @@ -4114,18 +4117,18 @@ write_merged_cookie_headers(TfwStr *hdr, TfwMsgIter *it) hval.nchunks--; hval.len -= chunk->len; } - r = tfw_msg_write(it, cookie_dlm); + r = tfw_msg_iter_write(it, cookie_dlm); if (unlikely(r)) return r; - r = tfw_msg_write(it, &hval); + r = tfw_msg_iter_write(it, &hval); if (unlikely(r)) return r; cookie_dlm = &val_dlm; } - return tfw_msg_write(it, &STR_CRLF); + return tfw_msg_iter_write(it, &STR_CRLF); } static int @@ -4136,12 +4139,12 @@ __h2_write_method(TfwHttpReq *req, TfwMsgIter *it) if (test_bit(TFW_HTTP_B_REQ_HEAD_TO_GET, req->flags)) { static const DEFINE_TFW_STR(meth_get, "GET"); - return tfw_msg_write(it, &meth_get); + return tfw_msg_iter_write(it, &meth_get); } else { TfwStr meth = {}; __h2_msg_hdr_val(&ht->tbl[TFW_HTTP_HDR_H2_METHOD], &meth); - return tfw_msg_write(it, &meth); + return tfw_msg_iter_write(it, &meth); } } ALLOW_ERROR_INJECTION(__h2_write_method, ERRNO); @@ -4171,7 +4174,6 @@ tfw_h2_adjust_req(TfwHttpReq *req) size_t pseudo_num; TfwStr tmp_host = {}, *host_val, *field, *end; struct sk_buff *new_head = NULL, *old_head = NULL; - TfwMsgIter it; TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, TFW_VHOST_HDRMOD_REQ); static const DEFINE_TFW_STR(sp, " "); @@ -4217,6 +4219,8 @@ tfw_h2_adjust_req(TfwHttpReq *req) char cl_data[TFW_ULTOA_BUF_SIZ] = {0}; size_t cl_data_len = 0; size_t cl_len = 0; + TfwMsgIter it; + /* * The Transfer-Encoding header field cannot be in the h2 request, because * requests with Transfer-Encoding are blocked. 
@@ -4349,13 +4353,13 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &sp); + r = tfw_msg_iter_write(&it, &sp); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &req->uri_path); + r = tfw_msg_iter_write(&it, &req->uri_path); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &fl_end); /* start of Host: header */ + r = tfw_msg_iter_write(&it, &fl_end); /* start of Host: header */ if (unlikely(r)) goto err; if (h_mods && test_bit(TFW_HTTP_HDR_HOST, h_mods->spec_hdrs)) { @@ -4370,10 +4374,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) __h2_msg_hdr_val(&ht->tbl[TFW_HTTP_HDR_HOST], &tmp_host); host_val = &tmp_host; } - r = tfw_msg_write(&it, host_val); + r = tfw_msg_iter_write(&it, host_val); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; @@ -4386,13 +4390,13 @@ tfw_h2_adjust_req(TfwHttpReq *req) case TFW_HTTP_HDR_HOST: continue; /* Already written. */ case TFW_HTTP_HDR_X_FORWARDED_FOR: - r = tfw_msg_write(&it, &h_xff); + r = tfw_msg_iter_write(&it, &h_xff); if (unlikely(r)) goto err; continue; case TFW_HTTP_HDR_CONTENT_TYPE: if (h_ct_replace) { - r = tfw_msg_write(&it, &h_ct); + r = tfw_msg_iter_write(&it, &h_ct); if (unlikely(r)) goto err; continue; @@ -4427,10 +4431,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) hval.nchunks++; hval.len += chunk->len; } - r = tfw_msg_write(&it, &hval); + r = tfw_msg_iter_write(&it, &hval); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &dlm); + r = tfw_msg_iter_write(&it, &dlm); if (unlikely(r)) goto err; @@ -4438,11 +4442,11 @@ tfw_h2_adjust_req(TfwHttpReq *req) hval.nchunks = dup->nchunks - hval.nchunks; hval.len = dup->len - hval.len; - r = tfw_msg_write(&it, &hval); + r = tfw_msg_iter_write(&it, &hval); if (unlikely(r)) goto err; - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; } @@ -4450,7 +4454,7 @@ tfw_h2_adjust_req(TfwHttpReq *req) goto err; } - r 
= tfw_msg_write(&it, &h_via); + r = tfw_msg_iter_write(&it, &h_via); if (unlikely(r)) goto err; @@ -4465,12 +4469,12 @@ tfw_h2_adjust_req(TfwHttpReq *req) .len = cl_len, .nchunks = 4 }; - r = tfw_msg_write(&it, &h_cl); + r = tfw_msg_iter_write(&it, &h_cl); if (unlikely(r)) goto err; } /* Finally close headers. */ - r = tfw_msg_write(&it, &STR_CRLF); + r = tfw_msg_iter_write(&it, &STR_CRLF); if (unlikely(r)) goto err; @@ -5353,6 +5357,7 @@ tfw_h2_hpack_encode_headers(TfwHttpResp *resp, const TfwHdrMods *h_mods) static int tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) { + TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwMsgIter *it = &resp->iter; size_t len, max_copy = PAGE_SIZE; char *data; @@ -5369,7 +5374,7 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) it->frag = skb_shinfo(it->skb)->nr_frags - 1; if (it->frag + 1 >= MAX_SKB_FRAGS) { - if ((r = tfw_msg_iter_append_skb(it))) + if ((r = tfw_http_msg_append_skb(hm))) return r; } @@ -5393,7 +5398,7 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) ss_skb_adjust_data_len(it->skb, copy); if (it->frag + 1 == MAX_SKB_FRAGS - && (r = tfw_msg_iter_append_skb(it))) + && (r = tfw_http_msg_append_skb(hm))) { return r; } @@ -5823,6 +5828,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) { int r = -EINVAL; TfwHttpReq *req = resp->req; + TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpTransIter *mit = &resp->mit; TfwHttpMsgCleanup cleanup = {}; TfwStr codings = {}; @@ -5874,7 +5880,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) * Alloc room for frame header. After this call resp->pool * must be used only as skb paged data. 
*/ - r = tfw_http_msg_setup_transform_pool(mit, &resp->iter, resp->pool); + r = tfw_http_msg_setup_transform_pool(mit, hm, resp->pool); if (unlikely(r)) return r; @@ -7788,9 +7794,9 @@ tfw_http_hm_srv_send(TfwServer *srv, char *data, unsigned long len) if (!(req = tfw_http_msg_alloc_req_light())) return; hmreq = (TfwHttpMsg *)req; - if (tfw_http_msg_setup(hmreq, &it, msg.len)) + if (tfw_msg_iter_setup(&it, &hmreq->msg.skb_head, msg.len)) goto cleanup; - if (tfw_msg_write(&it, &msg)) + if (tfw_msg_iter_write(&it, &msg)) goto cleanup; __set_bit(TFW_HTTP_B_HMONITOR, req->flags); diff --git a/fw/http.h b/fw/http.h index cc586ddb7..3ba75774e 100644 --- a/fw/http.h +++ b/fw/http.h @@ -777,7 +777,7 @@ int tfw_h2_resp_encode_headers(TfwHttpResp *resp); int tfw_http_prep_redir(TfwHttpResp *resp, unsigned short status, TfwStr *cookie, TfwStr *body); int tfw_http_prep_304(TfwHttpReq *req, struct sk_buff **skb_head, - TfwMsgIter *it); + TfwHttpMsg *hm); void tfw_http_conn_msg_free(TfwHttpMsg *hm); void tfw_http_resp_pair_free_and_put_conn(void *opaque_data); void tfw_http_send_err_resp(TfwHttpReq *req, int status, const char *reason); diff --git a/fw/http2.c b/fw/http2.c index 2c42c0546..ee74cd452 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -676,7 +676,7 @@ tfw_h2_stream_xmit_prepare_resp(TfwStream *stream) resp->iter.skb = resp->msg.skb_head->prev; resp->iter.frag = skb_shinfo(resp->iter.skb)->nr_frags - 1; - tfw_http_msg_setup_transform_pool(mit, &resp->iter, + tfw_http_msg_setup_transform_pool(mit, (TfwHttpMsg *)resp, resp->pool); r = tfw_h2_hpack_encode_trailer_headers(resp); diff --git a/fw/http_frame.c b/fw/http_frame.c index 5a429bd89..a56c2191a 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -329,7 +329,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, if ((r = tfw_msg_iter_setup(&it, &msg.skb_head, msg.len))) goto err; - if ((r = tfw_msg_write(&it, data))) + if ((r = tfw_msg_iter_write(&it, data))) goto err; switch (type) { diff --git 
a/fw/http_msg.c b/fw/http_msg.c index 498bd0a62..268e4baee 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -807,106 +807,26 @@ tfw_http_msg_cutoff_body_chunks(TfwHttpResp *resp) } /** - * Set up @hm with empty SKB space of size @data_len for data writing. - * Set up the iterator @it to support consecutive writes. - * - * This function is intended to work together with tfw_msg_write() - * or tfw_http_msg_add_data() which use the @it iterator. - * - * @hm must be allocated dynamically (NOT statically) as it may have - * to sit in a queue long after the caller has finished. It's assumed - * that @hm is properly initialized. - * - * It's essential to understand, that "properly initialized" for @hm - * may mean different things depending on the intended use. Currently - * this function is called to send a response from cache, or to send - * an error response. An error response is not parsed or adjusted, so - * a shorter/faster version of message allocation and initialization - * may be used. (See __tfw_http_msg_alloc(full=False)). + * Allocate and add a single empty skb (with a place for TCP headers though) + * to the @hm iterator. The allocated skb has no space for the data, user is + * expected to add new paged fragments. */ int -tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len) +tfw_http_msg_append_skb(TfwHttpMsg *hm) { + TfwMsgIter *it = &hm->iter; int r; - if ((r = tfw_msg_iter_setup(it, &hm->msg.skb_head, data_len))) + if ((r = ss_skb_alloc_data(&it->skb_head, 0))) return r; - T_DBG2("Set up HTTP message %pK with %lu bytes data\n", hm, data_len); - - return 0; -} -EXPORT_SYMBOL(tfw_http_msg_setup); - -/** - * Fill up an HTTP message by iterator @it with data from string @data. - * Properly maintain @hm header @field, so that @hm can be used in regular - * transformations. However, the header name and the value are not split into - * different chunks, so advanced headers matching is not available for @hm. 
- */ -int -tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr *field, - const TfwStr *data) -{ - const TfwStr *c, *end; - - BUG_ON(TFW_STR_DUP(data)); - if (WARN_ON_ONCE(it->frag >= skb_shinfo(it->skb)->nr_frags)) - return -E2BIG; - - TFW_STR_FOR_EACH_CHUNK(c, data, end) { - char *p; - unsigned int c_off = 0, c_size, f_room, n_copy; -this_chunk: - c_size = c->len - c_off; - if (it->frag >= 0) { - unsigned int f_size; - skb_frag_t *frag = &skb_shinfo(it->skb)->frags[it->frag]; - - f_size = skb_frag_size(frag); - f_room = PAGE_SIZE - skb_frag_off(frag) - f_size; - p = (char *)skb_frag_address(frag) + f_size; - n_copy = min(c_size, f_room); - skb_frag_size_add(frag, n_copy); - ss_skb_adjust_data_len(it->skb, n_copy); - } else { - f_room = skb_tailroom(it->skb); - n_copy = min(c_size, f_room); - p = skb_put(it->skb, n_copy); - } - - memcpy_fast(p, c->data + c_off, n_copy); - if (field && n_copy - && __tfw_http_msg_add_str_data(hm, field, p, n_copy, - it->skb)) - { - return -ENOMEM; - } + it->skb = ss_skb_peek_tail(&it->skb_head); + it->frag = -1; - /* - * The chunk occupied all the spare space in the SKB fragment, - * switch to the next fragment. - */ - if (c_size >= f_room) { - if (WARN_ON_ONCE(tfw_msg_iter_next_data_frag(it) - && ((c_size != f_room) - || (c + 1 < end)))) - { - return -E2BIG; - } - /* - * Not all data from the chunk has been copied, - * stay in the current chunk and copy the rest to the - * next fragment. - */ - if (c_size != f_room) { - c_off += n_copy; - goto this_chunk; - } - } - } + skb_shinfo(it->skb)->flags = skb_shinfo(it->skb->prev)->flags; return 0; } +EXPORT_SYMBOL(tfw_http_msg_append_skb); void tfw_http_msg_pair(TfwHttpResp *resp, TfwHttpReq *req) @@ -1003,9 +923,10 @@ __tfw_http_msg_alloc(int type, bool full) * MUST be used only for messages from cache or messages constructed locally. 
*/ int -tfw_http_msg_expand_data(TfwMsgIter *it, struct sk_buff **skb_head, +tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, const TfwStr *src, unsigned int *start_off) { + TfwMsgIter *it = &hm->iter; const TfwStr *c, *end; TFW_STR_FOR_EACH_CHUNK(c, src, end) { @@ -1138,13 +1059,14 @@ tfw_http_msg_alloc_from_pool(TfwMsgIter *it, TfwPool* pool, size_t size) * data, which will split the paged fragment. */ int -tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwMsgIter *it, +tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwHttpMsg *msg, TfwPool* pool) { - int r; + TfwMsgIter *it = &msg->iter; + unsigned int room = TFW_POOL_CHUNK_ROOM(pool); char* addr; bool np; - unsigned int room = TFW_POOL_CHUNK_ROOM(pool); + int r; BUG_ON(room < 0); @@ -1512,27 +1434,3 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) return r; } - -/** - * Insert data from string @data to message at offset defined by message - * iterator @it and @off. This function doesn't maintain message structure. - * After insertion message iterator and @data will point at the start of - * inserted data fragment. 
- */ -int -tfw_http_msg_insert(TfwMsgIter *it, char **off, const TfwStr *data) -{ - int r; - TfwStr dst = {}; - - if ((r = ss_skb_get_room_w_frag(it->skb_head, it->skb, *off, data->len, - &dst, &it->frag))) - { - return r; - } - - *off = dst.data; - it->skb = dst.skb; - - return tfw_strcpy(&dst, data); -} diff --git a/fw/http_msg.h b/fw/http_msg.h index fd987938b..d94642f2f 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -145,17 +145,14 @@ unsigned int tfw_http_msg_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); int tfw_http_msg_del_str(TfwHttpMsg *hm, TfwStr *str); int tfw_http_msg_cutoff_body_chunks(TfwHttpResp *resp); - -int tfw_http_msg_setup(TfwHttpMsg *hm, TfwMsgIter *it, size_t data_len); -int tfw_http_msg_add_data(TfwMsgIter *it, TfwHttpMsg *hm, TfwStr *field, - const TfwStr *data); +int tfw_http_msg_append_skb(TfwHttpMsg *hm); void tfw_http_msg_hdr_open(TfwHttpMsg *hm, unsigned char *hdr_start); int tfw_http_msg_hdr_close(TfwHttpMsg *hm); int tfw_http_msg_grow_hdr_tbl(TfwHttpMsg *hm); void tfw_http_msg_free(TfwHttpMsg *m); -int tfw_http_msg_expand_data(TfwMsgIter *it, struct sk_buff **skb_head, +int tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, const TfwStr *src, unsigned int *start_off); -int tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwMsgIter *it, +int tfw_http_msg_setup_transform_pool(TfwHttpTransIter *mit, TfwHttpMsg *hm, TfwPool* pool); int tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str); int tfw_h2_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, @@ -165,7 +162,6 @@ int tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, int __hdr_name_cmp(const TfwStr *hdr, const TfwStr *cmp_hdr); int __http_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup); -int tfw_http_msg_insert(TfwMsgIter *it, char **off, const TfwStr *data); #define TFW_H2_MSG_HDR_ADD(hm, name, val, idx) \ tfw_h2_msg_hdr_add(hm, name, 
sizeof(name) - 1, val, \ diff --git a/fw/http_sess.c b/fw/http_sess.c index 02c3d2cff..de43d4b36 100644 --- a/fw/http_sess.c +++ b/fw/http_sess.c @@ -365,13 +365,12 @@ tfw_http_sticky_add(TfwHttpResp *resp, bool cache) set_cookie.hpack_idx = 55; r = tfw_hpack_encode(resp, &set_cookie, !cache, !cache); } else if (cache) { - TfwMsgIter *it = &resp->iter; struct sk_buff **skb_head = &resp->msg.skb_head; - r = tfw_http_msg_expand_data(it, skb_head, &set_cookie, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &set_cookie, NULL); if (unlikely(r)) goto err; - r = tfw_http_msg_expand_data(it, skb_head, &crlf, NULL); + r = tfw_http_msg_expand_data(hm, skb_head, &crlf, NULL); } else { r = tfw_http_msg_expand_from_pool(hm, &set_cookie); diff --git a/fw/msg.c b/fw/msg.c index b10c0b444..918ea9fde 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -22,24 +22,6 @@ #include "http_msg.h" #include "ss_skb.h" -/** - * Fill up an HTTP message by iterator @it with data from string @data. - * This is a quick message creator which doesn't maintain properly - * parts of the message structure like headers table. So the HTTP message - * cannot be used where HTTP message transformations are required. - * - * An iterator @it is used to support multiple calls to this function - * after the set up. This function can only be called after a call to - * tfw_http_msg_setup(). It works only with empty SKB space prepared - * by the function. - */ -int -tfw_msg_write(TfwMsgIter *it, const TfwStr *data) -{ - return tfw_http_msg_add_data(it, NULL, NULL, data); -} -EXPORT_SYMBOL(tfw_msg_write); - /** * Allocate list of skbs to store data with given length @data_len and * initialise the iterator it. Shouldn't be called against previously used @@ -60,41 +42,105 @@ tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, size_t data_len) return 0; } -/** - * Allocate and add a single empty skb (with a place for TCP headers though) - * to the iterator. 
The allocated skb has no space for the data, user is - * expected to add new paged fragments. - */ -int -tfw_msg_iter_append_skb(TfwMsgIter *it) +static inline int +tfw_msg_iter_next_data_frag(TfwMsgIter *it) { - int r; + if (skb_shinfo(it->skb)->nr_frags > it->frag + 1) { + ++it->frag; + return 0; + } - if ((r = ss_skb_alloc_data(&it->skb_head, 0))) - return r; - it->skb = ss_skb_peek_tail(&it->skb_head); + it->skb = it->skb->next; + if (it->skb == it->skb_head || !skb_shinfo(it->skb)->nr_frags) { + it->frag = MAX_SKB_FRAGS; + return -EINVAL; + } it->frag = -1; - skb_shinfo(it->skb)->flags = skb_shinfo(it->skb->prev)->flags; return 0; } /** - * Find origin fragment of data @off and set it as active message iterator - * fragment. + * Fill up an HTTP message by iterator @it with data from string @data. + * Properly maintain @hm header @field, so that @hm can be used in regular + * transformations. However, the header name and the value are not split into + * different chunks, so advanced headers matching is not available for @hm. 
*/ -int tfw_http_iter_set_at(TfwMsgIter *it, char *off) +static int +tfw_msg_iter_add_data(TfwMsgIter *it, const TfwStr *data) { - do { - if (!ss_skb_find_frag_by_offset(it->skb, off, &it->frag)) - return 0; - it->skb = it->skb->next; + const TfwStr *c, *end; + + BUG_ON(TFW_STR_DUP(data)); + if (WARN_ON_ONCE(it->frag >= skb_shinfo(it->skb)->nr_frags)) + return -E2BIG; + + TFW_STR_FOR_EACH_CHUNK(c, data, end) { + char *p; + unsigned int c_off = 0, c_size, f_room, n_copy; +this_chunk: + c_size = c->len - c_off; + if (it->frag >= 0) { + unsigned int f_size; + skb_frag_t *frag = &skb_shinfo(it->skb)->frags[it->frag]; + + f_size = skb_frag_size(frag); + f_room = PAGE_SIZE - skb_frag_off(frag) - f_size; + p = (char *)skb_frag_address(frag) + f_size; + n_copy = min(c_size, f_room); + skb_frag_size_add(frag, n_copy); + ss_skb_adjust_data_len(it->skb, n_copy); + } else { + f_room = skb_tailroom(it->skb); + n_copy = min(c_size, f_room); + p = skb_put(it->skb, n_copy); + } + + memcpy_fast(p, c->data + c_off, n_copy); + /* + * The chunk occupied all the spare space in the SKB fragment, + * switch to the next fragment. + */ + if (c_size >= f_room) { + if (WARN_ON_ONCE(tfw_msg_iter_next_data_frag(it) + && ((c_size != f_room) + || (c + 1 < end)))) + { + return -E2BIG; + } + /* + * Not all data from the chunk has been copied, + * stay in the current chunk and copy the rest to the + * next fragment. + */ + if (c_size != f_room) { + c_off += n_copy; + goto this_chunk; + } + } + } - } while (it->skb != it->skb_head); + return 0; +} - return -E2BIG; +/** + * Fill up an HTTP message by iterator @it with data from string @data. + * This is a quick message creator which doesn't maintain properly + * parts of the message structure like headers table. So the HTTP message + * cannot be used where HTTP message transformations are required. + * + * An iterator @it is used to support multiple calls to this function + * after the set up. 
This function can only be called after a call to + * tfw_http_msg_setup(). It works only with empty SKB space prepared + * by the function. + */ +int +tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data) +{ + return tfw_msg_iter_add_data(it, data); } +EXPORT_SYMBOL(tfw_msg_iter_write); /** * Move message iterator from @data pointer by @sz symbols right. diff --git a/fw/msg.h b/fw/msg.h index 6917489d4..25422cd87 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -97,29 +97,9 @@ typedef struct { TfwStr hdr; } TfwMsgParseIter; -int tfw_msg_write(TfwMsgIter *it, const TfwStr *data); +int tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data); int tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, size_t data_len); -int tfw_msg_iter_append_skb(TfwMsgIter *it); -int tfw_http_iter_set_at(TfwMsgIter *it, char *off); int tfw_msg_iter_move(TfwMsgIter *it, unsigned char **data, unsigned long sz); -static inline int -tfw_msg_iter_next_data_frag(TfwMsgIter *it) -{ - if (skb_shinfo(it->skb)->nr_frags > it->frag + 1) { - ++it->frag; - return 0; - } - - it->skb = it->skb->next; - if (it->skb == it->skb_head || !skb_shinfo(it->skb)->nr_frags) { - it->frag = MAX_SKB_FRAGS; - return -EINVAL; - } - it->frag = -1; - - return 0; -} - #endif /* __TFW_MSG_H__ */ diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 27f269efb..5997aa552 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -61,7 +61,7 @@ test_req_alloc(size_t data_len) hmreq = __tfw_http_msg_alloc(Conn_HttpClnt, true); BUG_ON(!hmreq); - ret = tfw_http_msg_setup(hmreq, &it, data_len); + ret = tfw_msg_iter_setup(&it, &hmreq->msg.skb_head, data_len); BUG_ON(ret); memset(&conn_req, 0, sizeof(TfwConn)); @@ -91,7 +91,7 @@ test_resp_alloc(size_t data_len) int ret; TfwHttpResp *hmresp = test_resp_alloc_no_data(); - ret = tfw_http_msg_setup((TfwHttpMsg *)hmresp, &it, data_len); + ret = tfw_msg_iter_setup(&it, &hmresp->msg.skb_head, data_len); BUG_ON(ret); return (TfwHttpResp *)hmresp; diff --git 
a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 544f6916f..890dd5e2e 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -235,7 +235,7 @@ TEST(http_msg, expand_from_pool_for_headers) resp->body_start_skb = it->skb; resp->body.len = (MAX_SKB_FRAGS - 1) * SLEN("paged_body"); - tfw_http_msg_setup_transform_pool(&resp->mit, it, resp->pool); + tfw_http_msg_setup_transform_pool(&resp->mit, msg, resp->pool); EXPECT_EQ(tfw_http_msg_cutoff_headers(msg, &cleanup), 0); @@ -297,7 +297,7 @@ TEST(http_msg, expand_from_pool_for_trailers) EXPECT_NULL(cleanup.skb_head); it->frag = skb_shinfo(it->skb)->nr_frags - 1; - tfw_http_msg_setup_transform_pool(&resp->mit, it, resp->pool); + tfw_http_msg_setup_transform_pool(&resp->mit, msg, resp->pool); __set_bit(TFW_HTTP_B_RESP_ENCODE_TRAILERS, resp->flags); diff --git a/fw/tls.c b/fw/tls.c index 0df47eb40..47b917812 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -568,7 +568,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) if ((r = tfw_msg_iter_setup(&it, &io->skb_list, str.len))) goto out; - if ((r = tfw_msg_write(&it, &str))) + if ((r = tfw_msg_iter_write(&it, &str))) goto out; /* Only one skb should has been allocated. */ WARN_ON_ONCE(it.skb->next != io->skb_list From aba94a6f27414b6f941f5457296e86fc0d20e5d7 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Sun, 29 Jun 2025 21:34:15 +0300 Subject: [PATCH 05/23] Adjust memory used by Tempesta FW. In task #498 we decided to use the `client_mem` option to limit the amount of memory used by a client. This commit is a part of this task - now Tempesta FW uses `sk->sk_rmem_alloc` to adjust the memory used by Tempesta FW for this client connection. 
--- fw/http.c | 16 +++++++++--- fw/http.h | 17 ++++++++++++ fw/http_frame.c | 8 +++--- fw/http_msg.c | 8 ++++-- fw/msg.c | 5 ++-- fw/msg.h | 4 +-- fw/ss_skb.c | 40 +++++++++++++++++++++++++++-- fw/ss_skb.h | 7 ++++- fw/t/unit/helpers.c | 12 ++++++--- fw/t/unit/helpers.h | 4 +++ fw/t/unit/test.c | 9 +++++++ fw/t/unit/test_http_msg.c | 2 ++ fw/t/unit/test_http_parser_common.c | 19 ++++++++++++++ fw/tls.c | 6 +++-- 14 files changed, 134 insertions(+), 23 deletions(-) diff --git a/fw/http.c b/fw/http.c index 943278032..65827028c 100644 --- a/fw/http.c +++ b/fw/http.c @@ -728,7 +728,8 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - r = tfw_msg_iter_setup(&it, &resp->msg.skb_head, msg->len); + r = tfw_msg_iter_setup(&it, resp->req->conn->sk, &resp->msg.skb_head, + msg->len); if (unlikely(r)) return r; @@ -4344,7 +4345,8 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (WARN_ON_ONCE(h1_hdrs_sz < 0)) return -EINVAL; - r = tfw_msg_iter_setup(&it, &new_head, h1_hdrs_sz); + r = tfw_msg_iter_setup(&it, req->conn->sk, &new_head, + h1_hdrs_sz); if (unlikely(r)) return r; @@ -4579,6 +4581,7 @@ tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) if (unlikely(!nskb)) return -ENOMEM; + ss_skb_set_owner(nskb, resp->msg.skb_head->sk); nskb->mark = resp->msg.skb_head->mark; cleanup->skb_head = resp->msg.skb_head; resp->msg.skb_head = NULL; @@ -6548,6 +6551,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, actor = tfw_http_parse_req; req->tfh.version = TFW_HTTP_TFH_HTTP_REQ; } + ss_skb_set_owner(skb, conn->sk); r = ss_skb_process(skb, actor, req, &req->chunk_cnt, &parsed); req->msg.len += parsed; @@ -7367,6 +7371,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, T_DBG2("Received %u server data bytes on conn=%p msg=%p\n", skb->len, conn, stream->msg); + /* * Process pipelined requests in a loop * until all data in the SKB is 
processed. @@ -7381,7 +7386,9 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, if (TFW_FSM_TYPE(cli_conn->proto.type) == TFW_FSM_H2) conn_stop = !hmresp->req->stream; else - conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); + conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, + hmresp->req->flags); + ss_skb_set_owner(skb, cli_conn->sk); } else { conn_stop = false; } @@ -7794,7 +7801,8 @@ tfw_http_hm_srv_send(TfwServer *srv, char *data, unsigned long len) if (!(req = tfw_http_msg_alloc_req_light())) return; hmreq = (TfwHttpMsg *)req; - if (tfw_msg_iter_setup(&it, &hmreq->msg.skb_head, msg.len)) + if (tfw_msg_iter_setup(&it, NULL, &hmreq->msg.skb_head, + msg.len)) goto cleanup; if (tfw_msg_iter_write(&it, &msg)) goto cleanup; diff --git a/fw/http.h b/fw/http.h index 3ba75774e..458e33ec1 100644 --- a/fw/http.h +++ b/fw/http.h @@ -588,6 +588,23 @@ void tfw_http_exit(void); T_WARN("%s, status %d: %s\n", \ msg, status, addr_str)) +static inline bool +tfw_http_msg_is_req(TfwHttpMsg *msg) +{ + /* + * msg->conn can be equal to zero only for response + * which is served from cache or error response. + */ + return msg->conn && TFW_CONN_TYPE(msg->conn) & Conn_Clnt; +} + +static inline struct sock * +tfw_http_msg_sock(TfwHttpMsg *msg) +{ + return tfw_http_msg_is_req(msg) ? 
+ msg->conn->sk : msg->pair->conn->sk; +} + static inline int tfw_http_resp_code_range(const int n) { diff --git a/fw/http_frame.c b/fw/http_frame.c index a56c2191a..f4e4a72af 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -312,7 +312,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, TfwMsg msg = {}; unsigned char buf[FRAME_HEADER_SIZE]; TfwStr *hdr_str = TFW_STR_CHUNK(data, 0); - TfwH2Conn *conn = ctx->conn; + TfwConn *conn = (TfwConn *)ctx->conn; BUG_ON(hdr_str->data); hdr_str->data = buf; @@ -326,7 +326,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, T_DBG2("Preparing HTTP/2 message with %lu bytes data\n", data->len); msg.len = data->len; - if ((r = tfw_msg_iter_setup(&it, &msg.skb_head, msg.len))) + if ((r = tfw_msg_iter_setup(&it, conn->sk, &msg.skb_head, msg.len))) goto err; if ((r = tfw_msg_iter_write(&it, data))) @@ -357,7 +357,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, tfw_h2_on_tcp_entail_ack; } - if ((r = tfw_connection_send((TfwConn *)conn, &msg))) + if ((r = tfw_connection_send(conn, &msg))) goto err; /* * We do not close client connection automatically here in case @@ -366,7 +366,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, * was successful - to avoid hanged unclosed client connection. 
*/ if (type == TFW_FRAME_CLOSE || type == TFW_FRAME_SHUTDOWN) - TFW_CONN_TYPE((TfwConn *)conn) |= Conn_Stop; + TFW_CONN_TYPE(conn) |= Conn_Stop; return 0; diff --git a/fw/http_msg.c b/fw/http_msg.c index 268e4baee..a3db303f1 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -817,11 +817,12 @@ tfw_http_msg_append_skb(TfwHttpMsg *hm) TfwMsgIter *it = &hm->iter; int r; - if ((r = ss_skb_alloc_data(&it->skb_head, 0))) + r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_sock(hm), 0); + if (unlikely(r)) return r; + it->skb = ss_skb_peek_tail(&it->skb_head); it->frag = -1; - skb_shinfo(it->skb)->flags = skb_shinfo(it->skb->prev)->flags; return 0; @@ -936,6 +937,8 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, if (!it->skb) { if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; + + ss_skb_set_owner(it->skb, tfw_http_msg_sock(hm)); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; if (!it->skb_head) @@ -1205,6 +1208,7 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; + ss_skb_set_owner(nskb, tfw_http_msg_sock(hm)); /* * TODO #2136: Remove this flag during reworking * this function. Try to process headers and diff --git a/fw/msg.c b/fw/msg.c index 918ea9fde..ce7d8db6d 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -28,11 +28,12 @@ * iterator, since its current state is to be rewritten. 
*/ int -tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, size_t data_len) +tfw_msg_iter_setup(TfwMsgIter *it, struct sock *sk, struct sk_buff **skb_head, + size_t data_len) { int r; - if ((r = ss_skb_alloc_data(skb_head, data_len))) + if ((r = ss_skb_alloc_data(skb_head, sk, data_len))) return r; it->skb = it->skb_head = *skb_head; it->frag = -1; diff --git a/fw/msg.h b/fw/msg.h index 25422cd87..2c7b2be0c 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -98,8 +98,8 @@ typedef struct { } TfwMsgParseIter; int tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data); -int tfw_msg_iter_setup(TfwMsgIter *it, struct sk_buff **skb_head, - size_t data_len); +int tfw_msg_iter_setup(TfwMsgIter *it, struct sock *sk, + struct sk_buff **skb_head, size_t data_len); int tfw_msg_iter_move(TfwMsgIter *it, unsigned char **data, unsigned long sz); #endif /* __TFW_MSG_H__ */ diff --git a/fw/ss_skb.c b/fw/ss_skb.c index e1d9fa7de..ac763906c 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -109,7 +109,7 @@ ss_skb_alloc_pages(size_t len) * segmentation. The allocated payload space will be filled with data. */ int -ss_skb_alloc_data(struct sk_buff **skb_head, size_t len) +ss_skb_alloc_data(struct sk_buff **skb_head, struct sock *sk, size_t len) { int i_skb, nr_skbs = len ? 
DIV_ROUND_UP(len, SS_SKB_MAX_DATA_LEN) : 1; size_t n = 0; @@ -120,6 +120,7 @@ ss_skb_alloc_data(struct sk_buff **skb_head, size_t len) skb = ss_skb_alloc_pages(n); if (!skb) return -ENOMEM; + ss_skb_set_owner(skb, sk); ss_skb_queue_tail(skb_head, skb); } @@ -217,6 +218,8 @@ __extend_pgfrags(struct sk_buff *skb_head, struct sk_buff *skb, int from, int n) nskb = ss_skb_alloc(0); if (nskb == NULL) return -ENOMEM; + + ss_skb_set_owner(nskb, skb->sk); skb_shinfo(nskb)->flags = skb_shinfo(skb)->flags; ss_skb_insert_after(skb, nskb); skb_shinfo(nskb)->nr_frags = n_excess; @@ -392,6 +395,7 @@ __split_linear_data(struct sk_buff *skb_head, struct sk_buff *skb, char *pspt, skb->tail -= tail_len; skb->data_len += tail_len; skb->truesize += tail_len; + ss_skb_adjust_client_mem(skb, tail_len); /* Make the fragment with the tail part. */ __skb_fill_page_desc(skb, alloc, page, tail_off, tail_len); @@ -1306,6 +1310,8 @@ ss_skb_split(struct sk_buff *skb, int len) skb->truesize -= nlen; buff->mark = skb->mark; + ss_skb_adjust_client_mem(skb, -nlen); + /* * These are orphaned SKBs that are taken out of the TCP/IP * stack and are completely owned by Tempesta. 
There is no @@ -1330,7 +1336,7 @@ ss_skb_init_for_xmit(struct sk_buff *skb) struct skb_shared_info *shinfo = skb_shinfo(skb); __u8 pfmemalloc = skb->pfmemalloc; - WARN_ON_ONCE(skb->sk); + skb_orphan(skb); skb_dst_drop(skb); INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); @@ -1707,3 +1713,33 @@ ss_skb_realloc_headroom(struct sk_buff *skb) return pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); } ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); + +static void +ss_sock_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + BUG_ON(!sk); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sock_put(sk); +} + +void +ss_skb_set_owner(struct sk_buff *skb, struct sock *sk) +{ + if (sk) { + BUG_ON(skb->sk); + + sock_hold(sk); + skb->sk = sk; + skb->destructor = ss_sock_rfree; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + } +} + +void +ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) +{ + if (skb->sk) + atomic_add(delta, &skb->sk->sk_rmem_alloc); +} diff --git a/fw/ss_skb.h b/fw/ss_skb.h index b7710f6be..cf4ff1708 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -77,6 +77,9 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) +void ss_skb_set_owner(struct sk_buff *skb, struct sock *sk); +void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); + static inline bool ss_skb_is_within_fragment(char *begin_fragment, char *position, char *end_fragment) @@ -316,6 +319,8 @@ ss_skb_adjust_data_len(struct sk_buff *skb, int delta) skb->len += delta; skb->data_len += delta; skb->truesize += delta; + if (skb->sk) + ss_skb_adjust_client_mem(skb, delta); } /* @@ -448,7 +453,7 @@ ss_skb_data_ptr_by_offset(struct sk_buff *skb, unsigned int off) char *ss_skb_fmt_src_addr(const struct sk_buff *skb, char *out_buf); -int ss_skb_alloc_data(struct sk_buff **skb_head, size_t len); +int ss_skb_alloc_data(struct sk_buff **skb_head, struct sock *sk, size_t len); struct sk_buff *ss_skb_split(struct sk_buff *skb, int len); int 
ss_skb_get_room_w_frag(struct sk_buff *skb_head, struct sk_buff *skb, char *pspt, unsigned int len, TfwStr *it, int *fragn); diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 5997aa552..26a55b6e2 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -33,6 +33,8 @@ */ #include "helpers.h" #include "http_msg.h" +#include "helpers.h" + #include "pool.c" #include "apm.h" #include "filter.h" @@ -43,9 +45,9 @@ #include "tf_conf.h" #include "tf_filter.h" -static TfwConn conn_req, conn_resp; - unsigned int tfw_cli_max_concurrent_streams; +TfwConn conn_req, conn_resp; +struct sock sk; TfwHttpReq * test_req_alloc(size_t data_len) @@ -61,12 +63,13 @@ test_req_alloc(size_t data_len) hmreq = __tfw_http_msg_alloc(Conn_HttpClnt, true); BUG_ON(!hmreq); - ret = tfw_msg_iter_setup(&it, &hmreq->msg.skb_head, data_len); + ret = tfw_msg_iter_setup(&it, &sk, &hmreq->msg.skb_head, data_len); BUG_ON(ret); memset(&conn_req, 0, sizeof(TfwConn)); tfw_connection_init(&conn_req); conn_req.proto.type = Conn_HttpClnt; + conn_req.sk = &sk; hmreq->conn = &conn_req; hmreq->stream = &conn_req.stream; tfw_http_init_parser_req((TfwHttpReq *)hmreq); @@ -91,7 +94,7 @@ test_resp_alloc(size_t data_len) int ret; TfwHttpResp *hmresp = test_resp_alloc_no_data(); - ret = tfw_msg_iter_setup(&it, &hmresp->msg.skb_head, data_len); + ret = tfw_msg_iter_setup(&it, &sk, &hmresp->msg.skb_head, data_len); BUG_ON(ret); return (TfwHttpResp *)hmresp; @@ -108,6 +111,7 @@ test_resp_alloc_no_data() memset(&conn_resp, 0, sizeof(TfwConn)); tfw_connection_init(&conn_resp); conn_resp.proto.type = Conn_HttpSrv; + conn_resp.sk = &sk; hmresp->conn = &conn_resp; hmresp->stream = &conn_resp.stream; tfw_http_init_parser_resp((TfwHttpResp *)hmresp); diff --git a/fw/t/unit/helpers.h b/fw/t/unit/helpers.h index 6b98d08ab..81fc00821 100644 --- a/fw/t/unit/helpers.h +++ b/fw/t/unit/helpers.h @@ -32,5 +32,9 @@ void test_req_free(TfwHttpReq *req); TfwHttpResp *test_resp_alloc(size_t data_len); TfwHttpResp 
*test_resp_alloc_no_data(void); void test_resp_free(TfwHttpResp *req); +void test_req_resp_cleanup(void); + +extern TfwConn conn_req, conn_resp; +extern struct sock sk; #endif /* __TFW_TEST_HELPER_H__ */ diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index f30c55ba1..240a9a6e2 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -23,6 +23,7 @@ #include "test.h" #include "test_http_parser_defs.h" #include "test_http_parser_common.h" +#include "helpers.h" int test_fail_counter; test_fixture_fn_t test_setup_fn; @@ -118,16 +119,22 @@ test_run_all(void) __fpu_schedule(); TEST_SUITE_MPART_RUN(http1_parser); + test_req_resp_cleanup(); + EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); __fpu_schedule(); test_case_alloc_h2(); TEST_SUITE_MPART_RUN(http2_parser); + test_req_resp_cleanup(); + EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); __fpu_schedule(); test_case_cleanup_h2(); TEST_SUITE_RUN(http2_parser_hpack); + test_req_resp_cleanup(); + EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); __fpu_schedule(); TEST_SUITE_RUN(http_cache); @@ -137,6 +144,8 @@ test_run_all(void) __fpu_schedule(); TEST_SUITE_RUN(http_msg); + test_req_resp_cleanup(); + EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); __fpu_schedule(); TEST_SUITE_RUN(hash); diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 890dd5e2e..fe4c9d885 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -22,6 +22,7 @@ #include "test.h" #include "helpers.h" #include "http_msg.h" +#include "helpers.h" static TfwHttpResp *resp; @@ -113,6 +114,7 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; + ss_skb_set_owner(skb, &sk); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index 4ef01cb2c..8eb974d65 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -688,3 +688,22 @@ 
get_next_str_val(TfwStr *str) return v; } + +void +test_req_resp_cleanup(void) +{ + if (sample_req) { + test_req_free(sample_req); + sample_req = NULL; + } + + if (req) { + test_req_free(req); + req = NULL; + } + + if (resp) { + test_resp_free(resp); + resp = NULL; + } +} diff --git a/fw/tls.c b/fw/tls.c index 47b917812..4e5d20c74 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -537,6 +537,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) { int r, flags = 0; TfwTlsConn *conn = container_of(tls, TfwTlsConn, tls); + struct sock *sk = conn->cli_conn.sk; TlsIOCtx *io = &tls->io_out; TfwMsgIter it; TfwStr str = {}; @@ -564,9 +565,9 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) T_DBG("TLS %lu bytes +%u segments (%u bytes, last msgtype %#x)" " are to be sent on conn=%pK/sk_write_xmit=%pK ready=%d\n", str.len, sgt ? sgt->nents : 0, io->msglen, io->msgtype, conn, - conn->cli_conn.sk->sk_write_xmit, ttls_xfrm_ready(tls)); + sk->sk_write_xmit, ttls_xfrm_ready(tls)); - if ((r = tfw_msg_iter_setup(&it, &io->skb_list, str.len))) + if ((r = tfw_msg_iter_setup(&it, sk, &io->skb_list, str.len))) goto out; if ((r = tfw_msg_iter_write(&it, &str))) goto out; @@ -584,6 +585,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } + ss_skb_set_owner(skb, sk); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } From fd2876fc431bc0724133de5958439babf13e8114 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Tue, 1 Jul 2025 14:41:18 +0300 Subject: [PATCH 06/23] Adjust memory used by Tempesta FW. In task #498 we decided to use the `client_mem` option to limit the amount of memory used by a client. This commit is a part of this task and the next step of the implementation. Previously Tempesta FW used `sk->sk_rmem_alloc` to adjust the memory used by Tempesta FW for this client connection; now we adjust memory for the whole TfwClient, because there can be a lot of connections for one client, and in all other cases we apply limits to the TfwClient and block it if necessary. 
--- fw/client.c | 1 + fw/client.h | 8 +++++ fw/http.c | 15 ++++----- fw/http.h | 8 ++--- fw/http_frame.c | 2 +- fw/http_msg.c | 6 ++-- fw/msg.c | 4 +-- fw/msg.h | 2 +- fw/ss_skb.c | 50 ++++++++++++++++++++--------- fw/ss_skb.h | 4 +-- fw/t/unit/helpers.c | 25 ++++++++------- fw/t/unit/helpers.h | 5 ++- fw/t/unit/test.c | 8 ++--- fw/t/unit/test_http1_parser.c | 3 +- fw/t/unit/test_http_msg.c | 9 ++++-- fw/t/unit/test_http_parser_common.c | 3 +- fw/tls.c | 8 ++--- 17 files changed, 96 insertions(+), 65 deletions(-) diff --git a/fw/client.c b/fw/client.c index 9c6524ea9..4da5dfb66 100644 --- a/fw/client.c +++ b/fw/client.c @@ -202,6 +202,7 @@ tfw_client_ent_init(TdbRec *rec, void *data) ctx->init(cli); tfw_peer_init((TfwPeer *)cli, &ctx->addr); + atomic_set(&cli->mem, 0); ent->xff_addr = ctx->xff_addr; tfw_str_to_cstr(&ctx->user_agent, ent->user_agent, sizeof(ent->user_agent)); diff --git a/fw/client.h b/fw/client.h index 4487cb8cf..bc6a11594 100644 --- a/fw/client.h +++ b/fw/client.h @@ -31,11 +31,13 @@ * Typically it's large and wastes memory in vain if * no any classification logic is used; * list_head - entry in the lru list; + * @mem - memory used by current client; */ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; + atomic_t mem; } TfwClient; int tfw_client_init(void); @@ -51,4 +53,10 @@ void tfw_cli_abort_all(void); void tfw_tls_connection_lost(TfwConn *conn); +static inline void +tfw_cli_conn_adjust_mem(TfwCliConn *cli_conn, int delta) +{ + atomic_add(delta, &((TfwClient *)cli_conn->peer)->mem); +} + #endif /* __TFW_CLIENT_H__ */ diff --git a/fw/http.c b/fw/http.c index 65827028c..6adbbc7ef 100644 --- a/fw/http.c +++ b/fw/http.c @@ -728,8 +728,8 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - r = tfw_msg_iter_setup(&it, resp->req->conn->sk, &resp->msg.skb_head, - msg->len); + r = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn((TfwHttpMsg 
*)resp), + &resp->msg.skb_head, msg->len); if (unlikely(r)) return r; @@ -4345,8 +4345,8 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (WARN_ON_ONCE(h1_hdrs_sz < 0)) return -EINVAL; - r = tfw_msg_iter_setup(&it, req->conn->sk, &new_head, - h1_hdrs_sz); + r = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn((TfwHttpMsg *)req), + &new_head, h1_hdrs_sz); if (unlikely(r)) return r; @@ -6551,7 +6551,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, actor = tfw_http_parse_req; req->tfh.version = TFW_HTTP_TFH_HTTP_REQ; } - ss_skb_set_owner(skb, conn->sk); + ss_skb_set_owner(skb, conn); r = ss_skb_process(skb, actor, req, &req->chunk_cnt, &parsed); req->msg.len += parsed; @@ -7388,7 +7388,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, else conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); - ss_skb_set_owner(skb, cli_conn->sk); + ss_skb_set_owner(skb, cli_conn); } else { conn_stop = false; } @@ -7801,8 +7801,7 @@ tfw_http_hm_srv_send(TfwServer *srv, char *data, unsigned long len) if (!(req = tfw_http_msg_alloc_req_light())) return; hmreq = (TfwHttpMsg *)req; - if (tfw_msg_iter_setup(&it, NULL, &hmreq->msg.skb_head, - msg.len)) + if (tfw_msg_iter_setup(&it, NULL, &hmreq->msg.skb_head, msg.len)) goto cleanup; if (tfw_msg_iter_write(&it, &msg)) goto cleanup; diff --git a/fw/http.h b/fw/http.h index 458e33ec1..35243fdab 100644 --- a/fw/http.h +++ b/fw/http.h @@ -598,11 +598,11 @@ tfw_http_msg_is_req(TfwHttpMsg *msg) return msg->conn && TFW_CONN_TYPE(msg->conn) & Conn_Clnt; } -static inline struct sock * -tfw_http_msg_sock(TfwHttpMsg *msg) +static inline TfwCliConn * +tfw_http_msg_cli_conn(TfwHttpMsg *msg) { - return tfw_http_msg_is_req(msg) ? - msg->conn->sk : msg->pair->conn->sk; + return (TfwCliConn *)(tfw_http_msg_is_req(msg) ? 
+ msg->conn : msg->pair->conn); } static inline int diff --git a/fw/http_frame.c b/fw/http_frame.c index f4e4a72af..65032720f 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -326,7 +326,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, T_DBG2("Preparing HTTP/2 message with %lu bytes data\n", data->len); msg.len = data->len; - if ((r = tfw_msg_iter_setup(&it, conn->sk, &msg.skb_head, msg.len))) + if ((r = tfw_msg_iter_setup(&it, conn, &msg.skb_head, msg.len))) goto err; if ((r = tfw_msg_iter_write(&it, data))) diff --git a/fw/http_msg.c b/fw/http_msg.c index a3db303f1..afb410da4 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -817,7 +817,7 @@ tfw_http_msg_append_skb(TfwHttpMsg *hm) TfwMsgIter *it = &hm->iter; int r; - r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_sock(hm), 0); + r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_cli_conn(hm), 0); if (unlikely(r)) return r; @@ -938,7 +938,7 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; - ss_skb_set_owner(it->skb, tfw_http_msg_sock(hm)); + ss_skb_set_owner(it->skb, tfw_http_msg_cli_conn(hm)); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; if (!it->skb_head) @@ -1208,7 +1208,7 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; - ss_skb_set_owner(nskb, tfw_http_msg_sock(hm)); + ss_skb_set_owner(nskb, tfw_http_msg_cli_conn(hm)); /* * TODO #2136: Remove this flag during reworking * this function. Try to process headers and diff --git a/fw/msg.c b/fw/msg.c index ce7d8db6d..87a14ab3d 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -28,12 +28,12 @@ * iterator, since its current state is to be rewritten. 
*/ int -tfw_msg_iter_setup(TfwMsgIter *it, struct sock *sk, struct sk_buff **skb_head, +tfw_msg_iter_setup(TfwMsgIter *it, void *owner, struct sk_buff **skb_head, size_t data_len) { int r; - if ((r = ss_skb_alloc_data(skb_head, sk, data_len))) + if ((r = ss_skb_alloc_data(skb_head, owner, data_len))) return r; it->skb = it->skb_head = *skb_head; it->frag = -1; diff --git a/fw/msg.h b/fw/msg.h index 2c7b2be0c..27e2aaefc 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -98,7 +98,7 @@ typedef struct { } TfwMsgParseIter; int tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data); -int tfw_msg_iter_setup(TfwMsgIter *it, struct sock *sk, +int tfw_msg_iter_setup(TfwMsgIter *it, void *skb_owner, struct sk_buff **skb_head, size_t data_len); int tfw_msg_iter_move(TfwMsgIter *it, unsigned char **data, unsigned long sz); diff --git a/fw/ss_skb.c b/fw/ss_skb.c index ac763906c..8df18ece8 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -109,7 +109,7 @@ ss_skb_alloc_pages(size_t len) * segmentation. The allocated payload space will be filled with data. */ int -ss_skb_alloc_data(struct sk_buff **skb_head, struct sock *sk, size_t len) +ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len) { int i_skb, nr_skbs = len ? DIV_ROUND_UP(len, SS_SKB_MAX_DATA_LEN) : 1; size_t n = 0; @@ -120,7 +120,7 @@ ss_skb_alloc_data(struct sk_buff **skb_head, struct sock *sk, size_t len) skb = ss_skb_alloc_pages(n); if (!skb) return -ENOMEM; - ss_skb_set_owner(skb, sk); + ss_skb_set_owner(skb, owner); ss_skb_queue_tail(skb_head, skb); } @@ -1340,6 +1340,12 @@ ss_skb_init_for_xmit(struct sk_buff *skb) skb_dst_drop(skb); INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + + /* + * dev is used to save connection for memory accounting + * clear it before pass skb to the kernel. + */ + skb->dev = NULL; /* * Since we use skb->sb for our purpose we should * zeroed it before pass skb to the kernel. 
@@ -1715,31 +1721,45 @@ ss_skb_realloc_headroom(struct sk_buff *skb) ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); static void -ss_sock_rfree(struct sk_buff *skb) +ss_skb_destructor(struct sk_buff *skb) { - struct sock *sk = skb->sk; + TfwCliConn *conn = (TfwCliConn *)skb->sk; - BUG_ON(!sk); - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sock_put(sk); + tfw_cli_conn_adjust_mem(conn, -skb->truesize); + tfw_connection_put((TfwConn *)conn); } void -ss_skb_set_owner(struct sk_buff *skb, struct sock *sk) +ss_skb_set_owner(struct sk_buff *skb, void *owner) { - if (sk) { + /* + * Can be zero when this function is called from `__extend_pgfrags` + * for already orphaned SKBs. + */ + if (owner) { + /* + * All SKBs were orphaned when Tempesta FW received them. + * We can safely use `skb->sk` for our purposes until + * this SKBs will be passed to the socket write queue. + */ BUG_ON(skb->sk); - sock_hold(sk); - skb->sk = sk; - skb->destructor = ss_sock_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); + tfw_connection_get((TfwConn *)owner); + skb->sk = owner; + skb->destructor = ss_skb_destructor; + tfw_cli_conn_adjust_mem((TfwCliConn *)owner, skb->truesize); } } void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) { - if (skb->sk) - atomic_add(delta, &skb->sk->sk_rmem_alloc); + TfwCliConn *conn = (TfwCliConn *)skb->sk; + + /* + * conn can be zero here when this function is called + * from `ss_skb_split` for SKBs which are already orphaned + */ + if (conn) + tfw_cli_conn_adjust_mem(conn, delta); } diff --git a/fw/ss_skb.h b/fw/ss_skb.h index cf4ff1708..4558ec7c5 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -77,7 +77,7 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) -void ss_skb_set_owner(struct sk_buff *skb, struct sock *sk); +void ss_skb_set_owner(struct sk_buff *skb, void *owner); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); static inline bool @@ -453,7 +453,7 @@ 
ss_skb_data_ptr_by_offset(struct sk_buff *skb, unsigned int off) char *ss_skb_fmt_src_addr(const struct sk_buff *skb, char *out_buf); -int ss_skb_alloc_data(struct sk_buff **skb_head, struct sock *sk, size_t len); +int ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len); struct sk_buff *ss_skb_split(struct sk_buff *skb, int len); int ss_skb_get_room_w_frag(struct sk_buff *skb_head, struct sk_buff *skb, char *pspt, unsigned int len, TfwStr *it, int *fragn); diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 26a55b6e2..265800ff1 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -47,7 +47,7 @@ unsigned int tfw_cli_max_concurrent_streams; TfwConn conn_req, conn_resp; -struct sock sk; +TfwClient client; TfwHttpReq * test_req_alloc(size_t data_len) @@ -63,15 +63,16 @@ test_req_alloc(size_t data_len) hmreq = __tfw_http_msg_alloc(Conn_HttpClnt, true); BUG_ON(!hmreq); - ret = tfw_msg_iter_setup(&it, &sk, &hmreq->msg.skb_head, data_len); - BUG_ON(ret); - - memset(&conn_req, 0, sizeof(TfwConn)); tfw_connection_init(&conn_req); + conn_req.peer = (TfwPeer *)&client; conn_req.proto.type = Conn_HttpClnt; - conn_req.sk = &sk; hmreq->conn = &conn_req; hmreq->stream = &conn_req.stream; + + ret = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn(hmreq), + &hmreq->msg.skb_head, data_len); + BUG_ON(ret); + tfw_http_init_parser_req((TfwHttpReq *)hmreq); return (TfwHttpReq *)hmreq; @@ -88,32 +89,32 @@ test_req_free(TfwHttpReq *req) } TfwHttpResp * -test_resp_alloc(size_t data_len) +test_resp_alloc(size_t data_len, TfwHttpReq *req) { TfwMsgIter it; int ret; - TfwHttpResp *hmresp = test_resp_alloc_no_data(); + TfwHttpMsg *hmresp = (TfwHttpMsg *)test_resp_alloc_no_data(req); - ret = tfw_msg_iter_setup(&it, &sk, &hmresp->msg.skb_head, data_len); + ret = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn(hmresp), + &hmresp->msg.skb_head, data_len); BUG_ON(ret); return (TfwHttpResp *)hmresp; } TfwHttpResp * -test_resp_alloc_no_data() 
+test_resp_alloc_no_data(TfwHttpReq *req) { TfwHttpMsg *hmresp; hmresp = __tfw_http_msg_alloc(Conn_HttpSrv, true); BUG_ON(!hmresp); - memset(&conn_resp, 0, sizeof(TfwConn)); tfw_connection_init(&conn_resp); conn_resp.proto.type = Conn_HttpSrv; - conn_resp.sk = &sk; hmresp->conn = &conn_resp; hmresp->stream = &conn_resp.stream; + tfw_http_msg_pair((TfwHttpResp *)hmresp, req); tfw_http_init_parser_resp((TfwHttpResp *)hmresp); return (TfwHttpResp *)hmresp; diff --git a/fw/t/unit/helpers.h b/fw/t/unit/helpers.h index 81fc00821..6ce68ea0c 100644 --- a/fw/t/unit/helpers.h +++ b/fw/t/unit/helpers.h @@ -29,12 +29,11 @@ */ TfwHttpReq *test_req_alloc(size_t data_len); void test_req_free(TfwHttpReq *req); -TfwHttpResp *test_resp_alloc(size_t data_len); -TfwHttpResp *test_resp_alloc_no_data(void); +TfwHttpResp *test_resp_alloc(size_t data_len, TfwHttpReq *req); +TfwHttpResp *test_resp_alloc_no_data(TfwHttpReq *req); void test_resp_free(TfwHttpResp *req); void test_req_resp_cleanup(void); extern TfwConn conn_req, conn_resp; -extern struct sock sk; #endif /* __TFW_TEST_HELPER_H__ */ diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index 240a9a6e2..07f4cc979 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -120,21 +120,21 @@ test_run_all(void) TEST_SUITE_MPART_RUN(http1_parser); test_req_resp_cleanup(); - EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); + EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); test_case_alloc_h2(); TEST_SUITE_MPART_RUN(http2_parser); test_req_resp_cleanup(); - EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); + EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); test_case_cleanup_h2(); TEST_SUITE_RUN(http2_parser_hpack); test_req_resp_cleanup(); - EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); + EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); TEST_SUITE_RUN(http_cache); @@ -145,7 +145,7 @@ test_run_all(void) TEST_SUITE_RUN(http_msg); test_req_resp_cleanup(); - 
EXPECT_EQ(atomic_read(&sk.sk_rmem_alloc), 0); + EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); TEST_SUITE_RUN(hash); diff --git a/fw/t/unit/test_http1_parser.c b/fw/t/unit/test_http1_parser.c index 5b10bb7fd..35d2e86b0 100644 --- a/fw/t/unit/test_http1_parser.c +++ b/fw/t/unit/test_http1_parser.c @@ -4638,8 +4638,7 @@ do { \ test_case_parse_prepare_http(str); \ if (resp) \ test_resp_free(resp); \ - resp = test_resp_alloc(sizeof(str) - 1); \ - tfw_http_msg_pair(resp, sample_req); \ + resp = test_resp_alloc(sizeof(str) - 1, req); \ tfw_http_parse_resp(resp, str, sizeof(str) - 1, &parsed); \ } while (0) diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index fe4c9d885..eb762bf4b 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -25,17 +25,22 @@ #include "helpers.h" static TfwHttpResp *resp; +static TfwHttpReq *req; static void http_msg_suite_setup(void) { - resp = test_resp_alloc_no_data(); + req = test_req_alloc(0); + BUG_ON(!req); + resp = test_resp_alloc_no_data(req); + BUG_ON(!resp); } static void http_msg_suite_teardown(void) { test_resp_free(resp); + test_req_free(req); } TEST(http_msg, hdr_in_array) @@ -114,7 +119,7 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; - ss_skb_set_owner(skb, &sk); + ss_skb_set_owner(skb, tfw_http_msg_cli_conn((TfwHttpMsg*)resp)); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index 8eb974d65..c55991845 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -527,8 +527,7 @@ do_split_and_parse(int type, int chunk_mode) if (resp) test_resp_free(resp); - resp = test_resp_alloc(frames_total_sz); - tfw_http_msg_pair(resp, sample_req); + resp = test_resp_alloc(frames_total_sz, sample_req); } else { BUG(); } diff --git a/fw/tls.c b/fw/tls.c index 
4e5d20c74..047e02d87 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -537,7 +537,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) { int r, flags = 0; TfwTlsConn *conn = container_of(tls, TfwTlsConn, tls); - struct sock *sk = conn->cli_conn.sk; + TfwCliConn *cli_conn = &conn->cli_conn; TlsIOCtx *io = &tls->io_out; TfwMsgIter it; TfwStr str = {}; @@ -565,9 +565,9 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) T_DBG("TLS %lu bytes +%u segments (%u bytes, last msgtype %#x)" " are to be sent on conn=%pK/sk_write_xmit=%pK ready=%d\n", str.len, sgt ? sgt->nents : 0, io->msglen, io->msgtype, conn, - sk->sk_write_xmit, ttls_xfrm_ready(tls)); + cli_conn->sk->sk_write_xmit, ttls_xfrm_ready(tls)); - if ((r = tfw_msg_iter_setup(&it, sk, &io->skb_list, str.len))) + if ((r = tfw_msg_iter_setup(&it, cli_conn, &io->skb_list, str.len))) goto out; if ((r = tfw_msg_iter_write(&it, &str))) goto out; @@ -585,7 +585,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } - ss_skb_set_owner(skb, sk); + ss_skb_set_owner(skb, cli_conn); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } From a900e16a3017d9af9c010f42dcd17fac22913396 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Tue, 8 Jul 2025 13:51:38 +0300 Subject: [PATCH 07/23] Drop connection with TCP RST if client mem is exceeded If an administrator specifies `client_mem` and the memory used by all connections of the current client exceeds this value, Tempesta FW drops the connection and blocks the client by IP if `ip_block on;` is specified.
--- fw/connection.h | 2 +- fw/http.c | 34 +++++-- fw/http_frame.c | 5 + fw/http_limits.c | 29 ++++++ fw/http_limits.h | 1 + fw/http_types.h | 236 ++++++++++++++++++++++---------------------- fw/t/unit/helpers.c | 7 +- 7 files changed, 188 insertions(+), 126 deletions(-) diff --git a/fw/connection.h b/fw/connection.h index 6be98cc63..c8134322e 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -168,7 +168,7 @@ typedef struct tfw_conn_t { * timestamp, low 16 bits are count of misses; * */ -typedef struct { +typedef struct tfw_cli_conn_t { TFW_CONN_COMMON; struct timer_list timer; struct list_head seq_queue; diff --git a/fw/http.c b/fw/http.c index 6adbbc7ef..d8e85e5e3 100644 --- a/fw/http.c +++ b/fw/http.c @@ -6553,6 +6553,15 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, } ss_skb_set_owner(skb, conn); + r = frang_client_mem_limit((TfwCliConn *)conn, false); + if (unlikely(r)) { + BUG_ON(r != T_BLOCK); + TFW_INC_STAT_BH(clnt.msgs_filtout); + return tfw_http_req_parse_block(req, 403, + "parsed request has been filtered out", + HTTP2_ECODE_PROTO); + } + r = ss_skb_process(skb, actor, req, &req->chunk_cnt, &parsed); req->msg.len += parsed; TFW_ADD_STAT_BH(parsed, clnt.rx_bytes); @@ -7084,6 +7093,18 @@ tfw_http_popreq(TfwHttpMsg *hmresp, bool fwd_unsent) tfw_http_req_zap_error(&eq); } +static inline int +tfw_http_resp_filtout(TfwHttpMsg *hmresp) +{ + TfwHttpReq *req = hmresp->req; + + tfw_http_popreq(hmresp, false); + TFW_INC_STAT_BH(serv.msgs_filtout); + /* The response is freed by tfw_http_req_block(). */ + return tfw_http_req_block(req, 403, "response blocked: filtered out", + HTTP2_ECODE_PROTO); +} + /* * Post-process the response. Pass it to modules registered with GFSM * for further processing. 
Finish the request/response exchange properly @@ -7093,7 +7114,6 @@ static int tfw_http_resp_gfsm(TfwHttpMsg *hmresp, TfwFsmData *data) { int r; - TfwHttpReq *req = hmresp->req; BUG_ON(!hmresp->conn); @@ -7113,11 +7133,7 @@ tfw_http_resp_gfsm(TfwHttpMsg *hmresp, TfwFsmData *data) BUG_ON(r != T_BLOCK); error: - tfw_http_popreq(hmresp, false); - TFW_INC_STAT_BH(serv.msgs_filtout); - /* The response is freed by tfw_http_req_block(). */ - return tfw_http_req_block(req, 403, "response blocked: filtered out", - HTTP2_ECODE_PROTO); + return tfw_http_resp_filtout(hmresp); } /* @@ -7393,6 +7409,12 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, conn_stop = false; } + r = frang_client_mem_limit(cli_conn, false); + if (unlikely(r)) { + BUG_ON(r != T_BLOCK); + return tfw_http_resp_filtout(hmresp); + } + r = ss_skb_process(skb, tfw_http_parse_resp, hmresp, &chunks_unused, &parsed); hmresp->msg.len += parsed; diff --git a/fw/http_frame.c b/fw/http_frame.c index 65032720f..3bd3ba284 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -1878,6 +1878,11 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) parsed, skb->len); } + r = frang_client_mem_limit((TfwCliConn *)c, true); + if (unlikely(r)) + return T_BLOCK_WITH_RST; + + /* * For fully received frames possibly there are other frames * in the current @skb, so create an skb sibling with next diff --git a/fw/http_limits.c b/fw/http_limits.c index 147e1e553..aa3465200 100644 --- a/fw/http_limits.c +++ b/fw/http_limits.c @@ -1671,6 +1671,35 @@ frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len) } +int +frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) +{ + TfwClient *cli = (TfwClient *)conn->peer; + + if (tfw_cli_hard_mem_limit + && atomic_read(&cli->mem) > tfw_cli_hard_mem_limit) + { + if (block_if_exceeded) { + TfwVhost *dflt_vh = tfw_vhost_lookup_default(); + + if (WARN_ON_ONCE(!dflt_vh)) + return T_BLOCK; + + if 
(dflt_vh->frang_gconf->ip_block) { + unsigned int duration = + dflt_vh->frang_gconf->ip_block_duration; + + tfw_filter_block_ip(cli, duration); + } + tfw_vhost_put(dflt_vh); + } + return T_BLOCK; + } + + return 0; +} + + static int frang_sticky_cookie_limit(FrangAcc *ra, TfwCliConn *conn, unsigned int max_misses) diff --git a/fw/http_limits.h b/fw/http_limits.h index 47a908309..fc79f1258 100644 --- a/fw/http_limits.h +++ b/fw/http_limits.h @@ -204,6 +204,7 @@ int frang_tls_handler(TlsCtx *tls, int state); int frang_sticky_cookie_handler(TfwHttpReq *req); bool frang_req_is_whitelisted(TfwHttpReq *req); int frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len); +int frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded); static inline int frang_time_in_frame(const unsigned long tcur, const unsigned long tprev) diff --git a/fw/http_types.h b/fw/http_types.h index e487d413f..9306d2056 100644 --- a/fw/http_types.h +++ b/fw/http_types.h @@ -21,101 +21,100 @@ #define __TFW_HTTP_TYPES_H__ enum { - /* Common flags for requests and responses. */ - TFW_HTTP_FLAGS_COMMON = 0, - /* - * Connection management flags. - * - * CONN_CLOSE: the connection is to be closed after response is - * forwarded to the client. Set if: - * - 'Connection:' header contains 'close' term; - * - there is no possibility to serve further requests from the same - * connection due to errors or protocol restrictions. - * - * CONN_KA: 'Connection:' header contains 'keep-alive' term. The flag - * is not set for HTTP/1.1 connections which are persistent by default. - * CONN_EXTRA: 'Connection:' header contains additional terms.(NOT used) - * - * CONN_CLOSE and CONN_KA flags are mutual exclusive. - */ - TFW_HTTP_B_CONN_CLOSE = TFW_HTTP_FLAGS_COMMON, - /* - * This flag is set only together with previos one. - * Typically we close connection gracefully with - * TCP shutdown, but in case of attack, we should - * do it immediately using tcp_close. 
- */ - TFW_HTTP_B_CONN_CLOSE_FORCE, - TFW_HTTP_B_CONN_KA, - TFW_HTTP_B_CONN_UPGRADE, - TFW_HTTP_B_CONN_EXTRA, - /* Message is a websocket upgrade request */ - TFW_HTTP_B_UPGRADE_WEBSOCKET, - /* Message upgrade header contains extra fields */ - TFW_HTTP_B_UPGRADE_EXTRA, - /* - * Chunked is last transfer encoding. - * It is important to notice that there is a valid case - * when we receive chunked encoded response with empty - * body on HEAD request. - */ - TFW_HTTP_B_CHUNKED, - /* Chunked in the middle of applied transfer encodings. */ - TFW_HTTP_B_CHUNKED_APPLIED, - /* Message has chunked trailer headers part. */ - TFW_HTTP_B_CHUNKED_TRAILER, - /* Message has transfer encodings other than chunked. */ - TFW_HTTP_B_TE_EXTRA, - /* The message body is limited by the connection closing. */ - TFW_HTTP_B_UNLIMITED, - /* Media type is multipart/form-data. */ - TFW_HTTP_B_CT_MULTIPART, - /* Multipart/form-data request has a boundary parameter. */ - TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, - /* Content-length header was parsed. */ - TFW_HTTP_B_REQ_CONTENT_LENGTH_PARSED, - /* Singular header presents more than once. */ - TFW_HTTP_B_FIELD_DUPENTRY, - /* Message headers are fully parsed */ - TFW_HTTP_B_HEADERS_PARSED, - /* Message is fully parsed */ - TFW_HTTP_B_FULLY_PARSED, - /* Message has HTTP/2 format. */ - TFW_HTTP_B_H2, - /* - * Message has all mandatory pseudo-headers - * (applicable for HTTP/2 mode only). - */ - TFW_HTTP_B_H2_HDRS_FULL, - - /* Request flags. */ - TFW_HTTP_FLAGS_REQ, - /* Sticky cookie is found and verified. */ - TFW_HTTP_B_HAS_STICKY = TFW_HTTP_FLAGS_REQ, - /* Request fitted no cache cookie rule */ - TFW_HTTP_B_CHAIN_NO_CACHE, - /* Request is non-idempotent. */ - TFW_HTTP_B_NON_IDEMP, - /* Request stated 'Accept: text/html' header */ - TFW_HTTP_B_ACCEPT_HTML, - /* Request is created by HTTP health monitor. */ - TFW_HTTP_B_HMONITOR, - /* Client was disconnected, drop the request. 
*/ - TFW_HTTP_B_REQ_DROP, - /* Request is PURGE with an 'X-Tempesta-Cache: get' header. */ - TFW_HTTP_B_PURGE_GET, - /* - * Request should be challenged, but requested resourse - * is non-challengeable. Try to service such request - * from cache. - */ - TFW_HTTP_B_JS_NOT_SUPPORTED, - /* - * Response is fully processed and ready to be - * forwarded to the client. - */ - TFW_HTTP_B_REQ_RESP_READY, + /* Common flags for requests and responses. */ + TFW_HTTP_FLAGS_COMMON = 0, + /* + * Connection management flags. + * + * CONN_CLOSE: the connection is to be closed after response is + * forwarded to the client. Set if: + * - 'Connection:' header contains 'close' term; + * - there is no possibility to serve further requests from the same + * connection due to errors or protocol restrictions. + * + * CONN_KA: 'Connection:' header contains 'keep-alive' term. The flag + * is not set for HTTP/1.1 connections which are persistent by default. + * CONN_EXTRA: 'Connection:' header contains additional terms.(NOT used) + * + * CONN_CLOSE and CONN_KA flags are mutual exclusive. + */ + TFW_HTTP_B_CONN_CLOSE = TFW_HTTP_FLAGS_COMMON, + /* + * This flag is set only together with previos one. + * Typically we close connection gracefully with + * TCP shutdown, but in case of attack, we should + * do it immediately using tcp_close. + */ + TFW_HTTP_B_CONN_CLOSE_FORCE, + TFW_HTTP_B_CONN_KA, + TFW_HTTP_B_CONN_UPGRADE, + TFW_HTTP_B_CONN_EXTRA, + /* Message is a websocket upgrade request */ + TFW_HTTP_B_UPGRADE_WEBSOCKET, + /* Message upgrade header contains extra fields */ + TFW_HTTP_B_UPGRADE_EXTRA, + /* + * Chunked is last transfer encoding. + * It is important to notice that there is a valid case + * when we receive chunked encoded response with empty + * body on HEAD request. + */ + TFW_HTTP_B_CHUNKED, + /* Chunked in the middle of applied transfer encodings. */ + TFW_HTTP_B_CHUNKED_APPLIED, + /* Message has chunked trailer headers part. 
*/ + TFW_HTTP_B_CHUNKED_TRAILER, + /* Message has transfer encodings other than chunked. */ + TFW_HTTP_B_TE_EXTRA, + /* The message body is limited by the connection closing. */ + TFW_HTTP_B_UNLIMITED, + /* Media type is multipart/form-data. */ + TFW_HTTP_B_CT_MULTIPART, + /* Multipart/form-data request has a boundary parameter. */ + TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + /* Content-length header was parsed. */ + TFW_HTTP_B_REQ_CONTENT_LENGTH_PARSED, + /* Singular header presents more than once. */ + TFW_HTTP_B_FIELD_DUPENTRY, + /* Message headers are fully parsed */ + TFW_HTTP_B_HEADERS_PARSED, + /* Message is fully parsed */ + TFW_HTTP_B_FULLY_PARSED, + /* Message has HTTP/2 format. */ + TFW_HTTP_B_H2, + /* + * Message has all mandatory pseudo-headers + * (applicable for HTTP/2 mode only). + */ + TFW_HTTP_B_H2_HDRS_FULL, + /* Request flags. */ + TFW_HTTP_FLAGS_REQ, + /* Sticky cookie is found and verified. */ + TFW_HTTP_B_HAS_STICKY = TFW_HTTP_FLAGS_REQ, + /* Request fitted no cache cookie rule */ + TFW_HTTP_B_CHAIN_NO_CACHE, + /* Request is non-idempotent. */ + TFW_HTTP_B_NON_IDEMP, + /* Request stated 'Accept: text/html' header */ + TFW_HTTP_B_ACCEPT_HTML, + /* Request is created by HTTP health monitor. */ + TFW_HTTP_B_HMONITOR, + /* Client was disconnected, drop the request. */ + TFW_HTTP_B_REQ_DROP, + /* Request is PURGE with an 'X-Tempesta-Cache: get' header. */ + TFW_HTTP_B_PURGE_GET, + /* + * Request should be challenged, but requested resourse + * is non-challengeable. Try to service such request + * from cache. + */ + TFW_HTTP_B_JS_NOT_SUPPORTED, + /* + * Response is fully processed and ready to be + * forwarded to the client. + */ + TFW_HTTP_B_REQ_RESP_READY, /* * Rewrite method from HEAD to GET. Applicable only to request that can * be employed from cache. @@ -127,34 +126,34 @@ enum { /* 100-continue response has been queued. */ TFW_HTTP_B_CONTINUE_QUEUED, - /* Response flags */ - TFW_HTTP_FLAGS_RESP, - /* Response has no body. 
*/ - TFW_HTTP_B_VOID_BODY = TFW_HTTP_FLAGS_RESP, - /* Response has header 'Date:'. */ - TFW_HTTP_B_HDR_DATE, - /* Response has header 'Last-Modified:'. */ - TFW_HTTP_B_HDR_LMODIFIED, - /* - * Response has header 'Etag: ' and this header is - * not enclosed in double quotes. - */ - TFW_HTTP_B_HDR_ETAG_HAS_NO_QOUTES, - /* Request URI is absolute (HTTP/1.x only) */ - TFW_HTTP_B_ABSOLUTE_URI, - /* - * This is the error response, connection - * will be closed after sending it. - */ - TFW_HTTP_B_CLOSE_ERROR_RESPONSE, + /* Response flags */ + TFW_HTTP_FLAGS_RESP, + /* Response has no body. */ + TFW_HTTP_B_VOID_BODY = TFW_HTTP_FLAGS_RESP, + /* Response has header 'Date:'. */ + TFW_HTTP_B_HDR_DATE, + /* Response has header 'Last-Modified:'. */ + TFW_HTTP_B_HDR_LMODIFIED, + /* + * Response has header 'Etag: ' and this header is + * not enclosed in double quotes. + */ + TFW_HTTP_B_HDR_ETAG_HAS_NO_QOUTES, + /* Request URI is absolute (HTTP/1.x only) */ + TFW_HTTP_B_ABSOLUTE_URI, + /* + * This is the error response, connection + * will be closed after sending it. + */ + TFW_HTTP_B_CLOSE_ERROR_RESPONSE, /* This is 100-continue response. */ TFW_HTTP_B_CONTINUE_RESP, - /* This response is during trailers encoding. */ - TFW_HTTP_B_RESP_ENCODE_TRAILERS, + /* This response is during trailers encoding. */ + TFW_HTTP_B_RESP_ENCODE_TRAILERS, - _TFW_HTTP_FLAGS_NUM + _TFW_HTTP_FLAGS_NUM }; /* Forward declaration of common HTTP types. 
*/ @@ -168,6 +167,7 @@ typedef struct tfw_hdr_mods_t TfwHdrMods; typedef struct frang_global_cfg_t FrangGlobCfg; typedef struct frang_vhost_cfg_t FrangVhostCfg; typedef struct tfw_http_cookie_t TfwStickyCookie; -typedef struct tfw_http_stream_t TfwStream; +typedef struct tfw_http_stream_t TfwStream; +typedef struct tfw_cli_conn_t TfwCliConn; #endif /* __TFW_HTTP_TYPES_H__ */ diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 265800ff1..13c12ea95 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -33,7 +33,6 @@ */ #include "helpers.h" #include "http_msg.h" -#include "helpers.h" #include "pool.c" #include "apm.h" @@ -441,6 +440,12 @@ frang_http_hdr_limit(TfwHttpReq *req, unsigned int new_hdr_len) return T_OK; } +int +frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) +{ + return T_OK; +} + int frang_sticky_cookie_handler(TfwHttpReq *req) { From 19e0f1b75b6e521315c48f65e9a13d5aa4b8e1c0 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 9 Jul 2025 18:31:46 +0300 Subject: [PATCH 08/23] Implement TfwClient refcounting for memory adjustment Previously we took a reference to the connection when adjusting memory for an skb, but this leads to several problems: - We can't adjust memory for skbs before TLS decryption, because skbs from `tls->io_in.skb_list` are freed when the connection is released (but the connection will never be released if we increment its reference counter for these skbs). - We have the same problem for skbs which are waiting for an appropriate TCP window to be pushed into the socket write queue. Now we increment/decrement the reference counter of TfwClient and adjust skb memory for requests before TLS decryption.
--- fw/client.c | 1 + fw/client.h | 26 ++++++++++++++++++++++++-- fw/http.c | 25 +++++++++++++++---------- fw/http.h | 8 +++++--- fw/http_frame.c | 2 +- fw/http_msg.c | 6 +++--- fw/ss_skb.c | 18 +++++++++--------- fw/t/unit/helpers.c | 4 ++-- fw/t/unit/test_http_msg.c | 2 +- fw/tls.c | 6 ++++-- 10 files changed, 65 insertions(+), 33 deletions(-) diff --git a/fw/client.c b/fw/client.c index 4da5dfb66..1c17e690a 100644 --- a/fw/client.c +++ b/fw/client.c @@ -203,6 +203,7 @@ tfw_client_ent_init(TdbRec *rec, void *data) tfw_peer_init((TfwPeer *)cli, &ctx->addr); atomic_set(&cli->mem, 0); + atomic_set(&cli->refcnt, 0); ent->xff_addr = ctx->xff_addr; tfw_str_to_cstr(&ctx->user_agent, ent->user_agent, sizeof(ent->user_agent)); diff --git a/fw/client.h b/fw/client.h index bc6a11594..01546f9f0 100644 --- a/fw/client.h +++ b/fw/client.h @@ -32,12 +32,14 @@ * no any classification logic is used; * list_head - entry in the lru list; * @mem - memory used by current client; + * @refcnt - refcount for light versions of get/put client; */ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; atomic_t mem; + atomic_t refcnt; } TfwClient; int tfw_client_init(void); @@ -54,9 +56,29 @@ void tfw_cli_abort_all(void); void tfw_tls_connection_lost(TfwConn *conn); static inline void -tfw_cli_conn_adjust_mem(TfwCliConn *cli_conn, int delta) +tfw_client_adjust_mem(TfwClient *cli, int delta) { - atomic_add(delta, &((TfwClient *)cli_conn->peer)->mem); + atomic_add(delta, &cli->mem); +} + +static inline void +tfw_client_get_light(TfwClient *cli) +{ + int rc; + + rc = atomic_inc_return(&cli->refcnt); + if (rc == 1) + tfw_client_obtain(cli->addr, NULL, NULL, NULL); +} + +static inline void +tfw_client_put_light(TfwClient *cli) +{ + int rc; + + rc = atomic_dec_return(&cli->refcnt); + if (!rc) + tfw_client_put(cli); } #endif /* __TFW_CLIENT_H__ */ diff --git a/fw/http.c b/fw/http.c index d8e85e5e3..a2273bd0c 100644 --- a/fw/http.c +++ b/fw/http.c @@ -728,7 +728,7 @@ 
tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - r = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn((TfwHttpMsg *)resp), + r = tfw_msg_iter_setup(&it, tfw_http_msg_client((TfwHttpMsg *)resp), &resp->msg.skb_head, msg->len); if (unlikely(r)) return r; @@ -4345,7 +4345,7 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (WARN_ON_ONCE(h1_hdrs_sz < 0)) return -EINVAL; - r = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn((TfwHttpMsg *)req), + r = tfw_msg_iter_setup(&it, tfw_http_msg_client((TfwHttpMsg *)req), &new_head, h1_hdrs_sz); if (unlikely(r)) return r; @@ -6551,7 +6551,12 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, actor = tfw_http_parse_req; req->tfh.version = TFW_HTTP_TFH_HTTP_REQ; } - ss_skb_set_owner(skb, conn); + /* + * For tls connections we already set `skb->owner` before + * tls decryption. + */ + if (!skb->sk) + ss_skb_set_owner(skb, conn->peer); r = frang_client_mem_limit((TfwCliConn *)conn, false); if (unlikely(r)) { @@ -7404,17 +7409,17 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, else conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); - ss_skb_set_owner(skb, cli_conn); + ss_skb_set_owner(skb, cli_conn->peer); + + r = frang_client_mem_limit(cli_conn, false); + if (unlikely(r)) { + BUG_ON(r != T_BLOCK); + return tfw_http_resp_filtout(hmresp); + } } else { conn_stop = false; } - r = frang_client_mem_limit(cli_conn, false); - if (unlikely(r)) { - BUG_ON(r != T_BLOCK); - return tfw_http_resp_filtout(hmresp); - } - r = ss_skb_process(skb, tfw_http_parse_resp, hmresp, &chunks_unused, &parsed); hmresp->msg.len += parsed; diff --git a/fw/http.h b/fw/http.h index 35243fdab..755d5d6a6 100644 --- a/fw/http.h +++ b/fw/http.h @@ -598,11 +598,13 @@ tfw_http_msg_is_req(TfwHttpMsg *msg) return msg->conn && TFW_CONN_TYPE(msg->conn) & Conn_Clnt; } -static inline TfwCliConn * -tfw_http_msg_cli_conn(TfwHttpMsg *msg) +static 
inline TfwClient * +tfw_http_msg_client(TfwHttpMsg *msg) { - return (TfwCliConn *)(tfw_http_msg_is_req(msg) ? + TfwCliConn *conn = (TfwCliConn *)(tfw_http_msg_is_req(msg) ? msg->conn : msg->pair->conn); + + return (TfwClient *)conn->peer; } static inline int diff --git a/fw/http_frame.c b/fw/http_frame.c index 3bd3ba284..88ee361bc 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -326,7 +326,7 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, T_DBG2("Preparing HTTP/2 message with %lu bytes data\n", data->len); msg.len = data->len; - if ((r = tfw_msg_iter_setup(&it, conn, &msg.skb_head, msg.len))) + if ((r = tfw_msg_iter_setup(&it, conn->peer, &msg.skb_head, msg.len))) goto err; if ((r = tfw_msg_iter_write(&it, data))) diff --git a/fw/http_msg.c b/fw/http_msg.c index afb410da4..4c9623669 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -817,7 +817,7 @@ tfw_http_msg_append_skb(TfwHttpMsg *hm) TfwMsgIter *it = &hm->iter; int r; - r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_cli_conn(hm), 0); + r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_client(hm), 0); if (unlikely(r)) return r; @@ -938,7 +938,7 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; - ss_skb_set_owner(it->skb, tfw_http_msg_cli_conn(hm)); + ss_skb_set_owner(it->skb, tfw_http_msg_client(hm)); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; if (!it->skb_head) @@ -1208,7 +1208,7 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; - ss_skb_set_owner(nskb, tfw_http_msg_cli_conn(hm)); + ss_skb_set_owner(nskb, tfw_http_msg_client(hm)); /* * TODO #2136: Remove this flag during reworking * this function. 
Try to process headers and diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 8df18ece8..1dc937fd3 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1723,10 +1723,10 @@ ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); static void ss_skb_destructor(struct sk_buff *skb) { - TfwCliConn *conn = (TfwCliConn *)skb->sk; + TfwClient *cli = (TfwClient *)skb->sk; - tfw_cli_conn_adjust_mem(conn, -skb->truesize); - tfw_connection_put((TfwConn *)conn); + ss_skb_adjust_client_mem(cli, -skb->truesize); + tfw_client_put_light(cli); } void @@ -1744,22 +1744,22 @@ ss_skb_set_owner(struct sk_buff *skb, void *owner) */ BUG_ON(skb->sk); - tfw_connection_get((TfwConn *)owner); + tfw_client_get_light((TfwClient *)owner); skb->sk = owner; skb->destructor = ss_skb_destructor; - tfw_cli_conn_adjust_mem((TfwCliConn *)owner, skb->truesize); + ss_skb_adjust_client_mem((TfwClient *)owner, skb->truesize); } } void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) { - TfwCliConn *conn = (TfwCliConn *)skb->sk; + TfwClient *cli = (TfwClient *)skb->sk; /* - * conn can be zero here when this function is called + * `cli` can be zero here when this function is called * from `ss_skb_split` for SKBs which are already orphaned */ - if (conn) - tfw_cli_conn_adjust_mem(conn, delta); + if (cli) + tfw_client_adjust_mem(cli, delta); } diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 13c12ea95..07773066f 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -68,7 +68,7 @@ test_req_alloc(size_t data_len) hmreq->conn = &conn_req; hmreq->stream = &conn_req.stream; - ret = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn(hmreq), + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client(hmreq), &hmreq->msg.skb_head, data_len); BUG_ON(ret); @@ -94,7 +94,7 @@ test_resp_alloc(size_t data_len, TfwHttpReq *req) int ret; TfwHttpMsg *hmresp = (TfwHttpMsg *)test_resp_alloc_no_data(req); - ret = tfw_msg_iter_setup(&it, tfw_http_msg_cli_conn(hmresp), + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client(hmresp), 
&hmresp->msg.skb_head, data_len); BUG_ON(ret); diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index eb762bf4b..1d4b89bcc 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -119,7 +119,7 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; - ss_skb_set_owner(skb, tfw_http_msg_cli_conn((TfwHttpMsg*)resp)); + ss_skb_set_owner(skb, tfw_http_msg_client((TfwHttpMsg*)resp)); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; diff --git a/fw/tls.c b/fw/tls.c index 047e02d87..557c83ae9 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -89,6 +89,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) next_msg: spin_lock(&tls->lock); ss_skb_queue_tail(&tls->io_in.skb_list, skb); + ss_skb_set_owner(skb, conn->peer); /* Call TLS layer to place skb into a TLS record on top of skb_list. */ parsed = 0; @@ -567,7 +568,8 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) str.len, sgt ? sgt->nents : 0, io->msglen, io->msgtype, conn, cli_conn->sk->sk_write_xmit, ttls_xfrm_ready(tls)); - if ((r = tfw_msg_iter_setup(&it, cli_conn, &io->skb_list, str.len))) + if ((r = tfw_msg_iter_setup(&it, cli_conn->peer, &io->skb_list, + str.len))) goto out; if ((r = tfw_msg_iter_write(&it, &str))) goto out; @@ -585,7 +587,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } - ss_skb_set_owner(skb, cli_conn); + ss_skb_set_owner(skb, cli_conn->peer); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } From 3b4e560746462040f298489fc0e6d4927d53e280 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Tue, 28 Oct 2025 17:56:42 +0200 Subject: [PATCH 09/23] Adjust tcp send window for all sending data. Previously we adjusted the TCP send window only for HTTP/2 connections and only while making HEADERS or DATA frames, but if we want to control client memory usage we should do it for all types of sent data.
(We orphan skb and decrease memory usage when we pass skb to the socket write queue, so if we don't adjust tcp send window we push a lot of skbs into the socket write queue and don't account for their memory). --- fw/connection.c | 82 ++++++++++++++++++++++++++++++++++++++ fw/connection.h | 7 +++- fw/http.c | 4 +- fw/http.h | 2 +- fw/http2.h | 6 +++ fw/http_frame.c | 53 ++++++++++++++++++------- fw/http_frame.h | 3 +- fw/http_stream.c | 2 +- fw/sock.c | 96 ++++++++++++++++++++++++--------------------- fw/sock_clnt.c | 47 ++++------------------ fw/sock_srv.c | 3 ++ fw/ss_skb.c | 4 +- fw/ss_skb.h | 3 +- fw/sync_socket.h | 17 +++++++- fw/t/unit/helpers.c | 3 +- fw/tcp.h | 6 --- fw/websocket.c | 2 + 17 files changed, 222 insertions(+), 118 deletions(-) diff --git a/fw/connection.c b/fw/connection.c index 1f7e3d1ed..084e6502d 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -26,6 +26,7 @@ #include "sync_socket.h" #include "http.h" #include "websocket.h" +#include "tcp.h" TfwConnHooks *conn_hooks[TFW_CONN_MAX_PROTOS]; @@ -244,3 +245,84 @@ tfw_connection_unlink_to_sk(TfwConn *conn) conn->sk = NULL; ss_sock_put(sk); } + +void +tfw_connection_on_send(TfwConn *conn, struct sk_buff **skb_head) +{ + ss_skb_queue_splice(&conn->write_queue, skb_head); + sock_set_flag(conn->sk, SOCK_TEMPESTA_HAS_DATA); +} + +static inline void +tfw_connection_shutdown(TfwConn *conn) +{ + struct sock *sk = conn->sk; + + SS_IN_USE_PROTECT({ + tcp_shutdown(sk, SEND_SHUTDOWN); + }); +} + +int +tfw_connection_push(TfwConn *conn, unsigned int mss_now) +{ + struct sock *sk = conn->sk; + TfwH2Ctx *h2; + unsigned long snd_wnd; + int r; + + assert_spin_locked(&sk->sk_lock.slock); + WARN_ON(SS_CONN_TYPE(sk) & Conn_Closing); + + /* + * Update snd_cwnd if needed, to correctly calculate + * the count of bytes to send.
+ */ + tcp_slow_start_after_idle_check(sk); + + /* + * First of all Tempesta FW entails skb from connection write queue + * (all http1 data, control frames, tls alerts and so on for http2), + * then if `snd_wnd` is not exceeded make frames for http2. + */ + r = ss_skb_tcp_entail_list(sk, &conn->write_queue, + mss_now, &snd_wnd); + if (unlikely(r)) + return r; + + /* + * This function can be called both for HTTP1 and HTTP2 connections. + * Moreover this function can be called when HTTP2 connection is + * shut down before the TLS handshake was finished. + */ + h2 = TFW_CONN_PROTO(conn) == TFW_FSM_H2 ? + tfw_h2_context_safe(conn) : NULL; + if (!h2) { + if (unlikely(!conn->write_queue)) { + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); + if (unlikely(SS_CONN_TYPE(sk) & Conn_Shutdown)) + tfw_connection_shutdown(conn); + } + return 0; + } + + r = tfw_h2_make_frames(sk, h2, mss_now, snd_wnd); + if (unlikely(r)) + return r; + + if (unlikely(!conn->write_queue)) { + /* + * If the connection is shut down and the error response was sent, + * shut down the whole connection. + */ + if (unlikely(SS_CONN_TYPE(sk) & Conn_Shutdown) + && (!h2->error + || tfw_h2_conn_or_stream_wnd_is_exceeded(h2, + h2->error))) + tfw_connection_shutdown(conn); + if (!tfw_h2_is_ready_to_send(h2)) + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); + } + + return r; +} diff --git a/fw/connection.h b/fw/connection.h index c8134322e..274c24f27 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -98,8 +98,9 @@ enum { * @list - member in the list of connections with @peer; * @refcnt - number of users of the connection structure instance; * @stream - instance for control messages processing; + * @write_queue - queue of skb to push to socket write queue; * @peer - TfwClient or TfwServer handler.
Hop-by-hop peer; - * @pair - Paired TfwCliConn or TfwSrvConn for websocket connections; + * @pair - paired TfwCliConn or TfwSrvConn for websocket connections; * @sk - an appropriate sock handler; * @destructor - called when a connection is destroyed; */ @@ -110,6 +111,7 @@ typedef struct tfw_conn_t TfwConn; struct list_head list; \ atomic_t refcnt; \ TfwStream stream; \ + struct sk_buff *write_queue; \ TfwPeer *peer; \ TfwConn *pair; \ struct sock *sk; \ @@ -579,6 +581,7 @@ tfw_connection_validate_cleanup(TfwConn *conn) BUG_ON(!conn); BUG_ON(!list_empty(&conn->list)); BUG_ON(conn->stream.msg); + BUG_ON(conn->write_queue); rc = atomic_read(&conn->refcnt); BUG_ON(rc && rc != TFW_CONN_DEATHCNT); @@ -632,5 +635,7 @@ int tfw_connection_close(TfwConn *conn, bool sync); void tfw_connection_abort(TfwConn *conn); void tfw_connection_drop(TfwConn *conn); void tfw_connection_release(TfwConn *conn); +void tfw_connection_on_send(TfwConn *conn, struct sk_buff **sk_buff); +int tfw_connection_push(TfwConn *conn, unsigned int mss_now); #endif /* __TFW_CONNECTION_H__ */ diff --git a/fw/http.c b/fw/http.c index a2273bd0c..50ff7126a 100644 --- a/fw/http.c +++ b/fw/http.c @@ -5412,7 +5412,7 @@ tfw_h2_append_predefined_body(TfwHttpResp *resp, const TfwStr *body) ALLOW_ERROR_INJECTION(tfw_h2_append_predefined_body, ERRNO); int -tfw_http_on_send_resp(void *conn, struct sk_buff **skb_head) +tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head) { TfwH2Ctx *ctx = tfw_h2_context_unsafe((TfwConn *)conn); struct tfw_skb_cb *tfw_cb = TFW_SKB_CB(*skb_head); @@ -7392,7 +7392,6 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, T_DBG2("Received %u server data bytes on conn=%p msg=%p\n", skb->len, conn, stream->msg); - /* * Process pipelined requests in a loop * until all data in the SKB is processed. 
@@ -7402,6 +7401,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, hmsib = NULL; hmresp = (TfwHttpMsg *)stream->msg; cli_conn = (TfwCliConn *)hmresp->req->conn; + /* `cli_conn` is equal to zero for health monitor requests. */ if (likely(cli_conn)) { if (TFW_FSM_TYPE(cli_conn->proto.type) == TFW_FSM_H2) diff --git a/fw/http.h b/fw/http.h index 755d5d6a6..148a0bcab 100644 --- a/fw/http.h +++ b/fw/http.h @@ -814,6 +814,6 @@ int tfw_http_resp_copy_encodings(TfwHttpResp *resp, TfwStr* dst, void tfw_http_extract_request_authority(TfwHttpReq *req); bool tfw_http_mark_is_in_whitlist(unsigned int mark); char *tfw_http_resp_status_line(int status, size_t *len); -int tfw_http_on_send_resp(void *conn, struct sk_buff **skb_head); +int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head); #endif /* __TFW_HTTP_H__ */ diff --git a/fw/http2.h b/fw/http2.h index 80dc782da..b8d9b35dd 100644 --- a/fw/http2.h +++ b/fw/http2.h @@ -204,4 +204,10 @@ tfw_h2_is_ready_to_send(TfwH2Ctx *ctx) return ctx->sched.root.active_cnt && ctx->rem_wnd; } +static inline bool +tfw_h2_conn_or_stream_wnd_is_exceeded(TfwH2Ctx *ctx, TfwStream *stream) +{ + return ctx->rem_wnd <= 0 || stream->rem_wnd <= 0; +} + #endif /* __HTTP2__ */ diff --git a/fw/http_frame.c b/fw/http_frame.c index 88ee361bc..849717ebe 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -2144,6 +2144,24 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, return r; } +static int +tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) +{ + TfwConn *conn = (TfwConn *)sk->sk_user_data; + int r; + + BUG_ON(conn->write_queue); + r = ss_skb_tcp_entail_list(sk, skb_head, mss_now, snd_wnd); + if (unlikely(r)) + return r; + + ss_skb_queue_splice(&conn->write_queue, skb_head); + sock_set_flag(sk, SOCK_TEMPESTA_HAS_DATA); + + return 0; +} + static int tfw_h2_stream_xmit_process(struct sock *sk, TfwH2Ctx *ctx, TfwStream 
*stream, bool stream_is_exclusive, unsigned int mss_now, @@ -2255,7 +2273,7 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_DATA_FRAMES) { - if (unlikely(ctx->rem_wnd <= 0 || stream->rem_wnd <= 0)) + if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) ADJUST_BLOCKED_STREAMS_AND_EXIT(0, HTTP2_DATA); CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_DATA, @@ -2313,13 +2331,15 @@ do { \ if (stream->xmit.h_len) { T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); } else { - if (stream->xmit.postponed + if (unlikely(stream->xmit.postponed) && !stream->xmit.frame_length - && !ctx->cur_send_headers) { - struct sk_buff **postponed = - &stream->xmit.postponed; + && !ctx->cur_send_headers) + { + struct sk_buff **head = &stream->xmit.postponed; - r = ss_skb_tcp_entail_list(sk, postponed); + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2348,7 +2368,10 @@ do { \ * response is sent. */ if (unlikely(stream->xmit.skb_head)) { - r = ss_skb_tcp_entail_list(sk, &stream->xmit.skb_head); + struct sk_buff **head = &stream->xmit.skb_head; + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2378,11 +2401,14 @@ do { \ T_WARN("Failed to send frame %d", r); return r; } - if (stream->xmit.postponed && !ctx->cur_send_headers) { - struct sk_buff **postponed = - &stream->xmit.postponed; + if (unlikely(stream->xmit.postponed) + && !ctx->cur_send_headers) + { + struct sk_buff **head = &stream->xmit.postponed; - r = ss_skb_tcp_entail_list(sk, postponed); + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); if (unlikely(r)) { T_WARN("Failed to send postponed" " frames %d", r); @@ -2399,10 +2425,10 @@ do { \ } int -tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now) +tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, + unsigned long snd_wnd) { TfwStreamSched *sched = &ctx->sched; - 
unsigned long snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now); bool stop = false; int r = 0; TfwStream *stream; @@ -2439,7 +2465,6 @@ tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now) r = tfw_h2_stream_xmit_process(sk, ctx, stream, stream_is_exclusive, mss_now, &snd_wnd, &stop); - if (!tfw_h2_stream_is_active(stream)) { tfw_h2_sched_deactivate_stream(sched, stream); if (!stream->xmit.skb_head) { diff --git a/fw/http_frame.h b/fw/http_frame.h index dfac951be..385ae9d45 100644 --- a/fw/http_frame.h +++ b/fw/http_frame.h @@ -159,7 +159,8 @@ int tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next); int tfw_h2_send_rst_stream(TfwH2Ctx *ctx, unsigned int id, TfwH2Err err_code); int tfw_h2_send_goaway(TfwH2Ctx *ctx, TfwH2Err err_code, bool attack); -int tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now); +int tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, + unsigned long snd_wnd); static inline void tfw_h2_pack_frame_header(unsigned char *p, const TfwFrameHdr *hdr) diff --git a/fw/http_stream.c b/fw/http_stream.c index bc47c8578..9e7c875f8 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -830,7 +830,7 @@ tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, ss_skb_setup_opaque_data(skb_head, resp, tfw_http_resp_pair_free_and_put_conn); - TFW_SKB_CB(skb_head)->on_send = tfw_http_on_send_resp; + TFW_SKB_CB(skb_head)->on_send = tfw_h2_on_send_resp; TFW_SKB_CB(skb_head)->stream_id = stream->id; stream->xmit.resp = NULL; diff --git a/fw/sock.c b/fw/sock.c index 10ff818ad..909b63e36 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -41,6 +41,7 @@ #include "tempesta_fw.h" #include "work_queue.h" #include "http_limits.h" +#include "tcp.h" typedef struct { struct sock *sk; @@ -224,11 +225,17 @@ ss_conn_drop_guard_exit(struct sock *sk) if (!sk->sk_user_data) return; - SS_CONN_TYPE(sk) &= ~Conn_Closing; + SS_CONN_TYPE(sk) &= ~(Conn_Closing | Conn_Shutdown | Conn_Stop); 
SS_CALL(connection_drop, sk); ss_active_guard_exit(SS_V_ACT_LIVECONN); } +static int +ss_fill_write_queue(struct sock *sk, unsigned int mss) +{ + return SS_CALL(connection_push, sk->sk_user_data, mss); +} + static void ss_ipi(struct irq_work *work) { @@ -484,16 +491,21 @@ ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, } int -ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) +ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) { - struct sk_buff *skb, *tail, *next, *to_destroy; + struct sk_buff *tail, *next, *to_destroy; unsigned char tls_type = 0; unsigned int mark = 0; void *opaque_data = NULL; void (*destructor)(void *) = NULL; int r; - while ((skb = ss_skb_dequeue(skb_head))) { + while ((*snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now))) { + struct sk_buff *skb = ss_skb_dequeue(skb_head); + + if (!skb) + break; /* * @skb_head can be the head of several different skb * lists. We set tls type for the head of each new @@ -530,6 +542,9 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) ss_skb_tcp_entail(sk, skb, mark, tls_type); } + if (*skb_head && !TFW_SKB_CB(*skb_head)->is_head) + ss_skb_setup_head_of_list(*skb_head, mark, tls_type); + return 0; restore_sk_write_queue: @@ -541,7 +556,7 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) } } ss_skb_setup_opaque_data(*skb_head, opaque_data, destructor); - return r; + return r; } /** @@ -550,16 +565,15 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head) static void ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) { - int size, mss = tcp_send_mss(sk, &size, MSG_DONTWAIT); void *conn = sk->sk_user_data; unsigned char tls_type = flags & SS_F_ENCRYPT ? 
SS_SKB_F2TYPE(flags) : 0; T_DBG3("[%d]: %s: sk=%pK queue_empty=%d send_head=%pK" - " sk_state=%d mss=%d size=%d\n", + " sk_state=%d\n", smp_processor_id(), __func__, sk, tcp_write_queue_empty(sk), tcp_send_head(sk), - sk->sk_state, mss, size); + sk->sk_state); /* If the socket is inactive, there's no recourse. Drop the data. */ if (unlikely(!conn || !ss_sock_active(sk))) @@ -569,51 +583,33 @@ ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) if (ss_skb_on_send(conn, skb_head)) goto cleanup; + if (*skb_head) + SS_CALL(connection_on_send, sk->sk_user_data, skb_head); - /* - * If skbs were pushed to scheuler tree, @skb_head is - * empty and `ss_skb_tcp_entail_list` doesn't make - * any job. - */ - if (ss_skb_tcp_entail_list(sk, skb_head)) { - ss_linkerror(sk, SS_F_ABORT); - goto cleanup; - } - - T_DBG3("[%d]: %s: sk=%p send_head=%p sk_state=%d flags=%x\n", - smp_processor_id(), __func__, - sk, tcp_send_head(sk), sk->sk_state, flags); - - /* - * If connection close flag is specified, then @ss_do_close is used to - * set FIN on final SKB and push all pending frames to the stack. - */ if (flags & SS_F_CONN_CLOSE) return; /* * We set SOCK_TEMPESTA_HAS_DATA when we add some skb in our - * scheduler tree. - * So there are two cases here: - * - packets out is equal to zero and sock flag is set, - * this means that we should call `tcp_push_pending_frames`. - * In this function our scheduler choose the most priority - * stream, make frames for this stream and push them to the - * socket write queue. - * - socket flag is not set, this means that we push skb directly - * to the socket write queue so we call `tcp_push` and don't - * run scheduler. - * If packets_out is not equal to zero `tcp_push_pending_frames` - * will be called later from `tcp_data_snd_check` when we receive - * ack from the peer. + * scheduler tree or connection write queue. + * So there are three cases here: + * - TCP window is not equal to zero. 
In this case Tempesta FW pushes + * skbs from connection write queue to socket write queue according + * TCP window and then (if there is a still available TCP window and + * this is http2 client connection) calls our scheduler to choose the + * most priority stream, make frames for this stream and push them to + * the socket write queue. + * - TCP window is equal to zero. In this case `tcp_push_pending_frames` + * doesn't do anything. It will be called later, when we receive ack + * from the peer. + * - SOCK_TEMPESTA_HAS_DATA flag is not set. This is a rare case, when + * we send goaway/tls alert after error response, but this error + * response exceeded http2 window. In this case SOCK_TEMPESTA_HAS_DATA + * will be set during WINDOW_UPDATE processing and this function + * (`tcp_push_pending_frames`) will be called again. */ SS_IN_USE_PROTECT({ - if (sock_flag(sk, SOCK_TEMPESTA_HAS_DATA)) { - tcp_push_pending_frames(sk); - } else { - tcp_push(sk, MSG_DONTWAIT, mss, - TCP_NAGLE_OFF | TCP_NAGLE_PUSH, size); - } + tcp_push_pending_frames(sk); }); SS_STATE_PROCESS_RETURN(sk); @@ -681,6 +677,7 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) r = -ENOMEM; goto err; } + ss_skb_set_owner(twin_skb, skb->sk); ss_skb_queue_tail(&sw.skb_head, twin_skb); skb = skb->next; } while (skb != *skb_head); @@ -1349,6 +1346,7 @@ ss_set_callbacks(struct sock *sk) sk->sk_data_ready = ss_tcp_data_ready; sk->sk_state_change = ss_tcp_state_change; sk->sk_destroy_cb = ss_conn_drop_guard_exit; + sk->sk_fill_write_queue = ss_fill_write_queue; } EXPORT_SYMBOL(ss_set_callbacks); @@ -1622,12 +1620,20 @@ EXPORT_SYMBOL(ss_getpeername); static void __sk_close_locked(struct sock *sk, int flags) { + int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); + + ss_fill_write_queue(sk, mss_now); ss_do_close(sk, flags); if (!sk_stream_closing(sk)) { ss_conn_drop_guard_exit(sk); } else { BUG_ON(!sock_flag(sk, SOCK_DEAD) || ((flags & SS_F_ABORT) == SS_F_ABORT)); + /* + * Tempesta FW sends 
all pending data in socket + * write queue and doesn't push anymore. + */ + sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); SS_CONN_TYPE(sk) |= Conn_Closing; } bh_unlock_sock(sk); diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 0f64f27eb..05641c94d 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -35,7 +35,6 @@ #include "server.h" #include "sync_socket.h" #include "tls.h" -#include "tcp.h" /* * ------------------------------------------------------------------------ @@ -86,7 +85,7 @@ tfw_sock_cli_keepalive_timer_cb(struct timer_list *t) T_DBG("Client timeout end\n"); - if (TFW_CONN_TYPE(conn) & Conn_Closing) { + if (TFW_CONN_TYPE(conn) & Conn_Shutdown) { /* * If socket was shut down it is in TCP_FIN_WAIT1 or * TCP_FIN_WAIT2 state depends on receiving ack from @@ -112,6 +111,7 @@ tfw_cli_conn_alloc(int type) return NULL; tfw_connection_init((TfwConn *)cli_conn); + cli_conn->write_queue = NULL; INIT_LIST_HEAD(&cli_conn->seq_queue); spin_lock_init(&cli_conn->seq_qlock); spin_lock_init(&cli_conn->ret_qlock); @@ -149,6 +149,7 @@ tfw_cli_conn_free(TfwCliConn *cli_conn) void tfw_cli_conn_release(TfwCliConn *cli_conn) { + ss_skb_queue_purge(&cli_conn->write_queue); /* Paired with @frang_conn_new client obtain. */ if (likely(cli_conn->sk)) tfw_connection_unlink_to_sk((TfwConn *)cli_conn); @@ -195,43 +196,6 @@ tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg) return r; } -static int -tfw_sk_fill_write_queue(struct sock *sk, unsigned int mss_now) -{ - TfwConn *conn = sk->sk_user_data; - TfwH2Ctx *h2; - int r; - - assert_spin_locked(&sk->sk_lock.slock); - /* - * This function is called under the socket lock, same as dropping a - * connection. Moreover this function is never called when socket - * state is TCP_CLOSE. When client closes the connection, we drop it - * from tcp_done() -> ss_conn_drop_guard_exit(), and socket state is - * set to TCP_CLOSE, so this function will never be called after it. 
- */ - BUG_ON(!conn); - - /* - * This function can be called both for HTTP1 and HTTP2 connections. - * Moreover this function can be called when HTTP2 connection is - * shut down before TLS hadshake was finished. - */ - h2 = TFW_CONN_PROTO(conn) == TFW_FSM_H2 ? - tfw_h2_context_safe(conn) : NULL; - if (!h2) - return 0; - - r = tfw_h2_make_frames(sk, h2, mss_now); - if (unlikely(r < 0)) - return r; - - if (!tfw_h2_is_ready_to_send(h2)) - sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); - - return r; -} - /** * This hook is called when a new client connection is established. */ @@ -291,7 +255,6 @@ tfw_sock_clnt_new(struct sock *sk) * find a simple and better solution. */ sk->sk_write_xmit = tfw_tls_encrypt; - sk->sk_fill_write_queue = tfw_sk_fill_write_queue; } /* Activate keepalive timer. */ @@ -390,6 +353,8 @@ static const SsHooks tfw_sock_http_clnt_ss_hooks = { .connection_drop = tfw_sock_clnt_drop, .connection_recv = tfw_connection_recv, .connection_on_shutdown = tfw_cli_conn_on_shutdown, + .connection_on_send = tfw_connection_on_send, + .connection_push = tfw_connection_push, }; static const SsHooks tfw_sock_tls_clnt_ss_hooks = { @@ -398,6 +363,8 @@ static const SsHooks tfw_sock_tls_clnt_ss_hooks = { .connection_recv = tfw_tls_connection_recv, .connection_recv_finish = tfw_connection_recv_finish, .connection_on_shutdown = tfw_cli_conn_on_shutdown, + .connection_on_send = tfw_connection_on_send, + .connection_push = tfw_connection_push, }; /* diff --git a/fw/sock_srv.c b/fw/sock_srv.c index f20ab3cea..15e7ab0d1 100644 --- a/fw/sock_srv.c +++ b/fw/sock_srv.c @@ -421,6 +421,7 @@ tfw_srv_conn_release(TfwSrvConn *srv_conn) { TfwServer *srv = (TfwServer *)srv_conn->peer; + ss_skb_queue_purge(&srv_conn->write_queue); tfw_connection_release((TfwConn *)srv_conn); /* * conn->sk may be zeroed if we get here after a failed @@ -719,6 +720,8 @@ static const SsHooks tfw_sock_srv_ss_hooks = { .connection_new = tfw_sock_srv_connect_complete, .connection_drop = 
tfw_sock_srv_connect_drop, .connection_recv = tfw_connection_recv, + .connection_on_send = tfw_connection_on_send, + .connection_push = tfw_connection_push, }; static int diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 1dc937fd3..c7532d03f 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1725,7 +1725,7 @@ ss_skb_destructor(struct sk_buff *skb) { TfwClient *cli = (TfwClient *)skb->sk; - ss_skb_adjust_client_mem(cli, -skb->truesize); + tfw_client_adjust_mem(cli, -skb->truesize); tfw_client_put_light(cli); } @@ -1747,7 +1747,7 @@ ss_skb_set_owner(struct sk_buff *skb, void *owner) tfw_client_get_light((TfwClient *)owner); skb->sk = owner; skb->destructor = ss_skb_destructor; - ss_skb_adjust_client_mem((TfwClient *)owner, skb->truesize); + tfw_client_adjust_mem((TfwClient *)owner, skb->truesize); } } diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 4558ec7c5..ddec02d65 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -319,8 +319,7 @@ ss_skb_adjust_data_len(struct sk_buff *skb, int delta) skb->len += delta; skb->data_len += delta; skb->truesize += delta; - if (skb->sk) - ss_skb_adjust_client_mem(skb, delta); + ss_skb_adjust_client_mem(skb, delta); } /* diff --git a/fw/sync_socket.h b/fw/sync_socket.h index 84b85ed15..8241e5186 100644 --- a/fw/sync_socket.h +++ b/fw/sync_socket.h @@ -65,7 +65,7 @@ enum { * Connection is in special state: it socket is DEAD * and wait until ACK to our FIN is come. */ - Conn_Closing = (0x3 << __Flag_Bits), + Conn_Closing = (0x4 << __Flag_Bits), }; typedef struct tfw_conn_t TfwConn; @@ -95,6 +95,18 @@ typedef struct ss_hooks { /* Callback to make some job on connection shutdown. */ void (*connection_on_shutdown)(TfwConn *conn); + + /* + * Default callback which is called before push skb + * to socket write queue. + */ + void (*connection_on_send)(TfwConn *conn, struct sk_buff **skb_head); + + /* + * Push skbs from connection write queue to socket write queue + * according TCP window. 
+ */ + int (*connection_push)(TfwConn *conn, unsigned int mss_now); } SsHooks; /** @@ -194,7 +206,8 @@ bool ss_active(void); void ss_get_stat(SsStat *stat); void ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, unsigned char tls_type); -int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head); +int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd); /* * We should all linux kernel functions like `tcp_push` or diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 07773066f..1bb9ee2af 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -193,7 +193,8 @@ void ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, { } -int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb) +int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, + unsigned int mss_now, unsigned long *snd_wnd) { return 0; } diff --git a/fw/tcp.h b/fw/tcp.h index 5f50e347c..8650c913f 100644 --- a/fw/tcp.h +++ b/fw/tcp.h @@ -44,12 +44,6 @@ tfw_tcp_calc_snd_wnd(struct sock *sk, unsigned int mss_now) unsigned int qlen = skb_queue_len(&sk->sk_write_queue); unsigned int send_win, cong_win; - /* - * Update snd_cwnd if nedeed, to correct caclulation - * of count of bytes to send. 
- */ - tcp_slow_start_after_idle_check(sk); - if (in_flight + qlen >= tp->snd_cwnd) return 0; diff --git a/fw/websocket.c b/fw/websocket.c index 8c1d52512..763a76ca3 100644 --- a/fw/websocket.c +++ b/fw/websocket.c @@ -70,6 +70,8 @@ static const SsHooks tfw_ws_srv_ss_hooks = { .connection_new = NULL, .connection_drop = tfw_ws_srv_ss_hook_drop, .connection_recv = tfw_connection_recv, + .connection_on_send = tfw_connection_on_send, + .connection_push = tfw_connection_push, }; /** From 24ea69f81b2052d25283b4d0f82070ad293fc84b Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 19 Nov 2025 12:43:47 +0200 Subject: [PATCH 10/23] Fix according to review and after rebase - remove `client_get_light/client_put_light` functions, because after removing lock from `client` structure we don't need these functions at all. - Track the accounted memory of each skb in `skb->cb`. Usually it is equal to `skb->truesize`, but in some cases (an skb created by `pskb_copy_for_clone`, for example) it is different. --- fw/client.c | 6 +++++ fw/client.h | 21 +---------------- fw/connection.c | 11 +++++---- fw/http.c | 6 ++--- fw/http_msg.c | 6 +++-- fw/sock.c | 23 +++++++++++------- fw/ss_skb.c | 49 ++++++++++++++++++++++++++++++++------- fw/ss_skb.h | 8 ++++++- fw/t/unit/helpers.c | 5 ++++ fw/t/unit/test_http_msg.c | 2 +- fw/tls.c | 4 ++-- 11 files changed, 90 insertions(+), 51 deletions(-) diff --git a/fw/client.c b/fw/client.c index 1c17e690a..ca92cbeab 100644 --- a/fw/client.c +++ b/fw/client.c @@ -143,6 +143,12 @@ tfw_client_put(TfwClient *cli) tdb_rec_put(client_db, rec); } +void +tfw_client_get(TfwClient *cli) +{ + tdb_rec_keep(((TdbFRec *)cli) - 1); +} + typedef struct { TfwAddr addr; TfwAddr xff_addr; diff --git a/fw/client.h b/fw/client.h index 01546f9f0..692da74f3 100644 --- a/fw/client.h +++ b/fw/client.h @@ -46,6 +46,7 @@ int tfw_client_init(void); void tfw_client_exit(void); TfwClient *tfw_client_obtain(TfwAddr addr, TfwAddr *cli_addr, TfwStr *user_agent, void (*init)(void *));
+void tfw_client_get(TfwClient *cli); void tfw_client_put(TfwClient *cli); int tfw_client_for_each(int (*fn)(void *)); void tfw_cli_conn_release(TfwCliConn *cli_conn); @@ -61,24 +62,4 @@ tfw_client_adjust_mem(TfwClient *cli, int delta) atomic_add(delta, &cli->mem); } -static inline void -tfw_client_get_light(TfwClient *cli) -{ - int rc; - - rc = atomic_inc_return(&cli->refcnt); - if (rc == 1) - tfw_client_obtain(cli->addr, NULL, NULL, NULL); -} - -static inline void -tfw_client_put_light(TfwClient *cli) -{ - int rc; - - rc = atomic_dec_return(&cli->refcnt); - if (!rc) - tfw_client_put(cli); -} - #endif /* __TFW_CLIENT_H__ */ diff --git a/fw/connection.c b/fw/connection.c index 084e6502d..d03948bf9 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -253,7 +253,7 @@ tfw_connection_on_send(TfwConn *conn, struct sk_buff **skb_head) sock_set_flag(conn->sk, SOCK_TEMPESTA_HAS_DATA); } -static inline void +static inline int tfw_connection_shutdown(TfwConn *conn) { struct sock *sk = conn->sk; @@ -261,6 +261,9 @@ tfw_connection_shutdown(TfwConn *conn) SS_IN_USE_PROTECT({ tcp_shutdown(sk, SEND_SHUTDOWN); }); + if (unlikely(sk->sk_state == TCP_CLOSE)) + return -ENOMEM; + return 0; } int @@ -301,9 +304,9 @@ tfw_connection_push(TfwConn *conn, unsigned int mss_now) if (unlikely(!conn->write_queue)) { sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); if (unlikely(SS_CONN_TYPE(sk) & Conn_Shutdown)) - tfw_connection_shutdown(conn); + r = tfw_connection_shutdown(conn); } - return 0; + return r; } r = tfw_h2_make_frames(sk, h2, mss_now, snd_wnd); @@ -319,7 +322,7 @@ tfw_connection_push(TfwConn *conn, unsigned int mss_now) && (!h2->error || tfw_h2_conn_or_stream_wnd_is_exceeded(h2, h2->error))) - tfw_connection_shutdown(conn); + r = tfw_connection_shutdown(conn); if (!tfw_h2_is_ready_to_send(h2)) sock_reset_flag(sk, SOCK_TEMPESTA_HAS_DATA); } diff --git a/fw/http.c b/fw/http.c index 50ff7126a..193b5a715 100644 --- a/fw/http.c +++ b/fw/http.c @@ -4581,7 +4581,7 @@ 
tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) if (unlikely(!nskb)) return -ENOMEM; - ss_skb_set_owner(nskb, resp->msg.skb_head->sk); + ss_skb_set_owner(nskb, resp->msg.skb_head->sk, nskb->truesize); nskb->mark = resp->msg.skb_head->mark; cleanup->skb_head = resp->msg.skb_head; resp->msg.skb_head = NULL; @@ -6556,7 +6556,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, * tls decryption. */ if (!skb->sk) - ss_skb_set_owner(skb, conn->peer); + ss_skb_set_owner(skb, conn->peer, skb->truesize); r = frang_client_mem_limit((TfwCliConn *)conn, false); if (unlikely(r)) { @@ -7409,7 +7409,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, else conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); - ss_skb_set_owner(skb, cli_conn->peer); + ss_skb_set_owner(skb, cli_conn->peer, skb->truesize); r = frang_client_mem_limit(cli_conn, false); if (unlikely(r)) { diff --git a/fw/http_msg.c b/fw/http_msg.c index 4c9623669..d71e0c728 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -938,7 +938,8 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; - ss_skb_set_owner(it->skb, tfw_http_msg_client(hm)); + ss_skb_set_owner(it->skb, tfw_http_msg_client(hm), + it->skb->truesize); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; if (!it->skb_head) @@ -1208,7 +1209,8 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; - ss_skb_set_owner(nskb, tfw_http_msg_client(hm)); + ss_skb_set_owner(nskb, tfw_http_msg_client(hm), + nskb->truesize); /* * TODO #2136: Remove this flag during reworking * this function. 
Try to process headers and diff --git a/fw/sock.c b/fw/sock.c index 909b63e36..0f7071535 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -677,7 +677,8 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) r = -ENOMEM; goto err; } - ss_skb_set_owner(twin_skb, skb->sk); + memset(twin_skb->cb, 0, sizeof(twin_skb)->cb); + ss_skb_set_owner(twin_skb, skb->sk, skb_headlen(skb)); ss_skb_queue_tail(&sw.skb_head, twin_skb); skb = skb->next; } while (skb != *skb_head); @@ -1622,7 +1623,11 @@ __sk_close_locked(struct sock *sk, int flags) { int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); - ss_fill_write_queue(sk, mss_now); + if (ss_fill_write_queue(sk, mss_now)) { + ss_linkerror(sk, 0); + bh_unlock_sock(sk); + return; + } ss_do_close(sk, flags); if (!sk_stream_closing(sk)) { ss_conn_drop_guard_exit(sk); @@ -1643,16 +1648,16 @@ __sk_close_locked(struct sock *sk, int flags) static inline void ss_do_shutdown(struct sock *sk) { + int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); /* - * Prevent calling `tcp_done` from `tcp_shutdown` if error - * occurs to prevent double free. + * `tcp_shutdown` will be called from `ss_fill_write_queue` + * after sending all pending data.
*/ - SS_IN_USE_PROTECT({ - tcp_shutdown(sk, SEND_SHUTDOWN); - }); - SS_STATE_PROCESS_RETURN(sk); SS_CONN_TYPE(sk) |= Conn_Shutdown; - SS_CALL(connection_on_shutdown, sk->sk_user_data); + if (ss_fill_write_queue(sk, mss_now)) + ss_linkerror(sk, 0); + else + SS_CALL(connection_on_shutdown, sk->sk_user_data); } static inline bool diff --git a/fw/ss_skb.c b/fw/ss_skb.c index c7532d03f..271549d8b 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -120,7 +120,7 @@ ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len) skb = ss_skb_alloc_pages(n); if (!skb) return -ENOMEM; - ss_skb_set_owner(skb, owner); + ss_skb_set_owner(skb, owner, skb->truesize); ss_skb_queue_tail(skb_head, skb); } @@ -219,7 +219,7 @@ __extend_pgfrags(struct sk_buff *skb_head, struct sk_buff *skb, int from, int n) if (nskb == NULL) return -ENOMEM; - ss_skb_set_owner(nskb, skb->sk); + ss_skb_set_owner(nskb, skb->sk, nskb->truesize); skb_shinfo(nskb)->flags = skb_shinfo(skb)->flags; ss_skb_insert_after(skb, nskb); skb_shinfo(nskb)->nr_frags = n_excess; @@ -1302,6 +1302,7 @@ ss_skb_split(struct sk_buff *skb, int len) if (!buff) return NULL; + memset(buff->cb, 0, sizeof(buff)->cb); skb_reserve(buff, MAX_TCP_HEADER); /* @buff already accounts @n in truesize. */ @@ -1712,11 +1713,37 @@ int ss_skb_realloc_headroom(struct sk_buff *skb) { int delta = MAX_TCP_HEADER - skb_headroom(skb); + bool skb_has_owner = (skb->sk != NULL); + void *owner; + long int mem; + unsigned int old_truesize; + int r; if (likely(delta <= 0)) return 0; - return pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); + /* + * `pskb_expand_head` doesn't change skb->truesize for not + * orphaned skbs (there is a special comment about it in the + * kernel code). We must keep the `skb->truesize` calculation + * correct here, so we orphan the skb and then restore its + * owner later.
+ */ + if (skb_has_owner) { + owner = skb->sk; + mem = TFW_SKB_CB(skb)->mem; + old_truesize = skb->truesize; + skb_orphan(skb); + } + + r = pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); + if (unlikely(r)) + return r; + + if (skb_has_owner) + ss_skb_set_owner(skb, owner, mem + skb->truesize - old_truesize); + + return 0; } ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); @@ -1725,12 +1752,12 @@ ss_skb_destructor(struct sk_buff *skb) { TfwClient *cli = (TfwClient *)skb->sk; - tfw_client_adjust_mem(cli, -skb->truesize); - tfw_client_put_light(cli); + ss_skb_adjust_client_mem(skb, -TFW_SKB_CB(skb)->mem); + tfw_client_put(cli); } void -ss_skb_set_owner(struct sk_buff *skb, void *owner) +ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int mem) { /* * Can be zero when this function is called from `__extend_pgfrags` @@ -1743,11 +1770,12 @@ ss_skb_set_owner(struct sk_buff *skb, void *owner) * this SKBs will be passed to the socket write queue. */ BUG_ON(skb->sk); + BUG_ON(TFW_SKB_CB(skb)->mem != 0); - tfw_client_get_light((TfwClient *)owner); + tfw_client_get((TfwClient *)owner); skb->sk = owner; skb->destructor = ss_skb_destructor; - tfw_client_adjust_mem((TfwClient *)owner, skb->truesize); + ss_skb_adjust_client_mem(skb, mem); } } @@ -1760,6 +1788,9 @@ ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) * `cli` can be zero here when this function is called * from `ss_skb_split` for SKBs which are already orphaned */ - if (cli) + if (cli) { + TFW_SKB_CB(skb)->mem += delta; + BUG_ON(TFW_SKB_CB(skb)->mem < 0); tfw_client_adjust_mem(cli, delta); + } } diff --git a/fw/ss_skb.h b/fw/ss_skb.h index ddec02d65..d29b91d63 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -63,7 +63,11 @@ typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); * @on_send - callback to special handling this skb before sending; * @on_tcp_entail - callback to special handling this skb before pushing * to socket write queue; + * @mem - memory used for this 
skb, used to account appropriate + * client memory; * @stream_id - id of sender stream; + * @tls_type - tls type of current skb, if it's data should be + * encrypted; * @is_head - flag indicates that this is a head of skb list; */ struct tfw_skb_cb { @@ -71,13 +75,14 @@ struct tfw_skb_cb { void (*destructor)(void *opaque_data); on_send_cb_t on_send; on_tcp_entail_t on_tcp_entail; + long int mem; unsigned int stream_id; bool is_head; }; #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) -void ss_skb_set_owner(struct sk_buff *skb, void *owner); +void ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); static inline bool @@ -363,6 +368,7 @@ ss_skb_alloc(size_t n) if (!skb) return NULL; skb_reserve(skb, MAX_TCP_HEADER); + memset(skb->cb, 0, sizeof(skb->cb)); return skb; } diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 1bb9ee2af..ade7089e5 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -199,6 +199,11 @@ int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, return 0; } +void +tfw_client_get(TfwClient *cli) +{ +} + void tfw_client_put(TfwClient *cli) { diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 1d4b89bcc..ba1e857b5 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -119,7 +119,7 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; - ss_skb_set_owner(skb, tfw_http_msg_client((TfwHttpMsg*)resp)); + ss_skb_set_owner(skb, tfw_http_msg_client((TfwHttpMsg*)resp), skb->truesize); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; diff --git a/fw/tls.c b/fw/tls.c index 557c83ae9..486e589d4 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -89,7 +89,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) next_msg: spin_lock(&tls->lock); ss_skb_queue_tail(&tls->io_in.skb_list, skb); - ss_skb_set_owner(skb, 
conn->peer); + ss_skb_set_owner(skb, conn->peer, skb->truesize); /* Call TLS layer to place skb into a TLS record on top of skb_list. */ parsed = 0; @@ -587,7 +587,7 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } - ss_skb_set_owner(skb, cli_conn->peer); + ss_skb_set_owner(skb, cli_conn->peer, skb->truesize); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } From 7aa328439e5b5591ea983ea8b793ab39b2718c2f Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 14 Jan 2026 11:44:52 +0200 Subject: [PATCH 11/23] Fix according review Do not use `skb->sk` and `skb->destructor` to check memory used by skb, use `skb->cb` for this purposes. - Implement our own version of `skb_orphan` with name `ss_skb_orphan` which is called when skb is freed in Tempesta FW code our just before pushing skb to socket write queue. - Implement wrappers over `__kfree_skb` and `kfree_skb` where we call `ss_skb_orphan` before free skb. - Check that skb is pushed to socket write queue, using new ipmlemented function `skb_tfw_is_in_socket_write_queue` from linux kernel, to skip adjusting memory used be skb, when it belongs to kernel (when `ss_skb_*` functions called from `tls_encrypt`). 
--- fw/connection.c | 2 +- fw/http.c | 62 +++++++++++++++++++++++---------- fw/http2.c | 2 +- fw/http_frame.c | 12 +++---- fw/http_msg.c | 6 ++-- fw/http_stream.c | 14 ++++++-- fw/http_stream.h | 1 + fw/sock.c | 23 ++++++------- fw/ss_skb.c | 70 ++++++++++++++++++------------------- fw/ss_skb.h | 72 +++++++++++++++++++++++++-------------- fw/t/unit/test_http_msg.c | 4 ++- fw/tls.c | 16 +++++---- fw/websocket.c | 2 +- linux-6.12.12.patch | 46 +++++++++++++++++++------ 14 files changed, 207 insertions(+), 125 deletions(-) diff --git a/fw/connection.c b/fw/connection.c index d03948bf9..7cff3b9e4 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -187,7 +187,7 @@ tfw_connection_recv(TfwConn *conn, struct sk_buff *skb) next = split; } } else { - __kfree_skb(skb); + __ss_kfree_skb(skb); } } diff --git a/fw/http.c b/fw/http.c index 193b5a715..5467b18e8 100644 --- a/fw/http.c +++ b/fw/http.c @@ -1149,21 +1149,36 @@ tfw_h2_resp_status_write(TfwHttpResp *resp, unsigned short status, void tfw_h2_resp_fwd(TfwHttpResp *resp) { - bool resp_in_xmit = - (TFW_SKB_CB(resp->msg.skb_head)->opaque_data == resp); + bool resp_in_xmit = !!TFW_SKB_CB(resp->msg.skb_head)->stream_id; TfwHttpReq *req = resp->req; TfwConn *conn = req->conn; int status = READ_ONCE(resp->status); + bool need_extra_put = false; tfw_connection_get(conn); + /* + * We need this extra get, because if send fails, connection + * will be put during freeing skbs of sending response (in + * skb destructor). 
+ */ + if (resp_in_xmit) { + void *owner = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; + + BUG_ON(owner != resp->req->conn->peer); + TFW_SKB_CB(resp->msg.skb_head)->opaque_data = resp; + TFW_SKB_CB(resp->msg.skb_head)->destructor = + tfw_h2_stream_skb_destructor; + need_extra_put = true; + tfw_connection_get(conn); + } do_access_log(resp); if (tfw_cli_conn_send((TfwCliConn *)conn, (TfwMsg *)resp)) { T_DBG("%s: cannot send data to client via HTTP/2\n", __func__); TFW_INC_STAT_BH(serv.msgs_otherr); - tfw_connection_close(conn, true); /* We can't send response, so we should free it here. */ - resp_in_xmit = false; + tfw_connection_close(conn, true); + resp_in_xmit = !resp_in_xmit || !resp->msg.skb_head; } else { TFW_INC_STAT_BH(serv.msgs_forwarded); tfw_inc_global_hm_stats(status); @@ -1171,6 +1186,8 @@ tfw_h2_resp_fwd(TfwHttpResp *resp) if (!resp_in_xmit) tfw_http_resp_pair_free_and_put_conn(resp); + if (need_extra_put) + tfw_connection_put(conn); } /* @@ -1807,7 +1824,7 @@ __tfw_http_free_cleanup(TfwHttpMsgCleanup *cleanup) struct sk_buff *skb; while ((skb = ss_skb_dequeue(&cleanup->skb_head))) - __kfree_skb(skb); + __ss_kfree_skb(skb); for (i = 0; i < cleanup->pages_sz; i++) /* @@ -4574,14 +4591,16 @@ tfw_http_resp_get_conn_flags(TfwHttpResp *resp) static int tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) { - struct sk_buff *nskb; + void *opaque_data = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; TfwMsgIter *iter = &resp->iter; + struct sk_buff *nskb; nskb = ss_skb_alloc(0); if (unlikely(!nskb)) return -ENOMEM; - ss_skb_set_owner(nskb, resp->msg.skb_head->sk, nskb->truesize); + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + opaque_data, nskb->truesize); nskb->mark = resp->msg.skb_head->mark; cleanup->skb_head = resp->msg.skb_head; resp->msg.skb_head = NULL; @@ -5415,10 +5434,11 @@ int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head) { TfwH2Ctx *ctx = tfw_h2_context_unsafe((TfwConn *)conn); - struct tfw_skb_cb *tfw_cb = 
TFW_SKB_CB(*skb_head); + TfwHttpResp *resp = TFW_SKB_CB(*skb_head)->opaque_data; + unsigned int stream_id = TFW_SKB_CB(*skb_head)->stream_id; TfwStream *stream; - stream = tfw_h2_find_not_closed_stream(ctx, tfw_cb->stream_id, false); + stream = tfw_h2_find_not_closed_stream(ctx, stream_id, false); /* * Very unlikely case. We check that stream is active, before * calling ss_send, but there is a very small chance, that @@ -5428,8 +5448,11 @@ tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head) if (unlikely(!stream)) return -EPIPE; - BUG_ON(stream->xmit.skb_head); - stream->xmit.resp = (TfwHttpResp *)tfw_cb->opaque_data; + BUG_ON(stream->xmit.skb_head || stream->xmit.resp); + TFW_SKB_CB(*skb_head)->opaque_data = resp->req->conn->peer; + TFW_SKB_CB(*skb_head)->destructor = ss_skb_dflt_destructor; + stream->xmit.resp = resp; + if (test_bit(TFW_HTTP_B_CLOSE_ERROR_RESPONSE, stream->xmit.resp->flags)) ctx->error = stream; swap(stream->xmit.skb_head, *skb_head); @@ -6254,7 +6277,7 @@ tfw_h1_req_process(TfwStream *stream, struct sk_buff *skb) if (test_bit(TFW_HTTP_B_CONN_CLOSE, req->flags)) { TFW_CONN_TYPE(req->conn) |= Conn_Stop; if (unlikely(skb)) { - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = NULL; } } @@ -6273,7 +6296,7 @@ tfw_h1_req_process(TfwStream *stream, struct sk_buff *skb) TFW_CONN_TYPE(req->conn) |= Conn_Stop; tfw_http_conn_error_log(req->conn, "Can't create" " pipelined request"); - __kfree_skb(skb); + __ss_kfree_skb(skb); } } @@ -6555,8 +6578,10 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, * For tls connections we already set `skb->owner` before * tls decryption. 
*/ - if (!skb->sk) - ss_skb_set_owner(skb, conn->peer, skb->truesize); + if (!TFW_SKB_CB(skb)->opaque_data) { + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + conn->peer, skb->truesize); + } r = frang_client_mem_limit((TfwCliConn *)conn, false); if (unlikely(r)) { @@ -7409,7 +7434,8 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, else conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); - ss_skb_set_owner(skb, cli_conn->peer, skb->truesize); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + cli_conn->peer, skb->truesize); r = frang_client_mem_limit(cli_conn, false); if (unlikely(r)) { @@ -7584,7 +7610,7 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, TFW_INC_STAT_BH(serv.msgs_otherr); tfw_http_conn_error_log(conn, "Can't create pipelined" " response"); - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = NULL; conn_stop = true; } @@ -7778,7 +7804,7 @@ tfw_http_msg_process_generic(TfwConn *conn, TfwStream *stream, return r; err: - __kfree_skb(skb); + __ss_kfree_skb(skb); return r; } diff --git a/fw/http2.c b/fw/http2.c index ee74cd452..ade6596ef 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -729,7 +729,7 @@ tfw_h2_entail_stream_skb(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, T_DBG3("[%d]: %s: drop skb=%px data_len=%u len=%u\n", smp_processor_id(), __func__, skb, skb->data_len, skb->len); - kfree_skb(skb); + ss_kfree_skb(skb); continue; } diff --git a/fw/http_frame.c b/fw/http_frame.c index 849717ebe..c728862ce 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -1931,7 +1931,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) while (unlikely(h2->skb_head->len <= h2->data_off)) { struct sk_buff *skb = ss_skb_dequeue(&h2->skb_head); h2->data_off -= skb->len; - kfree_skb(skb); + ss_kfree_skb(skb); /* * Special case when the frame is postponed just * in the beginning of the app data, after all @@ -1951,7 +1951,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff 
*skb, struct sk_buff **next) pskb = h2->skb_head; if ((r = ss_skb_chop_head_tail(NULL, pskb, h2->data_off, 0))) { - kfree_skb(nskb); + ss_kfree_skb(nskb); goto out; } h2->data_off = 0; @@ -1960,7 +1960,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) /* TODO #1490: Check this place, when working on the task. */ if (r && r != T_DROP) { WARN_ON_ONCE(r == T_POSTPONE); - kfree_skb(nskb); + ss_kfree_skb(nskb); goto out; } } @@ -1977,7 +1977,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) while (unlikely(h2->skb_head != end)) { pskb = ss_skb_dequeue(&h2->skb_head); h2->data_off -= pskb->len; - kfree_skb(pskb); + ss_kfree_skb(pskb); } pskb = h2->skb_head; @@ -1988,7 +1988,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) /* TODO #1490: Check this place, when working on the task. */ if (r && r != T_DROP) { WARN_ON_ONCE(r == T_POSTPONE); - kfree_skb(nskb); + ss_kfree_skb(nskb); goto out; } } @@ -2073,7 +2073,7 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, while (skb && unlikely(!skb->len)) { ss_skb_unlink(&stream->xmit.skb_head, skb); - kfree_skb(skb); + ss_kfree_skb(skb); skb = stream->xmit.skb_head; } } diff --git a/fw/http_msg.c b/fw/http_msg.c index d71e0c728..3918377de 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -938,7 +938,8 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, if (!(it->skb = ss_skb_alloc(SKB_MAX_HEADER))) return -ENOMEM; - ss_skb_set_owner(it->skb, tfw_http_msg_client(hm), + ss_skb_set_owner(it->skb, ss_skb_dflt_destructor, + tfw_http_msg_client(hm), it->skb->truesize); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; @@ -1209,7 +1210,8 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, if (!nskb) return -ENOMEM; - ss_skb_set_owner(nskb, tfw_http_msg_client(hm), + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + tfw_http_msg_client(hm), nskb->truesize); /* * TODO #2136: 
Remove this flag during reworking diff --git a/fw/http_stream.c b/fw/http_stream.c index 9e7c875f8..71cf4ea52 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -149,7 +149,7 @@ tfw_h2_stream_purge_send_queue(TfwStream *stream) BUG_ON(!skb); len -= skb->len; - kfree_skb(skb); + ss_kfree_skb(skb); } stream->xmit.h_len = stream->xmit.b_len = stream->xmit.t_len = stream->xmit.frame_length = 0; @@ -812,6 +812,16 @@ tfw_h2_delete_stream(TfwH2Ctx *ctx, TfwStream *stream) kmem_cache_free(stream_cache, stream); } +void +tfw_h2_stream_skb_destructor(struct sk_buff *skb) +{ + TfwHttpResp *resp = (TfwHttpResp *)TFW_SKB_CB(skb)->opaque_data; + + TFW_SKB_CB(skb)->opaque_data = resp->req->conn->peer; + ss_skb_dflt_destructor(skb); + tfw_http_resp_pair_free_and_put_conn(resp); +} + int tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, unsigned long h_len, unsigned long b_len) @@ -828,8 +838,6 @@ tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, return -EPIPE; } - ss_skb_setup_opaque_data(skb_head, resp, - tfw_http_resp_pair_free_and_put_conn); TFW_SKB_CB(skb_head)->on_send = tfw_h2_on_send_resp; TFW_SKB_CB(skb_head)->stream_id = stream->id; diff --git a/fw/http_stream.h b/fw/http_stream.h index 165becdef..f9a7263a9 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -220,6 +220,7 @@ void tfw_h2_stream_add_closed(TfwH2Ctx *ctx, TfwStream *stream); void tfw_h2_stream_add_idle(TfwH2Ctx *ctx, TfwStream *idle); void tfw_h2_stream_purge_send_queue(TfwStream *stream); void tfw_h2_stream_purge_all_and_free_response(TfwStream *stream); +void tfw_h2_stream_skb_destructor(struct sk_buff *skb); static inline TfwStreamState tfw_h2_get_stream_state(TfwStream *stream) diff --git a/fw/sock.c b/fw/sock.c index 0f7071535..bd51c6f70 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -461,6 +461,7 @@ ss_skb_try_collapse(struct sock *sk, struct sk_buff *skb, tp->write_seq += skb->len; sk_wmem_queued_add(sk, delta); sk_mem_charge(sk, delta); + 
ss_skb_orphan(skb); kfree_skb_partial(skb, stolen); return true; @@ -483,6 +484,7 @@ ss_skb_tcp_entail(struct sock *sk, struct sk_buff *skb, unsigned int mark, skb->mark = mark; if (tls_type) skb_set_tfw_tls_type(skb, tls_type); + skb_tfw_set_in_socket_write_queue(skb); ss_forced_mem_schedule(sk, skb->truesize); tcp_skb_entail(sk, skb); tp->write_seq += skb->len; @@ -497,8 +499,6 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, struct sk_buff *tail, *next, *to_destroy; unsigned char tls_type = 0; unsigned int mark = 0; - void *opaque_data = NULL; - void (*destructor)(void *) = NULL; int r; while ((*snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now))) { @@ -516,8 +516,6 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, if (TFW_SKB_CB(skb)->is_head) { tls_type = skb_tfw_tls_type(skb); mark = skb->mark; - opaque_data = TFW_SKB_CB(skb)->opaque_data; - destructor = TFW_SKB_CB(skb)->destructor; tail = tcp_write_queue_tail(sk); } /* @@ -529,7 +527,7 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, T_DBG3("[%d]: %s: drop skb=%pK data_len=%u len=%u\n", smp_processor_id(), __func__, skb, skb->data_len, skb->len); - kfree_skb(skb); + ss_kfree_skb(skb); continue; } @@ -555,7 +553,9 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, tcp_wmem_free_skb(sk, to_destroy); } } - ss_skb_setup_opaque_data(*skb_head, opaque_data, destructor); + if (*skb_head && !TFW_SKB_CB(*skb_head)->is_head) + ss_skb_setup_head_of_list(*skb_head, mark, tls_type); + return r; } @@ -617,7 +617,6 @@ ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) return; cleanup: - ss_skb_destroy_opaque_data(*skb_head); ss_skb_queue_purge(skb_head); } @@ -677,8 +676,10 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) r = -ENOMEM; goto err; } - memset(twin_skb->cb, 0, sizeof(twin_skb)->cb); - ss_skb_set_owner(twin_skb, skb->sk, skb_headlen(skb)); + memset(twin_skb->cb, 0, sizeof(twin_skb->cb)); + 
ss_skb_set_owner(twin_skb, ss_skb_dflt_destructor, + TFW_SKB_CB(skb)->opaque_data, + skb_headlen(skb)); ss_skb_queue_tail(&sw.skb_head, twin_skb); skb = skb->next; } while (skb != *skb_head); @@ -1776,11 +1777,9 @@ ss_tx_action(void) } dead_sock: sock_put(sk); /* paired with push() calls */ - if (sw.skb_head) - ss_skb_destroy_opaque_data(sw.skb_head); while ((skb = ss_skb_dequeue(&sw.skb_head))) - kfree_skb(skb); + ss_kfree_skb(skb); } /* diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 271549d8b..42e441a17 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -91,7 +91,7 @@ ss_skb_alloc_pages(size_t len) for (i = 0; i < nr_frags; ++i) { struct page *page = alloc_page(GFP_ATOMIC); if (!page) { - kfree_skb(skb); + ss_kfree_skb(skb); return NULL; } skb_fill_page_desc(skb, i, page, 0, 0); @@ -120,7 +120,8 @@ ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len) skb = ss_skb_alloc_pages(n); if (!skb) return -ENOMEM; - ss_skb_set_owner(skb, owner, skb->truesize); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + owner, skb->truesize); ss_skb_queue_tail(skb_head, skb); } @@ -219,7 +220,11 @@ __extend_pgfrags(struct sk_buff *skb_head, struct sk_buff *skb, int from, int n) if (nskb == NULL) return -ENOMEM; - ss_skb_set_owner(nskb, skb->sk, nskb->truesize); + if (!skb_tfw_is_in_socket_write_queue(skb)) { + ss_skb_set_owner(nskb, ss_skb_dflt_destructor, + TFW_SKB_CB(skb)->opaque_data, + nskb->truesize); + } skb_shinfo(nskb)->flags = skb_shinfo(skb)->flags; ss_skb_insert_after(skb, nskb); skb_shinfo(nskb)->nr_frags = n_excess; @@ -954,7 +959,7 @@ ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, skb->next->prev = skb->prev; skb->prev->next = skb->next; *skb_list_head = skb_hd = skb->next; - __kfree_skb(skb); + __ss_kfree_skb(skb); skb = skb_hd; if (unlikely(skb->next == skb)) goto single_buff; @@ -973,7 +978,7 @@ ss_skb_list_chop_head_tail(struct sk_buff **skb_list_head, trail -= skb->len; skb_hd->prev = skb->prev; skb->prev->next = skb_hd; - __kfree_skb(skb); + 
__ss_kfree_skb(skb); skb = skb_hd->prev; if (unlikely(skb == skb_hd)) goto single_buff; @@ -1011,11 +1016,11 @@ __ss_skb_free_empty(struct sk_buff **skb_head, struct sk_buff *skb, TfwStr *it) it->skb = it->skb->next; it->data = __skb_data_address(it->skb, &fragn); ss_skb_unlink(skb_head, it->skb); - kfree_skb(to_delete); + ss_kfree_skb(to_delete); } if (unlikely(!is_same && !skb->len)) { ss_skb_unlink(skb_head, skb); - kfree_skb(skb); + ss_kfree_skb(skb); } return was_updated; @@ -1302,7 +1307,7 @@ ss_skb_split(struct sk_buff *skb, int len) if (!buff) return NULL; - memset(buff->cb, 0, sizeof(buff)->cb); + memset(buff->cb, 0, sizeof(buff->cb)); skb_reserve(buff, MAX_TCP_HEADER); /* @buff already accounts @n in truesize. */ @@ -1337,7 +1342,7 @@ ss_skb_init_for_xmit(struct sk_buff *skb) struct skb_shared_info *shinfo = skb_shinfo(skb); __u8 pfmemalloc = skb->pfmemalloc; - skb_orphan(skb); + ss_skb_orphan(skb); skb_dst_drop(skb); INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); @@ -1466,7 +1471,8 @@ ss_skb_unroll_slow(struct sk_buff **skb_head, struct sk_buff *skb) return 0; cleanup: - ss_skb_queue_purge(skb_head); + while ((skb = ss_skb_dequeue(skb_head)) != NULL) + kfree_skb(skb); return -ENOMEM; } @@ -1713,51 +1719,39 @@ int ss_skb_realloc_headroom(struct sk_buff *skb) { int delta = MAX_TCP_HEADER - skb_headroom(skb); - bool skb_has_owner = (skb->sk != NULL); - void *owner; - long int mem; unsigned int old_truesize; int r; if (likely(delta <= 0)) return 0; - /* - * `pskb_expand_head` doesn't change skb->truesize for not - * orphaned skbs (there is a special comment about it in the - * kernel code). It is not safe for us to not break `skb->truesize` - * calculation here, so we should orphan skb and then restore it's - * owner later. 
- */ - if (skb_has_owner) { - owner = skb->sk; - mem = TFW_SKB_CB(skb)->mem; + if (TFW_SKB_CB(skb)->opaque_data) old_truesize = skb->truesize; - skb_orphan(skb); - } r = pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC); if (unlikely(r)) return r; - if (skb_has_owner) - ss_skb_set_owner(skb, owner, mem + skb->truesize - old_truesize); + if (TFW_SKB_CB(skb)->opaque_data) + ss_skb_adjust_client_mem(skb, skb->truesize - old_truesize); return 0; } ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); -static void -ss_skb_destructor(struct sk_buff *skb) +void +ss_skb_dflt_destructor(struct sk_buff *skb) { - TfwClient *cli = (TfwClient *)skb->sk; + TfwClient *cli = (TfwClient *)TFW_SKB_CB(skb)->opaque_data; + BUG_ON(skb_tfw_is_in_socket_write_queue(skb)); ss_skb_adjust_client_mem(skb, -TFW_SKB_CB(skb)->mem); tfw_client_put(cli); } void -ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int mem) +ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), + void *owner, unsigned int mem) { /* * Can be zero when this function is called from `__extend_pgfrags` @@ -1769,12 +1763,12 @@ ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int mem) * We can safely use `skb->sk` for our purposes until * this SKBs will be passed to the socket write queue. 
*/ - BUG_ON(skb->sk); - BUG_ON(TFW_SKB_CB(skb)->mem != 0); + BUG_ON(TFW_SKB_CB(skb)->opaque_data); + WARN_ON(TFW_SKB_CB(skb)->mem != 0); tfw_client_get((TfwClient *)owner); - skb->sk = owner; - skb->destructor = ss_skb_destructor; + TFW_SKB_CB(skb)->opaque_data = owner; + TFW_SKB_CB(skb)->destructor = destructor; ss_skb_adjust_client_mem(skb, mem); } } @@ -1782,8 +1776,12 @@ ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int mem) void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) { - TfwClient *cli = (TfwClient *)skb->sk; + TfwClient *cli; + + if (skb_tfw_is_in_socket_write_queue(skb)) + return; + cli = (TfwClient *)TFW_SKB_CB(skb)->opaque_data; /* * `cli` can be zero here when this function is called * from `ss_skb_split` for SKBs which are already orphaned diff --git a/fw/ss_skb.h b/fw/ss_skb.h index d29b91d63..042dfec2e 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -54,6 +54,7 @@ enum { typedef int (*on_send_cb_t)(void *conn, struct sk_buff **skb_head); typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); +typedef void (*on_send_fail_cb_t)(void *conn, struct sk_buff *skb_head); /* * Tempesta FW sk_buff private data. 
@@ -72,7 +73,7 @@ typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); */ struct tfw_skb_cb { void *opaque_data; - void (*destructor)(void *opaque_data); + void (*destructor)(struct sk_buff *); on_send_cb_t on_send; on_tcp_entail_t on_tcp_entail; long int mem; @@ -82,8 +83,10 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) -void ss_skb_set_owner(struct sk_buff *skb, void *owner, unsigned int delta); +void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), + void *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); +void ss_skb_dflt_destructor(struct sk_buff *skb); static inline bool ss_skb_is_within_fragment(char *begin_fragment, char *position, @@ -96,35 +99,15 @@ static inline void ss_skb_setup_head_of_list(struct sk_buff *skb_head, unsigned int mark, unsigned char tls_type) { + BUILD_BUG_ON(sizeof(struct tfw_skb_cb) > + sizeof(((struct sk_buff *)(0))->cb)); + if (tls_type) skb_set_tfw_tls_type(skb_head, tls_type); skb_head->mark = mark; TFW_SKB_CB(skb_head)->is_head = true; } -static inline void -ss_skb_setup_opaque_data(struct sk_buff *skb_head, void *opaque_data, - void (*destructor)(void *)) -{ - TFW_SKB_CB(skb_head)->opaque_data = opaque_data; - TFW_SKB_CB(skb_head)->destructor = destructor; -} - -static inline void -ss_skb_destroy_opaque_data(struct sk_buff *skb_head) -{ - void *opaque_data = TFW_SKB_CB(skb_head)->opaque_data; - void (*destructor)(void *) = TFW_SKB_CB(skb_head)->destructor; - - BUILD_BUG_ON(sizeof(struct tfw_skb_cb) > - sizeof(((struct sk_buff *)(0))->cb)); - - if (opaque_data) { - BUG_ON(!destructor); - destructor(opaque_data); - } -} - static inline int ss_skb_on_send(void *conn, struct sk_buff **skb_head) { @@ -206,6 +189,43 @@ ss_skb_queue_splice(struct sk_buff **skb_head, struct sk_buff **skb) *skb = NULL; } +static inline void +ss_skb_orphan(struct sk_buff *skb) +{ + void (*destructor)(struct sk_buff *); + + if 
(skb_tfw_is_in_socket_write_queue(skb)) + return; + + destructor = TFW_SKB_CB(skb)->destructor; + if (destructor) { + BUG_ON(!TFW_SKB_CB(skb)->opaque_data); + destructor(skb); + TFW_SKB_CB(skb)->destructor = NULL; + TFW_SKB_CB(skb)->opaque_data = NULL; + } else { + BUG_ON(TFW_SKB_CB(skb)->opaque_data); + } +} + +static inline void +__ss_kfree_skb(struct sk_buff *skb) +{ + if (!skb) + return; + ss_skb_orphan(skb); + __kfree_skb(skb); +} + +static inline void +ss_kfree_skb(struct sk_buff *skb) +{ + if (!skb) + return; + ss_skb_orphan(skb); + kfree_skb(skb); +} + static inline void ss_skb_remove(struct sk_buff *skb) { @@ -315,7 +335,7 @@ ss_skb_queue_purge(struct sk_buff **skb_head) { struct sk_buff *skb; while ((skb = ss_skb_dequeue(skb_head)) != NULL) - kfree_skb(skb); + ss_kfree_skb(skb); } static inline void diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index ba1e857b5..cc71b957a 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -119,7 +119,9 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, if (!skb) return false; - ss_skb_set_owner(skb, tfw_http_msg_client((TfwHttpMsg*)resp), skb->truesize); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + tfw_http_msg_client((TfwHttpMsg*)resp), + skb->truesize); skb->next = skb->prev = skb; it = &resp->iter; resp->msg.skb_head = it->skb = it->skb_head = skb; diff --git a/fw/tls.c b/fw/tls.c index 486e589d4..bdf3b3f54 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -54,7 +54,7 @@ tfw_tls_purge_io_ctx(TlsIOCtx *io) struct sk_buff *skb; while ((skb = ss_skb_dequeue(&io->skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); ttls_reset_io_ctx(io); } @@ -89,7 +89,8 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) next_msg: spin_lock(&tls->lock); ss_skb_queue_tail(&tls->io_in.skb_list, skb); - ss_skb_set_owner(skb, conn->peer, skb->truesize); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + conn->peer, skb->truesize); /* Call TLS layer to place skb into a TLS record on 
top of skb_list. */ parsed = 0; @@ -170,7 +171,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) TTLS_TAG_LEN); if (r) { tfw_tls_purge_io_ctx(&tls->io_in); - kfree_skb(nskb); + ss_kfree_skb(nskb); spin_unlock(&tls->lock); return T_BAD; } @@ -186,7 +187,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) /* Do upcall to http or websocket */ r = tfw_connection_recv(conn, data_up.skb); if (r && r != T_POSTPONE && r != T_DROP) { - kfree_skb(nskb); + ss_kfree_skb(nskb); return r; } } else { @@ -587,7 +588,8 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) r = -ENOMEM; goto out; } - ss_skb_set_owner(skb, cli_conn->peer, skb->truesize); + ss_skb_set_owner(skb, ss_skb_dflt_destructor, + cli_conn->peer, skb->truesize); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } @@ -646,9 +648,9 @@ tfw_tls_conn_dtor(void *c) if (tls) { while ((skb = ss_skb_dequeue(&tls->io_in.skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); while ((skb = ss_skb_dequeue(&tls->io_out.skb_list))) - kfree_skb(skb); + ss_kfree_skb(skb); if (tls->peer_conf) tfw_vhost_put(tfw_vhost_from_tls_conf(tls->peer_conf)); diff --git a/fw/websocket.c b/fw/websocket.c index 763a76ca3..710171124 100644 --- a/fw/websocket.c +++ b/fw/websocket.c @@ -233,7 +233,7 @@ tfw_ws_msg_process(TfwConn *conn, struct sk_buff *skb) * which is wrong - please fix this if you see the warning. 
*/ if (WARN_ON_ONCE(sock_flag(conn->sk, SOCK_DEAD))) { - kfree_skb(skb); + ss_kfree_skb(skb); return 0; } diff --git a/linux-6.12.12.patch b/linux-6.12.12.patch index c121a863e..dee42bd37 100644 --- a/linux-6.12.12.patch +++ b/linux-6.12.12.patch @@ -863,7 +863,7 @@ index 8896705cc..29e0d0428 100644 /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 39f1d16f3..5457e0826 100644 +index 39f1d16f3..db9523d1f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -267,6 +267,12 @@ @@ -879,20 +879,21 @@ index 39f1d16f3..5457e0826 100644 /* return minimum truesize of one skb containing X bytes of data */ #define SKB_TRUESIZE(X) ((X) + \ -@@ -877,6 +883,12 @@ struct sk_buff { +@@ -877,6 +883,13 @@ struct sk_buff { * UDP receive path is one user. */ unsigned long dev_scratch; +#ifdef CONFIG_SECURITY_TEMPESTA -+ struct { -+ __u8 present : 1; -+ __u8 tls_type : 7; -+ } tfw_cb; ++ struct { ++ __u16 present : 1; ++ __u16 in_socket_write_queue : 1; ++ __u16 tls_type : 7; ++ } tfw_cb; +#endif }; }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ -@@ -938,11 +950,17 @@ struct sk_buff { +@@ -938,11 +951,17 @@ struct sk_buff { fclone:2, peeked:1, head_frag:1, @@ -910,11 +911,12 @@ index 39f1d16f3..5457e0826 100644 /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() -@@ -1113,6 +1131,42 @@ struct sk_buff { +@@ -1113,6 +1132,56 @@ struct sk_buff { #define SKB_ALLOC_RX 0x02 #define SKB_ALLOC_NAPI 0x04 +#ifdef CONFIG_SECURITY_TEMPESTA ++ +static inline unsigned long +skb_tfw_is_present(struct sk_buff *skb) +{ @@ -936,6 +938,19 @@ index 39f1d16f3..5457e0826 100644 +} + +static inline void ++skb_tfw_set_in_socket_write_queue(struct sk_buff *skb) ++{ ++ skb->tfw_cb.present = 1; ++ skb->tfw_cb.in_socket_write_queue = 1; ++} ++ ++static inline bool ++skb_tfw_is_in_socket_write_queue(struct sk_buff *skb) ++{ ++ return skb->tfw_cb.present ? 
skb->tfw_cb.in_socket_write_queue : false; ++} ++ ++static inline void +skb_copy_tfw_cb(struct sk_buff *dst, struct sk_buff *src) +{ + dst->dev = src->dev; @@ -953,7 +968,7 @@ index 39f1d16f3..5457e0826 100644 /** * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves * @skb: buffer -@@ -1298,6 +1352,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); +@@ -1298,6 +1367,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); @@ -961,7 +976,7 @@ index 39f1d16f3..5457e0826 100644 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); -@@ -2465,7 +2520,11 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); +@@ -2465,7 +2535,11 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) { @@ -973,7 +988,7 @@ index 39f1d16f3..5457e0826 100644 } static inline unsigned int skb_headlen(const struct sk_buff *skb) -@@ -2821,6 +2880,20 @@ static inline unsigned int skb_headroom(const struct sk_buff *skb) +@@ -2821,6 +2895,20 @@ static inline unsigned int skb_headroom(const struct sk_buff *skb) return skb->data - skb->head; } @@ -994,6 +1009,15 @@ index 39f1d16f3..5457e0826 100644 /** * skb_tailroom - bytes at buffer end * @skb: buffer to check +@@ -3806,7 +3894,7 @@ static inline int skb_add_data(struct sk_buff *skb, + if (skb->ip_summed == CHECKSUM_NONE) { + __wsum csum = 0; + if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, +- &csum, from)) { ++ &csum, from)) { + skb->csum = csum_block_add(skb->csum, csum, off); + return 0; + } diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 0f3c58007..e2576c604 100644 --- a/include/linux/skbuff_ref.h From 5f04e5925d63e88e0ae9ab81ba4b3d555ee357ed Mon Sep 17 00:00:00 2001 From: 
EvgeniiMekhanik Date: Mon, 26 Jan 2026 14:24:13 +0200 Subject: [PATCH 12/23] Remove extra connections callbacks - Usually we use callbacks which are set in `skb->cb` for different purposes. So remove the two callbacks which were added in previous patches and use the callbacks saved in `skb->cb` instead. --- fw/connection.c | 9 +-------- fw/connection.h | 3 +-- fw/sock.c | 15 +++------------ fw/sock_clnt.c | 11 +++++++---- fw/sock_srv.c | 9 +++++++-- fw/ss_skb.c | 7 +++++++ fw/ss_skb.h | 3 +++ fw/sync_socket.h | 12 ------------ fw/websocket.c | 2 -- 9 files changed, 29 insertions(+), 42 deletions(-) diff --git a/fw/connection.c b/fw/connection.c index 7cff3b9e4..ab6bc1c3d 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -246,13 +246,6 @@ tfw_connection_unlink_to_sk(TfwConn *conn) ss_sock_put(sk); } -void -tfw_connection_on_send(TfwConn *conn, struct sk_buff **skb_head) -{ - ss_skb_queue_splice(&conn->write_queue, skb_head); - sock_set_flag(conn->sk, SOCK_TEMPESTA_HAS_DATA); -} - static inline int tfw_connection_shutdown(TfwConn *conn) { @@ -267,7 +260,7 @@ tfw_connection_shutdown(TfwConn *conn) } int -tfw_connection_push(TfwConn *conn, unsigned int mss_now) +tfw_connection_fill_sk_write_queue(TfwConn *conn, unsigned int mss_now) { struct sock *sk = conn->sk; TfwH2Ctx *h2; diff --git a/fw/connection.h b/fw/connection.h index 274c24f27..43beabfa5 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -635,7 +635,6 @@ int tfw_connection_close(TfwConn *conn, bool sync); void tfw_connection_abort(TfwConn *conn); void tfw_connection_drop(TfwConn *conn); void tfw_connection_release(TfwConn *conn); -void tfw_connection_on_send(TfwConn *conn, struct sk_buff **sk_buff); -int tfw_connection_push(TfwConn *conn, unsigned int mss_now); +int tfw_connection_fill_sk_write_queue(TfwConn *conn, unsigned int mss_now); #endif /* __TFW_CONNECTION_H__ */ diff --git a/fw/sock.c b/fw/sock.c index bd51c6f70..89f387852 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -230,12 +230,6 @@
ss_conn_drop_guard_exit(struct sock *sk) ss_active_guard_exit(SS_V_ACT_LIVECONN); } -static int -ss_fill_write_queue(struct sock *sk, unsigned int mss) -{ - return SS_CALL(connection_push, sk->sk_user_data, mss); -} - static void ss_ipi(struct irq_work *work) { @@ -583,8 +577,6 @@ ss_do_send(struct sock *sk, struct sk_buff **skb_head, int flags) if (ss_skb_on_send(conn, skb_head)) goto cleanup; - if (*skb_head) - SS_CALL(connection_on_send, sk->sk_user_data, skb_head); if (flags & SS_F_CONN_CLOSE) return; @@ -1348,7 +1340,6 @@ ss_set_callbacks(struct sock *sk) sk->sk_data_ready = ss_tcp_data_ready; sk->sk_state_change = ss_tcp_state_change; sk->sk_destroy_cb = ss_conn_drop_guard_exit; - sk->sk_fill_write_queue = ss_fill_write_queue; } EXPORT_SYMBOL(ss_set_callbacks); @@ -1624,7 +1615,7 @@ __sk_close_locked(struct sock *sk, int flags) { int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); - if (ss_fill_write_queue(sk, mss_now)) { + if (sk->sk_fill_write_queue(sk, mss_now)) { ss_linkerror(sk, 0); bh_unlock_sock(sk); return; @@ -1651,11 +1642,11 @@ ss_do_shutdown(struct sock *sk) { int size, mss_now = tcp_send_mss(sk, &size, MSG_DONTWAIT); /* - * `tcp_shutdown` will ne called from `ss_fill_write_queue` + * `tcp_shutdown` will ne called from `sk->sk_fill_write_queue` * after sending all pending data. */ SS_CONN_TYPE(sk) |= Conn_Shutdown; - if (ss_fill_write_queue(sk, mss_now)) + if (sk->sk_fill_write_queue(sk, mss_now)) ss_linkerror(sk, 0); else SS_CALL(connection_on_shutdown, sk->sk_user_data); diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 05641c94d..9262a274a 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -196,6 +196,12 @@ tfw_cli_conn_send(TfwCliConn *cli_conn, TfwMsg *msg) return r; } +static int +tfw_sock_clnt_fill_write_queue(struct sock *sk, unsigned int mss_now) +{ + return tfw_connection_fill_sk_write_queue(sk->sk_user_data, mss_now); +} + /** * This hook is called when a new client connection is established. 
*/ @@ -256,6 +262,7 @@ tfw_sock_clnt_new(struct sock *sk) */ sk->sk_write_xmit = tfw_tls_encrypt; } + sk->sk_fill_write_queue = tfw_sock_clnt_fill_write_queue; /* Activate keepalive timer. */ mod_timer(&((TfwCliConn *)conn)->timer, @@ -353,8 +360,6 @@ static const SsHooks tfw_sock_http_clnt_ss_hooks = { .connection_drop = tfw_sock_clnt_drop, .connection_recv = tfw_connection_recv, .connection_on_shutdown = tfw_cli_conn_on_shutdown, - .connection_on_send = tfw_connection_on_send, - .connection_push = tfw_connection_push, }; static const SsHooks tfw_sock_tls_clnt_ss_hooks = { @@ -363,8 +368,6 @@ static const SsHooks tfw_sock_tls_clnt_ss_hooks = { .connection_recv = tfw_tls_connection_recv, .connection_recv_finish = tfw_connection_recv_finish, .connection_on_shutdown = tfw_cli_conn_on_shutdown, - .connection_on_send = tfw_connection_on_send, - .connection_push = tfw_connection_push, }; /* diff --git a/fw/sock_srv.c b/fw/sock_srv.c index 15e7ab0d1..b48d12e10 100644 --- a/fw/sock_srv.c +++ b/fw/sock_srv.c @@ -442,6 +442,12 @@ tfw_srv_conn_release(TfwSrvConn *srv_conn) tfw_srv_conn_stop(srv_conn); } +static int +tfw_sock_srv_fill_write_queue(struct sock *sk, unsigned int mss_now) +{ + return tfw_connection_fill_sk_write_queue(sk->sk_user_data, mss_now); +} + /** * Initiate a non-blocking connect attempt. * Returns immediately without waiting until a connection is established. @@ -493,6 +499,7 @@ tfw_sock_srv_connect_try(TfwSrvConn *srv_conn) tfw_srv_conn_init_as_dead(srv_conn); sk->sk_uid.val = SS_SRV_USER; ss_set_callbacks(sk); + sk->sk_fill_write_queue = tfw_sock_srv_fill_write_queue; /* * Set connection destructor such that connection failover can * take place if the connection attempt fails. 
@@ -720,8 +727,6 @@ static const SsHooks tfw_sock_srv_ss_hooks = { .connection_new = tfw_sock_srv_connect_complete, .connection_drop = tfw_sock_srv_connect_drop, .connection_recv = tfw_connection_recv, - .connection_on_send = tfw_connection_on_send, - .connection_push = tfw_connection_push, }; static int diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 42e441a17..15bd3ef17 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1749,6 +1749,13 @@ ss_skb_dflt_destructor(struct sk_buff *skb) tfw_client_put(cli); } +void +ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head) +{ + ss_skb_queue_splice(&((TfwConn *)conn)->write_queue, skb_head); + sock_set_flag(((TfwConn *)conn)->sk, SOCK_TEMPESTA_HAS_DATA); +} + void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), void *owner, unsigned int mem) diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 042dfec2e..ab1897033 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -87,6 +87,7 @@ void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), void *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); void ss_skb_dflt_destructor(struct sk_buff *skb); +void ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head); static inline bool ss_skb_is_within_fragment(char *begin_fragment, char *position, @@ -116,6 +117,8 @@ ss_skb_on_send(void *conn, struct sk_buff **skb_head) if (on_send) r = on_send(conn, skb_head); + if (!r && *skb_head) + ss_skb_on_send_dflt(conn, skb_head); return r; } diff --git a/fw/sync_socket.h b/fw/sync_socket.h index 8241e5186..c47dd0581 100644 --- a/fw/sync_socket.h +++ b/fw/sync_socket.h @@ -95,18 +95,6 @@ typedef struct ss_hooks { /* Callback to make some job on connection shutdown. */ void (*connection_on_shutdown)(TfwConn *conn); - - /* - * Default callback which is called before push skb - * to socket write queue. 
- */ - void (*connection_on_send)(TfwConn *conn, struct sk_buff **skb_head); - - /* - * Push skbs from connection write queue to socket write queue - * according TCP window. - */ - int (*connection_push)(TfwConn *conn, unsigned int mss_now); } SsHooks; /** diff --git a/fw/websocket.c b/fw/websocket.c index 710171124..5ef4c6494 100644 --- a/fw/websocket.c +++ b/fw/websocket.c @@ -70,8 +70,6 @@ static const SsHooks tfw_ws_srv_ss_hooks = { .connection_new = NULL, .connection_drop = tfw_ws_srv_ss_hook_drop, .connection_recv = tfw_connection_recv, - .connection_on_send = tfw_connection_on_send, - .connection_push = tfw_connection_push, }; /** From aae277855eab9346a18cbb3489e9590c56c7667b Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 28 Jan 2026 19:27:35 +0200 Subject: [PATCH 13/23] Adjust http level memory. - Since we use pools for HTTP memory allocation, change the API of all `tfw_pool_*` functions to pass `TfwClient` and account memory in this structure. - Remove the `TfwClient` refcounter (it is not used; this could have been done in previous commits). - Fix unit tests to check memory accounting and to clean up memory after each test, so that client memory is checked to be equal to zero after the test.
--- fw/client.c | 1 - fw/client.h | 2 -- fw/hpack.c | 8 ++--- fw/hpack.h | 2 +- fw/http.c | 19 +++++++++-- fw/http2.c | 3 +- fw/http_msg.c | 6 ++-- fw/http_msg.h | 9 ++--- fw/http_tbl.c | 2 +- fw/pool.c | 53 ++++++++++++++++++++--------- fw/pool.h | 8 +++-- fw/t/unit/helpers.c | 4 +-- fw/t/unit/test.c | 6 ++-- fw/t/unit/test_hpack.c | 3 +- fw/t/unit/test_http2_parser.c | 6 +++- fw/t/unit/test_http2_parser_hpack.c | 7 ++-- fw/t/unit/test_http_match.c | 4 ++- fw/t/unit/test_http_parser_common.c | 7 +++- fw/t/unit/test_http_parser_common.h | 19 +++++++++++ fw/t/unit/test_pool.c | 16 ++++----- fw/t/unit/tfw_str_helper.c | 2 +- fw/vhost.c | 2 +- 22 files changed, 125 insertions(+), 64 deletions(-) diff --git a/fw/client.c b/fw/client.c index ca92cbeab..3ee40cc6d 100644 --- a/fw/client.c +++ b/fw/client.c @@ -209,7 +209,6 @@ tfw_client_ent_init(TdbRec *rec, void *data) tfw_peer_init((TfwPeer *)cli, &ctx->addr); atomic_set(&cli->mem, 0); - atomic_set(&cli->refcnt, 0); ent->xff_addr = ctx->xff_addr; tfw_str_to_cstr(&ctx->user_agent, ent->user_agent, sizeof(ent->user_agent)); diff --git a/fw/client.h b/fw/client.h index 692da74f3..09ede31a4 100644 --- a/fw/client.h +++ b/fw/client.h @@ -32,14 +32,12 @@ * no any classification logic is used; * list_head - entry in the lru list; * @mem - memory used by current client; - * @refcnt - refcount for light versions of get/put client; */ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; atomic_t mem; - atomic_t refcnt; } TfwClient; int tfw_client_init(void); diff --git a/fw/hpack.c b/fw/hpack.c index c67594232..5ec842df7 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -1155,7 +1155,7 @@ tfw_huffman_init(TfwHPack *__restrict hp) } int -tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz) +tfw_hpack_init(TfwHPack *__restrict hp, void *owner, unsigned int htbl_sz) { bool np; TfwHPackETbl *et = &hp->enc_tbl; @@ -1169,14 +1169,14 @@ tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz) 
tfw_huffman_init(hp); dt->window = hp->max_window = htbl_sz; - if (!(dt->pool = __tfw_pool_new(0))) + if (!(dt->pool = __tfw_pool_new(0, owner))) return -ENOMEM; - if (!(dt->h_pool = __tfw_pool_new(0))) + if (!(dt->h_pool = __tfw_pool_new(0, owner))) goto err_dt; et->window = htbl_sz; et->rb_size = HPACK_ENC_TABLE_MAX_SIZE; - if (!(et->pool = __tfw_pool_new(HPACK_ENC_TABLE_MAX_SIZE))) + if (!(et->pool = __tfw_pool_new(HPACK_ENC_TABLE_MAX_SIZE, owner))) goto err_et; et->rbuf = tfw_pool_alloc_np(et->pool, HPACK_ENC_TABLE_MAX_SIZE, &np); diff --git a/fw/hpack.h b/fw/hpack.h index 732e924ec..a23983919 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -299,7 +299,7 @@ typedef struct { void write_int(unsigned long index, unsigned short max, unsigned short mask, TfwHPackInt *__restrict res_idx); -int tfw_hpack_init(TfwHPack *__restrict hp, unsigned int htbl_sz); +int tfw_hpack_init(TfwHPack *__restrict hp, void *owner, unsigned int htbl_sz); void tfw_hpack_clean(TfwHPack *__restrict hp); int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, diff --git a/fw/http.c b/fw/http.c index 5467b18e8..852d5c6d0 100644 --- a/fw/http.c +++ b/fw/http.c @@ -2864,7 +2864,10 @@ static TfwMsg * tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) { int type = TFW_CONN_TYPE(conn); - TfwHttpMsg *hm = __tfw_http_msg_alloc(type, true); + void *owner = type & Conn_Clnt ? 
conn->peer : NULL; + TfwHttpMsg *hm; + + hm = __tfw_http_msg_alloc(owner, type, true); if (unlikely(!hm)) return NULL; @@ -2881,7 +2884,7 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) if (TFW_FSM_TYPE(conn->proto.type) == TFW_FSM_H2) { TfwHttpReq *req = (TfwHttpReq *)hm; - if(!(req->pit.pool = __tfw_pool_new(0))) + if(!(req->pit.pool = __tfw_pool_new(0, owner))) goto clean; req->pit.parsed_hdr = &req->stream->parser.hdr; __set_bit(TFW_HTTP_B_H2, req->flags); @@ -2895,6 +2898,16 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) if (unlikely(tfw_http_resp_pair(hm))) goto clean; + /* Can be equal to zero for health monitor requests. */ + if (likely(hm->req->conn)) { + TfwClient *cli = (TfwClient *)hm->req->conn->peer; + + hm->pool->owner = cli; + tfw_client_get(cli); + tfw_client_adjust_mem(cli, + PAGE_SIZE << hm->pool->order); + } + if (TFW_MSG_H2(hm->req)) { size_t sz = TFW_HDR_MAP_SZ(TFW_HDR_MAP_INIT_CNT); TfwHttpTransIter *mit = &((TfwHttpResp *)hm)->mit; @@ -7851,7 +7864,7 @@ tfw_http_hm_srv_send(TfwServer *srv, char *data, unsigned long len) TfwHttpActionResult res; int r; - if (!(req = tfw_http_msg_alloc_req_light())) + if (!(req = tfw_http_msg_alloc_req_light(NULL))) return; hmreq = (TfwHttpMsg *)req; if (tfw_msg_iter_setup(&it, NULL, &hmreq->msg.skb_head, msg.len)) diff --git a/fw/http2.c b/fw/http2.c index ade6596ef..74d54bc14 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -320,7 +320,8 @@ tfw_h2_context_init(TfwH2Ctx *ctx, TfwH2Conn *conn) rset->wnd_sz = DEF_WND_SIZE; ctx->conn = conn; - return tfw_hpack_init(&ctx->hpack, HPACK_TABLE_DEF_SIZE); + return tfw_hpack_init(&ctx->hpack, ((TfwConn *)conn)->peer, + HPACK_TABLE_DEF_SIZE); } void diff --git a/fw/http_msg.c b/fw/http_msg.c index 3918377de..cd030da4d 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -875,12 +875,12 @@ tfw_http_msg_free(TfwHttpMsg *m) * for parsing and subsequent adjustment. 
*/ TfwHttpMsg * -__tfw_http_msg_alloc(int type, bool full) +__tfw_http_msg_alloc(void *owner, int type, bool full) { TfwHttpMsg *hm = (type & Conn_Clnt) - ? (TfwHttpMsg *)tfw_pool_new(TfwHttpReq, + ? (TfwHttpMsg *)tfw_pool_new(TfwHttpReq, owner, TFW_POOL_ZERO) - : (TfwHttpMsg *)tfw_pool_new(TfwHttpResp, + : (TfwHttpMsg *)tfw_pool_new(TfwHttpResp, owner, TFW_POOL_ZERO); if (!hm) { T_WARN("Insufficient memory to create %s message\n", diff --git a/fw/http_msg.h b/fw/http_msg.h index d94642f2f..ac9c0de46 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -76,18 +76,19 @@ tfw_http_msg_srvhdr_val(TfwStr *hdr, unsigned id, TfwStr *val) void tfw_http_msg_pair(TfwHttpResp *resp, TfwHttpReq *req); void tfw_http_msg_unpair(TfwHttpMsg *msg); -TfwHttpMsg *__tfw_http_msg_alloc(int type, bool full); +TfwHttpMsg *__tfw_http_msg_alloc(void *owner, int type, bool full); static inline TfwHttpReq * -tfw_http_msg_alloc_req_light(void) +tfw_http_msg_alloc_req_light(void *owner) { - return (TfwHttpReq *)__tfw_http_msg_alloc(Conn_Clnt, false); + return (TfwHttpReq *)__tfw_http_msg_alloc(owner, Conn_Clnt, false); } static inline TfwHttpResp * __tfw_http_msg_alloc_resp(TfwHttpReq *req, bool full) { - TfwHttpResp *resp = (TfwHttpResp *)__tfw_http_msg_alloc(Conn_Srv, full); + TfwHttpResp *resp = (TfwHttpResp *) + __tfw_http_msg_alloc(req->conn->peer, Conn_Srv, full); if (resp) tfw_http_msg_pair(resp, req); diff --git a/fw/http_tbl.c b/fw/http_tbl.c index b6d37120e..9f4394ba6 100644 --- a/fw/http_tbl.c +++ b/fw/http_tbl.c @@ -281,7 +281,7 @@ tfw_http_tbl_cfgstart(void) { BUG_ON(tfw_table_reconfig); - tfw_table_reconfig = tfw_pool_new(TfwHttpTable, TFW_POOL_ZERO); + tfw_table_reconfig = tfw_pool_new(TfwHttpTable, NULL, TFW_POOL_ZERO); if (!tfw_table_reconfig) { T_ERR_NL("Can't create a memory pool\n"); return -ENOMEM; diff --git a/fw/pool.c b/fw/pool.c index e16225a09..54f1963aa 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -47,6 +47,7 @@ #include "lib/str.h" #include "lib/fault_injection_alloc.h" 
+#include "fw/client.h" #include "pool.h" #define TFW_POOL_HEAD_OFF (TFW_POOL_ALIGN_SZ(sizeof(TfwPool)) \ @@ -70,10 +71,10 @@ static unsigned long __percpu (*pg_cache)[TFW_POOL_PGCACHE_SZ]; * through buddies coalescing). So we never cache multi-pages. */ static unsigned long -tfw_pool_alloc_pages(unsigned int order) +tfw_pool_alloc_pages(TfwClient *cli, unsigned int order) { + unsigned long pg_res = 0; unsigned int *pgn; - unsigned long pg_res; gfp_t flags; local_bh_disable(); @@ -83,20 +84,23 @@ tfw_pool_alloc_pages(unsigned int order) if (likely(*pgn && !order)) { --*pgn; pg_res = ((unsigned long *)this_cpu_ptr(pg_cache))[*pgn]; - - local_bh_enable(); - - return pg_res; } local_bh_enable(); - flags = order > 0 ? GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; - return __get_free_pages(flags, order); + if (!pg_res) { + flags = order > 0 ? GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; + pg_res = __get_free_pages(flags, order); + } + if (likely(pg_res) && cli) + tfw_client_adjust_mem(cli, PAGE_SIZE << order); + + return pg_res; + } ALLOW_ERROR_INJECTION(tfw_pool_alloc_pages, NULL); static void -tfw_pool_free_pages(unsigned long addr, unsigned int order) +tfw_pool_free_pages(TfwClient *cli, unsigned long addr, unsigned int order) { unsigned int *pgn; int refcnt; @@ -106,6 +110,9 @@ tfw_pool_free_pages(unsigned long addr, unsigned int order) pgn = this_cpu_ptr(&pg_next); refcnt = page_count(virt_to_page(addr)); + if (cli) + tfw_client_adjust_mem(cli, -(PAGE_SIZE << order)); + if (likely(*pgn < TFW_POOL_PGCACHE_SZ && !order && refcnt == 1)) { ((unsigned long *)this_cpu_ptr(pg_cache))[*pgn] = addr; ++*pgn; @@ -129,7 +136,7 @@ __tfw_pool_alloc_page(TfwPool *p, size_t n, bool align) unsigned int off = desc_size + n; unsigned int order = get_order(off); - c = (TfwPoolChunk *)tfw_pool_alloc_pages(order); + c = (TfwPoolChunk *)tfw_pool_alloc_pages(p->owner, order); if (!c) return NULL; c->next = curr; @@ -186,7 +193,9 @@ tfw_pool_free(TfwPool *p, void *ptr, size_t n) /* Free empty chunk which 
doesn't contain the pool header. */ if (unlikely(p->off == TFW_POOL_ALIGN_SZ(sizeof(TfwPoolChunk)))) { TfwPoolChunk *next = p->curr->next; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(p->curr), p->order); + + tfw_pool_free_pages(p->owner, TFW_POOL_CHUNK_BASE(p->curr), + p->order); p->curr = next; p->order = next->order; p->off = next->off; @@ -211,7 +220,8 @@ tfw_pool_clean_single(TfwPool *pool, void *ptr) if ((char *)ptr >= (char *)TFW_POOL_CHUNK_BASE(c) && (char *)ptr < (char *)TFW_POOL_CHUNK_BASE(c) + c->off) { - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(pool->owner, TFW_POOL_CHUNK_BASE(c), + c->order); prev->next = next; return; } @@ -236,7 +246,8 @@ tfw_pool_clean(TfwPool *pool) if (!(next = c->next)) break; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(pool->owner, TFW_POOL_CHUNK_BASE(c), + c->order); pool->curr->next = next; } } @@ -245,22 +256,27 @@ tfw_pool_clean(TfwPool *pool) * Allocate bit more pages than we need. 
*/ TfwPool * -__tfw_pool_new(size_t n) +__tfw_pool_new(size_t n, void *owner) { + TfwClient *cli = (TfwClient *)owner; TfwPool *p; TfwPoolChunk *c; unsigned int order; order = get_order(TFW_POOL_ALIGN_SZ(n) + TFW_POOL_HEAD_OFF); - c = (TfwPoolChunk *)tfw_pool_alloc_pages(order); + c = (TfwPoolChunk *)tfw_pool_alloc_pages(cli, order); if (unlikely(!c)) return NULL; + if (cli) + tfw_client_get(cli); + p = (TfwPool *)((char *)c + TFW_POOL_ALIGN_SZ(sizeof(*c))); c->next = NULL; p->order = c->order = order; + p->owner = owner; p->off = c->off = TFW_POOL_HEAD_OFF; p->curr = c; @@ -271,14 +287,19 @@ void tfw_pool_destroy(TfwPool *p) { TfwPoolChunk *c, *next; + TfwClient *cli; if (!p) return; + cli = p->owner; for (c = p->curr; c; c = next) { next = c->next; - tfw_pool_free_pages(TFW_POOL_CHUNK_BASE(c), c->order); + tfw_pool_free_pages(p->owner, TFW_POOL_CHUNK_BASE(c), + c->order); } + if (cli) + tfw_client_put(cli); } int diff --git a/fw/pool.h b/fw/pool.h index 2fb623fc2..c2ebe000e 100644 --- a/fw/pool.h +++ b/fw/pool.h @@ -53,18 +53,20 @@ typedef struct tfw_pool_chunk_t { * Memory pool descriptor. 
* * @curr - current chunk to allocate memory from; + * @owner - owner for memory accounting; * @order,@off - cached members of @curr; */ typedef struct { TfwPoolChunk *curr; + void *owner; unsigned int order; unsigned int off; } TfwPool; -#define tfw_pool_new(struct_name, mask) \ +#define tfw_pool_new(struct_name, owner, mask) \ ({ \ struct_name *s = NULL; \ - TfwPool *p = __tfw_pool_new(sizeof(struct_name)); \ + TfwPool *p = __tfw_pool_new(sizeof(struct_name), owner); \ if (likely(p)) { \ s = tfw_pool_alloc(p, sizeof(struct_name)); \ BUG_ON(!s); \ @@ -79,7 +81,7 @@ typedef struct { int tfw_pool_init(void); void tfw_pool_exit(void); -TfwPool *__tfw_pool_new(size_t n); +TfwPool *__tfw_pool_new(size_t n, void *owner); void *__tfw_pool_alloc_page(TfwPool *p, size_t n, bool align); void tfw_pool_free(TfwPool *p, void *ptr, size_t n); void tfw_pool_clean(TfwPool *p); diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index ade7089e5..1ae4abeb6 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -59,7 +59,7 @@ test_req_alloc(size_t data_len) * tfw_http_msg_alloc(). It is removed because we need to test how it * initializes the message and we would not like to test the copy-paste. 
*/ - hmreq = __tfw_http_msg_alloc(Conn_HttpClnt, true); + hmreq = __tfw_http_msg_alloc(&client, Conn_HttpClnt, true); BUG_ON(!hmreq); tfw_connection_init(&conn_req); @@ -106,7 +106,7 @@ test_resp_alloc_no_data(TfwHttpReq *req) { TfwHttpMsg *hmresp; - hmresp = __tfw_http_msg_alloc(Conn_HttpSrv, true); + hmresp = __tfw_http_msg_alloc(req->conn->peer, Conn_HttpSrv, true); BUG_ON(!hmresp); tfw_connection_init(&conn_resp); diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index 07f4cc979..2d5b0d3a2 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -123,17 +123,15 @@ test_run_all(void) EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); - test_case_alloc_h2(); + TEST_SETUP(test_http2_parser_setup_fn); + TEST_TEARDOWN(test_http2_parser_teardown_fn); TEST_SUITE_MPART_RUN(http2_parser); - test_req_resp_cleanup(); EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); - test_case_cleanup_h2(); TEST_SUITE_RUN(http2_parser_hpack); - test_req_resp_cleanup(); EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); __fpu_schedule(); diff --git a/fw/t/unit/test_hpack.c b/fw/t/unit/test_hpack.c index 63ccf09fb..339943ea3 100644 --- a/fw/t/unit/test_hpack.c +++ b/fw/t/unit/test_hpack.c @@ -84,9 +84,10 @@ static inline TfwHttpReq * test_hpack_req_alloc(void) { TfwHttpReq *req = test_req_alloc(0); + TfwHttpMsg *hmreq = (TfwHttpMsg *)req; BUG_ON(!req); - req->pit.pool = __tfw_pool_new(0); + req->pit.pool = __tfw_pool_new(0, tfw_http_msg_client(hmreq)); BUG_ON(!req->pit.pool); req->pit.parsed_hdr = &req->stream->parser.hdr; __set_bit(TFW_HTTP_B_H2, req->flags); diff --git a/fw/t/unit/test_http2_parser.c b/fw/t/unit/test_http2_parser.c index b3e1df588..45b91e4a2 100644 --- a/fw/t/unit/test_http2_parser.c +++ b/fw/t/unit/test_http2_parser.c @@ -2756,6 +2756,8 @@ TEST(http2_parser, perf) #define REQ_PERF(frames_buf) \ do { \ + test_req_resp_cleanup(); \ + tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ if 
(req) \ test_req_free(req); \ @@ -2819,7 +2821,9 @@ TEST(http2_parser, fuzzer) tfw_init_frames(); ADD_HEADERS_FRAME(str, headers_len); ADD_DATA_FRAME(str + headers_len, body_len); - test_case_parse_prepare_h2(); + test_req_resp_cleanup(); + tfw_h2_context_clear(conn.h2); + test_case_parse_prepare_h2(); switch (ret) { case FUZZ_VALID: TRY_PARSE_EXPECT_PASS(FUZZ_REQ_H2, CHUNK_ON); diff --git a/fw/t/unit/test_http2_parser_hpack.c b/fw/t/unit/test_http2_parser_hpack.c index 02fa9e928..808acab5b 100644 --- a/fw/t/unit/test_http2_parser_hpack.c +++ b/fw/t/unit/test_http2_parser_hpack.c @@ -1698,9 +1698,8 @@ TEST(http2_parser_hpack, erased_indexes_not_come_back) TEST_SUITE(http2_parser_hpack) { - test_case_alloc_h2(); - - TEST_SETUP(test_case_parse_prepare_h2); + TEST_SETUP(test_http2_parser_setup_fn); + TEST_TEARDOWN(test_http2_parser_teardown_fn); TEST_RUN(http2_parser_hpack, literal_header_field_with_incremental_indexing); TEST_RUN(http2_parser_hpack, literal_header_field_without_indexing); @@ -1714,6 +1713,4 @@ TEST_SUITE(http2_parser_hpack) TEST_RUN(http2_parser_hpack, dup_with_equal_values_in_indexes); TEST_RUN(http2_parser_hpack, dup_with_diff_values_in_indexes); TEST_RUN(http2_parser_hpack, erased_indexes_not_come_back); - - test_case_cleanup_h2(); } diff --git a/fw/t/unit/test_http_match.c b/fw/t/unit/test_http_match.c index c84a6d172..92d42acca 100644 --- a/fw/t/unit/test_http_match.c +++ b/fw/t/unit/test_http_match.c @@ -98,7 +98,9 @@ http_match_suite_setup(void) { test_req = test_req_alloc(1); - test_table = tfw_pool_new(TfwHttpTable, TFW_POOL_ZERO); + test_table = tfw_pool_new(TfwHttpTable, + tfw_http_msg_client((TfwHttpMsg *)test_req), + TFW_POOL_ZERO); BUG_ON(!test_table); INIT_LIST_HEAD(&test_table->head); diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index c55991845..d72843279 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -424,6 +424,8 @@ test_case_alloc_h2(void) { 
conn.h2 = tfw_h2_context_alloc(); BUG_ON(!conn.h2); + ((TfwConn *)&conn)->peer = (TfwPeer *)&client; + ((TfwConn *)&conn)->proto.type = Conn_H2Clnt; } void @@ -431,6 +433,7 @@ test_case_cleanup_h2(void) { BUG_ON(!conn.h2); + tfw_h2_context_clear(conn.h2); tfw_h2_context_free(conn.h2); conn.h2 = NULL; } @@ -494,6 +497,7 @@ do_split_and_parse(int type, int chunk_mode) req = test_req_alloc(frames_total_sz); } else if (type == FUZZ_REQ_H2) { + TfwHttpMsg *hmreq; /* * During the processing of a request, the HPACK dynamic table * is modified. The same query is used for each chunk size. @@ -520,7 +524,8 @@ do_split_and_parse(int type, int chunk_mode) req->stream = &stream; tfw_http_init_parser_req(req); stream.msg = (TfwMsg*)req; - req->pit.pool = __tfw_pool_new(0); + hmreq = (TfwHttpMsg *)req; + req->pit.pool = __tfw_pool_new(0, tfw_http_msg_client(hmreq)); BUG_ON(!req->pit.pool); __set_bit(TFW_HTTP_B_H2, req->flags); } else if (type == FUZZ_RESP) { diff --git a/fw/t/unit/test_http_parser_common.h b/fw/t/unit/test_http_parser_common.h index 455f8932f..0d284e84b 100644 --- a/fw/t/unit/test_http_parser_common.h +++ b/fw/t/unit/test_http_parser_common.h @@ -552,6 +552,7 @@ extern TfwHttpReq *req, *sample_req; extern TfwHttpResp *resp; extern TfwH2Conn conn; extern TfwStream stream; +extern TfwClient client; int set_sample_req(unsigned char *str); @@ -563,6 +564,20 @@ int do_split_and_parse(int type, int chunk_mode); int validate_data_fully_parsed(int type, size_t sz_diff); +static inline void +test_http2_parser_setup_fn(void) +{ + test_case_alloc_h2(); + test_case_parse_prepare_h2(); +} + +static inline void +test_http2_parser_teardown_fn(void) +{ + test_req_resp_cleanup(); + test_case_cleanup_h2(); +} + #define __TRY_PARSE_EXPECT_PASS(type, sz_diff, chunk_mode) \ chunk_size_index = 0; \ while (({ \ @@ -619,6 +634,8 @@ do { \ #define FOR_REQ_H2(frames_definition) \ ASSIGN_FRAMES_FOR_H2(frames_definition); \ PRINT_REQ_H2(); \ + test_req_resp_cleanup(); \ + 
tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ TRY_PARSE_EXPECT_PASS(FUZZ_REQ_H2, CHUNK_ON) @@ -635,6 +652,8 @@ do { \ #define EXPECT_BLOCK_REQ_H2(frames_definition) \ ASSIGN_FRAMES_FOR_H2(frames_definition); \ PRINT_REQ_H2(); \ + test_req_resp_cleanup(); \ + tfw_h2_context_clear(conn.h2); \ test_case_parse_prepare_h2(); \ TRY_PARSE_EXPECT_BLOCK(FUZZ_REQ_H2, CHUNK_ON) diff --git a/fw/t/unit/test_pool.c b/fw/t/unit/test_pool.c index 0908ac5cd..f778c9a1d 100644 --- a/fw/t/unit/test_pool.c +++ b/fw/t/unit/test_pool.c @@ -28,8 +28,8 @@ TEST(pool, alignment) void *a, *b, *c, *d; bool np; - /* this should give us a single page minus the 32 byte pool headers */ - p = __tfw_pool_new(1001); + /* this should give us a single page minus the 40 byte pool headers */ + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -42,7 +42,7 @@ TEST(pool, alignment) EXPECT_TRUE(c == b + 1); /* 'c' must be tightly packed */ /* 'd' should still fit into the same page */ - d = tfw_pool_alloc_not_align_np(p, PAGE_SIZE - (32 + 10), &np); + d = tfw_pool_alloc_not_align_np(p, PAGE_SIZE - (40 + 10), &np); EXPECT_TRUE(d == c + 1); EXPECT_FALSE(np); @@ -57,7 +57,7 @@ TEST(pool, realloc) TfwPool *p; void *a, *b, *c, *d; - p = __tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -73,11 +73,11 @@ TEST(pool, realloc) EXPECT_TRUE(d == c); /* allocate enough memory to use the entire chunk */ - d = tfw_pool_realloc(p, c, PAGE_SIZE - 300, PAGE_SIZE - 40); + d = tfw_pool_realloc(p, c, PAGE_SIZE - 300, PAGE_SIZE - 48); EXPECT_TRUE(d == c); /* the pool chunk must be exhausted now */ - d = tfw_pool_realloc(p, c, PAGE_SIZE - 40, PAGE_SIZE - 39); + d = tfw_pool_realloc(p, c, PAGE_SIZE - 48, PAGE_SIZE - 47); EXPECT_TRUE(d != c); } @@ -87,7 +87,7 @@ TEST(pool, clean_single) void *root, *curr, *first_ptr, *last_ptr; struct tfw_pool_chunk_t *head, *tail; - p = 
__tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); @@ -146,7 +146,7 @@ TEST(pool, clean) TfwPool *p; struct tfw_pool_chunk_t *head, *tail; - p = __tfw_pool_new(1001); + p = __tfw_pool_new(1001, NULL); EXPECT_NOT_NULL(p); EXPECT_TRUE(TFW_POOL_CHUNK_SZ(p) == PAGE_SIZE); diff --git a/fw/t/unit/tfw_str_helper.c b/fw/t/unit/tfw_str_helper.c index 67ba025f9..8e8bcdd3e 100644 --- a/fw/t/unit/tfw_str_helper.c +++ b/fw/t/unit/tfw_str_helper.c @@ -31,7 +31,7 @@ void create_str_pool(void) { BUG_ON(str_pool); - str_pool = __tfw_pool_new(1); + str_pool = __tfw_pool_new(1, NULL); BUG_ON(!str_pool); } diff --git a/fw/vhost.c b/fw/vhost.c index bdce49276..dd6511b4c 100644 --- a/fw/vhost.c +++ b/fw/vhost.c @@ -1965,7 +1965,7 @@ tfw_vhost_create(const char *name) + sizeof(TfwStickyCookie) + sizeof(FrangGlobCfg) + tfw_tls_vhost_priv_data_sz(); - if (!(pool = __tfw_pool_new(0))) + if (!(pool = __tfw_pool_new(0, NULL))) return NULL; if (!(vhost = tfw_kzalloc(size, GFP_KERNEL))) { From 96fed018f9c998ddd112527459f7df8f2485e0c8 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 4 Feb 2026 09:11:54 +0000 Subject: [PATCH 14/23] Use per cpu counter instead of atomic A big performance degradation was found after this patch. During investigation it was found that the problem is in the usage of an atomic counter for client mem accounting. Using a per_cpu array instead of the atomic counter fixes the performance issue.
--- db/core/main.c | 12 +++++++++--- db/core/tdb.h | 2 +- fw/client.c | 15 +++++++++++++-- fw/client.h | 17 +++++++++++++++-- fw/http_limits.c | 2 +- fw/http_sess.c | 4 +++- fw/t/unit/test.c | 8 ++++---- fw/tf_filter.c | 4 +++- 8 files changed, 49 insertions(+), 15 deletions(-) diff --git a/db/core/main.c b/db/core/main.c index b1e5a96c4..09dac4e30 100644 --- a/db/core/main.c +++ b/db/core/main.c @@ -355,11 +355,17 @@ tdb_rec_get_alloc(TDB *db, unsigned long key, TdbGetAllocCtx *ctx) } ctx->is_new = true; r = tdb_entry_alloc(db, key, &ctx->len); - if (!r) { + if (unlikely(!r)) { spin_unlock(&db->ga_lock); - return r; + return NULL; + } + if (unlikely(ctx->init_rec(r, ctx->ctx))) { + tdb_rec_put(db, r); + tdb_entry_remove(db, key, NULL, NULL, true); + spin_unlock(&db->ga_lock); + return NULL; } - ctx->init_rec(r, ctx->ctx); + tdb_entry_mark_complete(r); spin_unlock(&db->ga_lock); diff --git a/db/core/tdb.h b/db/core/tdb.h index 4473ecc0b..74b72e410 100644 --- a/db/core/tdb.h +++ b/db/core/tdb.h @@ -160,7 +160,7 @@ typedef struct { typedef struct { bool (*eq_rec)(TdbRec *rec, void *ctx); int (*precreate_rec)(void *ctx); - void (*init_rec)(TdbRec *rec, void *ctx); + int (*init_rec)(TdbRec *rec, void *ctx); void *ctx; size_t len; bool is_new; diff --git a/fw/client.c b/fw/client.c index 3ee40cc6d..6a7dc2fb9 100644 --- a/fw/client.c +++ b/fw/client.c @@ -30,6 +30,7 @@ #include "log.h" #include "procfs.h" #include "tdb.h" +#include "lib/fault_injection_alloc.h" #include "lib/str.h" #include "lib/common.h" @@ -191,12 +192,17 @@ tfw_client_addr_eq(TdbRec *rec, void *data) return true; } -static void +static int tfw_client_ent_init(TdbRec *rec, void *data) { TfwClientEntry *ent = (TfwClientEntry *)rec->data; TfwClient *cli = &ent->cli; TfwClientEqCtx *ctx = (TfwClientEqCtx *)data; + int cpu; + + cli->mem = tfw_alloc_percpu(long); + if (unlikely(!cli->mem)) + return -ENOMEM; assert_spin_locked(&client_db->ga_lock); @@ -207,8 +213,9 @@ tfw_client_ent_init(TdbRec *rec, void 
*data) if (ctx->init) ctx->init(cli); + for_each_online_cpu(cpu) + *(per_cpu_ptr(cli->mem, cpu)) = 0; tfw_peer_init((TfwPeer *)cli, &ctx->addr); - atomic_set(&cli->mem, 0); ent->xff_addr = ctx->xff_addr; tfw_str_to_cstr(&ctx->user_agent, ent->user_agent, sizeof(ent->user_agent)); @@ -217,6 +224,8 @@ tfw_client_ent_init(TdbRec *rec, void *data) T_DBG("new client: cli=%p\n", cli); T_DBG_ADDR("client address", &cli->addr, TFW_NO_PORT); T_DBG2("client %p, users=%d\n", cli, 1); + + return 0; } /** @@ -331,6 +340,8 @@ tfw_client_stop(void) { if (tfw_runstate_is_reconfig()) return; + + tfw_client_free_lru(); if (client_db) { tfw_client_free_lru(); tdb_close(client_db); diff --git a/fw/client.h b/fw/client.h index 09ede31a4..1bf22b25a 100644 --- a/fw/client.h +++ b/fw/client.h @@ -37,7 +37,7 @@ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; - atomic_t mem; + long __percpu *mem; } TfwClient; int tfw_client_init(void); @@ -57,7 +57,20 @@ void tfw_tls_connection_lost(TfwConn *conn); static inline void tfw_client_adjust_mem(TfwClient *cli, int delta) { - atomic_add(delta, &cli->mem); + this_cpu_add(*cli->mem, delta); + +} + +static inline long +tfw_client_mem(TfwClient *cli) +{ + long mem = 0; + int cpu; + + for_each_online_cpu(cpu) + mem += *(per_cpu_ptr(cli->mem, cpu)); + + return mem; } #endif /* __TFW_CLIENT_H__ */ diff --git a/fw/http_limits.c b/fw/http_limits.c index aa3465200..76fba65ac 100644 --- a/fw/http_limits.c +++ b/fw/http_limits.c @@ -1677,7 +1677,7 @@ frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) TfwClient *cli = (TfwClient *)conn->peer; if (tfw_cli_hard_mem_limit - && atomic_read(&cli->mem) > tfw_cli_hard_mem_limit) + && tfw_client_mem(cli) > tfw_cli_hard_mem_limit) { if (block_if_exceeded) { TfwVhost *dflt_vh = tfw_vhost_lookup_default(); diff --git a/fw/http_sess.c b/fw/http_sess.c index de43d4b36..927bb2a6a 100644 --- a/fw/http_sess.c +++ b/fw/http_sess.c @@ -771,7 +771,7 @@ 
tfw_http_sess_precreate(void *data) return 0; } -static void +static int tfw_sess_ent_init(TdbRec *rec, void *data) { TfwSessEntry *ent = (TfwSessEntry *)rec->data; @@ -802,6 +802,8 @@ tfw_sess_ent_init(TdbRec *rec, void *data) rwlock_init(&sess->lock); T_DBG("http_sess was newly created, %pK\n", sess); + + return 0; } /** diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index 2d5b0d3a2..82c11a254 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -120,19 +120,19 @@ test_run_all(void) TEST_SUITE_MPART_RUN(http1_parser); test_req_resp_cleanup(); - EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SETUP(test_http2_parser_setup_fn); TEST_TEARDOWN(test_http2_parser_teardown_fn); TEST_SUITE_MPART_RUN(http2_parser); - EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SUITE_RUN(http2_parser_hpack); - EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SUITE_RUN(http_cache); @@ -143,7 +143,7 @@ test_run_all(void) TEST_SUITE_RUN(http_msg); test_req_resp_cleanup(); - EXPECT_EQ(atomic_read(&((TfwClient *)conn_req.peer)->mem), 0); + EXPECT_EQ(tfw_client_mem((TfwClient *)conn_req.peer), 0); __fpu_schedule(); TEST_SUITE_RUN(hash); diff --git a/fw/tf_filter.c b/fw/tf_filter.c index 26c381456..02f9635bd 100644 --- a/fw/tf_filter.c +++ b/fw/tf_filter.c @@ -89,7 +89,7 @@ put_fingerprint_rates(Storage *storage, Rates *rates) tdb_rec_put(storage->tdb, (char *)rates - sizeof(TdbRec)); } -static void +static int get_alloc_ctx_init_rec(TdbRec *rec, void *) { Rates *rates = (Rates *)rec->data; @@ -99,6 +99,8 @@ get_alloc_ctx_init_rec(TdbRec *rec, void *) spin_lock_init(&rates->conns_lock); spin_lock_init(&rates->recs_lock); tdb_rec_keep(rec); + + return 0; } /** From 
44cd4df8a45f4d9322945a2c4474e641af65d901 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 4 Mar 2026 18:24:25 +0200 Subject: [PATCH 15/23] Fix removing still used clients. Previously we removed a client entry from TDB when there was no entry in `client_lru.free_list` and a new client was allocated, even if the removed client still had active connections. There is a bug in this strategy: if the removed client has hung connections, we can't close and destroy them during Tempesta FW unloading, because we close and destroy connections while iterating over active clients (`tfw_client_for_each`). In the new strategy we change the logic in `tdb_htrie_put_rec`. We add a pointer to the bucket to the record structure. When we remove a record, we zero this pointer. If the record reference counter becomes zero but the bucket pointer is still not NULL (the record was not removed), we remove the record from the bucket using this pointer. For clients we just use tfw_client_put, without removing the record; when the client reference counter becomes zero, the client record is removed from the bucket and freed. --- fw/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fw/client.c b/fw/client.c index 6a7dc2fb9..ffafbbb4b 100644 --- a/fw/client.c +++ b/fw/client.c @@ -104,6 +104,7 @@ tfw_client_free(TdbRec *rec) * Tempesta FW shut down from `tfw_client_free_lru` */ WARN_ON(!list_empty(&cli->list)); + free_percpu(cli->mem); } static void @@ -341,7 +342,6 @@ tfw_client_stop(void) if (tfw_runstate_is_reconfig()) return; - tfw_client_free_lru(); if (client_db) { tfw_client_free_lru(); tdb_close(client_db); From 33f4eb9e9710e156b3ca25d15f41b97d02ba2cb1 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 18 Mar 2026 10:53:06 +0200 Subject: [PATCH 16/23] Improve performance and make fixes We can't call tfw_client_get/put on each allocated or orphaned skb (or on each pool creation/destruction).
Under pressure, when we have a lot of CPUs, that leads to atomic contention and severe performance degradation. To fix this problem we implement a special TfwClientMem structure with its own reference accounting (using struct percpu_ref!) and save a pointer to it in the client structure. We use percpu_ref_tryget/percpu_ref_put during skb allocation/deallocation (it's very cheap). When we destroy a client we schedule work, call `percpu_ref_kill_and_confirm` and wait until all skbs are orphaned. Also make some fixes according to review: - Call `tfw_client_free` for incomplete records also. - Implement `tfw_alloc_percpu_gfp`, same as `alloc_percpu_gfp` but with error injection. - Fix memory accounting during copying skbs. --- fw/cache.c | 2 + fw/client.c | 75 ++++++++++++++++++++++++----- fw/client.h | 33 ++++++++++--- fw/http.c | 24 +++++---- fw/http.h | 6 +-- fw/http2.c | 2 +- fw/http_frame.c | 3 +- fw/http_msg.c | 6 +-- fw/http_msg.h | 3 +- fw/http_stream.c | 2 +- fw/pool.c | 31 ++++++------ fw/sock.c | 9 +++- fw/ss_skb.c | 43 ++++++----------- fw/t/unit/helpers.c | 21 ++++---- fw/t/unit/test_hpack.c | 10 +++- fw/t/unit/test_http_match.c | 5 +- fw/t/unit/test_http_msg.c | 3 +- fw/t/unit/test_http_parser_common.c | 3 +- fw/tls.c | 9 ++-- lib/fault_injection_alloc.c | 8 +++ lib/fault_injection_alloc.h | 5 ++ 21 files changed, 203 insertions(+), 100 deletions(-) diff --git a/fw/cache.c b/fw/cache.c index 827613700..4d10a3484 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -2820,6 +2820,8 @@ tfw_cache_add_body_page(TfwMsgIter *it, char *p, int sz, bool h2) if (!h2) skb_frag_ref(it->skb, it->frag); ss_skb_adjust_data_len(it->skb, sz); + if (!h2) + ss_skb_adjust_client_mem(it->skb, -sz); return 0; } diff --git a/fw/client.c b/fw/client.c index ffafbbb4b..2ffbd8cbd 100644 --- a/fw/client.c +++ b/fw/client.c @@ -65,6 +65,9 @@ static struct { static TDB *client_db; +static atomic_t shutdown_pending = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(shutdown_wq); + /* * Called only under
db->ga_lock. * @@ -104,7 +107,11 @@ tfw_client_free(TdbRec *rec) * Tempesta FW shut down from `tfw_client_free_lru` */ WARN_ON(!list_empty(&cli->list)); - free_percpu(cli->mem); + if (likely(cli->cli_mem)) { + atomic_inc(&shutdown_pending); + if (!schedule_work(&cli->cli_mem->kill_work)) + atomic_dec(&shutdown_pending); + } } static void @@ -145,12 +152,6 @@ tfw_client_put(TfwClient *cli) tdb_rec_put(client_db, rec); } -void -tfw_client_get(TfwClient *cli) -{ - tdb_rec_keep(((TdbFRec *)cli) - 1); -} - typedef struct { TfwAddr addr; TfwAddr xff_addr; @@ -193,16 +194,67 @@ tfw_client_addr_eq(TdbRec *rec, void *data) return true; } +static void +cli_mem_release(struct percpu_ref *ref) +{ + TfwClientMem *cli_mem = container_of(ref, TfwClientMem, refcnt); + + percpu_ref_exit(&cli_mem->refcnt); + free_percpu(cli_mem->mem); + kfree(cli_mem); + + if (atomic_dec_and_test(&shutdown_pending)) + wake_up(&shutdown_wq); +} + +static void +tfw_cli_mem_kill_work_fn(struct work_struct *work) +{ + TfwClientMem *cli_mem = container_of(work, TfwClientMem, kill_work); + + percpu_ref_kill(&cli_mem->refcnt); + percpu_ref_put(&cli_mem->refcnt); +} + +static inline TfwClientMem * +tfw_client_mem_alloc(void) +{ + TfwClientMem *cli_mem; + + cli_mem = tfw_kmalloc(sizeof(TfwClientMem), GFP_ATOMIC); + if (unlikely(!cli_mem)) + return NULL; + + cli_mem->mem = tfw_alloc_percpu_gfp(long, GFP_ATOMIC | __GFP_ZERO); + if (!cli_mem->mem) + goto free_cli_mem; + + if (percpu_ref_init(&cli_mem->refcnt, cli_mem_release, 0, GFP_ATOMIC)) + goto free_per_cpu_mem; + + percpu_ref_get(&cli_mem->refcnt); + + INIT_WORK(&cli_mem->kill_work, tfw_cli_mem_kill_work_fn); + + return cli_mem; + +free_per_cpu_mem: + free_percpu(cli_mem->mem); +free_cli_mem: + kfree(cli_mem); + + return NULL; +} + static int tfw_client_ent_init(TdbRec *rec, void *data) { TfwClientEntry *ent = (TfwClientEntry *)rec->data; TfwClient *cli = &ent->cli; TfwClientEqCtx *ctx = (TfwClientEqCtx *)data; - int cpu; - cli->mem = 
tfw_alloc_percpu(long); - if (unlikely(!cli->mem)) + cli->cli_mem = tfw_client_mem_alloc(); + if (unlikely(!cli->cli_mem)) return -ENOMEM; assert_spin_locked(&client_db->ga_lock); @@ -214,8 +266,6 @@ tfw_client_ent_init(TdbRec *rec, void *data) if (ctx->init) ctx->init(cli); - for_each_online_cpu(cpu) - *(per_cpu_ptr(cli->mem, cpu)) = 0; tfw_peer_init((TfwPeer *)cli, &ctx->addr); ent->xff_addr = ctx->xff_addr; tfw_str_to_cstr(&ctx->user_agent, ent->user_agent, @@ -344,6 +394,7 @@ tfw_client_stop(void) if (client_db) { tfw_client_free_lru(); + wait_event(shutdown_wq, !atomic_read(&shutdown_pending)); tdb_close(client_db); client_db = NULL; } diff --git a/fw/client.h b/fw/client.h index 1bf22b25a..c0ac8a62c 100644 --- a/fw/client.h +++ b/fw/client.h @@ -24,27 +24,32 @@ #include "http_limits.h" #include "connection.h" +typedef struct { + struct percpu_ref refcnt; + struct work_struct kill_work; + long __percpu *mem; +} TfwClientMem; + /** * Client descriptor. * * @class_prvt - private client accounting data for classifier module. 
* Typically it's large and wastes memory in vain if * no any classification logic is used; - * list_head - entry in the lru list; - * @mem - memory used by current client; + * @list_head - entry in the lru list; + * @cli_mem - memory used by current client; */ typedef struct { TFW_PEER_COMMON; TfwClassifierPrvt class_prvt; struct list_head list; - long __percpu *mem; + TfwClientMem *cli_mem; } TfwClient; int tfw_client_init(void); void tfw_client_exit(void); TfwClient *tfw_client_obtain(TfwAddr addr, TfwAddr *cli_addr, TfwStr *user_agent, void (*init)(void *)); -void tfw_client_get(TfwClient *cli); void tfw_client_put(TfwClient *cli); int tfw_client_for_each(int (*fn)(void *)); void tfw_cli_conn_release(TfwCliConn *cli_conn); @@ -54,11 +59,25 @@ void tfw_cli_abort_all(void); void tfw_tls_connection_lost(TfwConn *conn); +#define CLIENT_MEM_FROM_CONN(conn) \ + ((TfwClient *)((TfwConn *)conn)->peer)->cli_mem + static inline void -tfw_client_adjust_mem(TfwClient *cli, int delta) +tfw_client_adjust_mem(TfwClientMem *cli_mem, int delta) { - this_cpu_add(*cli->mem, delta); + this_cpu_add(*cli_mem->mem, delta); +} +static inline bool +tfw_client_mem_get(TfwClientMem *cli_mem) +{ + return percpu_ref_tryget(&cli_mem->refcnt); +} + +static inline void +tfw_client_mem_put(TfwClientMem *cli_mem) +{ + percpu_ref_put(&cli_mem->refcnt); } static inline long @@ -68,7 +87,7 @@ tfw_client_mem(TfwClient *cli) int cpu; for_each_online_cpu(cpu) - mem += *(per_cpu_ptr(cli->mem, cpu)); + mem += *(per_cpu_ptr(cli->cli_mem->mem, cpu)); return mem; } diff --git a/fw/http.c b/fw/http.c index 852d5c6d0..fb8bf5043 100644 --- a/fw/http.c +++ b/fw/http.c @@ -728,7 +728,7 @@ tfw_h1_write_resp(TfwHttpResp *resp, unsigned short status, TfwStr *msg) int r = 0; TfwStr *c, *end, *field_c, *field_end; - r = tfw_msg_iter_setup(&it, tfw_http_msg_client((TfwHttpMsg *)resp), + r = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem((TfwHttpMsg *)resp), &resp->msg.skb_head, msg->len); if (unlikely(r)) return r; 
@@ -1164,7 +1164,7 @@ tfw_h2_resp_fwd(TfwHttpResp *resp) if (resp_in_xmit) { void *owner = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; - BUG_ON(owner != resp->req->conn->peer); + BUG_ON(owner != CLIENT_MEM_FROM_CONN(resp->req->conn)); TFW_SKB_CB(resp->msg.skb_head)->opaque_data = resp; TFW_SKB_CB(resp->msg.skb_head)->destructor = tfw_h2_stream_skb_destructor; @@ -2864,7 +2864,7 @@ static TfwMsg * tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) { int type = TFW_CONN_TYPE(conn); - void *owner = type & Conn_Clnt ? conn->peer : NULL; + void *owner = type & Conn_Clnt ? CLIENT_MEM_FROM_CONN(conn) : NULL; TfwHttpMsg *hm; hm = __tfw_http_msg_alloc(owner, type, true); @@ -2901,10 +2901,11 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) /* Can be equal to zero for health monitor requests. */ if (likely(hm->req->conn)) { TfwClient *cli = (TfwClient *)hm->req->conn->peer; + TfwClientMem *cli_mem = cli->cli_mem; - hm->pool->owner = cli; - tfw_client_get(cli); - tfw_client_adjust_mem(cli, + hm->pool->owner = cli_mem; + BUG_ON(!tfw_client_mem_get(cli_mem)); + tfw_client_adjust_mem(cli_mem, PAGE_SIZE << hm->pool->order); } @@ -4375,7 +4376,7 @@ tfw_h2_adjust_req(TfwHttpReq *req) if (WARN_ON_ONCE(h1_hdrs_sz < 0)) return -EINVAL; - r = tfw_msg_iter_setup(&it, tfw_http_msg_client((TfwHttpMsg *)req), + r = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem((TfwHttpMsg *)req), &new_head, h1_hdrs_sz); if (unlikely(r)) return r; @@ -5462,7 +5463,8 @@ tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head) return -EPIPE; BUG_ON(stream->xmit.skb_head || stream->xmit.resp); - TFW_SKB_CB(*skb_head)->opaque_data = resp->req->conn->peer; + TFW_SKB_CB(*skb_head)->opaque_data = + CLIENT_MEM_FROM_CONN(resp->req->conn); TFW_SKB_CB(*skb_head)->destructor = ss_skb_dflt_destructor; stream->xmit.resp = resp; @@ -6593,7 +6595,8 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, */ if (!TFW_SKB_CB(skb)->opaque_data) { ss_skb_set_owner(skb, 
ss_skb_dflt_destructor, - conn->peer, skb->truesize); + CLIENT_MEM_FROM_CONN(conn), + skb->truesize); } r = frang_client_mem_limit((TfwCliConn *)conn, false); @@ -7448,7 +7451,8 @@ tfw_http_resp_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, conn_stop = test_bit(TFW_HTTP_B_REQ_DROP, hmresp->req->flags); ss_skb_set_owner(skb, ss_skb_dflt_destructor, - cli_conn->peer, skb->truesize); + CLIENT_MEM_FROM_CONN(cli_conn), + skb->truesize); r = frang_client_mem_limit(cli_conn, false); if (unlikely(r)) { diff --git a/fw/http.h b/fw/http.h index 148a0bcab..d39656be9 100644 --- a/fw/http.h +++ b/fw/http.h @@ -598,13 +598,13 @@ tfw_http_msg_is_req(TfwHttpMsg *msg) return msg->conn && TFW_CONN_TYPE(msg->conn) & Conn_Clnt; } -static inline TfwClient * -tfw_http_msg_client(TfwHttpMsg *msg) +static inline TfwClientMem * +tfw_http_msg_client_mem(TfwHttpMsg *msg) { TfwCliConn *conn = (TfwCliConn *)(tfw_http_msg_is_req(msg) ? msg->conn : msg->pair->conn); - return (TfwClient *)conn->peer; + return ((TfwClient *)conn->peer)->cli_mem; } static inline int diff --git a/fw/http2.c b/fw/http2.c index 74d54bc14..380be4f18 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -320,7 +320,7 @@ tfw_h2_context_init(TfwH2Ctx *ctx, TfwH2Conn *conn) rset->wnd_sz = DEF_WND_SIZE; ctx->conn = conn; - return tfw_hpack_init(&ctx->hpack, ((TfwConn *)conn)->peer, + return tfw_hpack_init(&ctx->hpack, CLIENT_MEM_FROM_CONN(conn), HPACK_TABLE_DEF_SIZE); } diff --git a/fw/http_frame.c b/fw/http_frame.c index c728862ce..ae1e3ef12 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -326,7 +326,8 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, T_DBG2("Preparing HTTP/2 message with %lu bytes data\n", data->len); msg.len = data->len; - if ((r = tfw_msg_iter_setup(&it, conn->peer, &msg.skb_head, msg.len))) + if ((r = tfw_msg_iter_setup(&it, CLIENT_MEM_FROM_CONN(conn), + &msg.skb_head, msg.len))) goto err; if ((r = tfw_msg_iter_write(&it, data))) diff --git a/fw/http_msg.c 
b/fw/http_msg.c index cd030da4d..1fe58f53b 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -817,7 +817,7 @@ tfw_http_msg_append_skb(TfwHttpMsg *hm) TfwMsgIter *it = &hm->iter; int r; - r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_client(hm), 0); + r = ss_skb_alloc_data(&it->skb_head, tfw_http_msg_client_mem(hm), 0); if (unlikely(r)) return r; @@ -939,7 +939,7 @@ tfw_http_msg_expand_data(TfwHttpMsg *hm, struct sk_buff **skb_head, return -ENOMEM; ss_skb_set_owner(it->skb, ss_skb_dflt_destructor, - tfw_http_msg_client(hm), + tfw_http_msg_client_mem(hm), it->skb->truesize); ss_skb_queue_tail(skb_head, it->skb); it->frag = -1; @@ -1211,7 +1211,7 @@ __tfw_http_msg_expand_from_pool(TfwHttpMsg *hm, const TfwStr *str, return -ENOMEM; ss_skb_set_owner(nskb, ss_skb_dflt_destructor, - tfw_http_msg_client(hm), + tfw_http_msg_client_mem(hm), nskb->truesize); /* * TODO #2136: Remove this flag during reworking diff --git a/fw/http_msg.h b/fw/http_msg.h index ac9c0de46..62c81fc72 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -88,7 +88,8 @@ static inline TfwHttpResp * __tfw_http_msg_alloc_resp(TfwHttpReq *req, bool full) { TfwHttpResp *resp = (TfwHttpResp *) - __tfw_http_msg_alloc(req->conn->peer, Conn_Srv, full); + __tfw_http_msg_alloc(CLIENT_MEM_FROM_CONN(req->conn), + Conn_Srv, full); if (resp) tfw_http_msg_pair(resp, req); diff --git a/fw/http_stream.c b/fw/http_stream.c index 71cf4ea52..88ed2daae 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -817,7 +817,7 @@ tfw_h2_stream_skb_destructor(struct sk_buff *skb) { TfwHttpResp *resp = (TfwHttpResp *)TFW_SKB_CB(skb)->opaque_data; - TFW_SKB_CB(skb)->opaque_data = resp->req->conn->peer; + TFW_SKB_CB(skb)->opaque_data = CLIENT_MEM_FROM_CONN(resp->req->conn); ss_skb_dflt_destructor(skb); tfw_http_resp_pair_free_and_put_conn(resp); } diff --git a/fw/pool.c b/fw/pool.c index 54f1963aa..9ea015088 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -71,7 +71,7 @@ static unsigned long __percpu (*pg_cache)[TFW_POOL_PGCACHE_SZ]; * 
through buddies coalescing). So we never cache multi-pages. */ static unsigned long -tfw_pool_alloc_pages(TfwClient *cli, unsigned int order) +tfw_pool_alloc_pages(TfwClientMem *cli_mem, unsigned int order) { unsigned long pg_res = 0; unsigned int *pgn; @@ -91,8 +91,8 @@ tfw_pool_alloc_pages(TfwClient *cli, unsigned int order) flags = order > 0 ? GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; pg_res = __get_free_pages(flags, order); } - if (likely(pg_res) && cli) - tfw_client_adjust_mem(cli, PAGE_SIZE << order); + if (likely(pg_res) && cli_mem) + tfw_client_adjust_mem(cli_mem, PAGE_SIZE << order); return pg_res; @@ -100,7 +100,8 @@ tfw_pool_alloc_pages(TfwClient *cli, unsigned int order) ALLOW_ERROR_INJECTION(tfw_pool_alloc_pages, NULL); static void -tfw_pool_free_pages(TfwClient *cli, unsigned long addr, unsigned int order) +tfw_pool_free_pages(TfwClientMem *cli_mem, unsigned long addr, + unsigned int order) { unsigned int *pgn; int refcnt; @@ -110,8 +111,8 @@ tfw_pool_free_pages(TfwClient *cli, unsigned long addr, unsigned int order) pgn = this_cpu_ptr(&pg_next); refcnt = page_count(virt_to_page(addr)); - if (cli) - tfw_client_adjust_mem(cli, -(PAGE_SIZE << order)); + if (cli_mem) + tfw_client_adjust_mem(cli_mem, -(PAGE_SIZE << order)); if (likely(*pgn < TFW_POOL_PGCACHE_SZ && !order && refcnt == 1)) { ((unsigned long *)this_cpu_ptr(pg_cache))[*pgn] = addr; @@ -258,25 +259,25 @@ tfw_pool_clean(TfwPool *pool) TfwPool * __tfw_pool_new(size_t n, void *owner) { - TfwClient *cli = (TfwClient *)owner; + TfwClientMem *cli_mem = (TfwClientMem *)owner; TfwPool *p; TfwPoolChunk *c; unsigned int order; order = get_order(TFW_POOL_ALIGN_SZ(n) + TFW_POOL_HEAD_OFF); - c = (TfwPoolChunk *)tfw_pool_alloc_pages(cli, order); + c = (TfwPoolChunk *)tfw_pool_alloc_pages(cli_mem, order); if (unlikely(!c)) return NULL; - if (cli) - tfw_client_get(cli); + if (cli_mem) + BUG_ON(!tfw_client_mem_get(cli_mem)); p = (TfwPool *)((char *)c + TFW_POOL_ALIGN_SZ(sizeof(*c))); c->next = NULL; p->order = 
c->order = order; - p->owner = owner; + p->owner = cli_mem; p->off = c->off = TFW_POOL_HEAD_OFF; p->curr = c; @@ -287,19 +288,19 @@ void tfw_pool_destroy(TfwPool *p) { TfwPoolChunk *c, *next; - TfwClient *cli; + TfwClientMem *cli_mem; if (!p) return; - cli = p->owner; + cli_mem = p->owner; for (c = p->curr; c; c = next) { next = c->next; tfw_pool_free_pages(p->owner, TFW_POOL_CHUNK_BASE(c), c->order); } - if (cli) - tfw_client_put(cli); + if (cli_mem) + tfw_client_mem_put(cli_mem); } int diff --git a/fw/sock.c b/fw/sock.c index 89f387852..a2a56de89 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -658,6 +658,8 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) * and after the transmission. */ if (flags & SS_F_KEEP_SKB) { + unsigned int head_data, copied_truesize; + skb = *skb_head; do { /* tcp_transmit_skb() will clone the skb. */ @@ -669,9 +671,14 @@ ss_send(struct sock *sk, struct sk_buff **skb_head, int flags) goto err; } memset(twin_skb->cb, 0, sizeof(twin_skb->cb)); + head_data = MAX_TCP_HEADER + skb_headlen(twin_skb); + copied_truesize = + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + + SKB_DATA_ALIGN(head_data + + sizeof(struct skb_shared_info)); ss_skb_set_owner(twin_skb, ss_skb_dflt_destructor, TFW_SKB_CB(skb)->opaque_data, - skb_headlen(skb)); + copied_truesize); ss_skb_queue_tail(&sw.skb_head, twin_skb); skb = skb->next; } while (skb != *skb_head); diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 15bd3ef17..1fc3bec59 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1742,11 +1742,12 @@ ALLOW_ERROR_INJECTION(ss_skb_realloc_headroom, ERRNO); void ss_skb_dflt_destructor(struct sk_buff *skb) { - TfwClient *cli = (TfwClient *)TFW_SKB_CB(skb)->opaque_data; + TfwClientMem *cli_mem = + (TfwClientMem *)TFW_SKB_CB(skb)->opaque_data; BUG_ON(skb_tfw_is_in_socket_write_queue(skb)); ss_skb_adjust_client_mem(skb, -TFW_SKB_CB(skb)->mem); - tfw_client_put(cli); + tfw_client_mem_put(cli_mem); } void @@ -1760,42 +1761,30 @@ void ss_skb_set_owner(struct sk_buff *skb, void 
(*destructor)(struct sk_buff *), void *owner, unsigned int mem) { - /* - * Can be zero when this function is called from `__extend_pgfrags` - * for already orphaned SKBs. - */ - if (owner) { - /* - * All SKBs were orphaned when Tempesta FW received them. - * We can safely use `skb->sk` for our purposes until - * this SKBs will be passed to the socket write queue. - */ - BUG_ON(TFW_SKB_CB(skb)->opaque_data); - WARN_ON(TFW_SKB_CB(skb)->mem != 0); + TfwClientMem *cli_mem = (TfwClientMem *)owner; - tfw_client_get((TfwClient *)owner); - TFW_SKB_CB(skb)->opaque_data = owner; - TFW_SKB_CB(skb)->destructor = destructor; - ss_skb_adjust_client_mem(skb, mem); - } + if (!cli_mem || !tfw_client_mem_get(cli_mem)) + return; + + WARN_ON(TFW_SKB_CB(skb)->opaque_data); + WARN_ON(TFW_SKB_CB(skb)->mem != 0); + TFW_SKB_CB(skb)->opaque_data = cli_mem; + TFW_SKB_CB(skb)->destructor = destructor; + ss_skb_adjust_client_mem(skb, mem); } void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) { - TfwClient *cli; + TfwClientMem *cli_mem; if (skb_tfw_is_in_socket_write_queue(skb)) return; - cli = (TfwClient *)TFW_SKB_CB(skb)->opaque_data; - /* - * `cli` can be zero here when this function is called - * from `ss_skb_split` for SKBs which are already orphaned - */ - if (cli) { + cli_mem = (TfwClientMem *)TFW_SKB_CB(skb)->opaque_data; + if (cli_mem) { TFW_SKB_CB(skb)->mem += delta; BUG_ON(TFW_SKB_CB(skb)->mem < 0); - tfw_client_adjust_mem(cli, delta); + tfw_client_adjust_mem(cli_mem, delta); } } diff --git a/fw/t/unit/helpers.c b/fw/t/unit/helpers.c index 1ae4abeb6..5aa125b44 100644 --- a/fw/t/unit/helpers.c +++ b/fw/t/unit/helpers.c @@ -44,9 +44,15 @@ #include "tf_conf.h" #include "tf_filter.h" +static DEFINE_PER_CPU(long, mem); unsigned int tfw_cli_max_concurrent_streams; TfwConn conn_req, conn_resp; -TfwClient client; +TfwClientMem cli_mem = { + .mem = &mem, +}; +TfwClient client = { + .cli_mem = &cli_mem, +}; TfwHttpReq * test_req_alloc(size_t data_len) @@ -59,7 +65,7 @@ 
test_req_alloc(size_t data_len) * tfw_http_msg_alloc(). It is removed because we need to test how it * initializes the message and we would not like to test the copy-paste. */ - hmreq = __tfw_http_msg_alloc(&client, Conn_HttpClnt, true); + hmreq = __tfw_http_msg_alloc(&cli_mem, Conn_HttpClnt, true); BUG_ON(!hmreq); tfw_connection_init(&conn_req); @@ -68,7 +74,7 @@ test_req_alloc(size_t data_len) hmreq->conn = &conn_req; hmreq->stream = &conn_req.stream; - ret = tfw_msg_iter_setup(&it, tfw_http_msg_client(hmreq), + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem(hmreq), &hmreq->msg.skb_head, data_len); BUG_ON(ret); @@ -94,7 +100,7 @@ test_resp_alloc(size_t data_len, TfwHttpReq *req) int ret; TfwHttpMsg *hmresp = (TfwHttpMsg *)test_resp_alloc_no_data(req); - ret = tfw_msg_iter_setup(&it, tfw_http_msg_client(hmresp), + ret = tfw_msg_iter_setup(&it, tfw_http_msg_client_mem(hmresp), &hmresp->msg.skb_head, data_len); BUG_ON(ret); @@ -106,7 +112,7 @@ test_resp_alloc_no_data(TfwHttpReq *req) { TfwHttpMsg *hmresp; - hmresp = __tfw_http_msg_alloc(req->conn->peer, Conn_HttpSrv, true); + hmresp = __tfw_http_msg_alloc(&cli_mem, Conn_HttpSrv, true); BUG_ON(!hmresp); tfw_connection_init(&conn_resp); @@ -199,11 +205,6 @@ int ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, return 0; } -void -tfw_client_get(TfwClient *cli) -{ -} - void tfw_client_put(TfwClient *cli) { diff --git a/fw/t/unit/test_hpack.c b/fw/t/unit/test_hpack.c index 339943ea3..c71a8a60a 100644 --- a/fw/t/unit/test_hpack.c +++ b/fw/t/unit/test_hpack.c @@ -75,6 +75,13 @@ do { \ HDR_COMPOUND_STR(hdr_res, name, value); \ } while (0) +static DEFINE_PER_CPU(long, mem); +static TfwClientMem cli_mem = { + .mem = &mem, +}; +static TfwClient client = { + .cli_mem = &cli_mem, +}; static TfwH2Conn conn; static TfwH2Ctx *ctx; static TfwHttpReq *test_req; @@ -87,7 +94,7 @@ test_hpack_req_alloc(void) TfwHttpMsg *hmreq = (TfwHttpMsg *)req; BUG_ON(!req); - req->pit.pool = __tfw_pool_new(0, 
tfw_http_msg_client(hmreq)); + req->pit.pool = __tfw_pool_new(0, tfw_http_msg_client_mem(hmreq)); BUG_ON(!req->pit.pool); req->pit.parsed_hdr = &req->stream->parser.hdr; __set_bit(TFW_HTTP_B_H2, req->flags); @@ -103,6 +110,7 @@ test_h2_setup(void) create_str_pool(); conn.h2 = ctx = tfw_h2_context_alloc(); BUG_ON(!ctx); + ((TfwConn *)&conn)->peer = (TfwPeer *)&client; r = tfw_h2_context_init(ctx, &conn); BUG_ON(r); test_req = test_hpack_req_alloc(); diff --git a/fw/t/unit/test_http_match.c b/fw/t/unit/test_http_match.c index 92d42acca..b20f009c5 100644 --- a/fw/t/unit/test_http_match.c +++ b/fw/t/unit/test_http_match.c @@ -96,10 +96,13 @@ http_match_suite_rule_release(TfwHttpMatchRule *rule) static void http_match_suite_setup(void) { + TfwHttpMsg *hm; + test_req = test_req_alloc(1); + hm = (TfwHttpMsg *)test_req; test_table = tfw_pool_new(TfwHttpTable, - tfw_http_msg_client((TfwHttpMsg *)test_req), + tfw_http_msg_client_mem(hm), TFW_POOL_ZERO); BUG_ON(!test_table); INIT_LIST_HEAD(&test_table->head); diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index cc71b957a..24edb075a 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -109,6 +109,7 @@ static bool __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, unsigned short nr_frags) { + TfwHttpMsg *hmresp = (TfwHttpMsg*)resp; TfwMsgIter *it; struct sk_buff *skb; struct page *page; @@ -120,7 +121,7 @@ __test_resp_data_alloc(TfwStr *head_data, TfwStr *paged_data, return false; ss_skb_set_owner(skb, ss_skb_dflt_destructor, - tfw_http_msg_client((TfwHttpMsg*)resp), + tfw_http_msg_client_mem(hmresp), skb->truesize); skb->next = skb->prev = skb; it = &resp->iter; diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index d72843279..dc364e43f 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -525,7 +525,8 @@ do_split_and_parse(int type, int chunk_mode) tfw_http_init_parser_req(req); stream.msg = 
(TfwMsg*)req; hmreq = (TfwHttpMsg *)req; - req->pit.pool = __tfw_pool_new(0, tfw_http_msg_client(hmreq)); + req->pit.pool = + __tfw_pool_new(0, tfw_http_msg_client_mem(hmreq)); BUG_ON(!req->pit.pool); __set_bit(TFW_HTTP_B_H2, req->flags); } else if (type == FUZZ_RESP) { diff --git a/fw/tls.c b/fw/tls.c index bdf3b3f54..d9ac945e8 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -90,7 +90,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) spin_lock(&tls->lock); ss_skb_queue_tail(&tls->io_in.skb_list, skb); ss_skb_set_owner(skb, ss_skb_dflt_destructor, - conn->peer, skb->truesize); + CLIENT_MEM_FROM_CONN(conn), skb->truesize); /* Call TLS layer to place skb into a TLS record on top of skb_list. */ parsed = 0; @@ -569,8 +569,8 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) str.len, sgt ? sgt->nents : 0, io->msglen, io->msgtype, conn, cli_conn->sk->sk_write_xmit, ttls_xfrm_ready(tls)); - if ((r = tfw_msg_iter_setup(&it, cli_conn->peer, &io->skb_list, - str.len))) + if ((r = tfw_msg_iter_setup(&it, CLIENT_MEM_FROM_CONN(cli_conn), + &io->skb_list, str.len))) goto out; if ((r = tfw_msg_iter_write(&it, &str))) goto out; @@ -589,7 +589,8 @@ tfw_tls_send(TlsCtx *tls, struct sg_table *sgt) goto out; } ss_skb_set_owner(skb, ss_skb_dflt_destructor, - cli_conn->peer, skb->truesize); + CLIENT_MEM_FROM_CONN(cli_conn), + skb->truesize); ss_skb_queue_tail(&io->skb_list, skb); i = 0; } diff --git a/lib/fault_injection_alloc.c b/lib/fault_injection_alloc.c index 011dab6bb..710a316f1 100644 --- a/lib/fault_injection_alloc.c +++ b/lib/fault_injection_alloc.c @@ -71,4 +71,12 @@ tfw__alloc_percpu(size_t size, size_t align) ALLOW_ERROR_INJECTION(tfw__alloc_percpu, NULL); EXPORT_SYMBOL(tfw__alloc_percpu); +void * +tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) +{ + return __alloc_percpu_gfp(size, align, gfp); +} +ALLOW_ERROR_INJECTION(tfw__alloc_percpu_gfp, NULL); +EXPORT_SYMBOL(tfw__alloc_percpu_gfp); + #endif diff --git a/lib/fault_injection_alloc.h 
b/lib/fault_injection_alloc.h index 440ae9f83..26bcf43db 100644 --- a/lib/fault_injection_alloc.h +++ b/lib/fault_injection_alloc.h @@ -31,6 +31,7 @@ #define tfw_kvmalloc_node(size, flags, node) kvmalloc_node(size, flags, node) #define tfw__alloc_percpu(size, align) __alloc_percpu(size, align) #define tfw_alloc_percpu(t) alloc_percpu(t) +#define tfw_alloc_percpu_gfp(t, gfp) alloc_percpu_gfp(t, gfp) #else @@ -40,8 +41,12 @@ void *tfw_kcalloc(size_t n, size_t size, gfp_t flags); void *tfw_kmalloc_node(size_t size, gfp_t flags, int node); void *tfw_kvmalloc_node(size_t size, gfp_t flags, int node); void *tfw__alloc_percpu(size_t size, size_t align); +void *tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); #define tfw_alloc_percpu(t) \ (typeof(t) __percpu *) tfw__alloc_percpu(sizeof(t), __alignof__(t)) +#define tfw_alloc_percpu_gfp(t, gfp) \ (typeof(t) __percpu *) tfw__alloc_percpu_gfp(sizeof(t), \ + __alignof__(t), gfp) #endif From 10fdcd8c0e4f4c9c19f5bba7a4fa78258d31d308 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Wed, 15 Apr 2026 12:10:23 +0300 Subject: [PATCH 17/23] Fix according review - Use a cache for client mem allocations - Use a typedef for the TfwClientMem structure and pass a TfwClientMem * pointer instead of void * in all functions. - Make the client_mem option reconfigurable.
--- fw/client.c | 14 +++++++++++--- fw/client.h | 2 +- fw/hpack.c | 3 ++- fw/hpack.h | 3 ++- fw/http.c | 12 +++++++----- fw/http_msg.c | 2 +- fw/http_msg.h | 4 ++-- fw/http_types.h | 1 + fw/msg.c | 4 ++-- fw/msg.h | 2 +- fw/pool.c | 2 +- fw/pool.h | 6 ++++-- fw/ss_skb.c | 6 +++--- fw/ss_skb.h | 9 ++++++--- 14 files changed, 44 insertions(+), 26 deletions(-) diff --git a/fw/client.c b/fw/client.c index 2ffbd8cbd..4b0cb0efa 100644 --- a/fw/client.c +++ b/fw/client.c @@ -68,6 +68,8 @@ static TDB *client_db; static atomic_t shutdown_pending = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(shutdown_wq); +static struct kmem_cache *client_mem_cache; + /* * Called only under db->ga_lock. * @@ -201,7 +203,7 @@ cli_mem_release(struct percpu_ref *ref) percpu_ref_exit(&cli_mem->refcnt); free_percpu(cli_mem->mem); - kfree(cli_mem); + kmem_cache_free(client_mem_cache, cli_mem); if (atomic_dec_and_test(&shutdown_pending)) wake_up(&shutdown_wq); @@ -221,7 +223,7 @@ tfw_client_mem_alloc(void) { TfwClientMem *cli_mem; - cli_mem = tfw_kmalloc(sizeof(TfwClientMem), GFP_ATOMIC); + cli_mem = kmem_cache_alloc(client_mem_cache, GFP_ATOMIC); if (unlikely(!cli_mem)) return NULL; @@ -241,7 +243,7 @@ tfw_client_mem_alloc(void) free_per_cpu_mem: free_percpu(cli_mem->mem); free_cli_mem: - kfree(cli_mem); + kmem_cache_free(client_mem_cache, cli_mem); return NULL; } @@ -442,6 +444,11 @@ TfwMod tfw_client_mod = { int __init tfw_client_init(void) { + client_mem_cache = kmem_cache_create("client_mem_cache", + sizeof(TfwClientMem), + 0, 0, NULL); + if (!client_mem_cache) + return -ENOMEM; tfw_mod_register(&tfw_client_mod); return 0; @@ -450,5 +457,6 @@ tfw_client_init(void) void tfw_client_exit(void) { + kmem_cache_destroy(client_mem_cache); tfw_mod_unregister(&tfw_client_mod); } diff --git a/fw/client.h b/fw/client.h index c0ac8a62c..55fc2b093 100644 --- a/fw/client.h +++ b/fw/client.h @@ -24,7 +24,7 @@ #include "http_limits.h" #include "connection.h" -typedef struct { +typedef struct 
tfw_client_mem_t { struct percpu_ref refcnt; struct work_struct kill_work; long __percpu *mem; diff --git a/fw/hpack.c b/fw/hpack.c index 5ec842df7..ee07d0c9c 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -1155,7 +1155,8 @@ tfw_huffman_init(TfwHPack *__restrict hp) } int -tfw_hpack_init(TfwHPack *__restrict hp, void *owner, unsigned int htbl_sz) +tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, + unsigned int htbl_sz) { bool np; TfwHPackETbl *et = &hp->enc_tbl; diff --git a/fw/hpack.h b/fw/hpack.h index a23983919..6780aee13 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -299,7 +299,8 @@ typedef struct { void write_int(unsigned long index, unsigned short max, unsigned short mask, TfwHPackInt *__restrict res_idx); -int tfw_hpack_init(TfwHPack *__restrict hp, void *owner, unsigned int htbl_sz); +int tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, + unsigned int htbl_sz); void tfw_hpack_clean(TfwHPack *__restrict hp); int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, diff --git a/fw/http.c b/fw/http.c index fb8bf5043..77d4b9d77 100644 --- a/fw/http.c +++ b/fw/http.c @@ -1162,9 +1162,10 @@ tfw_h2_resp_fwd(TfwHttpResp *resp) * skb destructor). */ if (resp_in_xmit) { - void *owner = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; + TfwClientMem *owner = + TFW_SKB_CB(resp->msg.skb_head)->opaque_data; - BUG_ON(owner != CLIENT_MEM_FROM_CONN(resp->req->conn)); + WARN_ON(owner != CLIENT_MEM_FROM_CONN(resp->req->conn)); TFW_SKB_CB(resp->msg.skb_head)->opaque_data = resp; TFW_SKB_CB(resp->msg.skb_head)->destructor = tfw_h2_stream_skb_destructor; @@ -2864,7 +2865,8 @@ static TfwMsg * tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) { int type = TFW_CONN_TYPE(conn); - void *owner = type & Conn_Clnt ? CLIENT_MEM_FROM_CONN(conn) : NULL; + TfwClientMem *owner = type & Conn_Clnt ? 
+ CLIENT_MEM_FROM_CONN(conn) : NULL; TfwHttpMsg *hm; hm = __tfw_http_msg_alloc(owner, type, true); @@ -2902,11 +2904,11 @@ tfw_http_conn_msg_alloc(TfwConn *conn, TfwStream *stream) if (likely(hm->req->conn)) { TfwClient *cli = (TfwClient *)hm->req->conn->peer; TfwClientMem *cli_mem = cli->cli_mem; + int delta = PAGE_SIZE << hm->pool->order; hm->pool->owner = cli_mem; BUG_ON(!tfw_client_mem_get(cli_mem)); - tfw_client_adjust_mem(cli_mem, - PAGE_SIZE << hm->pool->order); + tfw_client_adjust_mem(cli_mem, delta); } if (TFW_MSG_H2(hm->req)) { diff --git a/fw/http_msg.c b/fw/http_msg.c index 1fe58f53b..b637600f4 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -875,7 +875,7 @@ tfw_http_msg_free(TfwHttpMsg *m) * for parsing and subsequent adjustment. */ TfwHttpMsg * -__tfw_http_msg_alloc(void *owner, int type, bool full) +__tfw_http_msg_alloc(TfwClientMem *owner, int type, bool full) { TfwHttpMsg *hm = (type & Conn_Clnt) ? (TfwHttpMsg *)tfw_pool_new(TfwHttpReq, owner, diff --git a/fw/http_msg.h b/fw/http_msg.h index 62c81fc72..4408017b4 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -76,10 +76,10 @@ tfw_http_msg_srvhdr_val(TfwStr *hdr, unsigned id, TfwStr *val) void tfw_http_msg_pair(TfwHttpResp *resp, TfwHttpReq *req); void tfw_http_msg_unpair(TfwHttpMsg *msg); -TfwHttpMsg *__tfw_http_msg_alloc(void *owner, int type, bool full); +TfwHttpMsg *__tfw_http_msg_alloc(TfwClientMem *owner, int type, bool full); static inline TfwHttpReq * -tfw_http_msg_alloc_req_light(void *owner) +tfw_http_msg_alloc_req_light(TfwClientMem *owner) { return (TfwHttpReq *)__tfw_http_msg_alloc(owner, Conn_Clnt, false); } diff --git a/fw/http_types.h b/fw/http_types.h index 9306d2056..8ce576807 100644 --- a/fw/http_types.h +++ b/fw/http_types.h @@ -169,5 +169,6 @@ typedef struct frang_vhost_cfg_t FrangVhostCfg; typedef struct tfw_http_cookie_t TfwStickyCookie; typedef struct tfw_http_stream_t TfwStream; typedef struct tfw_cli_conn_t TfwCliConn; +typedef struct tfw_client_mem_t TfwClientMem; 
#endif /* __TFW_HTTP_TYPES_H__ */ diff --git a/fw/msg.c b/fw/msg.c index 87a14ab3d..ab2d14f01 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -28,8 +28,8 @@ * iterator, since its current state is to be rewritten. */ int -tfw_msg_iter_setup(TfwMsgIter *it, void *owner, struct sk_buff **skb_head, - size_t data_len) +tfw_msg_iter_setup(TfwMsgIter *it, TfwClientMem *owner, + struct sk_buff **skb_head, size_t data_len) { int r; diff --git a/fw/msg.h b/fw/msg.h index 27e2aaefc..96cf1698a 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -98,7 +98,7 @@ typedef struct { } TfwMsgParseIter; int tfw_msg_iter_write(TfwMsgIter *it, const TfwStr *data); -int tfw_msg_iter_setup(TfwMsgIter *it, void *skb_owner, +int tfw_msg_iter_setup(TfwMsgIter *it, TfwClientMem *owner, struct sk_buff **skb_head, size_t data_len); int tfw_msg_iter_move(TfwMsgIter *it, unsigned char **data, unsigned long sz); diff --git a/fw/pool.c b/fw/pool.c index 9ea015088..479644317 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -257,7 +257,7 @@ tfw_pool_clean(TfwPool *pool) * Allocate bit more pages than we need. */ TfwPool * -__tfw_pool_new(size_t n, void *owner) +__tfw_pool_new(size_t n, TfwClientMem *owner) { TfwClientMem *cli_mem = (TfwClientMem *)owner; TfwPool *p; diff --git a/fw/pool.h b/fw/pool.h index c2ebe000e..6e33d818c 100644 --- a/fw/pool.h +++ b/fw/pool.h @@ -36,6 +36,8 @@ #define TFW_POOL_ALIGN_SZ(n) (((n) + 7) & ~7UL) #define TFW_POOL_ALIGN_PTR(p) ((void *)TFW_POOL_ALIGN_SZ((unsigned long)p)) +typedef struct tfw_client_mem_t TfwClientMem; + /** * Memory pool chunk descriptor. 
* @@ -58,7 +60,7 @@ typedef struct tfw_pool_chunk_t { */ typedef struct { TfwPoolChunk *curr; - void *owner; + TfwClientMem *owner; unsigned int order; unsigned int off; } TfwPool; @@ -81,7 +83,7 @@ typedef struct { int tfw_pool_init(void); void tfw_pool_exit(void); -TfwPool *__tfw_pool_new(size_t n, void *owner); +TfwPool *__tfw_pool_new(size_t n, TfwClientMem *owner); void *__tfw_pool_alloc_page(TfwPool *p, size_t n, bool align); void tfw_pool_free(TfwPool *p, void *ptr, size_t n); void tfw_pool_clean(TfwPool *p); diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 1fc3bec59..299bb494d 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -109,7 +109,7 @@ ss_skb_alloc_pages(size_t len) * segmentation. The allocated payload space will be filled with data. */ int -ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len) +ss_skb_alloc_data(struct sk_buff **skb_head, TfwClientMem *owner, size_t len) { int i_skb, nr_skbs = len ? DIV_ROUND_UP(len, SS_SKB_MAX_DATA_LEN) : 1; size_t n = 0; @@ -1759,7 +1759,7 @@ ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head) void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), - void *owner, unsigned int mem) + TfwClientMem *owner, unsigned int mem) { TfwClientMem *cli_mem = (TfwClientMem *)owner; @@ -1784,7 +1784,7 @@ ss_skb_adjust_client_mem(struct sk_buff *skb, int delta) cli_mem = (TfwClientMem *)TFW_SKB_CB(skb)->opaque_data; if (cli_mem) { TFW_SKB_CB(skb)->mem += delta; - BUG_ON(TFW_SKB_CB(skb)->mem < 0); + WARN_ON(TFW_SKB_CB(skb)->mem < 0); tfw_client_adjust_mem(cli_mem, delta); } } diff --git a/fw/ss_skb.h b/fw/ss_skb.h index ab1897033..930a1a687 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -55,6 +55,7 @@ enum { typedef int (*on_send_cb_t)(void *conn, struct sk_buff **skb_head); typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); typedef void (*on_send_fail_cb_t)(void *conn, struct sk_buff *skb_head); +typedef struct tfw_client_mem_t TfwClientMem; /* * Tempesta FW sk_buff 
private data. @@ -84,7 +85,7 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), - void *owner, unsigned int delta); + TfwClientMem *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); void ss_skb_dflt_destructor(struct sk_buff *skb); void ss_skb_on_send_dflt(void *conn, struct sk_buff **skb_head); @@ -481,10 +482,12 @@ ss_skb_data_ptr_by_offset(struct sk_buff *skb, unsigned int off) char *ss_skb_fmt_src_addr(const struct sk_buff *skb, char *out_buf); -int ss_skb_alloc_data(struct sk_buff **skb_head, void *owner, size_t len); +int ss_skb_alloc_data(struct sk_buff **skb_head, TfwClientMem *owner, + size_t len); struct sk_buff *ss_skb_split(struct sk_buff *skb, int len); int ss_skb_get_room_w_frag(struct sk_buff *skb_head, struct sk_buff *skb, - char *pspt, unsigned int len, TfwStr *it, int *fragn); + char *pspt, unsigned int len, TfwStr *it, + int *fragn); int ss_skb_expand_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, size_t head, size_t tail); int ss_skb_chop_head_tail(struct sk_buff *skb_head, struct sk_buff *skb, From c96790010d5ca0d8a8525240253c7348b8a31bc5 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Fri, 10 Apr 2026 19:12:11 +0300 Subject: [PATCH 18/23] Implement client mem allocation optimization - Preallocate and initialize TfwClientMem structures according to client_cfg.lru_size. During TfwClient structure allocation try to get TfwClientMem from preallocated pool. If this pool is empty alloc TfwClientMem from cache. - Implement new fault injection alloc functions to cover new code.
1396: small: finished in 50.03s, 1294710.94 req/s, 1002.60MB/s finished in 50.03s, 1287197.04 req/s, 998.01MB/s large: finished in 50.03s, 103497.42 req/s, 9.90GB/s finished in 50.03s, 108665.42 req/s, 10.39GB/s 1396 with client_mem: small: finished in 50.03s, 1225390.98 req/s, 948.74MB/s finished in 50.03s, 1223275.66 req/s, 947.72MB/s large: finished in 50.08s, 78906.58 req/s, 7.55GB/s finished in 50.08s, 86201.98 req/s, 8.24GB/s master: small: finished in 50.03s, 1294782.10 req/s, 1002.49MB/s finished in 50.03s, 1294782.10 req/s, 1001.33MB/s large: finished in 50.04s, 98625.80 req/s, 9.43GB/s finished in 50.04s, 97767.22 req/s, 9.35GB/s --- fw/apm.c | 4 +- fw/client.c | 249 ++++++++++++++++++++++++++++-------- fw/client.h | 1 + fw/http.c | 3 +- fw/pool.c | 2 +- lib/fault_injection_alloc.c | 17 +++ lib/fault_injection_alloc.h | 12 +- tls/mpool.c | 5 +- tls/tls_ticket.c | 4 +- tls/x509_crt.c | 5 +- 10 files changed, 240 insertions(+), 62 deletions(-) diff --git a/fw/apm.c b/fw/apm.c index 9de85c5a7..00e418c6a 100644 --- a/fw/apm.c +++ b/fw/apm.c @@ -1659,8 +1659,8 @@ tfw_cfgop_apm_add_hm_req(const char *req_cstr, TfwApmHM *hm_entry) unsigned long size; size = strlen(req_cstr); - hm_entry->req = (char *)__get_free_pages(GFP_KERNEL, - get_order(size)); + hm_entry->req = (char *)tfw__get_free_pages(GFP_KERNEL, + get_order(size)); if (!hm_entry->req) { T_ERR_NL("Can't allocate memory for health monitoring request" "\n"); diff --git a/fw/client.c b/fw/client.c index 4b0cb0efa..520ea4072 100644 --- a/fw/client.c +++ b/fw/client.c @@ -59,16 +59,177 @@ typedef struct { } TfwClientEntry; static struct { - struct list_head head; - unsigned int lru_size; -} client_lru; + struct list_head head; + unsigned int lru_size; +} client_lru = { + .head = LIST_HEAD_INIT(client_lru.head), + .lru_size = 0, +}; static TDB *client_db; static atomic_t shutdown_pending = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(shutdown_wq); -static struct kmem_cache *client_mem_cache; +static struct 
kmem_cache *cli_mem_cache; +static struct { + TfwClientMem *mem; + struct list_head free_list; + unsigned int size; + unsigned int order; +} cli_mem_pool = { + .mem = NULL, + .free_list = LIST_HEAD_INIT(cli_mem_pool.free_list), + .size = 0, + .order = 0, +}; + +static inline bool +tfw_cli_mem_belongs_to_pool(TfwClientMem *cli_mem) +{ + return cli_mem >= cli_mem_pool.mem + && cli_mem < cli_mem_pool.mem + cli_mem_pool.size; +} + +static void +__cli_mem_release(TfwClientMem *cli_mem) +{ + percpu_ref_exit(&cli_mem->refcnt); + free_percpu(cli_mem->mem); + if (!tfw_cli_mem_belongs_to_pool(cli_mem)) + kmem_cache_free(cli_mem_cache, cli_mem); +} + +static inline void +tfw_cli_mem_pool_free(TfwClientMem *cli_mem) +{ + int cpu; + + assert_spin_locked(&client_db->ga_lock); + + for_each_online_cpu(cpu) + *per_cpu_ptr(cli_mem->mem, cpu) = 0; + percpu_ref_reinit(&cli_mem->refcnt); + list_add_tail(&cli_mem->in_free_list, &cli_mem_pool.free_list); +} + +static inline TfwClientMem * +tfw_cli_mem_pool_alloc(void) +{ + TfwClientMem *cli_mem; + + assert_spin_locked(&client_db->ga_lock); + + cli_mem = list_first_entry_or_null(&cli_mem_pool.free_list, + TfwClientMem, in_free_list); + if (!cli_mem) + return NULL; + + list_del_init(&cli_mem->in_free_list); + + return cli_mem; +} + +static void +cli_mem_release(struct percpu_ref *ref) +{ + TfwClientMem *cli_mem = container_of(ref, TfwClientMem, refcnt); + + spin_lock_bh(&client_db->ga_lock); + + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + if (tfw_cli_mem_belongs_to_pool(cli_mem)) + tfw_cli_mem_pool_free(cli_mem); + else + __cli_mem_release(cli_mem); + + spin_unlock_bh(&client_db->ga_lock); + + if (atomic_dec_and_test(&shutdown_pending)) + wake_up(&shutdown_wq); +} + +static void +tfw_cli_mem_kill_work_fn(struct work_struct *work) +{ + TfwClientMem *cli_mem = container_of(work, TfwClientMem, kill_work); + + percpu_ref_kill(&cli_mem->refcnt); + percpu_ref_put(&cli_mem->refcnt); +} + +static inline int +tfw_cli_mem_init(TfwClientMem *cli_mem, 
gfp_t flags) +{ + int r; + + cli_mem->mem = tfw_alloc_percpu_gfp(long, flags | __GFP_ZERO); + if (unlikely(!cli_mem->mem)) + return -ENOMEM; + + r = tfw_percpu_ref_init(&cli_mem->refcnt, cli_mem_release, + PERCPU_REF_ALLOW_REINIT, flags); + if (unlikely(r)) + goto free_per_cpu_mem; + + INIT_LIST_HEAD(&cli_mem->in_free_list); + INIT_WORK(&cli_mem->kill_work, tfw_cli_mem_kill_work_fn); + + return 0; + +free_per_cpu_mem: + free_percpu(cli_mem->mem); + + return r; +} + +static inline void +tfw_cli_mem_pool_exit(void) +{ + TfwClientMem *curr, *tmp; + + list_for_each_entry_safe(curr, tmp, &cli_mem_pool.free_list, + in_free_list) + { + list_del_init(&curr->in_free_list); + __cli_mem_release(curr); + } + + free_pages((unsigned long)cli_mem_pool.mem, cli_mem_pool.order); + cli_mem_pool.mem = NULL; +} + +static inline int +tfw_cli_mem_pool_init(void) +{ + TfwClientMem *block; + unsigned int order; + int i, r; + + if (WARN_ON_ONCE(!client_cfg.lru_size)) + return -EINVAL; + + order = get_order(sizeof(TfwClientMem) * client_cfg.lru_size); + if (order > MAX_PAGE_ORDER) + order = MAX_PAGE_ORDER; + + cli_mem_pool.order = order; + cli_mem_pool.mem = (TfwClientMem *)tfw__get_free_pages(GFP_KERNEL, + order); + if (unlikely(!cli_mem_pool.mem)) + return -ENOMEM; + + block = cli_mem_pool.mem; + for (i = 0; i < client_cfg.lru_size; i++) { + r = tfw_cli_mem_init(&block[i], GFP_KERNEL); + if (unlikely(r)) + return r; + list_add(&block[i].in_free_list, &cli_mem_pool.free_list); + cli_mem_pool.size++; + } + + return 0; +} /* * Called only under db->ga_lock. 
@@ -116,13 +277,6 @@ tfw_client_free(TdbRec *rec) } } -static void -tfw_client_init_lru(void) -{ - INIT_LIST_HEAD(&client_lru.head); - client_lru.lru_size = 0; -} - static void tfw_client_free_lru(void) { @@ -196,56 +350,40 @@ tfw_client_addr_eq(TdbRec *rec, void *data) return true; } -static void -cli_mem_release(struct percpu_ref *ref) +static inline TfwClientMem * +tfw_cli_mem_alloc_from_cache(void) { - TfwClientMem *cli_mem = container_of(ref, TfwClientMem, refcnt); + TfwClientMem *cli_mem; - percpu_ref_exit(&cli_mem->refcnt); - free_percpu(cli_mem->mem); - kmem_cache_free(client_mem_cache, cli_mem); + cli_mem = kmem_cache_alloc(cli_mem_cache, GFP_ATOMIC); + if (unlikely(!cli_mem)) + return NULL; - if (atomic_dec_and_test(&shutdown_pending)) - wake_up(&shutdown_wq); -} + if (unlikely(tfw_cli_mem_init(cli_mem, GFP_ATOMIC))) + goto free_cli_mem; -static void -tfw_cli_mem_kill_work_fn(struct work_struct *work) -{ - TfwClientMem *cli_mem = container_of(work, TfwClientMem, kill_work); + return cli_mem; - percpu_ref_kill(&cli_mem->refcnt); - percpu_ref_put(&cli_mem->refcnt); +free_cli_mem: + kmem_cache_free(cli_mem_cache, cli_mem); + + return NULL; } static inline TfwClientMem * -tfw_client_mem_alloc(void) +tfw_cli_mem_alloc(void) { TfwClientMem *cli_mem; - cli_mem = kmem_cache_alloc(client_mem_cache, GFP_ATOMIC); + cli_mem = tfw_cli_mem_pool_alloc(); + if (!cli_mem) + cli_mem = tfw_cli_mem_alloc_from_cache(); if (unlikely(!cli_mem)) return NULL; - cli_mem->mem = tfw_alloc_percpu_gfp(long, GFP_ATOMIC | __GFP_ZERO); - if (!cli_mem->mem) - goto free_cli_mem; - - if (percpu_ref_init(&cli_mem->refcnt, cli_mem_release, 0, GFP_ATOMIC)) - goto free_per_cpu_mem; - percpu_ref_get(&cli_mem->refcnt); - INIT_WORK(&cli_mem->kill_work, tfw_cli_mem_kill_work_fn); - return cli_mem; - -free_per_cpu_mem: - free_percpu(cli_mem->mem); -free_cli_mem: - kmem_cache_free(client_mem_cache, cli_mem); - - return NULL; } static int @@ -255,13 +393,14 @@ tfw_client_ent_init(TdbRec *rec, void 
*data) TfwClient *cli = &ent->cli; TfwClientEqCtx *ctx = (TfwClientEqCtx *)data; - cli->cli_mem = tfw_client_mem_alloc(); + INIT_LIST_HEAD(&cli->list); + + cli->cli_mem = tfw_cli_mem_alloc(); if (unlikely(!cli->cli_mem)) return -ENOMEM; assert_spin_locked(&client_db->ga_lock); - INIT_LIST_HEAD(&cli->list); tfw_client_update_lru(cli); bzero_fast(&cli->class_prvt, sizeof(cli->class_prvt)); @@ -369,6 +508,8 @@ tfw_client_for_each(int (*fn)(void *)) static int tfw_client_start(void) { + int r; + if (tfw_runstate_is_reconfig()) return 0; /* @@ -382,8 +523,11 @@ tfw_client_start(void) if (!client_db) return -EINVAL; + r = tfw_cli_mem_pool_init(); + if (unlikely(r)) + return r; + client_db->hdr->before_free = tfw_client_free; - tfw_client_init_lru(); return 0; } @@ -397,6 +541,7 @@ tfw_client_stop(void) if (client_db) { tfw_client_free_lru(); wait_event(shutdown_wq, !atomic_read(&shutdown_pending)); + tfw_cli_mem_pool_exit(); tdb_close(client_db); client_db = NULL; } @@ -444,10 +589,10 @@ TfwMod tfw_client_mod = { int __init tfw_client_init(void) { - client_mem_cache = kmem_cache_create("client_mem_cache", - sizeof(TfwClientMem), - 0, 0, NULL); - if (!client_mem_cache) + cli_mem_cache = kmem_cache_create("cli_mem_cache", + sizeof(TfwClientMem), + 0, 0, NULL); + if (!cli_mem_cache) return -ENOMEM; tfw_mod_register(&tfw_client_mod); @@ -457,6 +602,6 @@ tfw_client_init(void) void tfw_client_exit(void) { - kmem_cache_destroy(client_mem_cache); + kmem_cache_destroy(cli_mem_cache); tfw_mod_unregister(&tfw_client_mod); } diff --git a/fw/client.h b/fw/client.h index 55fc2b093..d14409ba2 100644 --- a/fw/client.h +++ b/fw/client.h @@ -28,6 +28,7 @@ typedef struct tfw_client_mem_t { struct percpu_ref refcnt; struct work_struct kill_work; long __percpu *mem; + struct list_head in_free_list; } TfwClientMem; /** diff --git a/fw/http.c b/fw/http.c index 77d4b9d77..158a95688 100644 --- a/fw/http.c +++ b/fw/http.c @@ -8208,7 +8208,8 @@ __tfw_http_msg_body_dup(const char *filename, TfwStr 
*c_len, size_t *len, } t_sz += b_sz; - b_start = res = (char *)__get_free_pages(GFP_KERNEL, get_order(t_sz)); + b_start = res = (char *)tfw__get_free_pages(GFP_KERNEL, + get_order(t_sz)); if (!res) { T_ERR_NL("Can't allocate memory storing file %s as response " "body\n", filename); diff --git a/fw/pool.c b/fw/pool.c index 479644317..87f1146aa 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -89,7 +89,7 @@ tfw_pool_alloc_pages(TfwClientMem *cli_mem, unsigned int order) if (!pg_res) { flags = order > 0 ? GFP_ATOMIC | __GFP_COMP : GFP_ATOMIC; - pg_res = __get_free_pages(flags, order); + pg_res = tfw__get_free_pages(flags, order); } if (likely(pg_res) && cli_mem) tfw_client_adjust_mem(cli_mem, PAGE_SIZE << order); diff --git a/lib/fault_injection_alloc.c b/lib/fault_injection_alloc.c index 710a316f1..bc29eebfe 100644 --- a/lib/fault_injection_alloc.c +++ b/lib/fault_injection_alloc.c @@ -79,4 +79,21 @@ tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) ALLOW_ERROR_INJECTION(tfw__alloc_percpu_gfp, NULL); EXPORT_SYMBOL(tfw__alloc_percpu_gfp); +unsigned long +tfw__get_free_pages(gfp_t gfp_mask, unsigned int order) +{ + return __get_free_pages(gfp_mask, order); +} +ALLOW_ERROR_INJECTION(tfw__get_free_pages, NULL); +EXPORT_SYMBOL(tfw__get_free_pages); + +int +tfw_percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + unsigned int flags, gfp_t gfp) +{ + return percpu_ref_init(ref, release, flags, gfp); +} +ALLOW_ERROR_INJECTION(tfw_percpu_ref_init, ERRNO); +EXPORT_SYMBOL(tfw_percpu_ref_init); + #endif diff --git a/lib/fault_injection_alloc.h b/lib/fault_injection_alloc.h index 26bcf43db..50ae90942 100644 --- a/lib/fault_injection_alloc.h +++ b/lib/fault_injection_alloc.h @@ -28,10 +28,15 @@ #define tfw_kzalloc(size, flags) kzalloc(size, flags) #define tfw_kcalloc(n, size, flags) kcalloc(n, size, flags) #define tfw_kmalloc_node(size, flags, node) kmalloc_node(size, flags, node) -#define tfw_kvmalloc_node(size, flags, node) kvmalloc_node(size, flags, node) 
+#define tfw_kvmalloc_node(size, flags, node) \ + kvmalloc_node(size, flags, node) #define tfw__alloc_percpu(size, align) __alloc_percpu(size, align) #define tfw_alloc_percpu(t) alloc_percpu(t) #define tfw_alloc_percpu_gfp(t, gfp) alloc_percpu_gfp(t, gfp) +#define tfw__get_free_pages(gfp_mask, order) \ + __get_free_pages(gfp_mask, order) +#define tfw_percpu_ref_init(ref, release, flags, gfp) \ + percpu_ref_init(ref, release, flags, gfp) #else @@ -42,6 +47,11 @@ void *tfw_kmalloc_node(size_t size, gfp_t flags, int node); void *tfw_kvmalloc_node(size_t size, gfp_t flags, int node); void *tfw__alloc_percpu(size_t size, size_t align); void *tfw__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); +int __must_check tfw_percpu_ref_init(struct percpu_ref *ref, + percpu_ref_func_t *release, + unsigned int flags, + gfp_t gfp); +unsigned long tfw__get_free_pages(gfp_t gfp_mask, unsigned int order); #define tfw_alloc_percpu(t) \ (typeof(t) __percpu *) tfw__alloc_percpu(sizeof(t), __alignof__(t)) #define tfw_alloc_percpu_gfp(t, gfp) \ diff --git a/tls/mpool.c b/tls/mpool.c index 337d9a8cd..de55d0e0d 100644 --- a/tls/mpool.c +++ b/tls/mpool.c @@ -45,6 +45,7 @@ #include "dhm.h" #include "ecp.h" #include "mpool.h" +#include "lib/fault_injection_alloc.h" #define MPI_POOL_DATA(mp) ((void *)((char *)(mp) + sizeof(TlsMpiPool))) #define MPI_POOL_FREE_PTR(mp) ((void *)((char *)(mp) + (mp)->curr)) @@ -208,7 +209,7 @@ ttls_mpi_pool_create(size_t order, gfp_t gfp_mask) TlsMpiPool *mp; unsigned long addr; - if (!(addr = __get_free_pages(gfp_mask | __GFP_ZERO, order))) + if (!(addr = tfw__get_free_pages(gfp_mask | __GFP_ZERO, order))) return NULL; WARN_ON_ONCE(addr & ((PAGE_SIZE << order) - 1)); @@ -336,7 +337,7 @@ __mpi_profile_clone(TlsCtx *tls, int ec) return -ENOMEM; } - ptr = (char *)__get_free_pages(GFP_ATOMIC, __MPOOL_HS_ORDER); + ptr = (char *)tfw__get_free_pages(GFP_ATOMIC, __MPOOL_HS_ORDER); if (unlikely(!ptr)) return -ENOMEM; diff --git a/tls/tls_ticket.c b/tls/tls_ticket.c 
index 773224ccb..6700530b9 100644 --- a/tls/tls_ticket.c +++ b/tls/tls_ticket.c @@ -30,6 +30,7 @@ #include "tls_ticket.h" #include "tls_internal.h" #include "lib/common.h" +#include "lib/fault_injection_alloc.h" ttls_cli_id_t *ttls_cli_id_cb; @@ -583,7 +584,8 @@ ttls_ticket_sess_load(TlsState *state, size_t len, unsigned long lifetime) * address it. */ sess->peer_cert->raw.order = get_order(state->cert_len + TTLS_CERT_LEN_LEN); - pg = __get_free_pages(GFP_ATOMIC | __GFP_COMP, sess->peer_cert->raw.order); + pg = tfw__get_free_pages(GFP_ATOMIC | __GFP_COMP, + sess->peer_cert->raw.order); if (!pg) { ttls_x509_crt_destroy(&sess->peer_cert); return TTLS_ERR_ALLOC_FAILED; diff --git a/tls/x509_crt.c b/tls/x509_crt.c index cb89806d5..52275b38e 100644 --- a/tls/x509_crt.c +++ b/tls/x509_crt.c @@ -910,8 +910,9 @@ ttls_x509_crt_parse(TlsX509Crt *crt, unsigned char *buf, size_t buflen) * to the multi-byte structures inside the raw data. */ crt->raw.order = get_order(buflen + crt_len_len); - crt->raw.pages = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_COMP, - crt->raw.order); + crt->raw.pages = + (unsigned char *)tfw__get_free_pages(GFP_KERNEL | __GFP_COMP, + crt->raw.order); if (!crt->raw.pages) return -ENOMEM; crt->raw.tot_len = 0; From 304a26231c0b5d0a3325213a0024276fe7b8e0b3 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Mon, 13 Apr 2026 13:53:14 +0300 Subject: [PATCH 19/23] Fix BUG in case of allocation fail. 
We should check that TlsMpiPool was allocated during `ttls_mpool_exit` --- tls/mpool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tls/mpool.c b/tls/mpool.c index de55d0e0d..1c1d45023 100644 --- a/tls/mpool.c +++ b/tls/mpool.c @@ -400,8 +400,11 @@ ttls_mpool_exit(void) for_each_online_cpu(i) { mp = per_cpu(g_tmp_mpool, i); - ttls_bzero_safe(MPI_POOL_DATA(mp), mp->curr - sizeof(*mp)); - free_pages((unsigned long)mp, mp->order); + if (mp) { + ttls_bzero_safe(MPI_POOL_DATA(mp), + mp->curr - sizeof(*mp)); + free_pages((unsigned long)mp, mp->order); + } } } From 4fe14e1549bd6366960282cdaeb5426eb749e050 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Thu, 16 Apr 2026 08:09:05 +0300 Subject: [PATCH 20/23] Rework error code library. - Remove SS_* error codes, use only T_* error codes, since we already include "lib/log.h" in all files. - Split enum with error codes to two enums (one for common error codes and one for internal). - List all error codes from least crucial to most crucial - Implement some useful functions. --- fw/connection.c | 7 +- fw/http_frame.c | 10 +-- fw/sock.c | 34 ++++----- fw/sock_clnt.c | 13 ++++ fw/ss_skb.c | 6 +- fw/ss_skb.h | 25 ------- fw/t/unit/test_http_parser_common.c | 2 +- fw/tls.c | 2 +- lib/log.h | 107 ++++++++++++++++++++++------ 9 files changed, 127 insertions(+), 79 deletions(-) diff --git a/fw/connection.c b/fw/connection.c index ab6bc1c3d..5b92b3261 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -192,17 +192,16 @@ tfw_connection_recv(TfwConn *conn, struct sk_buff *skb) } /* - * T_BLOCK is error code for high level modules (like frang), - * here we should deal with error code, which accurately + * Here we should deal with error code, which accurately * determine further closing behavior. * When error occurs during response processing * we should close connection with backend immediatly * and try to reastablish it later, so we should not * return T_DROP for server connections.
*/ - BUG_ON(r == T_BLOCK || + BUG_ON(is_tfw_internal_error_code(r) || (r == T_DROP && TFW_CONN_TYPE(conn) & Conn_Srv)); - return r <= T_BAD || r == T_OK ? r : T_BAD; + return (r == T_OK || is_tfw_common_error_code(r)) ? r : T_BAD; } void diff --git a/fw/http_frame.c b/fw/http_frame.c index ae1e3ef12..d0511525d 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -1958,9 +1958,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) h2->data_off = 0; h2->skb_head = pskb->next = pskb->prev = NULL; r = tfw_http_msg_process_generic(c, h2->cur_stream, pskb, next); - /* TODO #1490: Check this place, when working on the task. */ - if (r && r != T_DROP) { - WARN_ON_ONCE(r == T_POSTPONE); + if (tfw_error_code_is_crucial(r)) { ss_kfree_skb(nskb); goto out; } @@ -1986,9 +1984,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) h2->data_off = 0; /* The skb will not be parsed, just flags will be checked. */ r = tfw_http_msg_process_generic(c, h2->cur_stream, pskb, next); - /* TODO #1490: Check this place, when working on the task. */ - if (r && r != T_DROP) { - WARN_ON_ONCE(r == T_POSTPONE); + if (tfw_error_code_is_crucial(r)) { ss_kfree_skb(nskb); goto out; } @@ -2009,7 +2005,7 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) out: ss_skb_queue_purge(&h2->skb_head); - if (r && r != T_POSTPONE && r != T_DROP) + if (tfw_error_code_is_crucial(r)) tfw_h2_context_reinit(h2, false); return r; diff --git a/fw/sock.c b/fw/sock.c index a2a56de89..f0e11b322 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -171,7 +171,7 @@ ss_sk_incoming_cpu_update(struct sock *sk) * to shutdown. The only exception is closing activity - this is the only * activity allowed in progress of shutdown process. * - * Returns zero (SS_OK) if we're in critical section and SS_BAD if shutdown + * Returns zero (T_OK) if we're in critical section and T_BAD if shutdown * process in progress and we can't enter the section. 
*/ static int @@ -184,7 +184,7 @@ ss_active_guard_enter(unsigned long val) * if we commited to shutdown. */ if (unlikely(!READ_ONCE(__ss_active))) - return SS_BAD; + return T_BAD; atomic64_add(val, acnt); @@ -195,10 +195,10 @@ ss_active_guard_enter(unsigned long val) */ if (unlikely(!READ_ONCE(__ss_active))) { atomic64_sub(val, acnt); - return SS_BAD; + return T_BAD; } - return SS_OK; + return T_OK; } ALLOW_ERROR_INJECTION(ss_active_guard_enter, ERRNO); @@ -898,7 +898,7 @@ ss_close(struct sock *sk, int flags) }; if (unlikely(!sk)) - return SS_OK; + return T_OK; ss_sk_incoming_cpu_update(sk); cpu = sk->sk_incoming_cpu; @@ -906,7 +906,7 @@ ss_close(struct sock *sk, int flags) sock_hold(sk); ticket = ss_wq_push(&sw, cpu); if (!ticket) - return SS_OK; + return T_OK; if (!(flags & SS_F_SYNC)) goto err; @@ -919,10 +919,10 @@ ss_close(struct sock *sk, int flags) goto err; } - return SS_OK; + return T_OK; err: sock_put(sk); - return SS_BAD; + return T_BAD; } /* @@ -956,7 +956,7 @@ do { \ tp->copied_seq += tcp_fin; ADJUST_PROCESSED_SKB(skb, tp, count, offset, processed); __kfree_skb(skb); - return SS_BAD; + return T_BAD; } while ((skb = ss_skb_dequeue(&skb_head))) { @@ -986,7 +986,7 @@ do { \ ss_skb_chop_head_tail(NULL, skb, offset, 0) != 0)) { __kfree_skb(skb); - r = SS_BAD; + r = T_BAD; goto out; } offset = 0; @@ -1020,7 +1020,7 @@ do { \ sk, smp_processor_id()); ++tp->copied_seq; if (!r) - r = SS_BAD; + r = T_BAD; } while ((skb = ss_skb_dequeue(&skb_head))) { if (unlikely(offset >= skb->len)) { @@ -1154,16 +1154,16 @@ ss_tcp_data_ready(struct sock *sk) } switch (ss_tcp_process_data(sk)) { - case SS_OK: - case SS_POSTPONE: - case SS_DROP: + case T_OK: + case T_POSTPONE: + case T_DROP: SS_STATE_PROCESS_RETURN(sk); return; - case SS_BAD: - case SS_BLOCK_WITH_FIN: + case T_BAD: + case T_BLOCK_WITH_FIN: flags = SS_F_SYNC; break; - case SS_BLOCK_WITH_RST: + case T_BLOCK_WITH_RST: flags = SS_F_ABORT_FORCE; break; default: diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 
9262a274a..11c175451 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -709,6 +709,19 @@ tfw_parse_client_mem(const char *val, unsigned long long *mem) return 0; } +static int +tfw_parse_client_mem(const char *val, unsigned long long *mem) +{ + size_t len = strlen(val); + char *p; + + *mem = memparse(val, &p); + if (p != val + len) + return -EINVAL; + + return 0; +} + static int tfw_cfgop_client_mem(TfwCfgSpec *cs, TfwCfgEntry *ce) { diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 299bb494d..12ce304f2 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -1221,7 +1221,7 @@ int ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, unsigned int *chunks, unsigned int *processed) { - int i, r = SS_OK; + int i, r = T_OK; unsigned int headlen = skb_headlen(skb); unsigned int _processed; struct skb_shared_info *si = skb_shinfo(skb); @@ -1235,7 +1235,7 @@ ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, _processed = 0; r = actor(objdata, skb->data, headlen, &_processed); *processed += _processed; - if (r != SS_POSTPONE) + if (r != T_POSTPONE) return r; } @@ -1251,7 +1251,7 @@ ss_skb_process(struct sk_buff *skb, ss_skb_actor_t actor, void *objdata, r = actor(objdata, skb_frag_address(frag), skb_frag_size(frag), &_processed); *processed += _processed; - if (r != SS_POSTPONE) + if (r != T_POSTPONE) return r; } diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 930a1a687..0bc3bbb33 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -26,31 +26,6 @@ #include #include "str.h" -#include "lib/log.h" - -/** - * Responses from socket hook functions. - */ -enum { - /* Generic socket error. */ - SS_BAD = T_BAD, - /* The packet must be dropped, but connection should be alive. */ - SS_DROP = T_DROP, - /* - * The packet must be blocked with TCP FIN (typically on a - * security event, when we reply to client). - */ - SS_BLOCK_WITH_FIN = T_BLOCK_WITH_FIN, - /* - * The packet must be blocked with TCP RST (typically on a - * security event). 
- */ - SS_BLOCK_WITH_RST = T_BLOCK_WITH_RST, - /* The packet should be stashed (made by callback). */ - SS_POSTPONE = T_POSTPONE, - /* The packet looks good and we can safely pass it. */ - SS_OK = T_OK, -}; typedef int (*on_send_cb_t)(void *conn, struct sk_buff **skb_head); typedef void (*on_tcp_entail_t)(void *conn, struct sk_buff *skb_head); diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index dc364e43f..87a761f7f 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -391,7 +391,7 @@ split_and_parse_n(unsigned char *str, uint32_t type, uint32_t len, * See comments for @do_split_and_parse()/__TRY_PARSE_EXPECT_* */ *fchunks = chunks; - return r <= T_BAD || r == T_OK ? r : T_BAD; + return (r == T_OK || is_tfw_common_error_code(r)) ? r : T_BAD; } /** diff --git a/fw/tls.c b/fw/tls.c index d9ac945e8..d2a0f0690 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -186,7 +186,7 @@ tfw_tls_connection_recv(TfwConn *conn, struct sk_buff *skb) /* Do upcall to http or websocket */ r = tfw_connection_recv(conn, data_up.skb); - if (r && r != T_POSTPONE && r != T_DROP) { + if (tfw_error_code_is_crucial(r)) { ss_kfree_skb(nskb); return r; } diff --git a/lib/log.h b/lib/log.h index 3ad5471d2..2e26f2f6e 100644 --- a/lib/log.h +++ b/lib/log.h @@ -22,47 +22,112 @@ #ifdef __KERNEL__ #include +#include #else #define MAX_ERRNO 4095 #endif /* - * Return codes. + * Tempesta FW common return codes. */ -enum { - /* Compression error during hpack decoding. */ - T_COMPRESSION = -MAX_ERRNO + 7, +typedef enum { + /* The message looks good and we can safely pass it. */ + T_OK = 0, + /* - * Generic error. Connection should be shutdown gracefully - * with TCP_FIN. + * Common error codes boundary. All common error codes should be + * greater than this boundary. Error codes should be listed in this + * enum from the most crucial to the least crucial.
*/ - T_BAD = -MAX_ERRNO + 6, + __T_COMMON_ERROR_CODE_START = -MAX_ERRNO, + /* - * The message must be dropped. Connection should be alive or closed - * with TCP FIN depending on whether we can communicate with this - * client or not. + * The message must be blocked (typically on a security event). + * Tempesta send TCP RST in this case. */ - T_DROP = -MAX_ERRNO + 5, + T_BLOCK_WITH_RST = __T_COMMON_ERROR_CODE_START + 1, + /* * The message must be blocked (typically on a security event). * Tempesta send TCP FIN in this case. */ - T_BLOCK_WITH_FIN = -MAX_ERRNO + 4, + T_BLOCK_WITH_FIN = __T_COMMON_ERROR_CODE_START + 2, + /* - * The message must be blocked (typically on a security event). - * Tempesta send TCP RST in this case. + * Generic error. Connection should be shutdown gracefully + * with TCP_FIN. + */ + T_BAD = __T_COMMON_ERROR_CODE_START + 3, + + /* + * The message must be dropped. Connection should be alive or closed + * with TCP FIN depending on whether we can communicate with this + * client or not. */ - T_BLOCK_WITH_RST = -MAX_ERRNO + 3, + T_DROP = __T_COMMON_ERROR_CODE_START + 4, + + /* The message should be stashed (made by callback). */ + T_POSTPONE = __T_COMMON_ERROR_CODE_START + 5, + + /* Last common error code + 1 */ + __T_COMMON_ERROR_CODE_END, +} TfwRcCommon; + +/* + * Tempesta FW internal error codes. Can be returned from different + * modules (e.g. hpack, frang). Should be converted to common return + * code before use on low level (connection, socket) layer. + */ +typedef enum { + __T_INTERNAL_ERROR_CODE_START = __T_COMMON_ERROR_CODE_END + 1, + + /* Compression error during hpack decoding. */ + T_COMPRESSION = __T_INTERNAL_ERROR_CODE_START + 1, + /* * The message must be blocked (typically on a security event). * Sending TCP RST or TCP FIN depends on block action setting. */ - T_BLOCK = -MAX_ERRNO + 2, - /* The message should be stashed (made by callback). */ - T_POSTPONE = -MAX_ERRNO + 1, - /* The message looks good and we can safely pass it. 
*/ - T_OK = 0, -}; + T_BLOCK = __T_INTERNAL_ERROR_CODE_START + 2, + + /* Last internal error code + 1 */ + __T_INTERNAL_ERROR_CODE_END, +} TfwInternalErrCodes; + +static inline bool +is_tfw_common_error_code(int err_code) +{ + return err_code > __T_COMMON_ERROR_CODE_START + && err_code < __T_COMMON_ERROR_CODE_END; +} + +static inline bool +is_tfw_internal_error_code(int err_code) +{ + return err_code > __T_INTERNAL_ERROR_CODE_START + && err_code < __T_INTERNAL_ERROR_CODE_END; +} + +static inline bool +tfw_error_code_more_crucial(int err_code1, int err_code2) +{ + WARN_ON_ONCE(err_code1 && !is_tfw_common_error_code(err_code1) + && !is_tfw_internal_error_code(err_code1)); + WARN_ON_ONCE(err_code2 && !is_tfw_common_error_code(err_code2) + && !is_tfw_internal_error_code(err_code2)); + + return err_code1 < err_code2; +} + +static inline bool +tfw_error_code_is_crucial(int err_code) +{ + /* + * Also works with system error codes, not only Tempesta FW + * error codes. + */ + return err_code && err_code != T_POSTPONE && err_code != T_DROP; +} /* * BANNER variable must be defined before including the file! From 3b24d5ef5cb73c5b9306f1d79240da5ac3aebb9d Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Thu, 16 Apr 2026 08:24:14 +0300 Subject: [PATCH 21/23] Improve performance during checking client memory Do not check client memory consumption during parsing requests/http2 frame processing, check it only at the end of softirq rx path.
Under load we can process a few more requests before dropping the connection and blocking the client, but in this case we have no performance degradation: 1396 with memory check: small: finished in 50.03s, 1308389.70 req/s, 1013.29MB/s finished in 50.03s, 1303456.74 req/s, 1010.62MB/s finished in 50.03s, 1298829.02 req/s, 1007.03MB/s large: finished in 50.08s, 103259.90 req/s, 9.88GB/s finished in 50.08s, 111437.44 req/s, 10.66GB/s finished in 50.07s, 101983.44 req/s, 9.75GB/s finished in 50.07s, 111777.22 req/s, 10.69GB/s master: small: finished in 50.03s, 1330912.18 req/s, 1.01GB/s finished in 50.03s, 1343435.90 req/s, 1.02GB/s finished in 50.03s, 1344150.06 req/s, 1.02GB/s large: finished in 50.04s, 98945.70 req/s, 9.46GB/s finished in 50.04s, 96055.18 req/s, 9.19GB/s finished in 50.04s, 102439.50 req/s, 9.80GB/s --- fw/connection.c | 4 ++-- fw/connection.h | 4 ++-- fw/http.c | 14 ++++---------- fw/http_frame.c | 5 ----- fw/sock.c | 6 ++++-- fw/sock_clnt.c | 1 + fw/sync_socket.h | 2 +- fw/tls.c | 4 ++-- fw/websocket.c | 25 +++++++++++++++++-------- 9 files changed, 33 insertions(+), 32 deletions(-) diff --git a/fw/connection.c b/fw/connection.c index 5b92b3261..d876d4b3a 100644 --- a/fw/connection.c +++ b/fw/connection.c @@ -204,10 +204,10 @@ tfw_connection_recv(TfwConn *conn, struct sk_buff *skb) return (r == T_OK || is_tfw_common_error_code(r)) ? r : T_BAD; } -void +int tfw_connection_recv_finish(TfwConn *conn) { - TFW_CONN_HOOK_CALL(conn, conn_recv_finish); + return TFW_CONN_HOOK_CALL(conn, conn_recv_finish); } void diff --git a/fw/connection.h b/fw/connection.h index 43beabfa5..ee0d6d378 100644 --- a/fw/connection.h +++ b/fw/connection.h @@ -360,7 +360,7 @@ typedef struct { /* * Called after processing all socket received queue.
*/ - void (*conn_recv_finish)(TfwConn *conn); + int (*conn_recv_finish)(TfwConn *conn); } TfwConnHooks; #define TFW_CONN_MAX_PROTOS TFW_GFSM_FSM_N @@ -623,7 +623,7 @@ void tfw_connection_hooks_register(TfwConnHooks *hooks, int type); void tfw_connection_hooks_unregister(int type); int tfw_connection_send(TfwConn *conn, TfwMsg *msg); int tfw_connection_recv(TfwConn *conn, struct sk_buff *skb); -void tfw_connection_recv_finish(TfwConn *conn); +int tfw_connection_recv_finish(TfwConn *conn); /* Generic helpers, used for both client and server connections. */ void tfw_connection_init(TfwConn *conn); diff --git a/fw/http.c b/fw/http.c index 158a95688..6d7f5c229 100644 --- a/fw/http.c +++ b/fw/http.c @@ -3187,11 +3187,14 @@ tfw_http_conn_send(TfwConn *conn, TfwMsg *msg) return ss_send(conn->sk, &msg->skb_head, msg->ss_flags); } -static void +static int tfw_http_conn_recv_finish(TfwConn *conn) { if (TFW_FSM_TYPE(conn->proto.type) == TFW_FSM_H2) tfw_h2_conn_recv_finish(conn); + if (unlikely(frang_client_mem_limit((TfwCliConn *)conn, true))) + return T_BLOCK_WITH_RST; + return 0; } /** @@ -6601,15 +6604,6 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, skb->truesize); } - r = frang_client_mem_limit((TfwCliConn *)conn, false); - if (unlikely(r)) { - BUG_ON(r != T_BLOCK); - TFW_INC_STAT_BH(clnt.msgs_filtout); - return tfw_http_req_parse_block(req, 403, - "parsed request has been filtered out", - HTTP2_ECODE_PROTO); - } - r = ss_skb_process(skb, actor, req, &req->chunk_cnt, &parsed); req->msg.len += parsed; TFW_ADD_STAT_BH(parsed, clnt.rx_bytes); diff --git a/fw/http_frame.c b/fw/http_frame.c index d0511525d..2acf00a48 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -1879,11 +1879,6 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) parsed, skb->len); } - r = frang_client_mem_limit((TfwCliConn *)c, true); - if (unlikely(r)) - return T_BLOCK_WITH_RST; - - /* * For fully received frames possibly there are other 
frames * in the current @skb, so create an skb sibling with next diff --git a/fw/sock.c b/fw/sock.c index f0e11b322..281077ae7 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -1059,7 +1059,7 @@ do { \ static int ss_tcp_process_data(struct sock *sk) { - int r = 0, count, processed = 0; + int tmp_r, r = 0, count, processed = 0; unsigned int skb_len, skb_seq; struct sk_buff *skb, *tmp; struct tcp_sock *tp = tcp_sk(sk); @@ -1094,7 +1094,9 @@ ss_tcp_process_data(struct sock *sk) skb_len); } out: - SS_CALL(connection_recv_finish, sk->sk_user_data); + tmp_r = SS_CALL(connection_recv_finish, sk->sk_user_data); + if (unlikely(tfw_error_code_more_crucial(tmp_r, r))) + r = tmp_r; /* * Recalculate an appropriate TCP receive buffer space diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 11c175451..2544b94dc 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -359,6 +359,7 @@ static const SsHooks tfw_sock_http_clnt_ss_hooks = { .connection_new = tfw_sock_clnt_new, .connection_drop = tfw_sock_clnt_drop, .connection_recv = tfw_connection_recv, + .connection_recv_finish = tfw_connection_recv_finish, .connection_on_shutdown = tfw_cli_conn_on_shutdown, }; diff --git a/fw/sync_socket.h b/fw/sync_socket.h index c47dd0581..69123b034 100644 --- a/fw/sync_socket.h +++ b/fw/sync_socket.h @@ -91,7 +91,7 @@ typedef struct ss_hooks { int (*connection_recv)(TfwConn *conn, struct sk_buff *skb); /* Callback to make some job after processing received data. */ - void (*connection_recv_finish)(TfwConn *conn); + int (*connection_recv_finish)(TfwConn *conn); /* Callback to make some job on connection shutdown. 
*/ void (*connection_on_shutdown)(TfwConn *conn); diff --git a/fw/tls.c b/fw/tls.c index d2a0f0690..09de7c91f 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -809,10 +809,10 @@ tfw_tls_conn_send(TfwConn *c, TfwMsg *msg) return r; } -static void +static int tfw_tls_conn_recv_finish(TfwConn *c) { - tfw_conn_hook_call(TFW_FSM_HTTP, c, conn_recv_finish); + return tfw_conn_hook_call(TFW_FSM_HTTP, c, conn_recv_finish); } static TfwConnHooks tls_conn_hooks = { diff --git a/fw/websocket.c b/fw/websocket.c index 5ef4c6494..3c813b3a2 100644 --- a/fw/websocket.c +++ b/fw/websocket.c @@ -357,18 +357,27 @@ tfw_ws_conn_send(TfwConn *conn, TfwMsg *msg) return r; } +static int +tfw_ws_conn_recv_finish(TfwConn *conn) +{ + return tfw_conn_hook_call(TFW_CONN_HTTP_TYPE(conn), conn, + conn_recv_finish); +} + static TfwConnHooks ws_conn_hooks = { - .conn_close = tfw_ws_conn_close, - .conn_abort = tfw_ws_conn_abort, - .conn_drop = tfw_ws_conn_drop, - .conn_send = tfw_ws_conn_send, + .conn_close = tfw_ws_conn_close, + .conn_abort = tfw_ws_conn_abort, + .conn_drop = tfw_ws_conn_drop, + .conn_send = tfw_ws_conn_send, + .conn_recv_finish = tfw_ws_conn_recv_finish, }; static TfwConnHooks wss_conn_hooks = { - .conn_close = tfw_ws_conn_close, - .conn_abort = tfw_ws_conn_abort, - .conn_drop = tfw_ws_conn_drop, - .conn_send = tfw_ws_conn_send, + .conn_close = tfw_ws_conn_close, + .conn_abort = tfw_ws_conn_abort, + .conn_drop = tfw_ws_conn_drop, + .conn_send = tfw_ws_conn_send, + .conn_recv_finish = tfw_ws_conn_recv_finish, }; /* From a05a385f65b4fe44c44cd9bb265ef21b0d535680 Mon Sep 17 00:00:00 2001 From: EvgeniiMekhanik Date: Thu, 16 Apr 2026 11:29:38 +0300 Subject: [PATCH 22/23] Update copyright years --- fw/apm.c | 2 +- fw/cache.c | 2 +- fw/hpack.c | 2 +- fw/hpack.h | 2 +- fw/http.h | 2 +- fw/http2.c | 2 +- fw/http_frame.h | 2 +- fw/http_msg.c | 2 +- fw/http_msg.h | 2 +- fw/http_sess.c | 2 +- fw/http_stream.c | 2 +- fw/http_stream.h | 2 +- fw/http_tbl.c | 2 +- fw/http_types.h | 2 +- fw/msg.c | 2 +- 
fw/msg.h | 2 +- fw/pool.c | 2 +- fw/pool.h | 2 +- fw/sock.c | 2 +- fw/ss_skb.c | 2 +- fw/ss_skb.h | 2 +- fw/sync_socket.h | 2 +- fw/t/unit/helpers.h | 2 +- fw/t/unit/test.c | 2 +- fw/t/unit/test_http1_parser.c | 2 +- fw/t/unit/test_http2_parser.c | 2 +- fw/t/unit/test_http2_parser_hpack.c | 2 +- fw/t/unit/test_http_match.c | 2 +- fw/t/unit/test_http_msg.c | 2 +- fw/t/unit/test_http_parser_common.c | 2 +- fw/t/unit/test_http_parser_common.h | 2 +- fw/t/unit/test_pool.c | 2 +- fw/t/unit/tfw_str_helper.c | 2 +- fw/tcp.h | 2 +- fw/tf_filter.c | 2 +- fw/tls.c | 2 +- lib/fault_injection_alloc.c | 2 +- lib/fault_injection_alloc.h | 2 +- lib/log.h | 2 +- tls/mpool.c | 2 +- tls/tls_ticket.c | 2 +- tls/x509_crt.c | 2 +- 42 files changed, 42 insertions(+), 42 deletions(-) diff --git a/fw/apm.c b/fw/apm.c index 00e418c6a..c6b63951f 100644 --- a/fw/apm.c +++ b/fw/apm.c @@ -1,7 +1,7 @@ /* * Tempesta FW * - * Copyright (C) 2016-2025 Tempesta Technologies, Inc. + * Copyright (C) 2016-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/fw/cache.c b/fw/cache.c index 4d10a3484..79c69acb2 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -4,7 +4,7 @@ * HTTP cache (RFC 7234). * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/hpack.c b/fw/hpack.c index ee07d0c9c..f5f4b0ddf 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/hpack.h b/fw/hpack.h index 6780aee13..1a139b5bb 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http.h b/fw/http.h index d39656be9..c77100f83 100644 --- a/fw/http.h +++ b/fw/http.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http2.c b/fw/http2.c index 380be4f18..c29f180ea 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2024-2025 Tempesta Technologies, Inc. + * Copyright (C) 2024-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_frame.h b/fw/http_frame.h index 385ae9d45..fed94c2e9 100644 --- a/fw/http_frame.h +++ b/fw/http_frame.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2022-2025 Tempesta Technologies, Inc. + * Copyright (C) 2022-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_msg.c b/fw/http_msg.c index b637600f4..09440a53d 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -4,7 +4,7 @@ * HTTP message manipulation helpers for the protocol processing. 
* * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_msg.h b/fw/http_msg.h index 4408017b4..b255f25c5 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_sess.c b/fw/http_sess.c index 927bb2a6a..6806a82fd 100644 --- a/fw/http_sess.c +++ b/fw/http_sess.c @@ -29,7 +29,7 @@ * JS challenge client should execute it and send new request with * appropriate cookie just in time. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_stream.c b/fw/http_stream.c index 88ed2daae..2466eb7bc 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_stream.h b/fw/http_stream.h index f9a7263a9..4e9b2c5d4 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2019-2025 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_tbl.c b/fw/http_tbl.c index 9f4394ba6..f3d4dde9f 100644 --- a/fw/http_tbl.c +++ b/fw/http_tbl.c @@ -83,7 +83,7 @@ * - Extended string matching operators: "regex", "substring". * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/http_types.h b/fw/http_types.h index 8ce576807..ee8f055d0 100644 --- a/fw/http_types.h +++ b/fw/http_types.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2018-2025 Tempesta Technologies, Inc. + * Copyright (C) 2018-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/msg.c b/fw/msg.c index ab2d14f01..db73ba4c2 100644 --- a/fw/msg.c +++ b/fw/msg.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2018-2023 Tempesta Technologies, Inc. + * Copyright (C) 2018-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/msg.h b/fw/msg.h index 96cf1698a..2460b7622 100644 --- a/fw/msg.h +++ b/fw/msg.h @@ -4,7 +4,7 @@ * Generic protocol message. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2023 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/pool.c b/fw/pool.c index 87f1146aa..ec920fab2 100644 --- a/fw/pool.c +++ b/fw/pool.c @@ -25,7 +25,7 @@ * be immediately freed to keep stack-like memory management. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/pool.h b/fw/pool.h index 6e33d818c..7fa305e0d 100644 --- a/fw/pool.h +++ b/fw/pool.h @@ -4,7 +4,7 @@ * Memory pool. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/sock.c b/fw/sock.c index 281077ae7..ca646bb68 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -2,7 +2,7 @@ * Synchronous Socket API. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/ss_skb.c b/fw/ss_skb.c index 12ce304f2..e2596fdd9 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -7,7 +7,7 @@ * on top on native Linux socket buffers. The helpers provide common and * convenient wrappers for skb processing. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 0bc3bbb33..b0febb0c0 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -3,7 +3,7 @@ * * Synchronous Sockets API for Linux socket buffers manipulation. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/sync_socket.h b/fw/sync_socket.h index 69123b034..8fcbd9ccc 100644 --- a/fw/sync_socket.h +++ b/fw/sync_socket.h @@ -2,7 +2,7 @@ * Synchronous Socket API. * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/helpers.h b/fw/t/unit/helpers.h index 6ce68ea0c..107f2142b 100644 --- a/fw/t/unit/helpers.h +++ b/fw/t/unit/helpers.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2021 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test.c b/fw/t/unit/test.c index 82c11a254..a2e444ffb 100644 --- a/fw/t/unit/test.c +++ b/fw/t/unit/test.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http1_parser.c b/fw/t/unit/test_http1_parser.c index 35d2e86b0..b19e89520 100644 --- a/fw/t/unit/test_http1_parser.c +++ b/fw/t/unit/test_http1_parser.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http2_parser.c b/fw/t/unit/test_http2_parser.c index 45b91e4a2..cecbd16e1 100644 --- a/fw/t/unit/test_http2_parser.c +++ b/fw/t/unit/test_http2_parser.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2022-2025 Tempesta Technologies, Inc. + * Copyright (C) 2022-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http2_parser_hpack.c b/fw/t/unit/test_http2_parser_hpack.c index 808acab5b..4bde6da7d 100644 --- a/fw/t/unit/test_http2_parser_hpack.c +++ b/fw/t/unit/test_http2_parser_hpack.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2024 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http_match.c b/fw/t/unit/test_http_match.c index b20f009c5..bcb9b576a 100644 --- a/fw/t/unit/test_http_match.c +++ b/fw/t/unit/test_http_match.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2022 Tempesta Technologies, Inc. 
+ * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 24edb075a..6a9e5a3cc 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2023-2025 Tempesta Technologies, Inc. + * Copyright (C) 2023-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http_parser_common.c b/fw/t/unit/test_http_parser_common.c index 87a761f7f..df32b5f3b 100644 --- a/fw/t/unit/test_http_parser_common.c +++ b/fw/t/unit/test_http_parser_common.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_http_parser_common.h b/fw/t/unit/test_http_parser_common.h index 0d284e84b..65a094655 100644 --- a/fw/t/unit/test_http_parser_common.h +++ b/fw/t/unit/test_http_parser_common.h @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/test_pool.c b/fw/t/unit/test_pool.c index f778c9a1d..deb4f0973 100644 --- a/fw/t/unit/test_pool.c +++ b/fw/t/unit/test_pool.c @@ -2,7 +2,7 @@ * Tempesta FW * * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com). 
- * Copyright (C) 2015-2024 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/t/unit/tfw_str_helper.c b/fw/t/unit/tfw_str_helper.c index 8e8bcdd3e..a5e15d28a 100644 --- a/fw/t/unit/tfw_str_helper.c +++ b/fw/t/unit/tfw_str_helper.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2015-2019 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/tcp.h b/fw/tcp.h index 8650c913f..63e39bfbe 100644 --- a/fw/tcp.h +++ b/fw/tcp.h @@ -1,7 +1,7 @@ /** * TCP Socket API. * - * Copyright (C) 2015-2023 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/tf_filter.c b/fw/tf_filter.c index 02f9635bd..c8b214f8a 100644 --- a/fw/tf_filter.c +++ b/fw/tf_filter.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/fw/tls.c b/fw/tls.c index 09de7c91f..e7e160511 100644 --- a/fw/tls.c +++ b/fw/tls.c @@ -3,7 +3,7 @@ * * Transport Layer Security (TLS) interfaces to Tempesta TLS. * - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/lib/fault_injection_alloc.c b/lib/fault_injection_alloc.c index bc29eebfe..7240b85d6 100644 --- a/lib/fault_injection_alloc.c +++ b/lib/fault_injection_alloc.c @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/lib/fault_injection_alloc.h b/lib/fault_injection_alloc.h index 50ae90942..b75f7345f 100644 --- a/lib/fault_injection_alloc.h +++ b/lib/fault_injection_alloc.h @@ -1,7 +1,7 @@ /** * Tempesta FW * - * Copyright (C) 2025 Tempesta Technologies, Inc. + * Copyright (C) 2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/lib/log.h b/lib/log.h index 2e26f2f6e..e11e0e623 100644 --- a/lib/log.h +++ b/lib/log.h @@ -1,7 +1,7 @@ /** * Tempesta kernel library * - * Copyright (C) 2015-2025 Tempesta Technologies, INC. + * Copyright (C) 2015-2026 Tempesta Technologies, INC. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by diff --git a/tls/mpool.c b/tls/mpool.c index 1c1d45023..98bdcb464 100644 --- a/tls/mpool.c +++ b/tls/mpool.c @@ -19,7 +19,7 @@ * implicitly for MPI math. Dynamically allocated pages are used instead of * static per-cpu ones. * - * Copyright (C) 2019-2024 Tempesta Technologies, Inc. + * Copyright (C) 2019-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/tls/tls_ticket.c b/tls/tls_ticket.c index 6700530b9..6a90419ae 100644 --- a/tls/tls_ticket.c +++ b/tls/tls_ticket.c @@ -6,7 +6,7 @@ * Based on mbed TLS, https://tls.mbed.org. * * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/tls/x509_crt.c b/tls/x509_crt.c index 52275b38e..c91c88628 100644 --- a/tls/x509_crt.c +++ b/tls/x509_crt.c @@ -15,7 +15,7 @@ * Based on mbed TLS, https://tls.mbed.org. * * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved - * Copyright (C) 2015-2025 Tempesta Technologies, Inc. + * Copyright (C) 2015-2026 Tempesta Technologies, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From cf3214d8ebad79f7b78b699b0fa2bf21f4f5feff Mon Sep 17 00:00:00 2001 From: Alexander Krizhanovsky Date: Sat, 25 Apr 2026 22:11:35 +0200 Subject: [PATCH 23/23] Adjust coding style and add a minor comment --- fw/client.c | 36 ++++++++++++++++++------------------ fw/hpack.c | 8 ++++---- fw/http.c | 11 +++++++++-- fw/http2.c | 4 ++-- fw/http2.h | 2 +- fw/http_frame.c | 4 ++-- fw/http_limits.c | 31 +++++++++++++++---------------- fw/http_parser.c | 4 ++-- fw/http_stream.c | 2 +- fw/http_stream_sched.c | 6 +++--- fw/http_stream_sched.h | 2 +- fw/sock.c | 4 ++-- fw/sock_clnt.c | 15 +-------------- fw/token_tables.h | 4 ++-- fw/vhost.c | 2 +- 15 files changed, 64 insertions(+), 71 deletions(-) diff --git a/fw/client.c b/fw/client.c index 520ea4072..e6b9eb9ce 100644 --- a/fw/client.c +++ b/fw/client.c @@ -38,9 +38,9 @@ #define UA_CMP_LEN 256 static struct { - const char *db_path; - unsigned long db_size; - unsigned int lru_size; + const char *db_path; + unsigned long db_size; + unsigned int lru_size; } client_cfg __read_mostly; /** @@ -52,15 +52,15 @@ static struct { * @user_agent - UA_CMP_LEN first characters of User-Agent */ typedef struct { - TfwClient cli; - TfwAddr xff_addr; - unsigned long user_agent_len; - char user_agent[UA_CMP_LEN]; + TfwClient cli; + TfwAddr xff_addr; + unsigned long user_agent_len; + char user_agent[UA_CMP_LEN]; } TfwClientEntry; static struct { - struct list_head head; - unsigned int lru_size; + struct list_head head; + unsigned int lru_size; } client_lru = { .head = LIST_HEAD_INIT(client_lru.head), .lru_size = 0, @@ -73,15 +73,15 @@ static DECLARE_WAIT_QUEUE_HEAD(shutdown_wq); static struct kmem_cache *cli_mem_cache; static struct { - TfwClientMem *mem; - struct list_head free_list; - unsigned int size; - unsigned int order; + TfwClientMem *mem; + struct list_head free_list; + unsigned int size; + 
unsigned int order; } cli_mem_pool = { - .mem = NULL, - .free_list = LIST_HEAD_INIT(cli_mem_pool.free_list), - .size = 0, - .order = 0, + .mem = NULL, + .free_list = LIST_HEAD_INIT(cli_mem_pool.free_list), + .size = 0, + .order = 0, }; static inline bool @@ -207,7 +207,7 @@ tfw_cli_mem_pool_init(void) int i, r; if (WARN_ON_ONCE(!client_cfg.lru_size)) - return -EINVAL; + return -EINVAL; order = get_order(sizeof(TfwClientMem) * client_cfg.lru_size); if (order > MAX_PAGE_ORDER) diff --git a/fw/hpack.c b/fw/hpack.c index f5f4b0ddf..999e7529f 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -2363,11 +2363,11 @@ do { \ * Compare split header/value against values stored inside * node and return positive/negative/zero depending on their * relation. - * + * * The order geven by this function is the following: * (hdr_name_len, total_hdr_len, special_strcmp) * where hdr_name_len and total_hdr_len are compared as integers. - * + * * Where special_strcmp is case-insensitive for header names, * case-sensitive for header values and in both cases it compares * multiple characters per instruction, so don't expect strict @@ -2382,7 +2382,7 @@ tfw_hpack_node_compare(const TfwStr *__restrict h_name, unsigned len; const char *np, *p; const TfwStr *c, *end; - + if (h_name->len != node->name_len) return (int)h_name->len - (int)node->name_len; @@ -2412,7 +2412,7 @@ tfw_hpack_node_compare(const TfwStr *__restrict h_name, if (nm_node) *nm_node = node; - + len = h_name->len + h_val->len; if (len != node->hdr_len) return (int)len - (int)node->hdr_len; diff --git a/fw/http.c b/fw/http.c index 6d7f5c229..d7960ddac 100644 --- a/fw/http.c +++ b/fw/http.c @@ -3133,7 +3133,7 @@ tfw_http_conn_cli_drop(TfwCliConn *cli_conn) smp_mb__before_atomic(); set_bit(TFW_HTTP_B_REQ_DROP, req->flags); if (unused) { - tfw_http_free_req_carefully(req, &resp_del_queue); + tfw_http_free_req_carefully(req, &resp_del_queue); TFW_INC_STAT_BH(serv.msgs_otherr); } } @@ -3192,8 +3192,15 @@ tfw_http_conn_recv_finish(TfwConn 
*conn) { if (TFW_FSM_TYPE(conn->proto.type) == TFW_FSM_H2) tfw_h2_conn_recv_finish(conn); + + /* + * A SoftIRQ shot is very short, 0.001 to 0.01 sec, so we can account + * client memory once per softirq shot - this is quite a fast response + * to a DDoS attack. + */ if (unlikely(frang_client_mem_limit((TfwCliConn *)conn, true))) return T_BLOCK_WITH_RST; + return 0; } @@ -7000,7 +7007,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb, tfw_http_send_err_resp(req, 500, "request dropped:" " processing error"); TFW_INC_STAT_BH(clnt.msgs_otherr); - } + } /* * According to RFC 7230 6.3.2, connection with a client * must be dropped after a response is sent to that client, diff --git a/fw/http2.c b/fw/http2.c index c29f180ea..21c1ab1f6 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -352,7 +352,7 @@ tfw_h2_alloc_stream_sched_entry(TfwH2Ctx *ctx) * 15 - 20 streams in parallel even if there are much * more resourses to request). TfwStreamSchedEntry is * small (64 bytes), so use special cache for allocation. - */ + */ entry = kmem_cache_alloc(stream_sched_cache, GFP_ATOMIC | __GFP_ZERO); } else { @@ -604,7 +604,7 @@ tfw_h2_hpack_encode_trailer_headers(TfwHttpResp *resp) /* * TODO #2136: Remove this flag during reworking - * `tfw_http_msg_expand_from_pool` function. + * `tfw_http_msg_expand_from_pool` function. 
*/ __set_bit(TFW_HTTP_B_RESP_ENCODE_TRAILERS, resp->flags); diff --git a/fw/http2.h b/fw/http2.h index b8d9b35dd..e95e97d9d 100644 --- a/fw/http2.h +++ b/fw/http2.h @@ -59,7 +59,7 @@ typedef struct { */ typedef struct { unsigned int ping_cnt; - unsigned int settings_cnt; + unsigned int settings_cnt; unsigned int rst_cnt; unsigned int priority_cnt; unsigned int ts; diff --git a/fw/http_frame.c b/fw/http_frame.c index 2acf00a48..e7d1bf5a8 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -1135,7 +1135,7 @@ tfw_h2_ctrl_frame_limit(TfwH2Ctx *ctx, TfwFrameType hdr_type) break; } - return true; + return true; } /* @@ -1180,7 +1180,7 @@ do { \ goto conn_term; if (unlikely(!tfw_h2_ctrl_frame_limit(ctx, hdr_type))) - return T_BLOCK_WITH_RST; + return T_BLOCK_WITH_RST; /* * TODO: RFC 7540 Section 6.2: diff --git a/fw/http_limits.c b/fw/http_limits.c index 76fba65ac..566de32fd 100644 --- a/fw/http_limits.c +++ b/fw/http_limits.c @@ -1675,28 +1675,27 @@ int frang_client_mem_limit(TfwCliConn *conn, bool block_if_exceeded) { TfwClient *cli = (TfwClient *)conn->peer; + TfwVhost *dflt_vh; - if (tfw_cli_hard_mem_limit - && tfw_client_mem(cli) > tfw_cli_hard_mem_limit) - { - if (block_if_exceeded) { - TfwVhost *dflt_vh = tfw_vhost_lookup_default(); - - if (WARN_ON_ONCE(!dflt_vh)) - return T_BLOCK; + if (likely(!tfw_cli_hard_mem_limit + || tfw_client_mem(cli) <= tfw_cli_hard_mem_limit)) + return 0; - if (dflt_vh->frang_gconf->ip_block) { - unsigned int duration = - dflt_vh->frang_gconf->ip_block_duration; + if (!block_if_exceeded) + return T_BLOCK; - tfw_filter_block_ip(cli, duration); - } - tfw_vhost_put(dflt_vh); - } + dflt_vh = tfw_vhost_lookup_default(); + if (WARN_ON_ONCE(!dflt_vh)) return T_BLOCK; + + if (dflt_vh->frang_gconf->ip_block) { + unsigned int duration = dflt_vh->frang_gconf->ip_block_duration; + + tfw_filter_block_ip(cli, duration); } + tfw_vhost_put(dflt_vh); - return 0; + return T_BLOCK; } diff --git a/fw/http_parser.c b/fw/http_parser.c index 
c5864aab7..3642b87ba 100644 --- a/fw/http_parser.c +++ b/fw/http_parser.c @@ -5046,7 +5046,7 @@ tfw_http_parse_req(void *req_data, unsigned char *data, unsigned int len, /* HTTP method. */ __FSM_STATE(Req_Method, hot) { parser->_hdr_tag = TFW_HTTP_METHOD; - /* + /* * Open header manually. HTTP method is not a header, storing * it in @msg->h_tbl it's only optimization to not introduce * new field into TfwHttpReq. Using @tfw_http_msg_hdr_open @@ -10515,7 +10515,7 @@ tfw_h2_parse_req_hdr_val(unsigned char *data, unsigned long len, TfwHttpReq *req } __FSM_STATE(Req_Mark, hot) { - __FSM_H2_PSHDR_MOVE_FIN_fixup(Req_Mark, 1, Req_Path); + __FSM_H2_PSHDR_MOVE_FIN_fixup(Req_Mark, 1, Req_Path); } __FSM_STATE(Req_Path) { diff --git a/fw/http_stream.c b/fw/http_stream.c index 2466eb7bc..923ea20e0 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -249,7 +249,7 @@ tfw_h2_stream_create(TfwH2Ctx *ctx, unsigned int id) void tfw_h2_stream_clean(TfwH2Ctx *ctx, TfwStream *stream) { - T_DBG3("Stop and delete stream (id %u state %d(%s) weight %u)," + T_DBG3("Stop and delete stream (id %u state %d(%s) weight %u)," " ctx %px streams num %lu\n", stream->id, tfw_h2_get_stream_state(stream), __h2_strm_st_n(stream), stream->weight, ctx, ctx->streams_num); diff --git a/fw/http_stream_sched.c b/fw/http_stream_sched.c index 083adbfd5..7eeb8afd5 100644 --- a/fw/http_stream_sched.c +++ b/fw/http_stream_sched.c @@ -27,7 +27,7 @@ * are requested sequentially. Progressive JPEGS (only for Firefox browser) * is a very rare case, so we decide to process requests from streams from * larger to smaller weight. - * + * * When we search for the most priority stream we iterate over the levels of * the priority tree. 
For exanple: * 1 (256) @@ -234,7 +234,7 @@ static void __tfw_h2_stream_sched_remove(TfwStreamSched *sched, TfwStream *stream) { TfwStreamSchedEntry *parent = stream->sched->parent; - + tfw_h2_stream_sched_spin_lock_assert(sched); list_del_init(&stream->sched_node); stream->sched_state = HTTP2_STREAM_SCHED_STATE_UNKNOWN; @@ -453,7 +453,7 @@ tfw_h2_remove_stream_dep(TfwStreamSched *sched, TfwStream *stream) * Here we move children of the removed stream to the parent * scheduler. If parent scheduler has no children we move * current removed stream children as is (saving their weight) - * Otherwise we recalculate their weight according RFC. + * Otherwise we recalculate their weight according RFC. */ parent_has_children = tfw_h2_stream_sched_has_children(parent); diff --git a/fw/http_stream_sched.h b/fw/http_stream_sched.h index ceee36989..5fcabff52 100644 --- a/fw/http_stream_sched.h +++ b/fw/http_stream_sched.h @@ -34,7 +34,7 @@ * free list; * @active - head of the active streams scheduler list; * @blocked - head of the blocked streams scheduler list; - */ + */ typedef struct tfw_stream_sched_entry_t { u64 total_weight; long int active_cnt; diff --git a/fw/sock.c b/fw/sock.c index ca646bb68..43e860ac5 100644 --- a/fw/sock.c +++ b/fw/sock.c @@ -497,7 +497,7 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, while ((*snd_wnd = tfw_tcp_calc_snd_wnd(sk, mss_now))) { struct sk_buff *skb = ss_skb_dequeue(skb_head); - + if (!skb) break; /* @@ -550,7 +550,7 @@ ss_skb_tcp_entail_list(struct sock *sk, struct sk_buff **skb_head, if (*skb_head && !TFW_SKB_CB(*skb_head)->is_head) ss_skb_setup_head_of_list(*skb_head, mark, tls_type); - return r; + return r; } /** diff --git a/fw/sock_clnt.c b/fw/sock_clnt.c index 2544b94dc..3beb82ddb 100644 --- a/fw/sock_clnt.c +++ b/fw/sock_clnt.c @@ -351,7 +351,7 @@ tfw_cli_conn_on_shutdown(TfwConn *conn) * connection keep alive timer. 
Since we decide to patch and * reuse TCP keep alive timer, we don't need this callback and * should directly modify TCP keep alive timer from sock.c. - */ + */ tfw_cli_conn_mod_timer((TfwCliConn *)conn, tcp_fin_timeout); } @@ -710,19 +710,6 @@ tfw_parse_client_mem(const char *val, unsigned long long *mem) return 0; } -static int -tfw_parse_client_mem(const char *val, unsigned long long *mem) -{ - size_t len = strlen(val); - char *p; - - *mem = memparse(val, &p); - if (p != val + len) - return -EINVAL; - - return 0; -} - static int tfw_cfgop_client_mem(TfwCfgSpec *cs, TfwCfgEntry *ce) { diff --git a/fw/token_tables.h b/fw/token_tables.h index 7cbf8cd35..120926179 100644 --- a/fw/token_tables.h +++ b/fw/token_tables.h @@ -47,7 +47,7 @@ /* * ASCII table column bitmaps for HTTP token, e.g. header name (RFC 7230 3.2.6). - * + * * ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz * !#$%&'*+-.^_`|~0123456789 */ @@ -94,7 +94,7 @@ /* * ASCII codes to accept HTTP header values - * + * * RFC 7230, Appendix B; RFC 5234, Appendix B.1.: * field-value OWS = VCHAR SP HTAB = %x9 %x20-7E * diff --git a/fw/vhost.c b/fw/vhost.c index dd6511b4c..8daeceb04 100644 --- a/fw/vhost.c +++ b/fw/vhost.c @@ -994,7 +994,7 @@ tfw_cfgop_cache_use_stale(TfwCfgSpec *cs, TfwCfgEntry *ce, TfwLocation *loc) TFW_CFG_CHECK_NO_ATTRS(cs, ce); TFW_CFG_CHECK_VAL_N(>=, 1, cs, ce); - /* + /* * TODO: Revise and remove after #2123. */ if (tfw_vhost_is_default_reconfig(tfw_vhost_entry)) {