diff --git a/bbinc/tohex.h b/bbinc/tohex.h index d48c42dd00..9be0fb3d59 100644 --- a/bbinc/tohex.h +++ b/bbinc/tohex.h @@ -19,6 +19,7 @@ #include #include char *util_tohex(char *out, const char *in, size_t len); +int util_tobytes(char *out, const char *hex, size_t len); void hexdumpbuf(const char *key, int keylen, char **buf); void hexdump(loglvl lvl, const char *key, int keylen); void hexdumpdbt(DBT *dbt); diff --git a/db/fingerprint.h b/db/fingerprint.h new file mode 100644 index 0000000000..329a6d59c4 --- /dev/null +++ b/db/fingerprint.h @@ -0,0 +1,20 @@ +/* + Copyright 2025 Bloomberg Finance L.P. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#ifndef _FINGERPRINT_H_ +#define _FINGERPRINT_H_ +#define FINGERPRINTSZ 16 +#endif diff --git a/db/reqlog.c b/db/reqlog.c index 78c1e29bdf..1d259ec8cb 100644 --- a/db/reqlog.c +++ b/db/reqlog.c @@ -800,6 +800,42 @@ void reqlog_process_message(char *line, int st, int lline) gbl_long_request_ms = toknum(tok, ltok); logmsg(LOGMSG_USER, "Long request threshold now %d msec\n", gbl_long_request_ms); + } else if (tokcmp(tok, ltok, "excludefp") == 0) { + tok = segtok(line, lline, &st, <ok); + if (ltok == 0) { + logmsg(LOGMSG_ERROR, "Usage: excludefp ..\n"); + return; + } + while (ltok != 0) { + char str[(FINGERPRINTSZ * 2) + 1]; + char fp[FINGERPRINTSZ] = {0}; + tokcpy(tok, ltok, str); + if (util_tobytes(fp, str, FINGERPRINTSZ)) { + logmsg(LOGMSG_ERROR, "Invalid fingerprint format %s\n", str); + } else { + reqlog_exclude_fingerprint(fp); + logmsg(LOGMSG_USER, "Excluded fingerprint %s from longreq log\n", str); + } + tok = segtok(line, lline, &st, <ok); + } + } else if (tokcmp(tok, ltok, "unexcludefp") == 0) { + tok = segtok(line, lline, &st, <ok); + if (ltok == 0) { + logmsg(LOGMSG_ERROR, "Usage: unexcludefp ..\n"); + return; + } + while (ltok != 0) { + char str[(FINGERPRINTSZ * 2) + 1]; + char fp[FINGERPRINTSZ] = {0}; + tokcpy(tok, ltok, str); + if (util_tobytes(fp, str, FINGERPRINTSZ)) { + logmsg(LOGMSG_ERROR, "Invalid fingerprint format %s\n", str); + } else { + reqlog_unexclude_fingerprint(fp); + logmsg(LOGMSG_USER, "Unexcluded fingerprint %s from longreq log\n", str); + } + tok = segtok(line, lline, &st, <ok); + } } else if (tokcmp(tok, ltok, "longsqlrequest") == 0) { tok = segtok(line, lline, &st, <ok); gbl_sql_time_threshold = toknum(tok, ltok); @@ -1890,8 +1926,59 @@ static int current_long_request_duration_ms = 0; static int current_longest_long_request_ms = 0; static int current_shortest_long_request_ms = INT_MAX; +static hash_t *exclude_fp_hash = NULL; +static pthread_mutex_t exclude_fp_mutex = PTHREAD_MUTEX_INITIALIZER; + +static hash_t *get_exclude_fp_hash() +{ + if (!exclude_fp_hash) { + exclude_fp_hash = hash_init(FINGERPRINTSZ); + } + return exclude_fp_hash; +} + +int reqlog_fingerprint_is_excluded(const char fp[FINGERPRINTSZ]) +{ + Pthread_mutex_lock(&exclude_fp_mutex); + hash_t *exclude_hash = get_exclude_fp_hash(); + char *rtn = (char *)hash_find(exclude_hash, fp); + Pthread_mutex_unlock(&exclude_fp_mutex); + return rtn ? 1 : 0; +} + +void reqlog_unexclude_fingerprint(const char fp[FINGERPRINTSZ]) +{ + char *found_fp; + Pthread_mutex_lock(&exclude_fp_mutex); + hash_t *exclude_hash = get_exclude_fp_hash(); + if ((found_fp = hash_find(exclude_hash, fp)) != NULL) { + hash_del(exclude_hash, found_fp); + free(found_fp); + } + Pthread_mutex_unlock(&exclude_fp_mutex); +} + +void reqlog_exclude_fingerprint(const char fp[FINGERPRINTSZ]) +{ + Pthread_mutex_lock(&exclude_fp_mutex); + hash_t *exclude_hash = get_exclude_fp_hash(); + if (hash_find(exclude_hash, fp) == NULL) { + char *fp_copy = malloc(FINGERPRINTSZ); + if (!fp_copy) { + Pthread_mutex_unlock(&exclude_fp_mutex); + logmsg(LOGMSG_FATAL, "%s: malloc failed\n", __func__); + abort(); + } + memcpy(fp_copy, fp, FINGERPRINTSZ); + hash_add(exclude_hash, fp_copy); + } + Pthread_mutex_unlock(&exclude_fp_mutex); +} + void reqlog_long_running_clnt(struct sqlclntstate *clnt) { + int have_fingerprint = 0; + char fp[FINGERPRINTSZ] = {0}; if (clnt->done || !clnt->thd || !clnt->sql || !clnt->thd->logger) return; if (can_consume(clnt) == 1) { @@ -1902,6 +1989,10 @@ void reqlog_long_running_clnt(struct sqlclntstate *clnt) return; /* Don't log blocking tranlog */ } + if (clnt->thd->logger->have_fingerprint && reqlog_fingerprint_is_excluded(clnt->thd->logger->fingerprint)) { + return; /* Excluded fingerprint */ + } + struct string_ref *sql = clnt->sql_ref; struct reqlogger logger; @@ -1939,8 +2030,24 @@ void reqlog_long_running_clnt(struct sqlclntstate *clnt) return; } - if (!reqlog_init_off && duration_beyond_thresh_sec >= 0 && - duration_beyond_thresh_sec < gbl_longreq_log_freq_sec) { + if (gbl_fingerprint_queries) { + size_t unused; + const char *normSql = NULL; + if (is_stored_proc(clnt)) { + normSql = clnt->work.zOrigNormSql; + } else { + normSql = (clnt->work.zNormSql) ? clnt->work.zNormSql : clnt->work.zOrigNormSql; + } + if (normSql) { + calc_fingerprint(normSql, &unused, (unsigned char *)fp); + if (reqlog_fingerprint_is_excluded(fp)) { + return; + } + have_fingerprint = 1; + } + } + + if (!reqlog_init_off && duration_beyond_thresh_sec >= 0 && duration_beyond_thresh_sec < gbl_longreq_log_freq_sec) { /* Only for the first time, also log more information (like sql) about this long running query. */ logger.event_mask |= REQL_INFO; @@ -1952,21 +2059,8 @@ void reqlog_long_running_clnt(struct sqlclntstate *clnt) reqlog_set_origin(&logger, "%s", clnt->origin); reqlog_set_vreplays(&logger, clnt->verify_retries); reqlog_set_sql(&logger, sql); - - if (gbl_fingerprint_queries) { - unsigned char fp[FINGERPRINTSZ]; - size_t unused; - const char *normSql = NULL; - if (is_stored_proc(clnt)) { - normSql = clnt->work.zOrigNormSql; - } else { - normSql = (clnt->work.zNormSql) ? clnt->work.zNormSql : - clnt->work.zOrigNormSql; - } - if (normSql) { - calc_fingerprint(normSql, &unused, fp); - reqlog_set_fingerprint(&logger, (const char *)fp, FINGERPRINTSZ); - } + if (have_fingerprint) { + reqlog_set_fingerprint(&logger, (const char *)fp, FINGERPRINTSZ); } if (logger.vreplays) { @@ -2029,6 +2123,9 @@ static inline int is_excluded(struct reqlogger *logger) if (can_consume(logger->clnt) == 1 || logger->clnt->blocking_tranlog) { return 1; } + if (logger->have_fingerprint && reqlog_fingerprint_is_excluded(logger->fingerprint)) { + return 1; + } } return 0; } diff --git a/db/reqlog.h b/db/reqlog.h index a07399741c..c0b0e4e9ef 100644 --- a/db/reqlog.h +++ b/db/reqlog.h @@ -19,6 +19,7 @@ #include #include #include +#include /* reqlog.c - new logging stuff */ struct reqlogger; @@ -45,6 +46,9 @@ void reqlog_process_message(char *line, int st, int lline); void reqlog_stat(void); void reqlog_help(void); void reqlog_free(struct reqlogger *reqlogger); +void reqlog_exclude_fingerprint(const char fp[FINGERPRINTSZ]); +void reqlog_unexclude_fingerprint(const char fp[FINGERPRINTSZ]); +int reqlog_fingerprint_is_excluded(const char fp[FINGERPRINTSZ]); void reqlog_reset_logger(struct reqlogger *logger); int reqlog_pushprefixv(struct reqlogger *logger, const char *format, va_list args); diff --git a/db/sql.h b/db/sql.h index cae57c49cc..6bba7ea91b 100644 --- a/db/sql.h +++ b/db/sql.h @@ -38,6 +38,7 @@ #include "db_access.h" #include "sqliteInt.h" #include "ast.h" +#include "fingerprint.h" /* I'm now splitting handle_fastsql_requests into two functions. The * outer function will maintain state (such as temporary buffers etc) while @@ -47,8 +48,6 @@ * we can have a small pool of sql threads with big stacks, and a large pool * of appsock threads with small stacks. */ -#define FINGERPRINTSZ 16 - #define CHECK_NEXT_QUERIES 20 /* Static rootpages numbers. */ diff --git a/sqlite/ext/comdb2/fingerprints.c b/sqlite/ext/comdb2/fingerprints.c index 989ee8f599..eb41f29089 100644 --- a/sqlite/ext/comdb2/fingerprints.c +++ b/sqlite/ext/comdb2/fingerprints.c @@ -22,6 +22,7 @@ #include "sql.h" #include #include "tohex.h" +#include struct fingerprint_track_systbl { char *fingerprint; @@ -32,6 +33,7 @@ struct fingerprint_track_systbl { int64_t rows; /* Cumulative number of rows selected */ char *zNormSql; /* The normalized SQL query */ size_t nNormSql; /* Length of normalized SQL query */ + char *excluded; /* 'Y' if excluded from longreqs */ char fp[FINGERPRINTSZ*2+1]; }; @@ -76,6 +78,11 @@ static int fingerprints_callback(void **data, int *npoints) pFp[copied].time = pEntry->time; pFp[copied].prepTime = pEntry->prepTime; pFp[copied].rows = pEntry->rows; + if (reqlog_fingerprint_is_excluded((char *)pEntry->fingerprint)) { + pFp[copied].excluded = "Y"; + } else { + pFp[copied].excluded = "N"; + } if (pEntry->zNormSql != NULL) { pFp[copied].zNormSql = strdup(pEntry->zNormSql); pFp[copied].nNormSql = strlen(pEntry->zNormSql); @@ -124,5 +131,7 @@ int systblFingerprintsInit(sqlite3 *db) offsetof(struct fingerprint_track_systbl, rows), CDB2_CSTRING, "normalized_sql", -1, offsetof(struct fingerprint_track_systbl, zNormSql), + CDB2_CSTRING, "excluded_from_longreqs", -1, + offsetof(struct fingerprint_track_systbl, excluded), SYSTABLE_END_OF_FIELDS); } diff --git a/tests/excludefp.test/Makefile b/tests/excludefp.test/Makefile new file mode 100644 index 0000000000..b4c0ac1057 --- /dev/null +++ b/tests/excludefp.test/Makefile @@ -0,0 +1,8 @@ +ifeq ($(TESTSROOTDIR),) + include ../testcase.mk +else + include $(TESTSROOTDIR)/testcase.mk +endif +ifeq ($(TEST_TIMEOUT),) + export TEST_TIMEOUT=5m +endif diff --git a/tests/excludefp.test/README b/tests/excludefp.test/README new file mode 100644 index 0000000000..c50e73d5f4 --- /dev/null +++ b/tests/excludefp.test/README @@ -0,0 +1 @@ +Verify that reql excludefp works correctly diff --git a/tests/excludefp.test/lrl.options b/tests/excludefp.test/lrl.options new file mode 100644 index 0000000000..4cba46063d --- /dev/null +++ b/tests/excludefp.test/lrl.options @@ -0,0 +1 @@ +verbose_normalized_queries diff --git a/tests/excludefp.test/runit b/tests/excludefp.test/runit new file mode 100755 index 0000000000..da7a69c881 --- /dev/null +++ b/tests/excludefp.test/runit @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +bash -n "$0" | exit 1 + +. ${TESTSROOTDIR}/tools/runit_common.sh + +debug=1 +[[ $debug == "1" ]] && set -x + +function exclude_test +{ + local n="" + local fp="" + local excluded="" + local count=0 + local aftercnt=0 + + # Grab the first cluster node + if [[ -n "$CLUSTER" ]]; then + n=$(echo "$CLUSTER" | cut -d' ' -f1) + else + n=$(hostname) + fi + + # Create a long request + $CDB2SQL_EXE $CDB2_OPTIONS $DBNAME --host $n "select sleep(15)" + + # Make sure that this appeared in the longreqs log + log=$TESTDIR/logs/excludefp${TESTID}.${n}.db + + count=$(egrep -i -c "LONG REQUEST" $log) + [[ "$count" == "0" ]] && failexit "long request not found in longreqs log" + + # Grab fingerprint for the sleep + fp=$($CDB2SQL_EXE --tabs $CDB2_OPTIONS $DBNAME --host $n "select fingerprint from comdb2_fingerprints where normalized_sql like 'SELECT sleep%'") + + # Make sure we got something + [[ -z "$fp" ]] && failexit "could not get fingerprint for sleep request" + + # Verify that this is not excluded from longreqs + excluded=$($CDB2SQL_EXE --tabs $CDB2_OPTIONS $DBNAME --host $n "select excluded_from_longreqs from comdb2_fingerprints where fingerprint='$fp'") + + [[ "$excluded" == "N" ]] || failexit "fingerprint for sleep request is already marked as excluded_from_longreqs" + + # Exclude this fp from longreqs + $CDB2SQL_EXE $CDB2_OPTIONS $DBNAME --host $n "exec procedure sys.cmd.send('reql excludefp $fp')" + + # Verify that it is now excluded + excluded=$($CDB2SQL_EXE --tabs $CDB2_OPTIONS $DBNAME --host $n "select excluded_from_longreqs from comdb2_fingerprints where fingerprint='$fp'") + + [[ "$excluded" == "Y" ]] || failexit "fingerprint for sleep request is not marked as excluded_from_longreqs" + + # Run the same long request again + $CDB2SQL_EXE $CDB2_OPTIONS $DBNAME --host $n "select sleep(15)" + + # Verify that the new request didn't occur as a LONGREQ + aftercnt=$(egrep -i -c "LONG REQUEST" $log) + + [[ "$aftercnt" != "$count" ]] && failexit "long request found in longreqs log after excluding fingerprint" + + # Now remove the exclusion + $CDB2SQL_EXE $CDB2_OPTIONS $DBNAME --host $n "exec procedure sys.cmd.send('reql unexcludefp $fp')" + + # Run the same long request again + $CDB2SQL_EXE $CDB2_OPTIONS $DBNAME --host $n "select sleep(15)" + + # Verify that the request once again appears as a LONG REQUEST + aftercnt=$(egrep -i -c "LONG REQUEST" $log) + + [[ "$aftercnt" == "$count" ]] && failexit "unexcluded long-request did not appear in longreqs log" +} + +function run_tests +{ + exclude_test +} + +run_tests +echo "Success!" diff --git a/util/tohex.c b/util/tohex.c index f5fbbe1c66..7221c791f8 100644 --- a/util/tohex.c +++ b/util/tohex.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #ifndef BUILDING_TOOLS #include @@ -58,6 +60,46 @@ char *util_tohex(char *out, const char *in, size_t len) return beginning; } +int util_tobytes(char *out, const char *hex, size_t len) +{ + if (!hex || !out) + return -1; + + /* skip optional 0x / 0X */ + if (hex[0] == '0' && (hex[1] == 'x' || hex[1] == 'X')) + hex += 2; + + int hi = -1; + size_t n = 0; + + for (; *hex; hex++) { + if (isspace((unsigned char)*hex)) + continue; + + int v; + if (*hex >= '0' && *hex <= '9') + v = *hex - '0'; + else if (*hex >= 'a' && *hex <= 'f') + v = 10 + (*hex - 'a'); + else if (*hex >= 'A' && *hex <= 'F') + v = 10 + (*hex - 'A'); + else + return -1; /* invalid character */ + + if (hi < 0) { + hi = v; + } else { + if (n >= len) + return -1; + out[n++] = (unsigned char)((hi << 4) | v); + hi = -1; + } + } + + /* odd number of hex digits is invalid */ + return (hi == -1) ? 0 : -1; +} + void hexdump(loglvl lvl, const char *key, int keylen) { char *mem;