mlcommons · freedomtan · Jan 27, 2026 · Jan 6, 2026 · Jan 19, 2026
@@ -0,0 +1,24 @@
+# IFEval C++ implementation
+
+This project is a modified version of MLCommons' C++ IFEval evaluation code, designed to run standalone on previously written responses stored in `json` format.
+
+## Operation
+
+This tool is a bit more manual than the Python implementation released by Google. The primary difference is that it operates on a single `json` file rather than two separate `jsonl` files, and produces a single result set containing both loose and strict parameters.
+
+### Tools
+Because of the differences above, both the input data from the Python implementation and the output of this tool need to be processed. The following tools are used for that:
+- `merger.py`: used to merge the 2 input files (`input_data.jsonl` and `input_response_data_gpt4_20231107_145030.jsonl`).
+- `jsoner.py`: used to convert `merged.jsonl` to `merged.json` for use with the C++ tool
+- `33merger.py`: used to add the missing field to the `IFEval33` result files (uses an existing merged `jsonl` file).
+- `process-cpp.py`: used to convert the merged output from the C++ code into two separate `loose` and `strict` files in a format compatible with the Python code.
+
+### Building and Running
+Compiling the code is simple: a C++17 (or later) compiler and `make` should handle everything.
+
+Once the code is compiled and the `merged.json` file has been created using `merger.py` and `jsoner.py`, the command to run should simply be `cat merged.json | main > cpp-results.txt`.
+
+If everything runs without issue, the resulting file should then be passed to `process-cpp.py` to generate the loose and strict result files for comparison.
+
+## Important Notes
+This tool was initially designed for internal testing only, so it might contain commented-out code or quick patchwork fixes.
@@ -0,0 +1,216 @@
+#ifndef MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_
+#define MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_
+
+#include <algorithm>
+#include <array>
+#include <cctype>
+#include <iomanip>
+#include <iostream>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace mlperf {
+namespace mobile {
+namespace ifeval {
+
// ANSI SGR escape sequences for colorizing text written to a terminal.
// They are declared `inline` (C++17) so that every translation unit that
// includes this header refers to one shared definition of each constant.
inline constexpr const char* red = "\033[31m";     // red foreground
inline constexpr const char* green = "\033[32m";   // green foreground
inline constexpr const char* yellow = "\033[33m";  // yellow foreground
inline constexpr const char* reset = "\033[0m";    // clear all attributes
+
// Returns a copy of `s` with all leading whitespace (as classified by
// std::isspace) removed. A string made only of whitespace becomes empty.
inline std::string ltrim(std::string s) {
  std::size_t lead = 0;
  while (lead < s.size() &&
         std::isspace(static_cast<unsigned char>(s[lead]))) {
    ++lead;
  }
  s.erase(0, lead);
  return s;
}
// Returns a copy of `s` with all trailing whitespace (as classified by
// std::isspace) removed. A string made only of whitespace becomes empty.
inline std::string rtrim(std::string s) {
  std::size_t keep = s.size();
  while (keep > 0 &&
         std::isspace(static_cast<unsigned char>(s[keep - 1]))) {
    --keep;
  }
  s.erase(keep);
  return s;
}
+inline std::string trim(std::string s) { return rtrim(ltrim(std::move(s))); }
+
// Returns a copy of `s` in which every byte has been passed through
// std::tolower. Each char is converted to unsigned char first, since passing a
// negative value to std::tolower is undefined.
inline std::string tolower(std::string s) {
  for (char& ch : s) {
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}
+
+
// Returns a copy of `s` with each byte lower-cased individually via
// std::tolower; the length of the string is never changed.
inline std::string to_lower_ascii(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char ch) {
    return static_cast<char>(std::tolower(ch));
  });
  return s;
}
+
// Reports whether `c` counts as part of a word: an alphanumeric character
// (per std::isalnum) or an underscore.
inline bool is_word_char(unsigned char c) {
  if (c == '_') return true;
  return std::isalnum(c) != 0;
}
+
+inline bool contains_string(const std::string& text,
+                            const std::string& substring) {
+  std::string h = tolower(text), n = tolower(substring);
+  return h.find(n) != std::string::npos;
+}
+
+inline bool ends_with(const std::string& s, const std::string& suf,
+                      unsigned threshold) {
+  if (s.size() < suf.size()) return false;
+  std::string a = tolower(s.substr(s.size() - (suf.size() + threshold)));
+  std::string b = tolower(suf);
+  return threshold == 0 ? a == b : contains_string(a, b);
+}
+
+inline bool starts_with(const std::string& s, const std::string& prf,
+                        unsigned threshold) {
+  if (s.size() < prf.size()) return false;
+  std::string a = tolower(s.substr(0, prf.size() + threshold));
+  std::string b = tolower(prf);
+  return threshold == 0 ? a == b : contains_string(a, b);
+}
+
// Case-insensitive whole-word search.
//
// Returns true when `word` occurs in `text` with no word character (letter,
// digit or underscore) immediately before or after the occurrence. An empty
// `word` never matches.
inline bool contains_word(const std::string& text, const std::string& word) {
  if (word.empty()) return false;

  const auto lowered = [](std::string str) {
    for (char& ch : str) {
      ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return str;
  };
  // Same notion of "word character" as the regex \b boundary.
  const auto in_word = [](unsigned char ch) {
    return ch == '_' || std::isalnum(ch);
  };

  const std::string haystack = lowered(text);
  const std::string needle = lowered(word);

  // Visit every occurrence (advancing by one so overlapping hits are seen)
  // and accept the first one delimited by boundaries on both sides.
  for (std::size_t hit = haystack.find(needle); hit != std::string::npos;
       hit = haystack.find(needle, hit + 1)) {
    const std::size_t after = hit + needle.size();
    const bool boundary_before =
        hit == 0 || !in_word(static_cast<unsigned char>(haystack[hit - 1]));
    const bool boundary_after =
        after == haystack.size() ||
        !in_word(static_cast<unsigned char>(haystack[after]));
    if (boundary_before && boundary_after) return true;
  }
  return false;
}
+
+inline size_t find_containing_word(const std::string& text,
+                                   const std::string& keyword,
+                                   std::string& containing_word, size_t pos) {
+  if (keyword.empty() || pos >= text.size()) return std::string::npos;
+
+  std::string t = to_lower_ascii(text);
+  std::string k = to_lower_ascii(keyword);
+
+  std::cout << "looking for '" << k << "' in text:" << std::endl << '\'' << t.substr(0, pos) << yellow << t.substr(pos) << reset << '\'' << std::endl;
+
+  if ((pos = t.find(k, pos)) == std::string::npos) return std::string::npos;
+
+  // Expand left to word boundary
+  size_t start = pos;
+  while (start > 0 &&
+         is_word_char(static_cast<unsigned char>(t[start - 1]))) {
+    --start;
+  }
+
+  // Expand right to word boundary
+  size_t end = pos + k.size();
+  while (end < t.size() &&
+         is_word_char(static_cast<unsigned char>(t[end]))) {
+    ++end;
+  }
+
+  // Extract original (not lowercased) word
+  containing_word = text.substr(start, end - start);
+  return start;
+}
+
+inline size_t find_containing_word(const std::string& text,
+                                   const std::string& keyword,
+                                   std::string& out_word) {
+  return find_containing_word(text, keyword, out_word, 0);
+}
+
+inline bool contains_none(const std::string& text,
+                          const std::vector<std::string>& words) {
+  for (const auto& w : words)
+    if (contains_word(text, w)) return false;
+  return true;
+}
+
// Strips common Markdown decoration from `s` and returns the result.
//
// Removed characters:
//   - every backtick;
//   - '*', '_', '~' and '\' unless the character right before them in `s` is
//     a backslash (so an escaped "\*" keeps its '*');
//   - a '#' or '>' that is the first character of the string or directly
//     follows a newline (only that single character is dropped, so "## x"
//     becomes "# x").
// Everything else is copied unchanged.
inline std::string remove_font_modifiers(const std::string& s) {
  std::string out;
  out.reserve(s.size());

  for (std::size_t i = 0; i < s.size(); ++i) {
    const char c = s[i];

    // Both look-behind checks guard against i == 0; indexing s[i - 1] there
    // would read outside the string.
    const bool escaped = (i > 0) && (s[i - 1] == '\\');
    const bool at_line_start = (i == 0) || (s[i - 1] == '\n');

    // Backticks are always dropped.
    if (c == '`') continue;

    // Emphasis/strong/strike/escape characters are dropped unless escaped.
    if ((c == '*' || c == '_' || c == '~' || c == '\\') && !escaped) continue;

    // Heading marker or blockquote marker at the start of a line.
    if ((c == '#' || c == '>') && at_line_start) continue;

    out.push_back(c);
  }
  return out;
}
+
// Returns `s` without its first line, i.e. everything after the first '\n'.
// A string with no newline is returned unchanged.
inline std::string remove_first_line(const std::string& s) {
  const std::size_t newline = s.find('\n');
  if (newline == std::string::npos) return s;
  return s.substr(newline + 1);
}
+
// Returns `s` without its last line, i.e. everything before the last '\n'.
// A string with no newline is returned unchanged.
inline std::string remove_last_line(const std::string& s) {
  const std::size_t newline = s.rfind('\n');
  if (newline == std::string::npos) return s;
  return s.substr(0, newline);
}
+
+// Returns the 8 transformations as an array of strings.
+// Index is a bitmask over {font_mod (bit0), remove_first (bit1), remove_last
+// (bit2)}.
+
+// 000 (0) nothing
+// 001 (1) font
+// 010 (2) fl
+// 011 (3) font & fl
+// 100 (4) ll
+// 101 (5) ll & font
+// 110 (6) fl & ll
+// 111 (7) all
+inline std::array<std::string, 8> transform_response(const std::string& resp) {
+  std::array<std::string, 8> out{};
+  for (int mask = 0; mask < 8; ++mask) {
+    std::string t = resp;
+    if (mask & 0b001) t = remove_font_modifiers(t);
+    if (mask & 0b010) t = remove_first_line(t);
+    if (mask & 0b100) t = remove_last_line(t);
+    out[mask] = std::move(t);
+  }
+  return out;
+}
+
+}  // namespace ifeval
+}  // namespace mobile
+}  // namespace mlperf
+#endif  // MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_