Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llm/IFEval-cpp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# IFEval C++ implementation

This project is a modified version of MLCommons' C++ IFEval evaluation code, designed to run standalone using previously written responses stored in `json` format.

## Operation

This tool is a bit more manual than the Python implementation released by Google. The primary difference is that it operates on a single `json` file rather than 2 separate `jsonl` files, and produces a single result set containing both loose and strict parameters.

### Tools
Because of the differences above, the input data from the python implementation will need to be processed, and so will the output. The following are the tools that will be used for that:
- `merger.py`: used to merge the 2 input files (`input_data.jsonl` and `input_response_data_gpt4_20231107_145030.jsonl`).
- `jsoner.py`: used to convert `merged.jsonl` to `merged.json` for use with the C++ tool
- `33merger.py`: used to add missing field to the `IFEval33` result files. (uses an existing merged `jsonl` file)
- `process-cpp.py`: used to convert the merged output from the C++ code into 2 separate `loose` and `strict` files that are in a format compatible with the Python code.

### Building and Running
Compiling the code is simple: a C++17 or later compiler and `make` should handle everything.

Once the code is compiled and the `merged.json` file has been created using `merger.py` and `jsoner.py`, the command to run should simply be `cat merged.json | main > cpp-results.txt`.

If everything runs without issue, the file should then be used with `process-cpp.py` to generate the loose and strict result files for comparison.

## Important Notes
This tool was initially designed for internal testing only, so it might contain commented code or quick patchwork fixes.
216 changes: 216 additions & 0 deletions llm/IFEval-cpp/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#ifndef MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_
#define MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_

#include <algorithm>
#include <array>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

namespace mlperf {
namespace mobile {
namespace ifeval {

// ANSI terminal escape sequences used to colour diagnostic output.
// Declared `inline constexpr` so every translation unit that includes this
// header shares a single definition (C++17 inline variables).
inline constexpr const char* red = "\033[31m";
inline constexpr const char* green = "\033[32m";
inline constexpr const char* yellow = "\033[33m";
// Restores the terminal's default attributes.
inline constexpr const char* reset = "\033[0m";

// Returns a copy of `s` with every leading whitespace character (as
// classified by std::isspace) removed.
inline std::string ltrim(std::string s) {
  std::size_t skipped = 0;
  while (skipped < s.size() &&
         std::isspace(static_cast<unsigned char>(s[skipped]))) {
    ++skipped;
  }
  s.erase(0, skipped);
  return s;
}
// Returns a copy of `s` with every trailing whitespace character (as
// classified by std::isspace) removed.
inline std::string rtrim(std::string s) {
  std::size_t kept = s.size();
  while (kept > 0 && std::isspace(static_cast<unsigned char>(s[kept - 1]))) {
    --kept;
  }
  s.erase(kept);
  return s;
}
// Returns a copy of `s` with whitespace stripped from both ends: the leading
// run is removed first (ltrim), then the trailing run (rtrim).
inline std::string trim(std::string s) {
  std::string without_leading = ltrim(std::move(s));
  return rtrim(std::move(without_leading));
}

// Returns a copy of `s` in which every character has been passed through
// std::tolower. Each char is widened via unsigned char first so that
// negative char values are never handed to std::tolower.
inline std::string tolower(std::string s) {
  for (char& ch : s) {
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}


// Returns a lower-cased copy of `s`, converting one char at a time with
// std::tolower (applied to the unsigned-char value of each element).
inline std::string to_lower_ascii(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(), [](char ch) {
    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  });
  return s;
}

// True when `c` counts as part of a word: an alphanumeric character (per
// std::isalnum) or an underscore.
inline bool is_word_char(unsigned char c) {
  if (c == '_') return true;
  return std::isalnum(c) != 0;
}

// Case-insensitive substring test: both arguments are lower-cased with
// tolower() and `substring` is then searched for inside `text`.
inline bool contains_string(const std::string& text,
                            const std::string& substring) {
  const std::string haystack = tolower(text);
  const std::string needle = tolower(substring);
  const bool missing = haystack.find(needle) == std::string::npos;
  return !missing;
}

// Case-insensitive suffix test with optional slack.
//
// With `threshold == 0` this returns true when `s` ends with `suf`, ignoring
// case. With `threshold > 0` it returns true when `suf` occurs anywhere in
// the last `suf.size() + threshold` characters of `s`, i.e. up to
// `threshold` extra trailing characters are tolerated.
//
// Returns false when `s` is shorter than `suf`.
//
// The inspected window is clamped to the length of `s`. Previously the start
// offset was computed as `s.size() - (suf.size() + threshold)`, which wrapped
// around and made std::string::substr throw std::out_of_range whenever the
// suffix plus the slack was longer than `s`.
inline bool ends_with(const std::string& s, const std::string& suf,
                      unsigned threshold) {
  if (s.size() < suf.size()) return false;

  // Never look further back than the beginning of `s`.
  const std::size_t slack =
      std::min<std::size_t>(threshold, s.size() - suf.size());
  const std::size_t window = suf.size() + slack;
  const auto tail_begin =
      s.end() - static_cast<std::string::difference_type>(window);

  const auto same_ignoring_case = [](char lhs, char rhs) {
    return std::tolower(static_cast<unsigned char>(lhs)) ==
           std::tolower(static_cast<unsigned char>(rhs));
  };

  if (threshold == 0) {
    return std::equal(tail_begin, s.end(), suf.begin(), suf.end(),
                      same_ignoring_case);
  }

  // An empty suffix is contained in any window, including an empty one.
  if (suf.empty()) return true;
  return std::search(tail_begin, s.end(), suf.begin(), suf.end(),
                     same_ignoring_case) != s.end();
}

// Case-insensitive prefix test with optional slack.
//
// With `threshold == 0` this returns true when `s` begins with `prf`,
// ignoring case. With `threshold > 0` it returns true when `prf` occurs
// anywhere in the first `prf.size() + threshold` characters of `s`.
// Returns false when `s` is shorter than `prf`.
inline bool starts_with(const std::string& s, const std::string& prf,
                        unsigned threshold) {
  if (prf.size() > s.size()) return false;

  // substr clamps the count, so an oversized window simply yields all of s.
  const std::string head = tolower(s.substr(0, prf.size() + threshold));
  const std::string wanted = tolower(prf);

  if (threshold == 0) return head == wanted;
  return contains_string(head, wanted);
}

// Case-insensitive whole-word search.
//
// Returns true when `word` occurs in `text` with a non-word character (or
// the string boundary) immediately before and after the match. Word
// characters are alphanumerics and '_', mirroring the std::regex `\b`
// notion of a word. An empty `word` never matches.
inline bool contains_word(const std::string& text, const std::string& word) {
  if (word.empty()) return false;

  const auto lowered = [](std::string str) {
    std::transform(str.begin(), str.end(), str.begin(), [](unsigned char ch) {
      return static_cast<char>(std::tolower(ch));
    });
    return str;
  };
  const auto part_of_word = [](unsigned char ch) {
    return ch == '_' || std::isalnum(ch) != 0;
  };

  const std::string haystack = lowered(text);
  const std::string needle = lowered(word);

  // Visit every occurrence, advancing by one so overlapping hits are seen.
  for (std::size_t hit = haystack.find(needle); hit != std::string::npos;
       hit = haystack.find(needle, hit + 1)) {
    const std::size_t after = hit + needle.size();
    const bool bounded_left =
        hit == 0 ||
        !part_of_word(static_cast<unsigned char>(haystack[hit - 1]));
    const bool bounded_right =
        after == haystack.size() ||
        !part_of_word(static_cast<unsigned char>(haystack[after]));
    if (bounded_left && bounded_right) return true;
  }
  return false;
}

// Finds the first case-insensitive occurrence of `keyword` in `text` at or
// after `pos` and reports the whole word surrounding that occurrence.
//
// On success `containing_word` receives the word (in its original casing,
// extended left and right over word characters as defined by
// is_word_char) and the return value is the index in `text` where that word
// starts.
//
// Returns std::string::npos, leaving `containing_word` untouched, when
// `keyword` is empty, `pos` is not inside `text`, or no occurrence exists.
//
// Side effect: writes a two-line trace of the search to std::cout, with the
// part of the text from `pos` onwards highlighted in yellow.
inline size_t find_containing_word(const std::string& text,
                                   const std::string& keyword,
                                   std::string& containing_word, size_t pos) {
  if (keyword.empty()) return std::string::npos;
  if (pos >= text.size()) return std::string::npos;

  const std::string haystack = to_lower_ascii(text);
  const std::string needle = to_lower_ascii(keyword);

  std::cout << "looking for '" << needle << "' in text:" << std::endl;
  std::cout << '\'' << haystack.substr(0, pos) << yellow
            << haystack.substr(pos) << reset << '\'' << std::endl;

  const size_t hit = haystack.find(needle, pos);
  if (hit == std::string::npos) return std::string::npos;

  // Walk left until the character before the word is not a word character.
  size_t word_begin = hit;
  for (; word_begin > 0; --word_begin) {
    if (!is_word_char(static_cast<unsigned char>(haystack[word_begin - 1])))
      break;
  }

  // Walk right until the first character that is not a word character.
  size_t word_end = hit + needle.size();
  for (; word_end < haystack.size(); ++word_end) {
    if (!is_word_char(static_cast<unsigned char>(haystack[word_end]))) break;
  }

  // Slice from the original text so the caller sees the original casing.
  containing_word = text.substr(word_begin, word_end - word_begin);
  return word_begin;
}

// Convenience overload: searches from the beginning of `text` by forwarding
// to the four-argument find_containing_word with a start position of 0.
// `out_word` and the return value have the same meaning as in that overload.
inline size_t find_containing_word(const std::string& text,
const std::string& keyword,
std::string& out_word) {
return find_containing_word(text, keyword, out_word, 0);
}

// True when none of `words` appears in `text` as a whole word (as decided by
// contains_word). An empty list yields true.
inline bool contains_none(const std::string& text,
                          const std::vector<std::string>& words) {
  return std::none_of(words.begin(), words.end(),
                      [&text](const std::string& candidate) {
                        return contains_word(text, candidate);
                      });
}

// Returns a copy of `s` with common Markdown formatting characters removed:
//   - every backtick;
//   - '*', '_', '~' and '\' unless the character directly before them is a
//     backslash (so an escaped marker such as "\*" keeps the '*');
//   - a '#' or '>' that is the first character of the string or directly
//     follows a newline (only that first character is dropped).
// All other characters are copied through unchanged.
inline std::string remove_font_modifiers(const std::string& s) {
  std::string out;
  out.reserve(s.size());

  for (std::size_t i = 0; i < s.size(); ++i) {
    const char c = s[i];

    // Guard i == 0 explicitly: the first character has no predecessor, and
    // reading s[i - 1] there would index out of bounds.
    const bool escaped = i > 0 && s[i - 1] == '\\';
    const bool at_line_start = i == 0 || s[i - 1] == '\n';

    // Drop code-span delimiters.
    if (c == '`') continue;

    // Drop emphasis/strong/strike/escape characters unless they are preceded
    // by an escape character.
    if ((c == '*' || c == '_' || c == '~' || c == '\\') && !escaped) continue;

    // Drop a heading marker or blockquote marker that opens a line.
    if ((c == '#' || c == '>') && at_line_start) continue;

    out.push_back(c);
  }
  return out;
}

// Returns everything after the first newline in `s`. When `s` contains no
// newline it is returned unchanged.
inline std::string remove_first_line(const std::string& s) {
  const std::size_t newline_at = s.find('\n');
  if (newline_at == std::string::npos) return s;
  return s.substr(newline_at + 1);
}

// Returns everything before the last newline in `s`. When `s` contains no
// newline it is returned unchanged.
inline std::string remove_last_line(const std::string& s) {
  const std::size_t newline_at = s.rfind('\n');
  if (newline_at == std::string::npos) return s;
  return s.substr(0, newline_at);
}

// Builds all eight variants of a response obtained by optionally applying
// three clean-ups. The array index is a bitmask selecting which clean-ups
// were applied:
//   bit 0 (value 1): remove_font_modifiers
//   bit 1 (value 2): remove_first_line
//   bit 2 (value 4): remove_last_line
//
//   index  bits  applied
//   0      000   nothing
//   1      001   font
//   2      010   first line
//   3      011   font, first line
//   4      100   last line
//   5      101   font, last line
//   6      110   first line, last line
//   7      111   all three
//
// When several bits are set the clean-ups run in the order font, first line,
// last line.
inline std::array<std::string, 8> transform_response(const std::string& resp) {
  constexpr unsigned kStripFont = 0b001;
  constexpr unsigned kDropFirstLine = 0b010;
  constexpr unsigned kDropLastLine = 0b100;

  std::array<std::string, 8> variants{};
  for (unsigned bits = 0; bits < 8; ++bits) {
    std::string candidate = resp;
    if ((bits & kStripFont) != 0) candidate = remove_font_modifiers(candidate);
    if ((bits & kDropFirstLine) != 0) candidate = remove_first_line(candidate);
    if ((bits & kDropLastLine) != 0) candidate = remove_last_line(candidate);
    variants[bits] = std::move(candidate);
  }
  return variants;
}

} // namespace ifeval
} // namespace mobile
} // namespace mlperf
#endif // MLPERF_DATASETS_IFEVAL_UTILS_COMMON_H_
Loading