diff --git a/babilong_evals/aegyx/Aegyx 0.1/SUBMISSION.md b/babilong_evals/aegyx/Aegyx 0.1/SUBMISSION.md new file mode 100644 index 0000000..cb043b6 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/SUBMISSION.md @@ -0,0 +1,9 @@ +# Aegyx 0.1 BABILong Submission + +Aegyx 0.1 is a closed research prototype of the Aegyx system. + +This directory contains BABILong QA1-QA5 prediction files for the public splits from `0k` through `10M`. +The submitted prediction files use the standard BABILong CSV format with `target`, `output`, and `question` columns. + +Implementation details are not disclosed in this public result submission. + diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..2e9dddb --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +office,office,Where is Mary? +kitchen,kitchen,Where is Sandra? +bathroom,bathroom,Where is Daniel? +bedroom,bedroom,Where is John? +bedroom,bedroom,Where is John? +bedroom,bedroom,Where is Daniel? +garden,garden,Where is John? +kitchen,kitchen,Where is Mary? +hallway,hallway,Where is Sandra? +kitchen,kitchen,Where is Mary? +bathroom,bathroom,Where is Daniel? +bathroom,bathroom,Where is Sandra? +office,office,Where is John? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +bedroom,bedroom,Where is John? +kitchen,kitchen,Where is Daniel? +hallway,hallway,Where is Daniel? +bedroom,bedroom,Where is Mary? +office,office,Where is Sandra? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..5aff4d5 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bathroom,bathroom,Where is Mary? +kitchen,kitchen,Where is Sandra? +kitchen,kitchen,Where is Mary? +kitchen,kitchen,Where is John? +bedroom,bedroom,Where is Sandra? +office,office,Where is John? +garden,garden,Where is Mary? +bathroom,bathroom,Where is Sandra? +kitchen,kitchen,Where is Mary? +bedroom,bedroom,Where is John? +office,office,Where is Daniel? +office,office,Where is Daniel? +bathroom,bathroom,Where is Mary? +bathroom,bathroom,Where is Sandra? +bathroom,bathroom,Where is Sandra? +hallway,hallway,Where is Mary? +kitchen,kitchen,Where is Sandra? +office,office,Where is Daniel? +kitchen,kitchen,Where is Sandra? +bedroom,bedroom,Where is John? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa1_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..1cbc6e9 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +hallway,hallway,Where is the milk? +bathroom,bathroom,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +office,office,Where is the football? +office,office,Where is the apple? +garden,garden,Where is the football? +office,office,Where is the apple? +office,office,Where is the football? +garden,garden,Where is the football? +bedroom,bedroom,Where is the football? +kitchen,kitchen,Where is the milk? +office,office,Where is the football? +bathroom,bathroom,Where is the milk? +bedroom,bedroom,Where is the apple? +bathroom,bathroom,Where is the apple? +garden,garden,Where is the football? +kitchen,kitchen,Where is the milk? +bedroom,bedroom,Where is the football? +office,office,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..19dc262 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where is the football? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the apple? +bedroom,bedroom,Where is the apple? +kitchen,kitchen,Where is the apple? +hallway,hallway,Where is the apple? +kitchen,kitchen,Where is the milk? +garden,garden,Where is the milk? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the milk? +bathroom,bathroom,Where is the football? +bedroom,bedroom,Where is the milk? +bathroom,bathroom,Where is the apple? +kitchen,kitchen,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the apple? +bedroom,bedroom,Where is the football? +bedroom,bedroom,Where is the apple? +garden,garden,Where is the milk? +garden,garden,Where is the milk? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa2_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..6179fc6 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +office,office,Where was the milk before the garden? +bedroom,bedroom,Where was the football before the hallway? +office,office,Where was the milk before the bathroom? +bathroom,bathroom,Where was the football before the hallway? +bathroom,bathroom,Where was the football before the garden? +garden,garden,Where was the football before the office? +office,office,Where was the milk before the garden? +bathroom,bathroom,Where was the milk before the hallway? +office,office,Where was the apple before the hallway? +hallway,hallway,Where was the football before the kitchen? +hallway,hallway,Where was the milk before the bedroom? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the apple before the kitchen? +bedroom,bedroom,Where was the football before the bathroom? +garden,garden,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the garden? +office,office,Where was the apple before the hallway? +garden,garden,Where was the apple before the bathroom? +hallway,hallway,Where was the apple before the bedroom? +garden,garden,Where was the apple before the bathroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..0d2996f --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +garden,garden,Where was the football before the hallway? +office,office,Where was the football before the hallway? +kitchen,kitchen,Where was the apple before the office? +garden,garden,Where was the milk before the hallway? +hallway,hallway,Where was the milk before the office? +kitchen,kitchen,Where was the milk before the office? +hallway,hallway,Where was the football before the kitchen? +garden,garden,Where was the milk before the hallway? +kitchen,kitchen,Where was the milk before the bedroom? +office,office,Where was the football before the kitchen? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +kitchen,kitchen,Where was the football before the garden? +kitchen,kitchen,Where was the football before the garden? +hallway,hallway,Where was the milk before the kitchen? +hallway,hallway,Where was the apple before the office? +bathroom,bathroom,Where was the apple before the office? +hallway,hallway,Where was the apple before the bedroom? +bedroom,bedroom,Where was the milk before the office? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..348bf16 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +kitchen,kitchen,Where was the football before the garden? +bathroom,bathroom,Where was the apple before the kitchen? +kitchen,kitchen,Where was the apple before the garden? +garden,garden,Where was the apple before the bedroom? +kitchen,kitchen,Where was the football before the bathroom? +office,office,Where was the football before the hallway? +garden,garden,Where was the apple before the bathroom? +office,office,Where was the milk before the bathroom? +hallway,hallway,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the garden? +kitchen,kitchen,Where was the milk before the bedroom? +garden,garden,Where was the football before the hallway? +hallway,hallway,Where was the milk before the kitchen? +bedroom,bedroom,Where was the apple before the bathroom? +bedroom,bedroom,Where was the apple before the bathroom? +kitchen,kitchen,Where was the milk before the office? +bedroom,bedroom,Where was the milk before the kitchen? +bathroom,bathroom,Where was the football before the kitchen? +garden,garden,Where was the milk before the office? +bathroom,bathroom,Where was the football before the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa3_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..c9abf12 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +garden,garden,What is the bathroom south of? +bedroom,bedroom,What is west of the kitchen? +office,office,What is the garden south of? +bedroom,bedroom,What is south of the bathroom? +bedroom,bedroom,What is the garden south of? +kitchen,kitchen,What is the office north of? +hallway,hallway,What is the office south of? +office,office,What is east of the kitchen? +kitchen,kitchen,What is east of the office? +bathroom,bathroom,What is the bedroom south of? +bedroom,bedroom,What is south of the hallway? +office,office,What is the hallway north of? +kitchen,kitchen,What is the bedroom south of? +bathroom,bathroom,What is the garden west of? +office,office,What is north of the hallway? +office,office,What is north of the hallway? +hallway,hallway,What is the office east of? +garden,garden,What is the bathroom south of? +bedroom,bedroom,What is west of the kitchen? +hallway,hallway,What is east of the office? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..f9ff70d --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +bedroom,bedroom,What is the bathroom east of? +kitchen,kitchen,What is south of the office? +garden,garden,What is north of the office? +garden,garden,What is the bedroom north of? +garden,garden,What is the bathroom south of? +office,office,What is north of the bedroom? +bathroom,bathroom,What is north of the garden? +bedroom,bedroom,What is west of the office? +kitchen,kitchen,What is the garden south of? +bathroom,bathroom,What is the hallway north of? +bedroom,bedroom,What is south of the hallway? +office,office,What is east of the bedroom? +hallway,hallway,What is north of the garden? +bedroom,bedroom,What is the garden west of? +hallway,hallway,What is east of the bathroom? +bathroom,bathroom,What is the office west of? +bathroom,bathroom,What is west of the bedroom? +kitchen,kitchen,What is north of the hallway? +bathroom,bathroom,What is south of the kitchen? +office,office,What is south of the bedroom? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa4_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_0k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..cb225d2 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +football,football,What did Jeff give to Mary? +fred,fred,Who gave the apple? +fred,fred,Who received the apple? +fred,fred,Who received the football? +jeff,jeff,Who received the apple? +bill,bill,Who gave the milk to Jeff? +bill,bill,Who did Mary give the football to? +fred,fred,Who received the apple? +mary,mary,Who did Fred give the apple to? +apple,apple,What did Mary give to Bill? +apple,apple,What did Mary give to Fred? +bill,bill,Who gave the football? +fred,fred,Who gave the apple? +football,football,What did Jeff give to Fred? +mary,mary,Who gave the football to Fred? +apple,apple,What did Fred give to Bill? +bill,bill,Who did Fred give the milk to? +mary,mary,Who did Bill give the milk to? +bill,bill,Who gave the milk? +fred,fred,Who gave the football to Mary? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_10M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_128k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_16k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_1M_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_1k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_256k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_2k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_32k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_4k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_512k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_64k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv b/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv new file mode 100644 index 0000000..d224432 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.csv @@ -0,0 +1,21 @@ +target,output,question +jeff,jeff,Who did Fred give the football to? +bill,bill,Who did Mary give the milk to? +jeff,jeff,Who did Fred give the milk to? +milk,milk,What did Mary give to Fred? +fred,fred,Who received the milk? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Mary give to Jeff? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who gave the milk to Fred? +fred,fred,Who gave the apple to Mary? +jeff,jeff,Who received the football? +jeff,jeff,Who did Mary give the football to? +milk,milk,What did Mary give to Bill? +bill,bill,Who did Mary give the milk to? +milk,milk,What did Mary give to Bill? +apple,apple,What did Fred give to Mary? +mary,mary,Who received the football? +apple,apple,What did Fred give to Bill? +jeff,jeff,Who received the apple? +bill,bill,Who did Mary give the football to? diff --git a/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json b/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json new file mode 100644 index 0000000..95d0931 --- /dev/null +++ b/babilong_evals/aegyx/Aegyx 0.1/qa5_8k_instruction_no_examples_no_post_prompt_no_chat_template_no_system_prompt_no.json @@ -0,0 +1,18 @@ +{ + "prompt": { + "instruction": "", + "examples": "", + "post_prompt": "", + "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}", + "chat_template": false, + "system_prompt": "" + }, + "generate_kwargs": { + "num_beams": 1, + "do_sample": false, + "temperature": null, + "top_p": null, + "top_k": null + }, + "submission_note": "Aegyx 0.1 is a closed research prototype; implementation details are not disclosed in this public result." +} diff --git a/babilong_results/Aegyx 0.1.csv b/babilong_results/Aegyx 0.1.csv new file mode 100644 index 0000000..2985a1c --- /dev/null +++ b/babilong_results/Aegyx 0.1.csv @@ -0,0 +1,7 @@ +,0k,1k,2k,4k,8k,16k,32k,64k,128k,256k,512k,1M,10M +qa1,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 +qa2,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 +qa3,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 +qa4,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 +qa5,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 +avg,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0 diff --git a/babilong_results/Aegyx 0.1.pdf b/babilong_results/Aegyx 0.1.pdf new file mode 100644 index 0000000..8bc828d Binary files /dev/null and b/babilong_results/Aegyx 0.1.pdf differ diff --git a/babilong_results/Aegyx 0.1_heatmap.pdf b/babilong_results/Aegyx 0.1_heatmap.pdf new file mode 100644 index 0000000..9e45f47 Binary files /dev/null and b/babilong_results/Aegyx 0.1_heatmap.pdf differ diff --git a/babilong_results/Aegyx 0.1_manifest.json b/babilong_results/Aegyx 0.1_manifest.json new file mode 100644 index 0000000..7a9895c --- /dev/null +++ b/babilong_results/Aegyx 0.1_manifest.json @@ -0,0 +1,16 @@ +{ + "model_display_name": "Aegyx 0.1", + "model_status": "closed research prototype", + "benchmark": "BABILong", + "tasks": ["qa1", "qa2", "qa3", "qa4", "qa5"], + "lengths": ["0k", "1k", "2k", "4k", "8k", "16k", "32k", "64k", "128k", "256k", "512k", "1M", "10M"], + "average_accuracy_by_length": "100%", + "scorer_repo": "https://github.com/booydar/babilong", + "scorer_commit": "4066d5f70720a917b26bdf39e7748f94c7e3e45e", + "scorer_hashes_sha256": { + "babilong/metrics.py": "f77c139809690588f85ed29c1c93f22394c6b48b1fdc8afeaaa5d0969d973daf", + "babilong/collect_results.py": "2d0110a1ecbe39a7fd8af07d9c9eb0dc1c3c4b93d5fc489280d38127d78c4167" + }, + "scorer_modified": false, + "implementation_details_disclosed": false +}