Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/llm_simple_qa/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Replace the file in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/s

Run the following command:

`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml`
`ianvs -f examples/llm_simple_qa/benchmarkingjob.yaml`

## OpenCompass Evaluation

Expand All @@ -80,5 +80,5 @@ Run the following command:

### Run Evaluation

`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py`
`python run_op.py examples/llm_simple_qa/testalgorithms/gen/op_eval.py`

6 changes: 3 additions & 3 deletions examples/llm_simple_qa/benchmarkingjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ benchmarkingjob:
# job name of benchmarking; string type;
name: "benchmarkingjob"
# the url address of the job workspace that will store the output of tests; string type;
workspace: "/home/icyfeather/project/ianvs/workspace"
workspace: "./workspace-llm_simple_qa"

# the url address of test environment configuration file; string type;
# the file format supports yaml/yml;
testenv: "./examples/llm/singletask_learning_bench/simple_qa/testenv/testenv.yaml"
testenv: "./examples/llm_simple_qa/testenv/testenv.yaml"

# the configuration of test object
test_object:
Expand All @@ -19,7 +19,7 @@ benchmarkingjob:
- name: "simple_qa_singletask_learning"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml;
url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/gen_algorithm.yaml"
url: "./examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml"

# the configuration of ranking leaderboard
rank:
Expand Down
4 changes: 2 additions & 2 deletions examples/llm_simple_qa/testalgorithms/gen/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ class BaseModel:

def __init__(self, **kwargs):
self.model = AutoModelForCausalLM.from_pretrained(
"/home/icyfeather/models/Qwen2-0.5B-Instruct",
"Qwen/Qwen2-0.5B-Instruct",
torch_dtype="auto",
device_map="auto"
)
self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")
self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

def train(self, train_data, valid_data=None, **kwargs):
print("BaseModel doesn't need to train")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ algorithm:
# example: basemodel.py has a BaseModel module whose alias is "FPN" for this benchmarking;
name: "gen"
# the url address of python module; string type;
url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/basemodel.py"
url: "./examples/llm_simple_qa/testalgorithms/gen/basemodel.py"
2 changes: 1 addition & 1 deletion examples/llm_simple_qa/testalgorithms/gen/op_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
dict(
type=HuggingFacewithChatTemplate,
abbr='qwen1.5-1.8b-chat-hf',
path='/home/icyfeather/models/Qwen1.5-1.8B-Chat',
path='Qwen/Qwen1.5-1.8B-Chat',
max_out_len=1024,
batch_size=2,
run_cfg=dict(num_gpus=1),
Expand Down
3 changes: 3 additions & 0 deletions examples/llm_simple_qa/testenv/acc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def get_last_letter(input_string):

@ClassFactory.register(ClassType.GENERAL, alias="acc")
def acc(y_true, y_pred):
if not y_pred or len(y_pred) != len(y_true):
return 0

y_pred = [get_last_letter(pred) for pred in y_pred]
print(y_true)
print(y_pred)
Expand Down
6 changes: 3 additions & 3 deletions examples/llm_simple_qa/testenv/testenv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ testenv:
# dataset configuration
dataset:
# the url address of train dataset index; string type;
train_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/train_data/data.jsonl"
train_data: "./dataset/llm_simple_qa/train_data/data.jsonl"
# the url address of test dataset index; string type;
test_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/test_data/data.jsonl"
test_data: "./dataset/llm_simple_qa/test_data/data.jsonl"

# metrics configuration for test case's evaluation; list type;
metrics:
# metric name; string type;
- name: "acc"
# the url address of python file
url: "./examples/llm/singletask_learning_bench/simple_qa/testenv/acc.py"
url: "./examples/llm_simple_qa/testenv/acc.py"