Skip to content
56 changes: 56 additions & 0 deletions ais_bench/benchmark/configs/datasets/refcoco/refcoco_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCODataset
from ais_bench.benchmark.datasets.refcoco import refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcoco_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, passed as a local file URL.
refcoco_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'file://{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcoco_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCO evaluation splits.
_splits = ['val', 'test', 'testA', 'testB']

# One dataset entry per split; all entries share the configs above.
refcoco_datasets = [
    {
        'abbr': f'RefCOCO_{split}',
        'type': RefCOCODataset,
        'path': 'ais_bench/datasets/RefCOCO/data',
        'split': split,
        'reader_cfg': refcoco_reader_cfg,
        'infer_cfg': refcoco_infer_cfg,
        'eval_cfg': refcoco_eval_cfg,
    }
    for split in _splits
]
57 changes: 57 additions & 0 deletions ais_bench/benchmark/configs/datasets/refcoco/refcoco_gen_base64.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCODataset
from ais_bench.benchmark.datasets.refcoco import IMAGE_BASE64_TYPE, refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcoco_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, inlined as a base64 data URL.
refcoco_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcoco_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCO evaluation splits.
_splits = ['val', 'test', 'testA', 'testB']

# One dataset entry per split; images are delivered base64-encoded.
refcoco_datasets = [
    {
        'abbr': f'RefCOCO_base64_{split}',
        'type': RefCOCODataset,
        'path': 'ais_bench/datasets/RefCOCO/data',
        'split': split,
        'image_type': IMAGE_BASE64_TYPE,
        'reader_cfg': refcoco_reader_cfg,
        'infer_cfg': refcoco_infer_cfg,
        'eval_cfg': refcoco_eval_cfg,
    }
    for split in _splits
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCOPlusDataset
from ais_bench.benchmark.datasets.refcoco import refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcoco_plus_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, passed as a local file URL.
refcoco_plus_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'file://{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcoco_plus_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCO+ evaluation splits (no combined 'test' split for RefCOCO+).
_splits = ['val', 'testA', 'testB']

# One dataset entry per split; all entries share the configs above.
refcoco_plus_datasets = [
    {
        'abbr': f'RefCOCOPlus_{split}',
        'type': RefCOCOPlusDataset,
        'path': 'ais_bench/datasets/RefCOCOplus/data',
        'split': split,
        'reader_cfg': refcoco_plus_reader_cfg,
        'infer_cfg': refcoco_plus_infer_cfg,
        'eval_cfg': refcoco_plus_eval_cfg,
    }
    for split in _splits
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCOPlusDataset
from ais_bench.benchmark.datasets.refcoco import IMAGE_BASE64_TYPE, refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcoco_plus_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, inlined as a base64 data URL.
refcoco_plus_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcoco_plus_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCO+ evaluation splits (no combined 'test' split for RefCOCO+).
_splits = ['val', 'testA', 'testB']

# One dataset entry per split; images are delivered base64-encoded.
refcoco_plus_datasets = [
    {
        'abbr': f'RefCOCOPlus_base64_{split}',
        'type': RefCOCOPlusDataset,
        'path': 'ais_bench/datasets/RefCOCOplus/data',
        'split': split,
        'image_type': IMAGE_BASE64_TYPE,
        'reader_cfg': refcoco_plus_reader_cfg,
        'infer_cfg': refcoco_plus_infer_cfg,
        'eval_cfg': refcoco_plus_eval_cfg,
    }
    for split in _splits
]
54 changes: 54 additions & 0 deletions ais_bench/benchmark/configs/datasets/refcocog/refcocog_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCOgDataset
from ais_bench.benchmark.datasets.refcoco import refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcocog_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, passed as a local file URL.
refcocog_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'file://{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcocog_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCOg evaluation splits.
_splits = ['val', 'test']

# One dataset entry per split; all entries share the configs above.
refcocog_datasets = [
    {
        'abbr': f'RefCOCOg_{split}',
        'type': RefCOCOgDataset,
        'path': 'ais_bench/datasets/RefCOCOg/data',
        'split': split,
        'reader_cfg': refcocog_reader_cfg,
        'infer_cfg': refcocog_infer_cfg,
        'eval_cfg': refcocog_eval_cfg,
    }
    for split in _splits
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from ais_bench.benchmark.openicl.icl_retriever import ZeroRetriever
from ais_bench.benchmark.openicl.icl_inferencer import GenInferencer
from ais_bench.benchmark.openicl.icl_prompt_template import MMPromptTemplate
from ais_bench.benchmark.datasets import RefCOCOgDataset
from ais_bench.benchmark.datasets.refcoco import IMAGE_BASE64_TYPE, refcoco_bbox_postprocess
from ais_bench.benchmark.openicl.icl_evaluator import BBoxIoUEvaluator


# Reader configuration: columns that feed the prompt template and the
# column holding the reference answer.
refcocog_reader_cfg = {
    'input_columns': ['question', 'image'],
    'output_column': 'answer',
}

# Inference configuration: one single-turn multimodal round that pairs the
# referring expression with the image, inlined as a base64 data URL.
refcocog_infer_cfg = {
    'prompt_template': {
        'type': MMPromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt_mm': {
                        'text': {
                            'type': 'text',
                            'text': 'Locate every object that matches the description "{question}" in the image. Report bbox coordinates in JSON format.',
                        },
                        'image': {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,{image}'}},
                    },
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation configuration: predictions are post-processed into bboxes and
# scored by IoU (threshold 0.5, coordinates scaled by 1000.0).
refcocog_eval_cfg = {
    'evaluator': {'type': BBoxIoUEvaluator, 'iou_threshold': 0.5, 'coord_scale': 1000.0},
    'pred_postprocessor': {'type': refcoco_bbox_postprocess},
}

# RefCOCOg evaluation splits.
_splits = ['val', 'test']

# One dataset entry per split; images are delivered base64-encoded.
refcocog_datasets = [
    {
        'abbr': f'RefCOCOg_base64_{split}',
        'type': RefCOCOgDataset,
        'path': 'ais_bench/datasets/RefCOCOg/data',
        'split': split,
        'image_type': IMAGE_BASE64_TYPE,
        'reader_cfg': refcocog_reader_cfg,
        'infer_cfg': refcocog_infer_cfg,
        'eval_cfg': refcocog_eval_cfg,
    }
    for split in _splits
]
1 change: 1 addition & 0 deletions ais_bench/benchmark/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@
from ais_bench.benchmark.datasets.mmstar import * # noqa: F401, F403
from ais_bench.benchmark.datasets.dapo_math import * # noqa: F401, F403
from ais_bench.benchmark.datasets.mooncake_trace import * # noqa: F401, F403
from ais_bench.benchmark.datasets.refcoco import * # noqa: F401, F403
9 changes: 9 additions & 0 deletions ais_bench/benchmark/datasets/refcoco/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ais_bench.benchmark.datasets.refcoco.refcoco import ( # noqa: F401
IMAGE_BASE64_TYPE,
IMAGE_PATH_TYPE,
TEMP_IMAGE_STORE_DIR,
RefCOCODataset,
refcoco_bbox_postprocess,
)
from ais_bench.benchmark.datasets.refcoco.refcoco_g import RefCOCOgDataset # noqa: F401
from ais_bench.benchmark.datasets.refcoco.refcoco_plus import RefCOCOPlusDataset # noqa: F401
Loading
Loading