opendatalab · gary-Shen · Apr 21, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/README.md b/README.md
@@ -28,6 +28,18 @@ Audio annotation tools are another key feature of LabelU. These tools possess ef
 ### Artificial Intelligence Assisted Labelling
 LabelU supports one-click loading of pre-annotated data, which can be refined and adjusted according to actual needs. This feature improves the efficiency and accuracy of annotation.
 
+### AI Auto-Annotation
+LabelU integrates AI model services for automatic annotation of image data. Click the "AI Annotate" button on the annotation page to have the model automatically detect and segment objects. Batch annotation of an entire task is also supported, with real-time progress tracking. Three reference model servers are provided out of the box:
+
+- **Florence-2** — lightweight, CPU-friendly (~4GB VRAM)
+- **GroundingDINO + EfficientSAM** — high-quality detection + segmentation (~4GB VRAM)
+- **SAM 3** — state-of-the-art unified model (~8GB VRAM, requires high-end GPU)
+
+See [`model_server/README.md`](./model_server/README.md) for setup instructions.
+
+### S3 Data Source Import
+LabelU supports importing annotation data directly from S3-compatible object storage (AWS S3, MinIO, etc.). Configure data source connections in the task settings, browse and preview files, then import selected files or all files under a path with one click.
+
 
 https://github.com/user-attachments/assets/0fa5bc39-20ba-46b6-9839-379a49f692cf
 

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -28,9 +28,25 @@ LabelU为图像标注提供了全面的工具集，包括2D框、语义分割、
 ### 人工智能辅助标注
 LabelU 支持预标注数据的一键载入，用户可以根据实际需要对其进行细化和调整。这一特性提高了标注的效率和准确性。
 
+### AI 自动标注
+LabelU 集成了 AI 模型服务，支持图像数据的自动标注。在标注页面点击「AI 标注」按钮即可让模型自动检测和分割目标，也支持对整个任务的所有未标注样本进行批量标注，并可实时查看进度。项目内置提供了三个参考模型服务：
+
+- **Florence-2** — 轻量级，CPU 友好（约 4GB 显存）
+- **GroundingDINO + EfficientSAM** — 高质量检测 + 分割（约 4GB 显存）
+- **SAM 3** — 最新一代统一模型（约 8GB 显存，需要高端 GPU）
+
+部署方式详见 [`model_server/README.md`](./model_server/README.md)。
+
+### S3 数据源导入
+LabelU 支持从 S3 兼容对象存储（AWS S3、MinIO 等）直接导入标注数据。在任务设置中配置数据源连接，浏览和预览文件，然后一键导入选定文件或路径下的所有文件。
+
+
 https://github.com/user-attachments/assets/f90e5a66-ab4d-456e-af4d-e6408a623812
 
 
+https://github.com/user-attachments/assets/0fa5bc39-20ba-46b6-9839-379a49f692cf
+
+
 ## 特性
 
 - 简易，提供多种图像标注工具，通过简单可视化配置即可标注。

diff --git a/labelu/alembic_labelu/versions/a1b2c3d4e5f6_add_export_job_table.py b/labelu/alembic_labelu/versions/a1b2c3d4e5f6_add_export_job_table.py
@@ -10,7 +10,7 @@
 
 # revision identifiers, used by Alembic.
 revision = 'a1b2c3d4e5f6'
-down_revision = '2eb983c9a254'
+down_revision = '034c7045b540'
 branch_labels = None
 depends_on = None
 

diff --git a/labelu/alembic_labelu/versions/b2c3d4e5f6a7_add_data_source.py b/labelu/alembic_labelu/versions/b2c3d4e5f6a7_add_data_source.py
@@ -0,0 +1,73 @@
+"""add data_source table and attachment.data_source_id
+
+Revision ID: b2c3d4e5f6a7
+Revises: a1b2c3d4e5f6
+Create Date: 2026-04-17 12:00:00.000000
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+revision = "b2c3d4e5f6a7"
+down_revision = "a1b2c3d4e5f6"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    existing_tables = inspector.get_table_names()
+
+    if "data_source" not in existing_tables:
+        op.create_table(
+            "data_source",
+            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+            sa.Column("name", sa.String(128), nullable=False),
+            sa.Column("type", sa.String(32), nullable=False, server_default="S3"),
+            sa.Column("endpoint", sa.String(512)),
+            sa.Column("region", sa.String(64)),
+            sa.Column("bucket", sa.String(256), nullable=False),
+            sa.Column("prefix", sa.String(512), server_default=""),
+            sa.Column("access_key_id", sa.String(512)),
+            sa.Column("secret_access_key", sa.String(1024)),
+            sa.Column("path_style", sa.Boolean(), server_default=sa.text("0")),
+            sa.Column("use_ssl", sa.Boolean(), server_default=sa.text("1")),
+            sa.Column("presign_expire_secs", sa.Integer(), server_default=sa.text("3600")),
+            sa.Column("created_by", sa.Integer(), sa.ForeignKey("user.id")),
+            sa.Column("updated_by", sa.Integer(), sa.ForeignKey("user.id")),
+            sa.Column("created_at", sa.DateTime(timezone=True)),
+            sa.Column("updated_at", sa.DateTime(timezone=True)),
+            sa.Column("deleted_at", sa.DateTime()),
+        )
+        op.create_index("ix_data_source_id", "data_source", ["id"])
+        op.create_index("ix_data_source_created_by", "data_source", ["created_by"])
+        op.create_index("ix_data_source_deleted_at", "data_source", ["deleted_at"])
+
+    existing_columns = [c["name"] for c in inspector.get_columns("task_attachment")]
+    if "data_source_id" not in existing_columns:
+        with op.batch_alter_table("task_attachment", naming_convention={"fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s"}) as batch_op:
+            batch_op.add_column(
+                sa.Column("data_source_id", sa.Integer(), nullable=True)
+            )
+            batch_op.create_foreign_key(
+                "fk_task_attachment_data_source_id_data_source",
+                "data_source",
+                ["data_source_id"],
+                ["id"],
+            )
+            batch_op.create_index("ix_task_attachment_data_source_id", ["data_source_id"])
+
+
+def downgrade() -> None:
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    existing_columns = [c["name"] for c in inspector.get_columns("task_attachment")]
+    if "data_source_id" in existing_columns:
+        with op.batch_alter_table("task_attachment") as batch_op:
+            batch_op.drop_index("ix_task_attachment_data_source_id")
+            batch_op.drop_column("data_source_id")
+
+    if "data_source" in inspector.get_table_names():
+        op.drop_table("data_source")
diff --git a/labelu/alembic_labelu/versions/c3d4e5f6a7b8_add_auto_label_job_table.py b/labelu/alembic_labelu/versions/c3d4e5f6a7b8_add_auto_label_job_table.py
@@ -0,0 +1,54 @@
+"""add auto_label_job table
+
+Revision ID: c3d4e5f6a7b8
+Revises: b2c3d4e5f6a7
+Create Date: 2026-04-20 10:00:00.000000
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = 'c3d4e5f6a7b8'
+down_revision = 'b2c3d4e5f6a7'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    tables = inspector.get_table_names()
+
+    if 'auto_label_job' not in tables:
+        op.create_table(
+            'auto_label_job',
+            sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+            sa.Column('task_id', sa.Integer(), nullable=True),
+            sa.Column('created_by', sa.Integer(), nullable=True),
+            sa.Column('status', sa.String(length=32), nullable=True),
+            sa.Column('sample_count', sa.Integer(), nullable=True),
+            sa.Column('processed_count', sa.Integer(), nullable=True),
+            sa.Column('success_count', sa.Integer(), nullable=True),
+            sa.Column('failed_count', sa.Integer(), nullable=True),
+            sa.Column('filter_by_labels', sa.Boolean(), nullable=True),
+            sa.Column('error_message', sa.Text(), nullable=True),
+            sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
+            sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
+            sa.ForeignKeyConstraint(['task_id'], ['task.id']),
+            sa.ForeignKeyConstraint(['created_by'], ['user.id']),
+            sa.PrimaryKeyConstraint('id'),
+        )
+        op.create_index('ix_auto_label_job_id', 'auto_label_job', ['id'])
+        op.create_index('ix_auto_label_job_task_id', 'auto_label_job', ['task_id'])
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    tables = inspector.get_table_names()
+
+    if 'auto_label_job' in tables:
+        op.drop_index('ix_auto_label_job_task_id', table_name='auto_label_job')
+        op.drop_index('ix_auto_label_job_id', table_name='auto_label_job')
+        op.drop_table('auto_label_job')
diff --git a/labelu/internal/adapter/persistence/crud_auto_label_job.py b/labelu/internal/adapter/persistence/crud_auto_label_job.py
@@ -0,0 +1,40 @@
+from typing import Optional
+from sqlalchemy.orm import Session
+from labelu.internal.domain.models.auto_label_job import AutoLabelJob, AutoLabelStatus
+
+
+def create(db: Session, task_id: int, user_id: int, sample_count: int, filter_by_labels: bool) -> AutoLabelJob:
+    job = AutoLabelJob(
+        task_id=task_id,
+        created_by=user_id,
+        sample_count=sample_count,
+        filter_by_labels=filter_by_labels,
+    )
+    db.add(job)
+    db.flush()
+    db.refresh(job)
+    return job
+
+
+def get(db: Session, job_id: int) -> Optional[AutoLabelJob]:
+    return db.query(AutoLabelJob).filter(AutoLabelJob.id == job_id).first()
+
+
+def update_status(db: Session, job: AutoLabelJob, status: str, **kwargs) -> AutoLabelJob:
+    job.status = status
+    for k, v in kwargs.items():
+        setattr(job, k, v)
+    db.flush()
+    db.refresh(job)
+    return job
+
+
+def increment_progress(db: Session, job: AutoLabelJob, success: bool) -> AutoLabelJob:
+    job.processed_count = (job.processed_count or 0) + 1
+    if success:
+        job.success_count = (job.success_count or 0) + 1
+    else:
+        job.failed_count = (job.failed_count or 0) + 1
+    db.flush()
+    db.refresh(job)
+    return job
diff --git a/labelu/internal/adapter/persistence/crud_datasource.py b/labelu/internal/adapter/persistence/crud_datasource.py
@@ -0,0 +1,45 @@
+from typing import Optional, List, Tuple
+from datetime import datetime
+
+from sqlalchemy.orm import Session
+
+from labelu.internal.domain.models.data_source import DataSource
+
+
+def create(db: Session, data_source: DataSource) -> DataSource:
+    """Add ``data_source`` to the session and return it.
+
+    The object is flushed and refreshed but not committed here.
+    """
+    db.add(data_source)
+    db.flush()
+    db.refresh(data_source)
+    return data_source
+
+
+def get(db: Session, ds_id: int) -> Optional[DataSource]:
+    return (
+        db.query(DataSource)
+        .filter(DataSource.id == ds_id, DataSource.deleted_at.is_(None))
+        .first()
+    )
+
+
+def list_by_user(
+    db: Session, user_id: int, page: int = 0, size: int = 100
+) -> Tuple[List[DataSource], int]:
+    query = db.query(DataSource).filter(
+        DataSource.created_by == user_id, DataSource.deleted_at.is_(None)
+    )
+    total = query.count()
+    items = query.order_by(DataSource.id.desc()).offset(page * size).limit(size).all()
+    return items, total
+
+
+def update(db: Session, db_obj: DataSource, obj_in: dict) -> DataSource:
+    for k, v in obj_in.items():
+        setattr(db_obj, k, v)
+    db.flush()
+    db.refresh(db_obj)
+    return db_obj
+
+
+def soft_delete(db: Session, db_obj: DataSource) -> None:
+    db_obj.deleted_at = datetime.now()
+    db.flush()
diff --git a/labelu/internal/adapter/persistence/crud_sample.py b/labelu/internal/adapter/persistence/crud_sample.py
@@ -81,6 +81,18 @@ def get_by_ids(db: Session, sample_ids: List[int], task_id: Union[int, None] = N
     return db.query(TaskSample).filter(*query_filter).all()
 
 
+def list_new_samples(db: Session, task_id: int) -> List[TaskSample]:
+    return (
+        db.query(TaskSample)
+        .filter(
+            TaskSample.task_id == task_id,
+            TaskSample.state == SampleState.NEW.value,
+            TaskSample.deleted_at == None,
+        )
+        .all()
+    )
+
+
 def update(db: Session, db_obj: TaskSample, obj_in: Dict[str, Any]) -> TaskSample:
     obj_data = jsonable_encoder(obj_in)
     for field in obj_data:

diff --git a/labelu/internal/adapter/routers/__init__.py b/labelu/internal/adapter/routers/__init__.py
@@ -6,6 +6,7 @@
 from labelu.internal.adapter.routers import sample
 from labelu.internal.adapter.routers import attachment
 from labelu.internal.adapter.routers import pre_annotation
+from labelu.internal.adapter.routers import datasource
 
 
 def add_router(app: FastAPI):
@@ -14,3 +15,4 @@ def add_router(app: FastAPI):
     app.include_router(attachment.router, prefix=settings.API_V1_STR)
     app.include_router(sample.router, prefix=settings.API_V1_STR)
     app.include_router(pre_annotation.router, prefix=settings.API_V1_STR)
+    app.include_router(datasource.router, prefix=settings.API_V1_STR)
diff --git a/labelu/internal/adapter/routers/attachment.py b/labelu/internal/adapter/routers/attachment.py
@@ -4,7 +4,7 @@
 from sqlalchemy.orm import Session
 from fastapi import APIRouter, status, Depends, Security
 from fastapi import File, Header, UploadFile
-from fastapi.responses import FileResponse, StreamingResponse, Response
+from fastapi.responses import FileResponse, StreamingResponse, Response, RedirectResponse
 from fastapi.security import HTTPAuthorizationCredentials
 import mimetypes
 
@@ -60,8 +60,9 @@ async def download_attachment(file_path: str):
 
     # business logic
     data = await service.download_attachment(file_path=file_path)
-
-    return data
+    if data.get("redirect_url"):
+        return RedirectResponse(url=data["redirect_url"], status_code=status.HTTP_307_TEMPORARY_REDIRECT)
+    return FileResponse(path=data["local_path"])
 
 @router.get(
     "/partial/{file_path:path}",
@@ -75,7 +76,9 @@ async def get_content(file_path: str, range: str = Header(None)):
 
     try:
         full_path = await service.download_attachment(file_path=file_path)
-        full_path = Path(full_path) 
+        if full_path.get("redirect_url"):
+            return RedirectResponse(url=full_path["redirect_url"], status_code=status.HTTP_307_TEMPORARY_REDIRECT)
+        full_path = Path(full_path["local_path"])
     except (FileNotFoundError, OSError, LabelUException):
         raise LabelUException(
             code=ErrorCode.CODE_51001_TASK_ATTACHMENT_NOT_FOUND,