Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ Audio annotation tools are another key feature of LabelU. These tools possess ef
### Artificial Intelligence Assisted Labelling
LabelU supports one-click loading of pre-annotated data, which can be refined and adjusted according to actual needs. This feature improves the efficiency and accuracy of annotation.

### AI Auto-Annotation
LabelU integrates AI model services for automatic annotation of image data. Click the "AI Annotate" button on the annotation page to have the model automatically detect and segment objects. Batch annotation of all unannotated samples in a task is also supported, with real-time progress tracking. Three reference model servers are provided out of the box:

- **Florence-2** — lightweight, CPU-friendly (~4GB VRAM)
- **GroundingDINO + EfficientSAM** — high-quality detection + segmentation (~4GB VRAM)
- **SAM 3** — state-of-the-art unified model (~8GB VRAM, requires high-end GPU)

See [`model_server/README.md`](./model_server/README.md) for setup instructions.

### S3 Data Source Import
LabelU supports importing annotation data directly from S3-compatible object storage (AWS S3, MinIO, etc.). Configure data source connections in the task settings, browse and preview files, then import selected files or all files under a path with one click.


https://github.com/user-attachments/assets/0fa5bc39-20ba-46b6-9839-379a49f692cf

Expand Down
16 changes: 16 additions & 0 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,25 @@ LabelU为图像标注提供了全面的工具集,包括2D框、语义分割、
### 人工智能辅助标注
LabelU 支持预标注数据的一键载入,用户可以根据实际需要对其进行细化和调整。这一特性提高了标注的效率和准确性。

### AI 自动标注
LabelU 集成了 AI 模型服务,支持图像数据的自动标注。在标注页面点击「AI 标注」按钮即可让模型自动检测和分割目标,也支持对整个任务的所有未标注样本进行批量标注,并可实时查看进度。项目内置提供了三个参考模型服务:

- **Florence-2** — 轻量级,CPU 友好(约 4GB 显存)
- **GroundingDINO + EfficientSAM** — 高质量检测 + 分割(约 4GB 显存)
- **SAM 3** — 最新一代统一模型(约 8GB 显存,需要高端 GPU)

部署方式详见 [`model_server/README.md`](./model_server/README.md)。

### S3 数据源导入
LabelU 支持从 S3 兼容对象存储(AWS S3、MinIO 等)直接导入标注数据。在任务设置中配置数据源连接,浏览和预览文件,然后一键导入选定文件或路径下的所有文件。


https://github.com/user-attachments/assets/f90e5a66-ab4d-456e-af4d-e6408a623812


https://github.com/user-attachments/assets/0fa5bc39-20ba-46b6-9839-379a49f692cf


## 特性

- 简易,提供多种图像标注工具,通过简单可视化配置即可标注。
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# revision identifiers, used by Alembic.
revision = 'a1b2c3d4e5f6'
down_revision = '2eb983c9a254'
down_revision = '034c7045b540'
branch_labels = None
depends_on = None

Expand Down
73 changes: 73 additions & 0 deletions labelu/alembic_labelu/versions/b2c3d4e5f6a7_add_data_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""add data_source table and attachment.data_source_id

Revision ID: b2c3d4e5f6a7
Revises: a1b2c3d4e5f6
Create Date: 2026-04-17 12:00:00.000000

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "b2c3d4e5f6a7"
# Parent revision: this migration is applied on top of a1b2c3d4e5f6.
down_revision = "a1b2c3d4e5f6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the ``data_source`` table and link ``task_attachment`` to it.

    Each step is guarded by schema inspection: the table is only created when
    it is missing, and ``task_attachment.data_source_id`` (with its foreign
    key and index) is only added when the column is missing.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    existing_tables = inspector.get_table_names()

    if "data_source" not in existing_tables:
        op.create_table(
            "data_source",
            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
            sa.Column("name", sa.String(128), nullable=False),
            sa.Column("type", sa.String(32), nullable=False, server_default="S3"),
            sa.Column("endpoint", sa.String(512)),
            sa.Column("region", sa.String(64)),
            sa.Column("bucket", sa.String(256), nullable=False),
            sa.Column("prefix", sa.String(512), server_default=""),
            sa.Column("access_key_id", sa.String(512)),
            sa.Column("secret_access_key", sa.String(1024)),
            # sa.false()/sa.true() are rendered per dialect (0/1 where there
            # is no native boolean, false/true otherwise), so the defaults
            # stay valid on backends that reject an integer default for a
            # BOOLEAN column.
            sa.Column("path_style", sa.Boolean(), server_default=sa.false()),
            sa.Column("use_ssl", sa.Boolean(), server_default=sa.true()),
            sa.Column("presign_expire_secs", sa.Integer(), server_default=sa.text("3600")),
            sa.Column("created_by", sa.Integer(), sa.ForeignKey("user.id")),
            sa.Column("updated_by", sa.Integer(), sa.ForeignKey("user.id")),
            sa.Column("created_at", sa.DateTime(timezone=True)),
            sa.Column("updated_at", sa.DateTime(timezone=True)),
            sa.Column("deleted_at", sa.DateTime()),
        )
        op.create_index("ix_data_source_id", "data_source", ["id"])
        op.create_index("ix_data_source_created_by", "data_source", ["created_by"])
        op.create_index("ix_data_source_deleted_at", "data_source", ["deleted_at"])

    existing_columns = [c["name"] for c in inspector.get_columns("task_attachment")]
    if "data_source_id" not in existing_columns:
        # Naming convention handed to the batch operation for foreign keys;
        # it matches the explicit constraint name used below.
        fk_naming_convention = {
            "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s"
        }
        with op.batch_alter_table(
            "task_attachment", naming_convention=fk_naming_convention
        ) as batch_op:
            batch_op.add_column(
                sa.Column("data_source_id", sa.Integer(), nullable=True)
            )
            batch_op.create_foreign_key(
                "fk_task_attachment_data_source_id_data_source",
                "data_source",
                ["data_source_id"],
                ["id"],
            )
            batch_op.create_index("ix_task_attachment_data_source_id", ["data_source_id"])


def downgrade() -> None:
    """Remove ``task_attachment.data_source_id`` and drop ``data_source``.

    Mirrors ``upgrade`` in reverse. Every step is guarded by schema
    inspection so the downgrade can run against a partially migrated
    database.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)

    existing_columns = [c["name"] for c in inspector.get_columns("task_attachment")]
    if "data_source_id" in existing_columns:
        # Look up the foreign keys by reflection instead of assuming the name
        # used in upgrade(): the constraint may be missing or named
        # differently if the column was not created by this migration.
        fk_names = [
            fk["name"]
            for fk in inspector.get_foreign_keys("task_attachment")
            if fk.get("name") and "data_source_id" in fk["constrained_columns"]
        ]
        index_names = {ix["name"] for ix in inspector.get_indexes("task_attachment")}

        with op.batch_alter_table("task_attachment") as batch_op:
            # The foreign key must go first: some backends (e.g. MySQL) refuse
            # to drop an index or column that a foreign key still depends on.
            for fk_name in fk_names:
                batch_op.drop_constraint(fk_name, type_="foreignkey")
            if "ix_task_attachment_data_source_id" in index_names:
                batch_op.drop_index("ix_task_attachment_data_source_id")
            batch_op.drop_column("data_source_id")

    if "data_source" in inspector.get_table_names():
        op.drop_table("data_source")
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""add auto_label_job table

Revision ID: c3d4e5f6a7b8
Revises: b2c3d4e5f6a7
Create Date: 2026-04-20 10:00:00.000000

"""
from alembic import op
import sqlalchemy as sa

# Revision identifiers, used by Alembic.
revision = 'c3d4e5f6a7b8'
# Parent revision: b2c3d4e5f6a7 (the data_source migration) is applied first.
down_revision = 'b2c3d4e5f6a7'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the ``auto_label_job`` table and its indexes if it is missing."""
    bind = op.get_bind()
    known_tables = sa.inspect(bind).get_table_names()

    # Nothing to do when the table already exists.
    if 'auto_label_job' in known_tables:
        return

    job_columns = [
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('task_id', sa.Integer(), nullable=True),
        sa.Column('created_by', sa.Integer(), nullable=True),
        sa.Column('status', sa.String(length=32), nullable=True),
        sa.Column('sample_count', sa.Integer(), nullable=True),
        sa.Column('processed_count', sa.Integer(), nullable=True),
        sa.Column('success_count', sa.Integer(), nullable=True),
        sa.Column('failed_count', sa.Integer(), nullable=True),
        sa.Column('filter_by_labels', sa.Boolean(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    ]
    job_constraints = [
        sa.ForeignKeyConstraint(['task_id'], ['task.id']),
        sa.ForeignKeyConstraint(['created_by'], ['user.id']),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('auto_label_job', *job_columns, *job_constraints)

    op.create_index('ix_auto_label_job_id', 'auto_label_job', ['id'])
    op.create_index('ix_auto_label_job_task_id', 'auto_label_job', ['task_id'])


def downgrade() -> None:
    """Drop the ``auto_label_job`` indexes and table if the table exists."""
    bind = op.get_bind()
    known_tables = sa.inspect(bind).get_table_names()

    # Nothing to undo when the table was never created.
    if 'auto_label_job' not in known_tables:
        return

    # Indexes are removed in the reverse order of their creation.
    for index_name in ('ix_auto_label_job_task_id', 'ix_auto_label_job_id'):
        op.drop_index(index_name, table_name='auto_label_job')
    op.drop_table('auto_label_job')
40 changes: 40 additions & 0 deletions labelu/internal/adapter/persistence/crud_auto_label_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Optional
from sqlalchemy.orm import Session
from labelu.internal.domain.models.auto_label_job import AutoLabelJob, AutoLabelStatus


def create(db: Session, task_id: int, user_id: int, sample_count: int, filter_by_labels: bool) -> AutoLabelJob:
    """Insert a new auto-label job and return the refreshed row.

    The session is flushed, not committed; committing is presumably left to
    the caller.
    """
    job_fields = {
        "task_id": task_id,
        "created_by": user_id,
        "sample_count": sample_count,
        "filter_by_labels": filter_by_labels,
    }
    new_job = AutoLabelJob(**job_fields)

    db.add(new_job)
    db.flush()
    # Reload the row so database-side values are visible on the instance.
    db.refresh(new_job)
    return new_job


def get(db: Session, job_id: int) -> Optional[AutoLabelJob]:
    """Return the auto-label job with the given id, or ``None`` if absent."""
    matching_jobs = db.query(AutoLabelJob).filter(AutoLabelJob.id == job_id)
    return matching_jobs.first()


def update_status(db: Session, job: AutoLabelJob, status: str, **kwargs) -> AutoLabelJob:
    """Set the job's status plus any extra attributes, then flush and refresh.

    Every keyword argument is assigned onto ``job`` as an attribute of the
    same name after the status has been set.
    """
    job.status = status
    for attr_name in kwargs:
        setattr(job, attr_name, kwargs[attr_name])

    db.flush()
    db.refresh(job)
    return job


def increment_progress(db: Session, job: AutoLabelJob, success: bool) -> AutoLabelJob:
    """Record one processed sample on the job, then flush and refresh.

    ``processed_count`` is always incremented; ``success_count`` or
    ``failed_count`` is incremented depending on ``success``. A counter that
    is currently ``None`` is treated as 0.
    """
    outcome_counter = "success_count" if success else "failed_count"
    for counter_name in ("processed_count", outcome_counter):
        current = getattr(job, counter_name) or 0
        setattr(job, counter_name, current + 1)

    db.flush()
    db.refresh(job)
    return job
45 changes: 45 additions & 0 deletions labelu/internal/adapter/persistence/crud_datasource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from typing import Optional, List, Tuple
from datetime import datetime

from sqlalchemy.orm import Session

from labelu.internal.domain.models.data_source import DataSource


def create(db: Session, data_source: DataSource) -> DataSource:
    """Add ``data_source`` to the session and return it after flush and refresh.

    The session is flushed, not committed; committing is presumably left to
    the caller.
    """
    db.add(data_source)
    db.flush()
    # Reload the row so database-side values are visible on the instance.
    db.refresh(data_source)

    return data_source


def get(db: Session, ds_id: int) -> Optional[DataSource]:
    """Return the data source with id ``ds_id`` whose ``deleted_at`` is NULL.

    Returns ``None`` when no such row exists.
    """
    has_requested_id = DataSource.id == ds_id
    not_soft_deleted = DataSource.deleted_at.is_(None)

    live_sources = db.query(DataSource).filter(has_requested_id, not_soft_deleted)
    return live_sources.first()


def list_by_user(
    db: Session, user_id: int, page: int = 0, size: int = 100
) -> Tuple[List[DataSource], int]:
    """Return one page of a user's data sources together with the total count.

    Only rows created by ``user_id`` whose ``deleted_at`` is NULL are
    considered. Pages are zero-based and ordered by descending id; the total
    counts all matching rows, not just those on the returned page.
    """
    owned_by_user = DataSource.created_by == user_id
    not_soft_deleted = DataSource.deleted_at.is_(None)
    base_query = db.query(DataSource).filter(owned_by_user, not_soft_deleted)

    total = base_query.count()

    newest_first = base_query.order_by(DataSource.id.desc())
    skip = page * size
    page_items = newest_first.offset(skip).limit(size).all()
    return page_items, total


def update(db: Session, db_obj: DataSource, obj_in: dict) -> DataSource:
    """Assign every key/value of ``obj_in`` onto ``db_obj``, then flush and refresh.

    Keys are used as attribute names as-is; no filtering is applied here.
    """
    for attr_name in obj_in:
        setattr(db_obj, attr_name, obj_in[attr_name])

    db.flush()
    db.refresh(db_obj)
    return db_obj


def soft_delete(db: Session, db_obj: DataSource) -> None:
    """Mark ``db_obj`` as deleted by stamping ``deleted_at``, then flush.

    The row itself is kept; the timestamp is the naive local time returned
    by ``datetime.now()``.
    """
    deletion_time = datetime.now()
    db_obj.deleted_at = deletion_time
    db.flush()
12 changes: 12 additions & 0 deletions labelu/internal/adapter/persistence/crud_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ def get_by_ids(db: Session, sample_ids: List[int], task_id: Union[int, None] = N
return db.query(TaskSample).filter(*query_filter).all()


def list_new_samples(db: Session, task_id: int) -> List[TaskSample]:
    """Return all samples of ``task_id`` in the NEW state whose ``deleted_at`` is NULL."""
    belongs_to_task = TaskSample.task_id == task_id
    is_new = TaskSample.state == SampleState.NEW.value
    # .is_(None) renders the same "IS NULL" test as comparing with == None.
    not_soft_deleted = TaskSample.deleted_at.is_(None)

    new_samples = db.query(TaskSample).filter(belongs_to_task, is_new, not_soft_deleted)
    return new_samples.all()


def update(db: Session, db_obj: TaskSample, obj_in: Dict[str, Any]) -> TaskSample:
obj_data = jsonable_encoder(obj_in)
for field in obj_data:
Expand Down
2 changes: 2 additions & 0 deletions labelu/internal/adapter/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from labelu.internal.adapter.routers import sample
from labelu.internal.adapter.routers import attachment
from labelu.internal.adapter.routers import pre_annotation
from labelu.internal.adapter.routers import datasource


def add_router(app: FastAPI):
Expand All @@ -14,3 +15,4 @@ def add_router(app: FastAPI):
app.include_router(attachment.router, prefix=settings.API_V1_STR)
app.include_router(sample.router, prefix=settings.API_V1_STR)
app.include_router(pre_annotation.router, prefix=settings.API_V1_STR)
app.include_router(datasource.router, prefix=settings.API_V1_STR)
11 changes: 7 additions & 4 deletions labelu/internal/adapter/routers/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sqlalchemy.orm import Session
from fastapi import APIRouter, status, Depends, Security
from fastapi import File, Header, UploadFile
from fastapi.responses import FileResponse, StreamingResponse, Response
from fastapi.responses import FileResponse, StreamingResponse, Response, RedirectResponse
from fastapi.security import HTTPAuthorizationCredentials
import mimetypes

Expand Down Expand Up @@ -60,8 +60,9 @@ async def download_attachment(file_path: str):

# business logic
data = await service.download_attachment(file_path=file_path)

return data
if data.get("redirect_url"):
return RedirectResponse(url=data["redirect_url"], status_code=status.HTTP_307_TEMPORARY_REDIRECT)
return FileResponse(path=data["local_path"])

@router.get(
"/partial/{file_path:path}",
Expand All @@ -75,7 +76,9 @@ async def get_content(file_path: str, range: str = Header(None)):

try:
full_path = await service.download_attachment(file_path=file_path)
full_path = Path(full_path)
if full_path.get("redirect_url"):
return RedirectResponse(url=full_path["redirect_url"], status_code=status.HTTP_307_TEMPORARY_REDIRECT)
full_path = Path(full_path["local_path"])
except (FileNotFoundError, OSError, LabelUException):
raise LabelUException(
code=ErrorCode.CODE_51001_TASK_ATTACHMENT_NOT_FOUND,
Expand Down
Loading