Skip to content

Commit 62ce6db

Browse files
committed
add image analytics
1 parent c3ba16d commit 62ce6db

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

superannotate/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .analytics.class_analytics import (
99
attribute_distribution, class_distribution
1010
)
11+
from .analytics.user_analytics import image_analytics
1112
from .analytics.common import aggregate_annotations_as_df, df_to_annotations
1213
from .annotation_helpers import (
1314
add_annotation_bbox_to_json, add_annotation_comment_to_json,
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import pandas as pd
2+
from tqdm import tqdm
3+
import plotly.express as px
4+
5+
def image_analytics(annotations_df, visualize=False):
    """
    Aggregates image analytics: number of instances and annotation time in
    seconds per image, computed separately for every (creator email,
    creator role) pair.

    Only rows whose "creationType" is "Manual" are taken into account. The
    time spent on an instance is the gap between its "createdAt" and the
    "createdAt" of the next instance created by the same user; gaps that are
    missing or longer than 600 seconds are replaced by the median of that
    user's remaining gaps.

    :param annotations_df: pandas DataFrame of project annotations; must
        contain the columns "imageName", "instanceId", "createdAt",
        "creatorEmail", "creatorRole" and "creationType". "createdAt" may
        hold datetimes or datetime-parsable strings.
    :type annotations_df: pandas.DataFrame
    :param visualize: enables image analytics scatter plot
    :type visualize: bool

    :return: DataFrame on image analytics with columns
        ["user_id", "user_role", "image", "time", "ninstances"]
    :rtype: pandas.DataFrame
    """
    # Gaps above this many seconds are treated as "lost" (not real work time).
    max_gap_seconds = 600

    def fix_spent_time(gaps):
        # Replace lost gaps (too long or undefined) with the median of the
        # valid ones; when no gap is valid the median is NaN, which the
        # per-image sum below skips.
        lost = (gaps > max_gap_seconds) | gaps.isna()
        return gaps.mask(lost, gaps[~lost].median())

    analytics = {
        "user_id": [],
        "user_role": [],
        "image": [],
        "time": [],
        "ninstances": [],
    }
    annot_cols = [
        "imageName", "instanceId", "createdAt", "creatorEmail", "creatorRole"
    ]

    is_manual = annotations_df["creationType"] == "Manual"
    manual_df = annotations_df.loc[is_manual, annot_cols].drop_duplicates()
    # Coerce timestamps so that string-valued "createdAt" columns work too;
    # this is a no-op for columns that already hold datetimes. assign()
    # returns a new frame, so the caller's DataFrame is never modified.
    manual_df = manual_df.assign(
        createdAt=pd.to_datetime(manual_df["createdAt"])
    )

    for (user_id, user_role), grp in manual_df.groupby(
        ["creatorEmail", "creatorRole"]
    ):
        ordered = grp.sort_values("createdAt")
        # Time attributed to an instance = time until the user's next instance.
        gaps = ordered["createdAt"].diff().shift(-1).dt.total_seconds()
        ordered = ordered.assign(time_spent=fix_spent_time(gaps))

        # One aggregation keeps time and instance count aligned per image.
        per_image = ordered.groupby("imageName", as_index=False).agg(
            {"time_spent": "sum", "instanceId": "count"}
        )

        analytics["image"] += per_image["imageName"].tolist()
        analytics["time"] += per_image["time_spent"].tolist()
        analytics["ninstances"] += per_image["instanceId"].tolist()
        analytics["user_id"] += [user_id] * len(per_image)
        analytics["user_role"] += [user_role] * len(per_image)

    analytics_df = pd.DataFrame(analytics)
    if visualize:
        # Scatter plot of number of instances vs annotation time.
        fig = px.scatter(
            analytics_df,
            x="ninstances",
            y="time",
            color="user_id",
            facet_col="user_role",
            custom_data=["image"],
            color_discrete_sequence=px.colors.qualitative.Dark24,
            labels={
                'user_id': "User Email",
                "ninstances": "Number of Instances",
                "time": "Annotation time",
            },
        )
        # Facet titles come as "user_role=<value>"; keep only the value.
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
        fig.update_traces(hovertemplate="%{customdata[0]}")
        fig.show()
    return analytics_df

0 commit comments

Comments
 (0)