Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
},
"dependencies": {
"@mediapipe/pose": "^0.5.1635988162",
"@mediapipe/tasks-vision": "^0.10.6",
"@socket.io/admin-ui": "^0.2.0",
"@tensorflow-models/pose-detection": "^2.0.0",
"@tensorflow/tfjs-backend-webgl": "^3.18.0",
Expand Down
193 changes: 147 additions & 46 deletions src/blazePose.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
// Copyright 2023 The MediaPipe Authors.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
* This module contains the code for initializing and configuring the BlazePose
* model.
Expand All @@ -11,28 +25,69 @@
* });
*/

import * as poseDetection from "@tensorflow-models/pose-detection";
import "@tensorflow/tfjs-backend-webgl"; // Importing this registers the WebGL backend
import EventEmitter from "events";
import { smoothPose } from "./pose-utils";
import Stats from "stats-js"; // ignore error message

// ========== @mediapipe/tasks-vision ==========
import {
PoseLandmarker,
FilesetResolver,
RunningMode,
PoseLandmarkerResult,
NormalizedLandmark,
} from "@mediapipe/tasks-vision";
import { BlazePose } from "types";

/**
 * Names of the 33 BlazePose landmarks, in the index order produced by the
 * MediaPipe PoseLandmarker. Index i of a detection result's landmark array
 * corresponds to poseLandmarks[i]; used below to attach a `name` to each
 * keypoint when converting a PoseLandmarkerResult into a BlazePose.Pose.
 */
const poseLandmarks: BlazePose.PartName[] = [
  "nose",
  "left_eye_inner",
  "left_eye",
  "left_eye_outer",
  "right_eye_inner",
  "right_eye",
  "right_eye_outer",
  "left_ear",
  "right_ear",
  "mouth_left",
  "mouth_right",
  "left_shoulder",
  "right_shoulder",
  "left_elbow",
  "right_elbow",
  "left_wrist",
  "right_wrist",
  "left_pinky",
  "right_pinky",
  "left_index", // fixed: was "left_indix" (typo; canonical BlazePose name)
  "right_index", // fixed: was "right_indix" (typo; canonical BlazePose name)
  "left_thumb",
  "right_thumb",
  "left_hip",
  "right_hip",
  "left_knee",
  "right_knee",
  "left_ankle",
  "right_ankle",
  "left_heel",
  "right_heel",
  "left_foot_index",
  "right_foot_index",
];

// Lazily-created PoseLandmarker; stays undefined until initializeBlazePose()
// finishes loading the model, so detection code must null-check it.
let poseLandmarker: PoseLandmarker | undefined = undefined;
// Landmarker running mode. "VIDEO" selects the per-frame detectForVideo() API
// (as opposed to "IMAGE" single-shot detection).
let runningMode : RunningMode = "VIDEO";

/********************************************************************
// Demo 2: Continuously grab image from webcam stream and detect it.
********************************************************************/

// FPS meter overlay (stats.js) — usage: https://github.com/mrdoob/stats.js
const stats = new Stats();
// Panel selection: 0 = fps, 1 = ms, 2 = mb, 3+ = custom.
stats.showPanel(0);
document.body.appendChild(stats.dom);

/**
* Configuration
*/

// When true, each detected pose is passed through smoothPose() before being
// emitted; disabled by default.
const smoothPoses = false;

// Configuration for the legacy @tensorflow-models/pose-detection BlazePose
// detector (runtime backend and model size).
// NOTE(review): this diff migrates to @mediapipe/tasks-vision's
// PoseLandmarker — confirm whether this config is still referenced anywhere.
const blazePoseDetectorConfig = {
runtime: "tfjs", // 'mediapipe', 'tfjs'
modelType: "full", // 'lite', 'full', 'heavy'
};

/** Emits the following events:
*
* ("pose", pose: Pose)
Expand All @@ -58,50 +113,96 @@ export const poseEmitter = new EventEmitter();
export async function initializeBlazePose(
video: HTMLVideoElement
): Promise<void> {
const model = poseDetection.SupportedModels.BlazePose;
const detector = await poseDetection.createDetector(
model,
blazePoseDetectorConfig
);
// Before we can use PoseLandmarker class we must wait for it to finish
// loading. Machine Learning models can be large and take a moment to
// get everything needed to run.
const createPoseLandmarker = async () => {
const vision = await FilesetResolver.forVisionTasks("../node_modules/@mediapipe/tasks-vision/wasm");
poseLandmarker = await PoseLandmarker.createFromOptions(vision, {
baseOptions: {
modelAssetPath: `https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_full/float16/latest/pose_landmarker_full.task`,
delegate: "GPU"
},
runningMode: runningMode,
numPoses: 1
});
};
await createPoseLandmarker();

let loopIsRunning = false;
let lastVideoTime = -1;
video.addEventListener("loadeddata", () => {
if (!loopIsRunning) loop();
loopIsRunning = true;
predictWebcam();
});
loop(); // run asynchronously
loopIsRunning = true;
predictWebcam();
return;

async function loop() {
loopIsRunning = true;
while (true) {
// stats begin
stats.begin();

let poses;
try {
poses = await detector.estimatePoses(video);
} catch (e) {
console.error("error while estimating poses", e);
loopIsRunning = false;
return;
async function predictWebcam() {
// stats begin
stats.begin();

let pose : BlazePose.Pose = {
keypoints: [],
keypoints3D: [],
score: 0.99,
};

let landmarks : NormalizedLandmark[] | undefined = undefined;

try {
let startTimeMs = performance.now();
if (lastVideoTime !== video.currentTime) {
lastVideoTime = video.currentTime;
if (poseLandmarker != undefined) {
poseLandmarker.detectForVideo(video, startTimeMs, (result) => {
[landmarks] = result.landmarks;
});
}
}
} catch (e) {
console.error("error while estimating poses", e);
// loopIsRunning = false;
// return;
}

let startTimeMs = performance.now();
if (lastVideoTime !== video.currentTime) {
lastVideoTime = video.currentTime;
if (poseLandmarker != undefined) {
poseLandmarker.detectForVideo(video, startTimeMs, (result) => {
[landmarks] = result.landmarks;
});
}
let [bpPose] = await detector.estimatePoses(video);
if (!bpPose) continue;
// TODO remove the any cast
//
// It is currently needed because the typescript definitions for the
// BlazePose model specifies the type as `PartName` instead of `string`.
//
// Possibly this app should import the BlazePose definitions instead of
// defining its own.
let pose = bpPose as any;
if (smoothPoses) {
pose = smoothPose(pose);
}

if (landmarks != undefined) {
for (let i=0;i<landmarks.length;i++) {
let landmark = landmarks[i];
let kp : BlazePose.Keypoint = {
score: 0.99,
x: landmark.x * video.width,
y: landmark.y * video.height,
name: poseLandmarks[i],
};
pose.keypoints.push(kp);
}
poseEmitter.emit("pose", pose);


if (pose.keypoints.length > 0) {
console.log(pose);
poseEmitter.emit("pose", pose);
}

// stats end
stats.end();
}

// Call this function again to keep predicting when the browser is ready.
if (loopIsRunning) {
window.requestAnimationFrame(predictWebcam);
}
}

}
2 changes: 2 additions & 0 deletions src/drawPose.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ export function drawPose(p5: p5, person: Performer, outline: boolean): void {
const skeletonColor = p5.color(hue, 50, 50);
const outlineColor = p5.color(hue, 50, 50, 0.5);

pose = polishedPose;

drawKeypoints(p5, pose, keypointColor, outline);

// drawKeypoints(p5, polishedPose, keypointColor, !outline);
Expand Down
12 changes: 8 additions & 4 deletions src/pose-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ export function polishPose(
currentPose: BlazePose.Pose,
): BlazePose.Pose {
// calculate the polished pose
// current implementation: (unweighted) average of poses;
let polishedPose = JSON.parse(JSON.stringify(currentPose));
// traverse the target keypoints
for (const targetKeypoint of polishedPose.keypoints) {
Expand All @@ -95,9 +94,14 @@ export function polishPose(
// if name matches the target name, include this data item
for (const kp of previousPose.keypoints) {
if (kp.name == targetName) {
countertemp++;
xtemp += kp.x;
ytemp += kp.y;
// // past implementation: (unweighted) average of poses
// countertemp++;
// xtemp += kp.x;
// ytemp += kp.y;
// current implementation: previous N poses have weight f(delta_N) = (N - delta_N + 1) / N, i = N - delta_N;
countertemp += 1/previousPoses.length * (i+1);
xtemp += kp.x * 1/previousPoses.length * (i+1);
ytemp += kp.y * 1/previousPoses.length * (i+1);
}
}
}
Expand Down
Loading