From 5cf2f42bbe19937a344c726bcad8b9322e88c757 Mon Sep 17 00:00:00 2001 From: grumd Date: Tue, 6 Jan 2026 02:19:52 +0100 Subject: [PATCH] Implemented score recognition from a photo using OpenAI API --- package-lock.json | 55 +++-- packages/api/.env.example | 3 +- .../20250106000000_add_openai_tokens.ts | 28 +++ packages/api/package.json | 1 + .../src/services/results/recognizeScore.ts | 127 +++++++++++ packages/api/src/trpc/routes/results/index.ts | 2 + .../src/trpc/routes/results/recognizeScore.ts | 36 ++++ packages/api/src/types/database.ts | 3 + packages/web/package.json | 1 + .../useConfirmationPopup.tsx | 9 +- .../web/src/constants/translations/en.tsx | 34 ++- .../web/src/constants/translations/pl.tsx | 35 ++- .../web/src/constants/translations/ru.tsx | 38 +++- .../web/src/constants/translations/ua.tsx | 35 ++- .../src/features/leaderboards/AddResult.tsx | 150 +++++++------ .../add-result/ScreenshotPreview.tsx | 201 ++++++++++++++++-- .../components/add-result/cropImage.ts | 67 ++++++ 17 files changed, 723 insertions(+), 102 deletions(-) create mode 100644 packages/api/migrations/20250106000000_add_openai_tokens.ts create mode 100644 packages/api/src/services/results/recognizeScore.ts create mode 100644 packages/api/src/trpc/routes/results/recognizeScore.ts create mode 100644 packages/web/src/features/leaderboards/components/add-result/cropImage.ts diff --git a/package-lock.json b/package-lock.json index 9d9022bc..828b13d1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -345,6 +345,17 @@ "node": ">=6.9.0" } }, + "node_modules/@bmunozg/react-image-area": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@bmunozg/react-image-area/-/react-image-area-1.1.0.tgz", + "integrity": "sha512-uMrLKZ+DAWb0TpPVoVhBUj//+J1Z/7ZOAtCp3efJV4dnhN9yWYIma9umJmvku7n9j2wbBFKtH7eMXU8e3bhT6Q==", + "license": "MIT", + "peerDependencies": { + "@types/react": "^17.0.0 || ^18.0.0", + "react": "^17.0.0 || ^18.0.0", + "react-dom": "^17.0.0 || ^18.0.0" + } + }, "node_modules/@derhuerst/http-basic": { "version": "8.2.4", "resolved": "https://registry.npmjs.org/@derhuerst/http-basic/-/http-basic-8.2.4.tgz", @@ -11678,6 +11689,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "packages/api": { "name": "@pumpking/api", "version": "1.1.1", @@ -11702,6 +11722,7 @@ "mysql2": "^2.3.3", "node-cron": "^3.0.3", "nodemon": "^2.0.22", + "openai": "^6.15.0", "pm2": "^5.3.0", "regression": "^2.0.1", "rimraf": "^5.0.1", @@ -11767,18 +11788,32 @@ "url": "https://github.com/motdotla/dotenv?sponsor=1" } }, - "packages/api/node_modules/zod": { - "version": "3.22.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz", - "integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==", - "funding": { - "url": "https://github.com/sponsors/colinhacks" + "packages/api/node_modules/openai": { + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.15.0.tgz", + "integrity": "sha512-F1Lvs5BoVvmZtzkUEVyh8mDQPPFolq4F+xdsx/DO8Hee8YF3IGAlZqUIsF+DVGhqf4aU0a3bTghsxB6OIsRy1g==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } } }, "packages/web": { "name": "@pumpking/web", "version": "0.0.5", "dependencies": { + "@bmunozg/react-image-area": "^1.1.0", "@mantine/core": "^8.3.10", "@mantine/form": "^8.3.10", "@mantine/hooks": "^8.3.10", @@ -12769,14 +12804,6 @@ "optional": true } } - }, - "packages/web/node_modules/zod": { - "version": "3.22.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz", - "integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==", - "funding": { - "url": "https://github.com/sponsors/colinhacks" - } } } } diff --git a/packages/api/.env.example b/packages/api/.env.example index 3f9401c5..d6fd63e6 100644 --- a/packages/api/.env.example +++ b/packages/api/.env.example @@ -11,4 +11,5 @@ SCREENSHOT_AGENT_BASE_FOLDER=~/screenshots # allowed patterns: {playerId}, {token}, {nickname}, {email} # {dateAdded}, {dateTimeAdded} (of upload to server), # {date}, {dateTime} (of screenshot from EXIF) -SCREENSHOT_FILE_PATH_DB={nickname}/{dateAdded}/{dateTimeAdded}.jpg \ No newline at end of file +SCREENSHOT_FILE_PATH_DB={nickname}/{dateAdded}/{dateTimeAdded}.jpg +OPENAI_API_KEY= \ No newline at end of file diff --git a/packages/api/migrations/20250106000000_add_openai_tokens.ts b/packages/api/migrations/20250106000000_add_openai_tokens.ts new file mode 100644 index 00000000..b40fddb6 --- /dev/null +++ b/packages/api/migrations/20250106000000_add_openai_tokens.ts @@ -0,0 +1,28 @@ +import { Kysely, sql } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema + .alterTable('players') + .addColumn('openai_prompt_tokens', 'integer', (col) => col.notNull().defaultTo(0)) + .execute(); + + await db.schema + .alterTable('players') + .addColumn('openai_completion_tokens', 'integer', (col) => col.notNull().defaultTo(0)) + .execute(); + + // Add generated column for cost calculation + // $0.25 per 1M input tokens + $2.00 per 1M output tokens + await sql` + ALTER TABLE players + ADD COLUMN openai_cost DECIMAL(10, 6) AS ( + openai_prompt_tokens * 0.00000025 + openai_completion_tokens * 0.000002 + ) STORED + `.execute(db); +} + +export async function down(db: Kysely): Promise { + await db.schema.alterTable('players').dropColumn('openai_cost').execute(); + await db.schema.alterTable('players').dropColumn('openai_prompt_tokens').execute(); + await db.schema.alterTable('players').dropColumn('openai_completion_tokens').execute(); +} diff --git a/packages/api/package.json b/packages/api/package.json index 145c9b09..1c3e0096 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -54,6 +54,7 @@ "mysql2": "^2.3.3", "node-cron": "^3.0.3", "nodemon": "^2.0.22", + "openai": "^6.15.0", "pm2": "^5.3.0", "regression": "^2.0.1", "rimraf": "^5.0.1", diff --git a/packages/api/src/services/results/recognizeScore.ts b/packages/api/src/services/results/recognizeScore.ts new file mode 100644 index 00000000..c6862c3c --- /dev/null +++ b/packages/api/src/services/results/recognizeScore.ts @@ -0,0 +1,127 @@ +import createDebug from 'debug'; +import fs from 'fs'; +import OpenAI from 'openai'; +import { error } from 'utils'; + +const debug = createDebug('backend-ts:service:recognizeScore'); + +const openai = process.env.OPENAI_API_KEY + ? new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }) + : null; + +export interface RecognizeScoreResult { + numbers: number[]; + promptTokens: number; + completionTokens: number; +} + +export const recognizeScore = async (imagePath: string): Promise => { + if (!process.env.OPENAI_API_KEY || !openai) { + throw error(500, 'OpenAI API key is not configured'); + } + + // Read the image file and convert to base64 + const imageBuffer = fs.readFileSync(imagePath); + const base64Image = imageBuffer.toString('base64'); + + // Detect mime type from file header + const mimeType = detectMimeType(imageBuffer); + + debug('Sending image to OpenAI for score recognition, size: %d bytes', imageBuffer.length); + + const response = await openai.chat.completions.create({ + model: 'gpt-5-mini', + reasoning_effort: 'minimal', + response_format: { + type: 'json_schema', + json_schema: { + name: 'score_numbers_array', + description: 'An array of 8 numbers extracted from the game result screen', + strict: true, + schema: { + type: 'object', + additionalProperties: false, + required: ['numbers'], + properties: { + numbers: { + type: 'array', + items: { type: 'number' }, + minItems: 8, + maxItems: 8, + }, + }, + }, + }, + }, + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: `Extract the vertically lined up white numbers from the photo according to the schema provided. One number per line, some numbers may have leading zeroes. All zeroes have a dot in the middle.`, + }, + { + type: 'image_url', + image_url: { + url: `data:${mimeType};base64,${base64Image}`, + detail: 'high', + }, + }, + ], + }, + ], + }); + + const content = response.choices[0]?.message?.content; + debug('OpenAI response: %s', JSON.stringify(response)); + + if (!content) { + throw error(500, 'No response from OpenAI'); + } + + const promptTokens = response.usage?.prompt_tokens ?? 0; + const completionTokens = response.usage?.completion_tokens ?? 0; + + try { + const parsed = JSON.parse(content) as { numbers: number[] }; + + if (!Array.isArray(parsed.numbers) || parsed.numbers.length !== 8) { + throw new Error('Invalid response format: expected 8 numbers'); + } + + return { + numbers: parsed.numbers, + promptTokens, + completionTokens, + }; + } catch (e) { + debug('Failed to parse OpenAI response: %s', e); + throw error( + 500, + `Failed to parse score recognition result: ${ + e instanceof Error ? e.message : 'Unknown error' + }` + ); + } +}; + +const detectMimeType = (buffer: Buffer): string => { + // Check magic bytes for common image formats + if (buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) { + return 'image/jpeg'; + } + if (buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4e && buffer[3] === 0x47) { + return 'image/png'; + } + if (buffer[0] === 0x47 && buffer[1] === 0x49 && buffer[2] === 0x46) { + return 'image/gif'; + } + if (buffer[0] === 0x52 && buffer[1] === 0x49 && buffer[2] === 0x46 && buffer[3] === 0x46) { + return 'image/webp'; + } + // Default to jpeg + return 'image/jpeg'; +}; diff --git a/packages/api/src/trpc/routes/results/index.ts b/packages/api/src/trpc/routes/results/index.ts index 0074b8c8..b0e6c984 100644 --- a/packages/api/src/trpc/routes/results/index.ts +++ b/packages/api/src/trpc/routes/results/index.ts @@ -1,6 +1,8 @@ import { addResultMutation } from './addResult'; +import { recognizeScoreMutation } from './recognizeScore'; import { router } from 'trpc/trpc'; export const results = router({ addResultMutation, + recognizeScoreMutation, }); diff --git a/packages/api/src/trpc/routes/results/recognizeScore.ts b/packages/api/src/trpc/routes/results/recognizeScore.ts new file mode 100644 index 00000000..261ffb4b --- /dev/null +++ b/packages/api/src/trpc/routes/results/recognizeScore.ts @@ -0,0 +1,36 @@ +import { db } from 'db'; +import { sql } from 'kysely'; +import { recognizeScore } from 'services/results/recognizeScore'; +import { publicProcedure } from 'trpc/trpc'; +import { base64 } from 'utils/zod'; +import { z } from 'zod'; + +export const recognizeScoreMutation = publicProcedure + .input( + z.object({ + image: base64, + }) + ) + .mutation(async ({ ctx, input }) => { + if (!ctx.user) { + throw new Error('Not logged in'); + } + + try { + const result = await recognizeScore(input.image.filePath); + + // Update the player's OpenAI token usage counters + await db + .updateTable('players') + .set({ + openai_prompt_tokens: sql`openai_prompt_tokens + ${result.promptTokens}`, + openai_completion_tokens: sql`openai_completion_tokens + ${result.completionTokens}`, + }) + .where('id', '=', ctx.user.id) + .execute(); + + return result.numbers; + } finally { + await input.image.dispose(); + } + }); diff --git a/packages/api/src/types/database.ts b/packages/api/src/types/database.ts index d3e4563d..07ce2ff4 100644 --- a/packages/api/src/types/database.ts +++ b/packages/api/src/types/database.ts @@ -157,6 +157,9 @@ export interface Players { can_add_results_manually: Generated; arcade_name: string | null; exp: Decimal | null; + openai_prompt_tokens: Generated; + openai_completion_tokens: Generated; + openai_cost: Generated; } export interface PpHistory { diff --git a/packages/web/package.json b/packages/web/package.json index b403a934..a0dcb123 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -13,6 +13,7 @@ "preview": "vite preview" }, "dependencies": { + "@bmunozg/react-image-area": "^1.1.0", "@mantine/core": "^8.3.10", "@mantine/form": "^8.3.10", "@mantine/hooks": "^8.3.10", diff --git a/packages/web/src/components/ConfirmationPopup/useConfirmationPopup.tsx b/packages/web/src/components/ConfirmationPopup/useConfirmationPopup.tsx index f3bbe91a..a49948bb 100644 --- a/packages/web/src/components/ConfirmationPopup/useConfirmationPopup.tsx +++ b/packages/web/src/components/ConfirmationPopup/useConfirmationPopup.tsx @@ -1,5 +1,7 @@ -import { useState, useRef } from 'react'; -import { Modal, Button, Group } from '@mantine/core'; +import { Button, Group, Modal } from '@mantine/core'; +import { useRef, useState } from 'react'; + +import { useLanguage } from 'utils/context/translation'; interface UseConfirmationPopupOptions { okText?: string; @@ -11,6 +13,7 @@ interface RenderPopupProps { export const useConfirmationPopup = ({ okText = 'OK' }: UseConfirmationPopupOptions = {}) => { const [open, setOpen] = useState(false); + const lang = useLanguage(); const closeCallback = useRef<((isConfirmed: boolean) => void) | null>(null); const close = (isConfirmed: boolean) => { @@ -36,7 +39,7 @@ export const useConfirmationPopup = ({ okText = 'OK' }: UseConfirmationPopupOpti {content} + + )} {error instanceof Error && ( diff --git a/packages/web/src/features/leaderboards/components/add-result/ScreenshotPreview.tsx b/packages/web/src/features/leaderboards/components/add-result/ScreenshotPreview.tsx index 19ad091b..19ed449c 100644 --- a/packages/web/src/features/leaderboards/components/add-result/ScreenshotPreview.tsx +++ b/packages/web/src/features/leaderboards/components/add-result/ScreenshotPreview.tsx @@ -1,17 +1,45 @@ -import { useEffect, useState } from 'react'; +import { AreaSelector, type IArea } from '@bmunozg/react-image-area'; +import { Alert, Button, Group, Stack, Text } from '@mantine/core'; +import { useMutation } from '@tanstack/react-query'; +import imageCompression from 'browser-image-compression'; +import { useEffect, useRef, useState } from 'react'; +import { useLanguage } from 'utils/context/translation'; +import { api } from 'utils/trpc'; + +import { cropImageToBase64 } from './cropImage'; import { getDateFromFile } from './getDate'; +interface RecognizedScore { + perfect: number; + great: number; + good: number; + bad: number; + miss: number; + combo: number; + score: number; +} + export const ScreenshotPreview = ({ file, showDate, + enableOcr, + onScoreRecognized, }: { file: File | null; showDate?: boolean; + enableOcr?: boolean; + onScoreRecognized?: (score: RecognizedScore) => void; }) => { + const lang = useLanguage(); const [src, setSrc] = useState(null); const [date, setDate] = useState(null); const [error, setError] = useState(null); + const [areas, setAreas] = useState([]); + const [recognizedNumbers, setRecognizedNumbers] = useState(null); + const imgRef = useRef(null); + + const recognizeScoreMutation = useMutation(api.results.recognizeScoreMutation.mutationOptions()); useEffect(() => { if (!FileReader) { @@ -39,24 +67,173 @@ export const ScreenshotPreview = ({ setSrc(null); setDate(null); setError(null); + setAreas([]); + setRecognizedNumbers(null); } }, [file]); + const handleRecognizeScore = async () => { + if (!src || areas.length === 0 || !imgRef.current) return; + + try { + setError(null); + const displayedWidth = imgRef.current.clientWidth; + const displayedHeight = imgRef.current.clientHeight; + const croppedImage = await cropImageToBase64(src, areas[0], displayedWidth, displayedHeight); + + // Compress the cropped image before sending for recognition + const croppedFile = await imageCompression.getFilefromDataUrl(croppedImage, 'cropped.png'); + const compressedFile = await imageCompression(croppedFile, { + maxWidthOrHeight: 360, + maxSizeMB: 0.03, + initialQuality: 0.9, + // maxWidthOrHeight: 360, + // maxSizeMB: 0.032, + // initialQuality: 0.95, + }); + const compressedImage = await imageCompression.getDataUrlFromFile(compressedFile); + + const numbers = await recognizeScoreMutation.mutateAsync({ + image: compressedImage, + }); + + setRecognizedNumbers(numbers); + } catch (e) { + console.error('OCR error:', e); + setError(e instanceof Error ? e.message : 'Recognition failed'); + } + }; + + const handleConfirmRecognition = () => { + if (!recognizedNumbers) return; + + const [perfect, great, good, bad, miss, combo, score] = recognizedNumbers; + + onScoreRecognized?.({ + perfect: perfect >= 0 ? perfect : -1, + great: great >= 0 ? great : -1, + good: good >= 0 ? good : -1, + bad: bad >= 0 ? bad : -1, + miss: miss >= 0 ? miss : -1, + combo: combo >= 0 ? combo : -1, + score: score >= 0 ? score : -1, + }); + + setRecognizedNumbers(null); + }; + + const handleCancelRecognition = () => { + setRecognizedNumbers(null); + }; + if (!src) return null; return ( -
- Screenshot preview + {showDate && ( -
- Date taken: {date ? date.toLocaleString() : 'Unknown'} -
+ + {lang.DATE_TAKEN}: {date ? date.toLocaleString() : lang.UNKNOWN} + + )} + {enableOcr ? ( + + Screenshot preview + + ) : ( + Screenshot preview + )} + + {enableOcr && !recognizedNumbers && ( + <> + + {areas.length === 0 ? lang.OCR_DRAW_RECTANGLE_HINT : lang.OCR_AREA_SELECTED_HINT} + + + + )} + + {recognizedNumbers && ( + + +
+ + Perfect: + + + {recognizedNumbers[0] >= 0 ? recognizedNumbers[0] : '?'} + + + Great: + + + {recognizedNumbers[1] >= 0 ? recognizedNumbers[1] : '?'} + + + Good: + + + {recognizedNumbers[2] >= 0 ? recognizedNumbers[2] : '?'} + + + Bad: + + + {recognizedNumbers[3] >= 0 ? recognizedNumbers[3] : '?'} + + + Miss: + + + {recognizedNumbers[4] >= 0 ? recognizedNumbers[4] : '?'} + + Combo: + {recognizedNumbers[5] >= 0 ? recognizedNumbers[5] : '?'} + + Score: + + + {recognizedNumbers[6] >= 0 ? recognizedNumbers[6] : '?'} + +
+ + + + +
+
+ )} + + {error && ( + + {lang.ERROR}: {error} + )} - {error &&
Error: {error}
} -
+ ); }; diff --git a/packages/web/src/features/leaderboards/components/add-result/cropImage.ts b/packages/web/src/features/leaderboards/components/add-result/cropImage.ts new file mode 100644 index 00000000..fe95b090 --- /dev/null +++ b/packages/web/src/features/leaderboards/components/add-result/cropImage.ts @@ -0,0 +1,67 @@ +import type { IArea } from '@bmunozg/react-image-area'; + +/** + * Crops an image to the selected area and returns a base64 data URL + * @param displayedWidth - The width of the image as displayed on screen + * @param displayedHeight - The height of the image as displayed on screen + */ +export const cropImageToBase64 = ( + imageSrc: string, + area: IArea, + displayedWidth: number, + displayedHeight: number +): Promise => { + return new Promise((resolve, reject) => { + const img = new Image(); + img.onload = () => { + const canvas = document.createElement('canvas'); + const ctx = canvas.getContext('2d'); + + if (!ctx) { + reject(new Error('Could not get canvas context')); + return; + } + + // Scale coordinates from displayed size to actual image size + const scaleX = img.naturalWidth / displayedWidth; + const scaleY = img.naturalHeight / displayedHeight; + + const scaledX = area.x * scaleX; + const scaledY = area.y * scaleY; + const scaledWidth = area.width * scaleX; + const scaledHeight = area.height * scaleY; + + // Set canvas size to the cropped area dimensions (at original resolution) + canvas.width = scaledWidth; + canvas.height = scaledHeight; + + console.log({ + scaledWidth, + scaledHeight, + }); + + // Draw the cropped portion of the image + ctx.drawImage( + img, + scaledX, + scaledY, + scaledWidth, + scaledHeight, + 0, + 0, + scaledWidth, + scaledHeight + ); + + // Convert to base64 (JPEG for smaller size) + const base64 = canvas.toDataURL('image/jpeg', 0.9); + resolve(base64); + }; + + img.onerror = () => { + reject(new Error('Failed to load image')); + }; + + img.src = imageSrc; + }); +};