[cuda backend] reduce memory consumption on gemma4_31b by running embedding in int8 #1407

	name: Test WebGPU Backend

	on:
	schedule:
	- cron: 0 2 * * *
	push:
	branches:
	- main
	- release/*
	tags:
	- ciflow/nightly/*
	pull_request:
	workflow_dispatch:

	concurrency:
	group: ${{ github.workflow }}--${{ github.event.pull_request.number \|\| github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
	cancel-in-progress: true

	jobs:
	test-webgpu:
	uses: ./.github/workflows/_test_backend.yml
	with:
	backend: webgpu
	flows: '["webgpu"]'
	ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha \|\| github.sha }}
	timeout: 120
	run-linux: true

Provide feedback