Skip to content

react-speech-recognition is not working on Android Chrome but works perfectly fine on desktop Chrome #254

@JaswantSoni41

Description

@JaswantSoni41

Hi, fellas and dev team. I developed a voice-recognition game which basically helps children (i.e. ages 3–6) learn to read words, sentences, and paragraphs. It gives a percentage score for readability and pronunciation showing how accurate the child is, and helps the child improve even when they are wrong. But I am facing an issue: it works on desktop Chrome but does not work on Android 15 with the latest Chrome version 135.0.7049.111, even though I hosted it on an HTTPS-secured domain. Please look into it — and if anyone can help, comments are open. Here is my code:

import React, { useEffect, useRef, useState } from "react";

import SpeechRecognition, {
  useSpeechRecognition,
} from "react-speech-recognition";
import stringSimilarity from "string-similarity-js";
import WebSpeechSupport from "./WebSpeechSupport";
import SiriWave from "siriwave";
import CountUp from "react-countup";
import ProgressBar from "./ProgressBar";
import MicrophoneAccessError from "./MicrophoneAccessError";
// import Modal from "./ReactModalModern";

// One question of the voice-recognition reading game: shows a prompt and an
// image, records the child's speech, and scores it against `correctAnswers`.
// `mode` selects the grading style ("sentence" or "word"); `accent` is the
// locale passed to speech recognition and TTS; `goToNextQuestion` advances
// the quiz once the answer is correct.
const VoiceRecoGame = ({
  key2,
  index,
  question = "What is this?",
  imageURL = "https://www.collinsdictionary.com/images/full/apple_158989157.jpg",
  correctAnswers = "Apple",
  AIModel = "default",
  mode,
  goToNextQuestion,
  accent = "en-IN",
}) => {
  // SpeechRecognition is a React hook that provides speech recognition functionality.
  const {
    transcript,
    listening,
    resetTranscript,
    browserSupportsSpeechRecognition,
    isMicrophoneAvailable,
  } = useSpeechRecognition();

  // Debounce guard so checkAnswer cannot run twice for one utterance.
  const isCheckingRef = useRef(false);
  const [isCorrect, setIsCorrect] = useState(false);
  // Local copy of the hook's transcript (kept across resetTranscript timing).
  const [tranScript, setTranScript] = useState("");
  const [accuracy, setAccuracy] = useState(0);
  // Word-mode "Processing…/Analysing…" status text; empty when idle.
  const [statusMessage, setStatusMessage] = useState("");

  // SiriWave instance and the DOM node it draws into.
  const siriWaveRef = useRef(null);
  const waveContainerRef = useRef(null);
  // Speak feedback to the child via the Web Speech API (SpeechSynthesis).
  // "word" mode spells the word out letter by letter before saying it whole;
  // "sentence" mode reads the correct sentence after a configurable prefix.
  const speakText = ({
    text,
    mode = "sentence", // "word" or "sentence"
    accent = "en-IN",
    prefix = "You are wrong buddy. The correct sentence is",
  }) => {
    // Bail out early on browsers without speech-synthesis support.
    if (!("speechSynthesis" in window)) {
      alert("Sorry, your browser does not support text-to-speech.");
      return;
    }

    const synth = window.speechSynthesis;
    synth.cancel(); // Drop any speech still queued or playing.

    const utterance = new SpeechSynthesisUtterance();
    utterance.lang = accent;
    // Slow the voice down for young listeners; single words go slowest.
    utterance.rate = mode === "word" ? 0.6 : 0.7;

    // Voice preference: a female Google/Microsoft voice, then anything
    // child-like, then whatever the browser lists first.
    const voices = synth.getVoices();
    const femaleVendorVoice = voices.find((voice) => {
      if (!voice.name.toLowerCase().includes("female")) return false;
      return voice.name.includes("Google") || voice.name.includes("Microsoft");
    });
    const childVoice = voices.find((voice) =>
      voice.name.toLowerCase().includes("child")
    );
    const selectedVoice = femaleVendorVoice || childVoice || voices[0];
    if (selectedVoice) {
      utterance.voice = selectedVoice;
    }

    if (mode === "word") {
      // Spell the word out ("a... p... p... l... e") before saying it whole.
      const syllableLike = text.split("").join("... ");
      utterance.text = `Awwwwww, don't worry, I will teach you. Let's break it down: ${syllableLike} ... ${text}`;
    } else {
      utterance.text = `${prefix} ${text}`;
    }

    synth.speak(utterance);
  };

  // Toggle speech recognition on button press: stop the current capture when
  // already listening, otherwise clear old results and start a fresh one.
  const handleClick = () => {
    if (listening) {
      SpeechRecognition.stopListening();
      return;
    }

    // Clear both the hook transcript and our local copy before recording.
    setTranScript("");
    resetTranscript();
    // NOTE(review): Chrome on Android handles non-continuous recognition
    // differently from desktop (recognition may finalize/stop almost
    // immediately) — if mobile capture fails, try `continuous: true` here.
    // TODO confirm against react-speech-recognition's Android guidance.
    SpeechRecognition.startListening({
      continuous: false,
      language: accent,
      interimResults: false,
    });
  };

  // Compare what the child said against `correctAnswers`, update the
  // accuracy meter, and speak corrective feedback when the attempt is wrong.
  const checkAnswer = async (spokenText) => {
    // Re-entrancy guard: ignore calls while a previous check is settling.
    if (isCheckingRef.current) return;
    isCheckingRef.current = true;

    // Lower-case and strip punctuation; word mode also removes all
    // whitespace so e.g. "ice cream" and "icecream" compare equal.
    const normalize = (text) => {
      const base = text
        .trim()
        .toLowerCase()
        .replace(/[^\w\s]/g, "");
      return mode === "sentence" ? base : base.replace(/\s+/g, "");
    };

    try {
      const userAnswer = normalize(spokenText);
      const correctAnswer = normalize(correctAnswers);

      // stringSimilarity is synchronous; the previous `await` was a no-op.
      const overallSimilarity = stringSimilarity(userAnswer, correctAnswer);
      setAccuracy(overallSimilarity * 100);
      setTranScript(spokenText);

      if (mode === "sentence") {
        // Sentences need a near-exact match to count as correct.
        const isCorrectNow = overallSimilarity >= 0.92;
        setIsCorrect(isCorrectNow);
        if (!isCorrectNow) speakText({ text: correctAnswers, mode: "sentence" });
      } else if (mode === "word") {
        const isCorrectNow = overallSimilarity >= 0.8;
        setIsCorrect(isCorrectNow);
        if (!isCorrectNow) speakText({ text: correctAnswers, mode: "word" });
      } else {
        alert("Please select a mode: sentence or word.");
      }
    } finally {
      // Release the guard shortly after, so rapid duplicate transcript
      // events stay debounced. Fixed: the invalid-mode path previously
      // returned without resetting this flag, permanently blocking all
      // further answer checks.
      setTimeout(() => {
        isCheckingRef.current = false;
      }, 500);
    }
  };

  // Map an accuracy percentage to a Tailwind text-colour class:
  // red below 30, orange below 70, green otherwise.
  const getColor = (percentage) =>
    percentage < 30
      ? "text-red-500"
      : percentage < 70
      ? "text-orange-500"
      : "text-green-500";





  const targetWords = correctAnswers.split(" ");
  const spokenWords = tranScript.split(" ");

  const usedIndices = new Set(); // To keep track of which spoken words are already matched

  const SIMILARITY_THRESHOLD = 0.95;
  const highlightedText = targetWords.map((word, index) => {
    let bestMatchIndex = -1;
    let bestSimilarity = 0;

    // Find the best unused match
    spokenWords.forEach((spokenWord, i) => {
      if (usedIndices.has(i)) return; // Skip already used spoken words

      const similarity = stringSimilarity(word.toLowerCase(), spokenWord.toLowerCase(), 1);
      if (similarity > bestSimilarity) {
        bestSimilarity = similarity;
        bestMatchIndex = i;
      }
    });

     // Only mark it as used if the match is good
     if (bestMatchIndex !== -1 && bestSimilarity > SIMILARITY_THRESHOLD) {
      usedIndices.add(bestMatchIndex);
    }

    return (
      <span
        key={index}
        style={{
          color: bestSimilarity > 0.95 ? "#a0f0a0" : "#000000",
          transition: "all 0.2s ease",
        }}
      >
        {word + " "}
      </span>
    );
  });

  // Handle the case when the user starts speaking and Stores Transcript into the State
  // Mirrors the library's live transcript into local state so it can still be
  // displayed/graded after resetTranscript() clears the hook's copy.
  useEffect(() => {
    setTranScript(transcript);
  }, [transcript]);

  // Handle the case when the user stops speaking
  // Once listening ends with a non-empty transcript, grade the attempt.
  // Word mode shows a staged "Processing... / Analysing..." animation first.
  // NOTE(review): these nested timeouts are never cleared, so they can fire
  // (and set state) after unmount or after the question changes — consider
  // returning a cleanup that clears them. TODO confirm desired behavior.
  useEffect(() => {
    if (!listening && tranScript && tranScript.length > 0) {
      // Check if the mode is "sentence" or "word"
      if (mode === "word") {
        setStatusMessage("Processing...");
        setTimeout(() => {
          setStatusMessage("Analysing...");
          setTimeout(() => {
            setStatusMessage("");
            checkAnswer(tranScript);
          }, 1000);
        }, 1000);
      } else {
        checkAnswer(tranScript);
      }
    }
  }, [transcript, listening]);

  // Reset all per-question state whenever the quiz advances to a new index.
  useEffect(() => {
    setIsCorrect(false);
    resetTranscript();
    setTranScript("");
    setAccuracy(0);
    // Removed a stray console.log(correctAnswers): it leaked the answer to
    // anyone with devtools open.
  }, [index]);

  // SiriWave logic: while listening, open a mic stream, feed its RMS level
  // into a SiriWave animation, and tear everything down when we stop.
  useEffect(() => {
    let audioContext;
    let analyser;
    let microphone;
    let dataArray;
    let animationId;
    let mediaStream; // Saved so the cleanup can stop its tracks.
    let cancelled = false; // Set by cleanup; guards the async getUserMedia.

    const updateAmplitude = () => {
      if (!analyser || !dataArray || !siriWaveRef.current) return;

      // RMS of the time-domain samples, scaled and clamped to [0, 10].
      analyser.getByteTimeDomainData(dataArray);
      let sum = 0;
      for (let i = 0; i < dataArray.length; i++) {
        const normalized = (dataArray[i] - 128) / 128;
        sum += normalized * normalized;
      }
      const rms = Math.sqrt(sum / dataArray.length);
      const amp = Math.min(10, rms * 50);
      siriWaveRef.current.setAmplitude(amp);

      animationId = requestAnimationFrame(updateAmplitude);
    };

    if (listening && waveContainerRef.current) {
      navigator.mediaDevices
        .getUserMedia({ audio: true })
        .then((stream) => {
          // The effect may have been cleaned up while we waited for mic
          // permission — release the stream instead of leaking it.
          if (cancelled) {
            stream.getTracks().forEach((track) => track.stop());
            return;
          }
          mediaStream = stream; // Store the stream

          audioContext = new (window.AudioContext ||
            window.webkitAudioContext)();
          analyser = audioContext.createAnalyser();
          analyser.fftSize = 2048;
          dataArray = new Uint8Array(analyser.fftSize);

          microphone = audioContext.createMediaStreamSource(stream);
          microphone.connect(analyser);

          siriWaveRef.current = new SiriWave({
            container: waveContainerRef.current,
            width: waveContainerRef.current.offsetWidth,
            height: 100,
            style: "ios9",
            amplitude: 0,
            speed: 0.2,
            autostart: true,
          });

          updateAmplitude();
        })
        .catch((err) => {
          // Previously a denied mic permission surfaced as an unhandled
          // promise rejection and the wave silently never drew.
          console.error("Microphone access for SiriWave failed:", err);
        });
    }

    return () => {
      cancelled = true;
      cancelAnimationFrame(animationId);
      if (audioContext) audioContext.close();
      if (siriWaveRef.current) {
        siriWaveRef.current.stop();
        siriWaveRef.current = null;
      }
      // Stop the media stream so the browser's mic indicator turns off.
      if (mediaStream) {
        mediaStream.getTracks().forEach((track) => track.stop());
      }
    };
  }, [listening]);

  // Transcript visibility: sentence mode always shows the live transcript;
  // word mode shows it only after listening has finished, the status
  // animation is done, and something was actually heard.
  // (The former trailing `mode !== "sentence"` clause was redundant — the
  // `||` short-circuit guarantees it inside the second operand.)
  const shouldShowTranscript =
    mode === "sentence" ||
    (!listening && statusMessage.length === 0 && transcript.length > 0);

  // Handle Browser Support for Speech Recognition
  // (the game depends on the Web Speech API; render an explainer instead of
  // the game when the browser lacks it)
  if (!browserSupportsSpeechRecognition) {
    return <WebSpeechSupport />;
  }

  // Handle microphone access error
  if (!isMicrophoneAvailable) {
    return <MicrophoneAccessError />;
  }

  // Layout: accuracy meter → question/image (+ live highlight in sentence
  // mode) → status / transcript / wave → record toggle → Next button
  // (enabled only after a correct answer).
  return (
    <div
      key={key2}
      className="sm:p-2 max-w-xl flex flex-col justify-center items-stretch gap-4 mx-auto"
    >
      {/* Accuracy meter with animated percentage read-out */}
      <article className="text-right flex gap-3 items-center w-full">
        <ProgressBar percentage={accuracy} />{" "}
        <CountUp
          end={accuracy}
          className={`font-semibold ${getColor(accuracy)}`}
          suffix="%"
          duration={0.5}
          decimals={2}
        />
      </article>

      <article className="flex flex-col gap-3 justify-center items-center">
        {/* Question */}
        <h2 className="text-xl font-bold mb-2">{question}</h2>

        {/* Image */}
        {imageURL && (
          <img src={`${imageURL}`} alt="Question Image" width={300} height={300} />
        )}

        {/* RealTime Highlight Text Feature for the sentence mode */}
        {mode === "sentence" && (
          <p className="text-lg border p-3 rounded bg-gray-100 mb-4">
            {highlightedText}
            
          </p>
        )}
      </article>

      {/* Status for Word Mode only */}
      {statusMessage && (
        <p className="text-blue-500 text-center animate-pulse font-semibold text-md mb-2">
          {statusMessage}
        </p>
      )}

      {/* Transcript Logic according to modes usage*/}
      {shouldShowTranscript && (
        <p className="text-sm sm:text-base text-center text-gray-600">
          <strong>You Said:</strong> {transcript}
        </p>
      )}

      {/* Siri Wave */}
      {listening && (
        <div
          ref={waveContainerRef}
          style={{
            width: "100%",
            height: "80px",
            maxWidth: "100%",
            marginTop: "1rem",
          }}
        />
      )}

      {/* Post-attempt verdict (hidden while listening or animating status) */}
      {!listening &&
        statusMessage.length === 0 &&
        tranScript.length > 0 &&
        (isCorrect ? (
          <p className="text-green-500 text-center">Yo Champ You Got This!</p>
        ) : (
          <article className="text-center mt-4">
            <p className="text-red-500">Try Again!</p>
            {mode !== "sentence" && (
              <p className="text-gray-500">Correct Answer: {correctAnswers}</p>
            )}
          </article>
        ))}

      {/* Toggles speech recognition on/off */}
      <button
        className="speech-button px-4 py-2 rounded-full cursor-pointer 
              bg-gradient-to-r from-[#ff1b6b] to-[#45caff] 
              hover:from-[#45caff] hover:to-[#ff1b6b]
              transition duration-500 ease-in-out hover:scale-105 text-white font-medium"
        onClick={handleClick}
      >
        {listening ? "🛑 Stop" : "🎤 Start Recording"}
      </button>

      {/* Next is enabled only once the current question is answered correctly */}
      <div className="button-section mt-5">
        <button
          disabled={!isCorrect}
          className={`rounded-2xl transition ease-linear px-4 py-2 duration-200 bg-purple-1 ${
            isCorrect
              ? "opacity-100 cursor-pointer hover:bg-purple-3"
              : "cursor-not-allowed opacity-60"
          } text-white font-semibold`}
          onClick={() => goToNextQuestion()}
        >
          Next
        </button>
      </div>
    </div>
  );
};

export default VoiceRecoGame;

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions