Speech to Text Conversion Using JavaScript

Speech to Text Conversion Using JavaScript

In this tutorial, we will show you how to convert speech to text using JavaScript. We will use the browser's SpeechRecognition interface (part of the Web Speech API) to convert speech into text and then display the text on the screen.

JavaScript Speech to Text

Here we create a simple HTML web page with a microphone button that starts the speech recognition.

The code below listens to the user and converts the speech into text:

<!DOCTYPE html>
<!--
    Voice search demo page: a Google search form whose text field is filled in
    by the speech-recognition script, plus a paragraph (.info) that the script
    uses for status text.
-->
<html lang="en">
    <head>
        <meta charset="utf-8">
        <!-- Needed so the width-based media queries in the stylesheet apply on mobile devices -->
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>Speech to Text Search</title>
        <!-- Font Awesome 4.7 provides the fa-microphone / fa-microphone-slash icons -->
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
    </head>
    <body>
        <div class="container">
            <!-- Submits the query to Google in a new tab; the field name "q" is the search parameter -->
            <form action="https://www.google.com/search" method="get" target="_blank" id="search-form">
                <!-- A placeholder is not an accessible name, so the field is labelled explicitly -->
                <input type="text" name="q" placeholder="Search Google.." aria-label="Search Google" autocomplete="off" autofocus />
            </form>
            <!-- Filled in by the script -->
            <p class="info"></p>
        </div>
    </body>
</html>
<style type="text/css">
    /* Styles for the voice search demo: a centered search form with a
       button overlaid on the right edge of the text field. */
    @import url('https://fonts.googleapis.com/css?family=Montserrat');

    /* Minimal reset so sizes below include padding and borders */
    * {
      margin: 0;
      padding: 0;
      box-sizing: border-box;
    }
    body {
      font-family: 'Montserrat', sans-serif;
      background-color: #025677;
      color: #fff;
    }
    .container {
      padding: 30px 50px;
    }
    /* position: relative makes the form the anchor for the absolutely positioned button */
    #search-form {
      width: 30%;
      margin: 0 auto;
      position: relative;
    }
    #search-form input {
      width: 100%;
      font-size: 1.5rem;
      padding: 10px 15px;
      border: 2px solid #ccc;
      border-radius: 2px;
    }
    /* The button is stretched over the full height of the field at its right edge */
    #search-form button {
      position: absolute;
      top: 0;
      right: 0;
      bottom: 0;
      background-color: transparent;
      outline: none;
      border: none;
      width: 3rem;
      text-align: center;
      font-size: 1.75rem;
      cursor: pointer;
      color: #333;
    }
    /* The rule above removes the default outline; give keyboard users a
       visible focus indicator back without showing it on mouse clicks. */
    #search-form button:focus-visible {
      outline: 2px solid #025677;
      outline-offset: -4px;
    }
    .info {
      margin-top: 0.5rem;
      text-align: center;
      font-size: 0.75rem;
    }

    /* Widen the form as the viewport gets narrower */
    @media (max-width: 1200px) {
      #search-form { width: 50%; }
    }
    @media (max-width: 768px) {
      .container { padding: 30px 35px; }
      #search-form { width: 100%; }
      .info { font-size: 0.5rem; }
    }
</style>

<script type="text/javascript">
    // Look up the page elements the rest of the script works with.
    const searchForm = document.querySelector("#search-form");
    const info = document.querySelector(".info");
    const searchFormInput = searchForm.querySelector("input"); // same element as document.querySelector("#search-form input")

    // Resolve the recognition constructor from window: the standard name is
    // tried first, then the webkit-prefixed one. The result is undefined when
    // the browser exposes neither.
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

    if(SpeechRecognition) {
      console.log("Your Browser supports speech Recognition");

      // Single recognizer instance used by the click and recognition handlers.
      const recognition = new SpeechRecognition();
      // Continuous mode: keep delivering results for the session instead of
      // stopping after the first recognized phrase.
      recognition.continuous = true;
      // The recognition language is left unset; uncomment to force one.
      // recognition.lang = "en-US";

      // Add the microphone button to the form. It contains only an icon, so it
      // gets an explicit accessible name (aria-label) and tooltip (title), and
      // the decorative icon is hidden from assistive technology.
      searchForm.insertAdjacentHTML("beforeend", '<button type="button" aria-label="Toggle voice input" title="Toggle voice input"><i class="fa fa-microphone" aria-hidden="true"></i></button>');
      // Reserve room on the right of the field so typed text does not run under the button.
      searchFormInput.style.paddingRight = "50px";

      // The button is the only <button> in the form; its first child is the <i> icon.
      const micBtn = searchForm.querySelector("button");
      const micIcon = micBtn.firstElementChild;

      micBtn.addEventListener("click", micBtnClick);
      /**
       * Click handler for the microphone button: starts recognition while the
       * icon shows "fa-microphone", otherwise stops it.
       *
       * The icon class is only switched by the recognizer's "start" event, so a
       * second click can arrive while the icon still shows the idle state. In
       * that case start() throws an InvalidStateError because recognition has
       * already been started; that specific error is ignored instead of
       * surfacing as an uncaught exception. Any other error is rethrown.
       */
      function micBtnClick() {
        if(!micIcon.classList.contains("fa-microphone")) {
          recognition.stop();
          return;
        }

        try {
          recognition.start(); // First time you have to allow access to mic!
        }
        catch(error) {
          if(!error || error.name !== "InvalidStateError") {
            throw error;
          }
          console.log("Speech recognition has already been started");
        }
      }

      recognition.addEventListener("start", startSpeechRecognition); // <=> recognition.onstart = function() {...}
      /**
       * Handler for the recognizer's "start" event: shows the
       * "fa-microphone-slash" icon in place of "fa-microphone", moves focus to
       * the search field and logs that listening has begun.
       */
      function startSpeechRecognition() {
        const iconClasses = micIcon.classList;

        iconClasses.add("fa-microphone-slash");
        iconClasses.remove("fa-microphone");

        searchFormInput.focus();
        console.log("Voice activated, SPEAK");
      }

      recognition.addEventListener("end", endSpeechRecognition); // <=> recognition.onend = function() {...}
      /**
       * Handler for the recognizer's "end" event: restores the
       * "fa-microphone" icon in place of "fa-microphone-slash", moves focus to
       * the search field and logs that the service disconnected.
       */
      function endSpeechRecognition() {
        const iconClasses = micIcon.classList;

        iconClasses.add("fa-microphone");
        iconClasses.remove("fa-microphone-slash");

        searchFormInput.focus();
        console.log("Speech recognition service disconnected");
      }

      recognition.addEventListener("result", resultOfSpeechRecognition); // <=> recognition.onresult = function(event) {...} - Fires when you stop talking
      /**
       * Handler for the recognizer's "result" event.
       *
       * Reads the first alternative of the result at event.resultIndex and
       * compares its lower-cased, trimmed text against the voice commands:
       *   - "stop recording": stops recognition (regardless of the field).
       *   - "go": submits the form, but only when the field already has text.
       *   - "reset input": clears the field, but only when it already has text.
       * Anything else (including "go" / "reset input" spoken while the field
       * is empty) is written to the search field as-is, replacing its value.
       *
       * @param {SpeechRecognitionEvent} event - the "result" event.
       */
      function resultOfSpeechRecognition(event) {
        const spoken = event.results[event.resultIndex][0].transcript;
        const command = spoken.toLowerCase().trim();

        if(command === "stop recording") {
          recognition.stop();
          return;
        }

        // "go" and "reset input" only act as commands once there is a query.
        const hasQuery = Boolean(searchFormInput.value);

        if(hasQuery && command === "go") {
          searchForm.submit();
          return;
        }

        if(hasQuery && command === "reset input") {
          searchFormInput.value = "";
          return;
        }

        searchFormInput.value = spoken;
      }
      
      info.textContent = 'Voice Commands: "stop recording", "reset input", "go"';
      
    }
    else {
      console.log("Your Browser does not support speech Recognition");
      info.textContent = "Your Browser does not support Speech Recognition";
    }
</script>
About Author

Hey, this is Nilesh Sharma and I belong to the Pink City (Jaipur). My job as a software developer has made me a patient man and sharpened my negotiation skills! My job also keeps me on my toes, as I am always trying to learn new skills to keep pace with the changing technology landscape.

Sign up for weekly update

Milkshake is almost ready. If you're interested in testing it out, then sign up below to get exclusive access.