// Evolutndetails.js
import React, { useState } from 'react';
import { Col, Row } from 'reactstrap';
import ExternalLayout from '../../customComponents/ExternalLayout/ExternalLayout';
import RenderPage from '../../customComponents/RenderPage/RenderPage';
import Strip from '../../customComponents/Strip/Strip';
import {
    useWindowSize
  } from '@react-hook/window-size';
import application_contants from '../../contants/application_contants';
import ImageViewer from '../../customComponents/ImageViewer/ImageViewer'
import GalleryData from '../../Utils/json/GalleryData';
import SEOComponent from '../../customComponents/SEO/SEOComponent';


import Sentaudio from './Sentaudio';

import audio10m from "../../assets/media1/s0_male.wav";
import audio20m from "../../assets/media2/s0_male.wav";
import audio20f from "../../assets/media2/s0_female.wav";
import audio21m from "../../assets/media2/s1_male.wav";
import audio21f from "../../assets/media2/s1_female.wav";
import audio22m from "../../assets/media2/s2_male.wav";
import audio22f from "../../assets/media2/s2_female.wav";
import audio23m from "../../assets/media2/s3_male.wav";
import audio23f from "../../assets/media2/s3_female.wav";
import audio24m from "../../assets/media2/s4_male.wav";
import audio24f from "../../assets/media2/s4_female.wav";
import audio30m from "../../assets/media3/s0_male.wav";
import audio31m from "../../assets/media3/s1_male.wav";
import audio32m from "../../assets/media3/s2_male.wav";
import audio33m from "../../assets/media3/s3_male.wav";
import audio34m from "../../assets/media3/s4_male.wav"; 
import audio40m from "../../assets/media4/s0_male.wav";
import audio40f from "../../assets/media4/s0_female.wav";
import audio41m from "../../assets/media4/s1_male.wav";
import audio41f from "../../assets/media4/s1_female.wav";
import audio42m from "../../assets/media4/s2_male.wav";
import audio42f from "../../assets/media4/s2_female.wav";
import audio43m from "../../assets/media4/s3_male.wav";
import audio43f from "../../assets/media4/s3_female.wav";
import audio44m from "../../assets/media4/s4_male.wav";
import audio44f from "../../assets/media4/s4_female.wav";
import audio50m from "../../assets/media5/s0_male.wav";
import audio50f from "../../assets/media5/s0_female.wav";
import audio51m from "../../assets/media5/s1_male.wav";
import audio51f from "../../assets/media5/s1_female.wav";
import audio52m from "../../assets/media5/s2_male.wav";
import audio52f from "../../assets/media5/s2_female.wav";
import audio53m from "../../assets/media5/s3_male.wav";
import audio53f from "../../assets/media5/s3_female.wav";
import audio54m from "../../assets/media5/s4_male.wav";
import audio54f from "../../assets/media5/s4_female.wav";
import audio60f from "../../assets/media6/s0_female.wav";
import audio70f from "../../assets/media7/s0_female.wav";
import audio71f from "../../assets/media7/s1_female.wav";
import audio72f from "../../assets/media7/s2_female.wav";
import audio73f from "../../assets/media7/s3_female.wav";
import audio74f from "../../assets/media7/s4_female.wav";
import audio80f from "../../assets/media8/s0_female.wav";
import audio81f from "../../assets/media8/s1_female.wav";
import audio82f from "../../assets/media8/s2_female.wav";
import audio83f from "../../assets/media8/s3_female.wav";
import audio84f from "../../assets/media8/s4_female.wav";



// Lookup of the demo sentences keyed by the dropdown value (1-10).
// Every text is registered under two consecutive keys: the odd key and the
// even key that follows it hold the same sentence, so each unique text is
// written once here and fanned out to both keys.
const sentences = {};
[
  'Dearest creature in Creation, Studying English pronunciation, I will teach you in my verse Sounds like corpse, corps, horse and worse. It will keep you, Susy, busy, Make your head with heat grow dizzy.',
  'Regarded as a continuation of the Klein Erlangen Programme, in the sense that a geometrical space with its group of transformations is generalized to a category with its algebra of mappings.',
  'Bust of Herodotus in Stoa of Attalus, one of the earliest nameable historians whose work survives.',
  'James Watson and Francis Crick determined the structure of DNA in 1953, using the X-ray crystallography work of Rosalind Franklin and Maurice Wilkins that indicated DNA has a helical structure',
  'These early attempts to understand microscopic phenomena, now known as the "old quantum theory", led to the full development of quantum mechanics in the mid-1920s by Niels Bohr, Erwin Schrödinger, Werner Heisenberg, Max Born, and others.',
].forEach((text, pairIndex) => {
  // pairIndex 0 -> keys 1 and 2, pairIndex 1 -> keys 3 and 4, ...
  const oddKey = 2 * pairIndex + 1;
  sentences[oddKey] = text;
  sentences[oddKey + 1] = text;
});

const Evolutndetails = () =>{
  const [width] = useWindowSize();
    React.useEffect(() => {
        window.scrollTo(0, 0);
    }, []);
  const [selectedValue, setSelectedValue] = useState('');

  const handleDropdownChange = (event) => {
    setSelectedValue(event.target.value);
  };

  return (
    <ExternalLayout>
            <RenderPage
                id="tst-Partners"
                className="strip render-page px-0"
                containerType="container-fluid"
            >
             <SEOComponent
                    title="Evolution of Tools"
                    keywords="SYnthesizing SPeech in INdian languages"
                    description="Speech recognition in agriculture and finance for the poor is an initiative predominantly to create resources and make them available as a digital public good"
                    siteUrl="/evolution-of-tools" />
                <Strip
                    id="tst-strip-funding"
                    className="strip strip-no-padding px-0"
                    containerType="container"
                >
               
                    <Row className="mx-0  pt-5">  
                         {width <= application_contants.MOBILE_BREAKPOINT ? <h5 className="h5-medium section-title title-navy-blue jostRegular pb-4 align-left"> Evolution of Tools</h5> :
                                <h5 className="h5-medium section-title jostRegular title-navy-blue align-left pt-5 pb-5 line-height-48px pl-3"> Evolution of Tools</h5>}
                          {/* <div >  */}
                          {/* <Col xs={12} sm={12} md={12} lg={12} xl={12} className="pb-5">
                        <h2 className="h5-small section-title align-left pt-2 title-navy-blue-medium">Coming soon ...</h2>
                        </Col> */}
                                <Col xs={12} sm={12} md={12} lg={12} xl={12} className="pb-4 pt-2">
                                <h1 className="evltl-h1">Evolution of Text-to-Speech</h1>
                                <p>Text-to-Speech (TTS) technology has undergone a remarkable evolution over the decades, transforming the way machines generate human-like speech from text input. Beginning with rudimentary approaches in the 1970s, TTS has progressed through various methodologies, each marked by significant advancements in computational linguistics, signal processing, and machine learning. From the early articulatory synthesis methods to the cutting-edge end-to-end models of today, the evolution of TTS reflects the relentless pursuit of creating natural-sounding, intelligible speech synthesis systems.
</p>
                               
                                <div className="dropdown-container" >
                                  <select id="sentenceDropdown" onChange={handleDropdownChange} value={selectedValue}>
                                    <option value="">Select Sentence</option>
                                    <option value="1">Sentence 1</option>
                                    <option value="2">Sentence 2</option>
                                    <option value="3">Sentence 3</option>
                                    <option value="4">Sentence 4</option>
                                    <option value="5">Sentence 5</option>
                                    <option value="6">Sentence 6</option>
                                    <option value="7">Sentence 7</option>
                                    <option value="8">Sentence 8</option>
                                    <option value="9">Sentence 9</option>
                                    <option value="10">Sentence 10</option>
                                  </select>
                                </div>


<div className="timeline">

	{/* 1970 Year */}
	<div className="timeline__event  animated fadeInUp delay-3s timeline__event--type1">
		<div className="timeline__event__icon "> </div>
		<div className="timeline__event__date">
			1970s
		</div>
		<div className="timeline__event__content ">
			<div className="timeline__event__title">
				GNU speech: Articulatory
			</div>
			<div className="timeline__event__description">
          <p>In the 1970s, the GNU Speech project pioneered the field of TTS with articulatory synthesis. Articulatory synthesis simulates the human vocal tract's physical movements to produce speech sounds. This involved modeling the physiological mechanisms of speech production, such as the movement of the tongue, lips, and vocal cords. However, early articulatory synthesis systems often produced speech that lacked naturalness and intelligibility due to limitations in modeling complexity.</p>
          <p>Architecture: Articulatory synthesis systems typically comprised models of the vocal tract, including the positions and movements of speech articulators, coupled with acoustic models to simulate the resulting speech sounds. These systems required detailed knowledge of speech production anatomy and physiology.</p>

          {selectedValue === '1' && (
          <div>
            <Sentaudio sentence={sentences[1]} gender="Male" audioFile={audio10m} />
          </div>
          )}
        </div>
    </div>
  </div>

  {/* 1980 Year */}
	<div className="timeline__event animated fadeInUp delay-2s timeline__event--type2">
		<div className="timeline__event__icon "> </div>
		<div className="timeline__event__date">
			1980s
		</div>
		<div className="timeline__event__content ">
			<div className="timeline__event__title">
        eSpeak: Formant synthesis
			</div>
			<div className="timeline__event__description">
          <p>By the 1980s, Espeak emerged as a notable advancement in TTS technology, employing formant synthesis techniques. Formant synthesis generates speech by modeling the resonances of the human vocal tract, known as formants, to produce speech sounds. This method allowed for the synthesis of intelligible speech with relatively low computational complexity compared to articulatory synthesis.</p>
          <p>Architecture: Formant synthesis systems typically consisted of mathematical models representing the resonances of the vocal tract, along with algorithms to generate speech waveforms based on specified parameters such as pitch, intensity, and duration. </p>

          {selectedValue === '1' && (
          <div>
            <Sentaudio sentence={sentences[1]} gender="Male" audioFile={audio20m} />
          </div>
          )}
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio20f} />
          </div>
          )}
          {selectedValue === '3' && (
          <div>
            <Sentaudio sentence={sentences[3]} gender="Male" audioFile={audio21m} />
          </div>
          )}
          {selectedValue === '4' && (
          <div>
            <Sentaudio sentence={sentences[4]} gender="Female" audioFile={audio21f} />
          </div>
          )}
          {selectedValue === '5' && (
          <div>
            <Sentaudio sentence={sentences[5]} gender="Male" audioFile={audio22m} />
          </div>
          )}
          {selectedValue === '6' && (
          <div>
            <Sentaudio sentence={sentences[6]} gender="Female" audioFile={audio22f} />
          </div>
          )}
          {selectedValue === '7' && (
          <div>
            <Sentaudio sentence={sentences[7]} gender="Male" audioFile={audio23m} />
          </div>
          )}
          {selectedValue === '8' && (
          <div>
            <Sentaudio sentence={sentences[8]} gender="Female" audioFile={audio23f} />
          </div>
          )}
          {selectedValue === '9' && (
          <div>
            <Sentaudio sentence={sentences[9]} gender="Male" audioFile={audio24m} />
          </div>
          )}
          {selectedValue === '10' && (
          <div>
            <Sentaudio sentence={sentences[10]} gender="Female" audioFile={audio24f} />
          </div>
          )}
        </div>
    </div>
  </div>

  {/* 1990 Year */}
	<div className="timeline__event animated fadeInUp delay-1s timeline__event--type3">
		<div className="timeline__event__icon "> </div>
		<div className="timeline__event__date">
			1990s
		</div>
		<div className="timeline__event__content ">
			<div className="timeline__event__title">
        Concatenative synthesis: Diphone based concatenation
			</div>
			<div className="timeline__event__description">
          <p>In the 1990s, concatenative synthesis using diphone-based concatenation gained prominence. This method involved assembling speech units known as diphones, which represent transitions between consecutive phonemes, to generate natural-sounding speech. Diphone-based concatenation significantly improved the quality and naturalness of synthesized speech compared to earlier techniques by preserving the smooth transitions between phonemes.</p>
          <p>Architecture: Diphone-based concatenative synthesis systems typically included databases of recorded speech segments (diphones), along with algorithms for selecting and concatenating appropriate diphones based on the input text and desired prosody.</p>

          {selectedValue === '1' && (
          <div>
            <Sentaudio sentence={sentences[1]} gender="Male" audioFile={audio30m} />
          </div>
          )}
          {selectedValue === '3' && (
          <div>
            <Sentaudio sentence={sentences[3]} gender="Male" audioFile={audio31m} />
          </div>
          )}
          {selectedValue === '5' && (
          <div>
            <Sentaudio sentence={sentences[5]} gender="Male" audioFile={audio32m} />
          </div>
          )}
          {selectedValue === '7' && (
          <div>
            <Sentaudio sentence={sentences[7]} gender="Male" audioFile={audio33m} />
          </div>
          )}
          {selectedValue === '9' && (
          <div>
            <Sentaudio sentence={sentences[9]} gender="Male" audioFile={audio34m} />
          </div>
          )}
        </div>
    </div>
  </div>

  {/* 1990 Year */}
	<div className="timeline__event animated fadeInUp timeline__event--type1">
		<div className="timeline__event__icon"> </div>
		<div className="timeline__event__date">
			1990s
		</div>
		<div className="timeline__event__content">
			<div className="timeline__event__title">
				Concatenative synthesis: Unit-selection based concatenation
			</div>
			<div className="timeline__event__description">
          <p>Concurrently in the 1990s, unit selection-based concatenation emerged as another major advancement in TTS technology. Unlike diphone-based concatenation, this method dynamically selected speech units from a large database based on criteria such as phonetic context and prosody, resulting in more natural and expressive speech synthesis.</p>
          <p>Architecture: Unit selection-based concatenative synthesis systems employed sophisticated algorithms for selecting speech units (such as diphones or larger units) from a database, considering factors such as phonetic context, prosody, and smoothness of transitions. These systems often included techniques for minimizing concatenation artifacts and optimizing the selection process for naturalness.</p>

          {selectedValue === '1' && (
          <div>
            <Sentaudio sentence={sentences[1]} gender="Male" audioFile={audio40m} />
          </div>
          )}
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio40f} />
          </div>
          )}
          {selectedValue === '3' && (
          <div>
            <Sentaudio sentence={sentences[3]} gender="Male" audioFile={audio41m} />
          </div>
          )}
          {selectedValue === '4' && (
          <div>
            <Sentaudio sentence={sentences[4]} gender="Female" audioFile={audio41f} />
          </div>
          )}
          {selectedValue === '5' && (
          <div>
            <Sentaudio sentence={sentences[5]} gender="Male" audioFile={audio42m} />
          </div>
          )}
          {selectedValue === '6' && (
          <div>
            <Sentaudio sentence={sentences[6]} gender="Female" audioFile={audio42f} />
          </div>
          )}
          {selectedValue === '7' && (
          <div>
            <Sentaudio sentence={sentences[7]} gender="Male" audioFile={audio43m} />
          </div>
          )}
          {selectedValue === '8' && (
          <div>
            <Sentaudio sentence={sentences[8]} gender="Female" audioFile={audio43f} />
          </div>
          )}
          {selectedValue === '9' && (
          <div>
            <Sentaudio sentence={sentences[9]} gender="Male" audioFile={audio44m} />
          </div>
          )}
          {selectedValue === '10' && (
          <div>
            <Sentaudio sentence={sentences[10]} gender="Female" audioFile={audio44f} />
          </div>
          )}
        </div>
    </div>
  </div>
  
  {/* 2005 Year */}
	<div className="timeline__event  animated fadeInUp delay-3s timeline__event--type1">
		<div className="timeline__event__icon "> </div>
		<div className="timeline__event__date">
			2005
		</div>
		<div className="timeline__event__content ">
			<div className="timeline__event__title">
      Machine learning model: HMM based synthesis
			</div>
			<div className="timeline__event__description">
          <p> By 2005, TTS technology began incorporating machine learning approaches, with Hidden Markov Model (HMM)-based synthesis gaining prominence. HMMs provided a probabilistic framework for modeling speech generation, allowing for more accurate and flexible synthesis of speech from text input.</p>
          <p>Architecture: HMM-based TTS systems typically consisted of models representing linguistic features, acoustic features, and state transitions. These models were trained on large datasets of text and corresponding speech recordings, enabling them to learn the mapping between text input and acoustic features, such as spectrograms or Mel-frequency cepstral coefficients (MFCCs), for speech synthesis.</p>

          {selectedValue === '1' && (
          <div>
            <Sentaudio sentence={sentences[1]} gender="Male" audioFile={audio50m} />
          </div>
          )}
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio50f} />
          </div>
          )}
          {selectedValue === '3' && (
          <div>
            <Sentaudio sentence={sentences[3]} gender="Male" audioFile={audio51m} />
          </div>
          )}
          {selectedValue === '4' && (
          <div>
            <Sentaudio sentence={sentences[4]} gender="Female" audioFile={audio51f} />
          </div>
          )}
          {selectedValue === '5' && (
          <div>
            <Sentaudio sentence={sentences[5]} gender="Male" audioFile={audio52m} />
          </div>
          )}
          {selectedValue === '6' && (
          <div>
            <Sentaudio sentence={sentences[6]} gender="Female" audioFile={audio52f} />
          </div>
          )}
          {selectedValue === '7' && (
          <div>
            <Sentaudio sentence={sentences[7]} gender="Male" audioFile={audio53m} />
          </div>
          )}
          {selectedValue === '8' && (
          <div>
            <Sentaudio sentence={sentences[8]} gender="Female" audioFile={audio53f} />
          </div>
          )}
          {selectedValue === '9' && (
          <div>
            <Sentaudio sentence={sentences[9]} gender="Male" audioFile={audio54m} />
          </div>
          )}
          {selectedValue === '10' && (
          <div>
            <Sentaudio sentence={sentences[10]} gender="Female" audioFile={audio54f} />
          </div>
          )}
        </div>
    </div>
  </div>

  {/* 2015 Year */}
	<div className="timeline__event animated fadeInUp delay-2s timeline__event--type2">
		<div className="timeline__event__icon"> </div>
		<div className="timeline__event__date">
			2015
		</div>
		<div className="timeline__event__content">
			<div className="timeline__event__title">
          Deep learning models
			</div>
			<div className="timeline__event__description">
          <p>In the mid-2010s, deep learning revolutionized TTS technology, enabling the development of highly sophisticated speech synthesis models. Deep learning architectures, such as recurrent neural networks (RNNs), long short-term memory (LSTM) networks, and convolutional neural networks (CNNs), significantly improved the naturalness, fluency, and expressiveness of synthesized speech.</p>
          <p>Architecture: Deep learning-based TTS systems utilized neural network architectures capable of learning complex mappings between text input and speech output directly from data. For example, sequence-to-sequence models, often based on RNNs or transformers, were employed to map input text sequences to corresponding spectrograms or waveforms for speech synthesis.</p>
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio60f} />
          </div>
          )}
        </div>
    </div>
  </div>

  {/* 2018 Year */}
	<div className="timeline__event animated fadeInUp delay-1s timeline__event--type3">
		<div className="timeline__event__icon"> </div>
		<div className="timeline__event__date">
			2018
		</div>
		<div className="timeline__event__content">
			<div className="timeline__event__title">
          Semi end-to-end speech synthesis models
			</div>
			<div className="timeline__event__description">
          <p> Building upon the advancements of deep learning, semi end-to-end speech synthesis models emerged in 2018. These models combined traditional signal processing techniques with deep learning algorithms to achieve more efficient and accurate speech synthesis, bridging the gap between conventional methods and end-to-end approaches.</p>
          <p>Architecture: Semi end-to-end TTS models typically integrated deep learning components, such as neural networks for feature extraction or modeling, with traditional signal processing techniques like vocoding or waveform generation. These hybrid architectures aimed to leverage the strengths of both approaches for improved speech synthesis performance.</p>
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio70f} />
          </div>
          )}
          {selectedValue === '4' && (
          <div>
            <Sentaudio sentence={sentences[4]} gender="Female" audioFile={audio71f} />
          </div>
          )}
          {selectedValue === '6' && (
          <div>
            <Sentaudio sentence={sentences[6]} gender="Female" audioFile={audio72f} />
          </div>
          )}
          {selectedValue === '8' && (
          <div>
            <Sentaudio sentence={sentences[8]} gender="Female" audioFile={audio73f} />
          </div>
          )}
          {selectedValue === '10' && (
          <div>
            <Sentaudio sentence={sentences[10]} gender="Female" audioFile={audio74f} />
          </div>
          )}
        </div>
    </div>
  </div>

   {/* 2021 Year */}
   <div className="timeline__event animated fadeInUp timeline__event--type1">
		<div className="timeline__event__icon"> </div>
		<div className="timeline__event__date">
			2021
		</div>
		<div className="timeline__event__content">
			<div className="timeline__event__title">
          End-to-end models
			</div>
			<div className="timeline__event__description">
          <p>The latest frontier in TTS technology, end-to-end models, reached prominence in 2021. These models directly map text input to spectrograms or waveforms, eliminating the need for intermediate linguistic features or handcrafted rules. End-to-end models have demonstrated superior performance in terms of naturalness, intelligibility, and scalability, representing the culmination of decades of research and development in TTS technology.</p>
          <p>Architecture: End-to-end TTS models typically consist of deep neural networks, such as transformers or convolutional architectures, that directly learn the mapping from input text to output speech representations. These models are trained end-to-end on large-scale datasets of text and corresponding speech recordings, enabling them to capture complex linguistic patterns and generate high-quality synthesized speech with minimal manual intervention.</p>
          {selectedValue === '2' && (
          <div>
            <Sentaudio sentence={sentences[2]} gender="Female" audioFile={audio80f} />
          </div>
          )}
          {selectedValue === '4' && (
          <div>
            <Sentaudio sentence={sentences[4]} gender="Female" audioFile={audio81f} />
          </div>
          )}
          {selectedValue === '6' && (
          <div>
            <Sentaudio sentence={sentences[6]} gender="Female" audioFile={audio82f} />
          </div>
          )}
          {selectedValue === '8' && (
          <div>
            <Sentaudio sentence={sentences[8]} gender="Female" audioFile={audio83f} />
          </div>
          )}
          {selectedValue === '10' && (
          <div>
            <Sentaudio sentence={sentences[10]} gender="Female" audioFile={audio84f} />
          </div>
          )}
        </div>
    </div>
  </div>

     


        
    </div>

    </Col>
    </Row>
    </Strip>
            </RenderPage>
         </ExternalLayout>
    

  );
}

export default Evolutndetails;
