import trimSilence from "./trimSilence";

// Decibel window assigned to the AnalyserNode (minDecibels / maxDecibels).
const MIN_DECIBELS = -45;
const MAX_DECIBELS = -10;

// Default time, in milliseconds, without detected sound before the
// current chunk of speech is treated as finished.
const PAUSE_TIMEOUT = 2000;

// mp4 appears to be the most widely supported audio format
// and seems to work on Chrome and Safari
const AUDIO_FILE_TYPE = "audio/mp4";

// Minimum length, in seconds, a trimmed recording must have to be
// considered a valid input.
// A curt "yes" or "no" is about 0.15 seconds long.
const MINIMUM_OUTPUT_LENGTH = 0.1;

// Number of recorders constructed so far; each new instance takes the
// current value as its id.
let count = 0;

/**
 * Helper class to interact with browser recording APIs and parse voice
 * input into chunks and trim silence from them to make it easier for
 * speech to text services to process.
 *
 * This class uses the WebAudio API configured in the following way,
 * using the style of the WebAudio API routing diagrams
 * https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API
 *
 *  AudioContext
 * ┌───────────────────────────────────────────┐
 * │  ┌───────────┐  ┌───────────┐  ┌────────┐ │
 * │  │Mic Source ├─►│Voice Band ├─►│Analyser│ │
 * │  │Stream Node│  │Filter Node│  │Node    │ │
 * │  └───────────┘  └───────────┘  └───┬────┘ │
 * │       ┌────────────────────────────┘      │
 * │  ┌────▼──────┐                            │
 * │  │Destination│                            │
 * │  │Stream Node│                            │
 * │  └────┬──────┘                            │
 * └───────┼───────────────────────────────────┘
 *   ┌─────▼─────────┐   ┌─────────┐
 *   │               │   │Untrimmed┼─────┐
 *   │ MediaRecorder ┼──►│MP4 audio├─┐   │
 *   │               │   │Recording│ │   │
 *   └───────────────┘   └─────────┘ │   │
 *         ┌─────────────────────────┘   │
 *   ┌─────▼─────────┐   ┌──────────┐    │
 *   │ Silence       │   │Trimmed   │    │
 *   │ Trimmer       ┼──►│WAV/PCM32 │    │
 *   │               │   │Recording │    │
 *   └───────────────┘   └───────┬──┘    │
 *                               │       │
 *                               ▼       │
 *                        onInput()◄─────┘
 *                        callback
 *
 * In words, this class sets up a MediaStream from the user's microphone
 * and routes it through a bandpass filter to isolate the human voice vocal
 * range so we can reject background noise.
 *
 * The filtered audio is then passed to an AnalyserNode to detect when
 * the user is speaking. When the user stops speaking, we use whatever the
 * MediaRecorder has captured thus far to create a new File object with
 * the audio data.
 *
 * That File object is then passed to the trimSilence function to remove
 * silence from the beginning and end of the audio, and the trimmed audio
 * is passed to the onInput callback along with an "extras" object. The
 * extras object includes the original, untrimmed audio file, the time to
 * trim the file, and the start and end times of the trimmed audio in the
 * original audio buffer. These extras are useful for debugging and
 * displaying the storybook story. If the trimmed audio is too short to be
 * considered a valid input, it is not passed to the onInput callback to
 * avoid triggering on phantom noises/spikes.
 *
 * Note that the MediaRecorder is always active, but only writes to a file
 * when the AnalyserNode detects silence. This is so we don't have to detect
 * the start of the user speaking and potentially miss the first word or so.
 * There is a callback, onStart, that is called when the user starts speaking
 * so that the UI can update to show that the user is speaking, and a
 * callback, onSpeech, that receives the analyser's frequency data on every
 * animation frame in which sound is detected.
 */
export default class VoiceInputRecorder {
  /**
   * Create a recorder without starting it. Prefer the async
   * VoiceInputRecorder.createRecorder() factory, which also initializes
   * the instance.
   *
   * @param {object} options
   * @param {Function} [options.onInput] Called as onInput(trimmedFile, extras)
   *        when a new voice chunk comes in.
   * @param {Function} [options.onStart] Called with no arguments when the
   *        user starts speaking.
   * @param {Function} [options.onSpeech] Called with the analyser's
   *        Uint8Array of frequency data on every frame with detected sound.
   * @param {number} [options.pauseTimeout] Time in milliseconds to wait
   *        before considering the user to be done speaking for this chunk.
   */
  constructor({ onInput, onStart, onSpeech, pauseTimeout = PAUSE_TIMEOUT }) {
    this.recordingActive = false;
    this.onInput = onInput;
    this.onStart = onStart;
    this.onSpeech = onSpeech;
    this.lastTimeSoundHeard = 0;
    this.pauseTimeout = pauseTimeout;
    // sequential id taken from the module-level instance counter
    this.id = count;
    count += 1;
  }

  /**
   * Async factory method to create a new VoiceInputRecorder instance.
   * Await this method instead of calling the constructor directly.
   * When the promise resolves, the recorder is listening for voice input.
   *
   * Accepts the same options as the constructor; pauseTimeout is optional
   * and falls back to the constructor's default when omitted.
   *
   * @returns {Promise<VoiceInputRecorder>}
   */
  static createRecorder = ({ onInput, onStart, onSpeech, pauseTimeout }) =>
    new VoiceInputRecorder({
      onInput,
      onStart,
      onSpeech,
      pauseTimeout,
    }).initialize();

  /**
   * Whether getUserMedia is available in this environment.
   * The typeof guard keeps this from throwing a ReferenceError where
   * `navigator` is not declared at all.
   *
   * @returns {boolean}
   */
  static supportsAudioRecording = () =>
    typeof navigator !== "undefined" && !!navigator?.mediaDevices?.getUserMedia;

  /**
   * Manually stop recording audio entirely and release the microphone and
   * the AudioContext. Audio captured for the chunk in progress is discarded.
   *
   * @returns {Promise<void>} A promise that resolves when the recording
   *                          has been stopped.
   */
  stop = () => {
    this.cancelAnalysisLoop();

    // a set stoppingResolve doubles as the "we have been stopped" flag
    // checked by the recorder's stop handler and by parseInputs
    const recorderStopped = new Promise(resolve => {
      this.stoppingResolve = resolve;
    });
    this.stopMicrophone();

    if (this.mediaRecorder && this.mediaRecorder.state !== "inactive") {
      // the recorder's "stop" event handler resolves the promise
      this.mediaRecorder.stop();
    } else {
      // MediaRecorder.stop() throws on an inactive recorder (e.g. while a
      // chunk is being trimmed) and no "stop" event would ever fire, so
      // resolve right away instead
      this.stoppingResolve(undefined);
    }

    return recorderStopped.then(() => this.closeAudioContext());
  };

  //
  // private methods
  //

  /**
   * Ask for the microphone, build the audio graph and start recording.
   * If setup fails after the microphone was granted, everything acquired
   * so far is released before the error is re-thrown.
   *
   * @returns {Promise<VoiceInputRecorder>} this instance
   */
  initialize = async () => {
    // start listening. this also triggers the user to accept the browser
    // permissions if necessary.
    this.microphoneMediaStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false,
    });

    try {
      this.startProcessingStream();
      this.createMediaRecorder();
    } catch (error) {
      // don't leave the microphone open when setup fails half-way
      this.cancelAnalysisLoop();
      this.stopMicrophone();
      await this.closeAudioContext();
      throw error;
    }

    return this;
  };

  /** Cancel the pending animation frame of the analysis loop, if any. */
  cancelAnalysisLoop = () => {
    if (this.animationFrame) {
      window.cancelAnimationFrame(this.animationFrame);
      this.animationFrame = undefined;
    }
  };

  /** Stop every track of the microphone stream, if one was acquired. */
  stopMicrophone = () => {
    this.microphoneMediaStream?.getTracks().forEach(track => track.stop());
  };

  /** Close the AudioContext unless it is missing or already closed. */
  closeAudioContext = async () => {
    if (this.audioContext && this.audioContext.state !== "closed") {
      await this.audioContext.close();
    }
  };

  /**
   * Create a new MediaRecorder instance with the appropriate
   * event handlers and start recording audio.
   */
  createMediaRecorder = () => {
    let unTrimmedFile;
    // "audio/mp4" -> "mp4"; any ";codecs=..." parameters are dropped
    const extension = AUDIO_FILE_TYPE.split(";")[0].split("/")[1];

    this.mediaRecorder = new MediaRecorder(this.analyzedStream, {
      mimeType: AUDIO_FILE_TYPE,
    });

    // the data only ever arrives in one chunk, so set up a handler
    // that captures it and wraps it in a File object
    this.mediaRecorder.addEventListener("dataavailable", e => {
      unTrimmedFile = new File([e.data], `file${Date.now()}.${extension}`, {
        type: AUDIO_FILE_TYPE,
      });
    });

    // stop event can happen either because our analyser detected silence
    // or because we've been manually stopped via the stop() method.
    this.mediaRecorder.addEventListener("stop", async () => {
      // if we've been manually stopped, resolve and leave
      if (this.stoppingResolve) {
        this.stoppingResolve(undefined);
        return;
      }

      // take the chunk so a later stop can never re-emit stale data
      const recordedFile = unTrimmedFile;
      unTrimmedFile = undefined;

      try {
        // the analyser stopped us for silence, so we can emit the files
        // after trimming.
        if (recordedFile) {
          await this.emitTrimmedInput(recordedFile);
        }
      } catch (error) {
        // failures caused by stop() tearing things down mid-chunk are
        // expected; anything else is surfaced to the caller/console
        if (!this.stoppingResolve) {
          throw error;
        }
      } finally {
        // restart the recorder for the next chunk, even when this chunk
        // could not be processed, unless stop() was called meanwhile
        if (!this.stoppingResolve) {
          this.mediaRecoderStartTime = performance.now();
          this.mediaRecorder.start();
        }
      }
    });

    // start recording upon creation
    this.mediaRecoderStartTime = performance.now();
    this.mediaRecorder.start();
  };

  /**
   * Decode a recorded chunk, trim the silence around the speech and hand
   * the result to onInput together with the debugging extras.
   *
   * @param {File} unTrimmedFile The recording as produced by MediaRecorder.
   * @returns {Promise<void>}
   */
  emitTrimmedInput = async unTrimmedFile => {
    const audioBuffer = await this.audioContext.decodeAudioData(
      await unTrimmedFile.arrayBuffer()
    );

    // start the trimming process one second before when we think
    // the user started speaking to avoid including stray noises before then
    const activeOffset =
      (this.recordingActiveTime - this.mediaRecoderStartTime) / 1000;
    const beforeTrimming = performance.now();
    const { wavBytes, startTime, endTime } = trimSilence(audioBuffer, {
      startTimeHint: Math.max(0, activeOffset - 1.0),
    });
    const trimTime = performance.now() - beforeTrimming;

    // stop() was called while we were decoding: discard the chunk
    if (this.stoppingResolve) {
      return;
    }

    // only emit the trimmed file if it's long enough to be considered
    // a valid input
    if (endTime - startTime < MINIMUM_OUTPUT_LENGTH) {
      return;
    }

    const trimmedFile = new File([wavBytes], `trimmed${Date.now()}.wav`, {
      type: "audio/wav",
    });

    this.onInput?.(trimmedFile, {
      unTrimmedFile,
      trimTime,
      startTime,
      endTime,
      activeOffset,
    });
  };

  /**
   * Set up the WebAudio API to process the microphone stream. This
   * includes setting up a bandpass filter to isolate the human voice
   * vocal range and an analyser node to detect when the user is speaking.
   *
   * The nodes are chained source -> filter -> analyser -> destination, so
   * both the speech detection and the recording see the filtered signal.
   */
  startProcessingStream = () => {
    this.audioContext = new AudioContext();
    const audioStreamSource = this.audioContext.createMediaStreamSource(
      this.microphoneMediaStream
    );

    const voicePassFilter = this.audioContext.createBiquadFilter();
    voicePassFilter.type = "bandpass";
    // Calculated here for the 300-3400 Hz range, which wikipedia says is the
    // "narrow" band for human speech.
    // https://www.learningaboutelectronics.com/Articles/Quality-factor-calculator.php#answer
    // NOTE(review): this Q equals sqrt(300 * 3400) / 3100, i.e. it assumes a
    // centre of ~1010 Hz, while the centre frequency is set to 1550 Hz.
    // Confirm the pairing is intended.
    voicePassFilter.frequency.value = 1550;
    voicePassFilter.Q.value = 0.325790481883;

    // the analyser will allow us to inspect the audio stream
    // to detect when the user is speaking
    this.analyser = this.audioContext.createAnalyser();
    this.analyser.maxDecibels = MAX_DECIBELS;
    this.analyser.minDecibels = MIN_DECIBELS;
    this.domainData = new Uint8Array(this.analyser.frequencyBinCount);

    // the stream of this node is what the MediaRecorder records
    const destination = this.audioContext.createMediaStreamDestination();
    this.analyzedStream = destination.stream;

    audioStreamSource.connect(voicePassFilter);
    voicePassFilter.connect(this.analyser);
    this.analyser.connect(destination);

    this.animationFrame = window.requestAnimationFrame(() =>
      this.parseInputs()
    );
  };

  /**
   * Parse the audio stream to detect when the user is speaking and
   * notify the onStart callback when the user starts speaking.
   *
   * This method re-schedules itself in an animation frame to
   * process the audio stream continuously, until stop() has been called.
   */
  parseInputs = () => {
    this.analyser.getByteFrequencyData(this.domainData);

    if (this.isSoundDetected()) {
      this.lastTimeSoundHeard = Date.now();

      // notify on the start of the user speaking
      if (!this.recordingActive) {
        // record when we think the user started speaking
        // so we can use it as a trimming hint later
        this.recordingActiveTime = performance.now();

        this.onStart?.();
        this.recordingActive = true;
      }

      // send frequency data to the callback
      this.onSpeech?.(this.domainData);
    } else if (
      this.recordingActive &&
      Date.now() - this.lastTimeSoundHeard > this.pauseTimeout
    ) {
      this.recordingActive = false;
      // the recorder is inactive while the previous chunk is still being
      // processed; calling stop() then would throw and end this loop
      if (this.mediaRecorder.state !== "inactive") {
        this.mediaRecorder.stop();
      }
    }

    // a callback above may have called stop(); don't revive the loop then
    if (!this.stoppingResolve) {
      this.animationFrame = window.requestAnimationFrame(this.parseInputs);
    }
  };

  // The analyser will filter out values below MIN_DECIBELS, so we can
  // just check if any value is above 0.
  isSoundDetected = () => this.domainData.some(value => value !== 0);
}
