import { useMutation } from "@apollo/client";
import { Button, Flex, InputCheckbox, InputText } from "@heart/components";
import Honeybadger from "@honeybadger-io/js";
import { useList, useMountEffect } from "@react-hookz/web";
import EasySpeech from "easy-speech";
import gql from "graphql-tag";
import { isString } from "lodash";
import { DateTime } from "luxon";
import PropTypes from "prop-types";
import { useCallback, useMemo, useRef, useState } from "react";

import { isTestEnvironment } from "@lib/environment";

import ChatHistory from "./ChatHistory";
import ChatTextInput from "./ChatTextInput";
import TextToSpeechContext from "./TextToSpeechContext";
import VoiceInput from "./VoiceInput";
import { ASSISTANT, MessagePropType, USER } from "./common";
import workingAudio from "./working.mp3";

/**
 * GraphQL mutation that asks the backend to generate text-to-speech audio
 * for `text`. VoiceChat assigns the returned `base64Audio` value directly to
 * the `src` of its hidden <audio> element.
 */
export const TTS_MUTATION = gql`
  mutation TTS($text: String!) {
    generateTextToSpeech(input: { text: $text }) {
      base64Audio
    }
  }
`;

/**
 * Generic voice chat component that allows the user to interact with an
 * assistant endpoint through text and voice.
 */
const VoiceChat = ({
  initialMessages = [],
  onSubmit,
  onClear,
  loading,
  onPlaybackEnd,
  debugMode,
  // browser TTS is better for test environments since we don't have access
  // to the external services needed for backend TTS
  initialBrowserTTSEnabled = isTestEnvironment(),
}) => {
  const [pauseTimeout, setPauseTimeout] = useState(2000);
  const [bingBongEnabled, setBingBongEnabled] = useState(false);
  const [messages, { push, clear: clearMessages }] = useList(initialMessages);
  const [browserTTSEnabled, setBrowserTTSEnabled] = useState(
    initialBrowserTTSEnabled
  );
  const [browserTTSVoice, setBrowserTTSVoice] = useState();
  const [generateTts] = useMutation(TTS_MUTATION);

  const speak = useCallback(
    async ({ text, audioFile }) => {
      if (browserTTSEnabled) {
        const { resolve, promise } = Promise.withResolvers();

        EasySpeech.speak({
          text,
          voice: browserTTSVoice,
          end: resolve,
          error: resolve,
        });

        return promise;
      }

      if (text && !audioFile) {
        const {
          data: {
            generateTextToSpeech: { base64Audio },
          },
        } = await generateTts({ variables: { text } });

        audioRef.current.src = base64Audio;
      } else {
        audioRef.current.src = audioFile;
      }

      return undefined;
    },
    [browserTTSEnabled, browserTTSVoice, generateTts]
  );

  const stopSpeaking = useCallback(() => {
    if (browserTTSEnabled) {
      EasySpeech.cancel();
    } else if (audioRef.current) {
      // stop any audio that is currently playing
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
    }
  }, [browserTTSEnabled]);

  const startEnabled = useMemo(
    () => window.location.search.includes("start_with_microphone_enabled=true"),
    []
  );

  // if the user has interacted in some way with the page, we can start auto
  // playing audio, but if they haven't chrome will block it. We set this
  // variable to true when we know it's safe to start playing.
  const [hasInteracted, setHasInteracted] = useState(startEnabled);

  const audioRef = useRef();

  useMountEffect(async () => {
    try {
      // eslint-disable-next-line no-console
      if (debugMode) EasySpeech.debug(console.log);

      await EasySpeech.init();
      setBrowserTTSVoice(
        // best chrome voice
        EasySpeech.voices().find(v => v.name === "Google US English") ||
          // best available safari voice
          EasySpeech.voices().find(v => v.name.includes("Karen")) ||
          // fallback
          EasySpeech.voices().find(v => v.lang === "en-US")
      );
    } catch (e) {
      Honeybadger.notify(e);
    }
  });

  const onNewMessage = useCallback(
    async message => {
      // if we're loading, don't allow the user to send another message
      // because it'll confuse the assistant having two user messages
      // in a row.
      if (loading) return;

      setHasInteracted(true);

      if (bingBongEnabled) {
        audioRef.current.src = workingAudio;
      }

      let input;
      let inputAudio;

      const newMessageTime = DateTime.now().toISO();

      if (isString(message)) {
        input = message;
        push({ role: USER, message, timestamp: newMessageTime });
      } else {
        inputAudio = message;
      }

      // submit the message
      const { response, responseAudio, inputText } = await onSubmit({
        input,
        inputAudio,
        generateTts: !browserTTSEnabled,
      });

      if (!input) {
        push({
          role: USER,
          message: inputText,
          timestamp: newMessageTime,
          inputAudio,
        });
      }

      push({
        role: ASSISTANT,
        message: response,
        timestamp: DateTime.now().toISO(),
        responseAudio,
      });
    },
    [bingBongEnabled, onSubmit, push, audioRef, browserTTSEnabled, loading]
  );

  const reset = async e => {
    e.preventDefault();
    await onClear();
    clearMessages();
  };

  return (
    <TextToSpeechContext.Provider value={{ speak, stopSpeaking }}>
      <Flex column>
        {/*
          We display all text in the chat history and <audio> tags don't
          even support captions, so I don't know why this rule even
          exists.

          This audio element is used to play the bing bong sound when
          the assistant is working.
        */}
        {/* eslint-disable-next-line jsx-a11y/media-has-caption */}
        <audio
          ref={audioRef}
          style={{ display: "none" }}
          autoPlay
          onEnded={onPlaybackEnd}
        />
        <ChatHistory
          messages={messages}
          loading={loading}
          hasInteracted={hasInteracted}
        />
        <ChatTextInput onNewMessage={onNewMessage} />
        <VoiceInput
          onNewMessage={onNewMessage}
          pauseTimeout={pauseTimeout}
          onStart={stopSpeaking}
          onClick={() => {
            setHasInteracted(true);
          }}
          startEnabled={startEnabled}
          debugMode={debugMode}
        />
        <If condition={debugMode}>
          <Flex column style={{ width: "50%", margin: "0 auto" }}>
            <Flex
              row
              style={{ justifyContent: "space-around", marginTop: "20px" }}
            >
              <InputCheckbox
                label="Working sound enabled"
                value={bingBongEnabled}
                onChange={setBingBongEnabled}
              />
              <InputCheckbox
                label="Use browser TTS"
                value={browserTTSEnabled}
                onChange={setBrowserTTSEnabled}
              />
              <InputText
                label="Pause Timeout (ms)"
                value={pauseTimeout.toString()}
                onChange={newPauseTimeout => {
                  setPauseTimeout(parseInt(newPauseTimeout, 10));
                }}
                type="number"
                step="100"
              />
              <div style={{ width: "100px" }}>
                <Button variant="secondary" onClick={reset}>
                  Reset
                </Button>
              </div>
            </Flex>
          </Flex>
        </If>
      </Flex>
    </TextToSpeechContext.Provider>
  );
};

VoiceChat.propTypes = {
  /**
   * The initial messages to display in the chat, if any. Defaults to an
   * empty list.
   */
  initialMessages: PropTypes.arrayOf(MessagePropType.isRequired),
  /**
   * Callback function to call when a new message is sent.
   *
   * Exactly one of `input` / `inputAudio` is set: `input` for typed text,
   * `inputAudio` for anything that is not a string. For audio input, the
   * resolved `inputText` is what gets displayed as the user's message.
   *
   * @param {object}  options
   * @param {string}  options.input The text input
   * @param {Blob}    options.inputAudio The audio input
   * @param {boolean} options.generateTts Whether to generate TTS for the
   *   response (false while browser TTS is enabled)
   * @returns {Promise<{ response: string, responseAudio: Blob, inputText: string }>}
   */
  onSubmit: PropTypes.func.isRequired,
  /**
   * Callback function to call when the chat is cleared. It is awaited
   * before the locally displayed messages are removed.
   *
   * @returns {Promise<void>}
   */
  onClear: PropTypes.func.isRequired,
  /**
   * Whether the chat is currently loading after a call to onSubmit. While
   * true, new messages from the user are ignored.
   */
  loading: PropTypes.bool,
  /**
   * Callback function to call when the audio playback ends. It is attached
   * as the `onEnded` handler of the hidden <audio> element.
   */
  onPlaybackEnd: PropTypes.func,
  /**
   * Enable debug mode for the voice chat. Setting this to true will
   * cause additional debugging controls to be displayed and turns on
   * EasySpeech debug logging to the console.
   */
  debugMode: PropTypes.bool,
  /**
   * Whether to use browser TTS by default. This really only should
   * be used by Jest. Defaults to the result of isTestEnvironment().
   */
  initialBrowserTTSEnabled: PropTypes.bool,
};

export default VoiceChat;
