import React, { useState, useEffect, useRef } from "react";
import { SyncLoader } from "react-spinners";
import { LowLevelRTClient, SessionUpdateMessage } from "rt-client";
import { Player } from "./player";
import { Recorder } from "./recorder";
import { PiChatFill, PiChatSlashFill } from "react-icons/pi";
import { RiFilePaper2Fill } from "react-icons/ri";
import axios from "axios";
import "./Speech.css";

/**
 * Phases of the chat toggle control.
 *
 * Numeric values are spelled out so the mapping stays stable if members are
 * ever reordered.
 */
enum InputState {
  /** A start or stop request is in progress. */
  Working = 0,
  /** No session is running; the control offers to start one. */
  ReadyToStart = 1,
  /** A session is running; the control offers to stop it. */
  ReadyToStop = 2,
}

// Transcript element created for the user's most recent utterance. It is
// remembered when speech is detected and filled in later, once the input
// transcription for that utterance arrives. Declared at module level, so it is
// shared by every mounted Speech component.
let latestInputSpeechBlock: Element | null = null;

/** Props accepted by the {@link Speech} component. */
interface SpeechProps {
  /** Identifier uploaded with the transcript as the `student_id` form field. */
  studentId: string;
  /** Uploaded with the transcript as the `page` form field. */
  page: string;
  /** Uploaded with the transcript as the `subject` form field. */
  page_subject: string;
  /** Session instructions; included in the session config only when non-empty. */
  instructions: string;
  /** Session voice. The component falls back to `"shimmer"` when omitted. */
  voice?: "shimmer" | "echo" | "alloy";
  /** Session temperature. The component falls back to `0.7` when omitted. */
  temperature?: number;
}

/** Delay between transcript upload attempts: 10 minutes in milliseconds. */
const TRANSCRIPT_UPLOAD_INTERVAL_MS = 600000;

/** Number of recorded bytes sent per `input_audio_buffer.append` message. */
const AUDIO_CHUNK_BYTES = 4800;

/** Value handed to `Player.init` (presumably the playback sample rate in Hz). */
const PLAYBACK_SAMPLE_RATE = 24000;

/** Looks up the element that holds the rendered transcript paragraphs. */
const getTranscriptContainer = (): HTMLDivElement | null =>
  document.querySelector<HTMLDivElement>("#received-text-container");

/**
 * Closes a realtime client, logging instead of throwing if closing fails.
 *
 * @param client - Client to close; `null` is accepted and ignored.
 */
const closeClientQuietly = async (
  client: LowLevelRTClient | null
): Promise<void> => {
  if (!client) {
    return;
  }
  try {
    await client.close();
  } catch (error) {
    console.error("Error closing realtime connection:", error);
  }
};

/**
 * Posts a transcript form to the upload endpoint.
 *
 * @param formData - Form fields to send.
 * @returns `true` when the request succeeded, `false` when it failed (the
 *   failure is logged, never thrown).
 */
const uploadTranscript = async (formData: FormData): Promise<boolean> => {
  try {
    const response = await axios.post(
      `${process.env.REACT_APP_V3_API_ENDPOINT}/aibo/v0/master/api/speech/transcript/upload`,
      formData
    );
    console.log("Conversation transcript posted successfully, ", response);
    return true;
  } catch (error) {
    console.error("Error posting conversation transcript:", error);
    return false;
  }
};

/**
 * Voice chat widget: starts and stops a realtime audio session, renders the
 * running transcript into `#received-text-container`, mirrors the transcript
 * to `localStorage`, and periodically uploads it.
 *
 * Session objects (client, recorder, player) live in refs rather than state:
 * none of them is rendered, and the long-running callbacks must always see the
 * current instance instead of the one captured when the session started.
 */
const Speech: React.FC<SpeechProps> = ({
  instructions,
  page,
  page_subject,
  studentId,
  temperature = 0.7,
  voice = "shimmer",
}) => {
  const realtimeStreamingRef = useRef<LowLevelRTClient | null>(null);
  const audioRecorderRef = useRef<Recorder | null>(null);
  const audioPlayerRef = useRef<Player | null>(null);
  const recordingActiveRef = useRef<boolean>(false);
  // Recorded bytes that have not yet filled a complete chunk.
  const bufferRef = useRef<Uint8Array>(new Uint8Array());

  const [inputState, setInputState] = useState<InputState>(
    InputState.ReadyToStart
  );
  const [chatLoading, setChatLoading] = useState<boolean>(false);
  const [conversationTranscript, setConversationTranscript] = useState<
    string[]
  >([]);
  const [isTranscriptBoxOpen, setIsTranscriptBoxOpen] =
    useState<boolean>(false);

  // Latest transcript and upload metadata, readable from the upload timer
  // without making the timer depend on (and restart with) every change.
  const conversationTranscriptRef = useRef<string[]>([]);
  const uploadContextRef = useRef({ studentId, page, page_subject });

  useEffect(() => {
    conversationTranscriptRef.current = conversationTranscript;
    localStorage.setItem(
      "conversationTranscript",
      JSON.stringify(conversationTranscript)
    );
  }, [conversationTranscript]);

  useEffect(() => {
    uploadContextRef.current = { studentId, page, page_subject };
  }, [studentId, page, page_subject]);

  // One timer for the component's lifetime. Restarting it on each transcript
  // change would postpone the upload for as long as the conversation is active.
  useEffect(() => {
    const interval = setInterval(() => {
      console.log("Checking if need post conversation transcript");
      const pending = conversationTranscriptRef.current;
      if (pending.length === 0) {
        return;
      }
      const context = uploadContextRef.current;
      const formData = new FormData();
      formData.append("student_id", context.studentId);
      formData.append("page", context.page);
      formData.append("subject", context.page_subject);
      formData.append("transcript", JSON.stringify(pending));

      const postedCount = pending.length;
      void uploadTranscript(formData).then((uploaded) => {
        if (uploaded) {
          // Remove only what was sent; lines appended while the request was in
          // flight stay queued for the next upload. The persistence effect
          // above then rewrites localStorage with the remainder.
          setConversationTranscript((prev) => prev.slice(postedCount));
        }
      });
    }, TRANSCRIPT_UPLOAD_INTERVAL_MS);

    return () => clearInterval(interval);
  }, []);

  // Release the recorder, player queue and connection when the component
  // unmounts. The refs are read at cleanup time on purpose: the values that
  // matter are the ones current at unmount, not at mount.
  useEffect(() => {
    return () => {
      recordingActiveRef.current = false;
      audioRecorderRef.current?.stop();
      audioRecorderRef.current = null;
      audioPlayerRef.current?.clear();
      const client = realtimeStreamingRef.current;
      realtimeStreamingRef.current = null;
      void closeClientQuietly(client);
    };
  }, []);

  /** Creates and initialises the audio player used for AI speech. */
  const initializeAudio = async (): Promise<void> => {
    const player = new Player();
    await player.init(PLAYBACK_SAMPLE_RATE);
    console.log("Audio player initialized");
    audioPlayerRef.current = player;
  };

  /**
   * Reports whether the Azure OpenAI flavour of the client should be used.
   * Follows the `#azure-toggle` checkbox when one exists and defaults to
   * `true` otherwise.
   */
  const isAzureOpenAI = (): boolean => {
    // `??` rather than `||`: an unchecked toggle must yield false.
    return (
      document.querySelector<HTMLInputElement>("#azure-toggle")?.checked ?? true
    );
  };

  /**
   * Appends a paragraph to the transcript container.
   *
   * @param text - Initial text of the paragraph.
   * @param messageType - CSS class of the paragraph (`"ai"` or `"user"`).
   * @returns The new paragraph, or `null` when the container is not in the DOM.
   */
  const makeNewTextBlock = (
    text: string = "",
    messageType: "ai" | "user" = "ai"
  ): HTMLParagraphElement | null => {
    const container = getTranscriptContainer();
    if (!container) {
      return null;
    }
    const paragraph = document.createElement("p");
    paragraph.textContent = text;
    paragraph.className = messageType;
    container.appendChild(paragraph);
    return paragraph;
  };

  /**
   * Appends text to the last transcript paragraph, creating an `"ai"`
   * paragraph first when the container is empty.
   */
  const appendToTextBlock = (text: string): void => {
    const container = getTranscriptContainer();
    if (!container) {
      return;
    }
    const target = container.lastElementChild ?? makeNewTextBlock("", "ai");
    if (target) {
      target.textContent = (target.textContent ?? "") + text;
    }
  };

  /** Builds the `session.update` message from the component props. */
  const createConfigMessage = (): SessionUpdateMessage => {
    const configMessage: SessionUpdateMessage = {
      type: "session.update",
      session: {
        turn_detection: {
          threshold: 0.8,
          silence_duration_ms: 800,
          type: "server_vad",
        },
        input_audio_transcription: {
          model: "whisper-1",
        },
      },
    };

    if (instructions) {
      configMessage.session.instructions = instructions;
    }
    if (Number.isFinite(temperature)) {
      configMessage.session.temperature = temperature;
    }
    if (voice) {
      configMessage.session.voice = voice;
    }
    return configMessage;
  };

  /** Appends newly recorded bytes to the pending buffer. */
  const combineArray = (newData: Uint8Array): void => {
    const pending = bufferRef.current;
    const merged = new Uint8Array(pending.length + newData.length);
    merged.set(pending);
    merged.set(newData, pending.length);
    bufferRef.current = merged;
  };

  /**
   * Recorder callback: buffers incoming audio and forwards every complete
   * chunk to the realtime connection as base64.
   *
   * Chunks completed while recording is inactive or no client is connected
   * are discarded.
   */
  const processAudioRecordingBuffer = (data: Buffer): void => {
    combineArray(new Uint8Array(data));
    // Drain every complete chunk; a single callback may deliver more than one.
    while (bufferRef.current.length >= AUDIO_CHUNK_BYTES) {
      const toSend = bufferRef.current.slice(0, AUDIO_CHUNK_BYTES);
      bufferRef.current = bufferRef.current.slice(AUDIO_CHUNK_BYTES);

      const client = realtimeStreamingRef.current;
      if (!recordingActiveRef.current || !client) {
        continue;
      }
      const base64 = btoa(String.fromCharCode(...toSend));
      console.log("Sending audio buffer to WebSocket");
      client
        .send({
          type: "input_audio_buffer.append",
          audio: base64,
        })
        .catch((error: unknown) => {
          console.error("Error sending audio buffer:", error);
        });
    }
  };

  /**
   * Stops the current recorder, clears queued playback and any partially
   * buffered audio, and optionally starts a fresh recording.
   *
   * @param startRecording - When `true`, requests the microphone and starts a
   *   new recorder; a microphone failure is logged and shown via `alert`.
   */
  const resetAudio = async (startRecording: boolean): Promise<void> => {
    recordingActiveRef.current = false;
    audioRecorderRef.current?.stop();
    audioRecorderRef.current = null;
    audioPlayerRef.current?.clear();
    // Leftover bytes belong to the previous recording; never send them later.
    bufferRef.current = new Uint8Array();

    if (!startRecording) {
      return;
    }

    const recorder = new Recorder(processAudioRecordingBuffer);
    audioRecorderRef.current = recorder;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      if (audioRecorderRef.current !== recorder) {
        // The session was reset while the permission prompt was open, so
        // release the microphone instead of starting an orphaned recording.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      console.log("Microphone stream:", stream);
      console.log("recorder", recorder);
      // Awaited so a failure inside start() is caught here if it is async.
      await recorder.start(stream);
      recordingActiveRef.current = true;
    } catch (error) {
      console.error("Error accessing microphone:", error);
      alert("Error accessing microphone. Please check your permissions.");
    }
  };

  /** Stops audio and closes the current realtime connection, if any. */
  const teardownSession = async (): Promise<void> => {
    await resetAudio(false);
    const client = realtimeStreamingRef.current;
    realtimeStreamingRef.current = null;
    await closeClientQuietly(client);
  };

  /**
   * Consumes server messages until the stream ends, updating the transcript
   * and playing audio, then releases the session.
   */
  const handleRealtimeMessages = async (
    client: LowLevelRTClient
  ): Promise<void> => {
    for await (const message of client.messages()) {
      let consoleLog: string = message.type;
      switch (message.type) {
        case "session.created":
          setInputState(InputState.ReadyToStop);
          break;
        case "response.audio_transcript.delta":
          appendToTextBlock(message.delta);
          break;
        case "response.audio.delta": {
          const binary = atob(message.delta);
          const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
          // Int16Array needs an even byte count; ignore a trailing odd byte
          // instead of throwing a RangeError.
          const pcmData = new Int16Array(bytes.buffer, 0, bytes.length >> 1);
          audioPlayerRef.current?.play(pcmData);
          break;
        }
        case "input_audio_buffer.speech_started": {
          // Reserve the user's paragraph now; its text arrives with the
          // transcription-completed event below.
          const userBlock = makeNewTextBlock("", "user");
          if (userBlock) {
            latestInputSpeechBlock = userBlock;
          }
          makeNewTextBlock("", "ai");
          // The user started talking: drop any AI audio still queued.
          audioPlayerRef.current?.clear();
          break;
        }
        case "conversation.item.input_audio_transcription.completed":
          if (latestInputSpeechBlock) {
            latestInputSpeechBlock.textContent =
              (latestInputSpeechBlock.textContent ?? "") + message.transcript;
          }
          setConversationTranscript((prev) => [
            ...prev,
            "User: " + message.transcript,
          ]);
          break;
        case "response.audio_transcript.done":
          setConversationTranscript((prev) => [
            ...prev,
            "AI: " + message.transcript,
          ]);
          break;
        case "response.done":
          break;
        default:
          consoleLog = JSON.stringify(message, null, 2);
          break;
      }
      console.log(consoleLog);
    }

    // The stream has ended. If a newer session has already replaced this
    // client, leave that session's audio and UI state alone.
    const current = realtimeStreamingRef.current;
    if (current !== null && current !== client) {
      return;
    }
    await teardownSession();
    setInputState(InputState.ReadyToStart);
  };

  /**
   * Opens a realtime connection, sends the session config, then records and
   * processes messages until the session ends.
   *
   * @param endpoint - Service endpoint URL (used for Azure OpenAI only).
   * @param apiKey - API key for the service.
   * @param deploymentOrModel - Azure deployment name or OpenAI model name.
   */
  const startRealtime = async (
    endpoint: string,
    apiKey: string,
    deploymentOrModel: string
  ): Promise<void> => {
    const client = isAzureOpenAI()
      ? new LowLevelRTClient(
          new URL(endpoint),
          { key: apiKey },
          { deployment: deploymentOrModel }
        )
      : new LowLevelRTClient({ key: apiKey }, { model: deploymentOrModel });
    realtimeStreamingRef.current = client;

    try {
      console.log("Sending session config");
      await client.send(createConfigMessage());
    } catch (error) {
      console.error("Error sending session config:", error);
      makeNewTextBlock(
        "[Connection error]: Unable to send initial config message. Please check your endpoint and authentication details.",
        "ai"
      );
      await teardownSession();
      setChatLoading(false);
      setInputState(InputState.ReadyToStart);
      return;
    }
    console.log("Config sent");
    setChatLoading(false);
    try {
      await Promise.all([resetAudio(true), handleRealtimeMessages(client)]);
    } catch (error) {
      console.error("Error during realtime processing:", error);
      await teardownSession();
      setChatLoading(false);
      setInputState(InputState.ReadyToStart);
    }
  };

  // NOTE(review): if this is a Create React App build, REACT_APP_* values are
  // embedded in the browser bundle — confirm this key is meant to be public.
  const azureEndpoint = process.env.REACT_APP_AZURE_API_ENDPOINT;
  const apiKey = process.env.REACT_APP_AZURE_API_KEY;

  /** Click handler for the chat button: starts a session or stops the current one. */
  const handleToggleRecording = async (): Promise<void> => {
    if (inputState === InputState.ReadyToStart) {
      // Environment variables may be undefined; treat that as empty.
      const endpoint = (azureEndpoint ?? "").trim();
      const key = (apiKey ?? "").trim();
      const deploymentOrModel = "gpt-4o-realtime-preview";

      console.log("Starting recording with the following parameters:");
      console.log("Deployment/Model:", deploymentOrModel);

      // Validate before touching UI state so a failed check cannot leave the
      // spinner showing with no way to retry.
      if (isAzureOpenAI() && (!endpoint || !deploymentOrModel)) {
        alert("Endpoint and Deployment are required for Azure OpenAI");
        return;
      }
      if (!isAzureOpenAI() && !deploymentOrModel) {
        alert("Model is required for OpenAI");
        return;
      }
      if (!key) {
        alert("API Key is required");
        return;
      }

      setChatLoading(true);
      setInputState(InputState.Working);
      try {
        await initializeAudio();
        await startRealtime(endpoint, key, deploymentOrModel);
      } catch (error) {
        console.error("Error starting realtime:", error);
        await teardownSession();
        setChatLoading(false);
        setInputState(InputState.ReadyToStart);
      }
    } else if (inputState === InputState.ReadyToStop) {
      setInputState(InputState.Working);
      await teardownSession();
      setInputState(InputState.ReadyToStart);
    }
  };

  /** Shows or hides the transcript panel. */
  const toggleTranscriptBox = (): void => {
    setIsTranscriptBoxOpen((open) => !open);
  };

  return (
    <div>
      <form>
        <div className="container">
          <div>
            <div className="speech-input-group">
              <div className="button-group">
                {chatLoading ? (
                  <SyncLoader loading={chatLoading} />
                ) : (
                  <button
                    id="toggle-recording"
                    type="button"
                    onClick={handleToggleRecording}
                  >
                    {inputState === InputState.ReadyToStop
                      ? "Stop chatting"
                      : "Chat with AIBO"}
                  </button>
                )}
              </div>
            </div>
          </div>
        </div>
      </form>
      <button
        className="transcript-toggle-button"
        onClick={toggleTranscriptBox}
      >
        {isTranscriptBoxOpen ? <PiChatSlashFill /> : <PiChatFill />}
      </button>
      <div
        className={`transcript-box ${
          isTranscriptBoxOpen ? "opened-transcript-box" : ""
        }`}
      >
        <div className="transcript-title">
          <RiFilePaper2Fill color="#71c19a" /> 與AIBO的聊天記錄
        </div>
        <div className="transcript-content">
          <div id="received-text-container"></div>
        </div>
      </div>
    </div>
  );
};

export default Speech;
