import { Call } from "@azure/communication-calling";
import { AudioConfig, SpeechConfig, SpeechRecognizer } from "microsoft-cognitiveservices-speech-sdk";
import create from "zustand";
import { SPEECHTOKEN_ENDPOINT } from "../../Constants";

/** Token/region pair handed to the Speech SDK when building a speech configuration. */
type CognitiveServicesContext = { token: string; region: string };

/**
 * State and actions of the speech recognition store.
 *
 * Plain fields hold the transcript and recognizer bookkeeping; every action
 * lives under `api`.
 */
export type SpeechRecognitionStore = {
    /** Every recognized phrase, in the order it was added. */
    results: string[];
    /** The phrase most recently added to `results` (empty string initially). */
    lastResult: string;

    /** Recognizer bound to the remote call audio, or null before one is created. */
    remoteRecognizer: SpeechRecognizer | null;
    /** Recognizer bound to the local microphone, or null before one is created. */
    localRecognizer: SpeechRecognizer | null;

    /** True once remote continuous recognition has started; cleared by `stopRemoteSpeechRecognition`. */
    continuousRecognitionInProgress: boolean;
    /** True once local continuous recognition has started; cleared by `stopLocalSpeechRecognition`. */
    localRecognitionInProgress: boolean;

    /** Token and region stored by the remote recognition actions, or null if none was fetched yet. */
    cognitiveContext: CognitiveServicesContext | null;

    api: {
        /** Appends `text` to `results` and makes it the `lastResult`. */
        addRecognitionResult: (text: string) => void;
        /** Requests a speech token and region from the token endpoint. */
        getCognitiveServicesToken: () => Promise<CognitiveServicesContext>;
        /** Starts continuous recognition on the first remote audio stream of `call`. */
        startRemoteSpeechRecognition: (call: Call) => Promise<void>;
        /** Starts continuous recognition on the microphone identified by `microphoneId`. */
        startLocalSpeechRecognition: (microphoneId: string) => Promise<void>;

        /** Stops remote continuous recognition and clears `continuousRecognitionInProgress`. */
        stopRemoteSpeechRecognition: () => Promise<void>;
        /** Stops local continuous recognition and clears `localRecognitionInProgress`. */
        stopLocalSpeechRecognition: () => Promise<void>;

        /** Runs one single-shot recognition on the remote audio of `call`. */
        startSingleRemoteSpeechRecognition: (call: Call) => Promise<void>;
    };
};

export const useSpeechRecognitionStore = create<SpeechRecognitionStore>((set, get) => ({
    results: [],
    lastResult: "",
    remoteRecognizer: null,
    localRecognizer: null,

    continuousRecognitionInProgress: false,
    localRecognitionInProgress: false,

    cognitiveContext: null, 
    api: {
        /**
         * Records a recognized phrase: appends it to `results` and makes it `lastResult`.
         *
         * @param text - Recognized text to store.
         */
        addRecognitionResult: (text: string) => {
            set(({ results }) => ({
                results: results.concat(text),
                lastResult: text
            }));
        },
        /**
         * Fetches a speech token and region from the token endpoint.
         *
         * Production builds (`NODE_ENV === "production"`) call `SPEECHTOKEN_ENDPOINT`;
         * every other build calls the local server at `http://localhost:2567`.
         *
         * @returns The endpoint's JSON payload, guaranteed to carry a string `token` and `region`.
         * @throws Error when the request fails, the endpoint answers with a non-2xx status,
         *   the body is not valid JSON, or `token` / `region` is missing.
         */
        getCognitiveServicesToken: async () => {
            const url =
                process.env.NODE_ENV === "production"
                    ? SPEECHTOKEN_ENDPOINT
                    : "http://localhost:2567/get-speech-token";

            // fetch() only rejects on network failure, so HTTP errors must be checked explicitly.
            const response = await fetch(url, { method: "GET" });
            if (!response.ok) {
                throw new Error(`Speech token request failed: ${response.status} ${response.statusText}`);
            }

            const payload = (await response.json()) as { token?: unknown; region?: unknown } | null;
            const token = payload?.token;
            const region = payload?.region;
            if (typeof token !== "string" || typeof region !== "string") {
                throw new Error("Speech token response is missing a string token or region");
            }

            // Keep any extra fields the endpoint sends, as the previous implementation did.
            return { ...payload, token, region };
        },
        /**
         * Starts continuous en-US speech recognition on the first remote audio stream of a call.
         *
         * Fetches a speech token (stored in `cognitiveContext`), builds a recognizer over the
         * remote media stream, appends every non-empty recognized phrase to the store and
         * stores the recognizer in `remoteRecognizer`. The returned promise settles once the
         * start has been requested; `continuousRecognitionInProgress` becomes true when the
         * SDK reports that recognition started.
         *
         * NOTE(review): a recognizer already stored in `remoteRecognizer` is overwritten
         * without being stopped or closed — confirm whether callers rely on that.
         *
         * @param call - Call whose first remote audio stream is transcribed.
         * @throws Error when the call has no remote audio stream.
         */
        startRemoteSpeechRecognition: async (call: Call) => {
            // function sleep(ms) {
            //     return new Promise(resolve => setTimeout(resolve, ms));
            // }
            // await call.unmuteIncomingAudio();
            // await sleep(1000);
            // await call.muteIncomingAudio();

            const cognitiveContext = await get().api.getCognitiveServicesToken();
            set({ cognitiveContext });
            const speechConfig = SpeechConfig.fromAuthorizationToken(cognitiveContext.token, cognitiveContext.region);
            speechConfig.speechRecognitionLanguage = "en-US";

            // The list is empty until a remote participant's audio is available.
            const remoteStream = call.remoteAudioStreams[0];
            if (!remoteStream) {
                throw new Error("Cannot start remote speech recognition: the call has no remote audio stream");
            }
            const mediaStream = await remoteStream.source.getMediaStream();
            const audioConfig = AudioConfig.fromStreamInput(mediaStream);

            const recognizer = new SpeechRecognizer(speechConfig, audioConfig);

            recognizer.recognized = (sender, evt) => {
                console.log("RECOGNIZED REMOTE", sender, evt.result.text);
                if (evt.result.text) {
                    get().api.addRecognitionResult(evt.result.text);
                }
            };

            recognizer.startContinuousRecognitionAsync(
                () => {
                    console.log("continuous recognition started 2");
                    set({ continuousRecognitionInProgress: true })
                },
                (error) => {
                    // Without this callback a failed start would go unnoticed.
                    console.error("Failed to start remote continuous recognition", error);
                }
            );

            set({ remoteRecognizer: recognizer });
        },
        /**
         * Starts continuous en-US speech recognition on a local microphone.
         *
         * Fetches a speech token, builds a recognizer over the given microphone, appends
         * every non-empty recognized phrase to the store and stores the recognizer in
         * `localRecognizer`. The returned promise settles once the start has been requested;
         * `localRecognitionInProgress` becomes true when the SDK reports that recognition started.
         *
         * @param microphoneId - Device id passed to `AudioConfig.fromMicrophoneInput`.
         */
        startLocalSpeechRecognition: async (microphoneId: string) => {
            const cognitiveContext = await get().api.getCognitiveServicesToken();
            const speechConfig = SpeechConfig.fromAuthorizationToken(cognitiveContext.token, cognitiveContext.region);
            speechConfig.speechRecognitionLanguage = "en-US";

            // let ras = call.localAudioStreams[0];
            // let s = await ras.source.getMediaStream();
            const audioConfig = AudioConfig.fromMicrophoneInput(microphoneId);

            const recognizer = new SpeechRecognizer(speechConfig, audioConfig);

            recognizer.recognized = (sender, evt) => {
                console.log("RECOGNIZED INTERNAL", sender, evt.result.text);
                if (evt.result.text) {
                    get().api.addRecognitionResult(evt.result.text);
                }
            };

            recognizer.startContinuousRecognitionAsync(
                () => {
                    console.log("continuous recognition started");
                    set({ localRecognitionInProgress: true })
                },
                (error) => {
                    // Without this callback a failed start would go unnoticed.
                    console.error("Failed to start local continuous recognition", error);
                }
            );

            set({ localRecognizer: recognizer });
        },
        /**
         * Stops continuous recognition on the remote recognizer, if one exists, and clears
         * `continuousRecognitionInProgress` once the SDK confirms the stop.
         *
         * @throws Error when the SDK reports that stopping failed; the flag is left unchanged.
         */
        stopRemoteSpeechRecognition: async () => {
            const recognizer = get().remoteRecognizer;
            if (recognizer) {
                // stopContinuousRecognitionAsync is callback-based and returns void, so it
                // must be wrapped in a Promise for `await` to actually wait for completion.
                await new Promise<void>((resolve, reject) => {
                    recognizer.stopContinuousRecognitionAsync(
                        () => resolve(),
                        (error) => reject(new Error(`Failed to stop remote speech recognition: ${error}`))
                    );
                });
            }
            set({ continuousRecognitionInProgress: false })
        },
        /**
         * Stops continuous recognition on the local recognizer, if one exists, and clears
         * `localRecognitionInProgress` once the SDK confirms the stop.
         *
         * @throws Error when the SDK reports that stopping failed; the flag is left unchanged.
         */
        stopLocalSpeechRecognition: async () => {
            const recognizer = get().localRecognizer;
            if (recognizer) {
                // stopContinuousRecognitionAsync is callback-based and returns void, so it
                // must be wrapped in a Promise for `await` to actually wait for completion.
                await new Promise<void>((resolve, reject) => {
                    recognizer.stopContinuousRecognitionAsync(
                        () => resolve(),
                        (error) => reject(new Error(`Failed to stop local speech recognition: ${error}`))
                    );
                });
            }
            set({ localRecognitionInProgress: false })
        },
        /**
         * Runs one single-shot recognition on the remote audio of a call.
         *
         * Reuses the token in `cognitiveContext` and the recognizer in `remoteRecognizer`
         * when present; otherwise fetches a token and/or builds an en-US recognizer over the
         * call's first remote audio stream and stores them. The recognition result is only
         * logged, not added to `results`. The returned promise settles when the SDK
         * delivers the result.
         *
         * @param call - Call whose first remote audio stream is used when a recognizer must be created.
         * @throws Error when a recognizer must be created but the call has no remote audio
         *   stream, or when the SDK reports a recognition failure.
         */
        startSingleRemoteSpeechRecognition: async (call: Call) => {
            console.log("WTF")
            let cognitiveContext = get().cognitiveContext;
            if (!cognitiveContext) {
                console.log("getting cognivecontext")
                cognitiveContext = await get().api.getCognitiveServicesToken();
                set({ cognitiveContext });
            }

            let recognizer = get().remoteRecognizer;
            if (!recognizer) {
                console.log("creating recognizer")
                const speechConfig = SpeechConfig.fromAuthorizationToken(cognitiveContext.token, cognitiveContext.region);
                speechConfig.speechRecognitionLanguage = "en-US";

                // The list is empty until a remote participant's audio is available.
                const remoteStream = call.remoteAudioStreams[0];
                if (!remoteStream) {
                    throw new Error("Cannot start remote speech recognition: the call has no remote audio stream");
                }
                const mediaStream = await remoteStream.source.getMediaStream();
                const audioConfig = AudioConfig.fromStreamInput(mediaStream);

                recognizer = new SpeechRecognizer(speechConfig, audioConfig);
                set({ remoteRecognizer: recognizer })
            }

            // Bind to a const so the non-null narrowing survives inside the Promise executor.
            const activeRecognizer = recognizer;
            console.log("recognizing once")
            // recognizeOnceAsync is callback-based and returns void, so it must be wrapped
            // in a Promise for `await` to actually wait for the result.
            await new Promise<void>((resolve, reject) => {
                activeRecognizer.recognizeOnceAsync(
                    (result) => {
                        console.log("RECOGNIZED ONCE>", result)
                        resolve();
                    },
                    (error) => reject(new Error(`Single-shot remote speech recognition failed: ${error}`))
                );
            });
        }


    }
}))