import {
  ApiAzureCognitiveApi,
  Configuration,
  ResponseError,
} from "../generated/api";
import {
  ConvertParam,
  text2Ssml,
  textWithWordPronounciations2Ssml,
} from "./ssmlHelper";

/**
 * Synthesized audio plus the derived values needed to play it back.
 *
 * `blobUrl` and `durationMs` are `null` when the service returned an empty
 * body (`blobSize === 0`), in which case there is nothing to play.
 */
type AudioInfo = {
  /** Raw audio data as returned by the speech service. */
  blob: Blob;
  /** Size of `blob` in bytes. */
  blobSize: number;
  /** Object URL created for `blob`, or `null` when the audio is empty. */
  blobUrl: string | null;
  /** Playback duration in milliseconds, or `null` when the audio is empty. */
  durationMs: number | null;
};

/**
 * Conversion parameters extended with per-word pronunciation overrides.
 * Everything in `ConvertParam` is kept as-is; only `wordPronounciations` is added.
 */
type ConvertParamWithWordPronounciations = ConvertParam & {
  // Each entry pairs a literal string with a phoneme string. How the pair is
  // rendered into SSML is decided by `textWithWordPronounciations2Ssml`
  // (not visible here) — NOTE(review): confirm the expected phoneme notation there.
  wordPronounciations: { literal: string; phoneme: string }[];
};

/**
 * Client for the Azure Cognitive Services text-to-speech REST endpoint.
 *
 * Turns text (optionally with per-word pronunciations) into SSML, posts it to
 * the speech service with a bearer token obtained from the application API,
 * and returns the resulting audio. Results are cached per SSML string until
 * {@link AzureCognitiveService.disposeAudioCaches} is called.
 *
 * https://learn.microsoft.com/ja-jp/azure/cognitive-services/speech-service/rest-text-to-speech
 */
export class AzureCognitiveService {
  /**
   * How long before its `exp` a cached token is already treated as expired,
   * so that a token about to lapse is not sent with a new request.
   */
  private static readonly tokenExpiryMarginMs = 60 * 1000;

  /** Transparent cache of synthesized audio, keyed by the SSML that produced it. */
  private caches = new Map<string, AudioInfo>();

  /** Most recently issued token with its expiry, or `null` before the first issue. */
  private cachedToken: { exp: Date; token: string } | null = null;

  /** Requests currently in flight through {@link AzureCognitiveService.fetch}. */
  private fetchingPromises: Promise<Response>[] = [];

  /**
   * @param csrfToken value sent as the `X-CSRF-Token` header when requesting a token.
   */
  constructor(private csrfToken: string) {}

  /**
   * Returns a bearer token for the speech service, reusing the cached one
   * while it is still comfortably within its lifetime.
   *
   * @returns the token string.
   * @throws if the token request fails or the token payload cannot be decoded.
   */
  async ensureToken(): Promise<string> {
    const cached = this.cachedToken;
    if (
      cached !== null &&
      cached.exp.getTime() - AzureCognitiveService.tokenExpiryMarginMs >
        Date.now()
    ) {
      return cached.token;
    }

    const api = new ApiAzureCognitiveApi(
      new Configuration({
        basePath: "",
        headers: {
          "x-hopper-api-version": "1.0",
          "X-CSRF-Token": this.csrfToken,
        },
      })
    );
    const token = await api.apiAzureCognitiveIssueTokenPost();
    this.cachedToken = {
      exp: AzureCognitiveService.parseTokenExpiry(token),
      token,
    };
    return token;
  }

  /**
   * Reads the `exp` claim (seconds since the epoch) from a JWT.
   *
   * JWT segments are base64url-encoded without padding, which `atob` does not
   * accept as-is, so the segment is converted to plain base64 first.
   * If the claim is missing or not numeric the result is an invalid Date,
   * which never compares as "still valid" and therefore forces a re-issue.
   *
   * https://openid-foundation-japan.github.io/draft-ietf-oauth-json-web-token-11.ja.html#expDef
   */
  private static parseTokenExpiry(token: string): Date {
    const segment = token.split(".")[1] ?? "";
    const base64 = segment.replace(/-/g, "+").replace(/_/g, "/");
    const padded = base64.padEnd(Math.ceil(base64.length / 4) * 4, "=");
    const payload: { exp?: unknown } = JSON.parse(atob(padded));
    return new Date(Number(payload.exp) * 1000);
  }

  /**
   * Synthesizes plain text.
   *
   * @param param text, voice and speech rate handed to `text2Ssml`.
   * @param opt optional abort signal forwarded to the HTTP request.
   * @returns the synthesized audio.
   */
  async convert(
    { text, voiceName, speechRate }: ConvertParam,
    opt?: { abortSignal: AbortSignal }
  ): Promise<AudioInfo> {
    const ssml = text2Ssml({ text, voiceName, speechRate });

    return this.convertSsml(ssml, opt);
  }

  /**
   * Synthesizes text with per-word pronunciation overrides.
   *
   * @param param text, voice, speech rate and pronunciations handed to
   *   `textWithWordPronounciations2Ssml`.
   * @param opt optional abort signal forwarded to the HTTP request.
   * @returns the synthesized audio.
   */
  async convertWithWordPronounciations(
    {
      text,
      voiceName,
      speechRate,
      wordPronounciations,
    }: ConvertParamWithWordPronounciations,
    opt?: { abortSignal: AbortSignal }
  ): Promise<AudioInfo> {
    const ssml = textWithWordPronounciations2Ssml({
      text,
      voiceName,
      speechRate,
      wordPronounciations,
    });

    return this.convertSsml(ssml, opt);
  }

  /**
   * Synthesizes an SSML document, serving repeated requests from the cache.
   *
   * @param ssml SSML document sent as the request body; also the cache key.
   * @param opt optional abort signal forwarded to the HTTP request.
   * @returns the synthesized audio.
   * @throws ResponseError when the service answers with a non-2xx status;
   *   also rejects with whatever the token request or the HTTP request rejects with.
   */
  async convertSsml(
    ssml: string,
    opt?: { abortSignal: AbortSignal }
  ): Promise<AudioInfo> {
    const cached = this.caches.get(ssml);
    if (cached !== undefined) {
      return cached;
    }

    const token = await this.ensureToken();

    const res = await this.fetch(
      "https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/ssml+xml",
          "X-Microsoft-OutputFormat": "audio-48khz-96kbitrate-mono-mp3",
        },
        body: ssml,
        signal: opt?.abortSignal,
      }
    );
    if (!res.ok) {
      throw new ResponseError(
        res,
        `Request to cognitive service failed with status ${res.status} ${res.statusText}`
      );
    }
    const result = await this.resolveAudioInfo(await res.blob());

    // A concurrent call for the same SSML may have filled the cache while this
    // one was in flight. Keep the entry that is already cached and release the
    // object URL created here, otherwise it would never be revoked.
    const winner = this.caches.get(ssml);
    if (winner !== undefined) {
      if (result.blobUrl) {
        URL.revokeObjectURL(result.blobUrl);
      }
      return winner;
    }

    this.caches.set(ssml, result);
    return result;
  }

  /**
   * `window.fetch` with a soft cap on the number of simultaneous requests.
   *
   * @param args same arguments as the global `fetch`.
   * @returns the response from `window.fetch`.
   * @throws rejects with whatever `window.fetch` rejects with (network error, abort, ...).
   */
  async fetch(...args: Parameters<typeof fetch>): Promise<Response> {
    // To stay clear of the quota on concurrent connections, hold new requests
    // back while more than 20 are in flight and let them through gradually.
    while (this.fetchingPromises.length > 20) {
      await Promise.allSettled([...this.fetchingPromises]);
    }

    const promise = window.fetch(...args);
    this.fetchingPromises.push(promise);
    try {
      return await promise;
    } finally {
      // Always unregister, including on rejection: a settled promise left in
      // the list would keep the wait loop above spinning forever.
      const index = this.fetchingPromises.indexOf(promise);
      if (index !== -1) {
        this.fetchingPromises.splice(index, 1);
      }
    }
  }

  /** Revokes every cached object URL and empties the cache. */
  disposeAudioCaches() {
    this.caches.forEach((c) => {
      if (c.blobUrl) {
        URL.revokeObjectURL(c.blobUrl);
      }
    });
    this.caches.clear();
  }

  /**
   * Wraps an audio blob with its size, an object URL and its duration.
   *
   * @param blob audio data returned by the speech service.
   * @returns the audio info; `blobUrl` and `durationMs` are `null` for an empty blob.
   * @throws rejects if the duration cannot be determined; the object URL
   *   created for the blob is revoked before rethrowing.
   */
  async resolveAudioInfo(blob: Blob): Promise<AudioInfo> {
    const blobSize = blob.size;
    // Text with no meaningful content (e.g. punctuation only) yields a
    // generated file of size 0.
    if (blobSize === 0) {
      return { blob, blobSize, blobUrl: null, durationMs: null };
    }
    const blobUrl = URL.createObjectURL(blob);
    try {
      const durationMs = await resolveDurationMs(blobUrl);
      return { blob, blobSize, blobUrl, durationMs };
    } catch (e) {
      // Nobody will ever see this URL, so release it here.
      URL.revokeObjectURL(blobUrl);
      throw e;
    }
  }
}

/**
 * Determines the duration of the audio behind a URL by loading it into an
 * `Audio` element and waiting for its metadata.
 *
 * @param blobUrl URL of the audio to measure (the class passes an object URL).
 * @returns the duration in milliseconds, rounded to the nearest integer.
 * @throws rejects with an `Error` when the element reports a load error; the
 *   message carries the `MediaError` code and text when the element exposes them.
 */
function resolveDurationMs(blobUrl: string): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const elem = new Audio(blobUrl);
    elem.addEventListener(
      "loadedmetadata",
      () => {
        // `duration` is in seconds.
        const durationMs = Math.round(elem.duration * 1000);
        elem.remove();
        resolve(durationMs);
      },
      { once: true }
    );
    elem.addEventListener(
      "error",
      () => {
        // Reject with a real Error (not the raw DOM event) so that callers
        // get a message and a stack trace.
        const mediaError = elem.error;
        const detail = mediaError
          ? ` (code ${mediaError.code}): ${mediaError.message}`
          : "";
        reject(new Error(`Failed to load audio metadata${detail}`));
      },
      { once: true }
    );
  });
}
