// Note (Sebas, 2022-12-20): To prevent some scripts from loading and breaking the editor,
// we need to remove them from the HTML. For example, this ampproject script was causing
// no components to show on the canvas when you added a carousel.
// https://linear.app/replo/issue/REPL-5581/editor-renders-none-of-the-components
//
// NOTE Ben 2024-01-19: I refactored this to allow for using selectors per block, which will let us
// more specifically target individual scripts. I didn't significantly change any of the existing
// blocks in place before USE-667 / REPL-10078 (i.e. this commit), so some of these are broad and
// could potentially impact scripts other than their intended target.
export const blockedScripts = [
  // Note (Matt, 2023-11-15): This script makes the editor go blank for boarderie.com.
  // The asset url has a version number in the middle, so as a hacky workaround we match any
  // script tag whose src contains both "boarderie.com/cdn/shop/t/" and "/assets/custom.js".
  // uid: 54961bc4-4023-4bf8-96ed-d8f164e0c935
  'script[src*="boarderie.com/cdn/shop/t/"][src*="/assets/custom.js"]',

  // NOTE (Matt 2023-10-19): This script prevents the editor from loading for siptequila.com.
  // uid: e0090147-addf-46c7-8733-f6343a361f41
  'script[src*="//www.orderlogicapp.com/embedded_js/store.js"]',

  // NOTE (Matt 2023-11-17): These domains are used by a geolocation app called GeoTargetly. It can
  // break the editor if the user is not in a specific region. Store: Loop United States
  // uid: c16a877a-45f0-44f1-9b01-fdd12424a77e
  'script[src*="g10300385420.co"]',
  'script[src*="g792337344.co"]',

  // NOTE (Matt 2023-10-26): The following scripts are associated with a package called
  // "disable-devtools", which aims to prevent a user from loading devtools. As a side effect,
  // these scripts also totally break the Replo Editor. Found for merchant muscintime.fr.
  // uid: 9f960337-aa35-47d0-bf4d-e2c1ca5ce417
  'script[src*="disable-devtool"]',
  'script[src*="/cdn.hourth.com/e.js?shop="]',
  'script[src*="/cdn.hourth.com/k.js?shop="]',
  'script[src*="/cdn.hourth.com/d.js?shop="]',
  'script[src*="/cdn.hourth.com/t.js?shop="]',
  'script[src*="/cdn.hourth.com/n.js?shop="]',

  // ampproject scripts; see the note (Sebas, 2022-12-20) preceding this list.
  'script[src*="https://cdn.ampproject.org/v0.js"]',
  'script[src*="https://cdn.ampproject.org/v0/amp-skimlinks-0.1.js"]',

  // Note (Matt, 2023-09-13): This script makes the editor go blank for thewanderclub.com.
  // Same hacky workaround as for boarderie.com: the asset url has a version number in the middle,
  // so we match on both "thewanderclub.com/cdn/shop/t/" and "/assets/global.js".
  // uid: caf556b9-0036-4d77-8822-ffa6fade174e
  'script[src*="thewanderclub.com/cdn/shop/t/"][src*="/assets/global.js"]',

  // Note (Matt, 2023-08-07): This script crashes the editor for nightbuddy.co.
  // uid: 7c3222b3-45e0-4ca7-9919-f901c130eb8c
  'script[src*="bundle.nightbuddy.co/client"]',

  // Note (Sebas, 2023-01-09): This script causes a flash and infinite loading
  // on the ultimateautographs store.
  // uid: 44800a60-9196-4f41-aa8c-8beb92e0fd00 name: Ultimateautographs
  // https://linear.app/replo/issue/REPL-5770/production-store-flashes-iframe-content-then-displays-infinite-spinner
  'script[src*="https://s3.amazonaws.com/jstags/25570-621122e02174.js?shop=ultimateautographs.myshopify.com"]',

  // Note (Matt, 2023-08-07), USE-339: This script prevents the Editor from loading. It includes
  // the Marquee3000 package, which for some reason entirely breaks the editor when invoked.
  // Package: https://github.com/ezekielaquino/Marquee3000
  // store uid: 7dee67aa-d2f2-4307-ab0f-7359b8b024be
  'script[src*="/assets/module-marquee.min.js"]',

  // NOTE Ben 2024-01-19: USE-667 / REPL-10078 This script attempts to rewrite the theme.css by
  // using the window.location.host, which is not set when we're loading the canvas iframe w/ srcdoc.
  'script[src*="cdn.weglot.com/weglot.min.js"]',
];

// NOTE Sebas, 2023-01-11, Ben 2024-01-19: Shopify uses a liquid template called
// content_for_header to inject an IIFE called asyncLoad, which holds an array of script URLs that
// will be added to the document upon exec. Since we can't remove these by parsing the HTML, the
// closest we can get is searching the serialized HTML for strings matching these URLs and
// replacing them with blank strings.
// The entries are written with String.raw so each literal reads exactly as the text being
// searched for: the slashes are backslash-escaped (`\/`) in these strings.
const blockedAsyncScriptUrls = [
  // uid: 44800a60-9196-4f41-aa8c-8beb92e0fd00 name: Ultimateautographs
  String.raw`https:\/\/s3.amazonaws.com\/jstags\/25570-621122e02174.js?shop=ultimateautographs.myshopify.com`,

  // NOTE (Matt 2023-10-26): The following scripts are associated with a package called
  // "disable-devtools", which aims to prevent a user from loading devtools. As a side effect,
  // these scripts also totally break the Replo Editor. Found for merchant muscintime.fr.
  // uid: 9f960337-aa35-47d0-bf4d-e2c1ca5ce417
  String.raw`\/cdn.hourth.com\/e.js?shop=`,
  String.raw`\/cdn.hourth.com\/k.js?shop=`,
  String.raw`\/cdn.hourth.com\/d.js?shop=`,
  String.raw`\/cdn.hourth.com\/t.js?shop=`,
  String.raw`\/cdn.hourth.com\/n.js?shop=`,
];

// NOTE (Matt 2023-10-31): Some Editor-blocking scripts only exist as
// inline code inside of script tags. For those, we isolate reliable
// keywords that identify the script tag, then check whether the tag's
// text content includes any of those keywords so we can remove the
// script tag.
const scriptTagKeywords: string[] = [
  // NOTE (Matt 2023-10-31): Statsig has some JS that can be injected into
  // themes for A/B testing, and it blocks the Editor from loading. The most
  // reliable way to tell that the script tag exists is a call to
  // `statsig.logEvent`. Relevant store: wpstandard.com (Whipping Post
  // Leather), store uid: bfe5ec43-ce41-4675-a05c-19348b3a84d9
  "statsig.logEvent",

  // Note (Noah, 2023-11-06, USE-541): Langify has some insane JS observers
  // which try to observe all text on the page and translate it. In addition
  // to being crazy, it's a huge performance issue when loaded in the editor
  // (as far as I can tell, it tries to translate every single text node of
  // the entire editor, which ends up hanging the main thread for like,
  // 2 minutes). So, we remove it! We don't need Langify to translate
  // anything in the editor anyways.
  // Associated store: Gothrider Coffee, store uid c8252177-6ff6-4169-98ee-5232e3677a9b
  "langify.api",
];

/**
 * Parses `str` as a "text/html" document and returns that document's `<body>` element.
 *
 * @param str - HTML markup to parse.
 * @returns The `body` of the freshly parsed document.
 */
export const stringToHTML = (str: string): HTMLElement =>
  new DOMParser().parseFromString(str, "text/html").body;

/**
 * Parses `rawHtml` as a "text/html" document.
 *
 * Despite the name, the value returned is the whole parsed `Document`, not a single element.
 *
 * @param rawHtml - HTML markup to parse.
 * @returns The parsed document.
 */
export const getHTMLElementFromString = (rawHtml: string): Document =>
  new DOMParser().parseFromString(rawHtml, "text/html");

/**
 * Prepare the HTML for the canvas by parsing it and removing scripts that cause issues.
 *
 * Three passes are made:
 *  1. every element matching one of the `blockedScripts` selectors is removed;
 *  2. every non-empty `<script>` whose text contains one of `scriptTagKeywords` is removed;
 *  3. every occurrence of each `blockedAsyncScriptUrls` entry is blanked out of the serialized
 *     markup.
 *
 * @param rawHTML - HTML source of the page to render in the canvas.
 * @returns The `innerHTML` of the parsed document's root element after the removals above.
 */
export function processHtmlForCanvas({ rawHTML }: { rawHTML: string }): string {
  const parsedHTML = getHTMLElementFromString(rawHTML);

  // NOTE (Matt, 2023-08-07): Some themes have multiple of the same script tag on the page,
  // so we need to use querySelectorAll to remove multiple instances of blacklisted scripts
  // NOTE Ben 2024-01-19: Since the blocked scripts are selectors, we can just join them together
  // with a comma, and run one big query.
  parsedHTML
    .querySelectorAll(blockedScripts.join(", "))
    .forEach((script) => script.remove());

  // Inline scripts are identified by keyword since they have no src to select on.
  parsedHTML.querySelectorAll("script:not(:empty)").forEach((script) => {
    const scriptText = script.textContent ?? "";
    if (scriptTagKeywords.some((keyword) => scriptText.includes(keyword))) {
      script.remove();
    }
  });

  // String.prototype.replace with a string pattern only swaps the FIRST match, which would leave
  // repeated copies of a blocked URL in place. split/join strips every occurrence and treats the
  // URL as literal text.
  return blockedAsyncScriptUrls.reduce(
    (html, url) => html.split(url).join(""),
    parsedHTML.documentElement.innerHTML,
  );
}
// NOTE (Matt 2024-02-06): these are domains associated with approved integrations that may
// work in the editor.
// Each pattern is unanchored and is applied with `RegExp.prototype.test`, so a URL is accepted
// when the pattern matches anywhere inside it.
// NOTE(review): the `.` characters below are unescaped and therefore match ANY character (for
// example /judge.me/ also matches "judge-me"). Confirm whether any integration relies on that
// looseness before tightening these.
const domainsAllowed: RegExp[] = [
  /rechargecdn/,
  /reviews.io/,
  /rebuyengine/,
  /loox.io/,
  /stamped.io/,
  /judge.me/,
  /productreviews.shopifycdn.com/,
  /okendo-reviews/,
  /fera.js/,
  /junip/,
  /yotpo/,
  /klaviyo/,
  /cdn.tailwindcss.com/,
];
// NOTE (Matt 2024-02-06): These are specific keywords or snippets of code from essential script
// tags that allow certain integrations to work in the editor.
// Each pattern is unanchored and is tested against a script tag's innerHTML, so an inline
// script is kept when any one of these snippets appears anywhere in its source.
const scriptTagContentAllowed: RegExp[] = [
  /reviewsIoStore/,
  /loadReviewsIoRatingSnippets/,
  // Matches the literal text `var Shopify = Shopify || {}` (the pipes are escaped).
  /var Shopify = Shopify \|\| {}/,
  /oke-reviews/,
  /jdgm/,
  /UploadKit/,
  /junip/,
  /yotpo/,
  /klaviyo/,
];

// NOTE (Matt 2024-02-06): Shopify has a script tag on the page that has an array of urls
// that it adds to the page as script tags. We need to update this array to only include
// the scripts that we allow for integrations inside of the editor. This function takes
// the JS from the original script tag as a string, finds the urls array, and updates it
// based on the domainsAllowed array from above, then returns the modified script code.
/**
 * Rewrites the first `var urls = [...]` array literal found in `scriptCode` so that it only
 * contains the URLs matched by at least one allowlist pattern.
 *
 * Limitation: entries are separated by splitting on ",", so a URL that itself contains a comma
 * is treated as several entries.
 *
 * @param scriptCode - Source text of the script that declares the `urls` array.
 * @param allowedDomains - Patterns a URL must match (any one of them) to be kept. Defaults to
 *   the module-level `domainsAllowed` list.
 * @returns The script with the filtered array, or `scriptCode` unchanged when no non-empty
 *   `var urls = [...]` array is present.
 */
function filterAndModifyUrlsInScript(
  scriptCode: string,
  allowedDomains: readonly RegExp[] = domainsAllowed,
): string {
  const urlsArrayRegex = /var urls = \[([^\]]+)]/;
  const rawUrlList = scriptCode.match(urlsArrayRegex)?.[1];

  if (!rawUrlList) {
    // No urls array (or an empty one): nothing to filter.
    return scriptCode;
  }

  const allowedUrls = rawUrlList
    .split(",")
    // Drop surrounding whitespace and the double quotes wrapping each entry.
    .map((url) => url.trim().replace(/^"|"$/g, ""))
    .filter((url) => allowedDomains.some((regex) => regex.test(url)));

  // Quote each entry individually so that an empty result serializes as `[]` rather than
  // `[""]`, which would hand the loader a single empty URL.
  const modifiedUrlsArray = `var urls = [${allowedUrls
    .map((url) => `"${url}"`)
    .join(",")}]`;

  // A replacer function is used so that `$` sequences inside a URL (e.g. "$&") are inserted
  // verbatim instead of being interpreted as replacement patterns.
  return scriptCode.replace(urlsArrayRegex, () => modifiedUrlsArray);
}

// NOTE (Matt 2024-02-06): This function essentially removes all nonessential JS
// before rendering in the canvas. The only JS assets we allow from a user's theme
// are third-party widget scripts and some native Shopify scripts.
// We need to look through scripts and filter them out based on src as well as innerHTML;
// additionally, we need to find the Shopify script loader (see isShopifyTagLoader below)
// and parse its contents to block any unapproved scripts.
/**
 * Prepares HTML for the canvas by dropping every script that is not explicitly allowed.
 *
 * For each `<script>` that is neither inside a `.shopify-app-block` nor of type
 * "application/json":
 *  - if it contains the `asyncLoad` loader, its `urls` array is filtered in place;
 *  - otherwise it is kept only when its `src` matches `domainsAllowed` or its contents match
 *    `scriptTagContentAllowed`, and removed when neither holds.
 * The `x-cloak` / `v-cloak` attributes are then stripped from `body` and `main` elements.
 *
 * @param rawHTML - HTML source of the page to render in the canvas.
 * @returns The `innerHTML` of the parsed document's root element after the changes above.
 */
export function processHtmlForCanvasWithoutThemeScripts({
  rawHTML,
}: {
  rawHTML: string;
}): string {
  const parsedHTML = getHTMLElementFromString(rawHTML);
  // NOTE (Matt 2024-02-06): We don't want to remove script tags inside of shopify-app-blocks
  // because these scripts correspond directly to widgets that render on the page, which
  // is not the type of script that we're worried about crashing the editor. Including this
  // logic allows us to have a smaller list of validScriptContents and validScriptSrc.
  parsedHTML
    .querySelectorAll(
      // Both :not(...) groups are explicitly closed; the selector no longer depends on the
      // parser auto-closing an unterminated parenthesis at end of input.
      'script:not(.shopify-app-block script):not([type="application/json"])',
    )
    .forEach((scriptTag) => {
      const scriptContents = scriptTag.innerHTML;

      // The Shopify tag loader is kept, but its list of URLs is reduced to the allowed ones.
      const isShopifyTagLoader = scriptContents.includes(
        "function asyncLoad() {",
      );
      if (isShopifyTagLoader) {
        scriptTag.innerHTML = filterAndModifyUrlsInScript(scriptContents);
        return;
      }

      const src = scriptTag.getAttribute("src");
      const validScriptSrc =
        src !== null && domainsAllowed.some((regex) => regex.test(src));
      const validScriptContents =
        scriptContents !== "" &&
        scriptTagContentAllowed.some((regex) => regex.test(scriptContents));

      if (!validScriptSrc && !validScriptContents) {
        scriptTag.remove();
      }
    });

  // NOTE (Matt 2024-02-21): This is a generic solution to remove the x-cloak and v-cloak
  // attributes from the main content elements. These attributes are used by AlpineJS and VueJS
  // to hide elements until the page is loaded, but they cause issues with the editor.
  parsedHTML.documentElement
    .querySelectorAll("body, main")
    .forEach((element) => {
      element.removeAttribute("x-cloak");
      element.removeAttribute("v-cloak");
    });

  return parsedHTML.documentElement.innerHTML;
}
