source_utils_url.bs

' URL Utility Functions
' HTTP/HTTPS URL resolution per RFC 3986 Section 5 with automatic normalization

' Resolve a reference URI against a base URI, following RFC 3986 Section 5.
' Only http:// and https:// are recognised as schemes; any other scheme-looking reference
' (mailto:, ftp:, ...) is handled as a relative path. The result always passes through
' normalizeUrl(), so the scheme is lowercased, default ports are dropped, a trailing slash
' is stripped and an empty query is removed. Fragments never appear in the result.
' An absolute http(s) reference is returned (normalized) without consulting the base at all,
' so it still resolves when the base is empty or unparsable.
' @param baseUrl - The base URI (absolute, http/https only)
' @param reference - The reference URI (absolute http/https, relative, or protocol-relative)
' @returns Resolved and normalized absolute URI string, or empty string if resolution fails
function resolveUrl(baseUrl as string, reference as string) as string
  ' Empty and fragment-only references both point at the base document itself
  if reference = "" then return normalizeUrl(baseUrl)
  if Left(reference, 1) = "#" then return normalizeUrl(baseUrl)

  ' RFC 3986 Section 5.2.2: a reference that carries its own scheme replaces the base
  ' entirely, so handle it before (and independently of) base validation
  if LCase(Left(reference, 7)) = "http://" or LCase(Left(reference, 8)) = "https://"
    return normalizeUrl(reference)
  end if

  ' Every remaining form borrows at least the scheme from the base
  baseParts = parseUrlComponents(baseUrl)
  if not isValid(baseParts) then return ""

  ' Protocol-relative reference ("//host/path"): only the scheme comes from the base
  if Left(reference, 2) = "//"
    return normalizeUrl(baseParts.scheme + ":" + reference)
  end if

  ' Relative reference: split off fragment (discarded) and query (kept, with its "?")
  ' before touching the path. The fragment goes first because it follows the query.
  refPath = reference
  refQuery = ""

  fragmentIndex = Instr(1, refPath, "#")
  if fragmentIndex > 0
    refPath = Left(refPath, fragmentIndex - 1)
  end if

  queryIndex = Instr(1, refPath, "?")
  if queryIndex > 0
    refQuery = Mid(refPath, queryIndex)
    refPath = Left(refPath, queryIndex - 1)
  end if

  ' Only the path differs between the three relative forms
  if Left(refPath, 1) = "/"
    ' Path-absolute reference replaces the base path
    resolvedPath = removeDotSegments(refPath)
  else if refPath = ""
    ' Query-only reference (e.g. "?y") keeps the base path untouched
    resolvedPath = baseParts.path
  else
    ' Path-relative reference is merged onto the base path's directory
    resolvedPath = removeDotSegments(mergePaths(baseParts.path, refPath))
  end if

  resultParts = {
    scheme: baseParts.scheme,
    authority: baseParts.authority,
    path: resolvedPath,
    query: refQuery
  }

  return normalizeUrl(reconstructUrl(resultParts))
end function

' Bring a URL into a canonical form so equal URLs compare and persist identically.
' - Input without "://" is parsed as if it were prefixed with "http://"
' - Scheme is lowercased
' - Default ports are removed (:80 for http, :443 for https)
' - A trailing slash is removed from the path (the root "/" is left alone)
' - A bare "?" query is removed
' - http/https URLs with an empty host are rejected
' @param url - The URL to normalize
' @returns Normalized URL string, or empty string if invalid
function normalizeUrl(url as string) as string
  if url = "" then return ""

  ' Scheme-less input such as "192.168.1.100:8096" (UI suggestions, persisted legacy
  ' values) is assumed to be http for backward compatibility
  candidate = url
  if Instr(1, url, "://") = 0 then candidate = "http://" + url

  parts = parseUrlComponents(candidate)
  if not isValid(parts) then return ""

  parts.scheme = LCase(parts.scheme)
  isHttp = (parts.scheme = "http")
  isHttps = (parts.scheme = "https")

  ' An http(s) URL needs a host; this keeps values like "http://" from being stored
  if (isHttp or isHttps) and parts.host = "" then return ""

  ' Drop the port when it is the default one for the scheme
  if (isHttp and parts.port = "80") or (isHttps and parts.port = "443")
    parts.authority = parts.host
  end if

  ' Strip one trailing slash, but never reduce the root path to nothing
  pathLength = Len(parts.path)
  if pathLength > 1 and Right(parts.path, 1) = "/"
    parts.path = Left(parts.path, pathLength - 1)
  end if

  ' A query consisting only of "?" carries no information
  if parts.query = "?" then parts.query = ""

  return reconstructUrl(parts)
end function

' Parse a URL into its components using the RFC 3986 generic URI grammar.
' The host is lowercased, userinfo ("user:pass@") is discarded, and the fragment is not returned.
' When no explicit port is given (including an empty one such as "host:"), `port` holds the
' scheme default ("80" for http, "443" for https, "" otherwise) and `authority` has no port.
' For URLs with an authority the path always starts with "/".
' @param url - The URL to parse
' @returns Associative array with scheme, authority, host, port, path, query (including the
'          leading "?"), fullAuthority ("//" + authority, or ""), or invalid if parsing fails
function parseUrlComponents(url as string) as object
  if url = "" then return invalid

  ' Groups: 1=scheme, 3=authority, 4=path, 5=query (with "?"), 6=query value, 7=fragment
  ' BrightScript string literals have no escape sequences, so the literal "?" is written
  ' as the character class [?] instead of a backslash escape.
  rgx = CreateObject("roRegex", "^([a-zA-Z][a-zA-Z0-9+.-]*):(//([^/?#]*))?([^?#]*)([?]([^#]*))?(#.*)?$", "")
  match = rgx.match(url)

  ' The path group (4) takes part in every successful match, so fewer than 5 entries means no match
  if match.count() < 5 then return invalid

  scheme = match[1]
  authority = ""
  if isValid(match[3]) then authority = match[3]
  path = match[4]
  query = ""
  if match.count() > 5 and isValid(match[5]) and match[5] <> ""
    ' match[5] contains the query including the leading "?"
    query = match[5]
  end if

  ' Port reported when the URL does not name one explicitly
  defaultPort = ""
  schemeLower = LCase(scheme)
  if schemeLower = "http"
    defaultPort = "80"
  else if schemeLower = "https"
    defaultPort = "443"
  end if

  ' Split authority into host and port
  host = ""
  port = ""
  if authority <> ""
    ' Drop userinfo (user:pass@host)
    atIndex = Instr(1, authority, "@")
    if atIndex > 0
      authority = Mid(authority, atIndex + 1)
    end if

    if Left(authority, 1) = "["
      ' IPv6 literal: [::1] or [::1]:8096
      bracketIndex = Instr(1, authority, "]")
      ' No closing bracket - not a valid IPv6 literal
      if bracketIndex = 0 then return invalid

      ' Host keeps its brackets (RFC 3986); the address itself is lowercased (RFC 5952)
      host = "[" + LCase(Mid(authority, 2, bracketIndex - 2)) + "]"
      ' A port only counts if something follows the ":" after the closing bracket
      if Len(authority) > bracketIndex + 1 and Mid(authority, bracketIndex + 1, 1) = ":"
        port = Mid(authority, bracketIndex + 2)
      end if
    else
      colonIndex = Instr(1, authority, ":")
      if colonIndex > 0
        host = LCase(Left(authority, colonIndex - 1))
        port = Mid(authority, colonIndex + 1)
      else
        host = LCase(authority)
      end if
    end if

    ' Rebuild the authority from the normalized pieces. An empty port ("host:") is treated
    ' exactly like a missing one, so no dangling ":" is left behind.
    if port <> ""
      authority = host + ":" + port
    else
      authority = host
      port = defaultPort
    end if
  end if

  ' Only hierarchical URLs (those with an authority) get a leading "/" forced onto the path;
  ' non-hierarchical URIs (mailto, tel, data, etc.) keep their path as-is
  if authority <> ""
    if path = ""
      path = "/"
    else if Left(path, 1) <> "/"
      path = "/" + path
    end if
  end if

  fullAuthority = ""
  if authority <> "" then fullAuthority = "//" + authority

  return {
    scheme: scheme,
    authority: authority,
    host: host,
    port: port,
    path: path,
    query: query,
    fullAuthority: fullAuthority
  }
end function

' Combine a relative path with the directory portion of a base path.
' The directory is everything in basePath up to and including its last "/";
' when basePath contains no "/", the relative path is simply rooted at "/".
' @param basePath - The base path (its last segment is discarded)
' @param relPath - The relative path to merge
' @returns Merged path string
function mergePaths(basePath as string, relPath as string) as string
  ' Walk backwards until the cursor sits on the last "/" (or falls off the front)
  cutAt = Len(basePath)
  while cutAt > 0
    if Mid(basePath, cutAt, 1) = "/" then exit while
    cutAt = cutAt - 1
  end while

  ' No slash anywhere in the base path
  if cutAt = 0 then return "/" + relPath

  ' Keep the directory part, trailing slash included
  return Left(basePath, cutAt) + relPath
end function

' Remove "." and ".." segments from a path per RFC 3986 Section 5.2.4.
' The result always starts with "/". Empty segments (repeated slashes) are collapsed,
' and ".." never climbs above the root. A path that ends in "/", "/." or "/.." refers
' to a directory, so the result keeps a trailing slash in all three cases.
' @param path - The path to process
' @returns Path with dot segments removed ("/" when nothing remains)
function removeDotSegments(path as string) as string
  if path = "" then return "/"

  ' RFC 3986 Section 5.2.4 turns a trailing "/." or "/.." into a trailing "/",
  ' so those endings are treated like an explicit trailing slash
  endsAsDirectory = (Right(path, 1) = "/")
  if path = "." or path = ".." then endsAsDirectory = true
  if Right(path, 2) = "/." or Right(path, 3) = "/.." then endsAsDirectory = true

  rgx = CreateObject("roRegex", "/", "")
  segments = rgx.Split(path)
  output = []

  for each segment in segments
    if segment = ".."
      ' Step up one level; at the root there is nothing left to remove
      if output.count() > 0 then output.pop()
    else if segment <> "." and segment <> ""
      ' Ordinary segment - keep it
      output.push(segment)
    end if
  end for

  ' Everything cancelled out
  if output.count() = 0 then return "/"

  result = ""
  for each segment in output
    result = result + "/" + segment
  end for

  if endsAsDirectory then result = result + "/"

  return result
end function

' Assemble a URL string from its parsed components.
' The authority is taken from `authority` (prefixed with "//") or, failing that, from
' `fullAuthority` (used verbatim). A path of exactly "/" is omitted so that a URL
' without a real path does not end in a slash. The query is appended as-is.
' @param parts - Associative array with scheme, authority (or fullAuthority), path, query
' @returns Reconstructed URL string
function reconstructUrl(parts as object) as string
  schemePart = parts.scheme + ":"

  authorityPart = ""
  if isValid(parts.authority) and parts.authority <> ""
    authorityPart = "//" + parts.authority
  else if isValid(parts.fullAuthority) and parts.fullAuthority <> ""
    ' fullAuthority already carries its own "//" prefix
    authorityPart = parts.fullAuthority
  end if

  ' The bare root path is left out; any other path is kept unchanged
  pathPart = ""
  if isValid(parts.path) and parts.path <> "/"
    pathPart = parts.path
  end if

  queryPart = ""
  if isValid(parts.query)
    queryPart = parts.query
  end if

  return schemePart + authorityPart + pathPart + queryPart
end function

' Reduce a full URL to its scheme and authority, discarding path and query.
' The scheme is lowercased; the authority is used as returned by parseUrlComponents().
' @param url - The full URL
' @returns Base URL (e.g., "https://example.com:8096"), or empty string if invalid
function getBaseUrl(url as string) as string
  if url = "" then return ""

  parsed = parseUrlComponents(url)
  if not isValid(parsed) then return ""

  origin = LCase(parsed.scheme) + ":"

  ' URLs without an authority yield just "scheme:"
  if isValid(parsed.authority) and parsed.authority <> ""
    return origin + "//" + parsed.authority
  end if

  return origin
end function

' Compare two URLs by their normalized forms.
' Both inputs go through normalizeUrl(); because that returns "" for anything it
' rejects, two rejected inputs also compare as equal.
' @param url1 - First URL
' @param url2 - Second URL
' @returns true if the normalized URLs are identical, false otherwise
function urlsEqual(url1 as string, url2 as string) as boolean
  normalizedFirst = normalizeUrl(url1)
  normalizedSecond = normalizeUrl(url2)
  return normalizedFirst = normalizedSecond
end function

' Derive the server base URL by cutting a known endpoint path off the end of a URL.
' Unlike getBaseUrl(), any path prefix in front of the endpoint (e.g., /jellyfin) survives.
' The endpoint is matched case-insensitively against the end of the path; when it does not
' match, the path is left unchanged. The query string is dropped in every case.
' @param url - The full URL (e.g., "https://host/jellyfin/system/info/public")
' @param endpointPath - The endpoint path to remove (e.g., "/system/info/public")
' @returns Base URL with preserved path (e.g., "https://host/jellyfin"), or empty string if invalid
function getServerBaseUrl(url as string, endpointPath as string) as string
  if url = "" or endpointPath = "" then return ""

  parts = parseUrlComponents(url)
  if not isValid(parts) then return ""

  ' Compare lowercased copies so the suffix match ignores case
  wantedSuffix = LCase(endpointPath)
  if Right(LCase(parts.path), Len(wantedSuffix)) = wantedSuffix
    ' Cut the endpoint off the original (case-preserved) path
    trimmed = Left(parts.path, Len(parts.path) - Len(endpointPath))
    ' Drop a trailing slash left behind, but never shrink the root "/"
    if Len(trimmed) > 1 and Right(trimmed, 1) = "/"
      trimmed = Left(trimmed, Len(trimmed) - 1)
    end if
    parts.path = trimmed
  end if

  ' The base server URL must not carry endpoint-specific parameters
  parts.query = ""

  return reconstructUrl(parts)
end function