#!/usr/bin/env bash
# -----------------------------------------------------------------------------
# fremforge pre-migration-check.sh
#
# Run this against your existing GitHub, GitLab, or Azure DevOps tenant BEFORE
# starting a fremforge migration. It surfaces the conditions that cause
# mid-migration pain: oversized repos, heavy LFS pools, Marketplace action
# dependencies, third-party integrations that do not auto-reconnect.
#
# Usage:
#   GITHUB_TOKEN=xxx ./pre-migration-check.sh github <org-or-user>
#   GITLAB_TOKEN=xxx ./pre-migration-check.sh gitlab <group-or-namespace>
#   ADO_TOKEN=xxx   ./pre-migration-check.sh azure-devops <org>/<project>
#
# Exit code 0 = clean migration expected; no blocking issues detected.
# Exit code 1 = usage error.
# Exit code 2 = at least one warning-class finding (migration is still
#                expected to succeed, but plan for the specific points
#                the output flags).
# Exit code 3 = at least one blocking finding (investigate before migrating
#                or budget substantial extra migration time).
#
# Requires: bash 4+, curl, jq.
#
# Authoring note: this script intentionally performs READ-ONLY queries only.
# It does not modify any state in the source tenant.
# -----------------------------------------------------------------------------

# Strict mode: abort on unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# -----------------------------------------------------------------------------
# Configuration
# -----------------------------------------------------------------------------

# Size thresholds in MB. Reaching a *_WARN_MB value raises a warning; reaching
# a *_BLOCK_MB value raises a blocker instead.
REPO_SIZE_WARN_MB=500
REPO_SIZE_BLOCK_MB=2048
LFS_SIZE_WARN_MB=1024
LFS_SIZE_BLOCK_MB=5120
MARKETPLACE_ACTIONS_WARN=10   # ≥10 distinct Marketplace actions → warn (checks use -ge)
WEBHOOKS_WARN=3               # ≥3 configured webhooks → warn (checks use -ge)

# Running tallies, incremented by warn() and block(); main() reports them and
# derives the exit code (3 if any blocker, 2 if any warning, else 0).
FINDING_COUNT=0
WARNING_COUNT=0
BLOCKER_COUNT=0

# P2-MIG-05: machine-readable mode. When `--json` is passed, suppress
# terminal-styled output and emit a single structured envelope at the
# end. Findings are appended, one JSON object per line, to the file named
# by JSON_FINDINGS_FILE alongside the counters.
JSON_MODE=0
JSON_FINDINGS_FILE=""

# -----------------------------------------------------------------------------
# Pretty printing
# -----------------------------------------------------------------------------

# Colour helpers: each joins its arguments with spaces and prints them as one
# ANSI-coloured line on stdout. All of them stay silent on stdout when
# JSON_MODE is "1" so the JSON envelope is the only thing a consumer sees.

# red — hard errors (missing token, unknown platform) and blocker findings.
# In JSON mode the message is still surfaced, uncoloured, on stderr so the
# operator can see why the run exited non-zero.
red() {
  case "${JSON_MODE}" in
    1)
      printf '%s\n' "$*" >&2
      return 0
      ;;
  esac
  printf '\033[0;31m%s\033[0m\n' "$*"
}

# yellow — warnings.
yellow() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    printf '\033[0;33m%s\033[0m\n' "$*"
  fi
}

# green — success messages.
green() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    printf '\033[0;32m%s\033[0m\n' "$*"
  fi
}

# cyan — informational messages.
cyan() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    printf '\033[0;36m%s\033[0m\n' "$*"
  fi
}

# bold — section headings.
bold() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    printf '\033[1m%s\033[0m\n' "$*"
  fi
}

# ok — report a passed check (human mode only).
ok() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    green "  ✓ $*"
  fi
}

# warn — record a warning-class finding. Bumps WARNING_COUNT and
# FINDING_COUNT, then either prints a yellow line (human mode) or appends a
# {level, message} JSON object to JSON_FINDINGS_FILE (JSON mode).
warn() {
  WARNING_COUNT=$((1 + WARNING_COUNT))
  FINDING_COUNT=$((1 + FINDING_COUNT))
  if [[ "${JSON_MODE}" != "1" ]]; then
    yellow "  ⚠ $*"
    return
  fi
  jq -nc --arg lvl warning --arg msg "$*" '{level:$lvl, message:$msg}' >> "${JSON_FINDINGS_FILE}"
  return 0
}

# block — record a blocking finding. Same shape as warn(), but bumps
# BLOCKER_COUNT and prints in red.
block() {
  BLOCKER_COUNT=$((1 + BLOCKER_COUNT))
  FINDING_COUNT=$((1 + FINDING_COUNT))
  if [[ "${JSON_MODE}" != "1" ]]; then
    red "  ✗ $*"
    return
  fi
  jq -nc --arg lvl blocker --arg msg "$*" '{level:$lvl, message:$msg}' >> "${JSON_FINDINGS_FILE}"
  return 0
}

# info — print a neutral progress/context line (human mode only); never
# counted as a finding.
info() {
  if [[ "${JSON_MODE}" != "1" ]]; then
    cyan "  · $*"
  fi
}

# -----------------------------------------------------------------------------
# Dependency check
# -----------------------------------------------------------------------------

# need — assert that an external command is on PATH.
# Arguments: $1 - command name.
# Returns normally when the command is found; otherwise prints a two-line
# error via red() and exits the script with status 1.
need() {
  command -v "$1" >/dev/null 2>&1 && return 0
  red "Required command not found: $1"
  red "Install it and re-run."
  exit 1
}

# Fail fast (exit 1) before any scanning starts if either external tool is
# missing: curl performs every API request and jq parses every response.
need curl
need jq

# -----------------------------------------------------------------------------
# Rate-limit aware curl
# -----------------------------------------------------------------------------
#
# P2-MIGRATE-01 — large-org scans issue several sequential GETs per repo. A
# 500-repo org is thousands of calls in tight succession against a
# 5,000/hour PAT quota. Stock `curl -s` hands the caller an error body on
# 429, which downstream `jq` coalesces to `0`/empty, producing a
# falsely-clean report.
#
# api_curl ARGS...
#   ARGS are passed to curl verbatim (headers, URL, ...). Do not pass your own
#   -o / -D / -w: the helper uses them to capture body, headers and status.
#
# Behaviour:
#   - 2xx: body on stdout, return 0.
#   - 429 / 5xx: retried, five tries in total. Between tries it sleeps for
#     the `Retry-After` value of THAT response (seconds, capped at 60s) or,
#     when the header is absent, for a jittered exponential delay
#     (1..1s, 1..2s, 1..4s, 1..8s). No extra probe request is made to read
#     the header, and there is no sleep after the final try.
#   - any other status (3xx, non-429 4xx) or a curl/network failure: not
#     retried; whatever body was received is written to stdout, return 1.
#   - retry budget exhausted: nothing on stdout, return 1.
#
# The helper never records a finding itself: it normally runs inside a
# command substitution, where counter updates would be lost. Callers MUST
# check the return status (e.g. `if body="$(api_curl ...)"; then`) and call
# warn() on failure. Do not append `|| true` inside the substitution, as that
# hides the status.
api_curl() {
  local max_attempts=5 attempt=0 delay=1
  local http_code retry_after jitter body_file hdr_file

  body_file="$(mktemp)" || return 1
  hdr_file="$(mktemp)" || { rm -f -- "${body_file}"; return 1; }

  while [[ "${attempt}" -lt "${max_attempts}" ]]; do
    : > "${body_file}"
    : > "${hdr_file}"
    # -o/-D capture body and response headers; -w prints only the status.
    # If curl itself fails (DNS, TLS, reset) force the sentinel "000" rather
    # than appending to whatever -w already printed.
    http_code="$(curl -s -o "${body_file}" -D "${hdr_file}" -w '%{http_code}' "$@" 2>/dev/null)" \
      || http_code="000"

    if [[ "${http_code}" =~ ^2 ]]; then
      cat "${body_file}"
      rm -f -- "${body_file}" "${hdr_file}"
      return 0
    fi

    if [[ "${http_code}" != "429" && ! "${http_code}" =~ ^5 ]]; then
      # 4xx (non-429), 3xx, or network 000: don't retry, just return.
      cat "${body_file}"
      rm -f -- "${body_file}" "${hdr_file}"
      return 1
    fi

    attempt=$((attempt + 1))
    # Out of budget: sleeping now would only delay the failure.
    [[ "${attempt}" -ge "${max_attempts}" ]] && break

    # Retry-After comes from the throttled response itself.
    retry_after="$(awk 'tolower($1) == "retry-after:" { gsub(/\r/, "", $2); print $2; exit }' \
      "${hdr_file}" 2>/dev/null || true)"
    if [[ "${retry_after}" =~ ^[0-9]+$ ]]; then
      # Force base 10 so a value such as "08" is not parsed as bad octal.
      retry_after=$((10#${retry_after}))
    else
      retry_after=0
    fi

    if [[ "${retry_after}" -gt 0 ]]; then
      # Cap at 60s per attempt — anything longer means we're not going to
      # recover within this script's run.
      [[ "${retry_after}" -gt 60 ]] && retry_after=60
      sleep "${retry_after}"
    else
      # Jittered exponential: 1..delay seconds.
      jitter=$((RANDOM % delay + 1))
      sleep "${jitter}"
      delay=$((delay * 2))
    fi
  done

  rm -f -- "${body_file}" "${hdr_file}"
  return 1
}

# -----------------------------------------------------------------------------
# GitHub checks
# -----------------------------------------------------------------------------

# check_github — scan a GitHub organisation or user account.
# Arguments: $1 - organisation or user login.
# Environment: GITHUB_TOKEN (required).
# Effects: prints progress via bold/info and records findings via
#   warn()/block(). Exits 1 if the token is missing or the target cannot be
#   resolved.
# Per repository it checks size, LFS, workflow Marketplace references,
# webhooks and protected branches; for organisations it also checks
# org-level Actions secrets/variables and container packages.
check_github() {
  local target="$1"
  local api_base="https://api.github.com"

  if [[ -z "${GITHUB_TOKEN:-}" ]]; then
    red "GITHUB_TOKEN environment variable is not set."
    exit 1
  fi

  local auth=(-H "Authorization: Bearer ${GITHUB_TOKEN}" -H "Accept: application/vnd.github+json")

  # jq filters that coalesce error payloads (objects such as
  # {"message": "..."}) to 0 instead of counting their keys or yielding null.
  local array_len='if type == "array" then length else 0 end'
  local total_count='if type == "object" then (.total_count // 0) else 0 end'

  bold "Scanning GitHub tenant: ${target}"
  echo

  # Determine if target is an org or user
  local target_type
  if curl -sf "${auth[@]}" "${api_base}/orgs/${target}" >/dev/null 2>&1; then
    target_type="orgs"
  elif curl -sf "${auth[@]}" "${api_base}/users/${target}" >/dev/null 2>&1; then
    target_type="users"
  else
    red "Cannot resolve ${target} as a GitHub organisation or user (check token scopes + target name)."
    exit 1
  fi

  info "Listing repositories (this may take a minute for large orgs)..."
  # P0-PREMIG-PAGINATION (round-6 audit, 2026-05-08): walk the listing page
  # by page (100 per page) until a short or empty page is returned. A page
  # that cannot be fetched is reported as a warning, because the repositories
  # on it (and after it) are then unaudited.
  local repos="" page=1 page_json page_body page_count
  while :; do
    if ! page_json="$(api_curl "${auth[@]}" "${api_base}/${target_type}/${target}/repos?per_page=100&type=all&page=${page}")"; then
      warn "Repository listing page ${page} could not be fetched — repositories from this page onward were NOT checked, re-run after backoff."
      break
    fi
    # Only iterate real arrays; an error object must not become repo rows.
    page_body="$(jq -c 'if type == "array" then .[] else empty end' <<< "${page_json}" 2>/dev/null || true)"
    [[ -z "${page_body}" ]] && break
    repos+="${page_body}"$'\n'
    page_count="$(jq "${array_len}" <<< "${page_json}" 2>/dev/null || echo 0)"
    # GitHub returns a max of 100 per page. Anything less = last page.
    [[ "${page_count}" -lt 100 ]] && break
    page=$((page + 1))
    # Safety belt — orgs > 50 pages (5,000 repos) are vanishingly rare;
    # break to avoid infinite-loop on a misbehaving server.
    [[ "${page}" -gt 50 ]] && { info "stopped at page 50 (5000 repos) — orgs larger than this should run the check against a specific subset (e.g. fork the script and add a name filter, or run per-team). Open an issue at frem.sh/fremforge/docs if you need a paginated mode."; break; }
  done
  local repo_count
  repo_count="$(jq -s 'length' <<< "${repos}")"
  info "Found ${repo_count} repositories."
  echo

  bold "Per-repository checks"
  # The loop reads from a here-string so counter updates made by
  # warn()/block() happen in this shell, not in a pipe subshell.
  local repo_json name size disabled archived size_mb repo_path lfs_info
  local wf_json wf_list wf_path wf_content marketplace_refs marketplace_count
  local hooks_json webhook_count branches_json protected_branches
  while IFS= read -r repo_json; do
    # The accumulated listing ends with a newline; skip the blank record so
    # no API calls are made for a repository with an empty name.
    [[ -z "${repo_json}" ]] && continue

    name="$(jq -r '.name' <<< "${repo_json}")"
    size="$(jq -r '.size // 0' <<< "${repo_json}")"    # size is in KB
    disabled="$(jq -r '.disabled' <<< "${repo_json}")"
    archived="$(jq -r '.archived' <<< "${repo_json}")"
    # A user listing with type=all can include repositories owned by someone
    # else, so address each repo by its own owner/name.
    repo_path="$(jq -r '.full_name // empty' <<< "${repo_json}")"
    repo_path="${repo_path:-${target}/${name}}"

    [[ "${archived}" == "true" ]] && { info "${name}: archived (skip)"; continue; }
    [[ "${disabled}" == "true" ]] && { info "${name}: disabled (skip)"; continue; }

    size_mb=$((size / 1024))

    if [[ "${size_mb}" -ge "${REPO_SIZE_BLOCK_MB}" ]]; then
      block "${name}: ${size_mb} MB — use the mirror-clone migration path, not the UI importer."
    elif [[ "${size_mb}" -ge "${REPO_SIZE_WARN_MB}" ]]; then
      warn "${name}: ${size_mb} MB — importer may be slow; expect 10+ minutes."
    fi

    # LFS check (requires admin on the repo; may 403 on user-owned repos)
    lfs_info="$(curl -s -o /dev/null -w '%{http_code}' "${auth[@]}" "${api_base}/repos/${repo_path}/lfs" 2>/dev/null || echo "000")"
    if [[ "${lfs_info}" == "200" ]]; then
      info "${name}: LFS enabled — ensure local 'git lfs' tooling is available for mirror path."
    fi

    # Workflows scan
    # P2-MIGRATE-01 — api_curl's status is tested directly so a failed call
    # surfaces as a warning instead of an empty (falsely clean) list.
    wf_list=""
    if wf_json="$(api_curl "${auth[@]}" "${api_base}/repos/${repo_path}/actions/workflows")"; then
      wf_list="$(jq -r '.workflows[]?.path' <<< "${wf_json}" 2>/dev/null || true)"
    else
      warn "${name}: workflows API call did not complete cleanly (rate-limit / transient) — workflow Marketplace count UNKNOWN, re-run after backoff."
    fi
    if [[ -n "${wf_list}" ]]; then
      # Collect every "uses: owner/action@" reference across all workflow
      # files, then count distinct actions once, so an action used in
      # several workflows is counted a single time.
      marketplace_refs=""
      while IFS= read -r wf_path; do
        [[ -z "${wf_path}" ]] && continue
        wf_content="$(curl -sf "${auth[@]}" "${api_base}/repos/${repo_path}/contents/${wf_path}" 2>/dev/null | jq -r '.content // empty' | base64 -d 2>/dev/null || true)"
        [[ -z "${wf_content}" ]] && continue
        # grep exits 1 when a workflow has no such reference; that is a
        # normal outcome, not a reason to abort under `set -e`/pipefail.
        marketplace_refs+="$(grep -oE 'uses:[[:space:]]+[^@/]+/[^@]+@' <<< "${wf_content}" || true)"$'\n'
      done <<< "${wf_list}"
      marketplace_count="$(awk 'NF { sub(/^uses:[[:space:]]+/, ""); if (!($0 in seen)) { seen[$0] = 1; n++ } } END { print n + 0 }' <<< "${marketplace_refs}")"
      if [[ "${marketplace_count}" -ge "${MARKETPLACE_ACTIONS_WARN}" ]]; then
        warn "${name}: ${marketplace_count} distinct Marketplace action references across workflows — cross-check against docs.frem.sh/marketplace-compat before migrating."
      fi
    fi

    # Webhook count
    webhook_count=0
    if hooks_json="$(api_curl "${auth[@]}" "${api_base}/repos/${repo_path}/hooks")"; then
      webhook_count="$(jq "${array_len}" <<< "${hooks_json}" 2>/dev/null || echo 0)"
    else
      warn "${name}: hooks API call did not complete cleanly (rate-limit / transient) — webhook count UNKNOWN, re-run after backoff."
    fi
    if [[ "${webhook_count:-0}" -ge "${WEBHOOKS_WARN}" ]]; then
      warn "${name}: ${webhook_count} webhooks configured — document each before migration."
    fi

    # Branch protection check (complexity indicator)
    protected_branches=0
    if branches_json="$(api_curl "${auth[@]}" "${api_base}/repos/${repo_path}/branches?protected=true")"; then
      protected_branches="$(jq "${array_len}" <<< "${branches_json}" 2>/dev/null || echo 0)"
    else
      warn "${name}: branches API call did not complete cleanly (rate-limit / transient) — protected-branch count UNKNOWN, re-run after backoff."
    fi
    if [[ "${protected_branches:-0}" -gt 0 ]]; then
      info "${name}: ${protected_branches} protected branch(es) — re-create branch-protection rules post-migration."
    fi
  done <<< "${repos}"

  echo
  bold "Org-level checks"

  if [[ "${target_type}" == "orgs" ]]; then
    local org_secrets
    org_secrets="$(curl -s "${auth[@]}" "${api_base}/orgs/${target}/actions/secrets" | jq "${total_count}" 2>/dev/null || echo 0)"
    if [[ "${org_secrets:-0}" -gt 0 ]]; then
      warn "Org has ${org_secrets} organisation-level Actions secret(s) — re-enter in fremforge."
    fi

    local org_variables
    org_variables="$(curl -s "${auth[@]}" "${api_base}/orgs/${target}/actions/variables" | jq "${total_count}" 2>/dev/null || echo 0)"
    if [[ "${org_variables:-0}" -gt 0 ]]; then
      info "Org has ${org_variables} organisation-level Actions variable(s) — re-enter or use workflow defaults."
    fi

    local packages
    packages="$(curl -s "${auth[@]}" "${api_base}/orgs/${target}/packages?package_type=container" | jq "${array_len}" 2>/dev/null || echo 0)"
    if [[ "${packages:-0}" -gt 0 ]]; then
      warn "Org publishes ${packages} container package(s) — migrate to fremforge package registry separately."
    fi
  fi
}

# -----------------------------------------------------------------------------
# GitLab checks
# -----------------------------------------------------------------------------

# check_gitlab — scan a GitLab group (including its subgroups).
# Arguments: $1 - group path, e.g. "acme" or "acme/platform".
# Environment: GITLAB_TOKEN (required); GITLAB_URL (optional, defaults to
#   https://gitlab.com).
# Effects: prints progress via bold/info and records findings via
#   warn()/block(). Exits 1 if the token is missing or the group cannot be
#   resolved.
check_gitlab() {
  local target="$1"
  local api_base="${GITLAB_URL:-https://gitlab.com}/api/v4"

  if [[ -z "${GITLAB_TOKEN:-}" ]]; then
    red "GITLAB_TOKEN environment variable is not set."
    exit 1
  fi

  local auth=(-H "PRIVATE-TOKEN: ${GITLAB_TOKEN}")

  # Coalesce error payloads (objects such as {"message":"403 Forbidden"})
  # to 0 instead of counting their keys.
  local array_len='if type == "array" then length else 0 end'

  bold "Scanning GitLab tenant: ${target}"
  echo

  info "Resolving group..."
  local group_id
  # `|| true`: a non-JSON reply must reach the explicit error below rather
  # than abort silently under `set -e`/pipefail.
  group_id="$(curl -s "${auth[@]}" "${api_base}/groups/${target//\//%2F}" | jq -r 'if type == "object" then (.id // empty) else empty end' 2>/dev/null || true)"
  if [[ -z "${group_id}" ]]; then
    red "Cannot resolve ${target} as a GitLab group (check token scopes + path)."
    exit 1
  fi

  info "Listing projects under group ${target} (id=${group_id}, includes subgroups)..."
  # P0-PREMIG-PAGINATION (round-6 audit, 2026-05-08): paginate GitLab
  # listings. GitLab also caps per_page at 100 and uses page=N. Same
  # safety belt at 50 pages. A page that cannot be fetched is reported as a
  # warning because the projects on it are then unaudited.
  local projects="" page=1 page_json page_body page_count
  while :; do
    if ! page_json="$(api_curl "${auth[@]}" "${api_base}/groups/${group_id}/projects?include_subgroups=true&per_page=100&page=${page}")"; then
      warn "Project listing page ${page} could not be fetched — projects from this page onward were NOT checked, re-run after backoff."
      break
    fi
    page_body="$(jq -c 'if type == "array" then .[] else empty end' <<< "${page_json}" 2>/dev/null || true)"
    [[ -z "${page_body}" ]] && break
    projects+="${page_body}"$'\n'
    page_count="$(jq "${array_len}" <<< "${page_json}" 2>/dev/null || echo 0)"
    [[ "${page_count}" -lt 100 ]] && break
    page=$((page + 1))
    [[ "${page}" -gt 50 ]] && { info "stopped at page 50 — see GitHub note above for tenants > 5000 projects"; break; }
  done
  local project_count
  project_count="$(jq -s 'length' <<< "${projects}")"
  info "Found ${project_count} projects."
  echo

  bold "Per-project checks"
  # Here-string input keeps the loop in this shell so counters propagate.
  local project_json id path_full default_branch archived depth
  local stats repo_bytes lfs_bytes repo_mb lfs_mb ci_yaml
  local job_count include_count extends_count services_count reserved_hit complexity
  local webhook_count pages_status
  # Top-level keys that aren't jobs — strip from the count.
  local reserved_keys='^(stages|variables|default|include|workflow|cache|before_script|after_script|image|services|types|pages):$'
  while IFS= read -r project_json; do
    # The accumulated listing ends with a newline; skip the blank record.
    # Without this the loop would query "/projects/" with an empty id.
    [[ -z "${project_json}" ]] && continue

    id="$(jq -r '.id' <<< "${project_json}")"
    path_full="$(jq -r '.path_with_namespace' <<< "${project_json}")"
    default_branch="$(jq -r '.default_branch // "(none)"' <<< "${project_json}")"
    archived="$(jq -r '.archived' <<< "${project_json}")"

    [[ "${archived}" == "true" ]] && { info "${path_full}: archived (skip)"; continue; }

    # P2-MIG-02: subgroup depth tally. GitLab supports arbitrary nesting;
    # fremforge maps `<group>/<repo>` to a single Forgejo org+repo pair, so
    # paths with 3 or more segments need an explicit mapping decision
    # before migration. Warn — never block.
    depth="$(awk -F/ '{print NF}' <<< "${path_full}")"
    if [[ "${depth}" -ge 3 ]]; then
      warn "${path_full}: depth=${depth} (≥3 levels deep) — flat-org mapping requires a pre-migration name decision (see docs.frem.sh/migration/gitlab/#subgroups)."
    fi

    # P2-MIG-12: projects with no commits report default_branch=null
    # ("(none)" above). Skip the per-branch checks for them.
    if [[ "${default_branch}" == "(none)" ]]; then
      info "${path_full}: empty repo (no default branch) — skipping per-branch checks"
      continue
    fi

    # Statistics (requires admin role typically)
    stats="$(curl -s "${auth[@]}" "${api_base}/projects/${id}?statistics=true" | jq -c 'if type == "object" then (.statistics // {}) else {} end' 2>/dev/null || true)"
    [[ -n "${stats}" ]] || stats='{}'
    repo_bytes="$(jq -r '.repository_size // 0' <<< "${stats}" 2>/dev/null || echo 0)"
    lfs_bytes="$(jq -r '.lfs_objects_size // 0' <<< "${stats}" 2>/dev/null || echo 0)"
    repo_mb=$((repo_bytes / 1024 / 1024))
    lfs_mb=$((lfs_bytes / 1024 / 1024))

    if [[ "${repo_mb}" -ge "${REPO_SIZE_BLOCK_MB}" ]]; then
      block "${path_full}: ${repo_mb} MB repo — use mirror-clone migration path."
    elif [[ "${repo_mb}" -ge "${REPO_SIZE_WARN_MB}" ]]; then
      warn "${path_full}: ${repo_mb} MB repo — importer may be slow."
    fi
    if [[ "${lfs_mb}" -ge "${LFS_SIZE_BLOCK_MB}" ]]; then
      block "${path_full}: ${lfs_mb} MB LFS pool — mirror migration strongly preferred."
    elif [[ "${lfs_mb}" -ge "${LFS_SIZE_WARN_MB}" ]]; then
      warn "${path_full}: ${lfs_mb} MB LFS pool — allocate time."
    fi

    # .gitlab-ci.yml presence and complexity signal (P2-MIG-11).
    #   job_count      = unindented `name:` lines minus reserved top-level keys
    #   include_count  = `include:` lines
    #   extends_count  = `extends:` lines
    #   services_count = `services:` header lines
    # Complexity = job_count + include_count×5 + extends_count×3 +
    # services_count×2; threshold 30.
    #
    # Presence is decided by the HTTP status (`-f` yields empty output on
    # any 4xx/5xx), not by searching the body for "404", which would discard
    # real CI files that merely mention that number. `-G --data-urlencode`
    # keeps branch names with reserved characters intact in the query.
    ci_yaml="$(curl -sf -G "${auth[@]}" --data-urlencode "ref=${default_branch}" "${api_base}/projects/${id}/repository/files/.gitlab-ci.yml/raw" 2>/dev/null || true)"
    if [[ -n "${ci_yaml}" ]]; then
      # grep -c prints the count but exits 1 on zero matches; `|| true`
      # keeps that from tripping `set -e`.
      job_count="$(grep -cE '^[a-zA-Z_][a-zA-Z0-9_-]*:$' <<< "${ci_yaml}" || true)"
      reserved_hit="$(grep -cE "${reserved_keys}" <<< "${ci_yaml}" || true)"
      job_count=$((job_count - reserved_hit))
      [[ "${job_count}" -lt 0 ]] && job_count=0
      include_count="$(grep -cE '^[[:space:]]*include:' <<< "${ci_yaml}" || true)"
      extends_count="$(grep -cE '^[[:space:]]*extends:' <<< "${ci_yaml}" || true)"
      services_count="$(grep -cE '^[[:space:]]*services:' <<< "${ci_yaml}" || true)"
      complexity=$((job_count + include_count * 5 + extends_count * 3 + services_count * 2))
      if [[ "${complexity}" -ge 30 ]]; then
        warn "${path_full}: complex GitLab CI pipeline (complexity heuristic ${complexity}, jobs=${job_count}) — budget 2-4 hours for Forgejo Actions conversion."
      elif [[ "${include_count}" -ge 1 ]]; then
        warn "${path_full}: GitLab CI uses 'include:' (${include_count} refs) — map dependency graph before migration."
      fi
    fi

    # Webhook count
    webhook_count="$(curl -s "${auth[@]}" "${api_base}/projects/${id}/hooks" | jq "${array_len}" 2>/dev/null || echo 0)"
    if [[ "${webhook_count:-0}" -ge "${WEBHOOKS_WARN}" ]]; then
      warn "${path_full}: ${webhook_count} webhooks configured — document each before migration."
    fi

    # GitLab Pages deployments
    pages_status="$(curl -s -o /dev/null -w '%{http_code}' "${auth[@]}" "${api_base}/projects/${id}/pages" 2>/dev/null || echo "000")"
    if [[ "${pages_status}" == "200" ]]; then
      warn "${path_full}: GitLab Pages deployment active — reconfigure as Forgejo Pages."
    fi
  done <<< "${projects}"

  echo
  bold "Group-level checks"

  local cicd_vars
  cicd_vars="$(curl -s "${auth[@]}" "${api_base}/groups/${group_id}/variables" | jq "${array_len}" 2>/dev/null || echo 0)"
  if [[ "${cicd_vars:-0}" -gt 0 ]]; then
    warn "Group has ${cicd_vars} CI/CD variable(s) — re-enter in fremforge (secret values cannot be exported)."
  fi
}

# -----------------------------------------------------------------------------
# Azure DevOps checks
# -----------------------------------------------------------------------------

# check_azure_devops — scan one Azure DevOps project.
# Arguments: $1 - "<org>/<project>".
# Environment: ADO_TOKEN (required; sent as the basic-auth password).
# Effects: prints progress via bold/info and records findings via
#   warn()/block(). Exits 1 if the token is missing, the target is malformed,
#   or the repository listing cannot be read.
check_azure_devops() {
  local target="$1"

  if [[ -z "${ADO_TOKEN:-}" ]]; then
    red "ADO_TOKEN environment variable is not set."
    exit 1
  fi

  local org project
  IFS='/' read -r org project <<< "${target}"
  if [[ -z "${org}" || -z "${project}" ]]; then
    red "Target must be <org>/<project> (e.g. acme/backend-services)."
    exit 1
  fi

  local api_base="https://dev.azure.com/${org}/${project}/_apis"
  local auth=(-u ":${ADO_TOKEN}")

  # Reads `.count` from a list envelope; error payloads and envelopes
  # without the field coalesce to 0 instead of the literal `null`.
  local count_of='if type == "object" then (.count // 0) else 0 end'

  bold "Scanning Azure DevOps tenant: ${org}/${project}"
  echo

  info "Listing Git repositories..."
  # P0-PREMIG-PAGINATION (round-6 audit, 2026-05-08): this endpoint is
  # queried once, without paging parameters. Defensive belt: if the count
  # reaches 5000 the response may have been truncated server-side, so that
  # is recorded as a warning.
  local repos_json repos
  repos_json="$(curl -s "${auth[@]}" "${api_base}/git/repositories?api-version=7.1" || true)"
  # A reply without a `.value` array (sign-in page, error object, empty
  # body from a failed request) is an access problem, not "zero repos".
  if ! repos="$(jq -c '.value[]' <<< "${repos_json}" 2>/dev/null)" \
    || [[ -z "${repos_json}" ]]; then
    red "Cannot list Git repositories for ${org}/${project} (check token scopes + target name)."
    exit 1
  fi
  local repo_count
  repo_count="$(jq -s 'length' <<< "${repos}")"
  if [[ "${repo_count}" -ge 5000 ]]; then
    # Counted as a finding so partial coverage affects the summary and the
    # exit code instead of scrolling past as an uncounted message.
    warn "ADO returned ${repo_count} repos — at or above the documented hard cap. Some repos may have been truncated; re-run with manual filters or contact ADO support."
  fi
  info "Found ${repo_count} repositories."
  echo

  bold "Per-repository checks"
  # Here-string input keeps the loop in this shell so counters propagate.
  local repo_json id name size_bytes size_mb
  while IFS= read -r repo_json; do
    # An empty listing still yields one blank line; skip it.
    [[ -z "${repo_json}" ]] && continue

    id="$(jq -r '.id' <<< "${repo_json}")"
    name="$(jq -r '.name' <<< "${repo_json}")"
    size_bytes="$(jq -r '.size // 0' <<< "${repo_json}")"
    size_mb=$((size_bytes / 1024 / 1024))

    if [[ "${size_mb}" -ge "${REPO_SIZE_BLOCK_MB}" ]]; then
      block "${name}: ${size_mb} MB — use mirror-clone migration path."
    elif [[ "${size_mb}" -ge "${REPO_SIZE_WARN_MB}" ]]; then
      warn "${name}: ${size_mb} MB — importer may be slow."
    fi
  done <<< "${repos}"

  echo
  bold "Project-level checks"

  local pipeline_count
  pipeline_count="$(curl -s "${auth[@]}" "${api_base}/pipelines?api-version=7.1" | jq "${count_of}" 2>/dev/null || echo 0)"
  pipeline_count="${pipeline_count:-0}"
  info "${pipeline_count} build/release pipeline(s) in project."
  if [[ "${pipeline_count}" -ge 10 ]]; then
    warn "Project has ${pipeline_count} pipelines — pipeline conversion to Forgejo Actions is the dominant migration cost. Budget 2-6 hours per pipeline."
  fi

  local svc_conn_count
  svc_conn_count="$(curl -s "${auth[@]}" "${api_base}/serviceendpoint/endpoints?api-version=7.1-preview.4" | jq "${count_of}" 2>/dev/null || echo 0)"
  if [[ "${svc_conn_count:-0}" -gt 0 ]]; then
    warn "Project has ${svc_conn_count} service connection(s) — replace with OIDC federation (strongly recommended) or re-enter as fremforge secrets."
  fi

  local variable_groups
  variable_groups="$(curl -s "${auth[@]}" "${api_base}/distributedtask/variablegroups?api-version=7.1-preview.2" | jq "${count_of}" 2>/dev/null || echo 0)"
  if [[ "${variable_groups:-0}" -gt 0 ]]; then
    warn "Project has ${variable_groups} variable group(s) — re-create as fremforge environment-level secrets (secret values cannot be exported)."
  fi

  # Test Plans
  local test_plan_count
  test_plan_count="$(curl -s "${auth[@]}" "${api_base}/testplan/plans?api-version=7.1-preview.1" | jq "${count_of}" 2>/dev/null || echo 0)"
  if [[ "${test_plan_count:-0}" -gt 0 ]]; then
    block "Project has ${test_plan_count} Test Plan(s) — fremforge has no direct equivalent. Either keep ADO Test Plans as a separate tool or migrate to a dedicated test-management tool before decommissioning ADO."
  fi

  # Classic (non-YAML) pipelines check: build definitions whose
  # process.type is 1.
  local classic_count
  classic_count="$(curl -s "${auth[@]}" "${api_base}/build/definitions?api-version=7.1" | jq 'if type == "object" then ([.value[]? | select(.process.type == 1)] | length) else 0 end' 2>/dev/null || echo 0)"
  if [[ "${classic_count:-0}" -gt 0 ]]; then
    block "Project has ${classic_count} classic (non-YAML) pipeline(s). Convert to YAML pipelines in ADO before migrating — classic→Forgejo Actions conversion is otherwise a from-scratch rewrite."
  fi
}

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------

# main — parse the command line, run the platform check, print the summary.
# Arguments: [--json] <platform> <target>
# Exit status: 0 no findings · 1 usage/authentication error · 2 warnings
#   only · 3 at least one blocker.
main() {
  # P2-MIG-05: optional --json flag (must precede the positional args)
  # produces a structured envelope on stdout suitable for ticket
  # attachment. Human-readable mode stays the default.
  if [[ "${1:-}" == "--json" ]]; then
    JSON_MODE=1
    JSON_FINDINGS_FILE="$(mktemp -t fremforge-premig-XXXXXX.ndjson)"
    # Remove the scratch file on EVERY exit path — usage errors, unknown
    # platform, `exit 1` inside a check function, or a command failing under
    # `set -e` — not only after a successful run.
    trap 'rm -f -- "${JSON_FINDINGS_FILE}"' EXIT
    : > "${JSON_FINDINGS_FILE}"
    shift
  fi

  if [[ $# -lt 2 ]]; then
    cat >&2 <<EOF
Usage: $0 [--json] <platform> <target>

Platforms:
  github         — target is <org> or <user>
  gitlab         — target is <group> or <group/subgroup>
  azure-devops   — target is <org>/<project>

Flags:
  --json         — emit a JSON envelope on stdout instead of styled output;
                   suitable for support-ticket attachment and CI piping.

Environment variables:
  GITHUB_TOKEN   — personal access token with repo, admin:org, workflow scopes
  GITLAB_TOKEN   — personal access token with api, read_repository scopes
  GITLAB_URL     — (optional) self-managed GitLab URL, defaults to https://gitlab.com
  ADO_TOKEN      — personal access token with Code, Work Items, Build, Release (Read) scopes

Exit codes:
  0 — clean: no findings
  1 — usage or authentication error
  2 — warnings only: migration should succeed, plan for flagged items
  3 — blockers present: investigate before migrating

Output is colourised on terminals; find the machine-readable summary at the end.
EOF
    exit 1
  fi

  local platform="$1"
  local target="$2"

  # JSON mode: silence all check-function side output (raw echo, etc.) by
  # redirecting stdout to /dev/null while still letting warn()/block()
  # write into JSON_FINDINGS_FILE. The final envelope is emitted to the
  # saved original-stdout FD.
  if [[ "${JSON_MODE}" == "1" ]]; then
    exec 3>&1 >/dev/null
  fi

  case "${platform}" in
    github)       check_github "${target}" ;;
    gitlab)       check_gitlab "${target}" ;;
    azure-devops|ado) check_azure_devops "${target}" ;;
    *)
      red "Unknown platform: ${platform}"
      exit 1
      ;;
  esac

  if [[ "${JSON_MODE}" == "1" ]]; then
    # Build the envelope on the saved original stdout (fd 3). `jq -s`
    # slurps the one-object-per-line findings file into the `findings`
    # array. The scratch file is removed by the EXIT trap set above.
    exec 1>&3 3>&-
    jq -s --arg schema_version "1" \
          --arg platform "${platform}" \
          --arg target "${target}" \
          --arg generated_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
          --argjson total "${FINDING_COUNT}" \
          --argjson warnings "${WARNING_COUNT}" \
          --argjson blockers "${BLOCKER_COUNT}" \
          '{
            schema_version: $schema_version,
            generated_at: $generated_at,
            platform: $platform,
            target: $target,
            summary: { total: $total, warnings: $warnings, blockers: $blockers },
            findings: .
          }' "${JSON_FINDINGS_FILE}"
    if [[ "${BLOCKER_COUNT}" -gt 0 ]]; then exit 3; fi
    if [[ "${WARNING_COUNT}" -gt 0 ]]; then exit 2; fi
    exit 0
  fi

  echo
  bold "Summary"
  echo "  Findings total: ${FINDING_COUNT}"
  echo "  Warnings:       ${WARNING_COUNT}"
  echo "  Blockers:       ${BLOCKER_COUNT}"
  echo

  # Machine-readable summary line for CI consumption
  echo "FREMFORGE_PREMIGRATION_RESULT total=${FINDING_COUNT} warnings=${WARNING_COUNT} blockers=${BLOCKER_COUNT}"

  if [[ "${BLOCKER_COUNT}" -gt 0 ]]; then
    red "At least one blocking finding. Investigate before migrating."
    exit 3
  fi
  if [[ "${WARNING_COUNT}" -gt 0 ]]; then
    yellow "Warnings only. Migration is expected to succeed; plan for flagged items."
    exit 2
  fi

  green "No findings. Migration is expected to be clean."
  exit 0
}

# Entry point: hand every command-line argument to main() unchanged; main()
# always terminates the script via `exit` with the documented status code.
main "$@"
