#!/usr/bin/env bash
# es-zero-downtime-reindex.sh, reindex an Elasticsearch index without downtime
# via the alias-swap pattern. Verified against Elasticsearch 8.x and 9.x.
# Source: https://techearl.com/elasticsearch-zero-downtime-reindex
# Site:   https://techearl.com/
#
# Usage:
#   ./es-zero-downtime-reindex.sh --old products_v3 --new products_v4 --alias products
#   ./es-zero-downtime-reindex.sh --old products_v3 --new products_v4 --alias products --rollback
#
# Requires: curl, jq.
#
# Prerequisites:
#   - The new index already exists with your desired mapping.
#   - The alias already points to the old index.
#   - ES_HOST (or --host) and ES_AUTH identify the cluster and credentials.
#     When unset they default to https://localhost:9200 and elastic:changeme.

# Strict mode: abort on the first failing command (errexit), treat unset
# variables as errors (nounset), and fail a pipeline when any stage fails.
set -o errexit -o nounset -o pipefail

# Connection settings: taken from the environment, with fallbacks.
HOST=${ES_HOST:-https://localhost:9200}
AUTH=${ES_AUTH:-elastic:changeme}

# Index and alias names; empty until the command-line arguments are parsed.
OLD_INDEX=
NEW_INDEX=
ALIAS=

# ACTION is "reindex" unless --rollback is given; YES=1 skips the prompts.
ACTION=reindex
YES=0

# Print the command-line help to stdout, then terminate with exit status 1.
usage() {
  # The synopsis line is the only part that needs the script name.
  printf 'Usage: %s --old <old_index> --new <new_index> --alias <alias> [--rollback] [--yes] [--host <host>]\n' "$0"

  # Quoted delimiter: the remainder is emitted literally, so $ES_HOST below
  # is shown as text rather than expanded.
  cat <<'HELP'

Required:
  --old      Current index name (alias currently points here)
  --new      New index name (must already exist with the new mapping)
  --alias    Alias the application queries

Optional:
  --rollback Swap the alias back from new to old (use when the new index is bad)
  --yes      Don't prompt before the alias swap
  --host     Elasticsearch host (default: $ES_HOST or https://localhost:9200)

Auth: set ES_AUTH=user:pass in the environment (default: elastic:changeme).
HELP
  exit 1
}

#######################################
# Parse command-line options into the script's global settings.
# Globals:   OLD_INDEX, NEW_INDEX, ALIAS, HOST, ACTION, YES (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   one diagnostic line on stdout for an unknown option or an
#            option that is missing its value (followed by the usage text)
# Returns:   0 when every argument was consumed; otherwise exits via usage
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --old|--new|--alias|--host)
        # Guard the "$2" read: with `set -u` a trailing option that has no
        # value would otherwise abort with a bare "unbound variable" error.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for: $1"
          usage
        fi
        case "$1" in
          --old)   OLD_INDEX="$2" ;;
          --new)   NEW_INDEX="$2" ;;
          --alias) ALIAS="$2" ;;
          --host)  HOST="$2" ;;
        esac
        shift 2
        ;;
      --rollback) ACTION="rollback"; shift ;;
      --yes)      YES=1; shift ;;
      -h|--help)  usage ;;
      *)          echo "Unknown arg: $1"; usage ;;
    esac
  done
}

parse_args "$@"

# --old, --new and --alias are all mandatory: show the help text (which
# exits) if any of them is still empty after argument parsing.
if [[ -z "$OLD_INDEX" || -z "$NEW_INDEX" || -z "$ALIAS" ]]; then
  usage
fi

#######################################
# Thin curl wrapper used for every Elasticsearch request.
# Globals:   AUTH (read)      - "user:pass" passed to curl for basic auth
#            ES_CACERT (read) - optional path to a CA certificate file; when
#                               set, TLS is verified against it instead of
#                               being skipped with -k
# Arguments: any further curl arguments (method, URL, -d payload, ...)
# Outputs:   whatever curl writes (silent mode, transport errors on stderr)
# Returns:   curl's exit status. --fail is not used, so an HTTP 4xx/5xx
#            reply still returns 0 and callers must inspect the response.
#######################################
es() {
  # Default stays -k (no certificate verification) so existing setups with
  # self-signed certificates keep working; ES_CACERT opts in to verification.
  local -a tls_opts=(-k)
  if [[ -n "${ES_CACERT:-}" ]]; then
    tls_opts=(--cacert "$ES_CACERT")
  fi
  curl -sS "${tls_opts[@]}" -u "$AUTH" -H 'Content-Type: application/json' "$@"
}

# --- Rollback: swap the alias back from new to old, then exit ---
# The remove and add actions are sent in a single _aliases request. The
# response is checked for "acknowledged": true because es() returns 0 even
# when Elasticsearch answers with an HTTP error (for example when the alias
# is not on the new index), and success must not be reported in that case.
if [[ "$ACTION" == "rollback" ]]; then
  echo "Rolling back: pointing '$ALIAS' from '$NEW_INDEX' back to '$OLD_INDEX'..."
  # Build the payload with jq so the names are always valid JSON strings.
  ROLLBACK_BODY=$(jq -n \
    --arg old_idx "$OLD_INDEX" --arg new_idx "$NEW_INDEX" --arg alias_name "$ALIAS" \
    '{"actions": [
       {"remove": {"index": $new_idx, "alias": $alias_name}},
       {"add":    {"index": $old_idx, "alias": $alias_name}}
     ]}')
  ROLLBACK_RESP=$(es -X POST "$HOST/_aliases" -d "$ROLLBACK_BODY")
  if ! jq -e '.acknowledged == true' <<<"$ROLLBACK_RESP" > /dev/null 2>&1; then
    echo "ERROR: rollback was not acknowledged by Elasticsearch:"
    printf '%s\n' "$ROLLBACK_RESP"
    exit 1
  fi
  echo "Done. The application is now reading from '$OLD_INDEX' again."
  exit 0
fi

# --- Sanity checks ---
# Each index check asks curl for the HTTP status code only (-o /dev/null -w)
# and compares it in the shell. Capturing the code avoids piping curl into
# `grep -q` under pipefail. A failed curl run leaves the status empty and is
# reported the same way as a non-200 reply.
echo "Verifying old index exists..."
OLD_STATUS=$(es -o /dev/null -w '%{http_code}' "$HOST/$OLD_INDEX") || OLD_STATUS=""
[[ "$OLD_STATUS" == "200" ]] \
  || { echo "Error: '$OLD_INDEX' not found"; exit 1; }

echo "Verifying new index exists with mapping..."
NEW_STATUS=$(es -o /dev/null -w '%{http_code}' "$HOST/$NEW_INDEX") || NEW_STATUS=""
[[ "$NEW_STATUS" == "200" ]] \
  || { echo "Error: '$NEW_INDEX' not found. Create it with the new mapping first."; exit 1; }

echo "Verifying alias points to old index..."
# The names are handed to jq as variables instead of being spliced into the
# filter text, so unusual characters in a name cannot break the filter.
# jq -e exits non-zero when the looked-up value is null or false.
es "$HOST/_alias/$ALIAS" \
  | jq -e --arg idx "$OLD_INDEX" --arg alias_name "$ALIAS" \
      '.[$idx].aliases[$alias_name]' > /dev/null \
  || { echo "Error: alias '$ALIAS' does not point to '$OLD_INDEX'"; exit 1; }

# --- Tune the new index for fast reindex ---
# Drops replicas to 0 and stretches the refresh interval to 60s on the new
# index. The response is inspected because es() returns 0 on HTTP errors;
# a rejected settings change only costs speed, so it is reported as a
# warning and the run continues.
echo "Tuning new index for reindex (replicas=0, refresh=60s)..."
TUNE_RESP=$(es -X PUT "$HOST/$NEW_INDEX/_settings" \
  -d '{"index":{"number_of_replicas":0,"refresh_interval":"60s"}}')
if ! jq -e '.acknowledged == true' <<<"$TUNE_RESP" > /dev/null 2>&1; then
  echo "WARNING: tuning was not acknowledged; continuing with the index's current settings:"
  printf '%s\n' "$TUNE_RESP"
fi

# --- Trigger the reindex as a background task ---
# wait_for_completion=false makes the request return a task id that is
# polled afterwards. The id is validated here: if the request is rejected
# the response has no "task" field, and carrying on with a bogus id would
# leave the script polling a task that does not exist.
echo "Starting reindex from '$OLD_INDEX' to '$NEW_INDEX' (slices=auto)..."
# Build the payload with jq so the index names are always valid JSON strings.
REINDEX_BODY=$(jq -n --arg src_idx "$OLD_INDEX" --arg dst_idx "$NEW_INDEX" \
  '{"source": {"index": $src_idx, "size": 1000}, "dest": {"index": $dst_idx}}')
REINDEX_RESP=$(es -X POST "$HOST/_reindex?wait_for_completion=false&slices=auto" -d "$REINDEX_BODY")
# `// empty` turns a missing or null task into an empty string.
TASK=$(jq -r '.task // empty' <<<"$REINDEX_RESP" 2>/dev/null) || TASK=""
if [[ -z "$TASK" ]]; then
  echo "ERROR: Elasticsearch did not return a task id for the reindex request:"
  printf '%s\n' "$REINDEX_RESP"
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi
echo "Task: $TASK"

# --- Poll until completion ---
# Note: with slices=auto the parent task counts only completed slices, so
# the progress number can jump in steps rather than climbing smoothly.
#
# A poll is only trusted when the response carries a boolean "completed"
# field. An error body (for example an unknown task id) has no such field;
# after MAX_BAD_POLLS consecutive unreadable responses the script aborts
# instead of looping forever.
BAD_POLLS=0
MAX_BAD_POLLS=3
while true; do
  RESP=$(es "$HOST/_tasks/$TASK")
  COMPLETED=$(jq -r '.completed' <<<"$RESP" 2>/dev/null) || COMPLETED=""
  if [[ "$COMPLETED" == "true" || "$COMPLETED" == "false" ]]; then
    BAD_POLLS=0
    CREATED=$(jq -r '.task.status.created // 0' <<<"$RESP")
    TOTAL=$(jq -r '.task.status.total   // 0' <<<"$RESP")
    # \r rewrites the same terminal line on every poll.
    printf "\r  Progress: %s / %s   " "$CREATED" "$TOTAL"
    if [[ "$COMPLETED" == "true" ]]; then
      break
    fi
  else
    BAD_POLLS=$((BAD_POLLS + 1))
    if [[ "$BAD_POLLS" -ge "$MAX_BAD_POLLS" ]]; then
      printf "\n"
      echo "ERROR: could not read the status of task '$TASK':"
      printf '%s\n' "$RESP"
      echo "Aborted. The alias still points to '$OLD_INDEX'."
      exit 1
    fi
  fi
  sleep 5
done
printf "\n  Reindex task finished. Inspecting result...\n"

# --- Fail fast on task errors, document failures, and version conflicts ---
# "completed": true does not mean every document made it across, and a
# matching document count would not reveal that. The stored task result is
# fetched once more and examined for three things, in order:
#   1. a top-level "error"       -> abort
#   2. entries in .response.failures -> abort
#   3. .response.version_conflicts   -> warn only
FINAL=$(es "$HOST/_tasks/$TASK")

if jq -e 'has("error")' <<<"$FINAL" > /dev/null; then
  echo "ERROR: the reindex task itself failed:"
  jq '.error' <<<"$FINAL"
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi

# A missing field counts as "no failures" / "no conflicts".
FAILURES=$(jq -r '(.response.failures // []) | length' <<<"$FINAL")
VCONFLICTS=$(jq -r '.response.version_conflicts // 0' <<<"$FINAL")

if (( FAILURES > 0 )); then
  echo "ERROR: reindex completed with $FAILURES document failure(s). First few:"
  jq '.response.failures[0:5]' <<<"$FINAL"
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi

if (( VCONFLICTS > 0 )); then
  echo "WARNING: $VCONFLICTS version conflict(s) during reindex."
  echo "This is normal only if live writes were updating the same documents."
fi

echo "  No document failures reported."

# --- Refresh the new index so _count is current ---
es -X POST "$HOST/$NEW_INDEX/_refresh" > /dev/null

# --- Verify counts ---
# A failed jq parse leaves the count empty so the numeric guard below
# reports it, rather than `set -e` ending the script without a message.
OLD_COUNT=$(es "$HOST/$OLD_INDEX/_count" | jq -r .count) || OLD_COUNT=""
NEW_COUNT=$(es "$HOST/$NEW_INDEX/_count" | jq -r .count) || NEW_COUNT=""
echo "Document counts: old=$OLD_COUNT new=$NEW_COUNT"

# An error response has no .count, so jq prints "null". Two "null" strings
# compare equal and would pass the check below, so require real integers.
if ! [[ "$OLD_COUNT" =~ ^[0-9]+$ && "$NEW_COUNT" =~ ^[0-9]+$ ]]; then
  echo "ERROR: could not read document counts from Elasticsearch."
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi

if [[ "$OLD_COUNT" != "$NEW_COUNT" ]]; then
  echo "WARNING: counts differ. Investigate before continuing."
  # With --yes the mismatch is reported but does not stop the run.
  if [[ $YES -ne 1 ]]; then
    # -r keeps backslashes literal; EOF on stdin is treated as "no" instead
    # of ending the script silently under `set -e`.
    ans=""
    read -r -p "Swap the alias anyway? [y/N] " ans || ans=""
    [[ "$ans" =~ ^[Yy]$ ]] || { echo "Aborted. The alias still points to '$OLD_INDEX'."; exit 1; }
  fi
fi

# --- Restore production settings on new index ---
# The values default to replicas=1 and refresh=1s and can be overridden with
# ES_REPLICAS / ES_REFRESH_INTERVAL for indices that run with other settings.
# The response is checked: es() returns 0 on HTTP errors, and the alias must
# not be moved onto an index that still has reindex-time settings.
PROD_REPLICAS="${ES_REPLICAS:-1}"
PROD_REFRESH="${ES_REFRESH_INTERVAL:-1s}"
if ! [[ "$PROD_REPLICAS" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "ERROR: ES_REPLICAS must be a non-negative integer (got '$PROD_REPLICAS')."
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi
echo "Restoring production settings (replicas=$PROD_REPLICAS, refresh=$PROD_REFRESH)..."
# --argjson passes the replica count as a JSON number, --arg as a string.
SETTINGS_BODY=$(jq -n --argjson replicas "$PROD_REPLICAS" --arg refresh "$PROD_REFRESH" \
  '{"index": {"number_of_replicas": $replicas, "refresh_interval": $refresh}}')
RESTORE_RESP=$(es -X PUT "$HOST/$NEW_INDEX/_settings" -d "$SETTINGS_BODY")
if ! jq -e '.acknowledged == true' <<<"$RESTORE_RESP" > /dev/null 2>&1; then
  echo "ERROR: restoring settings on '$NEW_INDEX' was not acknowledged:"
  printf '%s\n' "$RESTORE_RESP"
  echo "Aborted. The alias still points to '$OLD_INDEX'."
  exit 1
fi

# --- Atomic alias swap ---
# Unless --yes was given, ask for confirmation first. The remove and add
# actions are then sent in one _aliases request, and the response must carry
# "acknowledged": true. es() returns 0 on HTTP errors, so without this check
# a rejected swap would be followed by a success message.
if [[ $YES -ne 1 ]]; then
  # -r keeps backslashes literal; EOF on stdin is treated as "no" instead
  # of ending the script silently under `set -e`.
  ans=""
  read -r -p "Ready to swap alias '$ALIAS' from '$OLD_INDEX' to '$NEW_INDEX'? [y/N] " ans || ans=""
  [[ "$ans" =~ ^[Yy]$ ]] || { echo "Aborted. The alias still points to '$OLD_INDEX'."; exit 1; }
fi

# Build the payload with jq so the names are always valid JSON strings.
SWAP_BODY=$(jq -n \
  --arg old_idx "$OLD_INDEX" --arg new_idx "$NEW_INDEX" --arg alias_name "$ALIAS" \
  '{"actions": [
     {"remove": {"index": $old_idx, "alias": $alias_name}},
     {"add":    {"index": $new_idx, "alias": $alias_name}}
   ]}')
SWAP_RESP=$(es -X POST "$HOST/_aliases" -d "$SWAP_BODY")
if ! jq -e '.acknowledged == true' <<<"$SWAP_RESP" > /dev/null 2>&1; then
  echo "ERROR: alias swap was not acknowledged by Elasticsearch:"
  printf '%s\n' "$SWAP_RESP"
  # NOTE(review): expected to hold because both actions travel in one
  # request - confirm against the cluster before retrying.
  echo "The alias '$ALIAS' should still point to '$OLD_INDEX'; verify before retrying."
  exit 1
fi

#######################################
# Print the closing summary: where the alias points now, how to delete the
# old index, and how to roll back.
# Globals:   NEW_INDEX, OLD_INDEX, ALIAS, HOST (read)
# Arguments: none
# Outputs:   the summary on stdout
#######################################
print_next_steps() {
  echo "Done. The application is now reading from '$NEW_INDEX' via '$ALIAS'."
  echo
  echo "Old index '$OLD_INDEX' is preserved. Delete when you're confident:"
  # Print the literal text $ES_AUTH rather than the credential itself, so the
  # password does not end up in terminal scrollback or captured logs.
  echo "  curl -sk -u \"\$ES_AUTH\" -X DELETE \"$HOST/$OLD_INDEX\""
  echo
  echo "If something goes wrong, roll back with:"
  # --host is included so the rollback targets the same cluster even when
  # the host was given on the command line rather than through ES_HOST.
  echo "  $0 --host $HOST --old $OLD_INDEX --new $NEW_INDEX --alias $ALIAS --rollback"
}

print_next_steps
