Data Availability (DA) Nodes Monitoring Script
- Tracks DA Node status (every 15m)
- Compares block height with an external server (every 15m)
- Checks node wallet balance (every 15m)
- Sends alerts and status updates via Telegram
Choose your DA node type:
Configure Telegram alerting:
Open Telegram and find @BotFather to create your bot.
- Here are the telegram alerts integration instructions
- How to get chat id
After creating the Telegram bot and group, set the variables below, providing your own values for TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID.
TELEGRAM_BOT_TOKEN="<BOT_TOKEN>" # Telegram bot token used to call the sendMessage API
TELEGRAM_CHAT_ID="<CHAT_ID>" # Telegram chat ID that receives the notifications
NODE_NAME="Celestia_Mainnet_bridge_Node" # Name of the node, shown in every alert message
NODE_STORE="$HOME/.celestia-bridge/" # Node store path, passed to the celestia CLI via --node.store
PARENT_RPC="https://celestia-mainnet-rpc.itrocket.net" # URL of the parent RPC node used as the reference block height
SECOND_PARENT_RPC="" # (Optional) URL of a second parent RPC node; leave empty to skip it
MIN_BALANCE=1000000 # Minimum balance threshold in utia (1000000 utia = 1 TIA)
SLEEP=15m # Sleep interval between checks
Update packages and install dependencies:
sudo apt update && sudo apt upgrade -y
# bc is required by the monitoring script to convert utia balances to TIA
sudo apt install curl git wget htop tmux build-essential jq make gcc tar clang pkg-config libssl-dev ncdu bc -y
Create monitoring-bridge.sh file:
cat <<EOF >"$HOME/monitoring-bridge.sh"
#!/bin/bash
# Settings copied from the shell variables that were set when this file was generated
NODE_NAME="$NODE_NAME"
TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN"
TELEGRAM_CHAT_ID="$TELEGRAM_CHAT_ID"
PARENT_RPC="$PARENT_RPC"
SECOND_PARENT_RPC="$SECOND_PARENT_RPC"
NODE_STORE="$NODE_STORE"
MIN_BALANCE="$MIN_BALANCE" # Minimal utia balance; a lower balance triggers an alert
SLEEP="$SLEEP"
# Number of tries before a height query is reported as failed
MAX_ATTEMPTS="4"
# Enable the pipefail option: a pipeline returns a failure status when any
# command in it fails, not only the last one
set -o pipefail
# Function to send messages to Telegram
# Arguments: the first positional argument is the message text to deliver.
# Uses TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID defined at the top of the script.
# The API response is discarded.
send_telegram() {
  local message="\$1"
  # --data-urlencode keeps characters such as &, + and % (and newlines) in the
  # message from corrupting the form body; --max-time stops a stalled request
  # from blocking the monitoring loop.
  curl -s --max-time 10 -X POST "https://api.telegram.org/bot\$TELEGRAM_BOT_TOKEN/sendMessage" \
    --data-urlencode "chat_id=\$TELEGRAM_CHAT_ID" \
    --data-urlencode "text=\$message" >/dev/null
}
# Separator helper: writes one horizontal rule to stderr so that it never
# ends up in captured command output
print_line() {
  printf '%s\n' "---------------------------------------------------------------" 1>&2
}
# Function to get node height
# Queries the local node for its sync height, retrying up to MAX_ATTEMPTS times.
# Sets the global HEIGHT, which the main cycle reads after calling this function.
# Sends a Telegram alert and returns 1 when no numeric height could be obtained.
get_node_height() {
  local ATTEMPTS=0
  # Delay between retries in seconds; the log message below now reports this
  # real value (it used to announce 5 seconds while sleeping 2)
  local RETRY_DELAY=2
  HEIGHT=""
  while [[ \$ATTEMPTS -lt \$MAX_ATTEMPTS ]]; do
    # jq -r prints the raw value, so a height encoded as a JSON string would
    # still pass the numeric check below
    HEIGHT=\$(celestia header sync-state --node.store "\$NODE_STORE" | jq -r '.result.height')
    if [[ \$HEIGHT =~ ^[0-9]+$ ]]; then
      break
    fi
    ATTEMPTS=\$((ATTEMPTS + 1))
    echo "Attempt \$ATTEMPTS/\$MAX_ATTEMPTS: \$NODE_NAME is down or sent an invalid response. Retrying in \$RETRY_DELAY seconds..."
    sleep "\$RETRY_DELAY"
  done
  if [[ \$ATTEMPTS -eq \$MAX_ATTEMPTS ]]; then
    send_telegram "\$NODE_NAME node is down or sent an invalid response after \$MAX_ATTEMPTS attempts."
    echo "Error: \$NODE_NAME node is down or sent an invalid response after \$MAX_ATTEMPTS attempts."
    return 1
  fi
  echo "Local Node Height: \$HEIGHT"
  print_line
}
# Function to get parent height
# Asks every configured parent RPC (PARENT_RPC and the optional SECOND_PARENT_RPC)
# for its latest block height, retrying up to MAX_ATTEMPTS times until at least
# one of them answers with a positive number.
# Outputs: the highest height received on stdout, or 0 when none was received;
#          all progress messages go to stderr so they do not pollute the result.
# Sends a Telegram alert when every attempt failed.
get_parent_block_height() {
  local MAX_PARENT_HEIGHT=0
  local ATTEMPTS=0
  local RPC_URLS=("\$PARENT_RPC" "\$SECOND_PARENT_RPC")
  local RPC_INDEX RPC_URL RPC_LABEL RPC_RESPONSE RPC_HEIGHT
  while [[ \$ATTEMPTS -lt \$MAX_ATTEMPTS ]]; do
    echo "Attempt \$((ATTEMPTS + 1))/\$MAX_ATTEMPTS: Fetching block heights..." >&2
    for RPC_INDEX in "\${!RPC_URLS[@]}"; do
      RPC_URL="\${RPC_URLS[RPC_INDEX]}"
      # Skip endpoints that are not configured
      [[ -n "\$RPC_URL" ]] || continue
      RPC_LABEL="Parent RPC \$((RPC_INDEX + 1))"
      echo "Trying \$RPC_LABEL (\$RPC_URL)..." >&2
      RPC_RESPONSE=\$(curl -s --max-time 3 "\$RPC_URL/status")
      RPC_HEIGHT=\$(echo "\$RPC_RESPONSE" | jq -r '.result.sync_info.latest_block_height' 2>/dev/null | tr -d '[:space:]')
      if [[ \$RPC_HEIGHT =~ ^[0-9]+$ ]]; then
        echo "\$RPC_LABEL Height: \$RPC_HEIGHT" >&2
        # Keep the highest height seen across all endpoints
        if (( RPC_HEIGHT > MAX_PARENT_HEIGHT )); then
          MAX_PARENT_HEIGHT=\$RPC_HEIGHT
        fi
      else
        echo "\$RPC_LABEL returned invalid height: \$RPC_HEIGHT" >&2
      fi
    done
    if (( MAX_PARENT_HEIGHT > 0 )); then
      break
    fi
    ATTEMPTS=\$((ATTEMPTS + 1))
    # Only wait when another attempt will actually follow
    if [[ \$ATTEMPTS -lt \$MAX_ATTEMPTS ]]; then
      echo "No valid block height received from any Parent RPC. Retrying in 5 seconds..." >&2
      sleep 5
    fi
  done
  if [[ \$ATTEMPTS -eq \$MAX_ATTEMPTS && \$MAX_PARENT_HEIGHT -eq 0 ]]; then
    send_telegram "\$NODE_NAME: Both Parent RPCs are down or returned invalid data after \$MAX_ATTEMPTS attempts.
- Parent RPC 1: \$PARENT_RPC
- Parent RPC 2: \$SECOND_PARENT_RPC"
  fi
  echo "\$MAX_PARENT_HEIGHT"
}
# Check balance function
# Reads the node wallet balance (in utia) and sends a Telegram alert when it is
# below MIN_BALANCE; otherwise logs that the balance is acceptable.
# Returns 1, without comparing, when the node does not report a numeric amount.
check_wallet_balance() {
  local balance
  local BAL_MESSAGE
  print_line
  echo "Checking wallet balance..."
  balance=\$(celestia state balance --node.store "\$NODE_STORE" | jq -r .result.amount)
  # A failed query yields an empty or non-numeric value; comparing it would
  # error out and fall through to the "acceptable" branch
  if [[ ! \$balance =~ ^[0-9]+$ ]]; then
    echo "Error: could not read wallet balance (got: '\$balance')" >&2
    return 1
  fi
  if [[ "\$balance" -lt "\$MIN_BALANCE" ]]; then
    BAL_MESSAGE="\$NODE_NAME
>> Wallet balance < \$(echo "scale=1; \${MIN_BALANCE}/1000000" | bc) TIA
>> Current balance: \$(echo "scale=1; \${balance}/1000000" | bc) TIA"
    echo "\$BAL_MESSAGE"
    send_telegram "\$BAL_MESSAGE"
  else
    echo "Balance of wallet is acceptable: \$(echo "scale=1; \${balance}/1000000" | bc) TIA"
  fi
}
# The main cycle
# Once per SLEEP interval: fetch the highest parent RPC height and the local
# node height, alert when the local node lags by more than one block, then
# check the wallet balance.
while true; do
  print_line
  PARENT_HEIGHT=\$(get_parent_block_height)
  get_node_height
  # get_parent_block_height prints 0 when no parent RPC answered; treat that as
  # invalid instead of reporting the node as being within range
  if [[ \$PARENT_HEIGHT =~ ^[0-9]+$ && \$PARENT_HEIGHT -gt 0 && \$HEIGHT =~ ^[0-9]+$ ]]; then
    DIFF=\$((PARENT_HEIGHT - HEIGHT))
    if [[ \$DIFF -gt 1 ]]; then
      send_telegram "\$NODE_NAME Block difference:
DIFF: \$DIFF
Local Node: \$HEIGHT
Parent RPC: \$PARENT_HEIGHT"
      echo -e "Block difference detected: \$DIFF
Local Node HEIGHT: \$HEIGHT
>>>Max Parent RPC: \$PARENT_HEIGHT" >&2
    else
      echo -e "Local Node Height: \$HEIGHT
>>>Max Parent RPC: \$PARENT_HEIGHT
Block height within acceptable range." >&2
    fi
  else
    echo "Error: Invalid block height detected. Parent Height: \$PARENT_HEIGHT, Local Node Height: \$HEIGHT" >&2
  fi
  check_wallet_balance
  print_line
  echo "Waiting \$SLEEP before next check..." >&2
  sleep "\$SLEEP"
done
EOF
Create service file:
sudo tee /etc/systemd/system/monitoring-bridge.service > /dev/null <<EOF
[Unit]
Description=Celestia bridge-monitoring script
# The script queries remote RPC endpoints and the Telegram API, so start it
# only after the network is reported as online
Wants=network-online.target
After=network-online.target
[Service]
User=$USER
ExecStart=$HOME/monitoring-bridge.sh
# Send output to the journal so that journalctl -u monitoring-bridge shows it;
# inherit would duplicate standard input, which is /dev/null for a service
StandardOutput=journal
StandardError=journal
Restart=always
[Install]
WantedBy=multi-user.target
EOF
Enable and start service:
# The script embeds the Telegram bot token, so make it readable and executable
# by its owner only (the service runs as that same user)
chmod 700 "$HOME/monitoring-bridge.sh"
sudo systemctl daemon-reload
sudo systemctl enable monitoring-bridge
sudo systemctl restart monitoring-bridge && sudo journalctl -u monitoring-bridge -f
Delete the monitoring service and script:
sudo systemctl stop monitoring-bridge
sudo systemctl disable monitoring-bridge
# Both targets are single files, so no recursive removal is needed
sudo rm -f /etc/systemd/system/monitoring-bridge.service
rm -f -- "$HOME/monitoring-bridge.sh"
sudo systemctl daemon-reload