针对 k8s 环境中相关目录使用率过高的问题,通过脚本在后台定期检测;当目录使用率达到告警阈值(80%)时,自动排查占用空间较大的子目录和文件,并将结果记录到日志(/var/log/monitor.log)。
使用步骤:
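使用 bash 运行即可(脚本用到了数组、[[ ]] 等 bash 特性,不能用 sh 运行),例如:nohup bash <脚本文件> &。运行用户需要对 /var/log/monitor.log 有写权限(通常需要 root)。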
#!/bin/bash
#
# Disk-usage monitor.
#
# Every INTERVAL seconds, checks the filesystem usage of each directory in
# MONITOR_DIRS. When usage reaches THRESHOLD percent, the ten largest
# sub-directories and the ten largest files are appended to LOGFILE.
# The script exits with status 1 as soon as a process in D state
# (uninterruptible sleep) is observed.
#
# NOTE: `set -e` / `pipefail` are intentionally not enabled. This is a
# long-running loop that must survive transient command failures, and the
# `... | head` pipelines may legitimately end with SIGPIPE upstream. Failures
# that matter are checked explicitly instead.

# Directories whose filesystem usage is monitored.
readonly MONITOR_DIRS=("/")
# File that all log output is appended to.
readonly LOGFILE="/var/log/monitor.log"
# Seconds to sleep between two monitoring rounds.
readonly INTERVAL=60
# Usage percentage at or above which the space analysis is triggered.
readonly THRESHOLD=80
# Maximum run time, in seconds, of each find/du scan.
readonly TIMEOUT=300
# `find -path` patterns that are left out of the space analysis.
readonly EXCLUDED_PATHS=(
  "*/proc/*"
  "*/sys/*"
  "*/dev/*"
  "*/run/secrets/*"
  "*/run/netns/*"
  "*/run/docker/*"
)

#######################################
# Append one timestamped line to LOGFILE.
# Globals:   LOGFILE (read)
# Arguments: $1 - severity label (e.g. INFO, WARN, ERROR)
#            $2 - message text
#######################################
log_message() {
  local level="$1"
  local message="$2"
  printf '[%s] [%s] %s\n' "$(date +"%Y-%m-%d %H:%M:%S")" "$level" "$message" \
    >> "$LOGFILE"
}

#######################################
# Abort the whole script if any process is currently in D state.
# Globals:   none
# Arguments: none
# Returns:   0 if no D-state process was found; otherwise logs the offending
#            processes and exits the script with status 1.
#######################################
check_d_processes() {
  local d_processes
  # The header line printed by ps has "STAT" in column 2, which never
  # matches /^D/, so it is filtered out together with healthy processes.
  d_processes=$(ps -eo pid,stat,comm | awk '$2 ~ /^D/')
  if [[ -n "$d_processes" ]]; then
    log_message "ERROR" "发现 D 状态进程,终止脚本运行。"
    log_message "ERROR" "D 状态进程信息:$d_processes"
    exit 1
  fi
}

#######################################
# Append the ten largest entries of one kind below a directory to LOGFILE.
# Globals:   TIMEOUT, LOGFILE (read)
# Arguments: $1   - directory to scan
#            $2   - find -type letter (d = directories, f = files)
#            $3   - option string passed to du (e.g. -sh or -h)
#            $4.. - extra find predicates (the exclusion filters)
# Returns:   1 if the scan was cut off by `timeout`, 0 otherwise.
#######################################
log_top_usage() {
  local dir="$1"
  local entry_type="$2"
  local du_opts="$3"
  shift 3
  local -a statuses

  # Errors from find/du (e.g. entries vanishing mid-scan) are discarded on
  # purpose; only the size listing is of interest.
  timeout "$TIMEOUT" find "$dir" -type "$entry_type" "$@" \
    -exec du "$du_opts" {} + 2>/dev/null \
    | sort -rh \
    | head -n 10 >> "$LOGFILE"
  # Must be captured immediately: the pipeline's own exit status is that of
  # `head`, which says nothing about whether `timeout` fired.
  statuses=("${PIPESTATUS[@]}")

  # `timeout` exits with 124 when it had to terminate the command.
  (( statuses[0] != 124 ))
}

#######################################
# Log the usage of the filesystem holding a directory and, when the usage
# reaches THRESHOLD, log its largest sub-directories and files.
# Globals:   THRESHOLD, EXCLUDED_PATHS (read)
# Arguments: $1 - directory to check
# Returns:   1 if the usage could not be determined, 0 otherwise.
#######################################
check_directory_usage() {
  local dir="$1"
  local usage
  local path
  local -a exclude_args=()

  # -P forces one line per filesystem, so the percentage is always field 5
  # of line 2 even when the device name is long.
  usage=$(df -P -- "$dir" 2>/dev/null | awk 'NR==2 {sub(/%/, "", $5); print $5}')
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    # An empty or non-numeric value would make the arithmetic test below
    # fail with a syntax error.
    log_message "ERROR" "无法获取目录 $dir 的使用率,跳过本次检查。"
    return 1
  fi

  log_message "INFO" "检查目录:$dir,当前使用率:$usage%"
  if (( usage < THRESHOLD )); then
    return 0
  fi

  log_message "WARN" "目录 $dir 使用率超过 $THRESHOLD%。当前使用率:$usage%"
  log_message "INFO" "开始定位占用空间较大的子目录和文件..."

  for path in "${EXCLUDED_PATHS[@]}"; do
    exclude_args+=(-not -path "$path")
  done

  if log_top_usage "$dir" d -sh "${exclude_args[@]}"; then
    log_message "INFO" "完成定位占用空间较大的子目录。"
  else
    log_message "WARN" "find 命令超时,已强制终止。"
  fi

  log_message "INFO" "开始定位目录 $dir 内占用空间最大的文件..."
  if log_top_usage "$dir" f -h "${exclude_args[@]}"; then
    log_message "INFO" "完成定位占用空间较大的文件。"
  else
    log_message "WARN" "文件查找命令超时,已强制终止。"
  fi
}

#######################################
# Entry point: run the monitoring loop forever.
# Globals:   MONITOR_DIRS, INTERVAL, LOGFILE (read)
# Arguments: none
#######################################
main() {
  local dir

  # Without a writable log file the monitor cannot report anything, so fail
  # early with a visible error instead of looping silently.
  if ! touch -- "$LOGFILE" 2>/dev/null; then
    printf '无法写入日志文件:%s\n' "$LOGFILE" >&2
    exit 1
  fi

  while true; do
    check_d_processes
    for dir in "${MONITOR_DIRS[@]}"; do
      check_directory_usage "$dir"
    done
    sleep "$INTERVAL"
  done
}

main "$@"