#!/bin/bash
###ver=5.0.0

# Use a fixed command search path for everything this script runs.
PATH="/usr/local/bin:/usr/bin:/sbin:/usr/X11R6/bin:/usr/sbin:/bin:/usr/games"
export PATH

# CPU limit, compared against the ps "pcpu" value multiplied by 100,
# so 2000 means 20%.
CPU_LIMIT=2000
# RSS limit; unit: KB (51200 KB = 50 MB).
RSS_LIMIT=51200

# Resolved absolute path of this script and the directory that contains it.
# "$0" is quoted so an install path containing whitespace is not word-split.
SCRIPT_PATH=$(readlink -f "$0")
BASE_DIR=$(dirname "$SCRIPT_PATH")

# Name of the monitored executable inside BASE_DIR.
PROC_NAME=secu-tcs-agent

# Working files kept under BASE_DIR/secubase:
#   PS_INFO      - ps snapshot of the agent used by the resource check
#   MON_LOG      - log written by DoLog
#   LIMIT_FILE   - count of consecutive runs over the resource limits
#   RESTART_FILE - count of agent restarts
PS_INFO="$BASE_DIR/secubase/secu-tcs-ps.info"
MON_LOG="$BASE_DIR/secubase/secu-tcs-ps.log"
LIMIT_FILE="$BASE_DIR/secubase/secu-tcs-ps.lmt"
RESTART_FILE="$BASE_DIR/secubase/secu-tcs-restart.cnt"

# Check the monitor log and delete it once it grows past the size limit.
# Globals: MON_LOG (read) - path of the log file
# Returns: 0 always; does nothing when the log is missing or its size
#          cannot be read.
function CheckMonLogSize()
{
	local log_size

	# Quoted so that an empty or whitespace-containing MON_LOG cannot turn
	# the test into "[ -e ]", which is always true.
	if [ ! -e "${MON_LOG}" ]; then
		return 0
	fi

	log_size=$(stat --format=%s "${MON_LOG}") || return 0

	# limit 10K
	if [ "${log_size:-0}" -gt 10240 ]; then
		rm -f "${MON_LOG}"
	fi
	return 0
}
CheckMonLogSize

# Record the mount-namespace identifier of PID 1, i.e. the target of the
# /proc/1/ns/mnt symlink. Stays empty when that symlink is not present.
PID1_MNT_NS_INODE=""
test -L /proc/1/ns/mnt && PID1_MNT_NS_INODE="$(readlink /proc/1/ns/mnt)"


# Append one timestamped line to the monitor log.
# Globals:   MON_LOG (read) - path of the log file
# Arguments: $1 - message text
# Outputs:   appends "[YYYY-mm-dd HH:MM:SS] <message>" to MON_LOG
function DoLog()
{
	local now
	now=$(date +"%Y-%m-%d %H:%M:%S")
	# MON_LOG is quoted: an unquoted redirect target that contains
	# whitespace fails with "ambiguous redirect" and the message is lost.
	printf '%s\n' "[$now] $1" >> "$MON_LOG"
}

# Kill (SIGKILL) the watchdog.sh and agent processes of this installation.
# A process is selected when its "ps -efw" line matches the name pattern and
# contains BASE_DIR. When PID1_MNT_NS_INODE is non-empty, only processes
# whose /proc/<pid>/ns/mnt link equals it are killed.
# Globals: BASE_DIR, PROC_NAME, PID1_MNT_NS_INODE (all read)
# Returns: 1 when BASE_DIR is empty, otherwise the status of the last command.
function DoStop()
{
	local pattern pid pid_list pid_mnt_ns

	# An empty BASE_DIR would make the fixed-string filter below match
	# every process, so refuse to run.
	if [ -z "$BASE_DIR" ]; then
		return 1
	fi

	# Watchdog processes are handled first, then agent processes.
	for pattern in 'watchdog\.sh' "${PROC_NAME}($|[[:space:]]+)"
	do
		# BASE_DIR is matched as a quoted fixed string: unquoted it is
		# word-split on whitespace and its characters act as a regex.
		pid_list=$(ps -efw | grep -E -- "$pattern" | grep -F -- "$BASE_DIR" | grep -v grep | awk '{print $2}')
		for pid in $pid_list
		do
			if [ -n "$PID1_MNT_NS_INODE" ]; then
				pid_mnt_ns=$(readlink "/proc/${pid}/ns/mnt")
				if [ "$pid_mnt_ns" != "$PID1_MNT_NS_INODE" ]; then
					continue
				fi
			fi
			kill -9 "$pid"
		done
	done
}

# Restore the agent from BASE_DIR/secu-tcs-agent-avail-bk.
# Steps, performed only when the backup file exists:
#   1. If BASE_DIR/secu-tcs-agent exists (file or symlink), delete older
#      secu-tcs-agent-bk_* entries and move it to secu-tcs-agent-bk_<date>.
#   2. Copy the backup to secu-tcs-agent-avail; on success point the
#      secu-tcs-agent symlink at it, make it 755 and reset RESTART_FILE to 0.
# Globals: BASE_DIR (read), RESTART_FILE (file is written)
# All paths are quoted so an install directory with whitespace works.
function restore_backup
{
	local agent_link="$BASE_DIR/secu-tcs-agent"
	local avail_bin="$BASE_DIR/secu-tcs-agent-avail"
	local backup_bin="$BASE_DIR/secu-tcs-agent-avail-bk"
	local today

	# Nothing to restore from.
	if [ ! -f "$backup_bin" ]; then
		return 0
	fi

	if [ -f "$agent_link" ] || [ -h "$agent_link" ]; then
		# The glob stays outside the quotes so it still expands.
		rm -rf -- "$BASE_DIR"/secu-tcs-agent-bk_*
		today=$(date +"%Y-%m-%d")
		mv -- "$agent_link" "$BASE_DIR/secu-tcs-agent-bk_$today"
	fi

	if cp -rf -- "$backup_bin" "$avail_bin"; then
		ln -s -n -f -- "$avail_bin" "$agent_link"
		chmod 755 "$avail_bin"
		echo "0" > "$RESTART_FILE"
	fi
}

# Start the watchdog when it is not running.
# Without a PID 1 mount-namespace id, any watchdog.sh process whose ps line
# contains BASE_DIR counts as running. Otherwise only a watchdog.sh whose
# /proc/<pid>/ns/mnt link equals PID1_MNT_NS_INODE counts.
# BASE_DIR is passed to grep as a quoted fixed string: unquoted it is
# word-split on whitespace and its characters are interpreted as a regex.
NEED_RESTART_WATCHDOG=0
if [ -z "$PID1_MNT_NS_INODE" ]; then
	WATCH_DOG_NUM=$(ps -efw | grep "watchdog\.sh" | grep -F -- "$BASE_DIR" | grep -v grep | wc -l)
	if [ "$WATCH_DOG_NUM" -lt 1 ]; then
		NEED_RESTART_WATCHDOG=1
	fi
else
	# Assume a restart is needed until a watchdog in the right namespace is found.
	NEED_RESTART_WATCHDOG=1

	LIST_WATCH_DOG_PID=$(ps -efw | grep "watchdog\.sh" | grep -F -- "$BASE_DIR" | grep -v grep | awk '{print $2}')
	for watchdog_pid in $LIST_WATCH_DOG_PID
	do
		WATCHDOG_MNT_NS_INODE=$(readlink "/proc/${watchdog_pid}/ns/mnt")
		if [ "$WATCHDOG_MNT_NS_INODE" = "$PID1_MNT_NS_INODE" ]; then
			NEED_RESTART_WATCHDOG=0
			break
		fi
	done
fi

if [ "$NEED_RESTART_WATCHDOG" -eq 1 ]; then
	DoLog "WARNING: watchdog.sh restarted because of process has down!!!"
	# Launched in the background, detached from this script's output.
	nohup "$BASE_DIR/watchdog.sh" > /dev/null 2>&1 &
else
	DoLog "Check watchdog OK!"
fi

# Check whether the agent is running; if it is not, start it.
# Without a PID 1 mount-namespace id, any matching process whose ps line
# contains BASE_DIR counts as running. Otherwise only a process whose
# /proc/<pid>/ns/mnt link equals PID1_MNT_NS_INODE counts.
# Whenever a running agent is found, the restart counter is reset to 0.
NEED_RESTART_AGENT=0
if [ -z "$PID1_MNT_NS_INODE" ]; then
	AGENT_NUM=$(ps -Aw -o pid,ppid,stime,rss,pcpu,command | grep -E "${PROC_NAME}($|[[:space:]]+)" | grep -F -- "$BASE_DIR" | grep -v grep | wc -l)
	if [ "$AGENT_NUM" -lt 1 ]; then
		NEED_RESTART_AGENT=1
	else
		echo "0" > "$RESTART_FILE"
	fi
else
	# Assume a restart is needed until an agent in the right namespace is found.
	NEED_RESTART_AGENT=1

	LIST_AGENT_PID=$(ps -Aw -o pid,ppid,stime,rss,pcpu,command | grep -E "${PROC_NAME}($|[[:space:]]+)" | grep -F -- "$BASE_DIR" | grep -v grep | awk '{print $1}')
	for agent_pid in $LIST_AGENT_PID
	do
		AGENT_MNT_NS_INODE=$(readlink "/proc/${agent_pid}/ns/mnt")
		if [ "$AGENT_MNT_NS_INODE" = "$PID1_MNT_NS_INODE" ]; then
			NEED_RESTART_AGENT=0
			echo "0" > "$RESTART_FILE"
			break
		fi
	done
fi

if [ "$NEED_RESTART_AGENT" -eq 1 ]; then
	# The agent is run in the foreground with stdio detached.
	# NOTE(review): presumably it daemonizes itself; otherwise this script
	# blocks here until the agent exits - confirm.
	"${BASE_DIR}/${PROC_NAME}" < /dev/null > /dev/null 2>&1
	DoLog "WARNING: $PROC_NAME restarted because of process has down or run multi example!!!"

	# Time since the last status change (stat %Z) of the symlink target.
	# NOTE(review): this path is hard-coded rather than derived from
	# BASE_DIR - confirm that is intended.
	path=$(readlink /usr/local/sa/agent/secu-tcs-agent)
	create_time=""
	if [ -n "$path" ]; then
		create_time=$(stat --format=%Z "$path")
	fi
	now_time=$(date +%s)

	# readlink/stat can yield nothing (not a symlink, unreadable target);
	# doing arithmetic on an empty value is a shell syntax error, so the
	# age is only computed from a purely numeric timestamp.
	case "$create_time" in
		''|*[!0-9]*)
			slip_time=""
			;;
		*)
			slip_time=$(( now_time - create_time ))
			;;
	esac

	# Only when the target changed less than 172800 s (48 h) ago: count
	# restarts and fall back to the backup after more than 3 of them.
	if [ -n "$slip_time" ] && [ "$slip_time" -lt 172800 ]; then
		DoLog "need restart check: create_time:$create_time now_time:$now_time slip_time:$slip_time!!"
		if [ -f "$RESTART_FILE" ]; then
			RESTART_TIME=$(awk 'NR == 1 {print $1+1}' "$RESTART_FILE")
			# An empty counter file produces no awk output; count it as the first restart.
			RESTART_TIME=${RESTART_TIME:-1}
			if [ "$RESTART_TIME" -gt 3 ]; then
				restore_backup
			else
				echo "$RESTART_TIME" > "$RESTART_FILE"
			fi
		else
			echo "0" > "$RESTART_FILE"
		fi

		# Skip the resource check for this run.
		echo "0" > "$LIMIT_FILE"
		exit
	fi
else
	DoLog "Check $PROC_NAME Server OK!"
fi

# Resource usage check.
# Snapshot the agent's ps line(s) into PS_INFO, then compare RSS (KB) and
# pcpu*100 with RSS_LIMIT / CPU_LIMIT. LIMIT_FILE holds the number of
# consecutive runs over a limit; on the third one the agent is stopped.
if [ -z "$PID1_MNT_NS_INODE" ]; then
	ps -Aw -o pid,ppid,stime,rss,pcpu,command | grep -E "${PROC_NAME}($|[[:space:]]+)" | grep -F -- "$BASE_DIR" | grep -v grep > "$PS_INFO"
else
	echo "" > "$PS_INFO"

	# Keep only the first agent that lives in PID 1's mount namespace.
	ps -Aw -o pid,ppid,stime,rss,pcpu,command | grep -E "${PROC_NAME}($|[[:space:]]+)" | grep -F -- "$BASE_DIR" | grep -v grep | while read -r agent_ps_info
	do
		agent_pid=$(printf '%s\n' "$agent_ps_info" | awk '{print $1}')
		AGENT_MNT_NS_INODE=$(readlink "/proc/${agent_pid}/ns/mnt")
		if [ "$AGENT_MNT_NS_INODE" = "$PID1_MNT_NS_INODE" ]; then
			# Quoted so that glob characters in the command line are not expanded.
			printf '%s\n' "$agent_ps_info" > "$PS_INFO"
			break
		fi
	done
fi

# PS_INFO may hold zero, one or several lines. Reduce it to a single integer
# per metric (the maximum over all lines, 0 when there is none) so the
# numeric tests below always receive exactly one operand each.
RSS=$(awk 'BEGIN { max = 0 } NF >= 5 && $4 + 0 > max { max = $4 + 0 } END { printf "%d\n", max }' "$PS_INFO")
CPU=$(awk 'BEGIN { max = 0 } NF >= 5 && $5 * 100 > max { max = $5 * 100 } END { printf "%d\n", max + 0.5 }' "$PS_INFO")
RSS=${RSS:-0}
CPU=${CPU:-0}

if [ "$RSS" -gt "$RSS_LIMIT" ] || [ "$CPU" -gt "$CPU_LIMIT" ]; then
	LIMIT_TIMES=""
	if [ -f "$LIMIT_FILE" ]; then
		LIMIT_TIMES=$(awk 'NR == 1 {print $1+1}' "$LIMIT_FILE")
	fi
	# A missing or empty counter file counts as the first over-limit run.
	LIMIT_TIMES=${LIMIT_TIMES:-1}
	DoLog "INFO: $PROC_NAME source limit $RSS, $CPU times ${LIMIT_TIMES}."

	if [ "$LIMIT_TIMES" -ge 3 ]; then
		DoStop

		DoLog "WARNING: $PROC_NAME stoped because of source limit times ${LIMIT_TIMES}."
		echo "0" > "$LIMIT_FILE"
	else
		echo "$LIMIT_TIMES" > "$LIMIT_FILE"
	fi
else
	echo "0" > "$LIMIT_FILE"
fi

# Boot check: make sure /etc/rc.d/rc.local exists and contains a line that
# starts with the path of secu-tcs-agent-mon-safe.sh under BASE_DIR.
[ -f /etc/rc.d/rc.local ] || {
	mkdir -p /etc/rc.d/
	echo '#!/bin/bash' >> /etc/rc.d/rc.local
}

BOOT_NUM=$(grep "^${BASE_DIR}/secu-tcs-agent-mon-safe\.sh" /etc/rc.d/rc.local | wc -l)
if [ $BOOT_NUM -eq 0 ]; then
	# Append a dated marker comment followed by the start command.
	{
		echo "# secu-tcs-agent bootstart, install at $(date)"
		echo "${BASE_DIR}/secu-tcs-agent-mon-safe.sh > /dev/null 2>&1"
	} >> /etc/rc.d/rc.local
fi
