#!/bin/bash
#
# Report which domains are behind the busiest address prefixes found in the
# web-server access logs below the current directory.
#
# Steps:
#   1. Concatenate every file named access.log* into ./logAll.
#   2. Count requests per "a.b.c." prefix, most frequent first -> ./listeIPs.
#   3. For each prefix with more than MIN_VISITS hits, reverse-resolve the last
#      full address seen for it and file it as a bot (./topBots.stats) or as a
#      user (./topUsers.stats). Hosts inside a listed user domain are recorded
#      under the bare domain only, to keep privacy.
#   4. Print the visit totals per domain: bots, four blank lines, then users.
#
# Environment:
#   MIN_VISITS  prefixes with this many hits or fewer are ignored (default 100)
#
# Note: `set -e` is deliberately not used, because isBot/isUser report their
# result through a non-zero return status.

# Crawler domains. Indexes start at 1 because 0 means "no match".
bots=(
  [1]="googlebot.com"
  [2]="yandex.ru"
  [3]="yahoo.net"
  [4]="baidu.com"
  [5]="msn.com"
  [6]="reverse.wowrack.com"
  [7]="google.com"
  [8]="naver.jp"
  [9]="exabot.com"
)

# User (ISP / campus) domains whose individual host names are not recorded.
domains=(
  [1]="abo.wanadoo.fr"
  [2]="univ-rennes1.fr"
  [3]="sfr.net"
  [4]="acropolistelecom.net"
  [5]="rd.francetelecom.com"
)

#######################################
# Find the first domain suffix that a host name belongs to.
# Arguments: $1 - host name, with or without the trailing root dot
#            $2.. - candidate domain suffixes
# Returns:   1-based position of the first matching suffix, 0 if none.
#            (Exit statuses wrap at 256; the lists here are far shorter.)
#######################################
_domainIndex() {
  local host=$1
  shift
  local position=0 suffix regex
  for suffix in "$@"; do
    position=$((position + 1))
    # Escape the dots and anchor on a label boundary, so "evilgoogle.com."
    # is not mistaken for "google.com".
    regex="(^|\.)${suffix//./\\.}\.?$"
    if [[ $host =~ $regex ]]; then
      return "$position"
    fi
  done
  return 0
}

#######################################
# Tell whether a host name belongs to a known crawler.
# Globals:   bots (read)
# Arguments: $1 - host name
# Returns:   index into bots of the matching domain, 0 if it is not a bot.
#######################################
isBot() {
  _domainIndex "$1" "${bots[@]}"
}

#######################################
# Tell whether a host name belongs to a listed user domain.
# Globals:   domains (read)
# Arguments: $1 - host name
# Returns:   index into domains of the matching domain, 0 if none matches.
#######################################
isUser() {
  _domainIndex "$1" "${domains[@]}"
}

#######################################
# Sum the visits per domain of a stats file and print the totals.
# Arguments: $1 - stats file with "visits<TAB>prefix<TAB>domain" lines
#            $2 - domain that is always listed first, even with 0 visits
# Outputs:   one "total<TAB>domain" line per domain, in first-seen order
#######################################
_aggregateByDomain() {
  local statsFile=$1
  local -a names=("$2") totals=(0)
  local visits _prefix domain slot known

  while read -r visits _prefix domain; do
    # Skip blank or malformed lines.
    [[ -n $domain && $visits =~ ^[0-9]+$ ]] || continue
    known=false
    for slot in "${!names[@]}"; do
      if [[ ${names[$slot]} == "$domain" ]]; then
        totals[$slot]=$((totals[slot] + visits))
        known=true
        break
      fi
    done
    if ! $known; then
      names+=("$domain")
      totals+=("$visits")
    fi
  done < "$statsFile"

  for slot in "${!names[@]}"; do
    printf '%s\t%s\n' "${totals[$slot]}" "${names[$slot]}"
  done
}

#######################################
# Run the whole analysis in the current directory.
# Globals:   bots, domains (read); MIN_VISITS (read, optional)
# Outputs:   writes logAll, listeIPs, topBots.stats and topUsers.stats;
#            prints the per-domain totals to stdout
#######################################
main() {
  local minVisits=${MIN_VISITS:-100}
  local -r fullIp='^([0-9]{1,3}\.){3}[0-9]{1,3}$'
  local visits ip escaped ipToDomain domainName aBot user entry
  local botsByDomain="" usersByDomain=""

  # Write all logs into one file. With -exec nothing is run when no log
  # exists, whereas a bare `cat` without arguments would wait on stdin.
  find . -type f -name 'access.log*' -exec cat {} + > logAll

  # Extract every "xxx.xxx.xxx." prefix, count the unique entries and sort
  # them by descending count.
  grep -oE -e '(([0-9]){1,3}\.){3}' logAll \
    | sort \
    | uniq -c \
    | sort -r -g > listeIPs

  # Read the number of visits and the prefix, line by line.
  while read -r visits ip; do
    [[ $visits =~ ^[0-9]+$ && -n $ip ]] || continue
    # The list is sorted, so the first small count ends the scan.
    if [ "$visits" -le "$minVisits" ]; then
      break
    fi

    # Last full address logged for this prefix. Dots are escaped and a
    # non-digit must precede the match, so "1.2.3." is not found inside
    # "11.2.3.4".
    escaped="${ip//./\\.}"
    ipToDomain=$(grep -oE -e "(^|[^0-9])${escaped}[0-9]{1,3}" logAll | tail -n 1)
    ipToDomain=${ipToDomain#[!0-9]}
    [[ $ipToDomain =~ $fullIp ]] || continue

    # Reverse lookup; keep only the first name when several are returned.
    # stdin is detached so the lookup cannot consume the list being read.
    domainName=$(nslookup "$ipToDomain" < /dev/null \
      | grep -oE -e 'name =.*' \
      | grep -oE -e '[a-zA-Z0-9._-]*$' \
      | head -n 1)
    [[ -n $domainName ]] || continue

    isBot "$domainName"
    aBot=$?
    if [ "$aBot" -ne 0 ]; then
      printf -v entry '%s\t%s*\t%s\n' "$visits" "$ip" "${bots[$aBot]}"
      botsByDomain+=$entry
      continue
    fi

    isUser "$domainName"
    user=$?
    if [ "$user" -ne 0 ]; then
      # Listed user domain: store the domain, not the host name.
      printf -v entry '%s\t%s*\t%s\n' "$visits" "$ip" "${domains[$user]}"
    else
      printf -v entry '%s\t%s*\t%s\n' "$visits" "$ip" "$domainName"
    fi
    usersByDomain+=$entry
  done < ./listeIPs

  printf '%s' "$botsByDomain" > topBots.stats
  printf '%s' "$usersByDomain" > topUsers.stats

  ##### For bots #####
  _aggregateByDomain ./topBots.stats "googlebot.com"

  printf '\n\n\n\n'

  ##### For users #####
  _aggregateByDomain ./topUsers.stats "abo.wanadoo.fr"
}

main "$@"