#!/bin/bash
# @file
# @brief Import Server Logs for Piwik from Hosts
# @see http://edoceo.com/pub/piwik-import.sh
# @see http://forum.piwik.org/read.php?2,98270,98270
#
# For every "host:kind" entry in host_kind_list this script copies the web
# server's log directory into a scratch directory, removes the *log files at
# the source, reloads the server, and feeds each access*log file to Piwik's
# import_logs.py. Afterwards it runs the Piwik archiver and prints lines that
# look like errors from the two run logs under /var/log.
#
# The importer regex used below expects each access-log line to begin with
# the virtual host name, so the web servers must log in this format:
#
# Apache Config:
#   LogFormat "%V %h %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" vhost
#   CustomLog /var/log/apache2/access.log vhost
#
# Lighttpd Config:
#   accesslog.format = "%V %h %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
#   accesslog.filename = var.logdir + "/access.log"
#
# Nginx Config:
#   log_format vhost '$http_host $remote_addr - [$time_local] "$request" $status $bytes_sent "$http_referer" "$http_user_agent"';
#   access_log /var/log/nginx/access.log vhost;

# piwik_auth and piwik_conf are not referenced anywhere below; they are kept
# so local customisations that rely on them keep working.
piwik_auth=""
piwik_root="/opt/piwik"
piwik_conf="$piwik_root/config/config.ini.php"
piwik_site="http://your.host/piwik"

# Whitespace-separated "host:kind" entries. "kind" is both the directory name
# under /var/log and the init script name under /etc/init.d. An entry with an
# empty host part (":apache2") is processed on the local machine.
host_kind_list="
www1.host.tld:apache2
www2.host.tld:lighttpd
www3.host.tld:nginx
"

# Number of importer recorders: one third of the processor count, plus one.
# A missing /proc/cpuinfo counts as zero processors, giving one recorder.
cpu=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
cpu=$(( ${cpu:-0} / 3 + 1 ))

# Optional importer switches. Each is an array so that an empty setting adds
# no argument at all; uncomment an alternative to enable it.
add_new_sites=()
# add_new_sites=(--add-sites-new-hosts)

debug=()
# debug=(--debug)
# debug=(--debug --debug --debug)

show_progress=()
# show_progress=(--show-progress --show-progress-delay=2)

#######################################
# Import one access log into Piwik.
# Globals:   piwik_root, piwik_site, cpu, debug, add_new_sites,
#            show_progress (all read)
# Arguments: $1 - path of the log file to import
#            $2 - optional Piwik site id; passed as --idsite when non-empty
# Outputs:   prints the line count to stdout; importer stdout and stderr
#            are appended to /var/log/piwik-import.log
# Returns:   exit status of import_logs.py
#######################################
do_piwik() {
  local file="$1"
  local site="${2:-}"
  local -a site_opt=()
  local lc

  if [ -n "$site" ]; then
    site_opt=("--idsite=$site")
  fi

  lc=$(wc -l "$file")
  echo "Lines to Process: $lc"

  # NOTE(review): the named-group labels in the regex were missing from the
  # copy of this script under review (bare "(?P" is not valid Python regex)
  # and have been restored from the field order of the log formats above.
  # Confirm the third label ("userid") against the installed import_logs.py.
  #
  # stdout is redirected before "2>&1" so that importer errors land in the
  # log file that is searched for errors at the end of the run.
  python "$piwik_root/misc/log-analytics/import_logs.py" \
    "${debug[@]}" \
    "${add_new_sites[@]}" \
    "${show_progress[@]}" \
    --url="$piwik_site" \
    --idsite-fallback=3 \
    "${site_opt[@]}" \
    --log-format-regex='(?P<host>\S+) (?P<ip>\S+) (?P<userid>\S+) \[(?P<date>[\w\/\:]+) (?P<timezone>[\d\-\+]+)\] "\w+ (?P<path>.*?)(?: \S+)" (?P<status>\d+) (?P<length>\d+) "(?P<referrer>.*?)" "(?P<user_agent>.*?)"' \
    --enable-bots \
    --enable-http-errors \
    --enable-http-redirects \
    --enable-static \
    --strip-query-string \
    --recorders="$cpu" \
    "$file" \
    >> /var/log/piwik-import.log 2>&1
}

#
# Remove Old Logs
rm -fr /var/log/piwik-*log

#
# Load From Apache Hosts
for host_kind in $host_kind_list; do

  kind=${host_kind#*:}
  host=${host_kind%:*}

  echo "Processing: $host ($kind)"

  # Never continue without a scratch directory: the rsync below runs with
  # --delete against the current directory.
  d=$(mktemp -d) || {
    echo "mktemp failed; skipping $host ($kind)" >&2
    continue
  }

  # The subshell confines the directory change to this iteration.
  (
    cd "$d" || exit 1

    # If a host is named, copy its logs over SSH, then remove the old logs
    # there and reload the server; otherwise do the same on this machine.
    # The source logs are removed only after a successful copy.
    if [ -n "$host" ]; then
      if ! rsync -a --delete "$host:/var/log/$kind/" ./; then
        echo "rsync from $host failed; remote logs left untouched" >&2
        exit 1
      fi
      ssh "$host" "rm /var/log/$kind/*log; /etc/init.d/$kind reload >/dev/null;"
    else
      if ! rsync -a --delete "/var/log/$kind/" ./; then
        echo "rsync of /var/log/$kind failed; logs left untouched" >&2
        exit 1
      fi
      rm /var/log/"$kind"/*log
      "/etc/init.d/$kind" reload >/dev/null
    fi

    # ls *.gz
    # Import each matching file separately; do_piwik treats a second
    # argument as a site id, so the glob must not be passed to it directly.
    for log in access*log; do
      [ -e "$log" ] || continue  # glob matched nothing
      do_piwik "$log"
    done

    # echo "Errors from: $host" >> /tmp/webserver-errors.log
    # cat error*log >> /tmp/webserver-errors.log
  )

  rm -fr "${d:?}"

done

#
# Now run the Archiver
php "$piwik_root/misc/cron/archive.php" --url="$piwik_site" \
  >> /var/log/piwik-archive.log 2>&1
# tail -n 20 /var/log/piwik-archive.log

echo
# echo "Errors:"
# "|| true" keeps the script's exit status at zero when nothing matches.
grep -i -e 'error' -e 'fatal' -e 'warn' \
  /var/log/piwik-import.log /var/log/piwik-archive.log \
  | grep -v '0 HTTP' \
  | grep -v 'without error' \
  | grep -v 'no error' \
  || true

#
# Produce a Report of NotFound Sites
# grep 'No Piwik' /var/log/piwik-import.log | cut -d' ' -f4- | sort | uniq
# grep 'Invalid line' /var/log/piwik-*log | cut -d' ' -f4-