#!/usr/bin/perl -w
#
################################################################################
# Script to retrieve data from the urchin.cgi engine and print a text-based
# report, which could then be emailed, converted to HTML, etc.
#
# To customize this script for your Urchin installation, edit the defaults below
# for $urchinpath, $profile, and $language.
#
#
# Usage: u5data_extractor.pl [--begin YYYYMMDD] [--end YYYYMMDD] [--help]
#                            [--language LA] [--max N] [--profile PROFILE]
#                            [--report RRRR] [--urchinpath PATH]
#
# Where:
#   '--begin YYYYMMDD' specifies the starting date
#       (default: first day of the previous month)
#   '--end YYYYMMDD' specifies the ending date
#       (default: last day of the previous month)
#
#       Note if date range is invalid - the default is last week.
#
#   '--help' displays this message
#   '--language LA' specifies the language for the report. Available
#       languages are: ch, en, fr, ge, it, ja, ko, po, sp, and sw
#   '--max N' is the maximum number of entries printed in the top 10 report
#       types (default is 10).
#   '--profile PROFILE' specifies the profile to retrieve data from. The
#       default is specified at the beginning of this script
#   '--report RRRR' is the 4-digit number for the report (default is 1304)
#       Run this script with an unavailable report number to see a list of
#       available reports
#   '--urchinpath PATH' specifies the path to the Urchin distribution.
#       Note that you can edit the script and set your path as a default
#
# Copyright (c) 2001,2002,2003 Quantified Systems, Inc
#
# DISCLAIMER: This program is supplied AS-IS, with no warranties
#             implied or expressed. Use at your own risk.
#
################################################################################

use strict;
use warnings;
use Getopt::Long;

#-------------------------------------------------------------------------------
# Define the path to the Urchin distribution and default values for profile
# name, language, report number, and maximum number of items to show in a list.
#
# Edit the following defaults to customize this script for your particular site.
#-------------------------------------------------------------------------------
my $urchinpath = "/usr/local/stats-urchin";  # Path to the Urchin distribution
my $profile    = "SEstats";                  # Name of the default profile
my $language   = "en";                       # Language (see usage for options)

# Report number (see
# /usr/local/urchin/lib/custom/profiletypes/Standard_Website/BC_Reports.rs)
# 1304 = Search Engine Table, 1102 = Summary Table
my $report     = 1304;
my $max        = 10;                         # Number of Top 10 items to show
my $total      = 0;   # This sums individual report items - added by BC 6-Nov-03

#-------------------------------------------------------------------------------
# Do NOT edit anything below this line.
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Calculate the default beginning and ending dates.
#-------------------------------------------------------------------------------
my ($day, $month, $year, $begin, $end);

# BC added 6-Nov-03, beginning date is the 1st day of previous month.
# (The former default was one week ago:
#  ($day, $month, $year) = (localtime(time() - (7*86400)))[3,4,5]; )
($month, $year) = (localtime(time()))[4, 5];
$day    = 1;
$month -= 1;                 # REMEMBER $month is from 0-11!
if ($month == -1) {          # January wraps back to December of last year
    $month = 11;
    $year -= 1;
}
$begin = ($year + 1900) * 10000 + ($month + 1) * 100 + $day;

# BC added 6-Nov-03, end date is the last day of previous month.
# REMEMBER $month is from 0-11! If date range is invalid, Urchin defaults to
# last week.
#-------------------------------------------------------------------------------
# Default ending date: the last day of the previous month. $month (0-11) and
# $year (years since 1900) already refer to the previous month at this point.
# The month length is looked up in a table so that February is handled by the
# same code path as every other month.
#-------------------------------------------------------------------------------
{
    my $full_year = $year + 1900;
    my $is_leap   = ($full_year % 4 == 0 && $full_year % 100 != 0)
                 || ($full_year % 400 == 0);
    my @month_length = (31, ($is_leap ? 29 : 28), 31, 30, 31, 30,
                        31, 31, 30, 31, 30, 31);
    $day = $month_length[$month];
}
# (The former default was yesterday:
#  ($day, $month, $year) = (localtime(time() - 86400))[3,4,5]; )
$end = ($year + 1900) * 10000 + ($month + 1) * 100 + $day;

#-------------------------------------------------------------------------------
# Get the command line options and display help if requested
#-------------------------------------------------------------------------------
my $help = 0;
my $options_ok = GetOptions('begin=i'      => \$begin,
                            'end=i'        => \$end,
                            'help'         => \$help,
                            'language=s'   => \$language,
                            'max=i'        => \$max,
                            'profile=s'    => \$profile,
                            'report=i'     => \$report,
                            'urchinpath=s' => \$urchinpath);
if (!$options_ok) {
    # GetOptions has already warned about the offending option.
    usage();
    exit 1;
}
if ($help) {
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Check for the urchin path, uconf-driver, and urchin.cgi and display help if
# these do not exist.
#-------------------------------------------------------------------------------
if (! -e $urchinpath) {
    print "\nERROR: The Urchin path, $urchinpath, does not exist. Please specify the path to "
        . "the Urchin distribution on the command line, or edit the script and change the "
        . "default for the \$urchinpath variable. Exiting.\n\n";
    usage();
    exit;
}
elsif (! -e "$urchinpath/util/uconf-driver") {
    print "\nERROR: $urchinpath/util/uconf-driver does not exist. Please verify that your "
        . "Urchin path is set correctly and is pointing to a full Urchin distribution. "
        . "Exiting.\n\n";
    usage();
    exit;
}
elsif (! -e "$urchinpath/bin/urchin.cgi") {
    print "\nERROR: $urchinpath/bin/urchin.cgi does not exist. Please verify that your "
        . "Urchin path is set correctly and is pointing to a full Urchin distribution. "
        . "Exiting.\n\n";
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Read Urchin configuration for report id and profile type
#-------------------------------------------------------------------------------
# The driver is run through a list-form pipe so the profile name is passed as
# a single argument and never interpreted by a shell.
my $uconf = "$urchinpath/util/uconf-driver";
open(my $uconf_fh, '-|', $uconf, 'table=profile', 'action=seek', "name=$profile")
    or die "Failed to run $uconf: $!\n";
my $result = do { local $/; <$uconf_fh> };
close($uconf_fh);    # exit status is not checked; a missing recnum is fatal below
$result = '' unless defined $result;

# Each value is captured in list context so a failed match yields undef
# instead of a stale $1 left over from an earlier successful match.
my ($rid) = $result =~ /recnum=(\d+)/;
if ((!defined $rid) || ($rid < 1)) {
    die "Profile \"$profile\" does not exist\n";
}

my ($profiletype) = $result =~ /cr_profiletype="(\S+)"/;
if ((!defined $profiletype) || ($profiletype eq "")) {
    $profiletype = "Standard_Website";
}

my ($reportset) = $result =~ /cs_reportset="(\S+)"/;
if ((!defined $reportset) || ($reportset eq "")) {
    $reportset = "Standard";
}
$reportset .= ".rs";

#-------------------------------------------------------------------------------
# Read the reportset for a list of available reports
#-------------------------------------------------------------------------------
my (%name, %ifield, %table, %type, @parts, $line, $count);
my $rrs = "$urchinpath/lib/reporting/profiletypes/$profiletype/$reportset";
my $crs = "$urchinpath/lib/custom/profiletypes/$profiletype/$reportset";

# Try the standard reportset location first, then the custom one.
my $rs_fh;
open($rs_fh, '<', $rrs)
    or open($rs_fh, '<', $crs)
    or die "Failed to open reportset $rrs or $crs: $!\n";

while (my $entry = <$rs_fh>) {
    chomp $entry;
    next if $entry =~ /^#/;    # Skip commented entries

    #---------------------------------------------------------------------------
    # Separate the reportset parts. Tokens are whitespace separated; a token
    # may be wrapped in double quotes to contain whitespace.
    #---------------------------------------------------------------------------
    @parts = ();               # do not let fields of the previous entry leak in
    $count = 0;
    $line  = $entry;
    while (length $line) {     # length, not truth: a trailing "0" is a token too
        if ($line =~ /^"(.*?)"\s+(.*)/) {
            $parts[$count] = $1;
            $line = $2;
            $count++;
        }
        elsif ($line =~ /^"(.*?)"\s*$/) {
            $parts[$count] = $1;
            $line = "";
        }
        elsif ($line =~ /^(.*?)\s+(.*)/) {
            $parts[$count] = $1;
            $line = $2;
            $count++;
        }
        elsif ($line =~ /^(.*?)\s*$/) {
            $parts[$count] = $1;
            $line = "";
        }
        else {
            last;              # nothing matched; never spin on the same text
        }
    }

    #---------------------------------------------------------------------------
    # Verify the entry is a report. Store the name, ifield and table
    #---------------------------------------------------------------------------
    next unless defined $parts[0] && defined $parts[1];
    next if $parts[1] !~ /report/i;

    $name{$parts[0]}   = $parts[3];
    $ifield{$parts[0]} = $parts[7];
    $table{$parts[0]}  = $parts[9];
    $type{$parts[0]}   = $parts[15];
}
close($rs_fh);

#-------------------------------------------------------------------------------
# Exit if report is not available
#-------------------------------------------------------------------------------
if (!$name{$report}) {
    print "Report $report is not available. Available reports are:\n";
    foreach my $rep (sort (keys(%name))) {
        print " $rep $name{$rep}\n";
    }
    exit;
}

#-------------------------------------------------------------------------------
# Create query string, retrieve data, and print out values
#-------------------------------------------------------------------------------
my $cgi = "$urchinpath/bin/urchin.cgi";
$ENV{REQUEST_METHOD} = "GET";
$ENV{QUERY_STRING}   = "vid=$report&bd=$begin&ed=$end&v=$max&rid=$rid&lang=$language&x=1&user=(admin)";

my $report_type   = defined $type{$report}   ? $type{$report}   : '';
my $report_ifield = defined $ifield{$report} ? $ifield{$report} : '';

$count = 0;
# The bareword handle CGI is kept on purpose: the code that prints the total
# after this loop closes it by that name.
open(CGI, '-|', $cgi) or die "Failed to open urchin.cgi: $!";
while (<CGI>) {
    chomp;

    #---------------------------------------------------------------------------
    # Skip Content-type and empty lines and print comment lines
    #---------------------------------------------------------------------------
    next if $_ =~ /^Content-type:/;
    next if $_ =~ /^$/;
    if ($_ =~ /^##/) {
        printf "############################################################\n";
        next;
    }
    if ($_ =~ /^#/) {
        print "$_\n";
        next;
    }

    #---------------------------------------------------------------------------
    # Only print up to $max entries for TOP 10 type reports. Surplus rows are
    # read and discarded (rather than exiting here) so the pipe drains cleanly
    # and the TOTAL line after the loop is still printed; the total therefore
    # covers the displayed rows only.
    #---------------------------------------------------------------------------
    if ($report_type eq "TOP") {
        $count++;
        next if $count > $max;
    }

    #---------------------------------------------------------------------------
    # Format output for reports
    #---------------------------------------------------------------------------
    @parts = split(/\t/, $_);
    if (!defined $parts[1]) {
        # No value column on this row: print the label alone, nothing to sum.
        printf("%-46s %13s\n", (defined $parts[0] ? $parts[0] : ''), '');
        next;
    }

    # BC 6-Nov-2003 Add up totals in here i.e. before formatting
    $total += $parts[1];

    if ($name{$report} =~ /summary$/i) {
        # Summary rows are identified by their label in column 0.
        if ($parts[0] !~ /length/i) {
            $parts[1] /= 100;
        }
        if ($parts[0] =~ /(visitors|sessions|pageviews|hits)/i) {
            $parts[1] = formatNumber($parts[1]);
        }
        if ($parts[0] =~ /bytes/i) {
            $parts[1] = formatBytes($parts[1]);
        }
        if ($parts[0] =~ /length/i) {
            $parts[1] = formatTime($parts[1]);
        }
    }
    else {
        # Other reports are formatted according to the report's ifield.
        if ($report_ifield =~ /^(visitors|sessions|pageviews|hits|trans|items)$/i) {
            $parts[1] = formatNumber($parts[1]);
        }
        if ($report_ifield =~ /^(bytes)$/i) {
            $parts[1] = formatBytes($parts[1]);
        }
        if ($report_ifield =~ /^(time)$/i) {
            $parts[1] = formatTime($parts[1]);
        }
        if ($report_ifield =~ /^(revenue)$/i) {
            $parts[1] /= 100;
            $parts[1] = formatNumber($parts[1]);
        }
    }

    #---------------------------------------------------------------------------
    # Print the report data
    #---------------------------------------------------------------------------
    printf("%-46s %13s\n", $parts[0], $parts[1]);
}
# BC added 6-Nov-2003. Format & print total
$total = formatNumber($total);
printf("%-46s %13s\n", "\nTOTAL", $total);

# close() on a pipe also fails when the child exits non-zero, in which case $!
# is empty and the exit status is in $?; report whichever one applies.
close(CGI)
    or die($! ? "Failed to close urchin.cgi: $!"
              : "Failed to close urchin.cgi: exit status $?");

exit;

#-------------------------------------------------------------------------------
# Subroutines
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Changes a number into the proper byte format
#
#   formatBytes($bytes) -> string such as "1.50 KB"
#
# The value is divided by 1024 until it is below 1024 or the largest known
# unit (TB) is reached, then printed with two decimals and the unit suffix.
# An undefined argument is treated as 0.
#-------------------------------------------------------------------------------
sub formatBytes {
    my ($bytes) = @_;
    $bytes = 0 unless defined $bytes;

    my @suffixes = ("B", "KB", "MB", "GB", "TB");
    my $unit = 0;

    # ">=" so that exactly 1024 becomes "1.00 KB"; the index bound keeps huge
    # values in TB instead of running off the end of the suffix list.
    while ($bytes >= 1024 && $unit < $#suffixes) {
        $bytes /= 1024;
        $unit++;
    }
    return sprintf("%4.2f %2s", $bytes, $suffixes[$unit]);
}

#-------------------------------------------------------------------------------
# Formats a number with commas to separate the thousands. Also displays non-
# integer numbers with 2 decimal places.
#
#   formatNumber($number) -> string such as "1,234", "1,234.56" or "0.50"
#
# The value is rounded to two decimals first. Whole values of 1 or more are
# shown without decimals; values that round to zero are shown as "0.00".
# An undefined or empty argument is treated as 0.
#-------------------------------------------------------------------------------
sub formatNumber {
    my ($number) = @_;
    $number = 0 unless defined $number && length $number;

    # Round once, up front, so a carry (e.g. 1.999) propagates into the
    # integer part instead of producing a three-digit fraction.
    my $fixed = sprintf("%.2f", $number);
    my ($sign, $whole, $cents) = $fixed =~ /^(-?)(\d+)\.(\d\d)$/
        or return $fixed;    # inf/nan: nothing sensible to group

    # Insert a comma before every trailing group of three digits.
    1 while $whole =~ s/^(\d+)(\d{3})/$1,$2/;

    $sign = '' if $whole eq '0' && $cents eq '00';    # never print "-0.00"

    return "$sign$whole" if $cents eq '00' && $whole ne '0';
    return "$sign$whole.$cents";
}

#-------------------------------------------------------------------------------
# Formats a number into a XX:XX:XX time format.
#-------------------------------------------------------------------------------
#   formatTime($seconds) -> "HH:MM:SS"
#
# Converts a number of seconds into hours, minutes and seconds, each printed
# with at least two digits (hours may use more). Fractions of a second are
# dropped. An undefined or empty argument is treated as 0; a negative value is
# formatted from its magnitude with a leading "-".
sub formatTime {
    my ($seconds) = @_;
    $seconds = 0 unless defined $seconds && length $seconds;

    # Work on the magnitude: Perl's % takes the sign of its right operand, so a
    # negative input would otherwise produce a misleading positive remainder.
    my $sign = $seconds < 0 ? '-' : '';
    $seconds = int(abs($seconds));

    my $sec  = $seconds % 60;
    my $min  = int($seconds / 60) % 60;
    my $hour = int($seconds / 3600);
    return sprintf("%s%02d:%02d:%02d", $sign, $hour, $min, $sec);
}

#-------------------------------------------------------------------------------
# Displays the usage for this script
#
# Prints the option summary to STDOUT. Takes no arguments and returns nothing
# useful. The text interpolates $0 and the current value of $profile.
#-------------------------------------------------------------------------------
sub usage {
    print <<"END_USAGE";
Usage: $0 [--begin YYYYMMDD] [--end YYYYMMDD] [--help] \\
          [--language LA] [--max N] [--profile PROFILE] [--report RRRR] \\
          [--urchinpath PATH]

Where:
  '--begin YYYYMMDD' specifies the starting date (default is the first day
      of the previous month),
  '--end YYYYMMDD' specifies the ending date (default is the last day of
      the previous month),
  '--help' displays this message
  '--language LA' specifies the language for the report. Available
      languages are: ch, en, fr, ge, it, ja, ko, po, sp, and sw
  '--max N' is the maximum number of entries printed in the top 10 report
      types (default is 10).
  '--profile PROFILE' specifies the profile to retrieve data from. The
      default is $profile.
  '--report RRRR' is the 4-digit number for the report (default is 1304)
      Available reports are defined in the report sets.
  '--urchinpath PATH' specifies the path to the Urchin distribution.
      Note that you can edit the script and set your path as a default

END_USAGE
    return;
}