#!/local/std/bin/perl
#
# v 0.70
#
# Prints a "google hit stats" page: a fixed page header, followed by the
# search queries found in the referrer field of $data_dir/google.log,
# listed most-frequent first.  While reading the log it also tallies hits
# per top-level domain, domain, host, day, OS, browser and page.
#
# NOTE(review): this section reached review in a damaged state -- the
# here-document opener, the <LOG> read in the while loop and whatever
# markup the printed strings contained were missing.  The opener and the
# read are restored below; the printed text is kept exactly as it was
# visible.  Any markup (and a Content-type header, if the page had one)
# must be restored from the original template -- TODO confirm.
#
push @INC, "/home/discovery/simra/public_html/counter/";
require "counter.config";   # presumably provides $data_dir and @ignore_hosts -- verify

print <<EOI;
Google hit stats for www.cim.mcgill.ca/~simra

google hit stats

home contact publications cv software
koan simra.net

I find the google queries that hit my home page to be kind of interesting.

EOI

make_logfile();

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Hits per domain, per day per Browser and per OS
#
my $log_path     = "$data_dir/google.log";
my $unknown_path = "$data_dir/unrecognized.log";

# The bareword handle names (including the UNKONOWN spelling) and the
# package-global tally hashes are kept as they were, in case code outside
# this section refers to them.  A failed open is reported but not fatal:
# reading an unopened handle simply yields no lines, so the page is still
# produced (with an empty query list), as before.
open(LOG, "<", $log_path)
    or warn "Cannot read $log_path: $!\n";
open(UNKONOWN, ">", $unknown_path)
    or warn "Cannot write $unknown_path: $!\n";

ENTRY:
while (my $line = <LOG>) {
    chomp $line;
    next ENTRY unless $line =~ /\S/;    # a blank line carries no entry

    # One entry per line, fields separated by '#'.  As used below:
    #   0: date as comma-separated parts   1: page      2: remote host
    #   3: user-agent string               4: referrer  5: flag; an entry
    #                                                      whose flag contains
    #                                                      "1" is skipped
    my ($stamp, $page, $host, $agent, $referrer, $flag) = split /#/, $line;
    for my $field ($stamp, $page, $host, $agent, $referrer, $flag) {
        $field = '' unless defined $field;
    }

    # Entries from hosts matching any pattern in @ignore_hosts are skipped.
    # The patterns are applied as regular expressions, case-insensitively.
    my $ignore = 0;
    foreach my $pattern (@ignore_hosts) {
        $ignore = 1 if $host =~ /$pattern/i;
    }
    print STDERR "Entry5: $flag\n";     # debug trace, emitted for every entry
    $ignore = 1 if $flag =~ /1/;
    next ENTRY if $ignore;

    #address
    # A host whose last dot-separated label contains a non-digit is treated
    # as a name and counted under its top-level label and its last two
    # labels; anything else (a numeric address, or an empty host) is
    # counted under "??".
    my @labels = split /[.]/, $host;
    my $tld = @labels ? $labels[-1] : '';
    if ($tld =~ /[^0-9]/) {
        $count{$tld}++;
        my $domain = @labels > 1 ? "$labels[-2].$tld" : $tld;
        $domain_count{$domain}++;
    }
    else {
        $count{"??"}++;
    }
    $host_count{$host}++;

    #date
    my @date_parts = split /,/, $stamp;
    my $date = join '-', map { defined $_ ? $_ : '' } @date_parts[0 .. 2];
    $day_count{$date}++;

    #OS
    my $os = OS_recognize($agent);
    print UNKONOWN "OS:\t$agent\n" if $os eq "Unknown";
    $OS_count{$os}++;

    #Browser
    my $browser = BR_recognize($agent);
    print UNKONOWN "browser:\t$agent\n" if $browser eq "Unknown";
    $BR_count{$browser}++;

    #page
    $PG_count{$page}++;

    #referrer / query
    # Pre-decode a few escapes before the general unescape: %3A -> ':',
    # %3D -> '=', and %3F (an escaped '?') -> '&'.
    $referrer =~ s/%3A/:/g;
    $referrer =~ s/%3D/=/g;
    $referrer =~ s/%3F/&/g;
    my ($source) = $referrer =~ /(\S+)/;
    $source ||= "None";
    my $url = unescape($source);
    # $url=~s/(.{120})(.*?)/$1\n$2/;

    # The query is whatever follows "q=" up to the next '&', or up to the
    # end of the URL when no '&' follows.  Entries without a query are
    # tallied under the empty string.
    my ($query) = $url =~ /q=(.*?)&/;
    ($query) = $url =~ /q=(.*)$/ unless $query;
    $query = '' unless defined $query;
    $query =~ s/\+/ /g;
    $query =~ s/"//g;
    $query =~ tr/A-Z/a-z/;
    $RF_count{$query}++;
}
close(LOG);
close(UNKONOWN);

#
# Hits per Referrer
#
print "\n\nRecent Queries\n\n\n";

# Query text comes from referrer strings supplied by remote clients, so it
# is entity-escaped before being written into the page.
my %html_entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');

# Most frequent first; ties are broken alphabetically so the listing is
# the same from one run to the next.
foreach my $term (sort { $RF_count{$b} <=> $RF_count{$a} or $a cmp $b } keys %RF_count) {
    (my $shown = $term) =~ s/([&<>"])/$html_entity{$1}/g;
    print "$shown ($RF_count{$term})\n\n";
}
# Final output of the page (any markup this string once held is not visible here).
print "\n";

# unescape($text)
#
# Returns a copy of $text with every URL percent-escape (a '%' followed by
# two hexadecimal digits, either case) replaced by the byte it encodes.
#
#   - Decoding is a single left-to-right pass, so text produced by one
#     escape is never decoded a second time: "%2541" yields "%41", not "A".
#   - A '%' that is not followed by two hex digits is left untouched.
#   - The argument is not modified and no package variables are written;
#     an undefined argument is returned as-is.
sub unescape {
    my ($text) = @_;
    return $text unless defined $text;

    (my $decoded = $text) =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    return $decoded;
}