#!/local/std/bin/perl
#
# v 0.70
#
# Make the counter's directory searchable for require.  It is appended
# to @INC, so a same-named file earlier on the search path would win.
push @INC, "/home/discovery/simra/public_html/counter/";

# Shared settings and helpers.  NOTE(review): counter.config is not
# visible from this file; names used later in the script that are not
# defined here (for example $data_dir and make_logfile) presumably come
# from it -- confirm.
require "counter.config";


# Emit the CGI header followed by the static top of the page.  The
# heredoc is non-interpolating: it holds no variables, and quoting it
# keeps any later '$' or '@' in the markup from being interpolated.
print <<'EOI';
Content-type: text/html

<html>
<head>
<link rel="stylesheet" href="http://www.cim.mcgill.ca/~simra/style.css" type="text/css">
<title>Google hit stats for www.cim.mcgill.ca/~simra</title>
</head>
<body>
<div class="header">
<table width="100%">
<tr><td valign=bottom align=left>
<H1>google hit stats</h1>
<a href="../index.html">home</a> <a href="../contact.html">contact</a> <a href="../research.html">publications</a> <a href="../cv.html">cv</a> <a href="../software.html">software</a> 
<td valign=bottom align=right>
<a href="../koan.html">koan</a>
<a href="../simra_net.html">simra.net</a>
</td></tr>
</table>
</div>
<div class="core">
<p>
I find the google queries that hit my home page to be kind of interesting.
<p>

EOI

# Called with explicit empty parentheses: the old "&make_logfile;" form
# silently forwards the caller's @_ to the callee.
# NOTE(review): presumably this (re)builds the log read further down --
# its definition is not visible here, confirm.
make_logfile();


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Tally the hit log: hits per top-level domain, per domain, per host,
# per day, per browser, per OS, per page and per search query.
#
# Each log line is split on '#'.  The fields, as used below:
#   0  comma-separated date parts; the first three are joined with '-'
#   1  key for %PG_count (presumably the page that was hit -- confirm)
#   2  remote host name or numeric address
#   3  string handed to OS_recognize()/BR_recognize() (presumably the
#      user-agent -- confirm)
#   4  referrer URL, from which the search query is extracted
#   5  any value containing "1" causes the line to be skipped
#      (NOTE(review): meaning of this flag is not visible here)
#
# $data_dir, @ignore_hosts, OS_recognize() and BR_recognize() are not
# defined in this file; presumably they come from counter.config.
#
# The tally hashes (%count, %domain_count, %host_count, %day_count,
# %OS_count, %BR_count, %PG_count, %RF_count) stay package globals so
# that code outside this section can keep reading them.
#
my $hit_log_path     = "$data_dir/google.log";
my $unknown_log_path = "$data_dir/unrecognized.log";

# Neither file is essential to producing a page, so a failed open is
# reported on STDERR and the corresponding work is simply skipped.
my $hit_log_fh;
unless (open($hit_log_fh, '<', $hit_log_path)) {
    warn "Cannot read $hit_log_path: $!\n";
    undef $hit_log_fh;
}

my $unknown_log_fh;
unless (open($unknown_log_fh, '>', $unknown_log_path)) {
    warn "Cannot write $unknown_log_path: $!\n";
    undef $unknown_log_fh;
}

ENTRY:
while ($hit_log_fh and defined(my $line = <$hit_log_fh>)) {
    my @entry = split /#/, $line;

    # Skip hosts matching any configured pattern, and flagged lines.
    my $host = defined $entry[2] ? $entry[2] : '';
    foreach my $pattern (@ignore_hosts) {
        next ENTRY if $host =~ /$pattern/i;
    }
    next ENTRY if defined $entry[5] && $entry[5] =~ /1/;

    # Address: a last label made only of digits means a numeric address,
    # which has no domain and is tallied as "??".  (The old test compared
    # the label numerically with 0, so an address ending in ".0" was
    # counted as a domain name.)
    my @labels = split /[.]/, $host;
    my $tld    = @labels ? $labels[-1] : '';
    if ($tld ne '' && $tld !~ /^\d+$/) {
        $count{$tld}++;
        # A single-label host has no second-level part to prepend.
        my $domain = @labels > 1 ? "$labels[-2].$tld" : $tld;
        $domain_count{$domain}++;
    }
    else {
        $count{"??"}++;
    }
    $host_count{$host}++;

    # Date
    my @date_parts = split /,/, (defined $entry[0] ? $entry[0] : '');
    my $date = join '-', map { defined $_ ? $_ : '' } @date_parts[0 .. 2];
    $day_count{$date}++;

    # OS; unrecognized strings are recorded for later inspection.
    my $os = OS_recognize($entry[3]);
    if ($os eq "Unknown" && $unknown_log_fh) {
        print {$unknown_log_fh} "OS:\t$entry[3]\n";
    }
    $OS_count{$os}++;

    # Browser
    my $browser = BR_recognize($entry[3]);
    if ($browser eq "Unknown" && $unknown_log_fh) {
        print {$unknown_log_fh} "browser:\t$entry[3]\n";
    }
    $BR_count{$browser}++;

    # Page
    my $page = defined $entry[1] ? $entry[1] : '';
    $PG_count{$page}++;

    # Referrer.  Encoded ':' and '=' are restored and an encoded '?' is
    # turned into '&' before decoding, so that a "q=" parameter inside an
    # embedded (encoded) URL is delimited like a top-level one.
    my $referrer = defined $entry[4] ? $entry[4] : '';
    $referrer =~ s/%3A/:/g;
    $referrer =~ s/%3D/=/g;
    $referrer =~ s/%3F/&/g;
    my ($first_token) = $referrer =~ /(\S+)/;
    $first_token ||= "None";
    my $url = unescape($first_token);

    # Value of the leftmost "q=", up to the next '&' or end of string.
    # One pattern covers both cases, so a value of "0" or an empty value
    # no longer falls through to a greedy match that swallowed the
    # following parameters.
    my ($query) = $url =~ /q=([^&]*)/;

    # Hits without a search query used to be tallied under the empty
    # string and showed up as a blank row in the report; skip them.
    next ENTRY unless defined $query;
    $query =~ tr/+/ /;
    $query =~ tr/"//d;
    $query =~ tr/A-Z/a-z/;
    next ENTRY unless $query =~ /\S/;

    $RF_count{$query}++;
}

close($hit_log_fh) if $hit_log_fh;
if ($unknown_log_fh) {
    # Buffered write errors only surface at close.
    close($unknown_log_fh)
        or warn "Error closing $unknown_log_path: $!\n";
}

#
# Hits per search query, most frequent first.
#
# %RF_count maps a query string to its hit count.  The query text comes
# from Referer headers sent by visitors, i.e. it is untrusted, so it is
# HTML-escaped before being written into the page.
#
print "<h3>Recent Queries</h3>\n";
{
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );

    # Ties are broken alphabetically so the listing is stable between
    # requests instead of following hash order.
    my @queries_by_hits =
        sort { $RF_count{$b} <=> $RF_count{$a} or $a cmp $b } keys %RF_count;

    foreach my $query (@queries_by_hits) {
        (my $safe_query = $query) =~ s/([&<>"'])/$entity_for{$1}/g;
        print "$safe_query ($RF_count{$query})<br>\n";
    }
}

# Close the "core" div and the body opened by the page header before
# ending the document.
print "</div>\n</body>\n</HTML>\n";



# unescape($text)
#
# Decode URL percent-escapes in $text and return the decoded copy; the
# argument itself is not modified.  An undefined argument is returned
# as undef.
#
# "%25" is turned into "%" first, so a doubly-encoded escape such as
# "%2520" is decoded all the way to a space.
#
# Only "%" followed by two hexadecimal digits is treated as an escape;
# anything else (a lone "%", "%zz", "%(a") is left untouched.  The
# previous version interpolated the two captured characters into a
# regex, so input like "%(a" aborted the script with a regex compile
# error, and it overwrote the package globals $url, $code and $unc.
sub unescape {
    my ($text) = @_;
    return $text unless defined $text;

    $text =~ s/%25/%/g;
    $text =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

    return $text;
}
