#!/usr/local/bin/perl
# Listing_3.pl
=head1 NOTES
This script calculates the memetic isolations for each chosen trait in Murdock.
As with all the scripts, Perl must be installed.
The usage is:
Place the raw data file (dg_raw_data.txt) in the same directory as the script, then
perl -w Listing_3..pl
The first time the script is run, you'll need to make the distance matrix.
This will be created as a file anthro_dist_table.txt
After you've run the script once, you can access the distances from the file,
so to save time, then remove the "comment outs" on lines 68 and 141.
As with all Perl scripts, they are offered as is, without any warranties concerning
safety, functionality etc
The author welcomes any comments on any aspects of the software.
to: dgatherer2002@yahoo.co.uk
This is an occasionally on-going project and newer versions of code are frequently
available, so if you want the really rough cuts, please ask.
=cut
#==============PRELIMINARIES===============================#
use strict;
my $pi = atan2(1,1) * 4; # constant pi
my $r = 6378; # constant radius of earth (miles)
print "\nDistance maker: \n\nopening input file.............";
open (INFILE, 'dg_raw_data.txt') or die "can't open infile: $!";
my @input = <INFILE>;
my $size = @input;
print "\n\nThe table has $size lines (should say 864 lines, if not wrong input file)."; # just a check for correct input format
print "\n\nmaking array..................";
my @matrix;
my @line;
for(my $entry=1;$entry<=$#input;$entry++) # each line of the raw data
{
@line = (split /\t/, $input[$entry]);
push @matrix, [ @line ]; # make anonymous array of arrays
}
my $av_dist=0; # the average distance
my $no_of_dists=0; # counter
my $max_dist=-1000; # top and bottom dists
my $min_dist=1000; # arbitrarily low and high
my %dist_hash; # holds all the distances
my %attributes; # holds the attribute being measured
my $dims = $#line-1;
#==============CALCULATE DISTANCES===============================#
#=head1 # now calculate the similarity matrix
print "\nopening output file............";
open(OUTFILE, '>anthro_dist_table.txt') or die "can't find the outfile $!\n";
for(my $z=0;$z<=$#matrix;$z++) # top row
{
print OUTFILE "\t$matrix[$z][1]";
}
for(my $x=0;$x<=$#matrix-1;$x++)
{
print OUTFILE "\n$matrix[$x][1]\t";
print "\n$matrix[$x][1]\t";
# for(my $tabs=0;$tabs<=$x;$tabs++) # for padding triangiular matrix
# {
# print OUTFILE "\t";
# }
for(my $y=0;$y<=$#matrix;$y++) # square matrix, or hash
{
my $distance=0; # reset to zero for each pair
my $a1; # = lat1 in radians
my $b1; # = lon1 in radians
my $a2; # = lat2 in radians
my $b2; # = lon2 in radians
$matrix[$x][30] =~ /(\d{1,3})([NS])\s+(\d{1,3})([EW])/; # eg 3N 45W
$a1 = $1*($pi/180);
my $NorS = $2;
$a1 *= -1 if $NorS eq "S";
$b1 = $3*($pi/180);
my $EorW = $4;
$b1 *= -1 if $EorW eq "W";
$matrix[$y][30] =~ /(\d{1,3})([NS])\s+(\d{1,3})([EW])/; # eg 3N 45W
$a2 = $1*($pi/180);
$NorS = $2;
$a2 *= -1 if $NorS eq "S";
$b2 = $3*($pi/180);
$EorW = $4;
$b2 *= -1 if $EorW eq "W";
$distance = &acos(cos($a1)*cos($b1)*cos($a2)*cos($b2)
+cos($a1)*sin($b1)*cos($a2)*sin($b2)
+sin($a1)*sin($a2))*$r;
if($distance > $max_dist)
{
$max_dist = $distance;
}
if($distance < $min_dist)
{
$min_dist = $distance;
}
$av_dist += $distance;
$no_of_dists++;
printf (OUTFILE "%.3f", $distance); # how to make it statistically analysable
print OUTFILE "\t";
}
}
print "\nAverage distance is ";
printf '%.3f', $av_dist/$no_of_dists;
print " out of $no_of_dists counted";
print "\nMax: ";
printf '%.3f', $max_dist;
print "\nMin: ";
printf '%.3f', $min_dist;
close OUTFILE; # seal distance matrix once created
#=cut
#==============CALCULATE CLUSTERING===============================#
for(my $col=7; $col<=29; $col++) # all columns
{
# $attributes{$matrix[0][1]} = $matrix[0][$col];
my @line = split /\t/, $input[0];
print "\n$line[$col]"; # print column titles
for(my $z=0;$z<=$#matrix;$z++) # square
{
$attributes{$matrix[$z][1]} = $matrix[$z][$col];
}
while((my $soc, my $att) = each %attributes)
{
# print "\n$soc\t$att";
}
open(MATFILE, 'anthro_dist_table.txt') or die "can't find the outfile $!\n";
my @reload = <MATFILE>;
my @dists;
for(my $entry=0;$entry<=$#reload;$entry++) # each line of the distance matrix
{
@line = (split /\t/, $reload[$entry]);
push @dists, [ @line ]; # make anonymous array of arrays
}
for(my $x=1;$x<=$#dists-1;$x++)
{
for(my $y=1;$y<=$#dists;$y++) # square matrix, or hash
{
$dist_hash{$dists[$x][0]}{$dists[0][$y]} = $dists[$x][$y]; # HoH
# print "\n$dists[$x][0] to $dists[0][$y] is $dists[$x][$y]";
}
}
my %nn; # hash of nearest neighbours
my %nn_dist;
my %nn_same; # hash of nearest neighbours with identical attribute
my %nn_same_dist;
for(my $x=1;$x<=$#dists-1;$x++)
{
my $nn_dist = my $nn_same_dist = 2000000; # arbitrarily high
for(my $y=1;$y<=$#dists;$y++) # square matrix, or hash
{
if($dist_hash{$dists[$x][0]}{$dists[0][$y]} < $nn_dist
&& $dists[$x][0] ne $dists[0][$y])
{
$nn_dist = $dist_hash{$dists[$x][0]}{$dists[0][$y]};
$nn{$dists[$x][0]} = $dists[0][$y];
$nn_dist{$dists[$x][0]} = $nn_dist;
}
if($dist_hash{$dists[$x][0]}{$dists[0][$y]} < $nn_same_dist
&& $attributes{$dists[$x][0]} eq $attributes{$dists[0][$y]}
&& $dists[$x][0] ne $dists[0][$y]
&& $attributes{$dists[$x][0]} !~ /[\.X]/)
{
$nn_same_dist = $dist_hash{$dists[$x][0]}{$dists[0][$y]};
$nn_same{$dists[$x][0]} = $dists[0][$y];
$nn_same_dist{$dists[$x][0]} = $nn_same_dist;
}
}
}
my $valids = 0; # number of informative comparisons
my $diff = 0; # total extra distance needed
my $yes =0; # numberof nns that are also same
for(my $x=1;$x<=$#dists-1;$x++)
{
if(defined $nn_same{$dists[$x][0]})
{
$diff += $nn_same_dist{$dists[$x][0]} - $nn_dist{$dists[$x][0]};
$valids++;
$yes++ if($nn_same_dist{$dists[$x][0]} == $nn_dist{$dists[$x][0]})
}
}
$diff /= $valids;
print "\nAverage distance to nearest neighbour of same trait type = ";
printf("%.2f", $diff);
print "\nover $valids comparisons";
print "\nPercentage same nns = ";
printf("%.2f", $yes/$valids);
print "\nPercentage isolates = ";
printf("\t%.2f", (1-($yes/$valids)));
}
# subroutine acos input: an angle in radians
# output: returns the arc cosine of the angle
# description: this is needed because perl does not provide an arc cosine function
sub acos
{
my($x) = @_;
$x = sprintf("%.6f", $x);
my $ret = atan2(sqrt(1 - $x**2), $x);
return $ret;
}
© Copyright Journal of Artificial Societies and Social Simulation, [2002]