#!/usr/local/bin/perl # Listing_3.pl =head1 NOTES This script calculates the memetic isolations for each chosen trait in Murdock. As with all the scripts, Perl must be installed. The usage is: Place the raw data file (dg_raw_data.txt) in the same directory as the script, then perl -w Listing_3..pl The first time the script is run, you'll need to make the distance matrix. This will be created as a file anthro_dist_table.txt After you've run the script once, you can access the distances from the file, so to save time, then remove the "comment outs" on lines 68 and 141. As with all Perl scripts, they are offered as is, without any warranties concerning safety, functionality etc The author welcomes any comments on any aspects of the software. to: dgatherer2002@yahoo.co.uk This is an occasionally on-going project and newer versions of code are frequently available, so if you want the really rough cuts, please ask. =cut #==============PRELIMINARIES===============================# use strict; my $pi = atan2(1,1) * 4; # constant pi my $r = 6378; # constant radius of earth (miles) print "\nDistance maker: \n\nopening input file............."; open (INFILE, 'dg_raw_data.txt') or die "can't open infile: $!"; my @input = <INFILE>; my $size = @input; print "\n\nThe table has $size lines (should say 864 lines, if not wrong input file)."; # just a check for correct input format print "\n\nmaking array.................."; my @matrix; my @line; for(my $entry=1;$entry<=$#input;$entry++) # each line of the raw data { @line = (split /\t/, $input[$entry]); push @matrix, [ @line ]; # make anonymous array of arrays } my $av_dist=0; # the average distance my $no_of_dists=0; # counter my $max_dist=-1000; # top and bottom dists my $min_dist=1000; # arbitrarily low and high my %dist_hash; # holds all the distances my %attributes; # holds the attribute being measured my $dims = $#line-1; #==============CALCULATE DISTANCES===============================# #=head1 # now calculate the similarity matrix print "\nopening output file............"; open(OUTFILE, '>anthro_dist_table.txt') or die "can't find the outfile $!\n"; for(my $z=0;$z<=$#matrix;$z++) # top row { print OUTFILE "\t$matrix[$z][1]"; } for(my $x=0;$x<=$#matrix-1;$x++) { print OUTFILE "\n$matrix[$x][1]\t"; print "\n$matrix[$x][1]\t"; # for(my $tabs=0;$tabs<=$x;$tabs++) # for padding triangiular matrix # { # print OUTFILE "\t"; # } for(my $y=0;$y<=$#matrix;$y++) # square matrix, or hash { my $distance=0; # reset to zero for each pair my $a1; # = lat1 in radians my $b1; # = lon1 in radians my $a2; # = lat2 in radians my $b2; # = lon2 in radians $matrix[$x][30] =~ /(\d{1,3})([NS])\s+(\d{1,3})([EW])/; # eg 3N 45W $a1 = $1*($pi/180); my $NorS = $2; $a1 *= -1 if $NorS eq "S"; $b1 = $3*($pi/180); my $EorW = $4; $b1 *= -1 if $EorW eq "W"; $matrix[$y][30] =~ /(\d{1,3})([NS])\s+(\d{1,3})([EW])/; # eg 3N 45W $a2 = $1*($pi/180); $NorS = $2; $a2 *= -1 if $NorS eq "S"; $b2 = $3*($pi/180); $EorW = $4; $b2 *= -1 if $EorW eq "W"; $distance = &acos(cos($a1)*cos($b1)*cos($a2)*cos($b2) +cos($a1)*sin($b1)*cos($a2)*sin($b2) +sin($a1)*sin($a2))*$r; if($distance > $max_dist) { $max_dist = $distance; } if($distance < $min_dist) { $min_dist = $distance; } $av_dist += $distance; $no_of_dists++; printf (OUTFILE "%.3f", $distance); # how to make it statistically analysable print OUTFILE "\t"; } } print "\nAverage distance is "; printf '%.3f', $av_dist/$no_of_dists; print " out of $no_of_dists counted"; print "\nMax: "; printf '%.3f', $max_dist; print "\nMin: "; printf '%.3f', $min_dist; close OUTFILE; # seal distance matrix once created #=cut #==============CALCULATE CLUSTERING===============================# for(my $col=7; $col<=29; $col++) # all columns { # $attributes{$matrix[0][1]} = $matrix[0][$col]; my @line = split /\t/, $input[0]; print "\n$line[$col]"; # print column titles for(my $z=0;$z<=$#matrix;$z++) # square { $attributes{$matrix[$z][1]} = $matrix[$z][$col]; } while((my $soc, my $att) = each %attributes) { # print "\n$soc\t$att"; } open(MATFILE, 'anthro_dist_table.txt') or die "can't find the outfile $!\n"; my @reload = <MATFILE>; my @dists; for(my $entry=0;$entry<=$#reload;$entry++) # each line of the distance matrix { @line = (split /\t/, $reload[$entry]); push @dists, [ @line ]; # make anonymous array of arrays } for(my $x=1;$x<=$#dists-1;$x++) { for(my $y=1;$y<=$#dists;$y++) # square matrix, or hash { $dist_hash{$dists[$x][0]}{$dists[0][$y]} = $dists[$x][$y]; # HoH # print "\n$dists[$x][0] to $dists[0][$y] is $dists[$x][$y]"; } } my %nn; # hash of nearest neighbours my %nn_dist; my %nn_same; # hash of nearest neighbours with identical attribute my %nn_same_dist; for(my $x=1;$x<=$#dists-1;$x++) { my $nn_dist = my $nn_same_dist = 2000000; # arbitrarily high for(my $y=1;$y<=$#dists;$y++) # square matrix, or hash { if($dist_hash{$dists[$x][0]}{$dists[0][$y]} < $nn_dist && $dists[$x][0] ne $dists[0][$y]) { $nn_dist = $dist_hash{$dists[$x][0]}{$dists[0][$y]}; $nn{$dists[$x][0]} = $dists[0][$y]; $nn_dist{$dists[$x][0]} = $nn_dist; } if($dist_hash{$dists[$x][0]}{$dists[0][$y]} < $nn_same_dist && $attributes{$dists[$x][0]} eq $attributes{$dists[0][$y]} && $dists[$x][0] ne $dists[0][$y] && $attributes{$dists[$x][0]} !~ /[\.X]/) { $nn_same_dist = $dist_hash{$dists[$x][0]}{$dists[0][$y]}; $nn_same{$dists[$x][0]} = $dists[0][$y]; $nn_same_dist{$dists[$x][0]} = $nn_same_dist; } } } my $valids = 0; # number of informative comparisons my $diff = 0; # total extra distance needed my $yes =0; # numberof nns that are also same for(my $x=1;$x<=$#dists-1;$x++) { if(defined $nn_same{$dists[$x][0]}) { $diff += $nn_same_dist{$dists[$x][0]} - $nn_dist{$dists[$x][0]}; $valids++; $yes++ if($nn_same_dist{$dists[$x][0]} == $nn_dist{$dists[$x][0]}) } } $diff /= $valids; print "\nAverage distance to nearest neighbour of same trait type = "; printf("%.2f", $diff); print "\nover $valids comparisons"; print "\nPercentage same nns = "; printf("%.2f", $yes/$valids); print "\nPercentage isolates = "; printf("\t%.2f", (1-($yes/$valids))); } # subroutine acos input: an angle in radians # output: returns the arc cosine of the angle # description: this is needed because perl does not provide an arc cosine function sub acos { my($x) = @_; $x = sprintf("%.6f", $x); my $ret = atan2(sqrt(1 - $x**2), $x); return $ret; }
© Copyright Journal of Artificial Societies and Social Simulation, [2002]