#!/usr/perl/perl580/bin/perl
#
# Cluster a range of columns of a tab-separated data file with
# Algorithm::Cluster::kcluster and print the row (ORF) names grouped by
# the cluster each one was assigned to.
#
# Command-line arguments (all four are required):
#   ARGV[0]  path of the tab-separated input file; its first line is a
#            title line and field 0 of every other line is the ORF name
#   ARGV[1]  0-based index of the first column to be taken
#   ARGV[2]  0-based index of the last column to be taken
#   ARGV[3]  value handed to kcluster as 'npass'

use strict;
use warnings;
use Algorithm::Cluster qw/kcluster/;

my $usage = "This program has 4 ARGV values that must be given:\n"
          . "  ARGV[0] -> file path;\n"
          . "  ARGV[1] -> first column to be taken;\n"
          . "  ARGV[2] -> last column to be taken;\n"
          . "  ARGV[3] -> number of passes of k-means\n";

die $usage if @ARGV < 4;

my ($file, $first_col, $last_col, $npass) = @ARGV;

# The three numeric arguments are used as list indices / a repeat count,
# so reject anything that is not a plain non-negative integer up front.
foreach my $number ($first_col, $last_col, $npass) {
    die "Not a non-negative integer: '$number'\n$usage"
        unless $number =~ /\A\d+\z/;
}
die "Last column ($last_col) is before first column ($first_col)\n$usage"
    if $last_col < $first_col;

# Number of selected columns; this is how many mask and weight entries
# are generated per row.
my $ncolumns = $last_col - $first_col + 1;

#------------------
# Read in the data file, and save the data to @orfdata.
# The file is assumed to be intact with no holes, so the mask is simply
# set to 1 for every item.
#
my (@orfname, @orfdata, @mask);

open(my $fh, '<', $file) or die "Can't open file $file: $!";

my $title_line = <$fh>;    # Skip the title line

while (my $line = <$fh>) {
    chomp $line;
    next if $line =~ /\A\s*\z/;    # ignore blank lines

    my @field = split /\t/, $line;

    push @orfname, $field[0];
    push @orfdata, [ @field[$first_col .. $last_col] ];

    # Must be an array reference: a scalar assignment of "(1) x N" would
    # store the string "111..." instead of a row of N ones.
    push @mask, [ (1) x $ncolumns ];
}

close($fh);

die "No data rows found in $file\n" unless @orfdata;

my @weight = (1.0) x $ncolumns;

#------------------
# Define the params we want to pass to kcluster
#
# NOTE(review): with transpose => 1 the Algorithm::Cluster documentation
# describes clustering of columns, in which case the weight length and the
# row-name report below (both written for row clustering) would not line
# up. The original value is kept; confirm whether 0 was intended.
#
my %params = (
    nclusters => 2,
    transpose => 1,
    npass     => $npass,
    method    => 'a',
    dist      => 'e',
    data      => \@orfdata,
    mask      => \@mask,
    weight    => \@weight,
);

#------------------
# Here is where we invoke the library function!
#
# NOTE(review): this four-value return list is what the script was written
# against; newer Algorithm::Cluster releases may not return centroids from
# kcluster -- verify against the installed version.
#
my ($clusters, $centroids, $error, $found) = kcluster(%params);

#------------------
# Create a reverse index of the ORF names, by cluster ID.
# The position of each entry in @$clusters is used as the row index.
#
my %orfname_by_cluster;
my $row = 0;
foreach my $cluster_id (@{$clusters}) {
    push @{ $orfname_by_cluster{$cluster_id} }, $orfname[$row++];
}

#------------------
# Print out a list of the ORFs, grouped by cluster ID,
# as returned by the kcluster() function.
#
my $cluster_id = 0;
foreach my $centroid (@{$centroids}) {
    # A cluster may have no members; fall back to an empty list so that
    # dereferencing does not die under "use strict".
    my $members = $orfname_by_cluster{$cluster_id} || [];

    print "------------------\n";
    printf("Cluster %d: %d ORFs, centroids: ( %s )\n\n",
        $cluster_id,
        scalar(@{$members}),
        join(', ', map { sprintf "%.3f", $_ } @{$centroid})
    );

    print "\t$_\n" foreach sort @{$members};
    print "\n";

    ++$cluster_id;
}