use strict;
use warnings;

# Regroups the records of output.txt by sequence length and writes them out
# in three forms:
#
#   OutputTemp.txt  every kept record, one per line, with its characters
#                   comma-separated, ordered by ascending length (input order
#                   is preserved inside each length group).
#   OutputRICO.txt  per length group: the length (number of attributes), the
#                   number of records, one alphabet line per attribute, the
#                   records themselves, then a separator line.
#   OutputC45.txt   per length group: the records with their last field
#                   replaced by a "+a"/"-a" label, then a separator line.
#
# Only records whose length lies in [$min, $max] are kept, and at most
# $regioncounter records are read from output.txt.

my $max           = 92;       # maximum region sequence length
my $min           = 3;        # minimum region sequence length
my $regioncounter = 18200;    # upper bound on the records read from output.txt

# Alphabet line emitted once per attribute in the RICO output.
my $alphabet_line  = "a,c,d,e,f,g,h,i,k,l,m,n,p,q,r,s,t,v,w,y\n";
my $separator_line = "=======================================\n";

my $input;
if (open($input, '<', 'output.txt')) {
    print "File output.txt opened OK \n";
}
else {
    print "Cannot open file output.txt\n";
    exit 1;    # non-zero status so callers can detect the failure
}

open(my $temp_out, '>', 'OutputTemp.txt')
    or die "Cannot open file OutputTemp.txt: $!\n";
open(my $rico_out, '>', 'OutputRICO.txt')
    or die "Cannot open file OutputRICO.txt: $!\n";
open(my $c45_out, '>', 'OutputC45.txt')
    or die "Cannot open file OutputC45.txt: $!\n";

# Single pass over the input: bucket the comma-joined records by the length
# of the raw sequence. Index = sequence length, value = array ref of records
# in input order. This yields the same grouping and ordering as re-scanning
# the file once per candidate length.
my @records_by_length;
my $records_read = 0;
while (defined(my $line = <$input>)) {
    last if ++$records_read > $regioncounter;
    chomp $line;

    my $length = length $line;
    next if $length < $min || $length > $max;

    # "acd" becomes "a,c,d"
    push @{ $records_by_length[$length] }, join(',', split(//, $line));
}
close($input);

# Debugging aid: print the number of records found for each length.
# for my $length ($min .. $max) {
#     print scalar(@{ $records_by_length[$length] || [] }), "\n";
# }

for my $length ($min .. $max) {
    my $records = $records_by_length[$length];
    next unless $records;    # no record of this length was seen

    # Temp file: the plain comma-separated records.
    print {$temp_out} "$_\n" for @{$records};

    # RICO format: header (attribute count, row count), one alphabet line per
    # attribute, then the rows.
    print {$rico_out} $length . "\n";
    print {$rico_out} scalar(@{$records}) . "\n";
    print {$rico_out} $alphabet_line for 1 .. $length;
    print {$rico_out} "$_\n" for @{$records};
    print {$rico_out} $separator_line;

    # C45 format: the last field becomes the label "+a" when it is 'a' and
    # "-a" otherwise (the original value of the last field is not kept).
    for my $record (@{$records}) {
        my $line = "$record\n";
        my $last = length($line) - 2;    # last character before the newline
        my $label = substr($line, $last, 1) eq 'a' ? '+a' : '-a';
        substr($line, $last, 1, $label);
        print {$c45_out} $line;
    }
    print {$c45_out} $separator_line;
}

# Buffered write errors only surface at close, so check every write handle.
close($temp_out) or die "Cannot close file OutputTemp.txt: $!\n";
close($rico_out) or die "Cannot close file OutputRICO.txt: $!\n";
close($c45_out)  or die "Cannot close file OutputC45.txt: $!\n";

exit;