重复更新哈希表和 k-核苷酸字符串

作者:Daniel Carrera

http://benchmarksgame.alioth.debian.org/u32/performance.php?test=knucleotide

基于 Perl 5 的提交。

用法:perl6 k-nucleotide.p6 k-nucleotide.input

预期输出

A 30.298
T 30.157
C 19.793
G 19.752

AA 9.177
TA 9.137
AT 9.136
TT 9.094
AC 6.000
CA 5.999
GA 5.986
AG 5.985
TC 5.970
CT 5.970
GT 5.957
TG 5.956
CC 3.915
CG 3.910
GC 3.908
GG 3.902

14717GGT
4463GGTA
472GGTATT
902GGTATTTTAATT
902GGTATTTTAATTGGTATTTTAATTTATAGT

源代码: k-nucleotide.p6

use v6;

sub MAIN($input-file = $*SPEC.catdir($*PROGRAM-NAME.IO.dirname, "k-nucleotide.input")) {
    my $fh = open $input-file.IO;

    # Read FASTA file and extract DNA sequence THREE.
    my $sequence = '';
    my $lines;

    while $fh.get -> $line {
        last if ($line.substr(0,6) eq '>THREE');
    }
    while $fh.get -> $line {
        last if ($line.substr(0,1) eq '>');
        $sequence = $sequence ~ lc $line.subst(/\n/,'');
    }

    # Count nucleotide sequences
    my $len = $sequence.chars;
    my (@keys,%table,$sum,$frame_size);
    for 1..2 -> $frame_size {
        %table = ();
        update_hash($frame_size);

        # Print.
        $sum = $len - $frame_size + 1;
        for %table.sort: {$^b.value <=> $^a.value||$^a.key leg $^b.key} {
            printf "%s %.3f\n", .key, .value*100/$sum;
        }
        print "\n";
    }

    for <ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt> -> $seq {
        %table = ();
        update_hash($seq.chars);
        printf "%3d    $seq\n", (%table{$seq} || 0);
    }

    # Procedure to update a hashtable of k-nucleotide keys and count values
    # for a particular reading-frame.
    sub update_hash($frame_size) {
        for 0..($len - $frame_size) -> $i {
            %table{$sequence.substr($i,$frame_size)}++;
        }
    }
}