Ensembl Core API Tutorial

Xref

my $slice_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Slice' );

# get a slice adaptor for the human core database
my $slice_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Slice' );

# Obtain a slice covering the entire chromosome X
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', 'X' );

# Obtain a slice covering the entire clone AL359765.6
$slice = $slice_adaptor->fetch_by_region( 'clone', 'AL359765.6' );

# Obtain a slice covering an entire scaffold
$slice = $slice_adaptor->fetch_by_region( 'scaffold', 'KI270510.1' );

# Obtain a slice covering the region from 1MB to 2MB (inclusively) of
# chromosome 20
$slice = $slice_adaptor->fetch_by_region( 'chromosome', '20', 1e6, 2e6 );

my $slice = $slice_adaptor->fetch_by_gene_stable_id( 'ENSG00000099889', 5e3 );

# Retrieve slices of every chromosome in the database
@slices = @{ $slice_adaptor->fetch_all('chromosome') };

# Retrieve slices of every BAC clone in the database
@slices = @{ $slice_adaptor->fetch_all('clone') };

use Bio::EnsEMBL::Utils::Slice qw(split_Slices);

# ...

my $slices = $slice_adaptor->fetch_all('chromosome');

# Base pair overlap between returned slices
my $overlap = 0;

# Maximum size of returned slices
my $max_size = 100_000;

# Break chromosomal slices into smaller 100k component slices
$slices = split_Slices( $slices, $max_size, $overlap );

my $sequence = $slice->seq();
print $sequence, "\n";

$sequence = $slice->subseq( 100, 200 );

# The method coord_system() returns a Bio::EnsEMBL::CoordSystem object
my $coord_sys  = $slice->coord_system()->name();
my $seq_region = $slice->seq_region_name();
my $start      = $slice->start();
my $end        = $slice->end();
my $strand     = $slice->strand();

print "Slice: $coord_sys $seq_region $start-$end ($strand)\n";

@genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };

# Another way of doing the same thing:
@genes = @{ $slice->get_all_Genes() };

# Fetch the slice adaptor and slice
my $slice_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Slice' );
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', '10', 3770000, 3790000 );

# Get all the genes in the slice
my @genes = @{ $slice->get_all_Genes() };

foreach my $gene (@genes){
	printf( "In terms of slice: %d-%d (%+d)\n",
    	$gene->start(),
    	$gene->end(),
    	$gene->strand() );
	printf( "In terms of seq_region: %d-%d (%+d)\n",
    	$gene->seq_region_start(),
    	$gene->seq_region_end(),
    	$gene->seq_region_strand() );
}

$feature_slice = $feature->feature_Slice();

# Display the sequence of the feature region
print $feature_slice->seq(), "\n";

# Display the sequence of the feature region + 5000bp flanking sequence
print $feature_slice->expand( 5000, 5000 )->seq(), "\n";

# Get all genes which overlap the feature
$genes = $feature_slice->get_all_Genes();

my $slice_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Slice' );
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', '10', 3770000, 3790000 );

foreach my $gene ( @{ $slice->get_all_Genes() } ) {
    print "Gene ID: ", $gene->stable_id, "\n";

    foreach my $transcript ( @{ $gene->get_all_Transcripts() } ) {
        print  "\tTranscript ID: ", $transcript->stable_id, "\n";

        foreach my $exon ( @{ $transcript->get_all_Exons() } ) {
            print  "\t\tExon ID: ", $exon->stable_id, "\n";
        }
    }
}

# The spliced_seq() method returns the concatenation of the exon
# sequences. This is the cDNA of the transcript
print "cDNA: ", $transcript->spliced_seq(), "\n";

# The translateable_seq() method returns only the CDS of the transcript
print "CDS: ", $transcript->translateable_seq(), "\n";

# UTR sequences are obtained via the five_prime_utr() and
# three_prime_utr() methods
my $fiv_utr = $transcript->five_prime_utr();
my $thr_utr = $transcript->three_prime_utr();

print "5' UTR: ", ( defined $fiv_utr ? $fiv_utr->seq() : "None" ), "\n";
print "3' UTR: ", ( defined $thr_utr ? $thr_utr->seq() : "None" ), "\n";

# The protein sequence is obtained from the translate() method. If the
# transcript is non-coding, undef is returned.
my $protein = $transcript->translate();

print "Translation: ", ( defined $protein ? $protein->seq() : "None" ), "\n";

print $gene->display_id(), "\n";

my $stable_id = 'ENST00000528762';

my $transcript_adaptor =
  $registry->get_adaptor( 'Human', 'Core', 'Transcript' );
my $transcript = $transcript_adaptor->fetch_by_stable_id($stable_id);

print $transcript->translation()->stable_id(), "\n";
print $transcript->translate()->seq(),         "\n";

print $transcript->translation()->transcript()->stable_id(), "\n";

$translation = $transcript->translation();

my $pfeatures = $translation->get_all_ProteinFeatures();
while ( my $pfeature = shift @{$pfeatures} ) {
    my $logic_name = $pfeature->analysis()->logic_name();

    printf(
        "%d-%d %s %s %s\n",
        $pfeature->start(), $pfeature->end(), $logic_name,
        $pfeature->interpro_ac(),
        $pfeature->idesc()
    );
}

my $seg_features    = $translation->get_all_ProteinFeatures('Seg');
my $domain_features = $translation->get_all_DomainFeatures();

my $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' );

# Get the 'COG6' gene from human
my $gene = $gene_adaptor->fetch_by_display_label('COG6');

print "GENE ", $gene->stable_id(), "\n";
my @dbentries = @{ $gene->get_all_DBEntries() };

foreach my $dbe ( @dbentries ) {
    printf "\tXREF %s (%s)\n", $dbe->display_id(), $dbe->dbname();
}

my $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' );

# Get the 'COG6' gene from human
my $gene = $gene_adaptor->fetch_by_display_label('COG6');

print "GENE ", $gene->stable_id(), "\n";
my @dblinks = @{ $gene->get_all_DBLinks() };

foreach my $dbl ( @dblinks ) {
    printf "\tXREF %s (%s)\n", $dbl->display_id(), $dbl->dbname();
}

my @dblinks = @{ $gene->get_all_DBLinks('Uniprot%') };

# Retrieve dna-dna alignment features from the slice region
my @features = @{ $slice->get_all_DnaAlignFeatures('Vertrna') };

foreach my $feature ( @features ) {

    printf(
        "%s %d-%d (%+d)\t=> %d-%d (%+d)\n",
        $feature->hseqname(), $feature->hstart(), $feature->hend(),
        $feature->hstrand(),  $feature->start(),  $feature->end(),
        $feature->strand()
        );

    print "Percent identity: ", $feature->percent_id(),   "\n";
    print "Cigar string: ",     $feature->cigar_string(), "\n";

    my @ungapped = $feature->ungapped_features();

    print "ungapped:\n"; 
    foreach my $feature_pair (@ungapped) { 
        printf( 
	    "%s %d-%d (%+d)\t=> %d-%d (%+d)\n", 
	    $feature_pair->hseqname(), $feature_pair->hstart(), $feature_pair->hend(), 
	    $feature_pair->hstrand(), $feature_pair->start(), $feature_pair->end(), 
	    $feature_pair->strand() 
	    ); 
    }
    print "\n";
}

my @repeats = @{ $slice->get_all_RepeatFeatures() };

foreach my $repeat (@repeats) {
    printf( "%s %d-%d\n",
        $repeat->display_id(), $repeat->start(), $repeat->end() );
}

my $unmasked_seq   = $slice->seq();
my $hardmasked_seq = $slice->get_repeatmasked_seq()->seq();
my $softmasked_seq = $slice->get_repeatmasked_seq( undef, 1 )->seq();

# Soft-mask sequence using TRF results only
my $tandem_masked_seq = $slice->get_repeatmasked_seq( ['TRF'], 1 )->seq();

my $marker_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Marker' );

# Obtain marker by synonym (this returns a list, but there's seldom more
# than one marker in the list)
my $marker = $marker_adaptor->fetch_all_by_synonym('D9S1038E')->[0];

# Display the various names associated with the same marker
foreach my $synonym ( @{ $marker->get_all_MarkerSynonyms() } ) {
    if ( defined $synonym->source() ) {
        print $synonym->source(), ':';
    }
    print $synonym->name(), ' ';
}
print "\n";

# Display the primer info
printf( "left primer : %s\n", $marker->left_primer() );
printf( "right primer: %s\n", $marker->right_primer() );
printf(
    "product size: %d-%d\n",
    $marker->min_primer_dist(),
    $marker->max_primer_dist()
);

# Display out genetic/RH/FISH map information
print "Map locations:\n";
foreach my $map_loc ( @{ $marker->get_all_MapLocations() } ) {
    printf( "\t%s %s %s\n",
        $map_loc->map_name(), $map_loc->chromosome_name(),
        $map_loc->position() );
}

# Obtain the positions for an already retrieved marker
foreach my $marker_feature ( @{ $marker->get_all_MarkerFeatures() } ) {
    printf( "%s %d-%d\n",
        $marker_feature->seq_region_name(),
        $marker_feature->start(),
        $marker_feature->end() );
}

# Retrieve all marker features in a given region
my $marker_features = $slice->get_all_MarkerFeatures();
while ( my $marker_feature = shift @{$marker_features} ) {
    printf( "%s %s %d-%d\n",
        $marker_feature->display_id(), $marker_feature->seq_region_name(),
        $marker_feature->start(),      $marker_feature->end() );
}

my $encode_regions = $slice->get_all_MiscFeatures('encode');

while ( my $encode_region = shift @{$encode_regions} ) {
    my @attributes = @{ $encode_region->get_all_Attributes() };
    foreach my $attribute ( @attributes ) {
        printf "%s:%s\n", $attribute->name(), $attribute->value();
    }
}

my $mf_adaptor = $registry->get_adaptor( 'Human', 'Core', 'MiscFeature' );

my $clones =
  $mf_adaptor->fetch_all_by_attribute_type_value( 'clone_name', 'RP5-60P11' );

while ( my $clone = shift @{$clones} ) {
    my $slice = $clone->slice();

    printf( "%s %s %d-%d\n",
        $slice->coord_system->name(),
        $slice->seq_region_name(),
        $clone->start(), $clone->end() );

    my @attributes = @{ $clone->get_all_Attributes() };
    foreach my $attribute ( @attributes ) {
        printf "\t%s:%s\n", $attribute->name(), $attribute->value();
    }
}

my $cs_adaptor = $registry->get_adaptor( 'Human', 'Core', 'CoordSystem' );
my $cs = $cs_adaptor->fetch_by_name('chromosome');

printf "Coordinate system: %s %s\n", $cs->name(), $cs->version();

$slice =
  $slice_adaptor->fetch_by_region( 'chromosome', 'X', 1e6, 10e6, '1', 'NCBI36' );

my @chromosomes = @{ $slice_adaptor->fetch_all('chromosome') };
my @clones      = @{ $slice_adaptor->fetch_all('clone') };

# List all coordinate systems in this database:
my @coord_systems = @{ $cs_adaptor->fetch_all() };
foreach $cs (@coord_systems) {
    printf "Coordinate system: %s %s\n", $cs->name(), $cs->version;
}

# Get all slices on the highest coordinate system:
my @slices = @{ $slice_adaptor->fetch_all('toplevel') };

if ( my $new_feature = $feature->transform('clone') ) {
    printf(
        "Feature's clonal position is: %s %d-%d (%+d)\n",
        $new_feature->slice->seq_region_name(),
        $new_feature->start(), $new_feature->end(), $new_feature->strand()
    );
} else {
    print "Feature is not defined in clonal coordinate system\n";
}

                  |~~~~~~~| (Feature A) |~~~~| (Feature B)

 (ctg 1) [=============]
         (ctg 2) (------==========] (ctg 2)
                      (ctg 3)   (--============] (ctg3)

my $new_feature = $feature->transform('toplevel');

printf(
    "Feature's toplevel position is: %s %s %d-%d (%+d)\n",
    $new_feature->slice->coord_system->name(),
    $new_feature->slice->seq_region_name(),
    $new_feature->start(),
    $new_feature->end(),
    $new_feature->strand()
);

my $slice = $slice_adaptor->fetch_by_region( 'chromosome', '2',
1e6, 2e6 );

my $new_slice =
  $slice_adaptor->fetch_by_region( 'chromosome', '2', 1.5e6, 2e6 );

foreach my $simple_feature ( @{ $slice->get_all_SimpleFeatures('Eponine') } ) {
    printf(
        "Before:\t%6d - %6d\n",
        $simple_feature->start(),
        $simple_feature->end()
    );

    my $new_feature = $simple_feature->transfer($new_slice);
    if ( !defined $new_feature ) {
      print "Could not transfer feature\n";
    } else {
      printf( "After:\t%6d - %6d\n",
      $new_feature->start(),
      $new_feature->end()
      );
    }
}

printf( "Feature at: %s %d-%d (%+d) projects to\n",
    $feature->seq_region_name(), $feature->start(),
    $feature->end(),             $feature->strand() );

my $projection = $feature->project('clone');

foreach my $segment ( @{$projection} ) {
    my $to_slice = $segment->to_Slice();

    printf(
        "\t%s %d-%d (%+d)\n",
        $to_slice->seq_region_name(), $to_slice->start(),
        $to_slice->end(),             $to_slice->strand()
    );
}

Method	Converts from	Converts to
Transform	Feature in slice or coordinate system	Feature in a different coordinate system
Transfer	Feature in slice or coordinate system	Feature in a different slice
Project	Feature in slice or coordinate system	Feature spanning multiple seq_regions in a coordinate system

Ensembl Core API Tutorial

Introduction

Slices: genomic regions

Registry

Fetch by region

Fetch by gene

Fetch all

Split slices

Get information about the slice

Get features in a slice

Features

Slice and seq-region location

Slices from features

Genes, Transcripts, and Exons

Sequences

Names

Translations and ProteinFeatures

External References

Alignment Features

Repeats

Markers

MiscFeatures

Coordinate Systems

Transform

Transfer

Project

Further help

About Us

Get help

Our sister sites

Follow us