This site has been permanently archived. This is a static copy provided by the University of Southampton.
@comment{Cogprints record 3915 (see the url field). The abstract wording is kept exactly as in the record; only field syntax and layout are normalised.}
@misc{cogprints3915,
  author   = {Gorban, A. N. and Popova, T. G. and Zinovyev, A. Yu.},
  title    = {Four basic symmetry types in the universal 7-cluster structure of 143 complete bacterial genomic sequences},
  year     = {2004},
  month    = oct,
  keywords = {codon usage, cluster structure, mean field, frequency dictionary},
  url      = {http://cogprints.org/3915/},
  abstract = {Coding information is the main source of heterogeneity
    (non-randomness) in the sequences of bacterial genomes. This
    information can be naturally modeled by analysing cluster structures in the ``in-phase'' triplet distributions of relatively short genomic fragments (200-400bp). We found a universal 7-cluster structure in all 143 completely sequenced bacterial genomes available in Genbank in August 2004, and explained its properties.
    The 7-cluster structure is responsible for the main part of sequence heterogeneity in bacterial genomes. In this sense, our 7 clusters is the basic model of bacterial genome sequence. We demonstrated that there are four basic ``pure'' types of this model, observed in nature: ``parallel triangles'', ``perpendicular triangles'',
    degenerated case and the flower-like type. We show that codon usage of bacterial genomes is a multi-linear function of their genomic G+C-content with high accuracy (more precisely, by two similar functions, one for eubacterial genomes and the other one for archaea).
    All 143 cluster animated 3D-scatters are collected in a database and is made available on our web-site:
    http://www.ihes.fr/{\texttt{\char126}}zinovyev/7clusters
    The finding can be readily introduced into any software for gene prediction, sequence alignment or bacterial genomes classification.
  },
}