% This file was created with JabRef 2.5.
% Encoding: UTF-8

% NOTE(review): the given name below looks wrong; this 1922 book is usually
% cited as by Felix d'Herelle. Confirm against the title page before changing.
@BOOK{Herelle1922,
  title = {The Bacteriophage; its role in immunity},
  publisher = {Williams and Wilkins},
  address = {Baltimore},
  year = {1922},
  author = {d'Herelle, Fran{\c{c}}ois},
  date-added = {2007-04-11 16:31:06 +0200},
  date-modified = {2007-04-11 16:38:50 +0200},
  owner = {mbailly},
  timestamp = {2009.11.23}
}

@ARTICLE{Akashi:2001,
  author = {Akashi, Hiroshi},
  title = {Gene expression and molecular evolution},
  journal = {Current Opinion in Genetics \& Development},
  year = {2001},
  volume = {11},
  pages = {660--666},
  number = {6},
  abstract = {The combination of complete genome sequence information and estimates of mRNA abundances have begun to reveal causes of both silent and protein sequence evolution. Translational selection appears to explain patterns of synonymous codon usage in many prokaryotes as well as a number of eukaryotic model organisms (with the notable exception of vertebrates). Relationships between gene length and codon usage bias, however, remain unexplained. 
Intriguing correlations between expression patterns and protein divergence suggest some general mechanisms underlying protein evolution.},
  date-added = {2007-04-16 12:31:02 +0200},
  date-modified = {2007-07-05 19:08:10 +0200},
  keywords = {codon usage, weak selection, translational selection, protein evolution},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6VS0-448YP2P-D/2/bc46dbd4dfefbbdc29ec46599c432573}
}

@ARTICLE{Akashi:2002,
  author = {Akashi, Hiroshi and Gojobori, Takashi},
  title = {Metabolic efficiency and amino acid composition in the proteomes of \emph{{E}scherichia coli} and \emph{{B}acillus subtilis}},
  journal = {Proceedings of the National Academy of Sciences, USA},
  year = {2002},
  volume = {99},
  pages = {3695--3700},
  number = {6},
  abstract = {Biosynthesis of an \emph{{E}scherichia coli} cell, with organic compounds as sources of energy and carbon, requires approximately 20 to 60 billion high-energy phosphate bonds (Stouthamer, A. H. (1973) Antonie van Leeuwenhoek 39, 545-565). A substantial fraction of this energy budget is devoted to biosynthesis of amino acids, the building blocks of proteins. The fueling reactions of central metabolism provide precursor metabolites for synthesis of the 20 amino acids incorporated into proteins. Thus, synthesis of an amino acid entails a dual cost: energy is lost by diverting chemical intermediates from fueling reactions and additional energy is required to convert precursor metabolites to amino acids. Among amino acids, costs of synthesis vary from 12 to 74 high-energy phosphate bonds per molecule. The energetic advantage to encoding a less costly amino acid in a highly expressed gene can be greater than 0.025{\%} of the total energy budget. Here, we provide evidence that amino acid composition in the proteomes of E. coli and \emph{{B}acillus subtilis} reflects the action of natural selection to enhance metabolic efficiency. 
We employ synonymous codon usage bias as a measure of translation rates and show increases in the abundance of less energetically costly amino acids in highly expressed proteins.},
  annote = {10.1073/pnas.062526999},
  date-added = {2007-06-01 15:23:21 +0200},
  date-modified = {2007-07-05 19:08:27 +0200},
  doi = {10.1073/pnas.062526999},
  jo = {PNAS},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.pnas.org/cgi/content/abstract/99/6/3695}
}

@ARTICLE{Archetti:2004c,
  author = {Archetti, Marco},
  title = {Codon Usage Bias and Mutation Constraints Reduce the Level of Error Minimization of the Genetic Code},
  journal = {Journal of Molecular Evolution},
  year = {2004},
  volume = {59},
  pages = {258--266},
  number = {2},
  date-added = {2007-04-18 11:56:29 +0200},
  date-modified = {2007-07-05 19:09:35 +0200},
  doi = {10.1007/s00239-004-2620-0},
  m3 = {10.1007/s00239-004-2620-0},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://dx.doi.org/10.1007/s00239-004-2620-0}
}

@ARTICLE{Bailly-Bechet:2006,
  author = {Bailly-Bechet, Marc and Danchin, Antoine and Iqbal, Mudassar and Marsili, Matteo and Vergassola, Massimo},
  title = {Codon Usage Domains over Bacterial Chromosomes},
  journal = {PLoS Computational Biology},
  year = {2006},
  volume = {2},
  pages = {e37},
  number = {4},
  abstract = {The geography of codon bias distributions over prokaryotic genomes and its impact upon chromosomal organization are analyzed. To this aim, we introduce a clustering method based on information theory, specifically designed to cluster genes according to their codon usage and apply it to the coding sequences of \emph{{E}scherichia coli} and \emph{{B}acillus subtilis}. One of the clusters identified in each of the organisms is found to be related to expression levels, as expected, but other groups feature an over-representation of genes belonging to different functional groups, namely horizontally transferred genes, motility, and intermediary metabolism. 
Furthermore, we show that genes with a similar bias tend to be close to each other on the chromosome and organized in coherent domains, more extended than operons, demonstrating a role of translation in structuring bacterial chromosomes. It is argued that a sizeable contribution to this effect comes from the dynamical compartimentalization induced by the recycling of tRNAs, leading to gene expression rates dependent on their genomic and expression context.},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-07-09 16:06:50 +0200},
  doi = {10.1371/journal.pcbi.0020037},
  owner = {mbailly},
  sp = {e37 EP -},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://dx.doi.org/10.1371/journal.pcbi.0020037}
}

@ARTICLE{Bailly-Bechet2007,
  author = {Marc Bailly-Bechet and Massimo Vergassola and Eduardo Rocha},
  title = {Causes for the intriguing presence of {tRNAs} in phages},
  journal = {Genome Research},
  year = {2007},
  month = sep,
  abstract = {Phages have highly compact genomes with sizes reflecting their capacity to exploit the host resources. Here, we investigate the reasons for tRNAs being the only translation-associated genes frequently found in phages. We were able to unravel the selective processes shaping the tRNA distribution in phages by analyzing their genomes and those of their hosts. We found ample evidence against tRNAs being selected to facilitate phage integration in the prokaryotic chromosomes. Conversely, there is a significant association between tRNA distribution and codon usage. We support this observation by introducing a master equation model, where tRNAs are randomly gained from their hosts and then lost either neutrally or according to a set of different selection mechanisms. Those tRNAs present in phages tend to correspond to codons that are simultaneously highly used by the phage genes, while rare in the host genome. Accordingly, we propose that a selective recruitment of tRNAs compensates for the compositional differences between the phage and the host genomes. 
To further understand the importance of these results in phage biology, we analyzed the differences between temperate and virulent phages. Virulent phages contain more tRNAs than temperate ones, higher codon usage biases, and more important compositional differences with respect to the host genome. These differences are thus in perfect agreement with the results of our master equation model and further suggest that tRNA acquisition may contribute to higher virulence. Thus, even though phages use most of the cell's translation machinery, they can complement it with their own genetic information to attain higher fitness. These results suggest that similar selection pressures may act upon other cellular essential genes that are being found in the recently uncovered large viruses.},
  doi = {10.1101/gr.6649807},
  owner = {mbailly},
  pii = {gr.6649807},
  pmid = {17785533},
  timestamp = {2007.09.20},
  url = {http://dx.doi.org/10.1101/gr.6649807}
}

@ARTICLE{Barla2008,
  author = {Annalisa Barla and Giuseppe Jurman and Samantha Riccadonna and Stefano Merler and Marco Chierici and Cesare Furlanello},
  title = {Machine learning methods for predictive proteomics},
  journal = {Briefings in Bioinformatics},
  year = {2008},
  volume = {9},
  pages = {119--128},
  number = {2},
  month = mar,
  abstract = {The search for predictive biomarkers of disease from high-throughput mass spectrometry (MS) data requires a complex analysis path. Preprocessing and machine-learning modules are pipelined, starting from raw spectra, to set up a predictive classifier based on a shortlist of candidate features. As a machine-learning problem, proteomic profiling on MS data needs caution like the microarray case. The risk of overfitting and of selection bias effects is pervasive: not only potential features easily outnumber samples by $10^{3}$ times, but it is easy to neglect information-leakage effects during preprocessing from spectra to peaks. 
The aim of this review is to explain how to build a general purpose design analysis protocol (DAP) for predictive proteomic profiling: we show how to limit leakage due to parameter tuning and how to organize classification and ranking on large numbers of replicate versions of the original data to avoid selection bias. The DAP can be used with alternative components, i.e. with different preprocessing methods (peak clustering or wavelet based), classifiers e.g. Support Vector Machine (SVM) or feature ranking methods (recursive feature elimination or I-Relief). A procedure for assessing stability and predictive value of the resulting biomarkers' list is also provided. The approach is exemplified with experiments on synthetic datasets (from the Cromwell MS simulator) and with publicly available datasets from cancer studies.},
  doi = {10.1093/bib/bbn008},
  institution = {FBK, via Sommarive 18, I-38100 Povo (Trento), Italy.},
  keywords = {Algorithms; Animals; Area Under Curve; Artificial Intelligence; Biological Markers; Gene Expression Profiling; Humans; Mass Spectrometry; Microarray Analysis; Pattern Recognition, Automated; Proteomics; Signal Processing, Computer-Assisted},
  owner = {mbailly},
  pii = {bbn008},
  pmid = {18310105},
  timestamp = {2009.05.27},
  url = {http://dx.doi.org/10.1093/bib/bbn008}
}

@ARTICLE{Bernardi:2007,
  author = {Bernardi, Giorgio},
  title = {The neoselectionist theory of genome evolution},
  journal = {Proceedings of the National Academy of Sciences, USA},
  year = {2007},
  volume = {104},
  pages = {8385--8390},
  number = {20},
  abstract = {The vertebrate genome is a mosaic of GC-poor and GC-rich isochores, megabase-sized DNA regions of fairly homogeneous base composition that differ in relative amount, gene density, gene expression, replication timing, and recombination frequency. At the emergence of warm-blooded vertebrates, the gene-rich, moderately GC-rich isochores of the cold-blooded ancestors underwent a GC increase. 
This increase was similar in mammals and birds and was maintained during the evolution of mammalian and avian orders. Neither the GC increase nor its conservation can be accounted for by the random fixation of neutral or nearly neutral single-nucleotide changes (i.e., the vast majority of nucleotide substitutions) or by a biased gene conversion process occurring at random genome locations. Both phenomena can be explained, however, by the neoselectionist theory of genome evolution that is presented here. This theory fully accepts Ohta's nearly neutral view of point mutations but proposes in addition (i) that the AT-biased mutational input present in vertebrates pushes some DNA regions below a certain GC threshold; (ii) that these lower GC levels cause regional changes in chromatin structure that lead to deleterious effects on replication and transcription; and (iii) that the carriers of these changes undergo negative (purifying) selection, the final result being a compositional conservation of the original isochore pattern in the surviving population. Negative selection may also largely explain the GC increase accompanying the emergence of warm-blooded vertebrates. In conclusion, the neoselectionist theory not only provides a solution to the neutralist/selectionist debate but also introduces an epigenomic component in genome evolution.},
  annote = {10.1073/pnas.0701652104},
  date-added = {2007-05-16 19:24:33 +0200},
  date-modified = {2007-07-05 19:12:04 +0200},
  doi = {10.1073/pnas.0701652104},
  jo = {PNAS},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.pnas.org/cgi/content/abstract/104/20/8385}
}

@ARTICLE{Burkholder1985,
  author = {A. C. Burkholder and L. H. 
Hartwell},
  title = {The yeast alpha-factor receptor: structural properties deduced from the sequence of the {STE2} gene},
  journal = {Nucleic Acids Research},
  year = {1985},
  volume = {13},
  pages = {8463--8475},
  number = {23},
  month = dec,
  __markedentry = {[mbailly]},
  abstract = {The STE2 gene of the yeast Saccharomyces cerevisiae encodes a component of the receptor for the oligopeptide pheromone alpha-factor. We have cloned and determined the nucleotide sequence of the STE2 gene. A sequence involved in the control of cell-type expression of the STE2 gene was found 5' of an open reading frame that could encode a protein of 431 amino acids. The predicted STE2 protein contains seven hydrophobic segments, suggesting that the alpha-factor receptor is an integral membrane protein. No extensive homology at the primary sequence level was detected between the predicted STE2 protein and other available protein sequences.},
  keywords = {Amino Acid Sequence; Base Sequence; DNA, Fungal, genetics; Fungal Proteins, genetics; Genes, Fungal; Genetic Complementation Test; Membrane Proteins, genetics; Peptides, metabolism; Pheromones, metabolism; Protein Conformation; Receptors, Cell Surface, genetics/metabolism; Saccharomyces cerevisiae, genetics; Solubility},
  language = {eng},
  medline-pst = {ppublish},
  owner = {mbailly},
  pmid = {3001640},
  timestamp = {2010.06.11}
}

@ARTICLE{Carbone:2005,
  author = {Carbone, A. and Kepes, F. and Zinovyev, A.},
  title = {Codon Bias Signatures, Organization of Microorganisms in Codon Space, and Lifestyle},
  journal = {Molecular Biology and Evolution},
  year = {2005},
  volume = {22},
  pages = {547--561},
  number = {3},
  abstract = {New and simple numerical criteria based on a codon adaptation index are applied to the complete genomic sequences of 80 Eubacteria and 16 Archaea, to infer weak and strong genome tendencies toward content bias, translational bias, and strand bias. 
These criteria can be applied to all microbial genomes, even those for which little biological information is known, and a codon bias signature, that is the collection of strong biases displayed by a genome, can be automatically derived. A codon bias space, where genomes are identified by their preferred codons, is proposed as a novel formal framework to interpret genomic relationships. Principal component analysis confirms that although GC content has a dominant effect on codon bias space, thermophilic and mesophilic species can be identified and separated by codon preferences. Two more examples concerning lifestyle are studied with linear discriminant analysis: suitable separating functions characterized by sets of preferred codons are provided to discriminate: translationally biased (hyper)thermophiles from mesophiles, and organisms with different respiratory characteristics, aerobic, anaerobic, facultative aerobic and facultative anaerobic. These results suggest that codon bias space might reflect the geometry of a prokaryotic ``physiology space.'' Evolutionary perspectives are noted, numerical criteria and distances among organisms are validated on known cases, and various results and predictions are discussed both on methodological and biological grounds.},
  annote = {10.1093/molbev/msi040},
  date-added = {2007-04-19 13:13:05 +0200},
  date-modified = {2007-07-05 19:19:15 +0200},
  doi = {10.1093/molbev/msi040},
  jo = {Mol Biol Evol},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://mbe.oxfordjournals.org/cgi/content/abstract/22/3/547}
}

@ARTICLE{Carbone2003,
  author = {Carbone, A. and Zinovyev, A. and Kepes, F.},
  title = {Codon adaptation index as a measure of dominating codon bias},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  pages = {2005--2015},
  number = {16},
  abstract = {We propose a simple algorithm to detect dominating synonymous codon usage bias in genomes. 
The algorithm is based on a precise mathematical formulation of the problem that lead us to use the Codon Adaptation Index (CAI) as a `universal' measure of codon bias. This measure has been previously employed in the specific context of translational bias. With the set of coding sequences as a sole source of biological information, the algorithm provides a reference set of genes which is highly representative of the bias. This set can be used to compute the CAI of genes of prokaryotic and eukaryotic organisms, including those whose functional annotation is not yet available. An important application concerns the detection of a reference set characterizing translational bias which is known to correlate to expression levels; in this case, the algorithm becomes a key tool to predict gene expression levels, to guide regulatory circuit reconstruction, and to compare species. The algorithm detects also leading-lagging strands bias, GC-content bias, GC3 bias, and horizontal gene transfer. The approach is validated on 12 slow-growing and fast-growing bacteria, Saccharomyces cerevisiae, Caenorhabditis elegans and Drosophila melanogaster. 
Availability: http://www.ihes.fr/~materials.},
  date-added = {2007-03-30 10:34:27 +0200},
  date-modified = {2007-07-09 20:36:07 +0200},
  doi = {10.1093/bioinformatics/btg272},
  eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/19/16/2005.pdf},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/19/16/2005}
}

@ARTICLE{HubertCharles09112006,
  author = {Charles, Hubert and Calevro, Federica and Vinuelas, Jose and Fayard, Jean-Michel and Rahbe, Yvan},
  title = {Codon usage bias and t{RNA} over-expression in \emph{{B}uchnera aphidicola} after aromatic amino acid nutritional stress on its host \emph{{A}cyrthosiphon pisum}},
  journal = {Nucleic Acids Research},
  year = {2006},
  volume = {34},
  pages = {4583--4592},
  number = {16},
  abstract = {Codon usage bias and relative abundances of tRNA isoacceptors were analysed in the obligate intracellular symbiotic bacterium, Buchnera aphidicola from the aphid Acyrthosiphon pisum, using a dedicated 35mer oligonucleotide microarray. Buchnera is archetypal of organisms living with minimal metabolic requirements and presents a reduced genome with high-evolutionary rate. Codon usage in Buchnera has been overcome by the high mutational bias towards AT bases. However, several lines of evidence for codon usage selection are given here. A significant correlation was found between tRNA relative abundances and codon composition of Buchnera genes. A significant codon usage bias was found for the choice of rare codons in Buchnera: C-ending codons are preferred in highly expressed genes, whereas G-ending codons are avoided. This bias is not explained by GC skew in the bacteria and might correspond to a selection for perfect matching between codon-anticodon pairs for some essential amino acids in Buchnera proteins. Nutritional stress applied to the aphid host induced a significant overexpression of most of the tRNA isoacceptors in bacteria. 
Although, molecular regulation of the tRNA operons in Buchnera was not investigated, a correlation between relative expression levels and organization in transcription unit was found in the genome of Buchnera.},
  date-added = {2007-01-12 11:58:38 +0100},
  date-modified = {2007-07-09 20:35:58 +0200},
  doi = {10.1093/nar/gkl597},
  eprint = {http://nar.oxfordjournals.org/cgi/reprint/34/16/4583.pdf},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/34/16/4583}
}

@ARTICLE{Chen:2004,
  author = {Chen, Swaine L. and Lee, William and Hottes, Alison K. and Shapiro, Lucy and McAdams, Harley H.},
  title = {Codon usage between genomes is constrained by genome-wide mutational processes},
  journal = {Proceedings of the National Academy of Sciences, USA},
  year = {2004},
  volume = {101},
  pages = {3480--3485},
  number = {10},
  abstract = {Analysis of genome-wide codon bias shows that only two parameters effectively differentiate the genome-wide codon bias of 100 eubacterial and archaeal organisms. The first parameter correlates with genome GC content, and the second parameter correlates with context-dependent nucleotide bias. Both of these parameters may be calculated from intergenic sequences. Therefore, genome-wide codon bias in eubacteria and archaea may be predicted from intergenic sequences that are not translated. When these two parameters are calculated for genes from nonmammalian eukaryotic organisms, genes from the same organism again have similar values, and genome-wide codon bias may also be predicted from intergenic sequences. In mammals, genes from the same organism are similar only in the second parameter, because GC content varies widely among isochores. 
Our results suggest that, in general, genome-wide codon bias is determined primarily by mutational processes that act throughout the genome, and only secondarily by selective forces acting on translated sequences.},
  annote = {10.1073/pnas.0307827100},
  date-added = {2007-04-15 17:24:10 +0200},
  date-modified = {2007-07-05 19:24:40 +0200},
  doi = {10.1073/pnas.0307827100},
  jo = {PNAS},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.pnas.org/cgi/content/abstract/101/10/3480}
}

@ARTICLE{Dethlefsen:2005,
  author = {Dethlefsen, Les and Schmidt, Thomas},
  title = {Differences in codon bias cannot explain differences in translational power among microbes},
  journal = {BMC Bioinformatics},
  year = {2005},
  volume = {6},
  pages = {3},
  number = {1},
  abstract = {BACKGROUND: Translational power is the cellular rate of protein synthesis normalized to the biomass invested in translational machinery. Published data suggest a previously unrecognized pattern: translational power is higher among rapidly growing microbes, and lower among slowly growing microbes. One factor known to affect translational power is biased use of synonymous codons. The correlation within an organism between expression level and degree of codon bias among genes of \emph{{E}scherichia coli} and other bacteria capable of rapid growth is commonly attributed to selection for high translational power. Conversely, the absence of such a correlation in some slowly growing microbes has been interpreted as the absence of selection for translational power. Because codon bias caused by translational selection varies between rapidly growing and slowly growing microbes, we investigated whether observed differences in translational power among microbes could be explained entirely by differences in the degree of codon bias. Although the data are not available to estimate the effect of codon bias in other species, we developed an empirically-based mathematical model to compare the translation rate of E. 
coli to the translation rate of a hypothetical strain which differs from E. coli only by lacking codon bias. RESULTS: Our reanalysis of data from the scientific literature suggests that translational power can differ by a factor of 5 or more between E. coli and slowly growing microbial species. Using empirical codon-specific in vivo translation rates for 29 codons, and several scenarios for extrapolating from these data to estimates over all codons, we find that codon bias cannot account for more than a doubling of the translation rate in E. coli, even with unrealistic simplifying assumptions that exaggerate the effect of codon bias. With more realistic assumptions, our best estimate is that codon bias accelerates translation in E. coli by no more than 60\% in comparison to microbes with very little codon bias. CONCLUSIONS: While codon bias confers a substantial benefit of faster translation and hence greater translational power, the magnitude of this effect is insufficient to explain observed differences in translational power among bacterial and archaeal species, particularly the differences between slowly growing and rapidly growing species. 
Hence, large differences in translational power suggest that the translational apparatus itself differs among microbes in ways that influence translational performance.},
  date-added = {2007-01-12 11:56:00 +0100},
  date-modified = {2007-04-18 16:34:36 +0200},
  doi = {10.1186/1471-2105-6-3},
  issn = {1471-2105},
  owner = {mbailly},
  pubmedid = {15636642},
  timestamp = {2009.10.08},
  url = {http://www.biomedcentral.com/1471-2105/6/3}
}

@ARTICLE{Dong1996,
  author = {Dong, Hengjiang and Nilsson, Lars and Kurland, Charles G.},
  title = {Co-variation of t{RNA} Abundance and Codon Usage in \emph{{E}scherichia coli} at Different Growth Rates},
  journal = {Journal of Molecular Biology},
  year = {1996},
  volume = {260},
  pages = {649--663},
  number = {5},
  abstract = {We have used two-dimensional polyacrylamide gel electrophoresis to fractionate tRNAs from \emph{{E}scherichia coli}. A sufficiently high degree of resolution was obtained for 44 out of 46 tRNA species in E. coli to be resolved into individual electrophoretic components. These isolated components were identified by hybridization to tRNA-specific oligonucleotide probes. Systematic measurements of the abundance of each individual tRNA isoacceptor in E. coli, grown at rates varying from 0.4 to 2.5 doublings per hour, were made with the aid of this electrophoretic protocol. We find that there is a biased distribution of the tRNA abundance at all growth rates, and that this can be roughly correlated with the values of codon frequencies in the mRNA pools calculated for bacteria growing at different rates. The tRNA species cognate to abundant codons increase in concentration as the growth rate increases but not as dramatically as might be anticipated. The levels of most of the tRNA isoacceptors cognate to less abundant codons remain unchanged with increasing growth rates. 
The result of these changes in tRNA abundance is that the relative increase in the amounts of major tRNA species in the bacteria growing at the fastest growth rates is more modest than previous estimates from this laboratory suggested. Furthermore, a systematic error in previous estimates of ribosomal RNA content of the bacteria has been detected. This will account for the quantitative discrepancies between the previous and the present data for tRNA abundance.},
  date-modified = {2007-03-27 10:50:46 +0200},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://www.sciencedirect.com/science/article/B6WK7-45MG2PV-6F/2/fb1b411b9a4b4e57d38bf857de08385e}
}

@ARTICLE{Dopman2007,
  author = {Erik B Dopman and Daniel L Hartl},
  title = {A portrait of copy-number polymorphism in {Drosophila} melanogaster},
  journal = {Proceedings of the National Academy of Sciences, USA},
  year = {2007},
  volume = {104},
  pages = {19920--19925},
  number = {50},
  month = dec,
  abstract = {Thomas Hunt Morgan and colleagues identified variation in gene copy number in Drosophila in the 1920s and 1930s and linked such variation to phenotypic differences [Bridges CB (1936) Science 83:210]. Yet the extent of variation in the number of chromosomes, chromosomal regions, or gene copies, and the importance of this variation within species, remain poorly understood. Here, we focus on copy-number variation in Drosophila melanogaster. We characterize copy-number polymorphism (CNP) across genomic regions, and we contrast patterns to infer the evolutionary processes acting on this variation. Copy-number variation in D. melanogaster is nonrandomly distributed, presumably because of a mutational bias produced by tandem repeats or other mechanisms. Comparisons of coding and noncoding CNPs, however, reveal a strong effect of purifying selection in the removal of structural variation from functionally constrained regions. Most patterns of CNP in D. 
melanogaster suggest that negative selection and mutational biases are the primary agents responsible for shaping structural variation.},
  doi = {10.1073/pnas.0709888104},
  institution = {Divinity Avenue, Cambridge, MA 02138, USA.},
  owner = {mbailly},
  pii = {0709888104},
  pmid = {18056801},
  timestamp = {2007.12.18},
  url = {http://dx.doi.org/10.1073/pnas.0709888104}
}

@ARTICLE{Francino:1997,
  author = {Francino, M. Pilar and Ochman, Howard},
  title = {Strand asymmetries in {DNA} evolution},
  journal = {Trends in Genetics},
  year = {1997},
  volume = {13},
  pages = {240--245},
  number = {6},
  abstract = {The complementary strands of DNA differ with respect to replication and transcription. Both of these processes are asymmetric and can bias the occurrence of mutations between the strands: during replication, the discontinuous lagging strand undergoes certain errors at higher rates, and transcription overexposes the nontranscribed strand to DNA damage while targeting repair enzymes to the transcribed strand. While biases introduced during replication apparently have little impact on sequence evolution, the effects of transcription are observed in the asymmetric patterns of substitution in bacterial genes and might be influencing genome-wide patterns of base composition.},
  date-added = {2007-04-15 17:24:41 +0200},
  date-modified = {2007-07-05 19:32:39 +0200},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6TCY-3RH0BV3-F/2/20450269cd7905c348c0f6fad377bae1}
}

@ARTICLE{Gilchrist:2006,
  author = {Gilchrist, Michael A. and Wagner, Andreas},
  title = {A model of protein translation including codon bias, nonsense errors, and ribosome recycling},
  journal = {Journal of Theoretical Biology},
  year = {2006},
  volume = {239},
  pages = {417--434},
  number = {4},
  abstract = {We present and analyse a model of protein translation at the scale of an individual messenger RNA (mRNA) transcript. 
The model we develop is unique in that it incorporates the phenomena of ribosome recycling and nonsense errors. The model conceptualizes translation as a probabilistic wave of ribosome occupancy traveling down a heterogeneous medium, the mRNA transcript. Our results show that the heterogeneity of the codon translation rates along the mRNA results in short-scale spikes and dips in the wave. Nonsense errors attenuate this wave on a longer scale while ribosome recycling reinforces it. We find that the combination of nonsense errors and codon usage bias can have a large effect on the probability that a ribosome will completely translate a transcript. We also elucidate how these forces interact with ribosome recycling to determine the overall translation rate of an mRNA transcript. We derive a simple cost function for nonsense errors using our model and apply this function to the yeast (Saccharomyces cerevisiae) genome. Using this function we are able to detect position dependent selection on codon bias which correlates with gene expression levels as predicted a priori. 
These results indirectly validate our underlying model assumptions and confirm that nonsense errors can play an important role in shaping codon usage bias.},
  date-added = {2007-04-17 13:07:07 +0200},
  date-modified = {2007-07-05 19:34:59 +0200},
  keywords = {Protein translation, Codon bias, Nonsense errors, Ribosome recycling, Translational efficiency, Translational accuracy},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4H4T3D5-5/2/d9d93b5e2223a4d254038c673bc26034}
}

@ARTICLE{Goodarzi:2007,
  author = {Goodarzi, Hani and Katanforoush, Ali and Torabi, Noorossadat and Najafabadi, Hamed Shateri},
  title = {Solvent accessibility, residue charge and residue volume, the three ingredients of a robust amino acid substitution matrix},
  journal = {Journal of Theoretical Biology},
  year = {2007},
  volume = {245},
  pages = {715--725},
  number = {4},
  abstract = {Cost measure matrices or different amino acid indices have been widely used for studies in many fields of biology. One major criticism of these studies might be based on the unavailability of an unbiased and yet effective amino acid substitution matrix. Throughout this study we have devised a cost measure matrix based on the solvent accessibility, residue charge, and residue volume indices. Performed analyses on this novel substitution matrix (i.e. solvent accessibility charge volume (SCV) matrix) support the uncontaminated nature of this matrix regarding the genetic code. Although highly similar to a number of previously available cost measure matrices, the SCV matrix results in a more significant optimality in the error-buffering capacity of the genetic code when compared to many other amino acid substitution matrices. 
Besides, a method to compare an SCV-based scoring matrix with a number of widely used matrices has been devised, the results of which highlights the robustness of this matrix in protein family discrimination.},
  date-added = {2007-04-14 16:15:12 +0200},
  date-modified = {2007-07-05 19:36:15 +0200},
  keywords = {Load minimization, Cost measure matrix, Optimality, Amino acid substitution matrix, Genetic code, Scoring matrix},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4MM1P6Y-1/2/ae968fc6fd2d033dbd7206757fd23293}
}

@ARTICLE{Goodarzi:2005,
  author = {Goodarzi, Hani and Najafabadi, Hamed Shateri and Hassani, Kasra and Nejad, Hamed Ahmadi and Torabi, Noorossadat},
  title = {On the optimality of the genetic code, with the consideration of coevolution theory by comparison of prominent cost measure matrices},
  journal = {Journal of Theoretical Biology},
  year = {2005},
  volume = {235},
  pages = {318--325},
  number = {3},
  abstract = {Statistical and biochemical studies have revealed non-random patterns in codon assignments. The canonical genetic code is known to be highly efficient in minimizing the effects of mistranslation errors and point mutations, since it is known that when an amino acid is converted to another due to error, the biochemical properties of the resulted amino acid are usually very similar to those of the original one. In this study, using altered forms of the fitness functions used in the prior studies, we have optimized the parameters involved in the calculation of the error minimizing property of the genetic code so that the genetic code outscores the random codes as much as possible. This work also compares two prominent matrices, the Mutation Matrix and Point Accepted Mutations 74-100 (PAM74-100).
It has been resulted that the hypothetical properties of the coevolution theory of the genetic code are already considered in PAM74-100, giving more evidence on the existence of bias towards the genetic code in this matrix. Furthermore, our results indicate that PAM74-100 is biased towards the single base mistranslation occurrences in second codon position as well as the frequency of amino acids. Thus PAM74-100 is not a suitable substitution matrix for the studies conducted on the evolution of the genetic code.},
  date-added = {2007-04-14 16:15:12 +0200},
  date-modified = {2007-07-05 19:36:21 +0200},
  keywords = {Coevolution theory, Fitness function, Genetic code, Load minimization, Optimality},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6WMD-4FPDRJ7-3/2/baad5f75c17812a333173cda78287361}
}

@ARTICLE{Guy:2004,
  author = {Guy, Lionel and Roten, Claude-Alain H.},
  title = {Genometric analyses of the organization of circular chromosomes: a universal pressure determines the direction of ribosomal {RNA} genes transcription relative to chromosome replication},
  journal = {Gene},
  year = {2004},
  volume = {340},
  pages = {45--52},
  number = {1},
  abstract = {Selective pressures related to gene function and chromosomal architecture are acting on genome sequences and can be revealed, for instance, by appropriate genometric methods. Cumulative nucleotide skew analyses, i.e., GC, TA, and ORF orientation skews, predict the location of the origin of DNA replication for 88 out of 100 completely sequenced bacterial chromosomes. These methods appear fully reliable for proteobacteria, Gram-positives, and spirochetes as well as for euryarchaeotes. Based on this genome architecture information, coorientation analyses reveal that in prokaryotes, ribosomal RNA (rRNA) genes encoding the small and large ribosomal subunits are all transcribed in the same direction as DNA replication; that is, they are located along the leading strand.
This result offers a simple and reliable method for circumscribing the region containing the origin of the DNA replication and reveals a strong selective pressure acting on the orientation of rRNA genes similar to the weaker one acting on the orientation of ORFs. Rate of coorientation of transfer RNA (tRNA) genes with DNA replication appears to be taxon-specific. Analyzing nucleotide biases such as GC and TA skews of genes and plotting one against the other reveals a taxonomic clusterization of species. All ribosomal RNA genes are enriched in Gs and depleted in Cs, the only so far known exception being the rRNA genes of deuterostomian mitochondria. However, this exception can be explained by the fact that in the chromosome of the human mitochondrion, the model of the deuterostomian organelle genome, DNA replication, and rRNA transcription proceed in opposite directions. A general rule is deduced from prokaryotic and mitochondrial genomes: ribosomal RNA genes that are transcribed in the same direction as the DNA replication are enriched in Gs, and those transcribed in the opposite direction are depleted in Gs.},
  date-added = {2007-07-05 13:50:57 +0200},
  date-modified = {2007-07-05 13:50:57 +0200},
  keywords = {Origin and terminus of replication, Gene orientation, Genometrics, Skews, RNA genes},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T39-4D4PVY6-1/2/c8b7e5cbf832d1f1513db32de37983af}
}

@ARTICLE{Gygi1999,
  author = {S. P. Gygi and Y. Rochon and B. R. Franza and R. Aebersold},
  title = {Correlation between protein and {mRNA} abundance in yeast.},
  journal = {Mol Cell Biol},
  year = {1999},
  volume = {19},
  pages = {1720--1730},
  number = {3},
  month = mar,
  abstract = {We have determined the relationship between mRNA and protein expression levels for selected genes expressed in the yeast Saccharomyces cerevisiae growing at mid-log phase.
The proteins contained in total yeast cell lysate were separated by high-resolution two-dimensional (2D) gel electrophoresis. Over 150 protein spots were excised and identified by capillary liquid chromatography-tandem mass spectrometry (LC-MS/MS). Protein spots were quantified by metabolic labeling and scintillation counting. Corresponding mRNA levels were calculated from serial analysis of gene expression (SAGE) frequency tables (V. E. Velculescu, L. Zhang, W. Zhou, J. Vogelstein, M. A. Basrai, D. E. Bassett, Jr., P. Hieter, B. Vogelstein, and K. W. Kinzler, Cell 88:243-251, 1997). We found that the correlation between mRNA and protein levels was insufficient to predict protein expression levels from quantitative mRNA data. Indeed, for some genes, while the mRNA levels were of the same value the protein levels varied by more than 20-fold. Conversely, invariant steady-state levels of certain proteins were observed with respective mRNA transcript levels that varied by as much as 30-fold. Another interesting observation is that codon bias is not a predictor of either protein or mRNA levels. 
Our results clearly delineate the technical boundaries of current approaches for quantitative analysis of protein expression and reveal that simple deduction from mRNA transcript analysis is insufficient.},
  institution = {Department of Molecular Biotechnology, University of Washington, Seattle, Washington 98195-7730, USA.},
  keywords = {Codon; Fungal Proteins; Gene Expression Regulation, Fungal; RNA, Fungal; RNA, Messenger; Saccharomyces cerevisiae},
  owner = {mbailly},
  pmid = {10022859},
  timestamp = {2008.05.30}
}

@ARTICLE{Haiser2007,
  author = {Henry J Haiser and Fedor V Karginov and Gregory J Hannon and Marie A Elliot},
  title = {Developmentally regulated cleavage of {tRNAs} in the bacterium {Streptomyces} coelicolor.},
  journal = {Nucleic Acids Res},
  year = {2007},
  month = dec,
  abstract = {The ability to sense and respond to environmental and physiological signals is critical for the survival of the soil-dwelling Gram-positive bacterium Streptomyces coelicolor. Nutrient deprivation triggers the onset of a complex morphological differentiation process that involves the raising of aerial hyphae and formation of spore chains, and coincides with the production of a diverse array of clinically relevant antibiotics and other secondary metabolites. These processes are tightly regulated; however, the genes and signals involved have not been fully elucidated. Here, we report a novel tRNA cleavage event that follows the same temporal regulation as morphological and physiological differentiation, and is growth medium dependent. All tRNAs appear to be susceptible to cleavage; however, there appears to be a bias towards increased cleavage of those tRNAs that specify highly utilized codons. In contrast to what has been observed in eukaryotes, accumulation of tRNA halves in S. coelicolor is not significantly affected by amino acid starvation, and is also not affected by induction of the stringent response or inhibition of ribosome function.
Mutants defective in aerial development and antibiotic production exhibit altered tRNA cleavage profiles relative to wild-type strains.},
  doi = {10.1093/nar/gkm1096},
  institution = {Biological Sciences, Howard Hughes Medical Institute, 1 Bungtown Road, Cold Spring Harbor, NY 11724, USA.},
  owner = {mbailly},
  pii = {gkm1096},
  pmid = {18084030},
  timestamp = {2008.02.12},
  url = {http://dx.doi.org/10.1093/nar/gkm1096}
}

@ARTICLE{Jaillon2008,
  author = {Olivier Jaillon and Khaled Bouhouche and Jean-François Gout and Jean-Marc Aury and Benjamin Noel and Baptiste Saudemont and Mariusz Nowacki and Vincent Serrano and Betina M Porcel and Béatrice Ségurens and Anne Le Mouël and Gersende Lepère and Vincent Schächter and Mireille Bétermier and Jean Cohen and Patrick Wincker and Linda Sperling and Laurent Duret and Eric Meyer},
  title = {Translational control of intron splicing in eukaryotes.},
  journal = {Nature},
  year = {2008},
  volume = {451},
  pages = {359--362},
  number = {7176},
  month = jan,
  abstract = {Most eukaryotic genes are interrupted by non-coding introns that must be accurately removed from pre-messenger RNAs to produce translatable mRNAs. Splicing is guided locally by short conserved sequences, but genes typically contain many potential splice sites, and the mechanisms specifying the correct sites remain poorly understood. In most organisms, short introns recognized by the intron definition mechanism cannot be efficiently predicted solely on the basis of sequence motifs. In multicellular eukaryotes, long introns are recognized through exon definition and most genes produce multiple mRNA variants through alternative splicing. The nonsense-mediated mRNA decay (NMD) pathway may further shape the observed sets of variants by selectively degrading those containing premature termination codons, which are frequently produced in mammals.
Here we show that the tiny introns of the ciliate Paramecium tetraurelia are under strong selective pressure to cause premature termination of mRNA translation in the event of intron retention, and that the same bias is observed among the short introns of plants, fungi and animals. By knocking down the two P. tetraurelia genes encoding UPF1, a protein that is crucial in NMD, we show that the intrinsic efficiency of splicing varies widely among introns and that NMD activity can significantly reduce the fraction of unspliced mRNAs. The results suggest that, independently of alternative splicing, species with large intron numbers universally rely on NMD to compensate for suboptimal splicing efficiency and accuracy.},
  doi = {10.1038/nature06495},
  institution = {Genoscope (CEA), 2 rue Gaston Crémieux CP5706, 91057 Evry, France.},
  owner = {mbailly},
  pii = {nature06495},
  pmid = {18202663},
  timestamp = {2008.01.21},
  url = {http://dx.doi.org/10.1038/nature06495}
}

@ARTICLE{Jenness1983,
  author = {D. D. Jenness and A. C. Burkholder and L. H. Hartwell},
  title = {Binding of alpha-factor pheromone to yeast a cells: chemical and genetic evidence for an alpha-factor receptor.},
  journal = {Cell},
  year = {1983},
  volume = {35},
  pages = {521--529},
  number = {2 Pt 1},
  month = dec,
  abstract = {The division cycle of yeast a cells is inhibited by alpha-factor. Haploid a cells were found to bind 35S-labeled alpha-factor, whereas haploid alpha cells and diploid a/alpha cells showed little binding. The association of alpha-factor with a cells was reversible upon dilution. Unlabeled alpha-factor competed for binding of 35S-alpha-factor; the concentration dependence for competition indicated 9 X 10(5) binding sites per cell with a dissociation constant (KD) of 3 X 10(-7) M. The rates of association (kon = 3 X 10(3) M-1 sec-1) and dissociation (koff = 9 X 10(-4) sec-1) were consistent with the equilibrium constant.
The alpha-factor binding activity associated with five temperature-sensitive ste2 mutants was thermolabile, suggesting that the STE2 gene encodes the receptor for alpha-factor. In contrast, the binding activity of other temperature-sensitive mutants (ste4, ste5, ste7, ste11, and ste12) showed no thermolability.},
  keywords = {Binding Sites; Genotype; Haploidy; Kinetics; Mutation; Peptides, genetics/metabolism; Saccharomyces cerevisiae, genetics; Species Specificity; Temperature},
  language = {eng},
  medline-pst = {ppublish},
  owner = {mbailly},
  pii = {0092-8674(83)90186-1},
  pmid = {6360378},
  timestamp = {2010.06.11}
}

@ARTICLE{Jia:2005,
  author = {Jia, Mengwen and Li, Yanda},
  title = {The relationship among gene expression, folding free energy and codon usage bias in \emph{{E}scherichia coli}},
  journal = {FEBS Letters},
  year = {2005},
  volume = {579},
  pages = {5333--5337},
  number = {24},
  abstract = {Taking advantage of microarray data in \emph{{E}scherichia coli} genome, the relationship among mRNA expression levels, folding free energy and codon usage bias are investigated. Our results indicate that mRNA expression is correlated to the stability of mRNA secondary structure and the codon usage bias. The decrease of the stability of mRNA structure contributes to the increase of mRNA expression. There is a negative correlation between codon adaptation index (CAI) and mRNA expression in genes with less stable structure. The relationship between the stability of mRNA structure and mRNA half-life indicates the stability of mRNA structure is different from mRNA half-life.},
  date-added = {2007-04-18 17:52:02 +0200},
  date-modified = {2007-07-09 20:05:05 +0200},
  keywords = {mRNA expression level, CAI, Z-score, Free energy, Random sequence},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T36-4H3Y8PG-2/2/3527574020c248e9bdd6888ec17b31c7}
}

@ARTICLE{Kanaya2001,
  author = {S. Kanaya and Y. Yamada and M. Kinouchi and Y.
Kudo and T. Ikemura},
  title = {Codon usage and {tRNA} genes in eukaryotes: correlation of codon usage diversity with translation efficiency and with {CG}-dinucleotide usage as assessed by multivariate analysis.},
  journal = {J Mol Evol},
  year = {2001},
  volume = {53},
  pages = {290--298},
  number = {4--5},
  abstract = {The species-specific diversity of codon usage in five eukaryotes (Schizosaccharomyces pombe, Caenorhabditis elegans, Drosophila melanogaster, Xenopus laevis, and Homo sapiens) was investigated with principal component analysis. Optimal codons for translation were predicted on the basis of tRNA-gene copy numbers. Highly expressed genes, such as those encoding ribosomal proteins and histones in S. pombe, C. elegans, and D. melanogaster, have biased patterns of codon usage which have been observed in a wide range of unicellular organisms. In S. pombe and C. elegans, codons contributing positively to the principal component with the largest variance (Z1-parameter) corresponded to the optimal codons which were predicted on the basis of tRNA gene numbers. In D. melanogaster, this correlation was less evident, and the codons contributing positively to the Z1-parameter corresponded primarily to codons with a C or G in the codon third position. In X. laevis and H. sapiens, codon usage in the genes encoding ribosomal proteins and histones was not significantly biased, suggesting that the primary factor influencing codon-usage diversity in these species is not translation efficiency. Codon-usage diversity in these species is known to reflect primarily isochore structures.
In the present study, the second additional factor was explained by the level of use of codons containing CG-dinucleotides, and this is discussed with respect to transcription regulation via methylation of CG-dinucleotides, which is observed in mammalian genomes.},
  doi = {10.1007/s002390010219},
  keywords = {Animals; Base Sequence; Caenorhabditis elegans; Codon; Drosophila melanogaster; Eukaryotic Cells; Evolution, Molecular; Gene Expression; Humans; Models, Genetic; Multivariate Analysis; Protein Biosynthesis; RNA, Transfer; Schizosaccharomyces; Variation (Genetics); Vertebrates},
  owner = {mbailly},
  pmid = {11675589},
  timestamp = {2007.10.05},
  url = {http://dx.doi.org/10.1007/s002390010219}
}

@ARTICLE{Kano-Sueoka:1999zr,
  author = {Kano-Sueoka, Tamiko and Lobry, Jean R. and Sueoka, Noboru},
  title = {Intra-strand biases in bacteriophage {T4} genome},
  journal = {Gene},
  year = {1999},
  volume = {238},
  pages = {59--64},
  number = {1},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-07-09 20:10:17 +0200},
  keywords = {Asymmetric mutation pressure, Bias from Parity Rule 2, Sense versus anti-sense strand, Transcription- and translation-associated biases},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T39-3XG1T5S-7/2/daf41d2a9e4a12894d22ee3974a8e178}
}

@ARTICLE{Karlin:2001,
  author = {Karlin, Samuel},
  title = {Detecting anomalous gene clusters and pathogenicity islands in diverse bacterial genomes},
  journal = {Trends in Microbiology},
  year = {2001},
  volume = {9},
  pages = {335--343},
  number = {7},
  abstract = {A gene in a genome is defined as putative alien (pA) if its codon usage difference from the average gene exceeds a high threshold and codon usage differences from ribosomal protein genes, chaperone genes and protein-synthesis-processing factors are also high. pA gene clusters in bacterial genomes are relevant for detecting genomic islands (GIs), including pathogenicity islands (PAIs).
Four other analyses appropriate to this task are G+C genome variation (the standard method); genomic signature divergences (dinucleotide bias); extremes of codon bias; and anomalies of amino acid usage. For example, the cagA domain of Helicobacter pylori is highly deviant in its genome signature and codon bias from the rest of the genome. Using these methods we can detect two potential PAIs in the Neisseria meningitidis genome, which contain hemagglutinin and/or hemolysin-related genes. Additionally, G+C variation and genome signature differences of the Mycobacterium tuberculosis genome indicate two pA gene clusters.},
  date-added = {2007-04-16 18:11:06 +0200},
  date-modified = {2007-04-16 18:11:06 +0200},
  keywords = {horizontal gene transfer, pathogenicity island, genome, Genome analysis, bacteria, alien},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6TD0-43C5RK5-15/2/77e0c72e2b56d424d868179c6dedf053}
}

@ARTICLE{Karlin:1998,
  author = {Karlin, Samuel and Mr{\'a}zek, Jan and Campbell, Allan M.},
  title = {Codon usages in different gene classes of the \emph{{E}scherichia coli} genome},
  journal = {Molecular Microbiology},
  year = {1998},
  volume = {29},
  pages = {1341--1355},
  number = {6},
  abstract = {A new measure for assessing codon bias of one group of genes with respect to a second group of genes is introduced. In this formulation, codon bias correlations for \emph{{E}scherichia coli} genes are evaluated for level of expression, for contrasts along genes, for genes in different 200 kb (or longer) contigs around the genome, for effects of gene size, for variation over different function classes, for codon bias in relation to possible lateral transfer and for dicodon bias for some gene classes. Among the function classes, codon biases of ribosomal proteins are the most deviant from the codon frequencies of the average E. coli gene. Other classes of 'highly expressed genes' (e.g.
amino acyl tRNA synthetases, chaperonins, modification genes essential to translation activities) show less extreme codon biases. Consistently for genes with experimentally determined expression rates in the exponential growth phase, those of highest molar abundances are more deviant from the average gene codon frequencies and are more similar in codon frequencies to the average ribosomal protein gene. Independent of gene size, the codon biases in the 5' third of genes deviate by more than a factor of two from those in the middle and 3' thirds. In this context, there appear to be conflicting selection pressures imposed by the constraints of ribosomal binding, or more generally the early phase of protein synthesis (about the first 50 codons) may be more biased than the complete nascent polypeptide. In partitioning the E. coli genome into 10 equal lengths, pronounced differences in codon site 3 G+C frequencies accumulate. Genes near to oriC have 5{\%} greater codon site 3 G+C frequencies than do genes from the ter region. This difference also is observed between small (100-300 codons) and large (>800 codons) genes. This result contrasts with that for eukaryotic genomes (including human, Caenorhabditis elegans and yeast) where long genes tend to have site 3 more AT rich than short genes. Many of the above results are special for E. coli genes and do not apply to genes of most bacterial genomes. A gene is defined as alien (possibly horizontally transferred) if its codon bias relative to the average gene exceeds a high threshold and the codon bias relative to ribosomal proteins is also appropriately high. These are identified, including four clusters (operons). 
The bulk of these genes have no known function.},
  date-added = {2007-04-16 18:07:00 +0200},
  date-modified = {2007-04-16 18:07:00 +0200},
  doi = {10.1046/j.1365-2958.1998.01008.x},
  jo = {Molecular Microbiology},
  m3 = {doi:10.1046/j.1365-2958.1998.01008.x},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.blackwell-synergy.com/doi/abs/10.1046/j.1365-2958.1998.01008.x}
}

% NOTE(review): Karlin1998 below describes the same article as Karlin:1998
% (same title, journal, volume, issue and pages). Both keys are kept so that
% existing citations keep resolving; merge them once citing documents agree on one key.
@ARTICLE{Karlin1998,
  author = {S. Karlin and J. Mrázek and A. M. Campbell},
  title = {Codon usages in different gene classes of the {Escherichia} coli genome.},
  journal = {Mol Microbiol},
  year = {1998},
  volume = {29},
  pages = {1341--1355},
  number = {6},
  month = sep,
  abstract = {A new measure for assessing codon bias of one group of genes with respect to a second group of genes is introduced. In this formulation, codon bias correlations for Escherichia coli genes are evaluated for level of expression, for contrasts along genes, for genes in different 200 kb (or longer) contigs around the genome, for effects of gene size, for variation over different function classes, for codon bias in relation to possible lateral transfer and for dicodon bias for some gene classes. Among the function classes, codon biases of ribosomal proteins are the most deviant from the codon frequencies of the average E. coli gene. Other classes of 'highly expressed genes' (e.g. amino acyl tRNA synthetases, chaperonins, modification genes essential to translation activities) show less extreme codon biases. Consistently for genes with experimentally determined expression rates in the exponential growth phase, those of highest molar abundances are more deviant from the average gene codon frequencies and are more similar in codon frequencies to the average ribosomal protein gene. Independent of gene size, the codon biases in the 5' third of genes deviate by more than a factor of two from those in the middle and 3' thirds.
In this context, there appear to be conflicting selection pressures imposed by the constraints of ribosomal binding, or more generally the early phase of protein synthesis (about the first 50 codons) may be more biased than the complete nascent polypeptide. In partitioning the E. coli genome into 10 equal lengths, pronounced differences in codon site 3 G+C frequencies accumulate. Genes near to oriC have 5\% greater codon site 3 G+C frequencies than do genes from the ter region. This difference also is observed between small (100-300 codons) and large (>800 codons) genes. This result contrasts with that for eukaryotic genomes (including human, Caenorhabditis elegans and yeast) where long genes tend to have site 3 more AT rich than short genes. Many of the above results are special for E. coli genes and do not apply to genes of most bacterial genomes. A gene is defined as alien (possibly horizontally transferred) if its codon bias relative to the average gene exceeds a high threshold and the codon bias relative to ribosomal proteins is also appropriately high. These are identified, including four clusters (operons). The bulk of these genes have no known function.},
  keywords = {Amino Acyl-tRNA Synthetases; Animals; Bacterial Proteins; Base Composition; Chromosomes, Bacterial; Codon; Coliphages; DNA, Bacterial; DNA, Viral; Escherichia coli; Gene Expression; Genes, Bacterial; Genome, Bacterial; Humans; Operon; Protein Biosynthesis; Protein Folding; Ribosomal Proteins; Species Specificity},
  owner = {mbailly},
  pmid = {9781873},
  timestamp = {2007.10.05}
}

@ARTICLE{Keller:2007,
  author = {Keller, Irene and Bensasson, Douda and Nichols, Richard A.},
  title = {Transition-Transversion Bias Is Not Universal: A Counter Example from Grasshopper Pseudogenes},
  journal = {PLoS Genetics},
  year = {2007},
  volume = {3},
  number = {2},
  abstract = {Comparisons of the DNA sequences of metazoa show an excess of transitional over transversional substitutions.
Part of this bias is due to the relatively high rate of mutation of methylated cytosines to thymine. Postmutation processes also introduce a bias, particularly selection for codon-usage bias in coding regions. It is generally assumed, however, that there is a universal bias in favour of transitions over transversions, possibly as a result of the underlying chemistry of mutation. Surprisingly, this underlying trend has been evaluated only in two types of metazoan, namely Drosophila and the Mammalia. Here, we investigate a third group, and find no such bias. We characterize the point substitution spectrum in Podisma pedestris, a grasshopper species with a very large genome. The accumulation of mutations was surveyed in two pseudogene families, nuclear mitochondrial and ribosomal DNA sequences. The cytosine-guanine (CpG) dinucleotides exhibit the high transition frequencies expected of methylated sites. The transition rate at other cytosine residues is significantly lower. After accounting for this methylation effect, there is no significant difference between transition and transversion rates. These results contrast with reports from other taxa and lead us to reject the hypothesis of a universal transition/transversion bias. Instead we suggest fundamental interspecific differences in point substitution processes.},
  date-added = {2007-04-15 17:24:05 +0200},
  date-modified = {2007-04-15 17:24:05 +0200},
  doi = {10.1371/journal.pgen.0030022},
  owner = {mbailly},
  pages = {e22},
  sp = {e22 EP -},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://dx.doi.org/10.1371/journal.pgen.0030022}
}

@ARTICLE{Kloster2008,
  author = {Morten Kloster and Chao Tang},
  title = {{SCUMBLE}: a method for systematic and accurate detection of codon usage bias by maximum likelihood estimation.},
  journal = {Nucleic Acids Res},
  year = {2008},
  volume = {36},
  pages = {3819--3827},
  number = {11},
  month = jun,
  abstract = {The genetic code is degenerate--most amino acids can be encoded by from two to as many as six different codons.
The synonymous codons are not used with equal frequency: not only are some codons favored over others, but also their usage can vary significantly from species to species and between different genes in the same organism. Known causes of codon bias include differences in mutation rates as well as selection pressure related to the expression level of a gene, but the standard analysis methods can account for only a fraction of the observed codon usage variation. We here introduce an explicit model of codon usage bias, inspired by statistical physics. Combining this model with a maximum likelihood approach, we are able to clearly identify different sources of bias in various genomes. We have applied the algorithm to Saccharomyces cerevisiae as well as 325 prokaryote genomes, and in most cases our model explains essentially all observed variance.},
  doi = {10.1093/nar/gkn288},
  institution = {Department of Bioengineering and Therapeutic Sciences, UCSF, San Francisco, California 94158, USA.},
  owner = {mbailly},
  pii = {gkn288},
  pmid = {18495752},
  timestamp = {2008.06.25},
  url = {http://dx.doi.org/10.1093/nar/gkn288}
}

@ARTICLE{Kurland:1991,
  author = {Kurland, C. G.},
  title = {Codon bias and gene expression},
  journal = {FEBS Letters},
  year = {1991},
  volume = {285},
  pages = {165--169},
  number = {2},
  abstract = {The frequencies with which individual synonymous codons are used to code their cognate amino acids is quite variable from genome to genome and within genomes, from gene to gene. One particularly well documented codon bias is that associated with highly expressed genes in bacteria as well as in yeast; this is the so-called major codon bias. Here, it is suggested that the major codon bias is not an arrangement for regulating individual gene expression. Instead, the data suggest that this codon bias, which is correlated with a corresponding bias of tRNA abundance, is a global arrangement for optimizing the growth efficiency of cells.
On the practical side, it is suggested that heterologous gene expression is not as sensitive to codon bias as previously thought, but that it is quite sensitive to other characteristics of the heterologous gene.},
  date-added = {2007-04-04 20:07:23 +0200},
  date-modified = {2007-04-04 20:07:23 +0200},
  keywords = {Codon bias, Aminoacyl-tRNA, Cell growth, mRNA stability, Protein stability},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T36-44P8JPX-8X/2/a0ed608846b3b3c7ee6d44735c225f6d}
}

@ARTICLE{Lee2007b,
  author = {Insuk Lee and Zhihua Li and Edward M Marcotte},
  title = {An Improved, Bias-Reduced Probabilistic Functional Gene Network of Baker's Yeast, {Saccharomyces} cerevisiae.},
  journal = {PLoS ONE},
  year = {2007},
  volume = {2},
  pages = {e988},
  number = {10},
  abstract = {BACKGROUND: Probabilistic functional gene networks are powerful theoretical frameworks for integrating heterogeneous functional genomics and proteomics data into objective models of cellular systems. Such networks provide syntheses of millions of discrete experimental observations, spanning DNA microarray experiments, physical protein interactions, genetic interactions, and comparative genomics; the resulting networks can then be easily applied to generate testable hypotheses regarding specific gene functions and associations. METHODOLOGY/PRINCIPAL FINDINGS: We report a significantly improved version (v. 2) of a probabilistic functional gene network [1] of the baker's yeast, Saccharomyces cerevisiae. We describe our optimization methods and illustrate their effects in three major areas: the reduction of functional bias in network training reference sets, the application of a probabilistic model for calculating confidences in pair-wise protein physical or genetic interactions, and the introduction of simple thresholds that eliminate many false positive mRNA co-expression relationships.
Using the network, we predict and experimentally verify the function of the yeast RNA binding protein Puf6 in 60S ribosomal subunit biogenesis. CONCLUSIONS/SIGNIFICANCE: YeastNet v. 2, constructed using these optimizations together with additional data, shows significant reduction in bias and improvements in precision and recall, in total covering 102,803 linkages among 5,483 yeast proteins (95\% of the validated proteome). YeastNet is available from http://www.yeastnet.org.},
  doi = {10.1371/journal.pone.0000988},
  owner = {mbailly},
  pmid = {17912365},
  timestamp = {2007.12.13},
  url = {http://dx.doi.org/10.1371/journal.pone.0000988}
}

@ARTICLE{Li2008,
  author = {Chuan-Yun Li and Xizeng Mao and Liping Wei},
  title = {Genes and (common) pathways underlying drug addiction.},
  journal = {PLoS Comput Biol},
  year = {2008},
  volume = {4},
  pages = {e2},
  number = {1},
  month = jan,
  abstract = {Drug addiction is a serious worldwide problem with strong genetic and environmental influences. Different technologies have revealed a variety of genes and pathways underlying addiction; however, each individual technology can be biased and incomplete. We integrated 2,343 items of evidence from peer-reviewed publications between 1976 and 2006 linking genes and chromosome regions to addiction by single-gene strategies, microrray, proteomics, or genetic studies. We identified 1,500 human addiction-related genes and developed KARG (http://karg.cbi.pku.edu.cn), the first molecular database for addiction-related genes with extensive annotations and a friendly Web interface. We then performed a meta-analysis of 396 genes that were supported by two or more independent items of evidence to identify 18 molecular pathways that were statistically significantly enriched, covering both upstream signaling events and downstream effects.
Five molecular pathways significantly enriched for all four different types of addictive drugs were identified as common pathways which may underlie shared rewarding and addictive actions, including two new ones, GnRH signaling pathway and gap junction. We connected the common pathways into a hypothetical common molecular network for addiction. We observed that fast and slow positive feedback loops were interlinked through CAMKII, which may provide clues to explain some of the irreversible features of addiction.}, doi = {10.1371/journal.pcbi.0040002}, institution = {Center for Bioinformatics, National Laboratory of Protein Engineering and Plant Genetic Engineering, College of Life Sciences, Peking University, Beijing, People's Republic of China.}, owner = {mbailly}, pii = {07-PLCB-RA-0425}, pmid = {18179280}, timestamp = {2008.02.05}, url = {http://dx.doi.org/10.1371/journal.pcbi.0040002} } @ARTICLE{Yeong-ShinLin09262006, author = {Lin, Yeong-Shin and Byrnes, Jake K. and Hwang, Jenn-Kang and Li, Wen-Hsiung}, title = {Codon-usage bias versus gene conversion in the evolution of yeast duplicate genes}, journal = {Proceedings of the National Academy of Sciences, USA}, year = {2006}, volume = {103}, pages = {14412--14416}, number = {39}, abstract = {Many Saccharomyces cerevisiae duplicate genes that were derived from an ancient whole-genome duplication (WGD) unexpectedly show a small synonymous divergence (KS), a higher sequence similarity to each other than to orthologues in Saccharomyces bayanus, or slow evolution compared with the orthologue in Kluyveromyces waltii, a non-WGD species. This decelerated evolution was attributed to gene conversion between duplicates.
Using $\approx$300 WGD gene pairs in four species and their orthologues in non-WGD species, we show that codon-usage bias and protein-sequence conservation are two important causes for decelerated evolution of duplicate genes, whereas gene conversion is effective only in the presence of strong codon-usage bias or protein-sequence conservation. Furthermore, we find that change in mutation pattern or in tDNA copy number changed codon-usage bias and increased the KS distance between K. waltii and S. cerevisiae. Intriguingly, some proteins showed fast evolution before the radiation of WGD species but little or no sequence divergence between orthologues and paralogues thereafter, indicating that functional conservation after the radiation may also be responsible for decelerated evolution in duplicates.}, date-added = {2007-01-12 11:58:45 +0100}, date-modified = {2007-07-09 20:33:09 +0200}, doi = {10.1073/pnas.0606348103}, eprint = {http://www.pnas.org/cgi/reprint/103/39/14412.pdf}, owner = {mbailly}, timestamp = {2009.10.08}, url = {http://www.pnas.org/cgi/content/abstract/103/39/14412} } @ARTICLE{Lobry:2002, author = {Lobry, Jean and Sueoka, Noboru}, title = {Asymmetric directional mutation pressures in bacteria}, journal = {Genome Biology}, year = {2002}, volume = {3}, pages = {research0058.1--research0058.14}, number = {10}, abstract = {BACKGROUND:When there are no strand-specific biases in mutation and selection rates (that is, in the substitution rates) between the two strands of DNA, the average nucleotide composition is theoretically expected to be A = T and G = C within each strand. Deviations from these equalities are therefore evidence for an asymmetry in selection and/or mutation between the two strands.
By focusing on weakly selected regions that could be oriented with respect to replication in 43 out of 51 completely sequenced bacterial chromosomes, we have been able to detect asymmetric directional mutation pressures.RESULTS:Most of the 43 chromosomes were found to be relatively enriched in G over C and T over A, and slightly depleted in G+C, in their weakly selected positions (intergenic regions and third codon positions) in the leading strand compared with the lagging strand. Deviations from A = T and G = C were highly correlated between third codon positions and intergenic regions, with a lower degree of deviation in intergenic regions, and were not correlated with overall genomic G+C content.CONCLUSIONS:During the course of bacterial chromosome evolution, the effects of asymmetric directional mutation pressures are commonly observed in weakly selected positions. The degree of deviation from equality is highly variable among species, and within species is higher in third codon positions than in intergenic regions. The orientation of these effects is almost universal and is compatible in most cases with the hypothesis of an excess of cytosine deamination in the single-stranded state during DNA replication. However, the variation in G+C content between species is influenced by factors other than asymmetric mutation pressure.}, date-added = {2007-04-15 17:24:08 +0200}, date-modified = {2007-04-15 17:24:08 +0200}, isbn = {1465-6906}, m3 = {10.1186/gb-2002-3-10-research0058}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://genomebiology.com/2002/3/10/research/0058} } @ARTICLE{Lobry:1994, author = {Lobry, J. R. 
and Gautier, C.}, title = {Hydrophobicity, expressivity and aromaticity are the major trends of amino-acid usage in 999 \emph{{E}scherichia coli} chromosome-encoded genes}, journal = {Nucleic Acids Research}, year = {1994}, volume = {22}, pages = {3174--3180}, number = {15}, abstract = {Multivariate analysis of the amino-acid compositions of 999 chromosome-encoded proteins from \emph{{E}scherichia coli} showed that three main factors influence the variability of amino-acid composition. The first factor was correlated with the global hydrophobicity of proteins, and it discriminated integral membrane proteins from the others. The second factor was correlated with gene expressivity, showing a bias in highly expressed genes towards amino-acids having abundant major tRNAs. Just as highly expressed genes have reduced codon diversity in protein coding sequences, so do they have a reduced diversity of amino-acid choice. This showed that translational constraints are important enough to affect the global amino-acid composition of proteins. The third factor was correlated with the aromaticity of proteins, showing that aromatic amino-acid content is highly variable.}, annote = {10.1093/nar/22.15.3174}, date-added = {2007-04-19 13:13:01 +0200}, date-modified = {2007-04-19 13:13:01 +0200}, jo = {Nucl. Acids Res.}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://nar.oxfordjournals.org/cgi/content/abstract/22/15/3174} } @ARTICLE{Loeytynoja2008, author = {Ari Löytynoja and Nick Goldman}, title = {Phylogeny-aware gap placement prevents errors in sequence alignment and evolutionary analysis.}, journal = {Science}, year = {2008}, volume = {320}, pages = {1632--1635}, number = {5883}, month = {Jun}, abstract = {Genetic sequence alignment is the basis of many evolutionary and comparative studies, and errors in alignments lead to errors in the interpretation of evolutionary information in genomes. 
Traditional multiple sequence alignment methods disregard the phylogenetic implications of gap patterns that they create and infer systematically biased alignments with excess deletions and substitutions, too few insertions, and implausible insertion-deletion-event histories. We present a method that prevents these systematic errors by recognizing insertions and deletions as distinct evolutionary events. We show theoretically and practically that this improves the quality of sequence alignments and downstream analyses over a wide range of realistic alignment problems. These results suggest that insertions and sequence turnover are more common than is currently thought and challenge the conventional picture of sequence evolution and mechanisms of functional and structural changes.}, doi = {10.1126/science.1158395}, institution = {European Molecular Biology Laboratory-European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton CB10 1SD, UK. ari@ebi.ac.uk}, owner = {mbailly}, pii = {320/5883/1632}, pmid = {18566285}, timestamp = {2008.06.25}, url = {http://dx.doi.org/10.1126/science.1158395} } @ARTICLE{Medigue:2005, author = {M\'{e}digue, Claudine and Krin, Evelyne and Pascal, G\'{e}raldine and Barbe, Valerie and Bernsel, Andreas and Bertin, Philippe N. and Cheung, Frankie and Cruveiller, Stephane and D'Amico, Salvino and Duilio, Angela and Fang, Gang and Feller, Georges and Ho, Christine and Mangenot, Sophie and Marino, Gennaro and Nilsson, Johan and Parrilli, Ermenegilda and Rocha, Eduardo P. C. 
and Rouy, Zoe and Sekowska, Agnieszka and Tutino, Maria Luisa and Vallenet, David and von Heijne, Gunnar and Danchin, Antoine}, title = {Coping with cold: The genome of the versatile marine {A}ntarctica bacterium \emph{{P}seudoalteromonas haloplanktis} {TAC125}}, journal = {Genome Research}, year = {2005}, volume = {15}, pages = {1325--1335}, number = {10}, abstract = {A considerable fraction of life develops in the sea at temperatures lower than 15$^{\circ}$C. Little is known about the adaptive features selected under those conditions. We present the analysis of the genome sequence of the fast growing Antarctica bacterium Pseudoalteromonas haloplanktis TAC125. We find that it copes with the increased solubility of oxygen at low temperature by multiplying dioxygen scavenging while deleting whole pathways producing reactive oxygen species. Dioxygen-consuming lipid desaturases achieve both protection against oxygen and synthesis of lipids making the membrane fluid. A remarkable strategy for avoidance of reactive oxygen species generation is developed by P. haloplanktis, with elimination of the ubiquitous molybdopterin-dependent metabolism. The P. haloplanktis proteome reveals a concerted amino acid usage bias specific to psychrophiles, consistently appearing apt to accommodate asparagine, a residue prone to make proteins age. Adding to its originality, P.
haloplanktis further differs from its marine counterparts with recruitment of a plasmid origin of replication for its second chromosome.}, annote = {10.1101/gr.4126905}, date-added = {2007-04-07 18:15:24 +0200}, date-modified = {2007-07-09 20:14:54 +0200}, jo = {Genome Res.}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://www.genome.org/cgi/content/abstract/15/10/1325} } @ARTICLE{Marin2008, author = {Antonio Marín and Xuhua Xia}, title = {GC skew in protein-coding genes between the leading and lagging strands in bacterial genomes: New substitution models incorporating strand bias.}, journal = {J Theor Biol}, year = {2008}, month = {Apr}, abstract = {The DNA strands in most prokaryotic genomes experience strand-biased spontaneous mutation, especially C-->T mutations produced by deamination that occur preferentially in the leading strand. This has often been invoked to account for the asymmetry in nucleotide composition, typically measured by GC skew, between the leading and the lagging strand. Casting such strand asymmetry in the framework of a nucleotide substitution model is important for understanding genomic evolution and phylogenetic reconstruction. We present a substitution model showing that the increased C-->T mutation will lead to positive GC skew in one strand but negative GC skew in the other, with greater C-->T mutation pressure associated with greater differences in GC skew between the leading and the lagging strand. However, the model based on mutation bias alone does not predict any positive correlation in GC skew between the leading and lagging strands. We computed GC skew for coding sequences collinear with the leading and lagging strands across 339 prokaryotic genomes and found a strong and positive correlation in GC skew between the two strands. 
We show that the observed positive correlation can be satisfactorily explained by an improved substitution model with one additional parameter incorporating a general trend of C avoidance.}, doi = {10.1016/j.jtbi.2008.04.004}, institution = {Departamento de Genética, Universidad de Sevilla, Avenida Reina Mercedes 6, E-41012 Sevilla, Spain.}, owner = {mbailly}, pii = {S0022-5193(08)00179-3}, pmid = {18486155}, timestamp = {2008.07.16}, url = {http://dx.doi.org/10.1016/j.jtbi.2008.04.004} } @ARTICLE{McLachlan:1984, author = {McLachlan, Andrew D. and Staden, Rodger and Boswell, D. Ross}, title = {A method for measuring the non-random bias of a codon usage table}, journal = {Nucleic Acids Research}, year = {1984}, volume = {12}, pages = {9567--9575}, number = {24}, abstract = {We describe a new statistical method for measuring bias in the codon usage table of a gene. The test is based on the multinomial and Poisson distributions. The method is used to scan DNA sequences and measure the strength of codon preference. For E. Coli we show that the strength of codon preference is related to levels of gene expression. The method can also be used to compare base triplet frequencies with those expected from the base composition. This second type of codon bias test is useful for distinguishing coding from non-coding regions.}, annote = {10.1093/nar/12.24.9567}, date-added = {2007-04-16 18:23:21 +0200}, date-modified = {2007-04-16 18:23:21 +0200}, jo = {Nucl. Acids Res.}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://nar.oxfordjournals.org/cgi/content/abstract/12/24/9567} } @ARTICLE{McNulty:2003, author = {McNulty, Dean E. and Claffee, Brett A. and Huddleston, Michael J. 
and Kane, James F.}, title = {Mistranslational errors associated with the rare arginine codon {CGG} in \emph{{E}scherichia coli}}, journal = {Protein Expression and Purification}, year = {2003}, volume = {27}, pages = {365--374}, number = {2}, abstract = {In \emph{{E}scherichia coli}, CGG is a rare arginine codon occurring at a frequency of 0.54{\%} in all E. coli mRNAs or 9.8{\%} when an arginine residue is encoded for. When present in high numbers or in clusters in highly expressed recombinant mRNA, rare codons can cause expression problems compromising product yield and translational fidelity. The coding region for an N-terminally polyhistidine tagged p27 protease domain from Herpes Simplex Virus 2 (HSV-2) contains 11 of these rare arginine codons, with 3 occurring in tandem near the C-terminus of the protein. When expressed in E. coli, the majority of the recombinant material produced had an apparent molecular mass of 31 kDa by SDS-PAGE gels or 3 kDa higher than predicted. Detailed biochemical analysis was performed on chemical and enzymatic digests of the protein and peptide fragments were characterized by Edman and MS/MS sequencing approaches. Two major species were isolated comprising +1 frameshift events at both the second and third CGG codons in the triplet cluster. Translation proceeded in the missense frame to the next termination codon. In addition, significant levels of glutamine misincorporating for arginine were discovered, suggesting second base misreading of CGG as CAG. Coexpression of the argX gene, which encodes the cognate tRNA for CGG codons, largely eliminated both the frameshift and misincorporation events, and increased expression levels of authentic product by up to 7-fold. We conclude that supplementation of the rare arginyl tRNACGG levels by coexpression of the argX gene can largely alleviate the CGG codon bias present in E. 
coli, allowing for efficient and accurate translation of heterologous gene products.}, date-added = {2007-03-30 10:35:02 +0200}, date-modified = {2007-07-09 20:14:11 +0200}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://www.sciencedirect.com/science/article/B6WPJ-47S6M4B-8/2/9202172ad2f753cfbd6e848c95659ab3} } @ARTICLE{Medrano-Soto:2004, author = {Medrano-Soto, Arturo and Moreno-Hagelsieb, Gabriel and Vinuesa, Pablo and Christen, J. Andres and Collado-Vides, Julio}, title = {Successful Lateral Transfer Requires Codon Usage Compatibility Between Foreign Genes and Recipient Genomes}, journal = {Molecular Biology and Evolution}, year = {2004}, volume = {21}, pages = {1884--1894}, number = {10}, abstract = {We present evidence supporting the notion that codon usage (CU) compatibility between foreign genes and recipient genomes is an important prerequisite to assess the selective advantage of imported functions, and therefore to increase the fixation probability of horizontal gene transfer (HGT) events. This contrasts with the current tendency in research to predict recent HGTs in prokaryotes by assuming that acquired genes generally display poor CU. By looking at the CU level (poor, typical, or rich) exhibited by putative xenologs still resembling their original CU, we found that most alien genes predominantly present typical CU immediately upon introgression, thereby suggesting that the role of CU amelioration in HGT has been overemphasized. In our strategy, we first scanned a representative set of 103 complete prokaryotic genomes for all pairs of candidate xenologs (exported/imported genes) displaying similar CU. We applied additional filtering criteria, including phylogenetic validations, to enhance the reliability of our predictions. 
Our approach makes no assumptions about the CU of foreign genes being typical or atypical within the recipient genome, thus providing a novel unbiased framework to study the evolutionary dynamics of HGT.}, annote = {10.1093/molbev/msh202}, date-added = {2007-04-17 11:11:41 +0200}, date-modified = {2007-04-17 11:11:41 +0200}, jo = {Mol Biol Evol}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://mbe.oxfordjournals.org/cgi/content/abstract/21/10/1884} } @ARTICLE{Merkl:2003, author = {Rainer Merkl}, title = {A Survey of Codon and Amino Acid Frequency Bias in Microbial Genomes Focusing on Translational Efficiency}, journal = {Journal of Molecular Evolution}, year = {2003}, volume = {57}, pages = {453--466}, number = {4}, date-added = {2007-04-16 12:20:23 +0200}, date-modified = {2007-04-16 12:20:23 +0200}, m3 = {10.1007/s00239-003-2499-1}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://dx.doi.org/10.1007/s00239-003-2499-1} } @ARTICLE{Mira:2001, author = {Mira, Alex and Ochman, Howard and Moran, Nancy A.}, title = {Deletional bias and the evolution of bacterial genomes}, journal = {Trends in Genetics}, year = {2001}, volume = {17}, pages = {589--596}, number = {10}, abstract = {Although bacteria increase their DNA content through horizontal transfer and gene duplication, their genomes remain small and, in particular, lack nonfunctional sequences. This pattern is most readily explained by a pervasive bias towards higher numbers of deletions than insertions. When selection is not strong enough to maintain them, genes are lost in large deletions or inactivated and subsequently eroded. Gene inactivation and loss are particularly apparent in obligate parasites and symbionts, in which dramatic reductions in genome size can result not from selection to lose DNA, but from decreased selection to maintain gene functionality. 
Here we discuss the evidence showing that deletional bias is a major force that shapes bacterial genomes.}, date-added = {2007-04-04 20:04:08 +0200}, date-modified = {2007-04-04 20:04:08 +0200}, keywords = {genome evolution, deletions, lateral transfer, symbionts, pathogens, pseudogenes, buchnera aphidicola, \emph{{E}scherichia coli}}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://www.sciencedirect.com/science/article/B6TCY-47YYC44-Y/2/f97e77148015cd85395a06a7612302c3} } @ARTICLE{Moriyama:1998, author = {Moriyama, EN and Powell, JR}, title = {Gene length and codon usage bias in \emph{{D}rosophila melanogaster}, \emph{{S}accharomyces cerevisiae} and \emph{{E}scherichia coli}}, journal = {Nucleic Acids Research}, year = {1998}, volume = {26}, pages = {3188--3193}, number = {13}, annote = {10.1093/nar/26.13.3188}, date-added = {2007-04-14 11:44:02 +0200}, date-modified = {2007-07-09 20:15:50 +0200}, jo = {Nucl. Acids Res.}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://nar.oxfordjournals.org/cgi/content/abstract/26/13/3188} } @ARTICLE{Moszer1999, author = {Moszer, Ivan and Rocha, Eduardo PC and Danchin, Antoine}, title = {Codon usage and lateral gene transfer in \emph{{B}acillus subtilis}}, journal = {Current Opinion in Microbiology}, year = {1999}, volume = {2}, pages = {524--528}, number = {5}, abstract = {\emph{{B}acillus subtilis} possesses three classes of genes, differing by their codon preference. One class corresponds to prophages or prophage-like elements, indicative of the existence of systematic lateral gene transfer in this organism. 
The nature of the selection pressure that operates on codon bias is beginning to be understood.}, date-added = {2007-03-22 17:25:51 +0100}, date-modified = {2007-03-22 17:25:51 +0100}, owner = {mbailly}, timestamp = {2009.10.08}, url = {http://www.sciencedirect.com/science/article/B6VS2-3Y6PDF0-G/2/3dd6a394a8420a68d9d3804b4a0c7032} } @ARTICLE{Musto:2006, author = {Musto, Hector and Naya, Hugo and Zavala, Alejandro and Romero, Hector and Alvarez-Valin, Fernando and Bernardi, Giorgio}, title = {Genomic {GC} level, optimal growth temperature, and genome size in prokaryotes}, journal = {Biochemical and Biophysical Research Communications}, year = {2006}, volume = {347}, pages = {1--3}, number = {1}, abstract = {Two years ago, we showed that positive correlations between optimal growth temperature (Topt) and genome GC are observed in 15 out of the 20 families of prokaryotes we analyzed, thus indicating that {\tt{}"{}}Topt is one of the factors that influence genomic GC in prokaryotes{\tt{}"{}}. Our results were disputed, but these criticisms were demonstrated to be mistaken and based on misconceptions. In a recent report, Wang et al. {$[$}H.C. Wang, E. Susko, A.J. Roger, On the correlation between genomic G+C content and optimal growth temperature in prokaryotes: data quality and confounding factors, Biochem. Biophys. Res. Commun. 342 (2006) 681-684{$]$} criticize our results by stating that {\tt{}"{}}all previous simple correlation analyses of GC versus temperature have ignored the fact that genomic GC content is influenced by multiple factors including both intrinsic mutational bias and extrinsic environmental factors{\tt{}"{}}. This statement, besides being erroneous, is surprising because it applies in fact not to ours but to the authors' article. Here, we rebut the points raised by Wang et al. and review some issues that have been a matter of debate, regarding the influence of environmental factors upon GC content in prokaryotes. 
Furthermore, we demonstrate that the relationship that exists between genome size and GC level is valid for aerobic, facultative, and microaerophilic species, but not for anaerobic prokaryotes.}, date-added = {2007-04-10 19:56:54 +0200}, date-modified = {2007-07-09 20:16:03 +0200}, keywords = {Prokaryotes, Genome evolution, Genome size, GC level, Optimal growth temperature}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://www.sciencedirect.com/science/article/B6WBK-4K7158R-1/2/1e04a2b4599de63c570cfb203195bf39} } @ARTICLE{AkiraMuto01011987, author = {Muto, Akira and Osawa, Syozo}, title = {The Guanine and Cytosine Content of Genomic {DNA} and Bacterial Evolution}, journal = {Proceedings of the National Academy of Sciences, USA}, year = {1987}, volume = {84}, pages = {166--169}, number = {1}, abstract = {The genomic guanine and cytosine (G+C) content of eubacteria is related to their phylogeny. The G+C content of various parts of the genome (protein genes, stable RNA genes, and spacers) reveals a positive linear correlation with the G+C content of their genomic DNA. However, the plotted correlation slopes differ among various parts of the genome or among the first, second, and third positions of the codons depending on their functional importance. Facts suggest that biased mutation pressure, called A$\cdot$T/G$\cdot$C pressure, has affected whole DNA during evolution so as to determine the genomic G+C content in a given bacterium.
The role of A$\cdot$T/G$\cdot$C pressure in diversification of bacterial DNA sequences and codon usage patterns is discussed in the perspective of the neutral theory of molecular evolution.}, date-added = {2007-03-22 17:25:51 +0100}, date-modified = {2007-07-09 20:31:58 +0200}, doi = {10.1073/pnas.84.1.166}, eprint = {http://www.pnas.org/cgi/reprint/84/1/166.pdf}, owner = {mbailly}, timestamp = {2009.10.08}, url = {http://www.pnas.org/cgi/content/abstract/84/1/166} } @ARTICLE{Nakagawa2007, author = {So Nakagawa and Yoshihito Niimura and Takashi Gojobori and Hiroshi Tanaka and Kin-Ichiro Miura}, title = {Diversity of preferred nucleotide sequences around the translation initiation codon in eukaryote genomes.}, journal = {Nucleic Acids Res}, year = {2007}, month = {Dec}, abstract = {Understanding regulatory mechanisms of protein synthesis in eukaryotes is essential for the accurate annotation of genome sequences. Kozak reported that the nucleotide sequence GCCGCC(A/G)CCAUGG (AUG is the initiation codon) was frequently observed in vertebrate genes and that this 'consensus' sequence enhanced translation initiation. However, later studies using invertebrate, fungal and plant genes reported different 'consensus' sequences. In this study, we conducted extensive comparative analyses of nucleotide sequences around the initiation codon by using genomic data from 47 eukaryote species including animals, fungi, plants and protists. The analyses revealed that preferred nucleotide sequences are quite diverse among different species, but differences between patterns of nucleotide bias roughly reflect the evolutionary relationships of the species. We also found strong biases of A/G at position -3, A/C at position -2 and C at position +5 that were commonly observed in all species examined. Genes with higher expression levels showed stronger signals, suggesting that these nucleotides are responsible for the regulation of translation initiation.
The diversity of preferred nucleotide sequences around the initiation codon might be explained by differences in relative contributions from two distinct patterns, GCCGCCAUG and AAAAAAAUG, which implies the presence of multiple molecular mechanisms for controlling translation initiation.}, doi = {10.1093/nar/gkm1102}, institution = {Department of Systems Biology, School of Biomedical Science, Department of Bioinformatics, Medical Research Institute, Tokyo Medical and Dental University, Yushima, Tokyo, Center for Information Biology and DNA Data Bank of Japan, National Institute of Genetics, Mishima, Shizuoka, Biological Information Research Center, National Institute of Advanced Industrial Science and Technology, Aomi, Tokyo and Department of Medical Genome Science, Graduate School of Frontier Science, University of Tokyo, Kashiwa, Chiba, Japan.}, owner = {mbailly}, pii = {gkm1102}, pmid = {18086709}, timestamp = {2008.02.12}, url = {http://dx.doi.org/10.1093/nar/gkm1102} } @ARTICLE{Naya:2002, author = {Naya, Hugo and Romero, H\'{e}ctor and Zavala, Alejandro and Alvarez, Beatriz and Musto, H\'{e}ctor}, title = {Aerobiosis Increases the Genomic Guanine Plus Cytosine Content (GC{\%}) in Prokaryotes}, journal = {Journal of Molecular Evolution}, year = {2002}, volume = {55}, pages = {260--264}, number = {3}, abstract = {The huge variation in the genomic guanine plus cytosine content (GC{\%}) among prokaryotes has been explained by two mutually exclusive hypotheses, namely, selectionist and neutralist. The former proposals have in common the assumption that this feature is a form of adaptation to some ecological or physiological condition. On the other hand, the neutralist interpretation states that the variations are due only to different mutational biases.
Since all of the traits that have been proposed by the selectionists either appeared to be limited to certain genera or were invalidated by the availability of more data, they cannot be considered as a selective force influencing the genomic GC{\%} across all prokaryotes. In this report we show that aerobic prokaryotes display a significant increment in genomic GC{\%} in relation to anaerobic ones. This is the first time that a link between a metabolic character and GC{\%} has been found, independently of phylogenetic relationships and with a statistically significant amount of data.}, date-added = {2007-07-05 13:51:15 +0200}, date-modified = {2007-07-05 13:51:15 +0200}, m3 = {10.1007/s00239-002-2323-3}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://dx.doi.org/10.1007/s00239-002-2323-3} } @ARTICLE{Nishizawa1998, author = {M. Nishizawa and K. Nishizawa}, title = {Biased usages of arginines and lysines in proteins are correlated with local-scale fluctuations of the G + C content of DNA sequences.}, journal = {J Mol Evol}, year = {1998}, volume = {47}, pages = {385--393}, number = {4}, month = {Oct}, abstract = {Amino acid residues arginine (R) and lysine (K) have similar physicochemical characteristics and are often mutually substituted during evolution without affecting protein function. Statistical examinations on human proteins show that more R than K residues are used in the proximity of R residues, whereas more K than R are used near K residues. This biased use occurs on both a global and a local scale (shorter than approximately 100 residues). Even within a given exon, G + C-rich and A + T-rich short DNA segments preferentially encode R and K, respectively. The biased use of R and K on a local scale is also seen in Saccharomyces cerevisiae and Caenorhabditis elegans, which lack global-scale mosaic structures with varying GC\%, or isochores.
Besides R and K, several amino acids are also used with a positive or negative correlation with the local GC\% of third codon bases. The local-, or "within-gene"-, scale heterogeneity of the DNA sequence may influence the sequence of the encoded protein segment.}, keywords = {Amino Acid Substitution; Animals; Arginine; Base Composition; Base Sequence; Caenorhabditis elegans; Codon; DNA; Escherichia coli; Evolution, Molecular; Exons; Genetic Code; Humans; Lysine; Proteins; Saccharomyces cerevisiae}, owner = {mbailly}, pmid = {9767684}, timestamp = {2007.10.04} } @ARTICLE{Novembre08012002, author = {Novembre, John A.}, title = {Accounting for Background Nucleotide Composition When Measuring Codon Usage Bias}, journal = {Molecular Biology and Evolution}, year = {2002}, volume = {19}, pages = {1390--1394}, number = {8}, date-added = {2007-03-22 17:25:51 +0100}, date-modified = {2007-07-09 20:31:45 +0200}, eprint = {http://mbe.oxfordjournals.org/cgi/reprint/19/8/1390.pdf}, owner = {mbailly}, timestamp = {2009.10.08}, url = {http://mbe.oxfordjournals.org} } @ARTICLE{Pascal:2006, author = {G\'{e}raldine Pascal and Claudine M\'{e}digue and Antoine Danchin}, title = {Persistent biases in the amino acid composition of prokaryotic proteins}, journal = {Bioessays}, year = {2006}, volume = {28}, pages = {726--738}, number = {7}, date-added = {2007-04-06 15:57:01 +0200}, date-modified = {2007-04-19 13:15:43 +0200}, owner = {mbailly}, timestamp = {2009.10.08} } @ARTICLE{Pascal2005, author = {G\'{e}raldine Pascal and Claudine M\'{e}digue and Antoine Danchin}, title = {Universal biases in protein composition of model prokaryotes}, journal = {Proteins: Structure, Function and Genetics}, year = {2005}, volume = {60}, pages = {27--35}, number = {1}, abstract = {The levels of cellular organization in living organisms are the results of a variety of selection pressures.
We have investigated here the final outcome of this integrated selective process in proteins of the best known microbial models \emph{{E}scherichia coli}, \emph{{B}acillus subtilis}, and Methanococcus jannaschii, supposed to have undergone separate evolution for more than 1 billion years. Using multivariate analysis methods, including correspondence analysis, we studied the overall amino acid composition of all proteins making a proteome. Starting from and further developing previous results that had pointed out some general forces driving the amino acid composition of the proteomes of these model bacteria, we explored the correlations existing between the structure and functions of the proteins forming a proteome and their amino acid composition. The electric charge of amino acids measured against hydrophobicity creates a highly homogeneous cluster, made exclusively of proteins that are core components of the cytoplasmic membrane of the cell (integral inner membrane proteins). A second bias is imposed by the G+C content of the genome, indicating that protein functions are so robust with respect to amino acid changes that they can accommodate a large shift in the nucleotide content of the genome. A remarkable role of aromatic amino acids was uncovered. Expressed orphan proteins are enriched in these residues, suggesting that they might participate in a process of gain of function during evolution. Proteins 2005. 2005 Wiley-Liss, Inc.}, date-added = {2007-03-22 17:25:51 +0100}, date-modified = {2007-03-22 17:25:51 +0100}, owner = {mbailly}, timestamp = {2009.10.08}, url = {http://dx.doi.org/10.1002/prot.20475} } @ARTICLE{Percudani1999, author = {R. Percudani and S. 
Ottonello},
  title = {Selection at the wobble position of codons read by the same t{RNA} in \emph{{S}accharomyces cerevisiae}},
  journal = {Molecular Biology and Evolution},
  year = {1999},
  volume = {16},
  pages = {1752--1762},
  number = {12},
  month = dec,
  abstract = {The transfer RNA gene complement of Saccharomyces cerevisiae was utilized for a whole-genome analysis of the deviation from a neutral usage of pyrimidine-ending cognate codons, that is, codons read by a single tRNA species having either inosine or guanosine as the first anticodon base. Mutational pressure at the wobble position was estimated from the base composition of the noncoding portion of the yeast genome. The selective pressure for translational efficiency was inferred from the degree of codon adaptation to tRNA gene redundancy and from mRNA abundance data derived from yeast transcriptome analysis. Amino acid conservation in orthologous comparisons with wholly sequenced microbial genomes was used to estimate translational accuracy requirements. A close correspondence was observed between the usage of wobble position pyrimidines and the frequency predicted by mutational bias. However, in the case of four cognate pairs (Gly: ggu/ggc; Asn: aau/aac; Phe: uuu/uuc; Tyr: uau/uac) all read by guanosine-starting anticodons, we found evidence for a strong selective pressure driven by translational efficiency. Only for the glycine pair, wobble pyrimidine choice also appears to fulfill a translational accuracy requirement. Wobble pyrimidine selection is strictly related to the number of hydrogen bonds formed by alternative cognate codons: whenever a different number of hydrogen bonds can be formed at the wobble position, there is selection against six- or nine-hydrogen-bonded codon-anticodon pairs.
Our results indicate that an intrinsic codon preference, critically dependent on the stability of codon-anticodon interaction and mainly reflecting selection for the optimization of translational efficiency, is built into the translational apparatus.},
  keywords = {Codon; Genome, Fungal; Open Reading Frames; Protein Biosynthesis; Pyrimidines; RNA, Transfer; Saccharomyces cerevisiae; Sequence Alignment},
  owner = {mbailly},
  pmid = {10605116},
  timestamp = {2007.10.05}
}

% The accent in the first surname is written as a brace group starting with
% the control sequence so that BibTeX sorts and labels it as a single letter.
@ARTICLE{Perriere2002,
  author = {Perri{\`e}re, Guy and Thioulouse, Jean},
  title = {Use and misuse of correspondence analysis in codon usage studies},
  journal = {Nucleic Acids Research},
  year = {2002},
  volume = {30},
  pages = {4548--4555},
  number = {20},
  abstract = {Correspondence analysis has frequently been used for codon usage studies but this method is often misused. Because amino acid composition exerts constraints on codon usage, it is common to use tables containing relative codon frequencies (or ratios of frequencies) instead of simple codon counts to get rid of these amino acid biases. The problem is that some important properties of correspondence analysis, such as rows weighting, are lost in the process. Moreover, the use of relative measures sometimes introduces other biases and often diminishes the quantity of information to analyse, occasionally resulting in interpretation errors. For instance, in the case of an organism such as Borrelia burgdorferi, the use of relative measures led to the conclusion that there was no translational selection, while analyses based on codon counts show that there is a possibility of a selective effect at that level.
In this paper, we expose these problems and we propose alternative strategies to correspondence analysis for studying codon usage biases when amino acid composition effects must be removed.},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-03-22 17:25:51 +0100},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/30/20/4548}
}

@ARTICLE{Pfeiffer:2007,
  author = {Pfeiffer, Thomas and Hoffmann, Robert},
  title = {Temporal patterns of genes in scientific publications},
  journal = {Proceedings of the National Academy of Sciences, USA},
  year = {2007},
  volume = {104},
  pages = {12052--12056},
  number = {29},
  abstract = {Publications in scientific journals contain a considerable fraction of our scientific knowledge. Analyzing data from publication databases helps us understand how this knowledge is obtained and how it changes over time. In this study, we present a mathematical model for the temporal dynamics of data on the scientific content of publications. Our data set consists of references to thousands of genes in the $>$15 million publications listed in PubMed. We show that the observed dynamics may result from a simple process: Researchers predominantly publish on genes that already appear in many publications. This might be a rewarding strategy for researchers, because there is a positive correlation between the frequency of a gene in scientific publications and the journal impact of the publications. By comparing the empirical data with model predictions, we are able to detect unusual publication patterns that often correspond to major achievements in the field.
We identify interactions between yeast genes from PubMed and show that the frequency differences of genes in publications lead to a biased picture of the resulting interaction network.},
  annote = {10.1073/pnas.0701315104},
  date-added = {2007-07-18 12:55:24 +0200},
  date-modified = {2007-07-18 12:55:24 +0200},
  doi = {10.1073/pnas.0701315104},
  jo = {PNAS},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.pnas.org/cgi/content/abstract/104/29/12052}
}

@ARTICLE{Price2004a,
  author = {Nathan D Price and Jan Schellenberger and Bernhard O Palsson},
  title = {Uniform sampling of steady-state flux spaces: means to design experiments and to interpret enzymopathies},
  journal = {Biophysical Journal},
  year = {2004},
  volume = {87},
  pages = {2172--2186},
  number = {4},
  month = oct,
  abstract = {Reconstruction of genome-scale metabolic networks is now possible using multiple different data types. Constraint-based modeling is an approach to interrogate capabilities of reconstructed networks by constraining possible cellular behavior through the imposition of physicochemical laws. As a result, a steady-state flux space is defined that contains all possible functional states of the network. Uniform random sampling of the steady-state flux space allows for the unbiased appraisal of its contents. Monte Carlo sampling of the steady-state flux space of the reconstructed human red blood cell metabolic network under simulated physiologic conditions yielded the following key results: 1), probability distributions for the values of individual metabolic fluxes showed a wide variety of shapes that could not have been inferred without computation; 2), pairwise correlation coefficients were calculated between all fluxes, determining the level of independence between the measurement of any two fluxes, and identifying highly correlated reaction sets; and 3), the network-wide effects of the change in one (or a few) variables (i.e., a simulated enzymopathy or fixing a flux range based on measurements) were computed.
Mathematical models provide the most compact and informative representation of a hypothesis of how a cell works. Thus, understanding model predictions clearly is vital to driving forward the iterative model-building procedure that is at the heart of systems biology. Taken together, the Monte Carlo sampling procedure provides a broadening of the constraint-based approach by allowing for the unbiased and detailed assessment of the impact of the applied physicochemical constraints on a reconstructed network.},
  doi = {10.1529/biophysj.104.043000},
  institution = {Department of Bioengineering, University of California at San Diego, La Jolla, California 92093-0412, USA.},
  keywords = {Animals; Blood Proteins; Computer Simulation; Erythrocytes; Gene Expression Regulation, Enzymologic; Humans; Metabolism, Inborn Errors; Models, Biological; Models, Statistical; Multienzyme Complexes; Sample Size; Signal Transduction},
  owner = {mbailly},
  pii = {87/4/2172},
  pmid = {15454420},
  timestamp = {2008.02.07},
  url = {http://dx.doi.org/10.1529/biophysj.104.043000}
}

@ARTICLE{Rocha11012004,
  author = {Rocha, Eduardo P. C.},
  title = {Codon usage bias from t{RNA}'s point of view: Redundancy, specialization, and efficient decoding for translation optimization},
  journal = {Genome Research},
  year = {2004},
  volume = {14},
  pages = {2279--2286},
  number = {11},
  abstract = {The selection-mutation-drift theory of codon usage plays a major role in the theory of molecular evolution by explaining the co-evolution of codon usage bias and tRNA content in the framework of translation optimization. Because most studies have focused only on codon usage, we analyzed the tRNA gene pool of 102 bacterial species. We show that as minimal generation times get shorter, the genomes contain more tRNA genes, but fewer anticodon species. Surprisingly, despite the wide G+C variation of bacterial genomes these anticodons are the same in most genomes.
This suggests an optimization of the translation machinery to use a small subset of optimal codons and anticodons in fast-growing bacteria and in highly expressed genes. As a result, the overrepresented codons in highly expressed genes tend to be the same in very different genomes to match the same most-frequent anticodons. This is particularly important in fast-growing bacteria, which have higher codon usage bias in these genes. Three models were tested to understand the choice of codons recognized by the same anticodons, all providing significant fit, but under different classes of genes and genomes. Thus, co-evolution of tRNA gene composition and codon usage bias in genomes seen from tRNA's point of view agrees with the selection-mutation-drift theory. However, it suggests a much more universal trend in the evolution of anticodon and codon choice than previously thought. It also provides new evidence that a selective force for the optimization of the translation machinery is the maximization of growth.},
  date-added = {2007-01-12 12:00:04 +0100},
  date-modified = {2007-07-09 20:31:06 +0200},
  doi = {10.1101/gr.2896904},
  eprint = {http://www.genome.org/cgi/reprint/14/11/2279.pdf},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://www.genome.org/cgi/content/abstract/14/11/2279}
}

@ARTICLE{Rocha1999,
  author = {Rocha, Eduardo P. C. and Danchin, Antoine and Viari, Alain},
  title = {Universal replication biases in bacteria},
  journal = {Molecular Microbiology},
  year = {1999},
  volume = {32},
  pages = {11--16},
  number = {1},
  date-added = {2007-01-12 12:09:06 +0100},
  date-modified = {2007-04-10 11:40:49 +0200},
  doi = {10.1046/j.1365-2958.1999.01334.x},
  eprint = {http://www.blackwell-synergy.com/doi/pdf/10.1046/j.1365-2958.1999.01334.x},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://www.blackwell-synergy.com/doi/abs/10.1046/j.1365-2958.1999.01334.x}
}

@ARTICLE{Rocha12012006,
  author = {Rocha, Eduardo P. C.
and Touchon, Marie and Feil, Edward J.},
  title = {Similar compositional biases are caused by very different mutational effects},
  journal = {Genome Research},
  year = {2006},
  volume = {16},
  pages = {1537--1547},
  number = {12},
  abstract = {Compositional replication strand bias, commonly referred to as GC skew, is present in many genomes of prokaryotes, eukaryotes, and viruses. Although cytosine deamination in ssDNA (resulting in C$\rightarrow$T changes on the leading strand) is often invoked as its major cause, the precise contributions of this and other substitution types are currently unknown. It is also unclear if the underlying mutational asymmetries are the same among taxa, are stable over time, or how closely the observed biases are to mutational equilibrium. We analyzed nearly neutral sites of seven taxa each with between three and six complete bacterial genomes, and inferred the substitution spectra of fourfold degenerate positions in nonhighly expressed genes. Using a bootstrap procedure, we extracted compositional biases associated with replication and identified the significant asymmetries. Although all taxa showed an overrepresentation of G relative to C on the leading strand (and imbalances between A and T), widely variable substitution asymmetries are noted. Surprisingly, all substitution types show significant asymmetry in at least one taxon, but none were universally biased in all taxa. Notably, in the two most biased genomes, A$\rightarrow$G, rather than C$\rightarrow$T, shapes the compositional bias. Given the variability in these biases, we propose that the process is multifactorial. Finally, we also find that most genomes are not at compositional equilibrium, and suggest that mutational-based heterotachy is deeply imprinted in the history of biological macromolecules.
This shows that similar compositional biases associated with the same essential well-conserved process, replication, do not reflect similar mutational processes in different genomes, and that caution is required in inferring the roles of specific mutational biases on the basis of contemporary patterns of sequence composition.},
  doi = {10.1101/gr.5525106},
  url = {http://www.genome.org/cgi/content/abstract/16/12/1537},
  eprint = {http://www.genome.org/cgi/reprint/16/12/1537.pdf},
  owner = {mbailly},
  timestamp = {2009.10.08},
  date-added = {2007-01-12 12:01:06 +0100},
  date-modified = {2007-07-09 20:31:15 +0200}
}

@ARTICLE{Rocha:2002,
  author = {Rocha, Eduardo P. C.},
  title = {Is there a role for replication fork asymmetry in the distribution of genes in bacterial genomes?},
  journal = {Trends in Microbiology},
  year = {2002},
  volume = {10},
  number = {9},
  pages = {393--395},
  abstract = {Replication generates bacterial chromosomes with strands that differ in the number of genes and base composition. It has been suggested that in bacteria such as \emph{{B}acillus subtilis}, PolC is responsible for the synthesis of the leading strand and DnaE for the lagging strand, whereas in many other bacteria DnaE is responsible for the synthesis of both strands. Here, I show that the possession of PolC correlates with leading strands that contain an average of 78{\%} of genes compared with 58{\%} for genomes that do not contain PolC. This suggests that asymmetrical replication forks could have a major role in defining and constraining the structure of the bacterial chromosome.
The presence of PolC is not correlated with compositional strand bias, suggesting that the two biases result from different types of structural asymmetry.},
  date-added = {2007-04-10 11:31:23 +0200},
  date-modified = {2007-04-10 11:31:23 +0200},
  keywords = {genome analysis, bacterial evolution, replication, strand bias},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6TD0-46PGF9B-1/2/033a30f9bcdf7a020671716e609e464e}
}

@ARTICLE{Rocha2003,
  author = {Rocha, Eduardo P. C. and Danchin, Antoine},
  title = {Essentiality, not expressiveness, drives gene-strand bias in bacteria},
  journal = {Nature Genetics},
  year = {2003},
  volume = {34},
  pages = {377--378},
  number = {4},
  doi = {10.1038/ng1209},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://dx.doi.org/10.1038/ng1209}
}

@ARTICLE{Rocha:2003,
  author = {Rocha, Eduardo P. C. and Danchin, Antoine},
  title = {Gene essentiality determines chromosome organisation in bacteria},
  journal = {Nucleic Acids Research},
  year = {2003},
  volume = {31},
  pages = {6570--6577},
  number = {22},
  abstract = {In \emph{{E}scherichia coli} and \emph{{B}acillus subtilis}, essentiality, not expressivity, drives the distribution of genes between the two replicating strands. Although essential genes tend to be coded in the leading replicating strand, the underlying selective constraints and the evolutionary extent of these findings have still not been subject to comparative studies. Here, we extend our previous analysis to the genomes of low G + C firmicutes and $\gamma$-proteobacteria, and in a second step to all sequenced bacterial genomes. The inference of essentiality by homology allows us to show that essential genes are much more frequent in the leading strand than other genes, even when compared with non-essential highly expressed genes.
Smaller biases were found in the genomes of obligatory intracellular bacteria, for which the assignment of essentiality by homology from fast growing free-living bacteria is most problematic. Cross-comparisons used to assess potential errors in the assignment of essentiality by homology revealed that, in most cases, variations in the assignment criteria have little influence on the overall results. Essential genes tend to be more conserved in the leading strand than average genes, which is consistent with selection for this positioning and may impose a strong constraint on chromosomal rearrangements. These results indicate that essentiality plays a fundamental role in the distribution of genes in most bacterial genomes.},
  annote = {10.1093/nar/gkg859},
  date-added = {2007-04-06 11:43:11 +0200},
  date-modified = {2007-04-06 11:43:11 +0200},
  doi = {10.1093/nar/gkg859},
  jo = {Nucl. Acids Res.},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/31/22/6570}
}

@ARTICLE{Rocha:2002vn,
  author = {Rocha, Eduardo P. C.
and Danchin, Antoine},
  title = {Base composition bias might result from competition for metabolic resources},
  journal = {Trends in Genetics},
  year = {2002},
  volume = {18},
  pages = {291--294},
  number = {6},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-03-22 17:25:51 +0100},
  keywords = {genome analysis, bacterial evolution, nucleotide synthesis, horizontal transfer, parasitism},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6TCY-45Y6RV1-D/2/2a5d1a1be33570a03abc02809fa1971f}
}

@ARTICLE{Ronald2007,
  author = {James Ronald and Joshua M Akey},
  title = {The evolution of gene expression {QTL} in \emph{{S}accharomyces cerevisiae}},
  journal = {PLoS ONE},
  year = {2007},
  volume = {2},
  pages = {e678},
  number = {7},
  abstract = {Understanding the evolutionary forces that influence patterns of gene expression variation will provide insights into the mechanisms of evolutionary change and the molecular basis of phenotypic diversity. To date, studies of gene expression evolution have primarily been made by analyzing how gene expression levels vary within and between species. However, the fundamental unit of heritable variation in transcript abundance is the underlying regulatory allele, and as a result it is necessary to understand gene expression evolution at the level of DNA sequence variation. Here we describe the evolutionary forces shaping patterns of genetic variation for 1206 cis-regulatory QTL identified in a cross between two divergent strains of Saccharomyces cerevisiae. We demonstrate that purifying selection against mildly deleterious alleles is the dominant force governing cis-regulatory evolution in S. cerevisiae and estimate the strength of selection.
We also find that essential genes and genes with larger codon bias are subject to slightly stronger cis-regulatory constraint and that positive selection has played a role in the evolution of major trans-acting QTL.},
  doi = {10.1371/journal.pone.0000678},
  url = {http://dx.doi.org/10.1371/journal.pone.0000678},
  pmid = {17668057},
  owner = {mbailly},
  timestamp = {2007.09.17}
}

% NOTE(review): local-url below points at a file on the original owner's
% machine; presumably only meaningful there -- confirm before sharing.
@ARTICLE{Sau:2005dq,
  author = {Sau, K. and Gupta, S. K. and Sau, S. and Ghosh, T. C.},
  title = {Synonymous codon usage bias in 16 \emph{{S}taphylococcus aureus} phages: Implication in phage therapy},
  journal = {Virus Research},
  year = {2005},
  volume = {113},
  number = {2},
  pages = {123--131},
  keywords = {Staphylococcal phages, Synonymous codon usage, Mutational bias, Translational selection, Phage therapy},
  url = {http://www.sciencedirect.com/science/article/B6T32-4GFCST6-1/2/6946e871de2ca24b2357cbd10948e561},
  local-url = {file://localhost/Users/marc/These/Bib/cit-abs-endnote-62471490.enw},
  ty = {JOUR},
  owner = {mbailly},
  timestamp = {2009.10.08},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-04-14 10:53:29 +0200}
}

@ARTICLE{Sau:2006lq,
  author = {Sau, K. and Gupta, S. K. and Sau, S. and Mandal, S. C. and Ghosh, T.
C.},
  title = {Factors influencing synonymous codon and amino acid usage biases in {M}imivirus},
  journal = {Biosystems},
  year = {2006},
  volume = {85},
  pages = {107--113},
  number = {2},
  date-added = {2007-03-22 17:25:51 +0100},
  date-modified = {2007-07-09 20:21:23 +0200},
  keywords = {Relative synonymous codon usage (RSCU), Codon adaptation index (CAI), Correspondence analysis, Amino acid usage, Mean molecular weight (MMW), Mimivirus},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T2K-4J3WSKN-1/2/65e1991d17d0849797af0b65d8fda935}
}

% NOTE(review): accents on the first two authors' given names were restored
% by hand (the stored record had plain ASCII) -- verify against the paper.
@ARTICLE{Selosse:2001,
  author = {Selosse, Marc-Andr{\'e} and Albert, B{\'e}atrice and Godelle, Bernard},
  title = {Reducing the genome size of organelles favours gene transfer to the nucleus},
  journal = {Trends in Ecology \& Evolution},
  year = {2001},
  volume = {16},
  pages = {135--141},
  number = {3},
  abstract = {Endosymbiotic organelles exhibit strong genetic erosion during their evolution as a result of the loss of unnecessary genes and of gene transfer to the nucleus. The reasons for this erosion are much debated. Unidirectionality of DNA exchange between cell compartments could favour biased gene transfer, but selection might also act to favour nuclear localization of genes, for example, because organelles accumulate more mutations than do nuclei. Selection for rapid replication might be a general cause of organelle genome reduction. This selection also accounts for the compactness of organelle genomes.},
  date-added = {2007-04-10 19:23:28 +0200},
  date-modified = {2007-04-10 19:23:28 +0200},
  keywords = {Organelle genome, Gene transfer, Gene substitution, Genetic reduction, Plastids, Mitochondria, Genome compactness, Muller's ratchet},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6VJ1-429XTFM-H/2/91c3015b052a0479cba6dc6029163a61}
}

@ARTICLE{Shabalina:2006,
  author = {Shabalina, Svetlana A. and Ogurtsov, Aleksey Y.
and Spiridonov, Nikolay A.},
  title = {A periodic pattern of m{RNA} secondary structure created by the genetic code},
  year = {2006},
  journal = {Nucleic Acids Research},
  volume = {34},
  number = {8},
  pages = {2428--2437},
  abstract = {Single-stranded mRNA molecules form secondary structures through complementary self-interactions. Several hypotheses have been proposed on the relationship between the nucleotide sequence, encoded amino acid sequence and mRNA secondary structure. We performed the first transcriptome-wide in silico analysis of the human and mouse mRNA foldings and found a pronounced periodic pattern of nucleotide involvement in mRNA secondary structure. We show that this pattern is created by the structure of the genetic code, and the dinucleotide relative abundances are important for the maintenance of mRNA secondary structure. Although synonymous codon usage contributes to this pattern, it is intrinsic to the structure of the genetic code and manifests itself even in the absence of synonymous codon usage bias at the 4-fold degenerate sites. While all codon sites are important for the maintenance of mRNA secondary structure, degeneracy of the code allows regulation of stability and periodicity of mRNA secondary structure. We demonstrate that the third degenerate codon sites contribute most strongly to mRNA stability. These results convincingly support the hypothesis that redundancies in the genetic code allow transcripts to satisfy requirements for both protein structure and RNA structure. Our data show that selection may be operating on synonymous codons to maintain a more stable and ordered mRNA secondary structure, which is likely to be important for transcript stability and translation.
We also demonstrate that functional domains of the mRNA {$[$}5'-untranslated region (5'-UTR), CDS and 3'-UTR{$]$} preferentially fold onto themselves, while the start codon and stop codon regions are characterized by relaxed secondary structures, which may facilitate initiation and termination of translation.},
  annote = {10.1093/nar/gkl287},
  date-added = {2007-04-14 11:44:05 +0200},
  date-modified = {2007-07-09 20:21:44 +0200},
  doi = {10.1093/nar/gkl287},
  jo = {Nucl. Acids Res.},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/34/8/2428}
}

@ARTICLE{Sharp1987,
  author = {Sharp, P. M. and Li, W.-H.},
  title = {The codon Adaptation Index -- a measure of directional synonymous codon usage bias, and its potential applications},
  journal = {Nucleic Acids Research},
  year = {1987},
  volume = {15},
  pages = {1281--1295},
  number = {3},
  date-modified = {2007-07-09 20:22:07 +0200},
  owner = {mbailly},
  timestamp = {2009.10.08},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/15/3/1281}
}

@ARTICLE{Sharp:2005,
  author = {Sharp, Paul M. and Bailes, Elizabeth and Grocock, Russell J. and Peden, John F. and Sockett, R. Elizabeth},
  title = {Variation in the strength of selected codon usage bias among bacteria},
  journal = {Nucleic Acids Research},
  year = {2005},
  volume = {33},
  pages = {1141--1153},
  number = {4},
  abstract = {Among bacteria, many species have synonymous codon usage patterns that have been influenced by natural selection for those codons that are translated more accurately and/or efficiently. However, in other species selection appears to have been ineffective. Here, we introduce a population genetics-based model for quantifying the extent to which selection has been effective. The approach is applied to 80 phylogenetically diverse bacterial species for which whole genome sequences are available.
The strength of selected codon usage bias, S, is found to vary substantially among species; in 30{\%} of the genomes examined, there was no significant evidence that selection had been effective. Values of S are highly positively correlated with both the number of rRNA operons and the number of tRNA genes. These results are consistent with the hypothesis that species exposed to selection for rapid growth have more rRNA operons, more tRNA genes and more strongly selected codon usage bias. For example, Clostridium perfringens, the species with the highest value of S, can have a generation time as short as 7 min.},
  annote = {10.1093/nar/gki242},
  date-added = {2007-04-14 13:14:19 +0200},
  date-modified = {2007-04-14 13:14:19 +0200},
  doi = {10.1093/nar/gki242},
  jo = {Nucl. Acids Res.},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/33/4/1141}
}

@ARTICLE{Shrader:1993,
  author = {Shrader, T E and Tobias, J W and Varshavsky, A},
  title = {The {N}-end rule in \emph{{E}scherichia coli}: cloning and analysis of the leucyl, phenylalanyl-t{RNA}-protein transferase gene \emph{aat}},
  journal = {Journal of Bacteriology},
  year = {1993},
  volume = {175},
  pages = {4364--4374},
  number = {14},
  abstract = {The N-end rule relates the in vivo half-life of a protein to the identity of its N-terminal residue. Distinct versions of the N-end rule operate in bacteria, fungi, and mammals. We report the cloning and analysis of aat, the \emph{{E}scherichia coli} gene that encodes leucyl, phenylalanyl-tRNA-protein transferase (L/F-transferase), a component of the bacterial N-end rule pathway. L/F-transferase is required for the degradation of N-end rule substrates bearing an N-terminal arginine or lysine. The aat gene maps to the 19-min region of the E. coli chromosome and encodes a 234-residue protein whose sequence lacks significant similarities to sequences in data bases.
In vitro, L/F-transferase catalyzes the posttranslational conjugation of leucine or phenylalanine to the N termini of proteins that bear an N-terminal arginine or lysine. However, the isolation and sequence analysis of a beta-galactosidase variant engineered to expose an N-terminal arginine in vivo revealed the conjugation of leucine but not of phenylalanine to the N terminus of the beta-galactosidase variant. Thus, the specificity of L/F-transferase in vivo may be greater than that in vitro. The aat gene is located approximately 1 kb from clpA, which encodes a subunit of ATP-dependent protease Clp. Although both aat and clpA are required for the degradation of certain N-end rule substrates, their nearly adjacent genes are convergently transcribed. The aat gene lies downstream of an open reading frame that encodes a homolog of the mammalian multidrug resistance P glycoproteins.},
  date-added = {2007-04-06 11:00:50 +0200},
  date-modified = {2007-07-09 20:22:59 +0200},
  jo = {J. Bacteriol.},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://jb.asm.org/cgi/content/abstract/175/14/4364}
}

@ARTICLE{Sinoquet2008,
  author = {Christine Sinoquet and Sylvain Demey and Fr{\'e}d{\'e}rique Braun},
  title = {Large-scale computational and statistical analyses of high transcription potentialities in 32 prokaryotic genomes},
  journal = {Nucleic Acids Research},
  year = {2008},
  volume = {36},
  pages = {3332--3340},
  number = {10},
  month = jun,
  abstract = {This article compares 32 bacterial genomes with respect to their high transcription potentialities. The sigma70 promoter has been widely studied for Escherichia coli model and a consensus is known. Since transcriptional regulations are known to compensate for promoter weakness (i.e. when the promoter similarity with regard to the consensus is rather low), predicting functional promoters is a hard task.
Instead, the research work presented here comes within the scope of investigating potentially high ORF expression, in relation with three criteria: (i) high similarity to the sigma70 consensus (namely, the consensus variant appropriate for each genome), (ii) transcription strength reinforcement through a supplementary binding site--the upstream promoter (UP) element--and (iii) enhancement through an optimal Shine-Dalgarno (SD) sequence. We show that in the AT-rich Firmicutes' genomes, frequencies of potentially strong sigma70-like promoters are exceptionally high. Besides, though they contain a low number of strong promoters (SPs), some genomes may show a high proportion of promoters harbouring an UP element. Putative SPs of lesser quality are more frequently associated with an UP element than putative strong promoters of better quality. A meaningful difference is statistically ascertained when comparing bacterial genomes with similarly AT-rich genomes generated at random; the difference is the highest for Firmicutes. Comparing some Firmicutes genomes with similarly AT-rich Proteobacteria genomes, we confirm the Firmicutes specificity. We show that this specificity is neither explained by AT-bias nor genome size bias; neither does it originate in the abundance of optimal SD sequences, a typical and significant feature of Firmicutes more thoroughly analysed in our study.},
  doi = {10.1093/nar/gkn135},
  institution = {Computer Science Institute of Nantes-Atlantic (Lina), U.M.R. C.N.R.S. 6241, University of Nantes, 2 rue de la Houssini{\`e}re, BP 92208, 44322 Nantes Cedex, France.
christine.sinoquet@univ-nantes.fr},
  owner = {mbailly},
  pii = {gkn135},
  pmid = {18440978},
  timestamp = {2008.06.10},
  url = {http://dx.doi.org/10.1093/nar/gkn135}
}

@ARTICLE{Soufi2009,
  author = {Boumediene Soufi and Christian D Kelstrup and Gabriele Stoehr and Florian Fr{\"o}hlich and Tobias C Walther and Jesper V Olsen},
  title = {Global analysis of the yeast osmotic stress response by quantitative proteomics},
  journal = {Molecular BioSystems},
  year = {2009},
  volume = {5},
  pages = {1337--1346},
  number = {11},
  month = nov,
  abstract = {Information on extracellular signals and conditions is often transduced by biological systems using cascades of protein phosphorylation that affect the activity of enzymes, the localization of proteins and gene expression. A model to study signal transduction is the response of the yeast Saccharomyces cerevisiae to osmotic changes as it shares many central themes with information processing modules in higher eukaryotes. Despite considerable progress in our understanding of this pathway, the scale and dynamics of this system have not been addressed systematically yet. Here, we report a comprehensive, quantitative, and time-resolved analysis using high-resolution mass spectrometry of phospho-proteome and proteome changes in response to osmotic stress in yeast. We identified 5534 unique phosphopeptide variants and 3383 yeast proteins. More than 15\% of the detected phosphorylation site status changed more than two-fold within 5 minutes of treatment. Many of the corresponding phosphoproteins are involved in the early response to environmental stress. Surprisingly, we find that 158 regulated phosphorylation sites are potential substrates of basophilic kinases as opposed to the classical proline-directed MAP kinase network implicated in stress response mechanisms such as p38 and HOG pathways. Proteome changes reveal an increase in abundance of more than one hundred proteins after 20 min of salt stress.
Many of these are involved in the cellular response to increased osmolarity, which include proteins used for glycerol production that is up-regulated to counterbalance the increased osmolarity of the salt containing growth medium. Although the overall relationship between our proteome and published mRNA changes is poor we find an excellent correlation between the subset of osmotic shock up-regulated proteins and their corresponding mRNA changes.}, doi = {10.1039/b902256b}, institution = {The Novo Nordisk Foundation Center for Protein Research, Panum Institute, University of Copenhagen, Blegdamsvej 3, DK-2200 Copenhagen, Denmark.}, keywords = {Amino Acid Sequence; Models, Biological; Models, Theoretical; Molecular Sequence Data; Osmotic Pressure, physiology; Proteomics, methods; Saccharomyces cerevisiae, metabolism; Tandem Mass Spectrometry}, language = {eng}, medline-pst = {ppublish}, owner = {mbailly}, pmid = {19823750}, timestamp = {2010.06.11}, url = {http://dx.doi.org/10.1039/b902256b} } @ARTICLE{States:1994, author = {States, DJ and Gish, W}, title = {Combined use of sequence similarity and codon bias for coding region identification}, journal = {Journal of Computational Biology}, year = {1994}, volume = {1}, pages = {39--50}, number = {1}, date-added = {2007-04-16 18:13:39 +0200}, date-modified = {2007-04-16 18:14:44 +0200}, owner = {mbailly}, timestamp = {2009.10.08} } @ARTICLE{Stoletzki:2007, author = {Stoletzki, Nina and Eyre-Walker, Adam}, title = {Synonymous Codon Usage in \emph{{E}scherichia coli}: Selection for Translational Accuracy}, journal = {Molecular Biology and Evolution}, year = {2007}, volume = {24}, pages = {374--381}, number = {2}, abstract = {In many organisms, selection acts on synonymous codons to improve translation. However, the precise basis of this selection remains unclear in the majority of species. 
Selection could be acting to maximize the speed of elongation, to minimize the costs of proofreading, or to maximize the accuracy of translation. Using several data sets, we find evidence that codon use in \emph{{E}scherichia coli} is biased to reduce the costs of both missense and nonsense translational errors. Highly conserved sites and genes have higher codon bias than less conserved ones, and codon bias is positively correlated to gene length and production costs, both indicating selection against missense errors. Additionally, codon bias increases along the length of genes, indicating selection against nonsense errors. Doublet mutations or replacement substitutions do not explain our observations. The correlations remain when we control for expression level and for conflicting selection pressures at the start and end of genes. Considering each amino acid by itself confirms our results. We conclude that selection on synonymous codon use in E. coli is largely due to selection for translational accuracy, to reduce the costs of both missense and nonsense errors.}, annote = {10.1093/molbev/msl166}, date-added = {2007-04-18 12:41:02 +0200}, date-modified = {2007-04-18 12:41:02 +0200}, jo = {Mol Biol Evol}, owner = {mbailly}, timestamp = {2009.10.08}, ty = {JOUR}, url = {http://mbe.oxfordjournals.org/cgi/content/abstract/24/2/374} } @ARTICLE{Stoletzki:2005, author = {Stoletzki, Nina and Welch, John and Hermisson, Joachim and Eyre-Walker, Adam}, title = {A Dissection of Volatility in Yeast}, journal = {Molecular Biology and Evolution}, year = {2005}, volume = {22}, pages = {2022--2026}, number = {10}, abstract = {It has been suggested that volatility, the proportion of mutations which change an amino acid, can be used to infer the level of natural selection acting upon a gene. 
This conjecture is supported by a correlation between volatility and the rate of nonsynonymous substitution (dN), or the ratio of nonsynonymous and synonymous substitution rates, in a variety of organisms. These organisms include yeast, in which the correlations are quite strong. Here we show that these correlations are a by-product of a correlation between synonymous codon bias toward translationally optimal codons and dN. Although this analysis suggests that volatility is not a good measure of the selection, we suggest that it might be possible to infer something about the level of natural selection, from a single genome sequence, using translational codon bias.},
  doi = {10.1093/molbev/msi192},
  date-added = {2007-04-18 12:41:02 +0200},
  date-modified = {2007-04-18 12:41:02 +0200},
  jo = {Mol Biol Evol},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://mbe.oxfordjournals.org/cgi/content/abstract/22/10/2022}
}

@ARTICLE{Sueoka1995,
  author = {Sueoka, N.},
  title = {Intrastrand parity rules of {DNA} base composition and usage biases of synonymous codons},
  journal = {Journal of Molecular Evolution},
  year = {1995},
  volume = {40},
  pages = {318--325},
  owner = {mbailly},
  timestamp = {2009.10.08}
}

@ARTICLE{Tekaia:2002,
  author = {Tekaia, Fredj and Yeramian, Edouard and Dujon, Bernard},
  title = {Amino acid composition of genomes, lifestyles of organisms, and evolutionary trends: a global picture with correspondence analysis},
  journal = {Gene},
  year = {2002},
  volume = {297},
  pages = {51--60},
  number = {1--2},
  abstract = {Can we infer the lifestyle of an organism from the characteristic properties of its genome? More precisely, what are the relations between easily quantifiable properties from genomic sequences, such as amino-acid compositions, and more subtle characteristics concerning for example lifestyles or evolutionary trends? Here, we seek a global picture for such properties, based on a large number (56) of complete genomes, including significant numbers of representatives from the three domains of life. We consider the amino acid compositions of the predicted proteomes, and we use correspondence analysis, as a multivariate method to extract the relevant information from the large-scale data. From these analyses we derive a series of conclusions, concerning lifestyles, as well as physico-chemical and evolutionary trends: (1) correspondence analysis of the amino acid compositions permits discrimination between the three known lifestyles (mesophily/thermophily/hyperthermophily). (2) For various organisms, amino-acid composition properties are essentially driven by GC content, and to a significantly lesser extent by growth temperatures associated with lifestyles. Roughly speaking, the respective contributions of these two components are 57 and 20{\%}. It is notable that these proportions are essentially unchanged with respect to a previous analysis (Nature 393 (1998) 537), which involved only 15 genomes, available at the time. (3) In terms of amino acid compositional biases, two specific `signatures' for thermophily (in a broad sense, including hyperthermophily) can be detected. First, thermophilic species display a relative abundance in glutamic acid (Glu), concomitantly with the depletion in glutamine. Second, in thermophilic species, the relative abundance in Glu (negative charge) is significantly correlated (Pearson correlation coefficient r=0.83 with P<0.0001), with the increase in the lumped `pool' lysine+arginine (positive charges). This correlation (absent in mesophiles) could be interpreted on a physico-chemical basis, relevant to the thermostability of proteins. (4) Statistically significant differences are observed between the average lengths of the genes in the surveyed species, which follow their distribution between the three domains of life. Also a significant difference is observed between the average lengths of thermophilic (283.0+/-5.8) versus mesophilic (340+/-9.4) genes. It is thus possible that the `general' shortening of the primary sequences in thermophilic proteins plays a role in thermostability. (5) Considering various combinations of conservation properties (genes conserved exclusively in eukaryotes, in archaea, in bacteria, in combinations of two domains, etc.) correspondence analysis reveals a trend towards thermophilic-hyperthermophilic profiles for the most conserved subset of genes (ancient genes). (6) When limited to the subset of species-specific genes, correspondence analysis leads to a different picture for the clustering of genomes following amino-acid compositions: for example, the `core' specific part of a genome can bear lifestyle signatures different from those of the complete genome. Various results are discussed both on methodological and biological grounds. The evolutionary perspectives opened by our analyses are noted.},
  date-added = {2007-04-06 15:56:05 +0200},
  date-modified = {2007-04-06 15:56:05 +0200},
  keywords = {Hyperthermophiles, Mesophiles, Thermostability, Amino acid composition, Evolution, Multivariate analyses},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T39-46YJC6C-7/2/59ba73f855eaa5fb100788c41c18e2ca}
}

@ARTICLE{Tress2006,
  author = {Michael L Tress and Domenico Cozzetto and Anna Tramontano and Alfonso Valencia},
  title = {An analysis of the {Sargasso Sea} resource and the consequences for database composition.},
  journal = {BMC Bioinformatics},
  year = {2006},
  volume = {7},
  pages = {213},
  abstract = {BACKGROUND: The environmental sequencing of the Sargasso Sea has introduced a huge new resource of genomic information.
Unlike the protein sequences held in the current searchable databases, the Sargasso Sea sequences originate from a single marine environment and have been sequenced from species that are not easily obtainable by laboratory cultivation. The resource also contains very many fragments of whole protein sequences, a side effect of the shotgun sequencing method. These sequences form a significant addendum to the current searchable databases but also present us with some intrinsic difficulties. While it is important to know whether it is possible to assign function to these sequences with the current methods and whether they will increase our capacity to explore sequence space, it is also interesting to know how current bioinformatics techniques will deal with the new sequences in the resource. RESULTS: The Sargasso Sea sequences seem to introduce a bias that decreases the potential of current methods to propose structure and function for new proteins. In particular the high proportion of sequence fragments in the resource seems to result in poor quality multiple alignments. CONCLUSION: These observations suggest that the new sequences should be used with care, especially if the information is to be used in large scale analyses. On a positive note, the results may just spark improvements in computational and experimental methods to take into account the fragments generated by environmental sequencing techniques.},
  doi = {10.1186/1471-2105-7-213},
  keywords = {Amino Acid Sequence; Bacterial Proteins; Base Sequence; Databases, Protein; Genes, Bacterial; Information Storage and Retrieval; Marine Biology; Molecular Sequence Data; Oceans and Seas; Variation (Genetics); Water Microbiology},
  owner = {mbailly},
  pii = {1471-2105-7-213},
  pmid = {16623953},
  timestamp = {2007.12.13},
  url = {http://dx.doi.org/10.1186/1471-2105-7-213}
}

@ARTICLE{Venter2001,
  author = {J. C. Venter and M. D. Adams and E. W. Myers and P. W. Li and R. J. Mural and G. G. Sutton and H. O. Smith and M. Yandell and C. A. Evans and R. A. Holt and J. D. Gocayne and P. Amanatides and R. M. Ballew and D. H. Huson and J. R. Wortman and Q. Zhang and C. D. Kodira and X. H. Zheng and L. Chen and M. Skupski and G. Subramanian and P. D. Thomas and J. Zhang and Gabor Miklos, G. L. and C. Nelson and S. Broder and A. G. Clark and J. Nadeau and V. A. McKusick and N. Zinder and A. J. Levine and R. J. Roberts and M. Simon and C. Slayman and M. Hunkapiller and R. Bolanos and A. Delcher and I. Dew and D. Fasulo and M. Flanigan and L. Florea and A. Halpern and S. Hannenhalli and S. Kravitz and S. Levy and C. Mobarry and K. Reinert and K. Remington and J. Abu-Threideh and E. Beasley and K. Biddick and V. Bonazzi and R. Brandon and M. Cargill and I. Chandramouliswaran and R. Charlab and K. Chaturvedi and Z. Deng and Di Francesco, V. and P. Dunn and K. Eilbeck and C. Evangelista and A. E. Gabrielian and W. Gan and W. Ge and F. Gong and Z. Gu and P. Guan and T. J. Heiman and M. E. Higgins and R. R. Ji and Z. Ke and K. A. Ketchum and Z. Lai and Y. Lei and Z. Li and J. Li and Y. Liang and X. Lin and F. Lu and G. V. Merkulov and N. Milshina and H. M. Moore and A. K. Naik and V. A. Narayan and B. Neelam and D. Nusskern and D. B. Rusch and S. Salzberg and W. Shao and B. Shue and J. Sun and Z. Wang and A. Wang and X. Wang and J. Wang and M. Wei and R. Wides and C. Xiao and C. Yan and A. Yao and J. Ye and M. Zhan and W. Zhang and H. Zhang and Q. Zhao and L. Zheng and F. Zhong and W. Zhong and S. Zhu and S. Zhao and D. Gilbert and S. Baumhueter and G. Spier and C. Carter and A. Cravchik and T. Woodage and F. Ali and H. An and A. Awe and D. Baldwin and H. Baden and M. Barnstead and I. Barrow and K. Beeson and D. Busam and A. Carver and A. Center and M. L. Cheng and L. Curry and S. Danaher and L. Davenport and R. Desilets and S. Dietz and K. Dodson and L. Doup and S. Ferriera and N. Garg and A. Gluecksmann and B. Hart and J. Haynes and C. Haynes and C. Heiner and S. Hladun and D. Hostin and J. Houck and T. Howland and C. Ibegwam and J. Johnson and F. Kalush and L. Kline and S. Koduru and A. Love and F. Mann and D. May and S. McCawley and T. McIntosh and I. McMullen and M. Moy and L. Moy and B. Murphy and K. Nelson and C. Pfannkoch and E. Pratts and V. Puri and H. Qureshi and M. Reardon and R. Rodriguez and Y. H. Rogers and D. Romblad and B. Ruhfel and R. Scott and C. Sitter and M. Smallwood and E. Stewart and R. Strong and E. Suh and R. Thomas and N. N. Tint and S. Tse and C. Vech and G. Wang and J. Wetter and S. Williams and M. Williams and S. Windsor and E. Winn-Deen and K. Wolfe and J. Zaveri and K. Zaveri and J. F. Abril and R. Guig{\'o} and M. J. Campbell and K. V. Sjolander and B. Karlak and A. Kejariwal and H. Mi and B. Lazareva and T. Hatton and A. Narechania and K. Diemer and A. Muruganujan and N. Guo and S. Sato and V. Bafna and S. Istrail and R. Lippert and R. Schwartz and B. Walenz and S. Yooseph and D. Allen and A. Basu and J. Baxendale and L. Blick and M. Caminha and J. Carnes-Stine and P. Caulk and Y. H. Chiang and M. Coyne and C. Dahlke and A. Mays and M. Dombroski and M. Donnelly and D. Ely and S. Esparham and C. Fosler and H. Gire and S. Glanowski and K. Glasser and A. Glodek and M. Gorokhov and K. Graham and B. Gropman and M. Harris and J. Heil and S. Henderson and J. Hoover and D. Jennings and C. Jordan and J. Jordan and J. Kasha and L. Kagan and C. Kraft and A. Levitsky and M. Lewis and X. Liu and J. Lopez and D. Ma and W. Majoros and J. McDaniel and S. Murphy and M. Newman and T. Nguyen and N. Nguyen and M. Nodell and S. Pan and J. Peck and M. Peterson and W. Rowe and R. Sanders and J. Scott and M. Simpson and T. Smith and A. Sprague and T. Stockwell and R. Turner and E. Venter and M. Wang and M. Wen and D. Wu and M. Wu and A. Xia and A. Zandieh and X. Zhu},
  title = {The sequence of the human genome.},
  journal = {Science},
  year = {2001},
  volume = {291},
  pages = {1304--1351},
  number = {5507},
  month = feb,
  abstract = {A 2.91-billion base pair (bp) consensus sequence of the euchromatic portion of the human genome was generated by the whole-genome shotgun sequencing method. The 14.8-billion bp DNA sequence was generated over 9 months from 27,271,853 high-quality sequence reads (5.11-fold coverage of the genome) from both ends of plasmid clones made from the DNA of five individuals. Two assembly strategies---a whole-genome assembly and a regional chromosome assembly---were used, each combining sequence data from Celera and the publicly funded genome effort. The public data were shredded into 550-bp segments to create a 2.9-fold coverage of those genome regions that had been sequenced, without including biases inherent in the cloning and assembly procedure used by the publicly funded group. This brought the effective coverage in the assemblies to eightfold, reducing the number and size of gaps in the final assembly over what would be obtained with 5.11-fold coverage. The two assembly strategies yielded very similar results that largely agree with independent mapping data. The assemblies effectively cover the euchromatic regions of the human chromosomes. More than 90\% of the genome is in scaffold assemblies of 100,000 bp or more, and 25\% of the genome is in scaffolds of 10 million bp or larger. Analysis of the genome sequence revealed 26,588 protein-encoding transcripts for which there was strong corroborating evidence and an additional approximately 12,000 computationally derived genes with mouse matches or other weak supporting evidence. Although gene-dense clusters are obvious, almost half the genes are dispersed in low G+C sequence separated by large tracts of apparently noncoding sequence. Only 1.1\% of the genome is spanned by exons, whereas 24\% is in introns, with 75\% of the genome being intergenic DNA. Duplications of segmental blocks, ranging in size up to chromosomal lengths, are abundant throughout the genome and reveal a complex evolutionary history. Comparative genomic analysis indicates vertebrate expansions of genes associated with neuronal function, with tissue-specific developmental regulation, and with the hemostasis and immune systems. DNA sequence comparisons between the consensus sequence and publicly funded genome data provided locations of 2.1 million single-nucleotide polymorphisms (SNPs). A random pair of human haploid genomes differed at a rate of 1 bp per 1250 on average, but there was marked heterogeneity in the level of polymorphism across the genome. Less than 1\% of all SNPs resulted in variation in proteins, but the task of determining which SNPs have functional consequences remains an open challenge.},
  doi = {10.1126/science.1058040},
  institution = {Celera Genomics, 45 West Gude Drive, Rockville, MD 20850, USA. humangenome@celera.com},
  keywords = {Algorithms; Animals; Chromosome Banding; Chromosome Mapping; Chromosomes, Artificial, Bacterial; Computational Biology; Consensus Sequence; CpG Islands; DNA, Intergenic; Databases, Factual; Evolution, Molecular; Exons; Female; Gene Duplication; Genes; Genome, Human; Human Genome Project; Humans; Introns; Male; Phenotype; Physical Chromosome Mapping; Polymorphism, Single Nucleotide; Proteins; Pseudogenes; Repetitive Sequences, Nucleic Acid; Retroelements; Sequence Analysis, DNA; Species Specificity; Variation (Genetics)},
  owner = {mbailly},
  pii = {291/5507/1304},
  pmid = {11181995},
  timestamp = {2008.01.09},
  url = {http://dx.doi.org/10.1126/science.1058040}
}

@ARTICLE{Wapinski2007,
  author = {Ilan Wapinski and Avi Pfeffer and Nir Friedman and Aviv Regev},
  title = {Natural history and evolutionary principles of gene duplication in fungi.},
  journal = {Nature},
  year = {2007},
  volume = {449},
  pages = {54--61},
  number = {7158},
  month = sep,
  abstract = {Gene duplication and loss is a powerful source of
functional innovation. However, the general principles that govern this process are still largely unknown. With the growing number of sequenced genomes, it is now possible to examine these events in a comprehensive and unbiased manner. Here, we develop a procedure that resolves the evolutionary history of all genes in a large group of species. We apply our procedure to seventeen fungal genomes to create a genome-wide catalogue of gene trees that determine precise orthology and paralogy relations across these species. We show that gene duplication and loss is highly constrained by the functional properties and interacting partners of genes. In particular, stress-related genes exhibit many duplications and losses, whereas growth-related genes show selection against such changes. Whole-genome duplication circumvents this constraint and relaxes the dichotomy, resulting in an expanded functional scope of gene duplication. By characterizing the functional fate of duplicate genes we show that duplicated genes rarely diverge with respect to biochemical function, but typically diverge with respect to regulatory control. Surprisingly, paralogous modules of genes rarely arise, even after whole-genome duplication. Rather, gene duplication may drive the modularization of functional networks through specialization, thereby disentangling cellular systems.},
  doi = {10.1038/nature06107},
  owner = {mbailly},
  pii = {nature06107},
  pmid = {17805289},
  timestamp = {2007.09.27},
  url = {http://dx.doi.org/10.1038/nature06107}
}

@ARTICLE{Willenbrock:2006,
  author = {Willenbrock, Hanni and Friis, Carsten and Juncker, Agnieszka and Ussery, David},
  title = {An environmental signature for 323 microbial genomes based on codon adaptation indices},
  journal = {Genome Biology},
  year = {2006},
  volume = {7},
  pages = {R114},
  number = {12},
  abstract = {BACKGROUND: Codon adaptation indices (CAIs) represent an evolutionary strategy to modulate gene expression and have widely been used to predict potentially highly expressed genes within microbial genomes. Here, we evaluate and compare two very different methods for estimating CAI values, one corresponding to translational codon usage bias and the second obtained mathematically by searching for the most dominant codon bias. RESULTS: The level of correlation between these two CAI methods is a simple and intuitive measure of the degree of translational bias in an organism, and from this we confirm that fast replicating bacteria are more likely to have a dominant translational codon usage bias than are slow replicating bacteria, and that this translational codon usage bias may be used for prediction of highly expressed genes. By analyzing more than 300 bacterial genomes, as well as five fungal genomes, we show that codon usage preference provides an environmental signature by which it is possible to group bacteria according to their lifestyle, for instance soil bacteria and soil symbionts, spore formers, enteric bacteria, aquatic bacteria, and intercellular and extracellular pathogens. CONCLUSION: The results and the approach described here may be used to acquire new knowledge regarding species lifestyle and to elucidate relationships between organisms that are far apart evolutionarily.},
  date-added = {2007-04-17 10:13:01 +0200},
  date-modified = {2007-04-17 10:13:01 +0200},
  doi = {10.1186/gb-2006-7-12-r114},
  issn = {1465-6906},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://genomebiology.com/2006/7/12/R114}
}

@ARTICLE{Wright:1990,
  author = {Wright, Frank},
  title = {The ``effective number of codons'' used in a gene},
  journal = {Gene},
  year = {1990},
  volume = {87},
  pages = {23--29},
  number = {1},
  abstract = {A simple measure is presented that quantifies how far the codon usage of a gene departs from equal usage of synonymous codons. This measure of synonymous codon usage bias, the `effective number of codons used in a gene', Nc, can be easily calculated from codon usage data alone, and is independent of gene length and amino acid (aa) composition. Nc can take values from 20, in the case of extreme bias where one codon is exclusively used for each aa, to 61 when the use of alternative synonymous codons is equally likely. Nc thus provides an intuitively meaningful measure of the extent of codon preference in a gene. Codon usage patterns across genes can be investigated by the Nc-plot: a plot of Nc vs. G + C content at synonymous sites. Nc-plots are produced for Homo sapiens, Saccharomyces cerevisiae, \emph{{E}scherichia coli}, \emph{{B}acillus subtilis}, Dictyostelium discoideum, and Drosophila melanogaster. A FORTRAN77 program written to calculate Nc is available on request.},
  date-added = {2007-04-17 10:33:47 +0200},
  date-modified = {2007-07-09 20:27:48 +0200},
  keywords = {Synonymous codon usage bias, G + C content, amino acid sequence, Homo sapiens, Saccharomyces cerevisiae, \emph{{E}scherichia coli}, \emph{{B}acillus subtilis}, Dictyostelium discoideum, Drosophila melanogaster},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T39-48T6HPK-5/2/c0f43245823c09be96f8dfa1dfa20566}
}

@ARTICLE{Xia:1996,
  author = {Xia, X.},
  title = {Maximizing Transcription Efficiency Causes Codon Usage Bias},
  journal = {Genetics},
  year = {1996},
  volume = {144},
  pages = {1309--1320},
  number = {3},
  date-added = {2007-04-17 12:45:19 +0200},
  date-modified = {2007-04-17 12:45:19 +0200},
  jo = {Genetics},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.genetics.org/cgi/content/abstract/144/3/1309}
}

@ARTICLE{Zalucki2007,
  author = {Yaramah M Zalucki and Peter M Power and Michael P Jennings},
  title = {Selection for efficient translation initiation biases codon usage at second amino acid position in secretory proteins.},
  journal = {Nucleic Acids Res},
  year = {2007},
  month = aug,
  abstract = {The definition of a typical sec-dependent bacterial signal peptide contains a positive charge at the N-terminus, thought to be required for membrane association. In this study the amino acid distribution of all Escherichia coli secretory proteins were analysed. This revealed that there was a statistically significant bias for lysine at the second codon position (P2), consistent with a role for the positive charge in secretion. Removal of the positively charged residue P2 in two different model systems revealed that a positive charge is not required for protein export. A well-characterized feature of large amino acids like lysine at P2 is inhibition of N-terminal methionine removal by methionyl amino-peptidase (MAP).
Substitution of lysine at P2 for other large or small amino acids did not affect protein export. Analysis of codon usage revealed that there was a bias for the AAA lysine codon at P2, suggesting that a non-coding function for the AAA codon may be responsible for the strong bias for lysine at P2 of secretory signal sequences. We conclude that the selection for high translation initiation efficiency maybe the selective pressure that has led to codon and consequent amino acid usage at P2 of secretory proteins.},
  doi = {10.1093/nar/gkm577},
  owner = {mbailly},
  pii = {gkm577},
  pmid = {17717002},
  timestamp = {2007.09.28},
  url = {http://dx.doi.org/10.1093/nar/gkm577}
}

@ARTICLE{Zhang:2005,
  author = {Zhang, Jianzhi},
  title = {On the Evolution of Codon Volatility},
  journal = {Genetics},
  year = {2005},
  volume = {169},
  pages = {495--501},
  number = {1},
  abstract = {Volatility of a codon is defined as the probability that a random point mutation in the codon generates a nonsynonymous change. It has been proposed that higher-than-expected mean codon volatility of a gene indicates that positive selection for nonsynonymous changes has acted on the gene in the recent past. I show that strong frequency-dependent selection (minority advantage) in large populations can increase codon volatility slightly, whereas directional positive selection has no effect on volatility. Factors unrelated to positive selection, such as expression-related or GC-content-related codon usage bias, also affect volatility. These and other considerations suggest that codon volatility has only limited utility for detecting positive selection at the DNA sequence level.},
  doi = {10.1534/genetics.104.034884},
  date-added = {2007-04-18 12:40:58 +0200},
  date-modified = {2007-04-18 12:40:58 +0200},
  jo = {Genetics},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.genetics.org/cgi/content/abstract/169/1/495}
}

@ARTICLE{Zhao:2003rr,
  author = {Zhao, Kong-Nan and Liu, Wen Jun and Frazer, Ian H.},
  title = {Codon usage bias and {A+T} content variation in human papillomavirus genomes},
  journal = {Virus Research},
  year = {2003},
  volume = {98},
  pages = {95--104},
  number = {2},
  date-added = {2007-01-12 12:04:17 +0100},
  date-modified = {2007-07-09 20:29:04 +0200},
  keywords = {Human papillomavirus (HPV), Open reading frames (ORFs), Codon usage bias, DNA base, A+T content},
  owner = {mbailly},
  timestamp = {2009.10.08},
  ty = {JOUR},
  url = {http://www.sciencedirect.com/science/article/B6T32-49W36PY-2/2/0652920d44ba610cad05bd1dd877547d}
}

@ARTICLE{Zhao2008,
  author = {Sheng Zhao and Qin Zhang and Xiaolin Liu and Xuemin Wang and Huilin Zhang and Yan Wu and Fei Jiang},
  title = {Analysis of synonymous codon usage in 11 {Human Bocavirus} isolates.},
  journal = {Biosystems},
  year = {2008},
  volume = {92},
  pages = {207--214},
  number = {3},
  month = jun,
  abstract = {Human Bocavirus (HBoV) is a novel virus which can cause respiratory tract disease in infants or children. In this study, the codon usage bias and the base composition variations in the available 11 complete HBoV genome sequences have been investigated. Although, there is a significant variation in codon usage bias among different HBoV genes, codon usage bias in HBoV is a little slight, which is mainly determined by the base compositions on the third codon position and the effective number of codons (ENC) value. The results of correspondence analysis (COA) and Spearman's rank correlation analysis reveals that the G+C compositional constraint is the main factor that determines the codon usage bias in HBoV and the gene's function also contributes to the codon usage in this virus. Moreover, it was found that the hydrophobicity of each protein and the gene length are also critical in affecting these viruses' codon usage, although they were less important than that of the mutational bias and the genes' function. At last, the relative synonymous codon usage (RSCU) of 44 genes from these 11 HBoV isolates is analyzed using a hierarchical cluster method. The result suggests that genes with same function yet from different isolates are classified into the same lineage and it does not depend on geographical location. These conclusions not only can offer an insight into the codon usage patterns and gene classification of HBoV, but also may help in increasing the efficiency of gene delivery/expression systems.},
  doi = {10.1016/j.biosystems.2008.01.006},
  institution = {F University, Xinong Road No. 22, Yangling 712100, Shaanxi Province, PR China; Jingchu University of Technology, Jingmen 448000, Hubei, PR China.},
  owner = {mbailly},
  pii = {S0303-2647(08)00033-6},
  pmid = {18378386},
  timestamp = {2008.05.14},
  url = {http://dx.doi.org/10.1016/j.biosystems.2008.01.006}
}

@comment{jabref-meta: selector_publisher:}

@comment{jabref-meta: selector_author:}

@comment{jabref-meta: selector_journal:}

@comment{jabref-meta: selector_keywords:}

@comment{jabref-meta: groupsversion:3;}

@comment{jabref-meta: groupstree:
0 AllEntriesGroup:;
1 ExplicitGroup:codon\;0\;Akashi:2002\;Haiser2007\;HubertCharles091120
06\;Jaillon2008\;;
1 KeywordGroup:bailly\;0\;author\;Bailly\;0\;0\;;
}