% biblio.bib
% Encoding: UTF-8
@PhdThesis{YPonty2006,
  author              = {Ponty, Yann},
  title               = {Models for structured genomic sequences, random generation and applications},
  school              = {Université Paris Sud - Paris XI},
  year                = {2006},
  type                = {{PhD} thesis},
  month               = nov,
  hal_id              = {tel-00144130},
  hal_local_reference = {No d'ordre 8480},
  hal_version         = {v2},
  keywords            = {Models for random sequences ; {RNA} structure ; Context-free grammars ; Random generation ; Enumerative combinatorics ; Modèles de séquences aléatoires ; Structure de l'ARN ; Grammaires hors-contexte pondérées ; Génération aléatoire ; Combinatoire énumérative},
  pdf                 = {https://tel.archives-ouvertes.fr/tel-00144130/file/these-ponty.pdf},
  url                 = {https://tel.archives-ouvertes.fr/tel-00144130},
}
@Article{Barcucci1994,
  author    = {Barcucci, Elena and Pinzani, Renzo and Sprugnoli, Renzo},
  title     = {The random generation of directed animals},
  journal   = {Theoretical Computer Science},
  volume    = {127},
  number    = {2},
  pages     = {333--350},
  year      = {1994},
  publisher = {Elsevier},
}
% NOTE(review): volume and pages are missing; this looks like a DMTCS
% Proceedings (AofA'10) paper -- confirm against the publisher record and complete.
@Article{Nicaud2010,
  author    = {Nicaud, Cyril and Gouyou-Beauchamps, Dominique},
  title     = {Random Generation Using Binomial Approximations},
  journal   = {Discrete Mathematics \& Theoretical Computer Science},
  year      = {2010},
  publisher = {Episciences.org},
}
@Article{Flajolet1987,
  author  = {Flajolet, Philippe},
  title   = {Analytic models and ambiguity of context-free languages},
  journal = {Theoretical Computer Science},
  volume  = {49},
  pages   = {283--309},
  year    = {1987},
  issn    = {0304-3975},
  doi     = {10.1016/0304-3975(87)90011-9},
}
@Article{Denise1999,
  author   = {Denise, Alain and Zimmermann, Paul},
  title    = {Uniform random generation of decomposable structures using floating-point arithmetic},
  journal  = {Theoretical Computer Science},
  year     = {1999},
  volume   = {218},
  number   = {2},
  pages    = {233--248},
  issn     = {0304-3975},
  abstract = {The recursive method formalized by Nijenhuis and Wilf (1998) and systematized by Flajolet, Van Cutsem and Zimmermann (1994), is extended here to floating-point arithmetic. The resulting ADZ method enables one to generate decomposable data structures — both labelled or unlabelled — uniformly at random, in expected $O(n^{1+\varepsilon})$ time and space, after a preprocessing phase of $O(n^{2+\varepsilon})$ time, which reduces to $O(n^{1+\varepsilon})$ for context-free grammars.
Résumé
La méthode récursive mise au point par Nijenhuis et Wilf (1998) et systématisée par Flajolet, Van Cutsem et Zimmermann (1994), est ici étendue à l'utilisation de nombres flottants. La méthode qui en découle, appelée ADZ, permet de générer aléatoirement et uniformément des structures décomposables — étiquetées ou non — en temps et espace moyens $O(n^{1+\varepsilon})$, après un précalcul de complexité en temps $O(n^{2+\varepsilon})$, se réduisant à $O(n^{1+\varepsilon})$ pour des grammaires algébriques.},
  doi      = {10.1016/S0304-3975(98)00323-5},
  url      = {http://www.sciencedirect.com/science/article/pii/S0304397598003235},
}
@Article{Mishna2009,
  author    = {Mishna, Marni and Rechnitzer, Andrew},
  title     = {Two non-holonomic lattice walks in the quarter plane},
  journal   = {Theoretical Computer Science},
  year      = {2009},
  volume    = {410},
  number    = {38--40},
  pages     = {3616--3630},
  publisher = {Elsevier},
}
@InProceedings{Bendkowski2018,
  author       = {Bendkowski, Maciej and Bodini, Olivier and Dovgal, Sergey},
  title        = {Polynomial tuning of multiparametric combinatorial samplers},
  booktitle    = {2018 Proceedings of the Fifteenth Workshop on Analytic Algorithmics and Combinatorics (ANALCO)},
  pages        = {92--106},
  organization = {SIAM},
  year         = {2018},
}
@Book{Nesterov1994,
  author    = {Nesterov, Yurii and Nemirovskii, Arkadii},
  title     = {Interior-Point Polynomial Algorithms in Convex Programming},
  publisher = {SIAM},
  year      = {1994},
  series    = {{SIAM} Studies in Applied Mathematics},
  volume    = {13},
}
@Article{Duchon2004,
  author    = {Duchon, Philippe and Flajolet, Philippe and Louchard, Guy and Schaeffer, Gilles},
  title     = {{Boltzmann} samplers for the random generation of combinatorial structures},
  journal   = {Combinatorics, Probability and Computing},
  year      = {2004},
  volume    = {13},
  number    = {4--5},
  pages     = {577--625},
  publisher = {Cambridge University Press},
}
@Article{Altschul1985,
  author          = {Altschul, Stephen F and Erickson, Blake W},
  title           = {Significance of nucleotide sequence alignments: a method for random sequence permutation that preserves dinucleotide and codon usage},
  journal         = {Molecular Biology and Evolution},
  year            = {1985},
  volume          = {2},
  number          = {6},
  pages           = {526--538},
  month           = nov,
  issn            = {0737-4038},
  abstract        = {The similarity of two nucleotide sequences is often expressed in terms of evolutionary distance, a measure of the amount of change needed to transform one sequence into the other. Given two sequences with a small distance between them, can their similarity be explained by their base composition alone? The nucleotide order of these sequences contributes to their similarity if the distance is much smaller than their average permutation distance, which is obtained by calculating the distances for many random permutations of these sequences. To determine whether their similarity can be explained by their dinucleotide and codon usage, random sequences must be chosen from the set of permuted sequences that preserve dinucleotide and codon usage. The problem of choosing random dinucleotide and codon-preserving permutations can be expressed in the language of graph theory as the problem of generating random Eulerian walks on a directed multigraph. An efficient algorithm for generating such walks is described. This algorithm can be used to choose random sequence permutations that preserve (1) dinucleotide usage, (2) dinucleotide and trinucleotide usage, or (3) dinucleotide and codon usage. For example, the similarity of two 60-nucleotide DNA segments from the human beta-1 interferon gene (nucleotides 196-255 and 499-558) is not just the result of their nonrandom dinucleotide and codon usage.},
  chemicals       = {Codon, Interferon Type I},
  citation-subset = {IM},
  completed       = {1988-06-20},
  country         = {United States},
  doi             = {10.1093/oxfordjournals.molbev.a040370},
  issn-linking    = {0737-4038},
  keywords        = {Base Sequence; Biological Evolution; Codon, genetics; Humans; Interferon Type I, genetics; Models, Genetic; Molecular Sequence Data; Sequence Homology, Nucleic Acid},
  nlm-id          = {8501455},
  owner           = {NLM},
  pmid            = {3870875},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2018-01-09},
}
@Article{Hurst2001,
  author          = {Hurst, L D and Merchant, A R},
  title           = {High guanine-cytosine content is not an adaptation to high temperature: a comparative analysis amongst prokaryotes},
  journal         = {Proceedings of the Royal Society of London. Series B: Biological Sciences},
  year            = {2001},
  volume          = {268},
  number          = {1466},
  pages           = {493--497},
  month           = mar,
  issn            = {0962-8452},
  abstract        = {The causes of the variation between genomes in their guanine (G) and cytosine (C) content is one of the central issues in evolutionary genomics. The thermal adaptation hypothesis conjectures that, as G:C pairs in DNA are more thermally stable than adenine:thymine pairs, high GC content may be a selective response to high temperature. A compilation of data on genomic GC content and optimal growth temperature for numerous prokaryotes failed to demonstrate the predicted correlation. By contrast, the GC content of structural {RNA}s is higher at high temperatures. The issue that we address here is whether more freely evolving sites in exons (i.e. codonic third positions) evolve in the same manner as genomic DNA as a whole, showing no correlated response, or like structural {RNA}s showing a strong correlation. The latter pattern would provide strong support for the thermal adaptation hypothesis, as the variation in GC content between orthologous genes is typically most profoundly seen at codon third sites (GC3). Simple analysis of completely sequenced prokaryotic genomes shows that GC3, but not genomic GC, is higher on average in thermophilic species. This demonstrates, if nothing else, that the results from the two measures cannot be presumed to be the same. A proper analysis, however, requires phylogenetic control. Here, therefore, we report the results of a comparative analysis of GC composition and optimal growth temperature for over 100 prokaryotes. Comparative analysis fails to show, in either Archea or Eubacteria, any hint of connection between optimal growth temperature and GC content in the genome as a whole, in protein-coding regions or, more crucially at GC3. Conversely, comparable analysis confirms that GC content of structural {RNA} is strongly correlated with optimal temperature. Against the expectations of the thermal adaptation hypothesis, within prokaryotes GC content in protein-coding genes, even at relatively freely evolving sites, cannot be considered an adaptation to the thermal environment.},
  chemicals       = {DNA, Archaeal, DNA, Bacterial},
  citation-subset = {IM},
  completed       = {2001-06-28},
  country         = {England},
  doi             = {10.1098/rspb.2000.1397},
  issn-linking    = {0962-8452},
  keywords        = {Adaptation, Physiological; Archaea, chemistry, genetics; Bacteria, chemistry, genetics; Base Composition; DNA, Archaeal, chemistry, genetics; DNA, Bacterial, chemistry, genetics; Evolution, Molecular; Genetic Variation; Prokaryotic Cells; Temperature},
  nlm-id          = {101245157},
  owner           = {NLM},
  pmc             = {PMC1088632},
  pmid            = {11296861},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Hildebrand2010,
  author          = {Hildebrand, Falk and Meyer, Axel and Eyre-Walker, Adam},
  title           = {Evidence of selection upon genomic {GC}-content in bacteria},
  journal         = {{PLoS} Genetics},
  year            = {2010},
  volume          = {6},
  number          = {9},
  pages           = {e1001107},
  month           = sep,
  issn            = {1553-7404},
  abstract        = {The genomic GC-content of bacteria varies dramatically, from less than 20\% to more than 70\%. This variation is generally ascribed to differences in the pattern of mutation between bacteria. Here we test this hypothesis by examining patterns of synonymous polymorphism using datasets from 149 bacterial species. We find a large excess of synonymous GC→AT mutations over AT→GC mutations segregating in all but the most AT-rich bacteria, across a broad range of phylogenetically diverse species. We show that the excess of GC→AT mutations is inconsistent with mutation bias, since it would imply that most GC-rich bacteria are declining in GC-content; such a pattern would be unsustainable. We also show that the patterns are probably not due to translational selection or biased gene conversion, because optimal codons tend to be AT-rich, and the excess of GC→AT SNPs is observed in datasets with no evidence of recombination. We therefore conclude that there is selection to increase synonymous GC-content in many species. Since synonymous GC-content is highly correlated to genomic GC-content, we further conclude that there is selection on genomic base composition in many bacteria.},
  citation-subset = {IM},
  completed       = {2011-01-04},
  country         = {United States},
  doi             = {10.1371/journal.pgen.1001107},
  issn-linking    = {1553-7390},
  keywords        = {Bacteria, classification, genetics; Base Composition, genetics; Bias; Genome, Bacterial, genetics; Models, Genetic; Mutation, genetics; Polymorphism, Single Nucleotide, genetics; Protein Biosynthesis, genetics; Selection, Genetic},
  nlm-id          = {101239074},
  owner           = {NLM},
  pii             = {e1001107},
  pmc             = {PMC2936529},
  pmid            = {20838593},
  pubmodel        = {Electronic},
  pubstatus       = {epublish},
  revised         = {2018-11-13},
}
@Article{Clote2005,
  author          = {Clote, Peter and Ferré, Fabrizio and Kranakis, Evangelos and Krizanc, Danny},
  title           = {Structural {RNA} has lower folding energy than random {RNA} of the same dinucleotide frequency},
  journal         = {{RNA}},
  year            = {2005},
  volume          = {11},
  number          = {5},
  pages           = {578--591},
  month           = may,
  issn            = {1355-8382},
  abstract        = {We present results of computer experiments that indicate that several {RNA}s for which the native state (minimum free energy secondary structure) is functionally important (type III hammerhead ribozymes, signal recognition particle {RNA}s, U2 small nucleolar spliceosomal {RNA}s, certain riboswitches, etc.) all have lower folding energy than random {RNA}s of the same length and dinucleotide frequency. Additionally, we find that whole m{RNA} as well as 5'-UTR, 3'-UTR, and cds regions of m{RNA} have folding energies comparable to that of random {RNA}, although there may be a statistically insignificant trace signal in 3'-UTR and cds regions. Various authors have used nucleotide (approximate) pattern matching and the computation of minimum free energy as filters to detect potential {RNA}s in ESTs and genomes. We introduce a new concept of the asymptotic Z-score and describe a fast, whole-genome scanning algorithm to compute asymptotic minimum free energy Z-scores of moving-window contents. Asymptotic Z-score computations offer another filter, to be used along with nucleotide pattern matching and minimum free energy computations, to detect potential functional {RNA}s in ESTs and genomic regions.},
  chemicals       = {3' Untranslated Regions, 5' Untranslated Regions, Nucleotides, {RNA}},
  citation-subset = {IM},
  completed       = {2005-05-24},
  country         = {United States},
  doi             = {10.1261/rna.7220505},
  issn-linking    = {1355-8382},
  keywords        = {3' Untranslated Regions, chemistry, genetics, metabolism; 5' Untranslated Regions, chemistry, genetics, metabolism; Algorithms; Base Composition; Base Sequence; Computational Biology; Computer Simulation; Expressed Sequence Tags; Markov Chains; Nucleic Acid Conformation; Nucleotides, analysis, chemistry, genetics, metabolism; {RNA}, chemistry, genetics, metabolism; Thermodynamics},
  nlm-id          = {9509184},
  owner           = {NLM},
  pii             = {11/5/578},
  pmc             = {PMC1370746},
  pmid            = {15840812},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Denise1996,
  author  = {Denise, Alain},
  title   = {Génération aléatoire et uniforme de mots},
  journal = {Discrete Mathematics},
  volume  = {156},
  pages   = {69--84},
  year    = {1996},
}
@InProceedings{Viennot1985,
  author    = {Viennot, G. and Vauchaussade de Chaumont, M.},
  title     = {Enumeration of {RNA} Secondary Structures by Complexity},
  booktitle = {Mathematics in Biology and Medicine},
  editor    = {Capasso, V. and Grosso, E. and Paveri-Fontana, S. L.},
  pages     = {360--365},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {1985},
  isbn      = {978-3-642-93287-8},
  abstract  = {Many investigations in studying primary and secondary structures in Biology require theoretical statistical (that is enumerative) work. We solve one of these problems: enumerate secondary structures of single-stranded nucleic acids ({RNA}, t{RNA}, etc{\ldots}) having a given complexity. This parameter has been introduced for energy computation purpose in order to predict the most stable secondary structure. The method relies on the (non-classical) use of non-commutative variables. Some orthogonal polynomials appear. The final solution shows a relationship between the parameter complexity and another parameter appearing in Hydrography and Botanic.},
}
@Article{Hofacker1998,
  author    = {Hofacker, Ivo L and Schuster, Peter and Stadler, Peter F},
  title     = {Combinatorics of {RNA} secondary structures},
  journal   = {Discrete Applied Mathematics},
  year      = {1998},
  volume    = {88},
  number    = {1--3},
  pages     = {207--237},
  publisher = {Elsevier},
}
@Article{Zuker1984,
  author    = {Zuker, Michael and Sankoff, David},
  title     = {{RNA} secondary structures and their prediction},
  journal   = {Bulletin of Mathematical Biology},
  year      = {1984},
  volume    = {46},
  number    = {4},
  pages     = {591--621},
  publisher = {Springer},
}
@Article{Nebel2002,
  author   = {Nebel, Markus E.},
  title    = {Combinatorial Properties of {RNA} Secondary Structures},
  journal  = {Journal of Computational Biology},
  year     = {2002},
  volume   = {9},
  number   = {3},
  pages    = {541--573},
  note     = {PMID: 12162892},
  abstract = {The secondary structure of an {RNA} molecule is of great importance and possesses influence, e.g., on the interaction of t{RNA} molecules with proteins or on the stabilization of m{RNA} molecules. The classification of secondary structures by means of their order proved useful with respect to numerous applications. In 1978, Waterman, who gave the first precise formal framework for the topic, suggested to determine the number $a_{n,p}$ of secondary structures of size n and given order p. Since then, no satisfactory result has been found. Based on an observation due to Viennot et al., we will derive generating functions for the secondary structures of order p from generating functions for binary tree structures with Horton-Strahler number p. These generating functions enable us to compute a precise asymptotic equivalent for $a_{n,p}$. Furthermore, we will determine the related number of structures when the number of unpaired bases shows up as an additional parameter. Our approach proves to be general enough to compute the average order of a secondary structure together with all the r-th moments and to enumerate substructures such as hairpins or bulges in dependence on the order of the secondary structures considered.},
  doi      = {10.1089/106652702760138628},
  url      = {https://doi.org/10.1089/106652702760138628},
}
@Article{Bundschuh2002,
  author          = {Bundschuh, Ralf and Hwa, Terence},
  title           = {Statistical mechanics of secondary structures formed by random {RNA} sequences},
  journal         = {Physical Review E},
  year            = {2002},
  volume          = {65},
  number          = {3},
  pages           = {031903},
  month           = mar,
  issn            = {1539-3755},
  abstract        = {The formation of secondary structures by a random {RNA} sequence is studied as a model system for the sequence-structure problem omnipresent in biopolymers. Several toy energy models are introduced to allow detailed analytical and numerical studies. First, a two-replica calculation is performed. By mapping the two-replica problem to the denaturation of a single homogeneous {RNA} molecule in six-dimensional embedding space, we show that sequence disorder is perturbatively irrelevant, i.e., an {RNA} molecule with weak sequence disorder is in a molten phase where many secondary structures with comparable total energy coexist. A numerical study of various models at high temperature reproduces behaviors characteristic of the molten phase. On the other hand, a scaling argument based on the external statistics of rare regions can be constructed to show that the low-temperature phase is unstable to sequence disorder. We performed a detailed numerical study of the low-temperature phase using the droplet theory as a guide, and characterized the statistics of large-scale, low-energy excitations of the secondary structures from the ground state structure. We find the excitation energy to grow very slowly (i.e., logarithmically) with the length scale of the excitation, suggesting the existence of a marginal glass phase. The transition between the low-temperature glass phase and the high-temperature molten phase is also characterized numerically. It is revealed by a change in the coefficient of the logarithmic excitation energy, from being disorder dominated to being entropy dominated.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2002-06-10},
  country         = {United States},
  doi             = {10.1103/PhysRevE.65.031903},
  issn-linking    = {1539-3755},
  keywords        = {Biophysics, methods; Glass; Models, Statistical; Nucleic Acid Conformation; {RNA}, chemistry; Temperature},
  nlm-id          = {101136452},
  owner           = {NLM},
  pmid            = {11909105},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2006-11-15},
}
@Article{Bundschuh2002a,
  author    = {Bundschuh, Ralf and Hwa, Terence},
  title     = {Phases of the secondary structures of {RNA} sequences},
  journal   = {EPL (Europhysics Letters)},
  volume    = {59},
  number    = {6},
  pages     = {903},
  year      = {2002},
  publisher = {IOP Publishing},
}
@Article{Bundschuh2008,
  author    = {Bundschuh, Ralf and Bruinsma, Robijn},
  title     = {Melting of branched {RNA} molecules},
  journal   = {Physical Review Letters},
  year      = {2008},
  volume    = {100},
  number    = {14},
  pages     = {148101},
  publisher = {APS},
}
@Article{David2007,
  author    = {David, François and Wiese, Kay Jörg},
  title     = {Systematic field theory of the {RNA} glass transition},
  journal   = {Physical Review Letters},
  year      = {2007},
  volume    = {98},
  number    = {12},
  pages     = {128102},
  publisher = {APS},
}
@Article{Bundschuh1999,
  author    = {Bundschuh, Ralf and Hwa, Terence},
  title     = {{RNA} Secondary Structure Formation: A Solvable Model of Heteropolymer Folding},
  journal   = {Physical Review Letters},
  year      = {1999},
  volume    = {83},
  number    = {7},
  pages     = {1479--1482},
  month     = aug,
  doi       = {10.1103/PhysRevLett.83.1479},
  numpages  = {0},
  publisher = {American Physical Society},
  url       = {https://link.aps.org/doi/10.1103/PhysRevLett.83.1479},
}
@Article{Jin2008,
  author          = {Jin, Emma Y and Reidys, Christian M},
  title           = {Asymptotic enumeration of {RNA} structures with pseudoknots},
  journal         = {Bulletin of Mathematical Biology},
  year            = {2008},
  volume          = {70},
  number          = {4},
  pages           = {951--970},
  month           = may,
  issn            = {0092-8240},
  abstract        = {In this paper, we present the asymptotic enumeration of {RNA} structures with pseudoknots. We develop a general framework for the computation of exponential growth rate and the asymptotic expansion for the numbers of k-noncrossing {RNA} structures. Our results are based on the generating function for the number of k-noncrossing {RNA} pseudoknot structures, $S_k(n)$, derived in Bull. Math. Biol. (2008), where k-1 denotes the maximal size of sets of mutually intersecting bonds. We prove a functional equation for the generating function $\sum_{n \geq 0} S_k(n) z^n$ and obtain for k=2 and k=3, the analytic continuation and singular expansions, respectively. It is implicit in our results that for arbitrary k singular expansions exist and via transfer theorems of analytic combinatorics, we obtain asymptotic expression for the coefficients. We explicitly derive the asymptotic expressions for 2- and 3-noncrossing {RNA} structures. Our main result is the derivation of the formula $S_3(n) \sim \frac{10.4724 \cdot 4!}{n(n-1)\cdots(n-4)} \left(\frac{5+\sqrt{21}}{2}\right)^n$.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2008-06-23},
  country         = {United States},
  doi             = {10.1007/s11538-007-9265-2},
  issn-linking    = {0092-8240},
  keywords        = {Mathematics; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry},
  nlm-id          = {0401404},
  owner           = {NLM},
  pmid            = {18340497},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2008-04-04},
}
@Article{Huang2008,
  author          = {Huang, Fenix W D and Reidys, Christian M},
  title           = {Statistics of canonical {RNA} pseudoknot structures},
  journal         = {Journal of Theoretical Biology},
  year            = {2008},
  volume          = {253},
  number          = {3},
  pages           = {570--578},
  month           = aug,
  issn            = {1095-8541},
  abstract        = {In this paper we study canonical {RNA} pseudoknot structures. We prove central limit theorems for the distributions of the arc-numbers of k-noncrossing {RNA} structures with given minimum stack-size tau over n nucleotides. Furthermore we compare the space of all canonical structures with canonical minimum free energy pseudoknot structures. Our results generalize the analysis of Schuster et al. obtained for {RNA} secondary structures [Hofacker, I.L., Schuster, P., Stadler, P.F., 1998. Combinatorics of {RNA} secondary structures. Discrete Appl. Math. 88, 207-237; Jin, E.Y., Reidys, C.M., 2007b. Central and local limit theorems for {RNA} structures. J. Theor. Biol. 250 (2008), 547-559; 2007a. Asymptotic enumeration of {RNA} structures with pseudoknots. Bull. Math. Biol., 70 (4), 951-970] to k-noncrossing {RNA} structures. Here $k \geq 2$ and tau are arbitrary natural numbers. We compare canonical pseudoknot structures to arbitrary structures and show that canonical pseudoknot structures exhibit significantly smaller exponential growth rates. We then compute the asymptotic distribution of their arc-numbers. Finally, we analyze how the minimum stack-size and crossing number factor into the distributions.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2008-09-04},
  country         = {England},
  doi             = {10.1016/j.jtbi.2008.04.002},
  issn-linking    = {0022-5193},
  keywords        = {Algorithms; Animals; Models, Genetic; Models, Molecular; Nucleic Acid Conformation; {RNA}, genetics},
  nlm-id          = {0376342},
  owner           = {NLM},
  pii             = {S0022-5193(08)00176-8},
  pmid            = {18511081},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2008-07-18},
}
@Article{Clote2006,
  author          = {Clote, Peter},
  title           = {Combinatorics of saturated secondary structures of {RNA}},
  journal         = {Journal of Computational Biology},
  year            = {2006},
  volume          = {13},
  number          = {9},
  pages           = {1640--1657},
  month           = nov,
  issn            = {1066-5277},
  abstract        = {Following Zuker (1986), a saturated secondary structure for a given {RNA} sequence is a secondary structure such that no base pair can be added without violating the definition of secondary structure, e.g., without introducing a pseudoknot. In the Nussinov-Jacobson energy model (Nussinov and Jacobson, 1980), where the energy of a secondary structure is -1 times the number of base pairs, saturated secondary structures are local minima in the energy landscape, hence form kinetic traps during the folding process. Here we present recurrence relations and closed form asymptotic limits for combinatorial problems related to the number of saturated secondary structures. In addition, Python source code to compute the number of saturated secondary structures having k base pairs can be found at the web servers link of bioinformatics.bc.edu/clotelab/.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2007-01-12},
  country         = {United States},
  doi             = {10.1089/cmb.2006.13.1640},
  issn-linking    = {1066-5277},
  keywords        = {Base Pairing; Base Sequence; Biometry; Kinetics; Models, Molecular; Models, Statistical; Nucleic Acid Conformation; {RNA}, chemistry, genetics; Thermodynamics},
  nlm-id          = {9433358},
  owner           = {NLM},
  pmid            = {17147486},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2006-12-06},
}
@Article{Banderier2015,
  author    = {Banderier, Cyril and Drmota, Michael},
  title     = {Formulae and Asymptotics for Coefficients of Algebraic Functions},
  journal   = {Combinatorics, Probability and Computing},
  year      = {2015},
  volume    = {24},
  number    = {1},
  pages     = {1--53},
  doi       = {10.1017/S0963548314000728},
  publisher = {Cambridge University Press},
}
% NOTE(review): pages = {180} is a single page, not a range -- confirm whether
% the article's full page range was intended.
@Article{Pringsheim1893,
  author  = {Pringsheim, A.},
  title   = {Zur {Theorie} der {Taylor'schen} {Reihe} und der analytischen {Functionen} mit beschränktem {Existenzbereich}},
  journal = {Mathematische Annalen},
  year    = {1893},
  volume  = {42},
  pages   = {180},
}
@Article{Flajolet1990,
  author    = {Flajolet, Philippe and Odlyzko, Andrew M.},
  title     = {Singularity Analysis of Generating Functions},
  journal   = {{SIAM} J. Discrete Math.},
  volume    = {3},
  number    = {2},
  pages     = {216--240},
  year      = {1990},
  doi       = {10.1137/0403019},
  url       = {https://doi.org/10.1137/0403019},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/bib/journals/siamdm/FlajoletO90},
  timestamp = {Fri, 26 May 2017 22:54:48 +0200},
}
% NOTE(review): volume/number were absent in the export and added from memory
% of the journal record -- confirm against the publisher page.
@Article{Lalley1993,
  author    = {Lalley, Steven P},
  title     = {Finite range random walk on free groups and homogeneous trees},
  journal   = {The Annals of Probability},
  year      = {1993},
  volume    = {21},
  number    = {4},
  pages     = {2087--2130},
  publisher = {JSTOR},
}
@Article{Woods1997,
  author    = {Woods, Alan R},
  title     = {Coloring rules for finite trees, and probabilities of monadic second order sentences},
  journal   = {Random Structures \& Algorithms},
  volume    = {10},
  number    = {4},
  pages     = {453--485},
  year      = {1997},
  publisher = {Wiley Online Library},
}
@Article{Akutsu2000,
  author   = {Akutsu, Tatsuya},
  title    = {Dynamic programming algorithms for {RNA} secondary structure prediction with pseudoknots},
  journal  = {Discrete Applied Mathematics},
  year     = {2000},
  volume   = {104},
  number   = {1},
  pages    = {45--62},
  issn     = {0166-218X},
  abstract = {This paper shows simple dynamic programming algorithms for {RNA} secondary structure prediction with pseudoknots. For a basic version of the problem (i.e., maximizing the number of base pairs), this paper presents an $O(n^4)$ time exact algorithm and an $O(n^{4-\delta})$ time approximation algorithm. The latter one outputs, for most {RNA} sequences, a secondary structure in which the number of base pairs is at least 1−ε of the optimal, where ε,δ are any constants satisfying 0<ε,δ<1. Several related results are shown too.},
  doi      = {10.1016/S0166-218X(00)00186-4},
  keywords = {{RNA} secondary structure, Pseudoknot, Approximation algorithms, Computational biology, Dynamic programming},
  url      = {http://www.sciencedirect.com/science/article/pii/S0166218X00001864},
}
@Article{Leontis2001,
  author    = {Leontis, Neocles B and Westhof, Eric},
  title     = {Geometric nomenclature and classification of {RNA} base pairs},
  journal   = {{RNA}},
  year      = {2001},
  volume    = {7},
  number    = {4},
  pages     = {499--512},
  publisher = {Cambridge University Press},
}
@Article{Yoffe2011,
  author          = {Yoffe, Aron M and Prinsen, Peter and Gelbart, William M and Ben-Shaul, Avinoam},
  title           = {The ends of a large {RNA} molecule are necessarily close},
  journal         = {Nucleic Acids Research},
  year            = {2011},
  volume          = {39},
  number          = {1},
  pages           = {292--299},
  month           = jan,
  issn            = {1362-4962},
  abstract        = {We show on general theoretical grounds that the two ends of single-stranded (ss) {RNA} molecules (consisting of roughly equal proportions of A, C, G and U) are necessarily close together, largely independent of their length and sequence. This is demonstrated to be a direct consequence of two generic properties of the equilibrium secondary structures, namely that the average proportion of bases in pairs is ∼60\% and that the average duplex length is ∼4. Based on mfold and Vienna computations on large numbers of ss{RNA}s of various lengths (1000-10 000 nt) and sequences (both random and biological), we find that the 5'-3' distance---defined as the sum of H-bond and covalent (ss) links separating the ends of the {RNA} chain---is small, averaging 15-20 for each set of viral sequences tested. For random sequences this distance is ∼12, consistent with the theory. We discuss the relevance of these results to evolved sequence complementarity and specific protein binding effects that are known to be important for keeping the two ends of viral and messenger {RNA}s in close proximity. Finally we speculate on how our conclusions imply indistinguishability in size and shape of equilibrated forms of linear and covalently circularized ss{RNA} molecules.},
  chemicals       = {{RNA}, Circular, {RNA}, Viral, {RNA}},
  citation-subset = {IM},
  completed       = {2011-02-09},
  country         = {England},
  doi             = {10.1093/nar/gkq642},
  issn-linking    = {0305-1048},
  keywords        = {Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; {RNA}, Circular; {RNA}, Viral, chemistry},
  nlm-id          = {0411011},
  owner           = {NLM},
  pii             = {gkq642},
  pmc             = {PMC3017586},
  pmid            = {20810537},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2019-12-10},
}
@Article{Wells1998,
author = {Wells, S E and Hillner, P E and Vale, R D and Sachs, A B},
title = {Circularization of m{RNA} by eukaryotic translation initiation factors},
journal = {Molecular Cell},
year = {1998},
volume = {2},
number = {1},
pages = {135--140},
month = jul,
issn = {1097-2765},
abstract = {Communication between the 5' cap structure and 3' poly(A) tail of eukaryotic m{RNA} results in the synergistic enhancement of translation. The cap and poly(A) tail binding proteins, eIF4E and Pab1p, mediate this effect in the yeast S. cerevisiae through their interactions with different parts of the translation factor eIF4G. Here, we demonstrate the reconstitution of an eIF4E/eIF4G/Pab1p complex with recombinant proteins, and show by atomic force microscopy that the complex can circularize capped, polyadenylated {RNA}. Our results suggest that formation of circular m{RNA} by translation factors could contribute to the control of m{RNA} expression in the eukaryotic cell.},
chemicals = {EIF4G1 protein, human, Eukaryotic Initiation Factor-4E, Eukaryotic Initiation Factor-4G, Fungal Proteins, Macromolecular Substances, Peptide Fragments, Peptide Initiation Factors, Poly(A)-Binding Proteins, {RNA}, Circular, {RNA}, Fungal, {RNA}, Messenger, {RNA}-Binding Proteins, Recombinant Fusion Proteins, Saccharomyces cerevisiae Proteins, TIF4631 protein, S cerevisiae, {RNA}, Glutathione Transferase},
citation-subset = {IM},
completed = {1998-08-31},
country = {United States},
doi = {10.1016/s1097-2765(00)80122-7},
issn-linking = {1097-2765},
keywords = {Eukaryotic Initiation Factor-4E; Eukaryotic Initiation Factor-4G; Fungal Proteins, metabolism; Glutathione Transferase, genetics, metabolism; Macromolecular Substances; Microscopy, Atomic Force; Nucleic Acid Conformation; Peptide Fragments, genetics, metabolism; Peptide Initiation Factors, genetics, metabolism, ultrastructure; Poly(A)-Binding Proteins; Protein Biosynthesis; {RNA}, biosynthesis, ultrastructure; {RNA}, Circular; {RNA}, Fungal, chemistry, metabolism, ultrastructure; {RNA}, Messenger, chemistry, metabolism, ultrastructure; {RNA}-Binding Proteins, metabolism, ultrastructure; Recombinant Fusion Proteins, metabolism; Saccharomyces cerevisiae, genetics; Saccharomyces cerevisiae Proteins},
nlm-id = {9802571},
owner = {NLM},
pii = {S1097-2765(00)80122-7},
pmid = {9702200},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2019-12-10},
}
@Article{Albert2002,
author = {Albert, R. and Barab{\'a}si, A.-L.},
title = {Statistical mechanics of complex networks},
journal = {Reviews of Modern Physics},
year = {2002},
volume = {74},
pages = {47--97},
}
@Article{Bowman2010,
author = {Bowman, G. R. and Pande, V. S.},
title = {Protein folded states are kinetic hubs},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
year = {2010},
volume = {107},
number = {24},
pages = {10890--10895},
month = jun,
}
@Article{Scala2001,
author = {Scala, A. and Nunes~Amaral, L. A. and Barth{\'e}l{\'e}my, M.},
title = {Small-world networks and the conformation space of a short lattice polymer chain},
journal = {Europhys. Lett.},
year = {2001},
volume = {55},
number = {4},
pages = {594--600},
}
@Article{VanNoort2004,
author = {Van Noort, V. and Snel, B. and Huynen, M. A.},
title = {The yeast coexpression network has a small-world, scale-free architecture and can be explained by a simple model},
journal = {EMBO Rep.},
year = {2004},
volume = {5},
number = {3},
pages = {280--284},
month = mar,
}
@Article{Watts1998,
author = {Watts, D. J. and Strogatz, S. H.},
title = {Collective dynamics of `small-world' networks},
journal = {Nature},
year = {1998},
volume = {393},
number = {6684},
pages = {440--442},
month = jun,
}
@Article{Wuchty2003,
author = {Wuchty, S.},
title = {Small worlds in {{RNA}} structures},
journal = {Nucleic Acids Research},
year = {2003},
volume = {31},
number = {3},
pages = {1108--1117},
month = feb,
}
@Article{Newman2001,
author = {Newman, M. E. and Strogatz, S. H. and Watts, D. J.},
title = {Random graphs with arbitrary degree distributions and their applications},
journal = {Phys. Rev. E},
year = {2001},
volume = {64},
number = {2},
pages = {026118},
month = aug,
}
@Article{Flamm2000,
author = {C. Flamm and W. Fontana and I. L. Hofacker and P. Schuster},
title = {{{RNA}} folding at elementary step resolution},
journal = {{RNA}},
year = {2000},
volume = {6},
pages = {325--338},
}
@Article{Cont2008,
author = {Cont, R. and Tanimura, E.},
title = {Small-world graphs: characterization and alternative constructions},
journal = {Adv. in Appl. Probab.},
year = {2008},
volume = {40},
number = {4},
pages = {939--965},
}
@Article{Clote2015,
author = {Clote, Peter},
title = {Expected degree for {RNA} secondary structure networks},
journal = {Journal of Computational Chemistry},
year = {2015},
volume = {36},
number = {2},
pages = {103--117},
month = jan,
issn = {1096-987X},
abstract = {Consider the network of all secondary structures of a given {RNA} sequence, where nodes are connected when the corresponding structures have base pair distance one. The expected degree of the network is the average number of neighbors, where average may be computed with respect to the either the uniform or Boltzmann probability. Here, we describe the first algorithm, {RNA}expNumNbors, that can compute the expected number of neighbors, or expected network degree, of an input sequence. For {RNA} sequences from the Rfam database, the expected degree is significantly less than the constrained minimum free energy structure, defined to have minimum free energy (MFE) over all structures consistent with the Rfam consensus structure. The expected degree of structural {RNA}s, such as purine riboswitches, paradoxically appears to be smaller than that of random {RNA}, yet the difference between the degree of the MFE structure and the expected degree is larger than that of random {RNA}. Expected degree does not seem to correlate with standard structural diversity measures of {RNA}, such as positional entropy and ensemble defect. The program {RNA}expNumNbors is written in C, runs in cubic time and quadratic space, and is publicly available at http://bioinformatics.bc.edu/clotelab/{RNA}expNumNbors.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2015-10-28},
country = {United States},
doi = {10.1002/jcc.23776},
issn-linking = {0192-8651},
keywords = {Algorithms; Base Sequence; Databases, Factual; Nucleic Acid Conformation; {RNA}, chemistry; Software; Thermodynamics; {RNA} secondary structure; macromolecular network; network degree; small-world},
nlm-id = {9878362},
owner = {NLM},
pmid = {25382310},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2014-12-16},
}
@Article{Andronescu2008,
author = {Andronescu, Mirela and Bereg, Vera and Hoos, Holger H and Condon, Anne},
title = {{RNA STRAND}: the {RNA} secondary structure and statistical analysis database},
journal = {BMC Bioinformatics},
year = {2008},
volume = {9},
pages = {340},
month = aug,
issn = {1471-2105},
abstract = {The ability to access, search and analyse secondary structures of a large set of known {RNA} molecules is very important for deriving improved {RNA} energy models, for evaluating computational predictions of {RNA} secondary structures and for a better understanding of {RNA} folding. Currently there is no database that can easily provide these capabilities for almost all {RNA} molecules with known secondary structures. In this paper we describe {RNA} STRAND - the {RNA} secondary STRucture and statistical ANalysis Database, a curated database containing known secondary structures of any type and organism. Our new database provides a wide collection of known {RNA} secondary structures drawn from public databases, searchable and downloadable in a common format. Comprehensive statistical information on the secondary structures in our database is provided using the {RNA} Secondary Structure Analyser, a new tool we have developed to analyse {RNA} secondary structures. The information thus obtained is valuable for understanding to which extent and with which probability certain structural motifs can appear. We outline several ways in which the data provided in {RNA} STRAND can facilitate research on {RNA} structure, including the improvement of {RNA} energy models and evaluation of secondary structure prediction programs. In order to keep up-to-date with new {RNA} secondary structure experiments, we offer the necessary tools to add solved {RNA} secondary structures to our database and invite researchers to contribute to {RNA} STRAND. {RNA} STRAND is a carefully assembled database of trusted {RNA} secondary structures, with easy on-line tools for searching, analyzing and downloading user selected entries, and is publicly available at http://www.rnasoft.ca/strand.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2008-10-17},
country = {England},
doi = {10.1186/1471-2105-9-340},
issn-linking = {1471-2105},
keywords = {Computer Graphics; Computer Simulation; Database Management Systems; Databases, Genetic; Information Storage and Retrieval, methods; Models, Chemical; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry, ultrastructure; User-Computer Interface},
nlm-id = {100965194},
owner = {NLM},
pii = {1471-2105-9-340},
pmc = {PMC2536673},
pmid = {18700982},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Giegerich2004,
author = {Giegerich, Robert and Voss, Björn and Rehmsmeier, Marc},
title = {Abstract shapes of {RNA}},
journal = {Nucleic Acids Research},
year = {2004},
volume = {32},
number = {16},
pages = {4843--4851},
issn = {1362-4962},
abstract = {The function of a non-protein-coding {RNA} is often determined by its structure. Since experimental determination of {RNA} structure is time-consuming and expensive, its computational prediction is of great interest, and efficient solutions based on thermodynamic parameters are known. Frequently, however, the predicted minimum free energy structures are not the native ones, leading to the necessity of generating suboptimal solutions. While this can be accomplished by a number of programs, the user is often confronted with large outputs of similar structures, although he or she is interested in structures with more fundamental differences, or, in other words, with different abstract shapes. Here, we formalize the concept of abstract shapes and introduce their efficient computation. Each shape of an {RNA} molecule comprises a class of similar structures and has a representative structure of minimal free energy within the class. Shape analysis is implemented in the program {RNA}shapes. We applied {RNA}shapes to the prediction of optimal and suboptimal abstract shapes of several {RNA}s. For a given energy range, the number of shapes is considerably smaller than the number of structures, and in all cases, the native structures were among the top shape representatives. This demonstrates that the researcher can quickly focus on the structures of interest, without processing up to thousands of near-optimal solutions. We complement this study with a large-scale analysis of the growth behaviour of structure and shape spaces. {RNA}shapes is available for download and as an online version on the Bielefeld Bioinformatics Server.},
chemicals = {5' Untranslated Regions, {RNA}, Small Nuclear, {RNA}, Untranslated, {RNA}, Viral, U2 small nuclear {RNA}, {RNA}, Transfer},
citation-subset = {IM},
completed = {2004-09-24},
country = {England},
doi = {10.1093/nar/gkh779},
issn-linking = {0305-1048},
keywords = {5' Untranslated Regions, chemistry; Base Sequence; Computational Biology, methods; HIV-1, genetics; Humans; Internet; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, Small Nuclear, chemistry; {RNA}, Transfer, chemistry; {RNA}, Untranslated, chemistry; {RNA}, Viral, chemistry; Software; Terminology as Topic},
nlm-id = {0411011},
owner = {NLM},
pii = {32/16/4843},
pmc = {PMC519098},
pmid = {15371549},
pubmodel = {Electronic-Print},
pubstatus = {epublish},
revised = {2019-12-10},
}
@Article{Reeder2005,
author = {Reeder, Jens and Giegerich, Robert},
title = {Consensus shapes: an alternative to the {Sankoff} algorithm for {RNA} consensus structure prediction},
journal = {Bioinformatics},
year = {2005},
volume = {21},
number = {17},
pages = {3516--3523},
month = sep,
issn = {1367-4803},
abstract = {The well-known Sankoff algorithm for simultaneous {RNA} sequence alignment and folding is currently considered an ideal, but computationally over-expensive method. Available tools implement this algorithm under various pragmatic restrictions. They are still expensive to use, and it is difficult to judge if the moderate quality of results is because of the underlying model or to its imperfect implementation. We propose to redefine the consensus structure prediction problem in a way that does not imply a multiple sequence alignment step. For a family of {RNA} sequences, our method explicitly and independently enumerates the near-optimal abstract shape space, and predicts as the consensus an abstract shape common to all sequences. For each sequence, it delivers the thermodynamically best structure which has this common shape. Since the shape space is much smaller than the structure space, and identification of common shapes can be done in linear time (in the number of shapes considered), the method is essentially linear in the number of sequences. Our evaluation shows that the new method compares favorably with available alternatives. The new method has been implemented in the program {RNA}cast and is available on the Bielefeld Bioinformatics Server. jreeder@TechFak.Uni-Bielefeld.DE, robert@TechFak.Uni-Bielefeld.DE SUPPLEMENTARY INFORMATION: Available at http://bibiserv.techfak.uni-bielefeld.de/rnacast/supplementary.html},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2005-12-07},
country = {England},
doi = {10.1093/bioinformatics/bti577},
issn-linking = {1367-4803},
keywords = {Algorithms; Base Sequence; Computer Simulation; Consensus Sequence; Models, Chemical; Models, Molecular; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, analysis, chemistry; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods; Sequence Homology, Nucleic Acid; Software},
nlm-id = {9808944},
owner = {NLM},
pii = {bti577},
pmid = {16020472},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2019-12-10},
}
@Article{Janssen2008,
author = {Janssen, Stefan and Reeder, Jens and Giegerich, Robert},
title = {Shape based indexing for faster search of {RNA} family databases},
journal = {BMC Bioinformatics},
year = {2008},
volume = {9},
pages = {131},
month = feb,
issn = {1471-2105},
abstract = {Most non-coding {RNA} families exert their function by means of a conserved, common secondary structure. The Rfam data base contains more than five hundred structurally annotated {RNA} families. Unfortunately, searching for new family members using covariance models (CMs) is very time consuming. Filtering approaches that use the sequence conservation to reduce the number of CM searches, are fast, but it is unknown to which sacrifice. We present a new filtering approach, which exploits the family specific secondary structure and significantly reduces the number of CM searches. The filter eliminates approximately 85\% of the queries and discards only 2.6\% true positives when evaluating Rfam against itself. First results also capture previously undetected non-coding {RNA}s in a recent human {RNA}z screen. The {RNA} shape index filter ({RNA}sifter) is based on the following rationale: An {RNA} family is characterised by structure, much more succinctly than by sequence content. Structures of individual family members, which naturally have different length and sequence composition, may exhibit structural variation in detail, but overall, they have a common shape in a more abstract sense. Given a fixed release of the Rfam data base, we can compute these abstract shapes for all families. This is called a shape index. If a query sequence belongs to a certain family, it must be able to fold into the family shape with reasonable free energy. Therefore, rather than matching the query against all families in the data base, we can first (and quickly) compute its feasible shape(s), and use the shape index to access only those families where a good match is possible due to a common shape with the query.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2008-04-23},
country = {England},
doi = {10.1186/1471-2105-9-131},
issn-linking = {1471-2105},
keywords = {Algorithms; Base Sequence; Database Management Systems; Databases, Genetic; Information Storage and Retrieval, methods; Molecular Sequence Data; {RNA}, genetics; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods},
nlm-id = {100965194},
owner = {NLM},
pii = {1471-2105-9-131},
pmc = {PMC2277397},
pmid = {18312625},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Burbano2007,
author = {Burbano, Hernán A and Andrade, Eugenio},
title = {Analysis of t{RNA} abstract shapes of precursor/derivative amino acids in {Archaea}},
journal = {Gene},
year = {2007},
volume = {396},
number = {1},
pages = {75--83},
month = jul,
issn = {0378-1119},
abstract = {Wong's theory of the genetic code's origin states that because of historical constraints, codon assignment depends on the relation between precursor and derivative amino acids, a result of the coevolutionary process between amino acids' biosynthetic pathways and t{RNA}s. Based on arguments supporting the assumption that natural selection favors more stable and thus functionally constrained structures, we tested whether precursor and derivative t{RNA}s are equally evolved by measuring their structural parameters, thermostability and molecular plasticity. We also estimated the extent to which precursor and derivative t{RNA}s differ within Archaea. We used Archaea sequences of both precursor and derivative t{RNA}s in order to examine the plastic repertoires or sets of suboptimal structures at a defined free energy interval. We grouped secondary structures according to their helix nesting and adjacency using abstract shapes analysis. This clustering enabled us to infer a consensus sequence for all shapes that fit the clover leaf secondary structure [Giegerich, R., et al., Nucleic Acids Res 2004; 32 (16): 4843-51.]. This consensus sequence was then folded in order to retrieve a set of suboptimal structures. For each pair of precursor and derivative t{RNA}s, we compared these plastic repertoires based on the number of secondary structures, the thermostability of the minimum free energy structure and two structural parameters (base pair propensity (P) and mean length of helical stem structures (S)), which were measured for every representative secondary structure [Schultes, E.A., et al., J Mol Evol 1999; 49 (1): 76-83.]. We found that derivative t{RNA}s have fewer numbers of shapes, higher thermostability and more stable parameters than precursor t{RNA}s, a fact in full agreement with Wong's coevolution theory of the genetic code.},
chemicals = {Amino Acids, {RNA}, Transfer},
citation-subset = {IM},
completed = {2007-08-01},
country = {Netherlands},
doi = {10.1016/j.gene.2007.02.024},
issn-linking = {0378-1119},
keywords = {Amino Acids, genetics; Archaea, genetics; Base Composition, genetics; Base Pairing; Base Sequence; Evolution, Molecular; Genetic Code; Molecular Sequence Data; {RNA}, Transfer, chemistry, genetics; Thermodynamics},
nlm-id = {7706761},
owner = {NLM},
pii = {S0378-1119(07)00113-8},
pmid = {17433860},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2007-06-04},
}
@Article{Voss2006,
author = {Voss, Björn and Giegerich, Robert and Rehmsmeier, Marc},
title = {Complete probabilistic analysis of {RNA} shapes},
journal = {BMC Biology},
year = {2006},
volume = {4},
pages = {5},
month = feb,
issn = {1741-7007},
abstract = {Soon after the first algorithms for {RNA} folding became available, it was recognised that the prediction of only one energetically optimal structure is insufficient to achieve reliable results. An in-depth analysis of the folding space as a whole appeared necessary to deduce the structural properties of a given {RNA} molecule reliably. Folding space analysis comprises various methods such as suboptimal folding, computation of base pair probabilities, sampling procedures and abstract shape analysis. Common to many approaches is the idea of partitioning the folding space into classes of structures, for which certain properties can be derived. In this paper we extend the approach of abstract shape analysis. We show how to compute the accumulated probabilities of all structures that share the same shape. While this implies a complete (non-heuristic) analysis of the folding space, the computational effort depends only on the size of the shape space, which is much smaller. This approach has been integrated into the tool {RNA} shapes, and we apply it to various {RNA}s. Analyses of conformational switches show the existence of two shapes with probabilities approximately 2/3 vs. 1/3, whereas the analysis of a micro{RNA} precursor reveals one shape with a probability near to 1.0. Furthermore, it is shown that a shape can outperform an energetically more favourable one by achieving a higher probability. From these results, and the fact that we use a complete and exact analysis of the folding space, we conclude that this approach opens up new and promising routes for investigating and understanding {RNA} secondary structure.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2006-07-17},
country = {England},
doi = {10.1186/1741-7007-4-5},
issn-linking = {1741-7007},
keywords = {Algorithms; Computational Biology, methods; Models, Statistical; Models, Theoretical; Nucleic Acid Conformation; Probability; Programming Languages; {RNA}, chemistry},
nlm-id = {101190720},
owner = {NLM},
pii = {1741-7007-4-5},
pmc = {PMC1479382},
pmid = {16480488},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Findeiss2017,
author = {Findei{\ss}, Sven and Etzel, Maja and Will, Sebastian and M{\"o}rl, Mario and Stadler, Peter F},
title = {Design of Artificial Riboswitches as Biosensors},
journal = {Sensors},
year = {2017},
volume = {17},
number = {9},
pages = {1990},
month = aug,
issn = {1424-8220},
abstract = {{RNA} aptamers readily recognize small organic molecules, polypeptides, as well as other nucleic acids in a highly specific manner. Many such aptamers have evolved as parts of regulatory systems in nature. Experimental selection techniques such as SELEX have been very successful in finding artificial aptamers for a wide variety of natural and synthetic ligands. Changes in structure and/or stability of aptamers upon ligand binding can propagate through larger {RNA} constructs and cause specific structural changes at distal positions. In turn, these may affect transcription, translation, splicing, or binding events. The {RNA} secondary structure model realistically describes both thermodynamic and kinetic aspects of {RNA} structure formation and refolding at a single, consistent level of modelling. Thus, this framework allows studying the function of natural riboswitches in silico. Moreover, it enables rationally designing artificial switches, combining essentially arbitrary sensors with a broad choice of read-out systems. Eventually, this approach sets the stage for constructing versatile biosensors.},
chemicals = {Aptamers, Nucleotide, Ligands, Riboswitch},
citation-subset = {IM},
completed = {2018-05-31},
country = {Switzerland},
doi = {10.3390/s17091990},
issn-linking = {1424-8220},
keywords = {Aptamers, Nucleotide; Biosensing Techniques; Kinetics; Ligands; Riboswitch; {RNA} structure; aptamer; folding kinetics; ligand binding; rational design; refolding; thermodynamics},
nlm-id = {101204366},
owner = {NLM},
pii = {E1990},
pmc = {PMC5621056},
pmid = {28867802},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2019-01-16},
}
@Article{Grabbe2016,
author = {Grabbe, Stephan and Haas, Heinrich and Diken, Mustafa and Kranz, Lena M and Langguth, Peter and Sahin, Ugur},
title = {Translating nanoparticulate-personalized cancer vaccines into clinical applications: case study with {RNA}-lipoplexes for the treatment of melanoma},
journal = {Nanomedicine},
year = {2016},
volume = {11},
number = {20},
pages = {2723--2734},
month = oct,
issn = {1748-6963},
abstract = {The development of nucleic acid based vaccines against cancer has gained considerable momentum through the advancement of modern sequencing technologies and on novel {RNA}-based synthetic drug formats, which can be readily adapted following identification of every patient's tumor-specific mutations. Furthermore, affordable and individual 'on demand' production of molecularly optimized vaccines should allow their application in large groups of patients. This has resulted in the therapeutic concept of an active personalized cancer vaccine, which has been brought into clinical testing. Successful trials have been performed by intranodal administration of sterile isotonic solutions of synthetic {RNA} vaccines. The second generation of {RNA} vaccines which is currently being developed encompasses intravenously injectable {RNA} nanoparticle formulations (lipoplexes), made up from lipid excipients, denoted {RNA} . A first product that has made its way from bench to bedside is a therapeutic vaccine for intravenous administration based on a fixed set of four {RNA} lipoplex drug products, each encoding for one shared tumor antigen (Lipoplex Melanoma {RNA} Immunotherapy, 'Lipo-MERIT'). This article describes the steps for translating these novel {RNA} nanomedicines into clinical trials.},
chemicals = {Antigens, Neoplasm, Cancer Vaccines, Excipients, Liposomes, {RNA}, Messenger, {RNA}},
citation-subset = {IM},
completed = {2018-03-22},
country = {England},
doi = {10.2217/nnm-2016-0275},
issn-linking = {1743-5889},
keywords = {Animals; Antigens, Neoplasm, genetics, immunology; Cancer Vaccines, immunology; Clinical Trials as Topic; Excipients; Humans; Immunotherapy, methods; Liposomes, chemistry; Melanoma, immunology, therapy; Nanomedicine; Nanoparticles, chemistry, therapeutic use; Precision Medicine; {RNA}, administration \& dosage, chemistry, immunology; {RNA}, Messenger, administration \& dosage, chemistry, pharmacology, therapeutic use; cancer; drug delivery; lipoplex; liposomes; m{RNA}; tumor immunotherapy},
nlm-id = {101278111},
owner = {NLM},
pmid = {27700619},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-03-22},
}
@Article{Takahashi2013,
author = {Takahashi, Melissa K. and Lucks, Julius B.},
title = {A modular strategy for engineering orthogonal chimeric {{RNA}} transcription regulators},
journal = {Nucleic Acids Research},
year = {2013},
volume = {41},
number = {15},
pages = {7577--7588},
doi = {10.1093/nar/gkt452},
}
@Article{Wu2014,
  title   = {{{RNA}}i Therapies: Drugging the Undruggable},
  author  = {Wu, Sherry Y. and Lopez-Berestein, Gabriel and Calin, George A. and Sood, Anil K.},
  journal = {Science Translational Medicine},
  volume  = {6},
  number  = {240},
  pages   = {240ps7},
  year    = {2014},
  doi     = {10.1126/scitranslmed.3008362},
}
@InProceedings{Bonnet2018,
  title     = {Designing {{RNA}} Secondary Structures Is Hard},
  author    = {{\'{E}}douard Bonnet and Pawe{\l} Rz{{a}}{\.{z}}ewski and Florian Sikora},
  editor    = {Benjamin J. Raphael},
  booktitle = {Research in Computational Molecular Biology - 22nd Annual International Conference, {RECOMB} 2018},
  series    = {Lecture Notes in Computer Science},
  volume    = {10812},
  pages     = {248--250},
  publisher = {Springer},
  address   = {Paris},
  year      = {2018},
  timestamp = {Mon, 13 May 2019 09:30:09 +0200},
}
@Article{Dirks2004,
author = {Dirks, Robert M. and Lin, Milo and Winfree, Erik and Pierce, Niles A.},
title = {Paradigms for computational nucleic acid design},
journal = {Nucleic Acids Research},
year = {2004},
volume = {32},
number = {4},
pages = {1392--1403},
doi = {10.1093/nar/gkh291},
}
@Article{Aguirre-Hernandez2007,
  title   = {Computational {{{RNA}}} secondary structure design: empirical complexity and improved methods},
  author  = {Aguirre-Hern{\'a}ndez, Rosal{\'\i}a and Hoos, Holger H and Condon, Anne},
  journal = {BMC Bioinformatics},
  volume  = {8},
  pages   = {34},
  year    = {2007},
  doi     = {10.1186/1471-2105-8-34},
}
@Article{Chyzak2008,
author = {Chyzak, Frédéric and Drmota, Michael and Klausner, Thomas and Kok, Gerard},
title = {The Distribution of Patterns in Random Trees},
journal = {Combinatorics, Probability and Computing},
year = {2008},
volume = {17},
number = {1},
pages = {21--59},
doi = {10.1017/S0963548307008425},
publisher = {Cambridge University Press},
}
@Unpublished{Collet2018,
  title  = {Random Sampling of Ordered Trees according to the Number of Occurrences of a Pattern},
  author = {Collet, Gwendal and David, Julien and Jacquot, Alice},
  year   = {2018},
  note   = {Submitted},
}
@Book{Klop2001,
title = {Term rewriting systems},
publisher = {Cambridge University Press},
year = {2001},
author = {Klop, Jan Willem and Bezem, Marc and De Vrijer, R. C.},
}
@Article{Nussinov1980,
author = {Nussinov, R and Jacobson, A B},
title = {Fast algorithm for predicting the secondary structure of single-stranded {RNA}},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
year = {1980},
volume = {77},
number = {11},
pages = {6309--6313},
month = nov,
issn = {0027-8424},
abstract = {A computer method is presented for finding the most stable secondary structures in long single-stranded {RNA}s. It is 1-2 orders of magnitude faster than existing codes. The time required for its application increases as $N^3$ for a chain N nucleotides long. As many as 1000 nucleotides can be searched in a single run. The approach is systematic and builds an optimal structure in a straightforward inductive procedure based on an exact mathematical algorithm. Two simple half-matrices are constructed and the best folded form is read directly from the second matrix by a simple back-tracking procedure. The program utilizes published values for base-pairing energies to compute one structure with the lowest free energy.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {1981-03-24},
country = {United States},
doi = {10.1073/pnas.77.11.6309},
issn-linking = {0027-8424},
keywords = {Base Sequence; Computers; Hydrogen Bonding; Nucleic Acid Conformation; {RNA}; Thermodynamics},
nlm-id = {7505876},
owner = {NLM},
pmc = {PMC350273},
pmid = {6161375},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2019-05-01},
}
@PhdThesis{Michalik2019,
  title  = {Non-redundant sampling in {RNA} bioinformatics},
  author = {Juraj Michalik},
  school = {Interface Graduate school -- Université Paris-Saclay},
  year   = {2019},
}
@PhdThesis{Saaidi2019b,
  title  = {Multi-dimensional probing for {{RNA}} secondary structure(s) prediction},
  author = {Afaf Saaidi},
  school = {Interface Graduate school -- Université Paris-Saclay},
  year   = {2019},
}
@Article{Giegerich2014,
author = {Giegerich, Robert and Touzet, H{\'e}l{\`e}ne},
title = {Modeling Dynamic Programming Problems over Sequences and Trees with Inverse Coupled Rewrite Systems},
journal = {Algorithms},
year = {2014},
volume = {7},
number = {1},
pages = {62--144},
month = mar,
issn = {1999-4893},
doi = {10.3390/a7010062},
publisher = {MDPI AG},
url = {http://dx.doi.org/10.3390/a7010062},
}
@Article{Finkelstein1993,
  author = {A. V. Finkelstein and M. A. Roytberg},
  title = {Computation of biopolymers: a general approach to different problems},
  journal = {Biosystems},
  year = {1993},
  volume = {30},
  number = {1--3},
  pages = {1--19},
  abstract = {A comparative analysis of some effective algorithms widely used in
analysis, computation and comparison of chain molecules is presented.
A notion of a stream in an oriented hypergraph is introduced, which
generalizes a notion of a path in a graph. All considered algorithms
looking over exponential sets of structures in polynomial time can
be described as variants of a general algorithm of analysis of paths
in graphs and of streams in oriented hypergraphs.},
  institution = {Institute of Protein Research, Russian Academy of Sciences, Pushchino, Moscow region.},
  keywords = {Algorithms; Amino Acid Sequence; Biophysical Phenomena; Biophysics; Biopolymers; DNA, chemistry; Mathematics; Models, Chemical; Molecular Sequence Data; Nucleic Acid Conformation; Peptides, chemistry; Protein Conformation; {RNA}, chemistry; Software},
  owner = {ponty},
  pmid = {7690608},
  timestamp = {2010.04.26},
}
@InProceedings{Giegerich2002,
  author = {Giegerich, Robert and Meyer, Carsten},
  title = {Algebraic dynamic programming},
  booktitle = {International Conference on Algebraic Methodology and Software Technology},
  year = {2002},
  pages = {349--364},
  publisher = {Springer},
}
@Article{Sauthoff2013,
  author = {Sauthoff, Georg and M{\"o}hl, Mathias and Janssen, Stefan and Giegerich, Robert},
  title = {{Bellman's} {GAP}---a language and compiler for dynamic programming in sequence analysis},
  journal = {Bioinformatics},
  year = {2013},
  volume = {29},
  number = {5},
  pages = {551--560},
  publisher = {Oxford University Press},
}
% The first author's family name is the compound "Höner zu Siederdissen";
% it is braced as a unit so BibTeX does not split it into von/Last parts.
@Article{Siederdissen2015,
  author = {{H{\"o}ner zu Siederdissen}, Christian and Prohaska, Sonja J and Stadler, Peter F},
  title = {Algebraic dynamic programming over general data structures},
  journal = {BMC Bioinformatics},
  year = {2015},
  volume = {16},
  number = {19},
  pages = {S2},
  publisher = {BioMed Central},
}
@Article{McCaskill1990,
  author = {McCaskill, John S},
  title = {The equilibrium partition function and base pair binding probabilities for {{RNA}} secondary structure},
  journal = {Biopolymers},
  year = {1990},
  volume = {29},
  number = {6--7},
  pages = {1105--1119},
  publisher = {Wiley Online Library},
}
@Article{Lari1990,
  author = {K. Lari and S. J. Young},
  title = {The estimation of stochastic context-free grammars using the {Inside-Outside} algorithm},
  journal = {Computer Speech and Language},
  year = {1990},
  volume = {4},
  pages = {35--56},
}
@Article{Baker1979,
  author = {Baker, J. K.},
  title = {Trainable grammars for speech recognition},
  journal = {The Journal of the Acoustical Society of America},
  year = {1979},
  volume = {65},
  number = {S1},
  pages = {S132},
  doi = {10.1121/1.2017061},
  url = {https://doi.org/10.1121/1.2017061},
}
@Article{Do2006,
  author = {Do, Chuong B and Woods, Daniel A and Batzoglou, Serafim},
  title = {{CONTRAfold}: {RNA} secondary structure prediction without physics-based models},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  number = {14},
  pages = {e90--e98},
  month = jul,
  issn = {1367-4811},
  abstract = {For several decades, free energy minimization methods have been the dominant strategy for single sequence {RNA} secondary structure prediction. More recently, stochastic context-free grammars (SCFGs) have emerged as an alternative probabilistic methodology for modeling {RNA} structure. Unlike physics-based methods, which rely on thousands of experimentally-measured thermodynamic parameters, SCFGs use fully-automated statistical learning algorithms to derive model parameters. Despite this advantage, however, probabilistic methods have not replaced free energy minimization methods as the tool of choice for secondary structure prediction, as the accuracies of the best current SCFGs have yet to match those of the best physics-based models. In this paper, we present CONTRAfold, a novel secondary structure prediction method based on conditional log-linear models (CLLMs), a flexible class of probabilistic models which generalize upon SCFGs by using discriminative training and feature-rich scoring. In a series of cross-validation experiments, we show that grammar-based secondary structure prediction methods formulated as CLLMs consistently outperform their SCFG analogs. Furthermore, CONTRAfold, a CLLM incorporating most of the features found in typical thermodynamic models, achieves the highest single sequence prediction accuracies to date, outperforming currently available probabilistic and physics-based techniques. Our result thus closes the gap between probabilistic and thermodynamic models, demonstrating that statistical learning procedures provide an effective alternative to empirical measurement of thermodynamic parameters for {RNA} secondary structure prediction. Source code for CONTRAfold is available at http://contra.stanford.edu/contrafold/.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2006-10-05},
  country = {England},
  doi = {10.1093/bioinformatics/btl246},
  issn-linking = {1367-4803},
  keywords = {Algorithms; Base Sequence; Computer Simulation; Models, Chemical; Models, Molecular; Models, Statistical; Molecular Sequence Data; Nucleic Acid Conformation; Physics, methods; {RNA}, chemistry; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods; Software},
  nlm-id = {9808944},
  owner = {NLM},
  pii = {22/14/e90},
  pmid = {16873527},
  pubmodel = {Print},
  pubstatus = {ppublish},
  revised = {2009-11-04},
}
@Article{Hamada2009,
  author = {Hamada, Michiaki and Kiryu, Hisanori and Sato, Kengo and Mituyama, Toutai and Asai, Kiyoshi},
  title = {Prediction of {RNA} secondary structure using generalized centroid estimators},
  journal = {Bioinformatics},
  year = {2009},
  volume = {25},
  number = {4},
  pages = {465--473},
  month = feb,
  issn = {1367-4811},
  abstract = {Recent studies have shown that the methods for predicting secondary structures of {RNA}s on the basis of posterior decoding of the base-pairing probabilities has an advantage with respect to prediction accuracy over the conventionally utilized minimum free energy methods. However, there is room for improvement in the objective functions presented in previous studies, which are maximized in the posterior decoding with respect to the accuracy measures for secondary structures. We propose novel estimators which improve the accuracy of secondary structure prediction of {RNA}s. The proposed estimators maximize an objective function which is the weighted sum of the expected number of the true positives and that of the true negatives of the base pairs. The proposed estimators are also improved versions of the ones used in previous works, namely CONTRAfold for secondary structure prediction from a single {RNA} sequence and McCaskill-MEA for common secondary structure prediction from multiple alignments of {RNA} sequences. We clarify the relations between the proposed estimators and the estimators presented in previous works, and theoretically show that the previous estimators include additional unnecessary terms in the evaluation measures with respect to the accuracy. Furthermore, computational experiments confirm the theoretical analysis by indicating improvement in the empirical accuracy. The proposed estimators represent extensions of the centroid estimators proposed in Ding et al. and Carvalho and Lawrence, and are applicable to a wide variety of problems in bioinformatics. Supporting information and the CentroidFold software are available online at: http://www.ncrna.org/software/centroidfold/.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2009-03-17},
  country = {England},
  doi = {10.1093/bioinformatics/btn601},
  issn-linking = {1367-4803},
  keywords = {Base Pairing; Base Sequence; Computational Biology, methods; Databases, Genetic; Entropy; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, chemistry; Sequence Analysis, {RNA}, methods},
  nlm-id = {9808944},
  owner = {NLM},
  pii = {btn601},
  pmid = {19095700},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2009-11-04},
}
@Article{Aldous1987,
  author = {Aldous, David},
  title = {On the {Markov} chain simulation method for uniform combinatorial distributions and simulated annealing},
  journal = {Probability in the Engineering and Informational Sciences},
  year = {1987},
  volume = {1},
  number = {1},
  pages = {33--46},
  publisher = {Cambridge University Press},
}
@Book{Gamerman2006,
  author = {Gamerman, Dani and Lopes, Hedibert F},
  title = {{Markov} chain {Monte Carlo}: stochastic simulation for {Bayesian} inference},
  publisher = {Chapman and Hall/CRC},
  year = {2006},
}
@Article{Propp1996,
  author = {Propp, James Gary and Wilson, David Bruce},
  title = {Exact sampling with coupled {Markov} chains and applications to statistical mechanics},
  journal = {Random Structures \& Algorithms},
  year = {1996},
  volume = {9},
  number = {1--2},
  pages = {223--252},
  publisher = {Wiley Online Library},
}
@Article{Miklos2005,
  author = {István Miklós and Irmtraud M Meyer and Borbála Nagy},
  title = {Moments of the {Boltzmann} distribution for {{RNA}} secondary structures},
  journal = {Bulletin of Mathematical Biology},
  year = {2005},
  volume = {67},
  number = {5},
  pages = {1031--1047},
  month = sep,
  doi = {10.1016/j.bulm.2004.12.003},
  keywords = {Algorithms; Analysis of Variance; Base Pairing; Base Sequence; Micro{RNA}s, chemistry; Models, Molecular; Nucleic Acid Conformation; Nucleotides, chemistry; {RNA}, Ribosomal, 5S, chemistry; {RNA}, Transfer, chemistry; {RNA}, chemistry; Statistical Distributions; Thermodynamics},
  owner = {ponty},
  pii = {S0092-8240(05)00006-6},
  pmid = {15998494},
  timestamp = {2010.05.10},
}
@Article{Freyhult2007,
  author = {Freyhult, Eva and Moulton, Vincent and Clote, Peter},
  title = {{Boltzmann} probability of {RNA} structural neighbors and riboswitch detection},
  journal = {Bioinformatics},
  year = {2007},
  volume = {23},
  number = {16},
  pages = {2054--2062},
  month = aug,
  issn = {1367-4811},
  abstract = {We describe algorithms implemented in a new software package, {RNA}bor, to investigate structures in a neighborhood of an input secondary structure S of an {RNA} sequence s. The input structure could be the minimum free energy structure, the secondary structure obtained by analysis of the X-ray structure or by comparative sequence analysis, or an arbitrary intermediate structure. A secondary structure T of s is called a delta-neighbor of S if T and S differ by exactly delta base pairs. {RNA}bor computes the number (N(delta)), the Boltzmann partition function (Z(delta)) and the minimum free energy (MFE(delta)) and corresponding structure over the collection of all delta-neighbors of S. This computation is done simultaneously for all delta < or = m, in run time O (mn3) and memory O(mn2), where n is the sequence length. We apply {RNA}bor for the detection of possible {RNA} conformational switches, and compare {RNA}bor with the switch detection method pa{RNA}ss. We also provide examples of how {RNA}bor can at times improve the accuracy of secondary structure prediction. http://bioinformatics.bc.edu/clotelab/{RNA}bor/. Supplementary data are available at Bioinformatics online.},
  chemicals = {{RNA}, Catalytic},
  citation-subset = {IM},
  completed = {2007-10-18},
  country = {England},
  doi = {10.1093/bioinformatics/btm314},
  issn-linking = {1367-4803},
  keywords = {Algorithms; Base Sequence; Computer Simulation; Models, Chemical; Models, Molecular; Models, Statistical; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, Catalytic, chemistry, ultrastructure; Sequence Analysis, {RNA}, methods; Structure-Activity Relationship},
  nlm-id = {9808944},
  owner = {NLM},
  pii = {btm314},
  pmid = {17573364},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2009-11-04},
}
@Article{Waldispuehl2008,
  author = {Waldispühl, Jérôme and Devadas, Srinivas and Berger, Bonnie and Clote, Peter},
  title = {Efficient algorithms for probing the {RNA} mutation landscape},
  journal = {PLoS Computational Biology},
  year = {2008},
  volume = {4},
  number = {8},
  pages = {e1000124},
  month = aug,
  issn = {1553-7358},
  abstract = {The diversity and importance of the role played by {RNA}s in the regulation and development of the cell are now well-known and well-documented. This broad range of functions is achieved through specific structures that have been (presumably) optimized through evolution. State-of-the-art methods, such as McCaskill's algorithm, use a statistical mechanics framework based on the computation of the partition function over the canonical ensemble of all possible secondary structures on a given sequence. Although secondary structure predictions from thermodynamics-based algorithms are not as accurate as methods employing comparative genomics, the former methods are the only available tools to investigate novel {RNA}s, such as the many {RNA}s of unknown function recently reported by the ENCODE consortium. In this paper, we generalize the McCaskill partition function algorithm to sum over the grand canonical ensemble of all secondary structures of all mutants of the given sequence. Specifically, our new program, {RNA}mutants, simultaneously computes for each integer k the minimum free energy structure MFE(k) and the partition function Z(k) over all secondary structures of all k-point mutants, even allowing the user to specify certain positions required not to mutate and certain positions required to base-pair or remain unpaired. This technically important extension allows us to study the resilience of an {RNA} molecule to pointwise mutations. By computing the mutation profile of a sequence, a novel graphical representation of the mutational tendency of nucleotide positions, we analyze the deleterious nature of mutating specific nucleotide positions or groups of positions.
We have successfully applied {RNA}mutants to investigate deleterious mutations (mutations that radically modify the secondary structure) in the Hepatitis C virus cis-acting replication element and to evaluate the evolutionary pressure applied on different regions of the HIV trans-activation response element. In particular, we show qualitative agreement between published Hepatitis C and HIV experimental mutagenesis studies and our analysis of deleterious mutations using {RNA}mutants. Our work also predicts other deleterious mutations, which could be verified experimentally. Finally, we provide evidence that the 3' UTR of the GB {RNA} virus C has been optimized to preserve evolutionarily conserved stem regions from a deleterious effect of pointwise mutations. We hope that there will be long-term potential applications of {RNA}mutants in de novo {RNA} design and drug design against {RNA} viruses. This work also suggests potential applications for large-scale exploration of the {RNA} sequence-structure network. Binary distributions are available at http://{RNA}mutants.csail.mit.edu/.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2008-12-18},
  country = {United States},
  doi = {10.1371/journal.pcbi.1000124},
  issn-linking = {1553-734X},
  keywords = {Algorithms; Cluster Analysis; Computational Biology, methods; Evolution, Molecular; HIV, genetics; Hepacivirus, genetics; Humans; Mutagenesis, physiology; Mutation, physiology; Nucleic Acid Conformation; {RNA}, chemistry, genetics; Replicon; Response Elements; Software; Thermodynamics},
  nlm-id = {101238922},
  owner = {NLM},
  pmc = {PMC2475669},
  pmid = {18688270},
  pubmodel = {Electronic},
  pubstatus = {epublish},
  revised = {2018-11-13},
}
@InProceedings{Cupal1996,
  author = {Jan Cupal and Ivo L. Hofacker and Peter F. Stadler},
  title = {Dynamic Programming Algorithm for the Density of States of {{RNA}} Secondary Structures},
  booktitle = {Proceedings of the German Conference on Bioinformatics, {GCB}},
  year = {1996},
  pages = {184--186},
  address = {Leipzig, Germany},
  month = sep,
  timestamp = {Sat, 07 Sep 2019 11:59:23 +0200},
}
@InProceedings{Lorenz2009,
  author = {Ronny Lorenz and Christoph Flamm and Ivo L. Hofacker},
  title = {{2D} Projections of {{RNA}} Folding Landscapes},
  booktitle = {German Conference on Bioinformatics 2009},
  year = {2009},
  editor = {Ivo Grosse and Steffen Neumann and Stefan Posch and Falk Schreiber and Peter F. Stadler},
  pages = {11--20},
  address = {Martin Luther University Halle-Wittenberg, Germany},
  month = sep,
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/bib/conf/gcb/LorenzFH09},
  timestamp = {Thu, 14 Nov 2019 16:35:28 +0100},
  url = {https://dl.gi.de/20.500.12116/20295},
}
@Article{Cooley1969,
  author = {J. {Cooley} and P. {Lewis} and P. {Welch}},
  title = {The finite {Fourier} transform},
  journal = {IEEE Transactions on Audio and Electroacoustics},
  year = {1969},
  volume = {17},
  number = {2},
  pages = {77--85},
  month = jun,
  issn = {1558-2582},
  doi = {10.1109/TAU.1969.1162036},
  keywords = {Fourier transforms, Convolution, Sampling methods, Frequency, Fast Fourier transforms, Mathematical analysis, Mathematics, Data analysis, Books, Fourier series},
}
@Article{Cooley1965,
  author = {James Cooley and John Tukey},
  title = {An Algorithm for the Machine Calculation of Complex {Fourier} Series},
  journal = {Mathematics of Computation},
  year = {1965},
  volume = {19},
  number = {90},
  pages = {297--301},
}
@Article{Kucharik2014,
  author = {Kucharík, Marcel and Hofacker, Ivo L and Stadler, Peter F and Qin, Jing},
  title = {Basin Hopping Graph: a computational framework to characterize {{RNA}} folding landscapes},
  journal = {Bioinformatics},
  year = {2014},
  volume = {30},
  number = {14},
  pages = {2009--2017},
  month = jul,
  issn = {1367-4811},
  abstract = {{{RNA}} folding is a complicated kinetic process. The minimum free energy structure provides only a static view of the most stable conformational state of the system. It is insufficient to give detailed insights into the dynamic behavior of {RNA}s. A sufficiently sophisticated analysis of the folding free energy landscape, however, can provide the relevant information. We introduce the Basin Hopping Graph (BHG) as a novel coarse-grained model of folding landscapes. Each vertex of the BHG is a local minimum, which represents the corresponding basin in the landscape. Its edges connect basins when the direct transitions between them are 'energetically favorable'. Edge weights encode the corresponding saddle heights and thus measure the difficulties of these favorable transitions. BHGs can be approximated accurately and efficiently for {{RNA}} molecules well beyond the length range accessible to enumerative algorithms. The algorithms described here are implemented in C++ as standalone programs. Its source code and supplemental material can be freely downloaded from http://www.tbi.univie.ac.at/bhg.html.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2014-09-18},
  country = {England},
  doi = {10.1093/bioinformatics/btu156},
  issn-linking = {1367-4803},
  keywords = {Algorithms; Computational Biology, methods; Kinetics; {RNA}, chemistry; {{RNA}} Folding},
  nlm-id = {9808944},
  owner = {NLM},
  pii = {btu156},
  pmc = {PMC4125618},
  pmid = {24648041},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2018-12-02},
}
@Unpublished{Voisin2019,
  author = {Voisin, Fr{\'e}d{\'e}ric and Gaudel, Marie-Claude},
  title = {Drawing uniformly at random in dynamic sets of paths},
  note = {HAL preprint},
  month = oct,
  year = {2019},
  hal_id = {hal-02314807},
  hal_version = {v1},
  pdf = {https://hal.archives-ouvertes.fr/hal-02314807/file/DynamicDrawing.pdf},
  url = {https://hal.archives-ouvertes.fr/hal-02314807},
}
@Article{Flajolet1994,
  author = {Philippe Flajolet and Paul Zimmermann and Bernard Van Cutsem},
  title = {A calculus for the random generation of labelled combinatorial structures},
  journal = {Theoretical Computer Science},
  year = {1994},
  volume = {132},
  number = {1},
  pages = {1--35},
  issn = {0304-3975},
  abstract = {A systematic approach to the random generation of labelled combinatorial objects is presented. It applies to structures that are decomposable, i.e., formally specifiable by grammars involving set, sequence, and cycle constructions. A general strategy is developed for solving the random generation problem with two closely related types of methods: for structures of size n, the boustrophedonic algorithms exhibit a worst-case behaviour of the form O(n log n); the sequential algorithms have worst case O(n2), while offering good potential for optimizations in the average case. The complexity model is in terms of arithmetic operations and both methods appeal to precomputed numerical table of linear size that can be computed in time O(n2). A companion calculus permits systematically to compute the average case cost of the sequential generation algorithm associated to a given specification. Using optimizations dictated by the cost calculus, several random generation algorithms of the sequential type are developed; most of them have expected complexity 1/2n log n, and are thus only slightly superlinear. The approach is exemplified by the random generation of a number of classical combinatorial structures including Cayley trees, hierarchies, the cycle decomposition of permutations, binary trees, functional graphs, surjections, and set partitions.},
  doi = {10.1016/0304-3975(94)90226-7},
  url = {http://www.sciencedirect.com/science/article/pii/0304397594902267},
}
@Article{Ieong2003,
  author = {Samuel Ieong and Ming-yang Kao and Tak-wah Lam and Wing-kin Sung and Siu-ming Yiu},
  title = {Predicting {RNA} Secondary Structures with Arbitrary Pseudoknots by Maximizing the Number of Stacking Pairs},
  journal = {Journal of Computational Biology},
  year = {2003},
  volume = {10},
  number = {6},
  pages = {981--995},
  owner = {ponty},
  timestamp = {2012.06.25},
}
@Article{Tabaska1998,
  author = {J. E. Tabaska and R. B. Cary and H. N. Gabow and G. D. Stormo},
  title = {An {{RNA}} folding method capable of identifying pseudoknots and base triples},
  journal = {Bioinformatics},
  year = {1998},
  volume = {14},
  number = {8},
  pages = {691--699},
  institution = {1Department of Molecular, Cellular and Developmental Biology and 2Department of Computer Science, University of Colorado, Boulder, CO 80309, USA.},
  keywords = {Algorithms; Bacillus subtilis, genetics; Base Sequence; Escherichia coli, genetics; Molecular Sequence Data; Nucleic Acid Conformation; Phylogeny; {RNA}, Bacterial, chemistry; {RNA}, chemistry; Thermodynamics},
  language = {eng},
  medline-pst = {ppublish},
  owner = {ponty},
  pii = {btb097},
  pmid = {9789095},
  timestamp = {2011.08.16},
}
@InProceedings{Lyngso2004,
  author = {Rune Lyngs{\o}},
  title = {Complexity of Pseudoknot Prediction in Simple Models},
  booktitle = {Proceedings of ICALP},
  year = {2004},
  owner = {ponty},
  timestamp = {2012.06.25},
}
@Article{Lyngso1999,
  author = {Rune B. Lyngs{\o} and Michael Zuker and Christian N. S. Pedersen},
  title = {Fast evaluation of internal loops in {RNA} secondary structure prediction},
  journal = {Bioinformatics},
  year = {1999},
  volume = {15},
  number = {6},
  pages = {440--445},
}
@Article{Markham2005,
  author = {N. R. Markham and M. Zuker},
  title = {{DINAMelt} web server for nucleic acid melting prediction},
  journal = {Nucleic Acids Research},
  year = {2005},
  volume = {33},
  pages = {W577--W581},
}
@Article{Reeder2004,
  author = {J. Reeder and R. Giegerich},
  title = {Design, implementation and evaluation of a practical pseudoknot folding algorithm based on thermodynamics},
  journal = {BMC Bioinformatics},
  year = {2004},
  volume = {5},
  pages = {104},
}
@Article{Reidys2011,
  author = {Reidys, Christian M. and Huang, Fenix W D. and Andersen, Jørgen E. and Penner, Robert C. and Stadler, Peter F. and Nebel, Markus E.},
  title = {Topology and prediction of {RNA} pseudoknots},
  journal = {Bioinformatics},
  year = {2011},
  volume = {27},
  number = {8},
  pages = {1076--1085},
  month = apr,
  abstract = {Several dynamic programming algorithms for predicting {RNA} structures
with pseudoknots have been proposed that differ dramatically from
one another in the classes of structures considered. Here, we use
the natural topological classification of {RNA} structures in terms
of irreducible components that are embeddable in the surfaces of
fixed genus. We add to the conventional secondary structures four
building blocks of genus one in order to construct certain structures
of arbitrarily high genus. A corresponding unambiguous multiple context-free
grammar provides an efficient dynamic programming approach for energy
minimization, partition function and stochastic sampling. It admits
a topology-dependent parametrization of pseudoknot penalties that
increases the sensitivity and positive predictive value of predicted
base pairs by 10-20\% compared with earlier approaches. More general
models based on building blocks of higher genus are also discussed. The
source code of gfold is freely available at http://www.combinatorics.cn/cbpc/gfold.tar.gz. Contact: duck@santafe.edu. Supplementary
data are available at Bioinformatics online.},
  doi = {10.1093/bioinformatics/btr090},
  institution = {Department of Mathematics, Center for Combinatorics, Key Laboratory of Pure Mathematics and Combinatorics, College of Life Science, Nankai University Tianjin 300071, PR China. duck@santafe.edu},
  keywords = {Algorithms; Base Pairing; Nucleic Acid Conformation; {RNA}, chemistry/classification; Sequence Analysis, {RNA}; Software},
  language = {eng},
  medline-pst = {ppublish},
  owner = {Yann},
  pii = {btr090},
  pmid = {21335320},
  timestamp = {2014.11.27},
  url = {http://dx.doi.org/10.1093/bioinformatics/btr090},
}
@Article{Cao2006,
  author = {S. Cao and S. J. Chen},
  title = {Predicting {{RNA}} pseudoknot folding thermodynamics},
  journal = {Nucleic Acids Research},
  year = {2006},
  volume = {34},
  number = {9},
  pages = {2634--2652},
}
@Article{Rivas1999,
  author = {E. Rivas and S. R. Eddy},
  title = {A dynamic programming algorithm for {{RNA}} structure prediction including pseudoknots},
  journal = {Journal of Molecular Biology},
  year = {1999},
  volume = {285},
  pages = {2053--2068},
  owner = {ponty},
  timestamp = {2008.12.28},
}
@Article{Bon2011,
  author = {Bon, Michaël and Orland, Henri},
  title = {{TT2NE}: a novel algorithm to predict {RNA} secondary structures with pseudoknots},
  journal = {Nucleic Acids Research},
  year = {2011},
  volume = {39},
  number = {14},
  pages = {e93},
  month = may,
  issn = {0305-1048},
  abstract = {We present TT2NE, a new algorithm to predict {RNA} secondary structures with pseudoknots. The method is based on a classification of {RNA} structures according to their topological genus. TT2NE is guaranteed to find the minimum free energy structure regardless of pseudoknot topology. This unique proficiency is obtained at the expense of the maximum length of sequences that can be treated, but comparison with state-of-the-art algorithms shows that TT2NE significantly improves the quality of predictions. Analysis of TT2NE's incorrect predictions sheds light on the need to study how sterical constraints limit the range of pseudoknotted structures that can be formed from a given sequence. An implementation of TT2NE on a public server can be found at http://ipht.cea.fr/rna/tt2ne.php.},
  doi = {10.1093/nar/gkr240},
  eprint = {https://academic.oup.com/nar/article-pdf/39/14/e93/18786604/gkr240.pdf},
  url = {https://doi.org/10.1093/nar/gkr240},
}
@Article{Dirks2003,
  author = {Dirks, R. M. and Pierce, N. A.},
  title = {A partition function algorithm for nucleic acid secondary structure including pseudoknots},
  journal = {Journal of Computational Chemistry},
  year = {2003},
  volume = {24},
  pages = {1664--1677},
}
@Article{Markham2008,
  author = {Markham, Nicholas R and Zuker, Michael},
  title = {{UNAFold}: software for nucleic acid folding and hybridization},
  journal = {Methods in Molecular Biology},
  year = {2008},
  volume = {453},
  pages = {3--31},
  issn = {1064-3745},
  abstract = {The UNAFold software package is an integrated collection of programs that simulate folding, hybridization, and melting pathways for one or two single-stranded nucleic acid sequences. The name is derived from "Unified Nucleic Acid Folding." Folding (secondary structure) prediction for single-stranded {RNA} or DNA combines free energy minimization, partition function calculations and stochastic sampling. For melting simulations, the package computes entire melting profiles, not just melting temperatures. UV absorbance at 260 nm, heat capacity change (C(p)), and mole fractions of different molecular species are computed as a function of temperature. The package installs and runs on all Unix and Linux platforms that we have looked at, including Mac OS X. Images of secondary structures, hybridizations, and dot plots may be computed using common formats. Similarly, a variety of melting profile plots is created when appropriate. These latter plots include experimental results if they are provided. The package is "command line" driven. Underlying compiled programs may be used individually, or in special combinations through the use of a variety of Perl scripts. Users are encouraged to create their own scripts to supplement what comes with the package. This evolving software is available for download at http://www.bioinfo.rpi.edu/applications/hybrid/download.php .},
  chemicals = {{RNA}, DNA},
  citation-subset = {IM},
  completed = {2008-10-10},
  country = {United States},
  doi = {10.1007/978-1-60327-429-6_1},
  issn-linking = {1064-3745},
  keywords = {Base Sequence; DNA, chemistry; Databases, Nucleic Acid; Nucleic Acid Conformation; Nucleic Acid Denaturation; Nucleic Acid Hybridization; {RNA}, chemistry; Software; Thermodynamics},
  nlm-id = {9214969},
  owner = {NLM},
  pmid = {18712296},
  pubmodel = {Print},
  pubstatus = {ppublish},
  revised = {2008-08-20},
}
@InProceedings{Theis2010,
  author = {Theis, Corinna and Janssen, Stefan and Giegerich, Robert},
  title = {Prediction of {RNA} Secondary Structure Including Kissing Hairpin Motifs},
  booktitle = {Algorithms in Bioinformatics},
  year = {2010},
  editor = {Moulton, Vincent and Singh, Mona},
  pages = {52--64},
  address = {Berlin, Heidelberg},
  publisher = {Springer Berlin Heidelberg},
  abstract = {We present three heuristic strategies for folding {RNA} sequences into secondary structures including kissing hairpin motifs. The new idea is to construct a kissing hairpin motif from an overlay of two simple canonical pseudoknots. The difficulty is that the overlay does not satisfy Bellman's Principle of Optimality, and the kissing hairpin cannot simply be built from optimal pseudoknots. Our strategies have time/space complexities of O(n4) / O(n2), O(n4) / O(n3), and O(n5) / O(n2). All strategies have been implemented in the program pKiss and were evaluated against known structures. Surprisingly, our simplest strategy performs best. As it has the same complexity as the previous algorithm for simple pseudoknots, the overlay idea opens a way to construct a variety of practically useful algorithms for pseudoknots of higher topological complexity within O(n4) time and O(n2) space.},
  isbn = {978-3-642-15294-8},
}
@Article{Saule2011,
author = {Saule, Cédric and Régnier, Mireille and Steyaert, Jean-Marc and Denise, Alain},
title = {Counting {RNA} pseudoknotted structures},
journal = {Journal of Computational Biology},
year = {2011},
volume = {18},
number = {10},
pages = {1339--1351},
month = oct,
issn = {1557-8666},
abstract = {In 2004, Condon and coauthors gave a hierarchical classification of exact {RNA} structure prediction algorithms according to the generality of structure classes that they handle. We complete this classification by adding two recent prediction algorithms. More importantly, we precisely quantify the hierarchy by giving closed or asymptotic formulas for the theoretical number of structures of given size n in all the classes but one. This allows us to assess the tradeoff between the expressiveness and the computational complexity of {RNA} structure prediction algorithms.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2012-01-19},
country = {United States},
doi = {10.1089/cmb.2010.0086},
issn-linking = {1066-5277},
keywords = {Algorithms; Computational Biology, methods; Computer Simulation; Models, Molecular; Nucleic Acid Conformation; Probability; {RNA}, chemistry; Sequence Analysis, {RNA}},
nlm-id = {9433358},
owner = {NLM},
pmc = {PMC3179619},
pmid = {21548808},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@InProceedings{Huang2005,
title = {Better k-best parsing},
author = {Huang, Liang and Chiang, David},
booktitle = {Proceedings of the Ninth International Workshop on Parsing Technology},
organization = {Association for Computational Linguistics},
pages = {53--64},
year = 2005,
}
@Article{Bellman1954,
title = {The theory of dynamic programming},
author = {Bellman, Richard},
journal = {Bulletin of the American Mathematical Society},
publisher = {American Mathematical Society},
volume = 60,
number = 6,
pages = {503--515},
year = 1954,
}
@Article{HoenerzuSiederdissen2012,
author = {{H{\"o}ner zu Siederdissen}, Christian},
title = {Sneaking around {concatMap}: Efficient Combinators for Dynamic Programming},
journal = {{ACM} {SIGPLAN} Notices},
year = {2012},
volume = {47},
number = {9},
pages = {215--226},
month = sep,
issn = {0362-1340},
address = {New York, NY, USA},
doi = {10.1145/2398856.2364559},
issue_date = {October 2012},
keywords = {algebraic dynamic programming, program fusion, functional programming},
numpages = {12},
publisher = {Association for Computing Machinery},
url = {https://doi.org/10.1145/2398856.2364559},
}
@Article{Mohri2002,
author = {Mohri, Mehryar},
title = {Semiring Frameworks and Algorithms for Shortest-Distance Problems},
journal = {Journal of Automata, Languages and Combinatorics},
year = {2002},
volume = {7},
number = {3},
pages = {321--350},
month = jan,
issn = {1430-189X},
address = {Magdeburg, Germany},
issue_date = {January 2002},
keywords = {semirings, rational power series, shortest-paths algorithms, finite automata},
numpages = {30},
publisher = {Otto-von-Guericke-Universit{\"a}t},
}
@Book{Miklos2019,
title = {Computational Complexity of Counting and Sampling},
publisher = {Chapman and Hall/{CRC}},
year = {2019},
author = {Mikl{\'{o}}s, Istv{\'{a}}n},
series = {Discrete Mathematics and Its Applications},
month = feb,
doi = {10.1201/b22024},
}
@Article{Lescoute2005,
author = {Lescoute, Aurélie and Leontis, Neocles B and Massire, Christian and Westhof, Eric},
title = {Recurrent structural {RNA} motifs, Isostericity Matrices and sequence alignments},
journal = {Nucleic Acids Research},
year = {2005},
volume = {33},
number = {8},
pages = {2395--2409},
issn = {1362-4962},
abstract = {The occurrences of two recurrent motifs in ribosomal {RNA} sequences, the Kink-turn and the C-loop, are examined in crystal structures and systematically compared with sequence alignments of r{RNA}s from the three kingdoms of life in order to identify the range of the structural and sequence variations. Isostericity Matrices are used to analyze structurally the sequence variations of the characteristic non-Watson-Crick base pairs for each motif. We show that Isostericity Matrices for non-Watson-Crick base pairs provide important tools for deriving the sequence signatures of recurrent motifs, for scoring and refining sequence alignments, and for determining whether motifs are conserved throughout evolution. The systematic use of Isostericity Matrices identifies the positions of the insertion or deletion of one or more nucleotides relative to the structurally characterized examples of motifs and, most importantly, specifies whether these changes result in new motifs. Thus, comparative analysis coupled with Isostericity Matrices allows one to produce and refine structural sequence alignments. The analysis, based on both sequence and structure, permits therefore the evaluation of the conservation of motifs across phylogeny and the derivation of rules of equivalence between structural motifs. The conservations observed in Isostericity Matrices form a predictive basis for identifying motifs in sequences.},
chemicals = {{RNA}, Ribosomal},
citation-subset = {IM},
completed = {2005-05-05},
country = {England},
doi = {10.1093/nar/gki535},
issn-linking = {0305-1048},
keywords = {Base Pairing; Models, Molecular; Nucleic Acid Conformation; {RNA}, Ribosomal, chemistry; Sequence Alignment; Sequence Analysis, {RNA}, methods; Sequence Homology, Nucleic Acid},
nlm-id = {0411011},
owner = {NLM},
pii = {33/8/2395},
pmc = {PMC1087784},
pmid = {15860776},
pubmodel = {Electronic-Print},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Stombaugh2009,
author = {Stombaugh, Jesse and Zirbel, Craig L and Westhof, Eric and Leontis, Neocles B},
title = {Frequency and isostericity of {RNA} base pairs},
journal = {Nucleic Acids Research},
year = {2009},
volume = {37},
number = {7},
pages = {2294--2312},
month = apr,
issn = {1362-4962},
abstract = {Most of the hairpin, internal and junction loops that appear single-stranded in standard {RNA} secondary structures form recurrent 3D motifs, where non-Watson-Crick base pairs play a central role. Non-Watson-Crick base pairs also play crucial roles in tertiary contacts in structured {RNA} molecules. We previously classified {RNA} base pairs geometrically so as to group together those base pairs that are structurally similar (isosteric) and therefore able to substitute for each other by mutation without disrupting the 3D structure. Here, we introduce a quantitative measure of base pair isostericity, the IsoDiscrepancy Index (IDI), to more accurately determine which base pair substitutions can potentially occur in conserved motifs. We extract and classify base pairs from a reduced-redundancy set of {RNA} 3D structures from the Protein Data Bank (PDB) and calculate centroids (exemplars) for each base combination and geometric base pair type (family). We use the exemplars and IDI values to update our online Basepair Catalog and the Isostericity Matrices (IM) for each base pair family. From the database of base pairs observed in 3D structures we derive base pair occurrence frequencies for each of the 12 geometric base pair families. In order to improve the statistics from the 3D structures, we also derive base pair occurrence frequencies from r{RNA} sequence alignments.},
chemicals = {{RNA}, Bacterial, {RNA}, Ribosomal, {RNA}},
citation-subset = {IM},
completed = {2009-06-04},
country = {England},
doi = {10.1093/nar/gkp011},
issn-linking = {0305-1048},
keywords = {Base Pairing; Base Sequence; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; {RNA}, Bacterial, chemistry; {RNA}, Ribosomal, chemistry; Sequence Alignment; Sequence Analysis, {RNA}},
nlm-id = {0411011},
owner = {NLM},
pii = {gkp011},
pmc = {PMC2673412},
pmid = {19240142},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Jiang1995,
author = {Tao Jiang and Lusheng Wang and Kaizhong Zhang},
title = {Alignment of Trees --- An Alternative to Tree Edit},
journal = {Theoretical Computer Science},
year = {1995},
volume = {143},
number = {1},
pages = {137--148},
bibsource = {dblp computer science bibliography, http://dblp.org},
biburl = {http://dblp.uni-trier.de/rec/bib/journals/tcs/JiangWZ95},
doi = {10.1016/0304-3975(95)80029-9},
timestamp = {Wed, 19 Nov 2003 16:10:52 +0100},
url = {http://dx.doi.org/10.1016/0304-3975(95)80029-9},
}
@Article{Blin2010,
author = {Guillaume Blin and Alain Denise and Serge Dulucq and Claire Herrbach and H{\'{e}}l{\`{e}}ne Touzet},
title = {Alignments of {RNA} Structures},
journal = {{IEEE/ACM} Transactions on Computational Biology and Bioinformatics},
year = {2010},
volume = {7},
number = {2},
pages = {309--322},
bibsource = {dblp computer science bibliography, http://dblp.org},
biburl = {http://dblp.uni-trier.de/rec/bib/journals/tcbb/BlinDDHT10},
doi = {10.1145/1791396.1791409},
timestamp = {Wed, 12 May 2010 09:19:07 +0200},
url = {http://doi.acm.org/10.1145/1791396.1791409},
}
@Article{Herrbach2010,
author = {Claire Herrbach and Alain Denise and Serge Dulucq},
title = {Average complexity of the {Jiang}-{Wang}-{Zhang} pairwise tree alignment algorithm and of a {RNA} secondary structure alignment algorithm},
journal = {Theoretical Computer Science},
year = {2010},
volume = {411},
number = {26--28},
pages = {2423--2432},
bibsource = {dblp computer science bibliography, http://dblp.org},
biburl = {http://dblp.uni-trier.de/rec/bib/journals/tcs/HerrbachDD10},
doi = {10.1016/j.tcs.2010.01.014},
timestamp = {Thu, 27 May 2010 14:38:31 +0200},
url = {http://dx.doi.org/10.1016/j.tcs.2010.01.014},
}
@Article{Schirmer2013,
author = {Stefanie Schirmer and Robert Giegerich},
title = {Forest alignment with affine gaps and anchors, applied in {RNA} structure comparison},
journal = {Theoretical Computer Science},
year = {2013},
volume = {483},
pages = {51--67},
bibsource = {dblp computer science bibliography, http://dblp.org},
biburl = {http://dblp.uni-trier.de/rec/bib/journals/tcs/SchirmerG13},
doi = {10.1016/j.tcs.2012.07.040},
timestamp = {Thu, 18 Apr 2013 14:14:31 +0200},
url = {http://dx.doi.org/10.1016/j.tcs.2012.07.040},
}
@Article{Viterbi1967,
author = {Viterbi, Andrew J.},
title = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
journal = {{IEEE} Transactions on Information Theory},
year = {1967},
volume = {13},
number = {2},
pages = {260--269},
month = apr,
doi = {10.1109/tit.1967.1054010},
publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}
@Article{Fontana1993,
author = {Fontana, Walter and Stadler, Peter F. and Bornberg-Bauer, Erich G. and Griesmacher, Thomas and Hofacker, Ivo L. and Tacker, Manfred and Tarazona, Pedro and Weinberger, Edward D. and Schuster, Peter},
title = {{RNA} folding and combinatory landscapes},
journal = {Physical Review E},
year = {1993},
volume = {47},
number = {3},
pages = {2083--2099},
month = mar,
doi = {10.1103/PhysRevE.47.2083},
publisher = {American Physical Society},
url = {https://link.aps.org/doi/10.1103/PhysRevE.47.2083},
}
@Article{Will2012,
author = {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L and Stadler, Peter F and Backofen, Rolf},
title = {{LocARNA-P}: accurate boundary prediction and improved detection of structural {RNAs}},
journal = {{RNA}},
year = {2012},
volume = {18},
number = {5},
pages = {900--914},
month = may,
issn = {1469-9001},
abstract = {Current genomic screens for noncoding {RNA}s (nc{RNA}s) predict a large number of genomic regions containing potential structural nc{RNA}s. The analysis of these data requires highly accurate prediction of nc{RNA} boundaries and discrimination of promising candidate nc{RNA}s from weak predictions. Existing methods struggle with these goals because they rely on sequence-based multiple sequence alignments, which regularly misalign {RNA} structure and therefore do not support identification of structural similarities. To overcome this limitation, we compute columnwise and global reliabilities of alignments based on sequence and structure similarity; we refer to these structure-based alignment reliabilities as STARs. The columnwise STARs of alignments, or STAR profiles, provide a versatile tool for the manual and automatic analysis of nc{RNA}s. In particular, we improve the boundary prediction of the widely used nc{RNA} gene finder {RNA}z by a factor of 3 from a median deviation of 47 to 13 nt. Post-processing {RNA}z predictions, LocA{RNA}-P's STAR score allows much stronger discrimination between true- and false-positive predictions than {RNA}z's own evaluation. The improved accuracy, in this scenario increased from AUC 0.71 to AUC 0.87, significantly reduces the cost of successive analysis steps. The ready-to-use software tool LocA{RNA}-P produces structure-based multiple {RNA} alignments with associated columnwise STARs and predicts nc{RNA} boundaries. We provide additional results, a web server for LocA{RNA}/LocA{RNA}-P, and the software package, including documentation and a pipeline for refining screens for structural nc{RNA}, at http://www.bioinf.uni-freiburg.de/Supplements/LocA{RNA}-P/.},
chemicals = {{RNA}, Untranslated, {RNA}},
citation-subset = {IM},
completed = {2012-06-12},
country = {United States},
doi = {10.1261/rna.029041.111},
issn-linking = {1355-8382},
keywords = {Algorithms; Computational Biology, methods; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; {RNA}, Untranslated, chemistry; Reproducibility of Results; Sequence Alignment; Software},
nlm-id = {9509184},
owner = {NLM},
pii = {rna.029041.111},
pmc = {PMC3334699},
pmid = {22450757},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@PhdThesis{Pivoteau2008,
author = {Pivoteau, Carine},
title = {Génération aléatoire de structures combinatoires : méthode de {Boltzmann} effective},
school = {{Université Pierre et Marie Curie - Paris VI}},
year = {2008},
}
@Article{Pivoteau2012,
author = {Carine Pivoteau and Bruno Salvy and Mich{\`{e}}le Soria},
title = {Algorithms for combinatorial structures: Well-founded systems and {Newton} iterations},
journal = {Journal of Combinatorial Theory, Series A},
year = {2012},
volume = {119},
number = {8},
pages = {1711--1773},
month = nov,
doi = {10.1016/j.jcta.2012.05.007},
publisher = {Elsevier {BV}},
}
@Article{Flajolet1992,
author = {Philippe Flajolet and Danièle Gardy and Loÿs Thimonier},
title = {Birthday paradox, coupon collectors, caching algorithms and self-organizing search},
journal = {Discrete Applied Mathematics},
year = {1992},
volume = {39},
number = {3},
pages = {207--229},
issn = {0166-218X},
abstract = {This paper introduces a unified framework for the analysis of a class of random allocation processes that include: (i) the birthday paradox; (ii) the coupon collector problem; (iii) least-recently-used (LRU) caching in memory management systems under the independent reference model; (iv) the move-to-front heuristic of self-organizing search. All analyses are relative to general nonuniform probability distributions. Our approach to these problems comprises two stages. First, the probabilistic phenomena of interest are described by means of regular languages extended by addition of the shuffle product. Next, systematic translation mechanisms are used to derive integral representations for expectations and probability distributions.},
doi = {10.1016/0166-218X(92)90177-C},
url = {http://www.sciencedirect.com/science/article/pii/0166218X9290177C},
}
@InProceedings{Berenbrink2009,
author = {Berenbrink, Petra and Sauerwald, Thomas},
title = {The weighted coupon collector's problem and applications},
booktitle = {15th International Computing and Combinatorics Conference ({COCOON}'09)},
year = {2009},
owner = {ponty},
timestamp = {2009.07.21},
}
@Article{Hellmuth2009,
author = {Hellmuth, Marc and Merkle, Daniel and Middendorf, Martin},
title = {Extended shapes for the combinatorial design of {RNA} sequences},
journal = {International Journal of Computational Biology and Drug Design},
year = {2009},
volume = {2},
number = {4},
pages = {371--384},
issn = {1756-0756},
__markedentry = {[yann:]},
abstract = {It is known that for two given secondary structures (defined by position of base pairings) an {RNA} string can easily be found that can fold into both structures. For more than two secondary structures this is not necessarily possible. In this paper, we introduce pseudo edges that are used to forbid that certain base pairs can bind and therefore can be used to define the properties of possible {RNA} secondary structures. We study the complexity of the problem to design an {RNA} sequence that can fold into different secondary structures each of them is described by a set of required and forbidden base pairs. We refine the NP-completeness results of Clote et al. (2005) and show an analogous NP-completeness result for the realisation problem concerning the removal of (pseudo) edges. We also present a polynomial time method for checking the realisability of extended shape graphs. Furthermore, we empirically analyse the influence of pseudo edges on the realisability for sets of random {RNA} sequences and for sets of aptamers.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2010-04-15},
country = {England},
doi = {10.1504/IJCBDD.2009.030767},
issn-linking = {1756-0756},
keywords = {Base Sequence; Computational Biology, methods; Drug Design; Nucleic Acid Conformation; {RNA}, chemistry},
nlm-id = {101479540},
owner = {NLM},
pii = {IJCBDD.2009.030767},
pmid = {20090177},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2010-01-21},
}
@Article{Groot2019,
author = {{Sanchez de Groot}, Natalia and Armaos, Alexandros and Gra{\~{n}}a-Montes, Ricardo and Alriquet, Marion and Calloni, Giulia and Vabulas, R. Martin and Tartaglia, Gian Gaetano},
title = {{RNA} structure drives interaction with proteins},
journal = {Nature Communications},
year = {2019},
volume = {10},
number = {1},
month = jul,
doi = {10.1038/s41467-019-10923-5},
publisher = {Springer Science and Business Media {LLC}},
}
@Article{Parisien2008,
author = {Parisien, Marc and Major, François},
title = {The {MC-Fold} and {MC-Sym} pipeline infers {RNA} structure from sequence data},
journal = {Nature},
year = {2008},
volume = {452},
number = {7183},
pages = {51--55},
month = mar,
issn = {1476-4687},
__markedentry = {[yann:]},
abstract = {The classical {RNA} secondary structure model considers A.U and G.C Watson-Crick as well as G.U wobble base pairs. Here we substitute it for a new one, in which sets of nucleotide cyclic motifs define {RNA} structures. This model allows us to unify all base pairing energetic contributions in an effective scoring function to tackle the problem of {RNA} folding. We show how pipelining two computer algorithms based on nucleotide cyclic motifs, MC-Fold and MC-Sym, reproduces a series of experimentally determined {RNA} three-dimensional structures from the sequence. This demonstrates how crucial the consideration of all base-pairing interactions is in filling the gap between sequence and structure. We use the pipeline to define rules of precursor micro{RNA} folding in double helices, despite the presence of a number of presumed mismatches and bulges, and to propose a new model of the human immunodeficiency virus-1 -1 frame-shifting element.},
chemicals = {Micro{RNA}s, {RNA} Precursors, {RNA}, Viral, {RNA}},
citation-subset = {IM},
completed = {2008-04-09},
country = {England},
doi = {10.1038/nature06684},
internal-note = {Citation key was empty and has been assigned as Parisien2008; check that no other entry in this file already uses that key.},
issn-linking = {0028-0836},
keywords = {Algorithms; Base Pairing; Base Sequence; Computational Biology; Frameshifting, Ribosomal; Genes, gag, genetics; Genes, pol, genetics; HIV-1, genetics; Humans; Micro{RNA}s, chemistry, metabolism; Models, Genetic; Models, Molecular; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, chemistry, genetics; {RNA} Precursors, chemistry, metabolism; {RNA}, Viral, chemistry, genetics, metabolism; Software; Thermodynamics},
nlm-id = {0410462},
owner = {NLM},
pii = {nature06684},
pmid = {18322526},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2008-03-06},
}
@Article{Andronescu2004,
author = {Andronescu, Mirela and Fejes, Anthony P. and Hutter, Frank and Hoos, Holger H. and Condon, Anne},
title = {A new algorithm for {RNA} secondary structure design},
journal = {Journal of Molecular Biology},
year = {2004},
volume = {336},
number = {3},
pages = {607--624},
doi = {10.1016/j.jmb.2003.12.041},
}
@Article{Avihoo2011,
author = {Avihoo, Assaf and Churkin, Alexander and Barash, Danny},
title = {{RNAexinv}: An Extended Inverse {RNA} Folding from Shape and Physical Attributes to Sequences},
journal = {BMC Bioinformatics},
year = {2011},
volume = {12},
number = {1},
pages = {319},
doi = {10.1186/1471-2105-12-319},
}
@Article{Bellaousov2018,
author = {Bellaousov, Stanislav and Kayedkhordeh, Mohammad and Peterson, Raymond J and Mathews, David H},
title = {Accelerated {RNA} secondary structure design using preselected sequences for helices and loops},
journal = {{RNA}},
year = {2018},
volume = {24},
number = {11},
pages = {1555--1567},
month = nov,
issn = {1469-9001},
abstract = {Nucleic acids can be designed to be nano-machines, pharmaceuticals, or probes. {RNA} secondary structures can form the basis of self-assembling nanostructures. There are only four natural {RNA} bases, therefore it can be difficult to design sequences that fold to a single, specified structure because many other structures are often possible for a given sequence. One approach taken by state-of-the-art sequence design methods is to select sequences that fold to the specified structure using stochastic, iterative refinement. The goal of this work is to accelerate design. Many existing iterative methods select and refine sequences one base pair and one unpaired nucleotide at a time. Here, the hypothesis that sequences can be preselected in order to accelerate design was tested. To this aim, a database was built of helix sequences that demonstrate thermodynamic features found in natural sequences and that also have little tendency to cross-hybridize. Additionally, a database was assembled of {RNA} loop sequences with low helix-formation propensity and little tendency to cross-hybridize with either the helices or other loops. These databases of preselected sequences accelerate the selection of sequences that fold with minimal ensemble defect by replacing some of the trial and error of current refinement approaches. When using the database of preselected sequences as compared to randomly chosen sequences, sequences for natural structures are designed 36 times faster, and random structures are designed six times faster. The sequences selected with the aid of the database have similar ensemble defect as those sequences selected at random. The sequence database is part of {RNA}structure package at http://rna.urmc.rochester.edu/{RNA}structure.html.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2018-12-31},
country = {United States},
doi = {10.1261/rna.066324.118},
issn-linking = {1355-8382},
keywords = {Algorithms; Computational Biology, methods; Databases, Nucleic Acid; Nucleic Acid Conformation; {RNA}, chemistry; {RNA} Folding; Sequence Analysis, {RNA}; Thermodynamics; {RNA} folding thermodynamics; {RNA} partition function; {RNA} sequence design; ensemble defect},
nlm-id = {9509184},
owner = {NLM},
pii = {rna.066324.118},
pmc = {PMC6191713},
pmid = {30097542},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-12-31},
}
@Article{Busch2007,
author = {Busch, Anke and Backofen, Rolf},
title = {{INFO-RNA}---a server for fast inverse {RNA} folding satisfying sequence constraints},
journal = {Nucleic Acids Research},
year = {2007},
volume = {35},
number = {Web Server issue},
pages = {W310--W313},
doi = {10.1093/nar/gkm218},
}
@Article{Busch2006,
author = {Busch, Anke and Backofen, Rolf},
title = {{INFO-RNA}---a fast approach to inverse {RNA} folding},
journal = {Bioinformatics},
year = {2006},
volume = {22},
number = {15},
pages = {1823--1831},
doi = {10.1093/bioinformatics/btl194},
}
@Article{Churkin2018,
author = {Churkin, Alexander and Retwitzer, Matan Drory and Reinharz, Vladimir and Ponty, Yann and Waldisp{\"u}hl, J{\'e}r{\^o}me and Barash, Danny},
title = {Design of {RNAs}: comparing programs for inverse {RNA} folding},
journal = {Briefings in Bioinformatics},
year = {2018},
volume = {19},
number = {2},
pages = {350--358},
month = mar,
issn = {1477-4054},
abstract = {Computational programs for predicting {RNA} sequences with desired folding properties have been extensively developed and expanded in the past several years. Given a secondary structure, these programs aim to predict sequences that fold into a target minimum free energy secondary structure, while considering various constraints. This procedure is called inverse {RNA} folding. Inverse {RNA} folding has been traditionally used to design optimized {RNA}s with favorable properties, an application that is expected to grow considerably in the future in light of advances in the expanding new fields of synthetic biology and {RNA} nanostructures. Moreover, it was recently demonstrated that inverse {RNA} folding can successfully be used as a valuable preprocessing step in computational detection of novel noncoding {RNA}s. This review describes the most popular freeware programs that have been developed for such purposes, starting from {RNA}inverse that was devised when formulating the inverse {RNA} folding problem. The most recently published ones that consider {RNA} secondary structure as input are anta{RNA}, {RNA}iFold and inca{RNA}fbinv, each having different features that could be beneficial to specific biological problems in practice. The various programs also use distinct approaches, ranging from ant colony optimization to constraint programming, in addition to adaptive walk, simulated annealing and Boltzmann sampling. This review compares between the various programs and provides a simple description of the various possibilities that would benefit practitioners in selecting the most suitable program. It is geared for specific tasks requiring {RNA} design based on input secondary structure, with an outlook toward the future of {RNA} design programs.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2019-02-28},
country = {England},
doi = {10.1093/bib/bbw120},
issn-linking = {1467-5463},
keywords = {Algorithms; Animals; Computational Biology, methods; Humans; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; {RNA} Folding; Software},
nlm-id = {100912837},
owner = {NLM},
pii = {2666340},
pmc = {PMC6018860},
pmid = {28049135},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2019-02-28},
}
@InProceedings{Dai2009,
title = {{rnaDesign}: Local search for {{RNA}} secondary structure design},
author = {Dai, Denny C. and Tsang, Herbert H. and Wiese, Kay C.},
booktitle = {Proceedings of the 2009 {IEEE} Symposium on Computational Intelligence in Bioinformatics and Computational Biology, {CIBCB} 2009, Nashville, TN, USA, March 30 - April 2, 2009},
publisher = {{IEEE}},
pages = {1--7},
year = 2009,
url = {https://doi.org/10.1109/CIBCB.2009.4925700},
doi = {10.1109/CIBCB.2009.4925700},
}
@Article{Ding2005,
author = {Ding, Ye and Chan, Chi Yu and Lawrence, Charles E},
title = {{RNA} secondary structure prediction by centroids in a Boltzmann weighted ensemble},
journal = {{RNA}},
year = {2005},
volume = {11},
number = {8},
pages = {1157--1166},
month = aug,
issn = {1355-8382},
abstract = {Prediction of {RNA} secondary structure by free energy minimization has been the standard for over two decades. Here we describe a novel method that forsakes this paradigm for predictions based on Boltzmann-weighted structure ensemble. We introduce the notion of a centroid structure as a representative for a set of structures and describe a procedure for its identification. In comparison with the minimum free energy (MFE) structure using diverse types of structural {RNA}s, the centroid of the ensemble makes 30.0\% fewer prediction errors as measured by the positive predictive value (PPV) with marginally improved sensitivity. The Boltzmann ensemble can be separated into a small number (3.2 on average) of clusters. Among the centroids of these clusters, the "best cluster centroid" as determined by comparison to the known structure simultaneously improves PPV by 46.5\% and sensitivity by 21.7\%. For 58\% of the studied sequences for which the MFE structure is outside the cluster containing the best centroid, the improvements by the best centroid are 62.5\% for PPV and 31.4\% for sensitivity. These results suggest that the energy well containing the MFE structure under the current incomplete energy model is often different from the one for the unavailable complete model that presumably contains the unique native structure. Centroids are available on the Sfold server at http://sfold.wadsworth.org.},
chemicals = {{RNA}, Bacterial, {RNA}},
citation-subset = {IM},
completed = {2005-09-30},
country = {United States},
doi = {10.1261/rna.2500605},
issn-linking = {1355-8382},
keywords = {Base Pairing; Nucleic Acid Conformation; {RNA}, ultrastructure; {RNA}, Bacterial, chemistry, ultrastructure; Reproducibility of Results; Sensitivity and Specificity; Thermodynamics},
nlm-id = {9509184},
owner = {NLM},
pii = {11/8/1157},
pmc = {PMC1370799},
pmid = {16043502},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Dotu2015,
author = {Dotu, Ivan and Garcia-Martin, Juan Antonio and Slinger, Betty L. and Mechery, Vinodh and Meyer, Michelle M. and Clote, Peter},
title = {Complete {RNA} inverse folding: computational design of functional hammerhead ribozymes},
journal = {Nucleic Acids Research},
year = {2014},
volume = {42},
number = {18},
pages = {11752--11762},
doi = {10.1093/nar/gku740},
}
@Article{Draghi2010,
author = {Draghi, Jeremy A and Parsons, Todd L and Wagner, G{\"u}nter P and Plotkin, Joshua B},
title = {Mutational robustness can facilitate adaptation},
journal = {Nature},
year = {2010},
volume = {463},
number = {7279},
pages = {353--355},
month = jan,
issn = {1476-4687},
abstract = {Robustness seems to be the opposite of evolvability. If phenotypes are robust against mutation, we might expect that a population will have difficulty adapting to an environmental change, as several studies have suggested. However, other studies contend that robust organisms are more adaptable. A quantitative understanding of the relationship between robustness and evolvability will help resolve these conflicting reports and will clarify outstanding problems in molecular and experimental evolution, evolutionary developmental biology and protein engineering. Here we demonstrate, using a general population genetics model, that mutational robustness can either impede or facilitate adaptation, depending on the population size, the mutation rate and the structure of the fitness landscape. In particular, neutral diversity in a robust population can accelerate adaptation as long as the number of phenotypes accessible to an individual by mutation is smaller than the total number of phenotypes in the fitness landscape. These results provide a quantitative resolution to a significant ambiguity in evolutionary theory.},
citation-subset = {IM},
completed = {2010-03-03},
country = {England},
doi = {10.1038/nature08694},
issn-linking = {0028-0836},
keywords = {Adaptation, Physiological, genetics; Biological Evolution; Genetic Fitness, genetics; Genetic Variation, genetics; Genetics, Population; Genotype; Models, Genetic; Mutagenesis, genetics; Mutation, genetics; Phenotype; Population Density; Time Factors},
mid = {NIHMS250567},
nlm-id = {0410462},
owner = {NLM},
pii = {nature08694},
pmc = {PMC3071712},
pmid = {20090752},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@InProceedings{DuBoisberranger2012,
  author    = {Du Boisberranger, J{\'e}r{\'e}mie and Gardy, Dani{\`e}le and Ponty, Yann},
  title     = {The weighted words collector},
  booktitle = {International Meeting on Probabilistic, Combinatorial and Asymptotic Methods for the Analysis of Algorithms (AOFA 2012)},
  year      = {2012},
  editor    = {Broutin, Nicolas and Devroye, Luc},
  volume    = {AQ},
  series    = {Discrete Mathematics \& Theoretical Computer Science},
  pages     = {243--264},
  publisher = {Episciences.org},
}
@Article{Esmaili-Taheri2015,
  author          = {Esmaili-Taheri, Ali and Ganjtabesh, Mohammad},
  title           = {{ERD}: a fast and reliable tool for {RNA} design including constraints},
  journal         = {BMC Bioinformatics},
  year            = {2015},
  volume          = {16},
  pages           = {20},
  month           = jan,
  issn            = {1471-2105},
  abstract        = {The function of an {RNA} in cellular processes is directly related to its structure. The free energy of {RNA} structure in another important key to its function as only some structures with a specific level of free energy can take part in cellular reactions. Therefore, to perform a specific function, a particular {RNA} structure with specific level of free energy is required. For a given {RNA} structure, the goal of the {RNA} design problem is to design an {RNA} sequence that folds into the given structure. To mimic the biological features of {RNA} sequences and structures, some sequence and energy constraints should be considered in designing {RNA}. Although the level of free energy is important, it is not considered in the available approaches for {RNA} design problem. In this paper, we present a new version of our evolutionary algorithm for {RNA} design problem, entitled ERD, and extend it to handle some sequence and energy constraints. In the sequence constraints, one can restrict sequence positions to a fixed nucleotide or to a subset of nucleotides. As for the energy constraint, one can specify an interval for the free energy ranges of the designed sequences. We compare our algorithm with INFO-{RNA}, MODENA, NUPACK, and {RNA}iFold approaches for some artificial and natural {RNA} secondary structures and constraints. The results indicate that our algorithm outperforms the other mentioned approaches in terms of accuracy, speedup, divergency, nucleotides distribution, and similarity to the natural {RNA} sequences. Particularly, the designed {RNA} sequences in our method are much more reliable and similar to the natural counterparts. The generated sequences are more diverse and they have closer nucleotides distribution to the natural one. The ERD tool and web server are freely available at http://mostafa.ut.ac.ir/corna/erd-cons/ .},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2015-08-18},
  country         = {England},
  doi             = {10.1186/s12859-014-0444-5},
  issn-linking    = {1471-2105},
  keywords        = {Algorithms; Base Sequence; Humans; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, chemistry; {RNA} Folding; Sequence Analysis, {RNA}, methods; Software},
  nlm-id          = {100965194},
  owner           = {NLM},
  pii             = {s12859-014-0444-5},
  pmc             = {PMC4384295},
  pmid            = {25626878},
  pubmodel        = {Electronic},
  pubstatus       = {epublish},
  revised         = {2018-12-02},
}
@Article{Esmaili-Taheri2014,
  author  = {Esmaili-Taheri, Ali and Ganjtabesh, Mohammad and Mohammad-Noori, Morteza},
  title   = {Evolutionary solution for the {RNA} design problem},
  journal = {Bioinformatics},
  year    = {2014},
  volume  = {30},
  number  = {9},
  pages   = {1250--1258},
  doi     = {10.1093/bioinformatics/btu001},
}
@Article{Fontana1987,
  author          = {Fontana, W and Schuster, P},
  title           = {A computer model of evolutionary optimization},
  journal         = {Biophysical Chemistry},
  year            = {1987},
  volume          = {26},
  number          = {2--3},
  pages           = {123--147},
  month           = may,
  issn            = {0301-4622},
  abstract        = {Molecular evolution is viewed as a typical combinatorial optimization problem. We analyse a chemical reaction model which considers {RNA} replication including correct copying and point mutations together with hydrolytic degradation and the dilution flux of a flow reactor. The corresponding stochastic reaction network is implemented on a computer in order to investigate some basic features of evolutionary optimization dynamics. Characteristic features of real molecular systems are mimicked by folding binary sequences into unknotted two-dimensional structures. Selective values are derived from these molecular 'phenotypes' by an evaluation procedure which assigns numerical values to different elements of the secondary structure. The fitness function obtained thereby contains nontrivial long-range interactions which are typical for real systems. The fitness landscape also reveals quite involved and bizarre local topologies which we consider also representative of polynucleotide replication in actually occurring systems. Optimization operates on an ensemble of sequences via mutation and natural selection. The strategy observed in the simulation experiments is fairly general and resembles closely a heuristic widely applied in operations research areas. Despite the relative smallness of the system--we study 2000 molecules of chain length v = 70 in a typical simulation experiment--features typical for the evolution of real populations are observed as there are error thresholds for replication, evolutionary steps and quasistationary sequence distributions. The relative importance of selectively neutral or almost neutral variants is discussed quantitatively. Four characteristic ensemble properties, entropy of the distribution, ensemble correlation, mean Hamming distance and diversity of the population, are computed and checked for their sensitivity in recording major optimization events during the simulation.},
  citation-subset = {IM, S},
  completed       = {1987-08-28},
  country         = {Netherlands},
  issn-linking    = {0301-4622},
  keywords        = {Base Sequence; Biological Evolution; Computer Simulation; Mathematics; Models, Genetic; Nucleic Acid Conformation},
  nasa            = {87271963},
  nlm-id          = {0403171},
  owner           = {NLM},
  pii             = {0301-4622(87)80017-0},
  pmid            = {3607225},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2010-11-18},
}
@Article{Frid2010,
  author  = {Frid, Yelena and Gusfield, Dan},
  title   = {A simple, practical and complete {$O(n^3/\log n)$}-time algorithm for {RNA} folding using the {Four-Russians} speedup},
  journal = {Algorithms for Molecular Biology},
  year    = {2010},
  volume  = {5},
  pages   = {13},
  doi     = {10.1186/1748-7188-5-13},
}
@Article{Gao2010,
  author  = {Gao, James Z. M. and Li, Linda Y. M. and Reidys, Christian M.},
  title   = {Inverse folding of {RNA} pseudoknot structures},
  journal = {Algorithms for Molecular Biology},
  year    = {2010},
  volume  = {5},
  pages   = {27},
  doi     = {10.1186/1748-7188-5-27},
}
@Article{Garcia-Martin2013,
  author  = {Garcia-Martin, Juan Antonio and Clote, Peter and Dotu, Ivan},
  title   = {{RNAiFold}: a web server for {RNA} inverse folding and molecular design},
  journal = {Nucleic Acids Research},
  year    = {2013},
  volume  = {41},
  number  = {W1},
  pages   = {W465--W470},
  doi     = {10.1093/nar/gkt280},
}
@Article{Garcia-Martin2013a,
  author  = {Garcia-Martin, Juan Antonio and Clote, Peter and Dotu, Ivan},
  title   = {{RNAiFOLD}: a constraint programming algorithm for {RNA} inverse folding and molecular design},
  journal = {Journal of Bioinformatics and Computational Biology},
  year    = {2013},
  volume  = {11},
  number  = {2},
  pages   = {1350001},
  doi     = {10.1142/S0219720013500017},
}
@Article{Gomes-Filho2019,
  author       = {Gomes-Filho, Jos{\'e} Vicente and Randau, Lennart},
  title        = {{RNA} stabilization in hyperthermophilic archaea},
  journal      = {Annals of the New York Academy of Sciences},
  year         = {2019},
  pages        = {14060},
  month        = apr,
  issn         = {1749-6632},
  abstract     = {Analyses of the {RNA} metabolism of hyperthermophilic archaea highlight the efficiency of regulatory {RNA}s and {RNA}-guided processes at extreme temperatures. These organisms must overcome the intrinsic thermolability of {RNA}s. Elevated levels of {RNA} modifications and structured GC-rich regions are observed for many universal noncoding {RNA} families. Guide {RNA}s are often protected from degradation by their presence within ribonucleoprotein complexes. Modification and ligation of {RNA} termini can be employed to impair exonucleolytic degradation. Finally, antisense strand transcription promotes the formation of {RNA} duplexes and can be used to stabilize {RNA} regions. In our review, we provide examples of these {RNA} stabilization mechanisms that have been observed in hyperthermophilic archaeal model organisms.},
  country      = {United States},
  doi          = {10.1111/nyas.14060},
  issn-linking = {0077-8923},
  keywords     = {C/D box s{RNA}s; {RNA} structure; {RNA}-seq; archaea; modifications},
  nlm-id       = {7506858},
  owner        = {NLM},
  pmid         = {30994930},
  pubmodel     = {Print-Electronic},
  pubstatus    = {aheadofprint},
  revised      = {2019-04-17},
}
@Article{Griffiths-Jones2003,
  author  = {Griffiths-Jones, S. and Bateman, A. and Marshall, M. and Khanna, A. and Eddy, S. R.},
  title   = {{Rfam}: an {RNA} family database},
  journal = {Nucleic Acids Research},
  year    = {2003},
  volume  = {31},
  number  = {1},
  pages   = {439--441},
  doi     = {10.1093/nar/gkg006},
}
@Article{Gupta2005,
  author  = {Gupta, Arvind and Ma{\v{n}}uch, J{\'a}n and Stacho, Ladislav},
  title   = {Structure-approximating inverse protein folding problem in the {2D} {HP} model},
  journal = {Journal of Computational Biology},
  year    = {2005},
  volume  = {12},
  number  = {10},
  pages   = {1328--1345},
  doi     = {10.1089/cmb.2005.12.1328},
}
@Article{Hammer2019,
  author   = {Hammer, Stefan and G{\"u}nzel, Christian and M{\"o}rl, Mario and Findei{\ss}, Sven},
  title    = {Evolving methods for rational de novo design of functional {RNA} molecules},
  journal  = {Methods},
  year     = {2019},
  volume   = {161},
  pages    = {54--63},
  issn     = {1046-2023},
  note     = {Development and engineering of artificial {RNA}s},
  abstract = {Artificial {RNA} molecules with novel functionality have many applications in synthetic biology, pharmacy and white biotechnology. The de novo design of such devices using computational methods and prediction tools is a resource-efficient alternative to experimental screening and selection pipelines. In this review, we describe methods common to many such computational approaches, thoroughly dissect these methods and highlight open questions for the individual steps. Initially, it is essential to investigate the biological target system, the regulatory mechanism that will be exploited, as well as the desired components in order to define design objectives. Subsequent computational design is needed to combine the selected components and to obtain novel functionality. This process can usually be split into constrained sequence sampling, the formulation of an optimization problem and an in silico analysis to narrow down the number of candidates with respect to secondary goals. Finally, experimental analysis is important to check whether the defined design objectives are indeed met in the target environment and detailed characterization experiments should be performed to improve the mechanistic models and detect missing design requirements.},
  doi      = {10.1016/j.ymeth.2019.04.022},
  keywords = {{RNA} design, Rational design, Synthetic biology, Artificial {RNA} devices, Mechanistic models, Sequence sampling, Experimental validation, {RNA} design tools},
  url      = {http://www.sciencedirect.com/science/article/pii/S1046202318302895},
}
@Article{hammer2019fixed,
  author    = {Hammer, Stefan and Wang, Wei and Will, Sebastian and Ponty, Yann},
  title     = {Fixed-parameter tractable sampling for {RNA} design with multiple target structures},
  journal   = {BMC Bioinformatics},
  year      = {2019},
  volume    = {20},
  number    = {1},
  pages     = {209},
  publisher = {BioMed Central},
}
@Article{HoenerZuSiederdissen2013,
  author  = {{H{\"o}ner zu Siederdissen}, Christian and Hammer, Stefan and Abfalter, Ingrid and Hofacker, Ivo L. and Flamm, Christoph and Stadler, Peter F.},
  title   = {Computational design of {RNA}s with complex energy landscapes},
  journal = {Biopolymers},
  year    = {2013},
  volume  = {99},
  number  = {12},
  pages   = {1124--1136},
  doi     = {10.1002/bip.22337},
}
@Article{Kennedy2010,
  author          = {Kennedy, Ryan and Lladser, Manuel E and Wu, Zhiyuan and Zhang, Chen and Yarus, Michael and De Sterck, Hans and Knight, Rob},
  title           = {Natural and artificial {RNA}s occupy the same restricted region of sequence space},
  journal         = {{RNA}},
  year            = {2010},
  volume          = {16},
  number          = {2},
  pages           = {280--289},
  month           = feb,
  issn            = {1469-9001},
  abstract        = {Different chemical and mutational processes within genomes give rise to sequences with different compositions and perhaps different capacities for evolution. The evolution of functional {RNA}s may occur on a "neutral network" in which sequences with any given function can easily mutate to sequences with any other. This neutral network hypothesis is more likely if there is a particular region of composition that contains sequences that are functional in general, and if many different functions are possible within this preferred region of composition. We show that sequence preferences in active sites recovered by in vitro selection combine with biophysical folding rules to support the neutral network hypothesis. These simple active-site specifications and folding preferences obtained by artificial selection experiments recapture the previously observed purine bias and specific spread along the GC axis of naturally occurring aptamers and ribozymes isolated from organisms, although other types of {RNA}s, such as mi{RNA} precursors and spliceosomal {RNA}s, that act primarily through complementarity to other amino acids do not share these preferences. These universal evolved sequence features are therefore intrinsic in {RNA} molecules that bind small-molecule targets or catalyze reactions.},
  chemicals       = {Aptamers, Nucleotide, {RNA}, Catalytic, {RNA}},
  citation-subset = {IM},
  completed       = {2010-02-18},
  country         = {United States},
  doi             = {10.1261/rna.1923210},
  issn-linking    = {1355-8382},
  keywords        = {Aptamers, Nucleotide, chemistry, genetics, metabolism; Base Composition; Base Sequence; Binding Sites, genetics; Biophysical Phenomena; Computational Biology; Models, Genetic; Models, Molecular; Models, Statistical; Mutation; Nucleic Acid Conformation; Poisson Distribution; {RNA}, chemistry, genetics, metabolism; {RNA}, Catalytic, chemistry, genetics, metabolism; SELEX Aptamer Technique; Selection, Genetic},
  nlm-id          = {9509184},
  owner           = {NLM},
  pii             = {rna.1923210},
  pmc             = {PMC2811657},
  pmid            = {20032164},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Kleinkauf2015,
  author          = {Kleinkauf, Robert and Mann, Martin and Backofen, Rolf},
  title           = {{antaRNA}: ant colony-based {RNA} sequence design},
  journal         = {Bioinformatics},
  year            = {2015},
  volume          = {31},
  number          = {19},
  pages           = {3114--3121},
  month           = oct,
  issn            = {1367-4811},
  abstract        = {{RNA} sequence design is studied at least as long as the classical folding problem. Although for the latter the functional fold of an {RNA} molecule is to be found, inverse folding tries to identify {RNA} sequences that fold into a function-specific target structure. In combination with {RNA}-based biotechnology and synthetic biology, reliable {RNA} sequence design becomes a crucial step to generate novel biochemical components. In this article, the computational tool anta{RNA} is presented. It is capable of compiling {RNA} sequences for a given structure that comply in addition with an adjustable full range objective GC-content distribution, specific sequence constraints and additional fuzzy structure constraints. anta{RNA} applies ant colony optimization meta-heuristics and its superior performance is shown on a biological datasets. http://www.bioinf.uni-freiburg.de/Software/anta{RNA} CONTACT: backofen@informatik.uni-freiburg.de Supplementary data are available at Bioinformatics online.},
  citation-subset = {IM},
  completed       = {2016-04-25},
  country         = {England},
  doi             = {10.1093/bioinformatics/btv319},
  issn-linking    = {1367-4803},
  keywords        = {Animals; Ants, genetics; Base Composition, genetics; Base Sequence; Databases, Genetic; Sequence Analysis, {RNA}, methods; Software},
  nlm-id          = {9808944},
  owner           = {NLM},
  pii             = {btv319},
  pmc             = {PMC4576691},
  pmid            = {26023105},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2018-12-02},
}
@Article{Lee2014,
  author  = {Lee, Jeehyung and Kladwang, Wipapat and Lee, Minjae and Cantu, Daniel and Azizyan, Martin and Kim, Hanjoo and Limpaecher, Alex and Yoon, Sungroh and Treuille, Adrien and Das, Rhiju and {EteRNA Participants}},
  title   = {{RNA} design rules from a massive open laboratory},
  journal = {Proceedings of the National Academy of Sciences U S A},
  year    = {2014},
  volume  = {111},
  number  = {6},
  pages   = {2122--2127},
  doi     = {10.1073/pnas.1313039111},
}
@Article{Levin2012,
  author  = {Levin, Alex and Lis, Mieszko and Ponty, Yann and O'Donnell, Charles W and Devadas, Srinivas and Berger, Bonnie and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title   = {A global sampling approach to designing and reengineering {RNA} secondary structures},
  journal = {Nucleic Acids Research},
  year    = {2012},
  volume  = {40},
  number  = {20},
  pages   = {10041--10052},
  doi     = {10.1093/nar/gks768},
}
@Article{Lorenz2011,
  author       = {Lorenz, Ronny and Bernhart, Stephan H and {H{\"o}ner zu Siederdissen}, Christian and Tafer, Hakim and Flamm, Christoph and Stadler, Peter F and Hofacker, Ivo L},
  title        = {{ViennaRNA} {Package} 2.0},
  journal      = {Algorithms for Molecular Biology},
  year         = {2011},
  volume       = {6},
  pages        = {26},
  month        = nov,
  issn         = {1748-7188},
  abstract     = {Secondary structure forms an important intermediate level of description of nucleic acids that encapsulates the dominating part of the folding energy, is often well conserved in evolution, and is routinely used as a basis to explain experimental findings. Based on carefully measured thermodynamic parameters, exact dynamic programming algorithms can be used to compute ground states, base pairing probabilities, as well as thermodynamic properties. The Vienna{RNA} Package has been a widely used compilation of {RNA} secondary structure related computer programs for nearly two decades. Major changes in the structure of the standard energy model, the Turner 2004 parameters, the pervasive use of multi-core CPUs, and an increasing number of algorithmic variants prompted a major technical overhaul of both the underlying {RNA}lib and the interactive user programs. New features include an expanded repertoire of tools to assess {RNA}-{RNA} interactions and restricted ensembles of structures, additional output information such as centroid structures and maximum expected accuracy structures derived from base pairing probabilities, or z-scores for locally stable secondary structures, and support for input in fasta format. Updates were implemented without compromising the computational efficiency of the core algorithms and ensuring compatibility with earlier versions. The Vienna{RNA} Package 2.0, supporting concurrent computations via OpenMP, can be downloaded from http://www.tbi.univie.ac.at/{RNA}.},
  completed    = {2012-10-02},
  country      = {England},
  doi          = {10.1186/1748-7188-6-26},
  issn-linking = {1748-7188},
  nlm-id       = {101265088},
  owner        = {NLM},
  pii          = {1748-7188-6-26},
  pmc          = {PMC3319429},
  pmid         = {22115189},
  pubmodel     = {Electronic},
  pubstatus    = {epublish},
  revised      = {2018-11-13},
}
@Article{Lyngso2012,
  author  = {Lyngs{\o}, Rune B and Anderson, James W. J. and Sizikova, Elena and Badugu, Amarendra and Hyland, Tomas and Hein, Jotun},
  title   = {{FRNAkenstein}: multiple target inverse {RNA} folding},
  journal = {BMC Bioinformatics},
  year    = {2012},
  volume  = {13},
  pages   = {260},
  doi     = {10.1186/1471-2105-13-260},
}
@Article{Mak2018,
  author       = {Mak, Chi H and Phan, Ethan N H},
  title        = {Topological Constraints and Their Conformational Entropic Penalties on {RNA} Folds},
  journal      = {Biophysical Journal},
  year         = {2018},
  volume       = {114},
  number       = {9},
  pages        = {2059--2071},
  month        = may,
  issn         = {1542-0086},
  abstract     = {Functional {RNA}s can fold into intricate structures using a number of different secondary and tertiary structural motifs. Many factors contribute to the overall free energy of the target fold. This study aims at quantifying the entropic costs coming from the loss of conformational freedom when the sugar-phosphate backbone is subjected to constraints imposed by secondary and tertiary contacts. Motivated by insights from topology theory, we design a diagrammatic scheme to represent different types of {RNA} structures so that constraints associated with a folded structure may be segregated into mutually independent subsets, enabling the total conformational entropy loss to be easily calculated as a sum of independent terms. We used high-throughput Monte Carlo simulations to simulate large ensembles of single-stranded {RNA} sequences in solution to validate the assumptions behind our diagrammatic scheme, examining the entropic costs for hairpin initiation and formation of many multiway junctions. Our diagrammatic scheme aids in the factorization of secondary/tertiary constraints into distinct topological classes and facilitates the discovery of interrelationships among multiple constraints on {RNA} folds. This perspective, which to our knowledge is novel, leads to useful insights into the inner workings of some functional {RNA} sequences, demonstrating how they might operate by transforming their structures among different topological classes.},
  country      = {United States},
  doi          = {10.1016/j.bpj.2018.03.035},
  issn-linking = {0006-3495},
  nlm-id       = {0370626},
  owner        = {NLM},
  pii          = {S0006-3495(18)30452-1},
  pmc          = {PMC5961522},
  pmid         = {29742400},
  pubmodel     = {Print},
  pubstatus    = {ppublish},
  revised      = {2019-05-08},
}
@Article{Mathews1999,
  author  = {Mathews, D. H. and Sabina, J. and Zuker, M. and Turner, D. H.},
  title   = {Expanded sequence dependence of thermodynamic parameters improves prediction of {RNA} secondary structure},
  journal = {Journal of Molecular Biology},
  year    = {1999},
  volume  = {288},
  number  = {5},
  pages   = {911--940},
  doi     = {10.1006/jmbi.1999.2700},
}
@Article{Meurer2017,
  author   = {Meurer, Aaron and Smith, Christopher P. and Paprocki, Mateusz and {\v{C}}ert{\'i}k, Ond{\v{r}}ej and Kirpichev, Sergey B. and Rocklin, Matthew and Kumar, Amit and Ivanov, Sergiu and Moore, Jason K. and Singh, Sartaj and Rathnayake, Thilina and Vig, Sean and Granger, Brian E. and Muller, Richard P. and Bonazzi, Francesco and Gupta, Harsh and Vats, Shivam and Johansson, Fredrik and Pedregosa, Fabian and Curry, Matthew J. and Terrel, Andy R. and Rou{\v{c}}ka, {\v{S}}t{\v{e}}p{\'a}n and Saboo, Ashutosh and Fernando, Isuru and Kulal, Sumith and Cimrman, Robert and Scopatz, Anthony},
  title    = {{SymPy}: symbolic computing in {Python}},
  journal  = {PeerJ Computer Science},
  year     = {2017},
  volume   = {3},
  pages    = {e103},
  month    = jan,
  issn     = {2376-5992},
  abstract = {SymPy is an open source computer algebra system written in pure Python. It is built with a focus on extensibility and ease of use, through both interactive and programmatic applications. These characteristics have led SymPy to become a popular symbolic library for the scientific Python ecosystem. This paper presents the architecture of SymPy, a description of its features, and a discussion of select submodules. The supplementary material provides additional examples and further outline details of the architecture and features of SymPy.},
  doi      = {10.7717/peerj-cs.103},
  keywords = {Python, Computer algebra system, Symbolics},
  url      = {https://doi.org/10.7717/peerj-cs.103},
}
@Article{Nebel2012,
  author          = {Nebel, Markus E and Weinberg, Frank},
  title           = {Algebraic and combinatorial properties of common {RNA} pseudoknot classes with applications},
  journal         = {Journal of Computational Biology},
  year            = {2012},
  volume          = {19},
  number          = {10},
  pages           = {1134--1150},
  month           = oct,
  issn            = {1557-8666},
  abstract        = {Predicting {RNA} structures with pseudoknots in general is an NP-complete problem. Accordingly, several authors have suggested subclasses that provide polynomial time prediction algorithms by allowing (respectively, disallowing) certain structural motives. In this article, we introduce a unifying algebraic view on most of these classes. That way it becomes possible to find linear time recognition algorithms that decide whether or not a given structure is member of a class (we offer these algorithms as a web service to the scientific community). Furthermore, by presenting a general translation scheme of our algebraic descriptions into multiple context-free grammars, and proving a new correspondence of multiple context-free grammars and generating functions, it becomes possible to derive the precise asymptotic size of all the classes, solving some open problems such as enumerating the Rivas \& Eddy class of pseudoknots.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2013-02-26},
  country         = {United States},
  doi             = {10.1089/cmb.2011.0094},
  issn-linking    = {1066-5277},
  keywords        = {Algorithms; Models, Chemical; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry, genetics},
  nlm-id          = {9433358},
  owner           = {NLM},
  pmc             = {PMC3469209},
  pmid            = {23057823},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Reinharz2013,
  author  = {Reinharz, Vladimir and Ponty, Yann and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title   = {A weighted sampling algorithm for the design of {RNA} sequences with targeted secondary structure and nucleotide distribution},
  journal = {Bioinformatics},
  year    = {2013},
  volume  = {29},
  number  = {13},
  pages   = {i308--i315},
  doi     = {10.1093/bioinformatics/btt217},
}
@Article{Rivas2017,
  author          = {Rivas, Elena and Clements, Jody and Eddy, Sean R},
  title           = {A statistical test for conserved {RNA} structure shows lack of evidence for structure in {lncRNAs}},
  journal         = {Nature Methods},
  year            = {2017},
  volume          = {14},
  number          = {1},
  pages           = {45--48},
  month           = jan,
  issn            = {1548-7105},
  abstract        = {Many functional {RNA}s have an evolutionarily conserved secondary structure. Conservation of {RNA} base pairing induces pairwise covariations in sequence alignments. We developed a computational method, R-scape ({RNA} Structural Covariation Above Phylogenetic Expectation), that quantitatively tests whether covariation analysis supports the presence of a conserved {RNA} secondary structure. R-scape analysis finds no statistically significant support for proposed secondary structures of the long noncoding {RNA}s HOTAIR, SRA, and Xist.},
  chemicals       = {{RNA}, Long Noncoding},
  citation-subset = {IM},
  completed       = {2017-06-20},
  country         = {United States},
  doi             = {10.1038/nmeth.4066},
  issn-linking    = {1548-7091},
  keywords        = {Base Pairing; Base Sequence; Evolution, Molecular; Humans; Nucleic Acid Conformation; Phylogeny; {RNA}, Long Noncoding, chemistry, genetics},
  mid             = {HHMIMS884950},
  nlm-id          = {101215604},
  owner           = {NLM},
  pii             = {nmeth.4066},
  pmc             = {PMC5554622},
  pmid            = {27819659},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Rodrigo2012,
  author  = {Rodrigo, Guillermo and Landrain, Thomas E and Jaramillo, Alfonso},
  title   = {De novo automated design of small {RNA} circuits for engineering synthetic riboregulation in living cells},
  journal = {Proceedings of the National Academy of Sciences U S A},
  year    = {2012},
  volume  = {109},
  number  = {38},
  pages   = {15271--15276},
  doi     = {10.1073/pnas.1203831109},
}
@Article{Rodrigo2013,
  author  = {Rodrigo, Guillermo and Landrain, Thomas E. and Majer, Eszter and Dar{\`o}s, Jos{\'e}-Antonio and Jaramillo, Alfonso},
  title   = {Full Design Automation of Multi-State {RNA} Devices to Program Gene Expression Using Energy-Based Optimization},
  journal = {PLoS Computational Biology},
  year    = {2013},
  volume  = {9},
  number  = {8},
  pages   = {e1003172},
  doi     = {10.1371/journal.pcbi.1003172},
}
@Article{Salvy1994,
  author     = {Salvy, Bruno and Zimmermann, Paul},
  title      = {{GFUN}: A {Maple} Package for the Manipulation of Generating and Holonomic Functions in One Variable},
  journal    = {ACM Transactions on Mathematical Software},
  year       = {1994},
  volume     = {20},
  number     = {2},
  pages      = {163--177},
  month      = jun,
  issn       = {0098-3500},
  acmid      = {178368},
  address    = {New York, NY, USA},
  doi        = {10.1145/178365.178368},
  issue_date = {June 1994},
  keywords   = {computer algebra, generating functions, linear differential equations, linear recurrences},
  numpages   = {15},
  publisher  = {ACM},
  url        = {http://doi.acm.org/10.1145/178365.178368},
}
@Article{Schuster1994,
  author          = {Schuster, P and Fontana, W and Stadler, P F and Hofacker, I L},
  title           = {From sequences to shapes and back: a case study in {RNA} secondary structures},
  journal         = {Proceedings of the Royal Society of London. Series B: Biological Sciences},
  year            = {1994},
  volume          = {255},
  number          = {1344},
  pages           = {279--284},
  month           = mar,
  issn            = {0962-8452},
  abstract        = {{RNA} folding is viewed here as a map assigning secondary structures to sequences. At fixed chain length the number of sequences far exceeds the number of structures. Frequencies of structures are highly non-uniform and follow a generalized form of Zipf's law: we find relatively few common and many rare ones. By using an algorithm for inverse folding, we show that sequences sharing the same structure are distributed randomly over sequence space. All common structures can be accessed from an arbitrary sequence by a number of mutations much smaller than the chain length. The sequence space is percolated by extensive neutral networks connecting nearest neighbours folding into identical structures. Implications for evolutionary adaptation and for applied molecular evolution are evident: finding a particular structure by mutation and selection is much simpler than expected and, even if catalytic activity should turn out to be sparse of {RNA} structures, it can hardly be missed by evolutionary processes.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {1994-08-04},
  country         = {England},
  doi             = {10.1098/rspb.1994.0040},
  issn-linking    = {0962-8452},
  keywords        = {Base Composition; Base Sequence; Models, Structural; Nucleic Acid Conformation; {RNA}, chemistry; Thermodynamics},
  nlm-id          = {101245157},
  owner           = {NLM},
  pmid            = {7517565},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2016-10-18},
}
@Article{Stern2014,
  author          = {Stern, Adi and Bianco, Simone and Yeh, Ming Te and Wright, Caroline and Butcher, Kristin and Tang, Chao and Nielsen, Rasmus and Andino, Raul},
  title           = {Costs and benefits of mutational robustness in {RNA} viruses},
  journal         = {Cell Reports},
  year            = {2014},
  volume          = {8},
  number          = {4},
  pages           = {1026--1036},
  month           = aug,
  issn            = {2211-1247},
  abstract        = {The accumulation of mutations in {RNA} viruses is thought to facilitate rapid adaptation to changes in the environment. However, most mutations have deleterious effects on fitness, especially for viruses. Thus, tolerance to mutations should determine the nature and extent of genetic diversity that can be maintained in the population. Here, we combine population genetics theory, computer simulation, and experimental evolution to examine the advantages and disadvantages of tolerance to mutations, also known as mutational robustness. We find that mutational robustness increases neutral diversity and, as expected, can facilitate adaptation to a new environment. Surprisingly, under certain conditions, robustness may also be an impediment for viral adaptation, if a highly diverse population contains a large proportion of previously neutral mutations that are deleterious in the new environment. These findings may inform therapeutic strategies that cause extinction of otherwise robust viral populations.},
  citation-subset = {IM},
  completed       = {2015-05-26},
  country         = {United States},
  doi             = {10.1016/j.celrep.2014.07.011},
  keywords        = {Algorithms; Computer Simulation; Evolution, Molecular; Genes, Viral; Genetic Fitness; Humans; Models, Genetic; Mutation Rate; {RNA} Virus Infections, transmission, virology; {RNA} Viruses, genetics},
  mid             = {NIHMS615158},
  nlm-id          = {101573691},
  owner           = {NLM},
  pii             = {S2211-1247(14)00578-6},
  pmc             = {PMC4142091},
  pmid            = {25127138},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{SurujonPC2019,
author = {Surujon, Defne and Ponty, Yann and Clote, Peter},
title = {Small-World Networks and {RNA} Secondary Structures},
journal = {Journal of Computational Biology},
year = {2019},
volume = {26},
number = {1},
pages = {16--26},
doi = {10.1089/cmb.2018.0125},
}
@Article{Taneda2012,
author = {Taneda, Akito},
title = {Multi-objective genetic algorithm for pseudoknotted {RNA} sequence design},
journal = {Frontiers in Genetics},
year = {2012},
volume = {3},
pages = {36},
doi = {10.3389/fgene.2012.00036},
}
@Article{Taneda2011,
author = {Taneda, Akito},
title = {{MODENA}: a multi-objective {RNA} inverse folding},
journal = {Advances and Applications in Bioinformatics and Chemistry},
year = {2011},
volume = {4},
pages = {1--12},
pmid = {21918633},
}
@Article{Ven2014,
author = {Venkatachalam, Balaji and Gusfield, Dan and Frid, Yelena},
title = {Faster algorithms for {RNA}-folding using the {Four-Russians} method},
journal = {Algorithms for Molecular Biology},
year = {2014},
volume = {9},
pages = {5},
doi = {10.1186/1748-7188-9-5},
}
@Article{Weinberg2017,
author = {Weinberg, Zasha and L{\"u}nse, Christina E and Corbino, Keith A and Ames, Tyler D and Nelson, James W and Roth, Adam and Perkins, Kevin R and Sherlock, Madeline E and Breaker, Ronald R},
title = {Detection of 224 candidate structured {RNA}s by comparative analysis of specific subsets of intergenic regions},
journal = {Nucleic Acids Research},
year = {2017},
volume = {45},
number = {18},
pages = {10811--10823},
month = oct,
issn = {1362-4962},
abstract = {The discovery of structured non-coding {RNA}s (nc{RNA}s) in bacteria can reveal new facets of biology and biochemistry. Comparative genomics analyses executed by powerful computer algorithms have successfully been used to uncover many novel bacterial nc{RNA} classes in recent years. However, this general search strategy favors the discovery of more common nc{RNA} classes, whereas progressively rarer classes are correspondingly more difficult to identify. In the current study, we confront this problem by devising several methods to select subsets of intergenic regions that can concentrate these rare {RNA} classes, thereby increasing the probability that comparative sequence analysis approaches will reveal their existence. By implementing these methods, we discovered 224 novel nc{RNA} classes, which include ROOL {RNA}, an {RNA} class averaging 581 nt and present in multiple phyla, several highly conserved and widespread nc{RNA} classes with properties that suggest sophisticated biochemical functions and a multitude of putative cis-regulatory {RNA} classes involved in a variety of biological processes. We expect that further research on these newly found {RNA} classes will reveal additional aspects of novel biology, and allow for greater insights into the biochemistry performed by nc{RNA}s.},
chemicals = {{RNA}, Bacterial, {RNA}, Untranslated, Regulatory Sequences, Ribonucleic Acid},
citation-subset = {IM},
completed = {2017-11-07},
country = {England},
doi = {10.1093/nar/gkx699},
issn-linking = {0305-1048},
keywords = {Integrons; Nucleotide Motifs; Plasmids, genetics; {RNA}, Bacterial, chemistry; {RNA}, Untranslated, chemistry; Regulatory Sequences, Ribonucleic Acid; Reverse Transcription},
nlm-id = {0411011},
owner = {NLM},
pii = {4080188},
pmc = {PMC5737381},
pmid = {28977401},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Wolfe2014,
author = {Wolfe, Brian R. and Pierce, Niles A.},
title = {Sequence Design for a Test Tube of Interacting Nucleic Acid Strands},
journal = {ACS Synthetic Biology},
year = {2014},
doi = {10.1021/sb5002196},
}
@Article{Zadeh2011,
author = {Zadeh, Joseph N and Wolfe, Brian R and Pierce, Niles A},
title = {Nucleic acid sequence design via efficient ensemble defect optimization},
journal = {Journal of Computational Chemistry},
year = {2011},
volume = {32},
number = {3},
pages = {439--452},
doi = {10.1002/jcc.21633},
}
@Article{Zakov2011,
author = {Zakov, Shay and Tsur, Dekel and Ziv-Ukelson, Michal},
title = {Reducing the worst case running times of a family of {RNA} and {CFG} problems, using {Valiant's} approach},
journal = {Algorithms for Molecular Biology},
year = {2011},
volume = {6},
number = {1},
pages = {20},
doi = {10.1186/1748-7188-6-20},
}
@inproceedings{Zhou2013,
  author    = {Zhou, Yu and Ponty, Yann and Vialette, St{\'{e}}phane and Waldisp{\"{u}}hl, J{\'{e}}r{\^{o}}me and Zhang, Yi and Denise, Alain},
  title     = {Flexible {RNA} design under structure and sequence constraints using formal languages},
  booktitle = {{ACM} Conference on Bioinformatics, Computational Biology and Biomedical Informatics. {ACM-BCB} 2013, Washington, DC, USA, September 22-25, 2013},
  editor    = {Gao, Jing},
  publisher = {{ACM}},
  year      = {2013},
  pages     = {229},
  doi       = {10.1145/2506583.2506623},
  url       = {https://doi.org/10.1145/2506583.2506623},
}
@Book{Flajolet2009,
title = {Analytic Combinatorics},
publisher = {Cambridge University Press},
year = {2009},
author = {Flajolet, Philippe and Sedgewick, Robert},
address = {New York, NY, USA},
edition = {1},
isbn = {0521898064, 9780521898065},
}
@article{Drmota1997,
  author    = {Drmota, Michael},
  title     = {Systems of functional equations},
  journal   = {Random Structures and Algorithms},
  volume    = {10},
  number    = {1-2},
  pages     = {103--124},
  year      = {1997},
  owner     = {Yann},
  timestamp = {2017.11.02},
}
@article{Waterman1978,
  author  = {Waterman, Michael},
  title   = {Secondary Structure of Single-Stranded Nucleic Acids},
  journal = {Advances in Mathematics: Supplementary Studies},
  volume  = {1},
  pages   = {167--212},
  year    = {1978},
}
@Article{Lyngsoe2000,
author = {Lyngsø, R B and Pedersen, C N},
title = {{RNA} pseudoknot prediction in energy-based models},
journal = {Journal of Computational Biology},
year = {2000},
volume = {7},
number = {3-4},
pages = {409--427},
issn = {1066-5277},
abstract = {{RNA} molecules are sequences of nucleotides that serve as more than mere intermediaries between DNA and proteins, e.g., as catalytic molecules. Computational prediction of {RNA} secondary structure is among the few structure prediction problems that can be solved satisfactorily in polynomial time. Most work has been done to predict structures that do not contain pseudoknots. Allowing pseudoknots introduces modeling and computational problems. In this paper we consider the problem of predicting {RNA} secondary structures with pseudoknots based on free energy minimization. We first give a brief comparison of energy-based methods for predicting {RNA} secondary structures with pseudoknots. We then prove that the general problem of predicting {RNA} secondary structures containing pseudoknots is NP complete for a large class of reasonable models of pseudoknots.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2001-04-12},
country = {United States},
doi = {10.1089/106652700750050862},
issn-linking = {1066-5277},
keywords = {Algorithms; Computational Biology; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; Thermodynamics},
nlm-id = {9433358},
owner = {NLM},
pmid = {11108471},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2006-11-15},
}
@Article{Ding2003,
author = {Ding, Ye and Lawrence, Charles E},
title = {A statistical sampling algorithm for {RNA} secondary structure prediction},
journal = {Nucleic Acids Research},
year = {2003},
volume = {31},
number = {24},
pages = {7280--7301},
month = dec,
issn = {1362-4962},
abstract = {An {{RNA}} molecule, particularly a long-chain m{RNA}, may exist as a population of structures. Further more, multiple structures have been demonstrated to play important functional roles. Thus, a representation of the ensemble of probable structures is of interest. We present a statistical algorithm to sample rigorously and exactly from the Boltzmann ensemble of secondary structures. The forward step of the algorithm computes the equilibrium partition functions of {{RNA}} secondary structures with recent thermodynamic parameters. Using conditional probabilities computed with the partition functions in a recursive sampling process, the backward step of the algorithm quickly generates a statistically representative sample of structures. With cubic run time for the forward step, quadratic run time in the worst case for the sampling step, and quadratic storage, the algorithm is efficient for broad applicability. We demonstrate that, by classifying sampled structures, the algorithm enables a statistical delineation and representation of the Boltzmann ensemble. Applications of the algorithm show that alternative biological structures are revealed through sampling. Statistical sampling provides a means to estimate the probability of any structural motif, with or without constraints. For example, the algorithm enables probability profiling of single-stranded regions in {{RNA}} secondary structure. Probability profiling for specific loop types is also illustrated. By overlaying probability profiles, a mutual accessibility plot can be displayed for predicting {RNA}:{{RNA}} interactions. Boltzmann probability-weighted density of states and free energy distributions of sampled structures can be readily computed. We show that a sample of moderate size from the ensemble of an enormous number of possible structures is sufficient to guarantee statistical reproducibility in the estimates of typical sampling statistics.
Our applications suggest that the sampling algorithm may be well suited to prediction of m{{RNA}} structure and target accessibility. The algorithm is applicable to the rational design of small interfering {RNA}s (si{RNA}s), antisense oligonucleotides, and trans-cleaving ribozymes in gene knock-down studies.},
chemicals = {{RNA}, Protozoan, {RNA}, Spliced Leader, {RNA}},
citation-subset = {IM},
completed = {2004-01-14},
country = {England},
doi = {10.1093/nar/gkg938},
issn-linking = {0305-1048},
keywords = {Algorithms; Base Sequence; Computational Biology, methods; Drug Design; Molecular Sequence Data; Nucleic Acid Conformation; Probability; {RNA}, chemistry, genetics; {{RNA}} Stability; {RNA}, Protozoan, chemistry, genetics; {RNA}, Spliced Leader, chemistry, genetics; Statistical Distributions; Thermodynamics},
nlm-id = {0411011},
owner = {NLM},
pmc = {PMC297010},
pmid = {14654704},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Joerg2008,
author = {J{\"o}rg, Thomas and Martin, Olivier C and Wagner, Andreas},
title = {Neutral network sizes of biological {RNA} molecules can be computed and are not atypically small},
journal = {BMC Bioinformatics},
year = {2008},
volume = {9},
pages = {464},
month = oct,
issn = {1471-2105},
abstract = {Neutral networks or sets consist of all genotypes with a given phenotype. The size and structure of these sets has a strong influence on a biological system's robustness to mutations, and on its evolvability, the ability to produce phenotypic variation; in the few studied cases of molecular phenotypes, the larger this set, the greater both robustness and evolvability of phenotypes. Unfortunately, any one neutral set contains generally only a tiny fraction of genotype space. Thus, current methods cannot measure neutral set sizes accurately, except in the smallest genotype spaces. Here we introduce a generalized Monte Carlo approach that can measure neutral set sizes in larger spaces. We apply our method to the genotype-to-phenotype mapping of {RNA} molecules, and show that it can reliably measure neutral set sizes for molecules up to 100 bases. We also study neutral set sizes of {RNA} structures in a publicly available database of functional, noncoding {RNA}s up to a length of 50 bases. We find that these neutral sets are larger than the neutral sets in 99.99{\%} of random phenotypes. Software to estimate neutral network sizes is available at (http://www.bioc.uzh.ch/wagner/publications-software.html). The biological {RNA} structures we examined are more abundant than random structures. This indicates that their robustness and their ability to produce new phenotypic variants may also be high.},
chemicals = {{RNA}, Untranslated, {RNA}},
citation-subset = {IM},
completed = {2009-03-17},
country = {England},
doi = {10.1186/1471-2105-9-464},
issn-linking = {1471-2105},
keywords = {Algorithms; Databases, Nucleic Acid; Genotype; Monte Carlo Method; Neural Networks (Computer); Nucleic Acid Conformation; Phenotype; {RNA}, chemistry; {RNA}, Untranslated, chemistry; Software},
nlm-id = {100965194},
owner = {NLM},
pii = {1471-2105-9-464},
pmc = {PMC2639431},
pmid = {18973652},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Turner2010,
author = {Turner, Douglas H and Mathews, David H},
title = {{NNDB}: the nearest neighbor parameter database for predicting stability of nucleic acid secondary structure},
journal = {Nucleic Acids Research},
year = {2010},
volume = {38},
number = {Database issue},
pages = {D280--D282},
month = jan,
issn = {1362-4962},
abstract = {The Nearest Neighbor Database (NNDB, http://rna.urmc.rochester.edu/NNDB) is a web-based resource for disseminating parameter sets for predicting nucleic acid secondary structure stabilities. For each set of parameters, the database includes the set of rules with descriptive text, sequence-dependent parameters in plain text and html, literature references to experiments and usage tutorials. The initial release covers parameters for predicting {{RNA}} folding free energy and enthalpy changes.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2010-02-01},
country = {England},
doi = {10.1093/nar/gkp892},
issn-linking = {0305-1048},
keywords = {Algorithms; Base Sequence; Computational Biology, methods, trends; Databases, Genetic; Databases, Nucleic Acid; Information Storage and Retrieval, methods; Internet; Molecular Sequence Data; Nucleic Acid Conformation; Nucleic Acid Denaturation; {RNA}, chemistry; Software; Thermodynamics},
nlm-id = {0411011},
owner = {NLM},
pii = {gkp892},
pmc = {PMC2808915},
pmid = {19880381},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Lu2009,
author = {Lu, Zhi John and Gloor, Jason W and Mathews, David H},
title = {Improved {RNA} secondary structure prediction by maximizing expected pair accuracy},
journal = {{RNA}},
year = {2009},
volume = {15},
number = {10},
pages = {1805--1813},
month = oct,
issn = {1469-9001},
abstract = {Free energy minimization has been the most popular method for {RNA} secondary structure prediction for decades. It is based on a set of empirical free energy change parameters derived from experiments using a nearest-neighbor model. In this study, a program, MaxExpect, that predicts {RNA} secondary structure by maximizing the expected base-pair accuracy, is reported. This approach was first pioneered in the program CONTRAfold, using pair probabilities predicted with a statistical learning method. Here, a partition function calculation that utilizes the free energy change nearest-neighbor parameters is used to predict base-pair probabilities as well as probabilities of nucleotides being single-stranded. MaxExpect predicts both the optimal structure (having highest expected pair accuracy) and suboptimal structures to serve as alternative hypotheses for the structure. Tested on a large database of different types of {RNA}, the maximum expected accuracy structures are, on average, of higher accuracy than minimum free energy structures. Accuracy is measured by sensitivity, the percentage of known base pairs correctly predicted, and positive predictive value (PPV), the percentage of predicted pairs that are in the known structure. By favoring double-strandedness or single-strandedness, a higher sensitivity or PPV of prediction can be favored, respectively. Using MaxExpect, the average PPV of optimal structure is improved from 66{\%} to 68{\%} at the same sensitivity level (73{\%}) compared with free energy minimization.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2009-10-06},
country = {United States},
doi = {10.1261/rna.1643609},
issn-linking = {1355-8382},
keywords = {free energy minimization,nearest-neighbor model,partition function,rna secondary structure},
nlm-id = {9509184},
owner = {NLM},
pii = {rna.1643609},
pmc = {PMC2743040},
pmid = {19703939},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-11-13},
}
@Article{Kalvari2018,
author = {Kalvari, Ioanna and Argasinska, Joanna and Quinones-Olvera, Natalia and Nawrocki, Eric P and Rivas, Elena and Eddy, Sean R and Bateman, Alex and Finn, Robert D and Petrov, Anton I},
title = {{Rfam} 13.0: shifting to a genome-centric resource for non-coding {RNA} families},
journal = {Nucleic Acids Research},
year = {2018},
volume = {46},
number = {D1},
pages = {D335--D342},
month = jan,
issn = {1362-4962},
abstract = {The Rfam database is a collection of {RNA} families in which each family is represented by a multiple sequence alignment, a consensus secondary structure, and a covariance model. In this paper we introduce Rfam release 13.0, which switches to a new genome-centric approach that annotates a non-redundant set of reference genomes with {RNA} families. We describe new web interface features including faceted text search and R-scape secondary structure visualizations. We discuss a new literature curation workflow and a pipeline for building families based on {RNA}central. There are 236 new families in release 13.0, bringing the total number of families to 2687. The Rfam website is http://rfam.org.},
chemicals = {{RNA}, Untranslated},
completed = {2019-08-01},
country = {England},
doi = {10.1093/nar/gkx1038},
issn-linking = {0305-1048},
keywords = {Databases, Nucleic Acid; Genome; Humans; Molecular Sequence Annotation; Nucleic Acid Conformation; {RNA}, Untranslated, chemistry, classification, genetics; Sequence Alignment; Sequence Analysis, {RNA}},
nlm-id = {0411011},
owner = {NLM},
pii = {4588106},
pmc = {PMC5753348},
pmid = {29112718},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2019-08-01},
}
@Article{Sankoff1985,
author = {Sankoff, David},
title = {Simultaneous Solution of the {RNA} Folding, Alignment and Protosequence Problems},
journal = {SIAM Journal on Applied Mathematics},
year = {1985},
volume = {45},
number = {5},
pages = {810--825},
doi = {10.1137/0145048},
url = {https://doi.org/10.1137/0145048},
}
@article{Hofacker1994,
  author    = {Hofacker, Ivo L and Fontana, Walter and Stadler, Peter F and Bonhoeffer, L Sebastian and Tacker, Manfred and Schuster, Peter},
  title     = {Fast folding and comparison of {RNA} secondary structures},
  journal   = {Monatshefte f{\"u}r Chemie/Chemical Monthly},
  publisher = {Springer},
  volume    = {125},
  number    = {2},
  pages     = {167--188},
  year      = {1994},
  doi       = {10.1007/BF00818163},
  owner     = {Yann},
  timestamp = {2017.11.03},
}
@Article{Will2015,
author = {Will, Sebastian and Otto, Christina and Miladi, Milad and Möhl, Mathias and Backofen, Rolf},
title = {{SPARSE}: quadratic time simultaneous alignment and folding of {RNA}s without sequence-based heuristics},
journal = {Bioinformatics},
year = {2015},
volume = {31},
number = {15},
pages = {2489--2496},
month = aug,
issn = {1367-4811},
abstract = {{RNA}-Seq experiments have revealed a multitude of novel nc{RNA}s. The gold standard for their analysis based on simultaneous alignment and folding suffers from extreme time complexity of [Formula: see text]. Subsequently, numerous faster 'Sankoff-style' approaches have been suggested. Commonly, the performance of such methods relies on sequence-based heuristics that restrict the search space to optimal or near-optimal sequence alignments; however, the accuracy of sequence-based methods breaks down for {RNA}s with sequence identities below 60{\%}. Alignment approaches like LocA{RNA} that do not require sequence-based heuristics, have been limited to high complexity ([Formula: see text] quartic time). Breaking this barrier, we introduce the novel Sankoff-style algorithm 'sparsified prediction and alignment of {RNA}s based on their structure ensembles (SPARSE)', which runs in quadratic time without sequence-based heuristics. To achieve this low complexity, on par with sequence alignment algorithms, SPARSE features strong sparsification based on structural properties of the {RNA} ensembles. Following PMcomp, SPARSE gains further speed-up from lightweight energy computation. Although all existing lightweight Sankoff-style methods restrict Sankoff's original model by disallowing loop deletions and insertions, SPARSE transfers the Sankoff algorithm to the lightweight energy model completely for the first time. Compared with LocA{RNA}, SPARSE achieves similar alignment and better folding quality in significantly less time (speedup: 3.7). At similar run-time, it aligns low sequence identity instances substantially more accurate than RAF, which uses sequence-based heuristics.},
citation-subset = {IM},
completed = {2016-03-10},
country = {England},
doi = {10.1093/bioinformatics/btv185},
issn-linking = {1367-4803},
keywords = {Algorithms; Heuristics; {RNA} Folding; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods},
nlm-id = {9808944},
owner = {NLM},
pii = {btv185},
pmc = {PMC4514930},
pmid = {25838465},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2018-12-02},
}
@inproceedings{Schnall-Levin2008,
  author    = {Schnall{-}Levin, Michael and Chindelevitch, Leonid and Berger, Bonnie},
  title     = {Inverting the {Viterbi} algorithm: an abstract framework for structure design},
  booktitle = {Machine Learning, Proceedings of the Twenty-Fifth International Conference {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  pages     = {904--911},
  year      = {2008},
  doi       = {10.1145/1390156.1390270},
  opturl    = {http://doi.acm.org/10.1145/1390156.1390270},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/icml/Schnall-LevinCB08},
  timestamp = {Thu, 14 Aug 2008 13:58:39 +0200},
}
@Article{Hammer2017,
author = {Hammer, Stefan and Tschiatschek, Birgit and Flamm, Christoph and Hofacker, Ivo L and Findeiß, Sven},
title = {{RNAblueprint}: flexible multiple target nucleic acid sequence design},
journal = {Bioinformatics},
year = {2017},
volume = {33},
number = {18},
pages = {2850--2858},
month = sep,
issn = {1367-4811},
abstract = {Realizing the value of synthetic biology in biotechnology and medicine requires the design of molecules with specialized functions. Due to its close structure to function relationship, and the availability of good structure prediction methods and energy models, {{RNA}} is perfectly suited to be synthetically engineered with predefined properties. However, currently available {{RNA}} design tools cannot be easily adapted to accommodate new design specifications. Furthermore, complicated sampling and optimization methods are often developed to suit a specific {{RNA}} design goal, adding to their inflexibility. We developed a C ++ library implementing a graph coloring approach to stochastically sample sequences compatible with structural and sequence constraints from the typically very large solution space. The approach allows to specify and explore the solution space in a well defined way. Our library also guarantees uniform sampling, which makes optimization runs performant by not only avoiding re-evaluation of already found solutions, but also by raising the probability of finding better solutions for long optimization runs. We show that our software can be combined with any other software package to allow diverse {{RNA}} design applications. Scripting interfaces allow the easy adaption of existing code to accommodate new scenarios, making the whole design process very flexible. We implemented example design approaches written in Python to demonstrate these advantages. {RNA}blueprint , Python implementations and benchmark datasets are available at github: https://github.com/Vienna{RNA} . s.hammer@univie.ac.at, ivo@tbi.univie.ac.at or sven@tbi.univie.ac.at. Supplementary data are available at Bioinformatics online.},
country = {England},
created = {2017-04-27},
doi = {10.1093/bioinformatics/btx263},
issn-linking = {1367-4803},
nlm-id = {9808944},
owner = {NLM},
pii = {3752511},
pmid = {28449031},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2017-09-15},
timestamp = {2017.11.03},
}
@inproceedings{Runge2019,
  author    = {Runge, Frederic and Stoll, Danny and Falkner, Stefan and Hutter, Frank},
  title     = {Learning to Design {RNA}},
  booktitle = {International Conference on Learning Representations},
  year      = {2019},
  url       = {https://openreview.net/forum?id=ByfyHh05tQ},
}
@Article{Koodli2019,
author = {Koodli, Rohan V. and Keep, Benjamin and Coppess, Katherine R. and Portela, Fernando and {Eterna participants} and Das, Rhiju},
title = {{EternaBrain}: Automated {RNA} design through move sets and strategies from an Internet-scale {RNA} videogame},
journal = {PLOS Computational Biology},
year = {2019},
volume = {15},
number = {6},
pages = {1--22},
month = jun,
abstract = {Author summary The design of {RNA} sequences that fold into target structures is a computationally difficult task whose importance continues to grow with the advent of {RNA}-based therapeutics and diagnostics. This paper reports a new approach stemming from the Eterna massive open laboratory, a project that crowdsources {RNA} design to >250,000 ‘players’ on the internet. The efforts of Eterna participants have led to the accumulation of nearly 2 million moves that lead to successful in silico solutions on difficult puzzles, many of which are only solvable by humans. Inspired by recent advances in automated game playing, we discovered that these moves are sufficiently stereotyped so that a neural network can be trained to predict moves with accuracy significantly higher than random guessing. The resulting method EternaBrain allows solution of new {RNA} design problems when used to predict complete series of moves rather than just single moves. Further improvement comes from heuristic strategies that are well known amongst the Eterna community but not described in prior publications on automated {RNA} design. EternaBrain appears highly complementary to other emerging next-generation {RNA} design methods based on neural-network and game playing approaches, suggesting new routes for automated methods to emulate human experts in {RNA} design.},
doi = {10.1371/journal.pcbi.1007059},
publisher = {Public Library of Science},
url = {https://doi.org/10.1371/journal.pcbi.1007059},
}
@Article{Duraj2014,
author = {Jetlir Duraj},
title = {Random walks in cones: The case of nonzero drift},
journal = {Stochastic Processes and their Applications},
year = {2014},
volume = {124},
number = {4},
pages = {1503--1518},
month = apr,
doi = {10.1016/j.spa.2013.12.003},
publisher = {Elsevier {BV}},
}
@Article{Aho1975,
author = {Alfred V. Aho and Margaret J. Corasick},
title = {Efficient string matching: an aid to bibliographic search},
journal = {Communications of the {ACM}},
year = {1975},
volume = {18},
number = {6},
pages = {333--340},
month = jun,
doi = {10.1145/360825.360855},
publisher = {Association for Computing Machinery ({ACM})},
}
@article{Raeihae1981,
  author    = {Räihä, Kari-Jouko and Ukkonen, Esko},
  title     = {The shortest common supersequence problem over binary alphabet is {NP}-complete},
  journal   = {Theoretical Computer Science},
  publisher = {Elsevier {BV}},
  volume    = {16},
  number    = {2},
  pages     = {187--198},
  year      = {1981},
  doi       = {10.1016/0304-3975(81)90075-x},
}
@Article{Flamm2001,
author = {Flamm, C. and Hofacker, I. L. and Maurer-Stroh, S. and Stadler, P. F. and Zehl, M.},
title = {Design of multistable {RNA} molecules},
journal = {{RNA}},
year = {2001},
volume = {7},
number = {2},
pages = {254--265},
month = feb,
issn = {1355-8382},
abstract = {We show that the problem of designing {{RNA}} sequences that can fold into multiple stable secondary structures can be transformed into a combinatorial optimization problem that can be solved by means of simple heuristics. Hence it is feasible to design {{RNA}} switches with prescribed structural alternatives. We discuss the theoretical background and present an efficient tool that allows the design of various types of switches. We argue that both the general properties of the sequence structure map of {{RNA}} secondary structures and the ease with which our design tool finds bistable {RNA}s strongly indicates that {{RNA}} switches are easily accessible in evolution. Thus conformational switches are yet another function for which {{RNA}} can be employed.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2001-03-22},
country = {United States},
created = {2001-03-06},
issn-linking = {1355-8382},
keywords = {Base Pairing; Base Sequence; Computer Simulation; Drug Design; Hot Temperature; Mathematics; Models, Molecular; Mutation; Nucleic Acid Conformation; Phylogeny; {RNA}, chemistry, metabolism, pharmacology; {{RNA}} Stability},
nlm = {PMC1370083},
nlm-id = {9509184},
owner = {NLM},
pmc = {PMC1370083},
pmid = {11233982},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2017-02-19},
timestamp = {2017.09.20},
}
@Article{Ge2012,
author = {Ge, Qi and {\v{S}}tefankovi{\v{c}}, Daniel},
title = {A graph polynomial for independent sets of bipartite graphs},
journal = {Combinatorics, Probability and Computing},
year = {2012},
volume = {21},
number = {5},
pages = {695--714},
owner = {Yann},
publisher = {Cambridge Univ Press},
timestamp = {2017.02.18},
}
@Article{Wilf1977,
author = {Wilf, Herbert S.},
title = {A Unified Setting for Sequencing, Ranking, and Selection Algorithms for Combinatorial Objects},
journal = {Advances in Mathematics},
year = {1977},
volume = {24},
pages = {281--291},
owner = {ponty},
timestamp = {2008.12.28},
}
@Article{Tinoco1999,
author = {Ignacio Tinoco and Carlos Bustamante},
title = {How {RNA} folds},
journal = {Journal of Molecular Biology},
year = {1999},
volume = {293},
number = {2},
pages = {271--281},
month = oct,
abstract = {We describe the {RNA} folding problem and contrast it with the much more difficult protein folding problem. {RNA} has four similar monomer units, whereas proteins have 20 very different residues. The folding of {RNA} is hierarchical in that secondary structure is much more stable than tertiary folding. In {RNA} the two levels of folding (secondary and tertiary) can be experimentally separated by the presence or absence of Mg2+. Secondary structure can be predicted successfully from experimental thermodynamic data on secondary structure elements: helices, loops, and bulges. Tertiary interactions can then be added without much distortion of the secondary structure. These observations suggest a folding algorithm to predict the structure of an {RNA} from its sequence. However, to solve the {RNA} folding problem one needs thermodynamic data on tertiary structure interactions, and identification and characterization of metal-ion binding sites. These data, together with force versus extension measurements on single {RNA} molecules, should provide the information necessary to test and refine the proposed algorithm.},
bdsk-url-1 = {https://dx.doi.org/10.1006/jmbi.1999.3001},
date-added = {2018-01-28 04:00:03 +0000},
date-modified = {2018-01-28 04:00:03 +0000},
doi = {10.1006/jmbi.1999.3001},
journal-full = {Journal of molecular biology},
mesh = {Algorithms; Base Sequence; Metals; Nucleic Acid Conformation; Protein Folding; {RNA}; Thermodynamics},
pmid = {10550208},
pst = {ppublish},
}
@Article{Miao2017,
author = {Miao, Zhichao and {RNA-Puzzles Consortium} and Westhof, Eric},
title = {{RNA-Puzzles Round III}: {3D} {RNA} structure prediction of five riboswitches and one ribozyme},
journal = {{RNA}},
year = {2017},
volume = {23},
number = {5},
pages = {655--672},
issn = {1469-9001},
doi = {10.1261/rna.060368.116},
}
@Article{Michel1990,
  author    = {Michel, Fran{\c{c}}ois and Westhof, Eric},
  title     = {Modelling of the three-dimensional architecture of group {I} catalytic introns based on comparative sequence analysis},
  journal   = {Journal of Molecular Biology},
  year      = {1990},
  volume    = {216},
  number    = {3},
  pages     = {585--610},
  month     = dec,
  doi       = {10.1016/0022-2836(90)90386-z},
  publisher = {Elsevier {BV}},
}
@Article{Deigan2009,
  author    = {Deigan, Katherine E. and Li, Tian W. and Mathews, David H. and Weeks, Kevin M.},
  title     = {{Accurate SHAPE-directed {RNA} structure determination}},
  journal   = {Proc Natl Acad Sci U S A},
  year      = {2009},
  volume    = {106},
  number    = {1},
  pages     = {97--102},
  issn      = {0027-8424},
  abstract  = {Almost all {RNA}s can fold to form extensive base-paired secondary structures. Many of these structures then modulate numerous fundamental elements of gene expression. Deducing these structure{\textendash}function relationships requires that it be possible to predict {RNA} secondary structures accurately. However, {RNA} secondary structure prediction for large {RNA}s, such that a single predicted structure for a single sequence reliably represents the correct structure, has remained an unsolved problem. Here, we demonstrate that quantitative, nucleotide-resolution information from a SHAPE experiment can be interpreted as a pseudo-free energy change term and used to determine {RNA} secondary structure with high accuracy. Free energy minimization, by using SHAPE pseudo-free energies, in conjunction with nearest neighbor parameters, predicts the secondary structure of deproteinized Escherichia coli 16S r{RNA} ({\textgreater}1,300 nt) and a set of smaller {RNA}s (75{\textendash}155 nt) with accuracies of up to 96{\textendash}100\%, which are comparable to the best accuracies achievable by comparative sequence analysis.},
  doi       = {10.1073/pnas.0806929106},
  pdf       = {https://www.pnas.org/content/106/1/97.full.pdf},
  publisher = {National Academy of Sciences},
  url       = {https://www.pnas.org/content/106/1/97},
}
@Article{Smola2015,
  author          = {Smola, Matthew J and Rice, Greggory M and Busan, Steven and Siegfried, Nathan A and Weeks, Kevin M},
  title           = {{Selective 2'-hydroxyl acylation analyzed by primer extension and mutational profiling ({SHAPE-MaP}) for direct, versatile and accurate {{RNA}} structure analysis}},
  journal         = {Nat Protoc},
  year            = {2015},
  volume          = {10},
  number          = {11},
  pages           = {1643--1669},
  issn            = {1750-2799},
  abstract        = {Selective 2'-hydroxyl acylation analyzed by primer extension (SHAPE) chemistries exploit small electrophilic reagents that react with 2'-hydroxyl groups to interrogate {RNA} structure at single-nucleotide resolution. Mutational profiling (MaP) identifies modified residues by using reverse transcriptase to misread a SHAPE-modified nucleotide and then counting the resulting mutations by massively parallel sequencing. The SHAPE-MaP approach measures the structure of large and transcriptome-wide systems as accurately as can be done for simple model {RNA}s. This protocol describes the experimental steps, implemented over 3 d, that are required to perform SHAPE probing and to construct multiplexed SHAPE-MaP libraries suitable for deep sequencing. Automated processing of MaP sequencing data is accomplished using two software packages. ShapeMapper converts raw sequencing files into mutational profiles, creates SHAPE reactivity plots and provides useful troubleshooting information. SuperFold uses these data to model {RNA} secondary structures, identify regions with well-defined structures and visualize probable and alternative helices, often in under 1 d. SHAPE-MaP can be used to make nucleotide-resolution biophysical measurements of individual {RNA} motifs, rare components of complex {RNA} ensembles and entire transcriptomes.},
  chemicals       = {{RNA}},
  citation-subset = {IM},
  completed       = {2016-03-28},
  country         = {England},
  doi             = {10.1038/nprot.2015.103},
  issn-linking    = {1750-2799},
  keywords        = {Acylation; Computational Biology, methods; Models, Molecular; Molecular Biology, methods; Mutation; Nucleic Acid Conformation; {RNA}, chemistry; {RNA} Processing, Post-Transcriptional},
  mid             = {NIHMS790669},
  nlm-id          = {101284307},
  owner           = {NLM},
  pii             = {nprot.2015.103},
  pmc             = {PMC4900152},
  pmid            = {26426499},
  pubmodel        = {Print-Electronic},
  pubstatus       = {ppublish},
  revised         = {2018-11-13},
}
@Article{Wilkinson2006,
  author          = {Wilkinson, Kevin A and Merino, Edward J and Weeks, Kevin M},
  title           = {{Selective 2'-hydroxyl acylation analyzed by primer extension (SHAPE): quantitative {RNA} structure analysis at single nucleotide resolution}},
  journal         = {Nat Protoc},
  year            = {2006},
  volume          = {1},
  number          = {3},
  pages           = {1610--1616},
  issn            = {1750-2799},
  abstract        = {Selective 2'-hydroxyl acylation analyzed by primer extension (SHAPE) interrogates local backbone flexibility in {RNA} at single-nucleotide resolution under diverse solution environments. Flexible {RNA} nucleotides preferentially sample local conformations that enhance the nucleophilic reactivity of 2'-hydroxyl groups toward electrophiles, such as N-methylisatoic anhydride (NMIA). Modified sites are detected as stops in an optimized primer extension reaction, followed by electrophoretic fragment separation. SHAPE chemistry scores local nucleotide flexibility at all four ribonucleotides in a single experiment and discriminates between base-paired versus unconstrained or flexible residues with a dynamic range of 20-fold or greater. Quantitative SHAPE reactivity information can be used to establish the secondary structure of an {RNA}, to improve the accuracy of structure prediction algorithms, to monitor structural differences between related {RNA}s or a single {RNA} in different states, and to detect ligand binding sites. SHAPE chemistry rarely needs significant optimization and requires two days to complete for an {RNA} of 100-200 nucleotides.},
  chemicals       = {DNA Primers, {RNA}},
  citation-subset = {IM},
  completed       = {2007-06-28},
  country         = {England},
  doi             = {10.1038/nprot.2006.249},
  issn-linking    = {1750-2799},
  keywords        = {Acylation; DNA Primers; Electrophoresis; Molecular Structure; Nucleic Acid Conformation; {RNA}, chemistry, genetics; Sequence Analysis, {RNA}; Structure-Activity Relationship},
  nlm-id          = {101284307},
  owner           = {NLM},
  pii             = {nprot.2006.249},
  pmid            = {17406453},
  pubmodel        = {Print},
  pubstatus       = {ppublish},
  revised         = {2008-03-24},
}
@Article{Higgs2014,
  author    = {Higgs, Paul G. and Lehman, Niles},
  title     = {The {{RNA}} World: molecular cooperation at the origins of life},
  journal   = {Nature Reviews Genetics},
  year      = {2014},
  volume    = {16},
  number    = {1},
  pages     = {7--17},
  month     = nov,
  doi       = {10.1038/nrg3841},
  publisher = {Springer Science and Business Media {LLC}},
}
@Article{Eddy1994,
  author    = {Eddy, Sean R. and Durbin, Richard},
  title     = {{{RNA}} sequence analysis using covariance models},
  journal   = {Nucleic Acids Research},
  year      = {1994},
  volume    = {22},
  number    = {11},
  pages     = {2079--2088},
  doi       = {10.1093/nar/22.11.2079},
  publisher = {Oxford University Press ({OUP})},
}
@Article{Lai2013,
  author    = {Lai, D. and Proctor, J. R. and Meyer, I. M.},
  title     = {On the importance of cotranscriptional {{RNA}} structure formation},
  journal   = {{{RNA}}},
  year      = {2013},
  volume    = {19},
  number    = {11},
  pages     = {1461--1473},
  month     = oct,
  doi       = {10.1261/rna.037390.112},
  publisher = {Cold Spring Harbor Laboratory},
}
@Article{Wolfinger2004,
  author    = {Wolfinger, Michael T and Svrcek-Seiler, W Andreas and Flamm, Christoph and Hofacker, Ivo L and Stadler, Peter F},
  title     = {Efficient computation of {{RNA}} folding dynamics},
  journal   = {Journal of Physics A: Mathematical and General},
  year      = {2004},
  volume    = {37},
  number    = {17},
  pages     = {4731--4741},
  month     = apr,
  doi       = {10.1088/0305-4470/37/17/005},
  publisher = {{IOP} Publishing},
}
@Article{Xayaphoummine2003,
  author    = {Xayaphoummine, A. and Bucher, T. and Thalmann, F. and Isambert, H.},
  title     = {{Prediction and statistics of pseudoknots in {{RNA}} structures using exactly clustered stochastic simulations}},
  journal   = {Proc. Natl. Acad. Sci. U. S. A.},
  year      = {2003},
  volume    = {100},
  number    = {26},
  pages     = {15310--15315},
  owner     = {ponty},
  timestamp = {2008.12.28},
}
@Article{Zuker1981,
  author        = {Zuker, Michael and Stiegler, Patrick},
  title         = {Optimal computer folding of large {{RNA}} sequences using thermodynamics and auxiliary information},
  journal       = {Nucleic Acids Research},
  year          = {1981},
  volume        = {9},
  number        = {1},
  pages         = {133--148},
  __markedentry = {[yann:6]},
  doi           = {10.1093/nar/9.1.133},
  publisher     = {Oxford University Press ({OUP})},
}
@Article{Mathews2002,
  author    = {Mathews, David H. and Turner, Douglas H.},
  title     = {Dynalign: an algorithm for finding the secondary structure common to two {{RNA}} sequences},
  journal   = {Journal of Molecular Biology},
  year      = {2002},
  volume    = {317},
  number    = {2},
  pages     = {191--203},
  month     = mar,
  doi       = {10.1006/jmbi.2001.5351},
  publisher = {Elsevier {BV}},
}
@Article{Sundfeld2015,
  author    = {Sundfeld, Daniel and Havgaard, Jakob H. and de Melo, Alba C. M. A. and Gorodkin, Jan},
  title     = {Foldalign 2.5: multithreaded implementation for pairwise structural {{RNA}} alignment},
  journal   = {Bioinformatics},
  year      = {2015},
  volume    = {32},
  number    = {8},
  pages     = {1238--1240},
  month     = dec,
  doi       = {10.1093/bioinformatics/btv748},
  publisher = {Oxford University Press ({OUP})},
}
@Article{Mathews2004,
  author    = {Mathews, D. H.},
  title     = {Using an {{RNA}} secondary structure partition function to determine confidence in base pairs predicted by free energy minimization},
  journal   = {{{RNA}}},
  year      = {2004},
  volume    = {10},
  number    = {8},
  pages     = {1178--1190},
  month     = jul,
  doi       = {10.1261/rna.7650904},
  publisher = {Cold Spring Harbor Laboratory},
}
@Article{Spasic2017,
  author   = {Spasic, A. and Assmann, S. M. and Bevilacqua, P. C. and Mathews, D. H.},
  title    = {{Modeling {RNA} secondary structure folding ensembles using SHAPE mapping data}},
  journal  = {Nucleic Acids Res},
  year     = {2017},
  volume   = {46},
  number   = {1},
  pages    = {314--323},
  abstract = {{RNA} secondary structure prediction is widely used for developing hypotheses about the structures of {RNA} sequences, and structure can provide insight about {RNA} function. The accuracy of structure prediction is known to be improved using experimental mapping data that provide information about the pairing status of single nucleotides, and these data can now be acquired for whole transcriptomes using high-throughput sequencing. Prior methods for using these experimental data focused on predicting structures for sequences assuming that they populate a single structure. Most {RNA}s populate multiple structures, however, where the ensemble of strands populates structures with different sets of canonical base pairs. The focus on modeling single structures has been a bottleneck for accurately modeling {RNA} structure. In this work, we introduce Rsample, an algorithm for using experimental data to predict more than one {RNA} structure for sequences that populate multiple structures at equilibrium. We demonstrate, using SHAPE mapping data, that we can accurately model {RNA} sequences that populate multiple structures, including the relative probabilities of those structures. This program is freely available as part of the {RNA}structure software package.},
  doi      = {10.1093/nar/gkx1057},
  url      = {https://www.ncbi.nlm.nih.gov/pubmed/29177466},
}
@Article{Mathews2004a,
  author   = {Mathews, David H and Disney, Matthew D and Childs, Jessica L and Schroeder, Susan J and Zuker, Michael and Turner, Douglas H},
  title    = {{Incorporating chemical modification constraints into a dynamic programming algorithm for prediction of {RNA} secondary structure}},
  journal  = {Proc Natl Acad Sci U S A},
  year     = {2004},
  volume   = {101},
  number   = {19},
  pages    = {7287--7292},
  abstract = {A dynamic programming algorithm for prediction of {RNA} secondary structure has been revised to accommodate folding constraints determined by chemical modification and to include free energy increments for coaxial stacking of helices when they are either adjacent or separated by a single mismatch. Furthermore, free energy parameters are revised to account for recent experimental results for terminal mismatches and hairpin, bulge, internal, and multibranch loops. To demonstrate the applicability of this method, in vivo modification was performed on 5S r{RNA} in both Escherichia coli and Candida albicans with 1-cyclohexyl-3-(2-morpholinoethyl) carbodiimide metho-p-toluene sulfonate, dimethyl sulfate, and kethoxal. The percentage of known base pairs in the predicted structure increased from 26.3{\%} to 86.8{\%} for the E. coli sequence by using modification constraints. For C. albicans, the accuracy remained 87.5{\%} both with and without modification data. On average, for these sequences and a set of 14 sequences with known secondary structure and chemical modification data taken from the literature, accuracy improves from 67{\%} to 76{\%}. This enhancement primarily reflects improvement for three sequences that are predicted with {\textless}40{\%} accuracy on the basis of energetics alone. For these sequences, inclusion of chemical modification constraints improves the average accuracy from 28{\%} to 78{\%}. For the 11 sequences with {\textless}6{\%} pseudoknotted base pairs, structures predicted with constraints from chemical modification contain on average 84{\%} of known canonical base pairs.},
  keywords = {Algorithms,Base Pair Mismatch,Base Sequence,Candida albicans,Candida albicans: genetics,DNA Primers,Escherichia coli,Escherichia coli: genetics,Molecular Sequence Data,Nucleic Acid Conformation,{RNA}, Bacterial,{RNA}, Bacterial: chemistry,{RNA}, Fungal,{RNA}, Fungal: chemistry},
  url      = {http://www.pnas.org/content/101/19/7287.full},
}
@Article{Darty2009,
  author      = {Darty, K{\'e}vin and Denise, Alain and Ponty, Yann},
  title       = {{VARNA: Interactive drawing and editing of the RNA secondary structure}},
  journal     = {{Bioinformatics}},
  year        = {2009},
  volume      = {25},
  number      = {15},
  pages       = {1974--1975},
  month       = Aug,
  doi         = {10.1093/bioinformatics/btp250},
  hal_id      = {hal-00432548},
  hal_version = {v1},
  pdf         = {https://hal.archives-ouvertes.fr/hal-00432548/file/VARNA-Bioinfo-R2-NoHighlight.pdf},
  publisher   = {{Oxford University Press (OUP)}},
  url         = {https://hal.archives-ouvertes.fr/hal-00432548},
}
@Comment{jabref-meta: databaseType:bibtex;}