Newer
Older
@Article{Lee2014-eterna100,
  author = {Lee, Jeehyung and Kladwang, Wipapat and Lee, Minjae and Cantu, Daniel and Azizyan, Martin and Kim, Hanjoo and Limpaecher, Alex and Yoon, Sungroh and Treuille, Adrien and Das, Rhiju and {EteRNA Participants}},
  title = {{RNA} design rules from a massive open laboratory},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  year = {2014},
  month = feb,
  volume = {111},
  pages = {2122--2127},
  issn = {1091-6490},
  doi = {10.1073/pnas.1313039111},
  abstract = {Self-assembling {RNA} molecules present compelling substrates for the rational interrogation and control of living systems. However, imperfect in silico models--even at the secondary structure level--hinder the design of new {RNA}s that function properly when synthesized. Here, we present a unique and potentially general approach to such empirical problems: the Massive Open Laboratory. The EteRNA project connects 37,000 enthusiasts to {RNA} design puzzles through an online interface. Uniquely, EteRNA participants not only manipulate simulated molecules but also control a remote experimental pipeline for high-throughput {RNA} synthesis and structure mapping. We show herein that the EteRNA community leveraged dozens of cycles of continuous wet laboratory feedback to learn strategies for solving in vitro {RNA} design problems on which automated methods fail. The top strategies--including several previously unrecognized negative design rules--were distilled by machine learning into an algorithm, EteRNABot. Over a rigorous 1-y testing phase, both the EteRNA community and EteRNABot significantly outperformed prior algorithms in a dozen {RNA} secondary structure design tests, including the creation of dendrimer-like structures and scaffolds for small molecule sensors. These results show that an online community can carry out large-scale experiments, hypothesis generation, and algorithm design to create practical advances in empirical science.},
  chemicals = {RNA},
  citation-subset = {IM},
  completed = {2014-05-15},
  country = {United States},
  issn-linking = {0027-8424},
  keywords = {Algorithms; Laboratories, organization \& administration; Nucleic Acid Conformation; {RNA}, chemistry; Software; User-Computer Interface; {RNA} folding; citizen science; crowdsourcing; high-throughput experiments},
  nlm-id = {7505876},
  owner = {NLM},
  pii = {1313039111},
  pmc = {PMC3926058},
  pmid = {24469816},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2018-11-13},
}
@article{Domin2017,
  author = {Domin, Gesine and Findei{\ss}, Sven and Wachsmuth, Manja and Will, Sebastian and Stadler, Peter F. and M{\"o}rl, Mario},
  title = {Applicability of a computational design approach for synthetic riboswitches},
  journal = {Nucleic Acids Research},
  volume = {45},
  number = {7},
  pages = {4108},
  year = {2017},
  month = apr,
  publisher = {Oxford University Press},
  doi = {10.1093/nar/gkw1267},
}
@InProceedings{Yao2021,
  author = {Yao, Hua-Ting and Waldisp{\"u}hl, J{\'e}r{\^o}me and Ponty, Yann and Will, Sebastian},
  title = {Taming Disruptive Base Pairs to Reconcile Positive and Negative Structural Design of {RNA}},
  booktitle = {Research in Computational Molecular Biology - 25th Annual International Conference, {RECOMB} 2021},
  year = {2021},
  month = apr,
}
@article{Andronescu2007,
  author = {Andronescu, Mirela and Condon, Anne and Hoos, Holger H. and Mathews, David H. and Murphy, Kevin P.},
  title = {Efficient parameter estimation for {RNA} secondary structure prediction},
  journal = {Bioinformatics},
  volume = {23},
  number = {13},
  pages = {i19--i28},
  year = {2007},
  month = jul,
  issn = {1367-4803},
  publisher = {Oxford Academic},
  doi = {10.1093/bioinformatics/btm223},
}
@article{Bodini2010,
  author = {Bodini, Olivier and Ponty, Yann},
  title = {Multi-dimensional {Boltzmann} Sampling of Languages},
  journal = {Discrete Mathematics and Theoretical Computer Science},
  volume = {AM},
  pages = {49--64},
  year = {2010},
  month = jun,
  publisher = {Discrete Mathematics and Theoretical Computer Science},
  note = {DMTCS Proceedings, 21st International Meeting on Probabilistic, Combinatorial, and Asymptotic Methods in the Analysis of Algorithms (AofA'10)},
  url = {https://hal.archives-ouvertes.fr/hal-00450763},
}
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@PhdThesis{YPonty2006,
  author = {Ponty, Yann},
  title = {Models for structured genomic sequences, random generation and applications},
  school = {Universit{\'e} Paris Sud - Paris XI},
  year = {2006},
  month = nov,
  hal_document_type = {Theses},
  hal_id = {tel-00144130},
  hal_local_reference = {No d'ordre 8480},
  hal_version = {v2},
  keywords = {Models for random sequences ; {RNA} structure ; Context-free grammars ; Random generation ; Enumerative combinatorics ; Modèles de séquences aléatoires ; Structure de l'ARN ; Grammaires hors-contexte pondérées ; Génération aléatoire ; Combinatoire énumérative},
  pdf = {https://tel.archives-ouvertes.fr/tel-00144130/file/these-ponty.pdf},
  url = {https://tel.archives-ouvertes.fr/tel-00144130},
}
@article{Barcucci1994,
  author    = {Barcucci, Elena and Pinzani, Renzo and Sprugnoli, Renzo},
  title     = {The random generation of directed animals},
  journal   = {Theoretical Computer Science},
  volume    = {127},
  number    = {2},
  pages     = {333--350},
  year      = {1994},
  publisher = {Elsevier},
}
@Article{Nicaud2010,
  author = {Nicaud, Cyril and Gouyou-Beauchamps, Dominique},
  title = {Random Generation Using Binomial Approximations},
  journal = {Discrete Mathematics and Theoretical Computer Science},
  year = {2010},
  publisher = {Episciences.org},
}
@article{Flajolet1987,
  author  = {Flajolet, Philippe},
  title   = {Analytic models and ambiguity of context-free languages},
  journal = {Theoretical Computer Science},
  volume  = {49},
  pages   = {283--309},
  year    = {1987},
  issn    = {0304-3975},
  doi     = {10.1016/0304-3975(87)90011-9},
}
@Article{Denise1999,
  author = {Denise, Alain and Zimmermann, Paul},
  title = {Uniform random generation of decomposable structures using floating-point arithmetic},
  journal = {Theoretical Computer Science},
  year = {1999},
  volume = {218},
  number = {2},
  pages = {233--248},
  issn = {0304-3975},
  abstract = {The recursive method formalized by Nijenhuis and Wilf (1998) and systematized by Flajolet, Van Cutsem and Zimmermann (1994), is extended here to floating-point arithmetic. The resulting ADZ method enables one to generate decomposable data structures — both labelled or unlabelled — uniformly at random, in expected O(n1 + ε) time and space, after a preprocessing phase of O(n2 + ε) time, which reduces to O(n1 + ε) for context-free grammars.
Résumé
La méthode récursive mise au point par Nijenhuis et Wilf (1998) et systématisée par Flajolet, Van Cutsem et Zimmermann (1994), est ici étendue à l'utilisation de nombres flottants. La méthode qui en découle, appelée ADZ, permet de générer aléatoirement et uniformément des structures décomposables — étiquetées ou non — en temps et espace moyens O(n1 + ε), après un précalcul de complexité en temps O(n2 + ε), se réduisant à O(n1 + ε) pour des grammaires algébriques.},
  doi = {10.1016/S0304-3975(98)00323-5},
  url = {http://www.sciencedirect.com/science/article/pii/S0304397598003235},
}
@Article{Mishna2009,
  author = {Mishna, Marni and Rechnitzer, Andrew},
  title = {Two non-holonomic lattice walks in the quarter plane},
  journal = {Theoretical Computer Science},
  year = {2009},
  volume = {410},
  number = {38--40},
  pages = {3616--3630},
  publisher = {Elsevier},
}
@inproceedings{Bendkowski2018,
  author       = {Bendkowski, Maciej and Bodini, Olivier and Dovgal, Sergey},
  title        = {Polynomial tuning of multiparametric combinatorial samplers},
  booktitle    = {2018 Proceedings of the Fifteenth Workshop on Analytic Algorithmics and Combinatorics (ANALCO)},
  pages        = {92--106},
  year         = {2018},
  organization = {SIAM},
}
@Book{Nesterov1994,
  author = {Nesterov, Yurii and Nemirovskii, Arkadii},
  title = {Interior-point polynomial algorithms in convex programming},
  publisher = {{SIAM}},
  year = {1994},
  volume = {13},
}
@Article{Duchon2004,
  author = {Duchon, Philippe and Flajolet, Philippe and Louchard, Guy and Schaeffer, Gilles},
  title = {{Boltzmann} samplers for the random generation of combinatorial structures},
  journal = {Combinatorics, Probability and Computing},
  year = {2004},
  volume = {13},
  number = {4--5},
  pages = {577--625},
  publisher = {Cambridge University Press},
}
@Article{Altschul1985,
  author = {Altschul, Stephen F and Erickson, Blake W},
  title = {Significance of nucleotide sequence alignments: a method for random sequence permutation that preserves dinucleotide and codon usage},
  journal = {Molecular biology and evolution},
  year = {1985},
  volume = {2},
  number = {6},
  pages = {526--538},
  month = nov,
  issn = {0737-4038},
  abstract = {The similarity of two nucleotide sequences is often expressed in terms of evolutionary distance, a measure of the amount of change needed to transform one sequence into the other. Given two sequences with a small distance between them, can their similarity be explained by their base composition alone? The nucleotide order of these sequences contributes to their similarity if the distance is much smaller than their average permutation distance, which is obtained by calculating the distances for many random permutations of these sequences. To determine whether their similarity can be explained by their dinucleotide and codon usage, random sequences must be chosen from the set of permuted sequences that preserve dinucleotide and codon usage. The problem of choosing random dinucleotide and codon-preserving permutations can be expressed in the language of graph theory as the problem of generating random Eulerian walks on a directed multigraph. An efficient algorithm for generating such walks is described. This algorithm can be used to choose random sequence permutations that preserve (1) dinucleotide usage, (2) dinucleotide and trinucleotide usage, or (3) dinucleotide and codon usage. For example, the similarity of two 60-nucleotide DNA segments from the human beta-1 interferon gene (nucleotides 196-255 and 499-558) is not just the result of their nonrandom dinucleotide and codon usage.},
  chemicals = {Codon, Interferon Type I},
  citation-subset = {IM},
  completed = {1988-06-20},
  country = {United States},
  doi = {10.1093/oxfordjournals.molbev.a040370},
  issn-linking = {0737-4038},
  keywords = {Base Sequence; Biological Evolution; Codon, genetics; Humans; Interferon Type I, genetics; Models, Genetic; Molecular Sequence Data; Sequence Homology, Nucleic Acid},
  nlm-id = {8501455},
  owner = {NLM},
  pmid = {3870875},
  pubmodel = {Print},
  pubstatus = {ppublish},
  revised = {2018-01-09},
}
@Article{Hurst2001,
  author = {Hurst, L D and Merchant, A R},
  title = {High guanine-cytosine content is not an adaptation to high temperature: a comparative analysis amongst prokaryotes},
  journal = {Proceedings. Biological sciences},
  year = {2001},
  volume = {268},
  number = {1466},
  pages = {493--497},
  month = mar,
  issn = {0962-8452},
  abstract = {The causes of the variation between genomes in their guanine (G) and cytosine (C) content is one of the central issues in evolutionary genomics. The thermal adaptation hypothesis conjectures that, as G:C pairs in DNA are more thermally stable than adenonine:thymine pairs, high GC content may he a selective response to high temperature. A compilation of data on genomic GC content and optimal growth temperature for numerous prokaryotes failed to demonstrate the predicted correlation. By contrast, the GC content of Structural {RNA}s is higher at high temperatures. The issue that we address here is whether more freely evolving sites in exons (i.e. codonic third positions) evolve in the same manner as genomic DNA as a whole, Showing no correlated response, or like structural {RNA}s showing a strong correlation. The latter pattern would provide strong support for the thermal adaptation hypothesis, as the variation in GC content between orthologous genes is typically most profoundly seen at codon third sites (GC3). Simple analysis of completely sequenced prokaryotic genomes shows that GC3, but not genomic GC, is higher on average in thermophilic species. This demonstrates, if nothing else, that the results from the two measures cannot be presumed to be the same. A proper analysis, however, requires phylogenetic control. Here, therefore, we report the results of a comparative analysis of GC composition and optimal growth temperature for over 100 prokaryotes. Comparative analysis fails to show, in either Archea or Eubacteria, any hint of connection between optimal growth temperature and GC content in the genome as a whole, in protein-coding regions or, more crucially at GC. Conversely, comparable analysis confirms that GC content of structural {RNA} is strongly correlated with optimal temperature.
Against the expectations of the thermal adaptation hypothesis, within prokaryotes GC content in protein-coding genies, even at relatively freely evolving sites, cannot be considered an adaptation to the thermal environment.},
  chemicals = {DNA, Archaeal, DNA, Bacterial},
  citation-subset = {IM},
  completed = {2001-06-28},
  country = {England},
  doi = {10.1098/rspb.2000.1397},
  issn-linking = {0962-8452},
  keywords = {Adaptation, Physiological; Archaea, chemistry, genetics; Bacteria, chemistry, genetics; Base Composition; DNA, Archaeal, chemistry, genetics; DNA, Bacterial, chemistry, genetics; Evolution, Molecular; Genetic Variation; Prokaryotic Cells; Temperature},
  nlm-id = {101245157},
  owner = {NLM},
  pmc = {PMC1088632},
  pmid = {11296861},
  pubmodel = {Print},
  pubstatus = {ppublish},
  revised = {2018-11-13},
}
@Article{Hildebrand2010,
  author = {Hildebrand, Falk and Meyer, Axel and Eyre-Walker, Adam},
  title = {Evidence of selection upon genomic {GC}-content in bacteria},
  journal = {PLoS genetics},
  year = {2010},
  volume = {6},
  number = {9},
  pages = {e1001107},
  month = sep,
  issn = {1553-7404},
  abstract = {The genomic GC-content of bacteria varies dramatically, from less than 20\% to more than 70\%. This variation is generally ascribed to differences in the pattern of mutation between bacteria. Here we test this hypothesis by examining patterns of synonymous polymorphism using datasets from 149 bacterial species. We find a large excess of synonymous GC→AT mutations over AT→GC mutations segregating in all but the most AT-rich bacteria, across a broad range of phylogenetically diverse species. We show that the excess of GC→AT mutations is inconsistent with mutation bias, since it would imply that most GC-rich bacteria are declining in GC-content; such a pattern would be unsustainable. We also show that the patterns are probably not due to translational selection or biased gene conversion, because optimal codons tend to be AT-rich, and the excess of GC→AT SNPs is observed in datasets with no evidence of recombination. We therefore conclude that there is selection to increase synonymous GC-content in many species. Since synonymous GC-content is highly correlated to genomic GC-content, we further conclude that there is selection on genomic base composition in many bacteria.},
  citation-subset = {IM},
  completed = {2011-01-04},
  country = {United States},
  doi = {10.1371/journal.pgen.1001107},
  issn-linking = {1553-7390},
  keywords = {Bacteria, classification, genetics; Base Composition, genetics; Bias; Genome, Bacterial, genetics; Models, Genetic; Mutation, genetics; Polymorphism, Single Nucleotide, genetics; Protein Biosynthesis, genetics; Selection, Genetic},
  nlm-id = {101239074},
  owner = {NLM},
  pii = {e1001107},
  pmc = {PMC2936529},
  pmid = {20838593},
  pubmodel = {Electronic},
  pubstatus = {epublish},
  revised = {2018-11-13},
}
@Article{Clote2005,
  author = {Clote, Peter and Ferr{\'e}, Fabrizio and Kranakis, Evangelos and Krizanc, Danny},
  title = {Structural {RNA} has lower folding energy than random {RNA} of the same dinucleotide frequency},
  journal = {{RNA} (New York, N.Y.)},
  year = {2005},
  volume = {11},
  number = {5},
  pages = {578--591},
  month = may,
  issn = {1355-8382},
  abstract = {We present results of computer experiments that indicate that several {RNA}s for which the native state (minimum free energy secondary structure) is functionally important (type III hammerhead ribozymes, signal recognition particle {RNA}s, U2 small nucleolar spliceosomal {RNA}s, certain riboswitches, etc.) all have lower folding energy than random {RNA}s of the same length and dinucleotide frequency. Additionally, we find that whole m{RNA} as well as 5'-UTR, 3'-UTR, and cds regions of m{RNA} have folding energies comparable to that of random {RNA}, although there may be a statistically insignificant trace signal in 3'-UTR and cds regions. Various authors have used nucleotide (approximate) pattern matching and the computation of minimum free energy as filters to detect potential {RNA}s in ESTs and genomes. We introduce a new concept of the asymptotic Z-score and describe a fast, whole-genome scanning algorithm to compute asymptotic minimum free energy Z-scores of moving-window contents. Asymptotic Z-score computations offer another filter, to be used along with nucleotide pattern matching and minimum free energy computations, to detect potential functional {RNA}s in ESTs and genomic regions.},
  chemicals = {3' Untranslated Regions, 5' Untranslated Regions, Nucleotides, {RNA}},
  citation-subset = {IM},
  completed = {2005-05-24},
  country = {United States},
  doi = {10.1261/rna.7220505},
  issn-linking = {1355-8382},
  keywords = {3' Untranslated Regions, chemistry, genetics, metabolism; 5' Untranslated Regions, chemistry, genetics, metabolism; Algorithms; Base Composition; Base Sequence; Computational Biology; Computer Simulation; Expressed Sequence Tags; Markov Chains; Nucleic Acid Conformation; Nucleotides, analysis, chemistry, genetics, metabolism; {RNA}, chemistry, genetics, metabolism; Thermodynamics},
  nlm-id = {9509184},
  owner = {NLM},
  pii = {11/5/578},
  pmc = {PMC1370746},
  pmid = {15840812},
  pubmodel = {Print},
  pubstatus = {ppublish},
  revised = {2018-11-13},
}
@Article{Denise1996,
  author = {Denise, Alain},
  title = {G{\'e}n{\'e}ration al{\'e}atoire et uniforme de mots},
  journal = {Discrete Mathematics},
  year = {1996},
  volume = {156},
  pages = {69--84},
}
@inproceedings{Viennot1985,
  author    = {Viennot, G. and Vauchaussade de Chaumont, M.},
  title     = {Enumeration of {RNA} Secondary Structures by Complexity},
  editor    = {Capasso, V. and Grosso, E. and Paveri-Fontana, S. L.},
  booktitle = {Mathematics in Biology and Medicine},
  pages     = {360--365},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {1985},
  isbn      = {978-3-642-93287-8},
  abstract  = {Many investigations in studying primary and secondary structures in Biology require theoretical statistical (that is enumerative) work. We solve one of these problems: enumerate secondary structures of single-stranded nucleic acids ({RNA}, t{RNA}, etc{\ldots}) having a given complexity. This parameter has been introduced for energy computation purpose in order to predict the most stable secondary structure. The method relies on the (non-classical) use of non-commutative variables. Some orthogonal polynomials appear. The final solution shows a relationship between the parameter complexity and another parameter appearing in Hydrography and Botanic.},
}
@Article{Hofacker1998,
  author = {Hofacker, Ivo L and Schuster, Peter and Stadler, Peter F},
  title = {Combinatorics of {RNA} secondary structures},
  journal = {Discrete Applied Mathematics},
  year = {1998},
  volume = {88},
  number = {1--3},
  pages = {207--237},
  publisher = {Elsevier},
}
@Article{Zuker1984,
  author = {Zuker, Michael and Sankoff, David},
  title = {{RNA} secondary structures and their prediction},
  journal = {Bulletin of mathematical biology},
  year = {1984},
  volume = {46},
  number = {4},
  pages = {591--621},
  publisher = {Springer},
}
@Article{Nebel2002,
  author = {Nebel, Markus E.},
  title = {Combinatorial Properties of {RNA} Secondary Structures},
  journal = {Journal of Computational Biology},
  year = {2002},
  volume = {9},
  number = {3},
  pages = {541--573},
  abstract = {The secondary structure of an {RNA} molecule is of great importance and possesses influence, e.g., on the interaction of t{RNA} molecules with proteins or on the stabilization of m{RNA} molecules. The classification of secondary structures by means of their order proved useful with respect to numerous applications. In 1978, Waterman, who gave the first precise formal framework for the topic, suggested to determine the number an,p of secondary structures of size n and given order p. Since then, no satisfactory result has been found. Based on an observation due to Viennot et al., we will derive generating functions for the secondary structures of order p from generating functions for binary tree structures with Horton-Strahler number p. These generating functions enable us to compute a precise asymptotic equivalent for an,p. Furthermore, we will determine the related number of structures when the number of unpaired bases shows up as an additional parameter. Our approach proves to be general enough to compute the average order of a secondary structure together with all the r-th moments and to enumerate substructures such as hairpins or bulges in dependence on the order of the secondary structures considered.},
  doi = {10.1089/106652702760138628},
  pmid = {12162892},
  url = {https://doi.org/10.1089/106652702760138628},
}
@Article{Bundschuh2002,
  author = {Bundschuh, Ralf and Hwa, Terence},
  title = {Statistical mechanics of secondary structures formed by random {RNA} sequences},
  journal = {Physical review. E, Statistical, nonlinear, and soft matter physics},
  year = {2002},
  volume = {65},
  number = {3 Pt 1},
  pages = {031903},
  month = mar,
  issn = {1539-3755},
  abstract = {The formation of secondary structures by a random {RNA} sequence is studied as a model system for the sequence-structure problem omnipresent in biopolymers. Several toy energy models are introduced to allow detailed analytical and numerical studies. First, a two-replica calculation is performed. By mapping the two-replica problem to the denaturation of a single homogeneous {RNA} molecule in six-dimensional embedding space, we show that sequence disorder is perturbatively irrelevant, i.e., an {RNA} molecule with weak sequence disorder is in a molten phase where many secondary structures with comparable total energy coexist. A numerical study of various models at high temperature reproduces behaviors characteristic of the molten phase. On the other hand, a scaling argument based on the external statistics of rare regions can be constructed to show that the low-temperature phase is unstable to sequence disorder. We performed a detailed numerical study of the low-temperature phase using the droplet theory as a guide, and characterized the statistics of large-scale, low-energy excitations of the secondary structures from the ground state structure. We find the excitation energy to grow very slowly (i.e., logarithmically) with the length scale of the excitation, suggesting the existence of a marginal glass phase. The transition between the low-temperature glass phase and the high-temperature molten phase is also characterized numerically. It is revealed by a change in the coefficient of the logarithmic excitation energy, from being disorder dominated to being entropy dominated.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2002-06-10},
  country = {United States},
  doi = {10.1103/PhysRevE.65.031903},
  issn-linking = {1539-3755},
  keywords = {Biophysics, methods; Glass; Models, Statistical; Nucleic Acid Conformation; {RNA}, chemistry; Temperature},
  nlm-id = {101136452},
  owner = {NLM},
  pmid = {11909105},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2006-11-15},
}
@article{Bundschuh2002a,
  author    = {Bundschuh, Ralf and Hwa, Terence},
  title     = {Phases of the secondary structures of {RNA} sequences},
  journal   = {EPL (Europhysics Letters)},
  volume    = {59},
  number    = {6},
  pages     = {903},
  year      = {2002},
  publisher = {IOP Publishing},
}
@Article{Bundschuh2008,
  author = {Bundschuh, Ralf and Bruinsma, Robijn},
  title = {Melting of branched {RNA} molecules},
  journal = {Physical Review Letters},
  year = {2008},
  volume = {100},
  number = {14},
  pages = {148101},
  publisher = {APS},
}
@Article{David2007,
  author = {David, Francois and Wiese, Kay Joerg},
  title = {Systematic field theory of the {RNA} glass transition},
  journal = {Physical Review Letters},
  year = {2007},
  volume = {98},
  number = {12},
  pages = {128102},
  publisher = {APS},
}
@Article{Bundschuh1999,
  author = {Bundschuh, Ralf and Hwa, Terence},
  title = {{RNA} Secondary Structure Formation: A Solvable Model of Heteropolymer Folding},
  journal = {Physical Review Letters},
  year = {1999},
  volume = {83},
  number = {7},
  pages = {1479--1482},
  month = aug,
  doi = {10.1103/PhysRevLett.83.1479},
  numpages = {4},
  publisher = {American Physical Society},
  url = {https://link.aps.org/doi/10.1103/PhysRevLett.83.1479},
}
@Article{Jin2008,
  author = {Jin, Emma Y and Reidys, Christian M},
  title = {Asymptotic enumeration of {RNA} structures with pseudoknots},
  journal = {Bulletin of mathematical biology},
  year = {2008},
  volume = {70},
  number = {4},
  pages = {951--970},
  month = may,
  issn = {0092-8240},
  abstract = {In this paper, we present the asymptotic enumeration of {RNA} structures with pseudoknots. We develop a general framework for the computation of exponential growth rate and the asymptotic expansion for the numbers of k-noncrossing {RNA} structures. Our results are based on the generating function for the number of k-noncrossing {RNA} pseudoknot structures, Sk(n), derived in Bull. Math. Biol. (2008), where k-1 denotes the maximal size of sets of mutually intersecting bonds. We prove a functional equation for the generating function Sigman>or=0 Sk(n)zn and obtain for k=2 and k=3, the analytic continuation and singular expansions, respectively. It is implicit in our results that for arbitrary k singular expansions exist and via transfer theorems of analytic combinatorics, we obtain asymptotic expression for the coefficients. We explicitly derive the asymptotic expressions for 2- and 3-noncrossing {RNA} structures. Our main result is the derivation of the formula S3(n) approximately 10.4724.4!/n(n-1)...(n-4)(5+[sqrt]21/2)n.},
  chemicals = {{RNA}},
  citation-subset = {IM},
  completed = {2008-06-23},
  country = {United States},
  doi = {10.1007/s11538-007-9265-2},
  issn-linking = {0092-8240},
  keywords = {Mathematics; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry},
  nlm-id = {0401404},
  owner = {NLM},
  pmid = {18340497},
  pubmodel = {Print-Electronic},
  pubstatus = {ppublish},
  revised = {2008-04-04},
}
@Article{Huang2008,
author = {Huang, Fenix W. D. and Reidys, Christian M.},
title = {Statistics of canonical {RNA} pseudoknot structures},
journal = {Journal of Theoretical Biology},
year = {2008},
volume = {253},
number = {3},
pages = {570--578},
month = aug,
issn = {1095-8541},
abstract = {In this paper we study canonical {RNA} pseudoknot structures. We prove central limit theorems for the distributions of the arc-numbers of k-noncrossing {RNA} structures with given minimum stack-size tau over n nucleotides. Furthermore we compare the space of all canonical structures with canonical minimum free energy pseudoknot structures. Our results generalize the analysis of Schuster et al. obtained for {RNA} secondary structures [Hofacker, I.L., Schuster, P., Stadler, P.F., 1998. Combinatorics of {RNA} secondary structures. Discrete Appl. Math. 88, 207-237; Jin, E.Y., Reidys, C.M., 2007b. Central and local limit theorems for {RNA} structures. J. Theor. Biol. 250 (2008), 547-559; 2007a. Asymptotic enumeration of {RNA} structures with pseudoknots. Bull. Math. Biol., 70 (4), 951-970] to k-noncrossing {RNA} structures. Here $k \geq 2$ and tau are arbitrary natural numbers. We compare canonical pseudoknot structures to arbitrary structures and show that canonical pseudoknot structures exhibit significantly smaller exponential growth rates. We then compute the asymptotic distribution of their arc-numbers. Finally, we analyze how the minimum stack-size and crossing number factor into the distributions.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2008-09-04},
country = {England},
doi = {10.1016/j.jtbi.2008.04.002},
issn-linking = {0022-5193},
keywords = {Algorithms; Animals; Models, Genetic; Models, Molecular; Nucleic Acid Conformation; {RNA}, genetics},
nlm-id = {0376342},
owner = {NLM},
pii = {S0022-5193(08)00176-8},
pmid = {18511081},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2008-07-18},
}
@Article{Clote2006,
author = {Clote, Peter},
title = {Combinatorics of saturated secondary structures of {RNA}},
journal = {Journal of Computational Biology},
year = {2006},
volume = {13},
number = {9},
pages = {1640--1657},
month = nov,
issn = {1066-5277},
abstract = {Following Zuker (1986), a saturated secondary structure for a given {RNA} sequence is a secondary structure such that no base pair can be added without violating the definition of secondary structure, e.g., without introducing a pseudoknot. In the Nussinov-Jacobson energy model (Nussinov and Jacobson, 1980), where the energy of a secondary structure is -1 times the number of base pairs, saturated secondary structures are local minima in the energy landscape, hence form kinetic traps during the folding process. Here we present recurrence relations and closed form asymptotic limits for combinatorial problems related to the number of saturated secondary structures. In addition, Python source code to compute the number of saturated secondary structures having k base pairs can be found at the web servers link of bioinformatics.bc.edu/clotelab/.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2007-01-12},
country = {United States},
doi = {10.1089/cmb.2006.13.1640},
issn-linking = {1066-5277},
keywords = {Base Pairing; Base Sequence; Biometry; Kinetics; Models, Molecular; Models, Statistical; Nucleic Acid Conformation; {RNA}, chemistry, genetics; Thermodynamics},
nlm-id = {9433358},
owner = {NLM},
pmid = {17147486},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2006-12-06},
}
@Article{Banderier2015,
author = {Banderier, Cyril and Drmota, Michael},
title = {Formulae and Asymptotics for Coefficients of Algebraic Functions},
journal = {Combinatorics, Probability and Computing},
year = {2015},
volume = {24},
number = {1},
pages = {1--53},
doi = {10.1017/S0963548314000728},
publisher = {Cambridge University Press},
}
@Article{Pringsheim1893,
author = {Pringsheim, Alfred},
title = {Zur {Theorie} der {Taylor'schen} {Reihe} und der analytischen {Functionen} mit beschr{\"a}nktem {Existenzbereich}},
journal = {Mathematische Annalen},
year = {1893},
volume = {42},
pages = {180},
}
@Article{Flajolet1990,
author = {Flajolet, Philippe and Odlyzko, Andrew M.},
title = {Singularity Analysis of Generating Functions},
journal = {{SIAM} J. Discrete Math.},
volume = {3},
number = {2},
pages = {216--240},
year = {1990},
doi = {10.1137/0403019},
url = {https://doi.org/10.1137/0403019},
bibsource = {dblp computer science bibliography, https://dblp.org},
biburl = {https://dblp.org/rec/bib/journals/siamdm/FlajoletO90},
timestamp = {Fri, 26 May 2017 22:54:48 +0200},
}
@Article{Lalley1993,
author = {Lalley, Steven P.},
title = {Finite range random walk on free groups and homogeneous trees},
journal = {The Annals of Probability},
year = {1993},
volume = {21},
number = {4},
pages = {2087--2130},
publisher = {Institute of Mathematical Statistics},
}
@Article{Woods1997,
author = {Woods, Alan R.},
title = {Coloring rules for finite trees, and probabilities of monadic second order sentences},
journal = {Random Structures \& Algorithms},
year = {1997},
volume = {10},
number = {4},
pages = {453--485},
publisher = {Wiley Online Library},
}
@Article{Akutsu2000,
author = {Akutsu, Tatsuya},
title = {Dynamic programming algorithms for {RNA} secondary structure prediction with pseudoknots},
journal = {Discrete Applied Mathematics},
year = {2000},
volume = {104},
number = {1},
pages = {45--62},
issn = {0166-218X},
abstract = {This paper shows simple dynamic programming algorithms for {RNA} secondary structure prediction with pseudoknots. For a basic version of the problem (i.e., maximizing the number of base pairs), this paper presents an $O(n^4)$ time exact algorithm and an $O(n^{4-\delta})$ time approximation algorithm. The latter one outputs, for most {RNA} sequences, a secondary structure in which the number of base pairs is at least $1-\varepsilon$ of the optimal, where $\varepsilon,\delta$ are any constants satisfying $0<\varepsilon,\delta<1$. Several related results are shown too.},
doi = {10.1016/S0166-218X(00)00186-4},
keywords = {{RNA} secondary structure, Pseudoknot, Approximation algorithms, Computational biology, Dynamic programming},
url = {http://www.sciencedirect.com/science/article/pii/S0166218X00001864},
}
@Article{Leontis2001,
author = {Leontis, Neocles B. and Westhof, Eric},
title = {Geometric nomenclature and classification of {RNA} base pairs},
journal = {{RNA}},
year = {2001},
volume = {7},
number = {4},
pages = {499--512},
publisher = {Cambridge University Press},
}
@Article{Yoffe2011,
author = {Yoffe, Aron M. and Prinsen, Peter and Gelbart, William M. and Ben-Shaul, Avinoam},
title = {The ends of a large {RNA} molecule are necessarily close},
journal = {Nucleic Acids Research},
year = {2011},
volume = {39},
number = {1},
pages = {292--299},
month = jan,
issn = {1362-4962},
abstract = {We show on general theoretical grounds that the two ends of single-stranded (ss) {RNA} molecules (consisting of roughly equal proportions of A, C, G and U) are necessarily close together, largely independent of their length and sequence. This is demonstrated to be a direct consequence of two generic properties of the equilibrium secondary structures, namely that the average proportion of bases in pairs is $\sim$60\% and that the average duplex length is $\sim$4. Based on mfold and Vienna computations on large numbers of ss{RNA}s of various lengths (1000-10 000 nt) and sequences (both random and biological), we find that the 5'-3' distance-defined as the sum of H-bond and covalent (ss) links separating the ends of the {RNA} chain-is small, averaging 15-20 for each set of viral sequences tested. For random sequences this distance is $\sim$12, consistent with the theory. We discuss the relevance of these results to evolved sequence complementarity and specific protein binding effects that are known to be important for keeping the two ends of viral and messenger {RNA}s in close proximity. Finally we speculate on how our conclusions imply indistinguishability in size and shape of equilibrated forms of linear and covalently circularized ss{RNA} molecules.},
chemicals = {{RNA}, Circular, {RNA}, Viral, {RNA}},
citation-subset = {IM},
completed = {2011-02-09},
country = {England},
doi = {10.1093/nar/gkq642},
issn-linking = {0305-1048},
keywords = {Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry; {RNA}, Circular; {RNA}, Viral, chemistry},
nlm-id = {0411011},
owner = {NLM},
pii = {gkq642},
pmc = {PMC3017586},
pmid = {20810537},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2019-12-10},
}
@Article{Wells1998,
author = {Wells, S. E. and Hillner, P. E. and Vale, R. D. and Sachs, A. B.},
title = {Circularization of {mRNA} by eukaryotic translation initiation factors},
journal = {Molecular Cell},
year = {1998},
volume = {2},
number = {1},
pages = {135--140},
month = jul,
issn = {1097-2765},
abstract = {Communication between the 5' cap structure and 3' poly(A) tail of eukaryotic m{RNA} results in the synergistic enhancement of translation. The cap and poly(A) tail binding proteins, eIF4E and Pab1p, mediate this effect in the yeast S. cerevisiae through their interactions with different parts of the translation factor eIF4G. Here, we demonstrate the reconstitution of an eIF4E/eIF4G/Pab1p complex with recombinant proteins, and show by atomic force microscopy that the complex can circularize capped, polyadenylated {RNA}. Our results suggest that formation of circular m{RNA} by translation factors could contribute to the control of m{RNA} expression in the eukaryotic cell.},
chemicals = {EIF4G1 protein, human, Eukaryotic Initiation Factor-4E, Eukaryotic Initiation Factor-4G, Fungal Proteins, Macromolecular Substances, Peptide Fragments, Peptide Initiation Factors, Poly(A)-Binding Proteins, {RNA}, Circular, {RNA}, Fungal, {RNA}, Messenger, {RNA}-Binding Proteins, Recombinant Fusion Proteins, Saccharomyces cerevisiae Proteins, TIF4631 protein, S cerevisiae, {RNA}, Glutathione Transferase},
citation-subset = {IM},
completed = {1998-08-31},
country = {United States},
doi = {10.1016/s1097-2765(00)80122-7},
issn-linking = {1097-2765},
keywords = {Eukaryotic Initiation Factor-4E; Eukaryotic Initiation Factor-4G; Fungal Proteins, metabolism; Glutathione Transferase, genetics, metabolism; Macromolecular Substances; Microscopy, Atomic Force; Nucleic Acid Conformation; Peptide Fragments, genetics, metabolism; Peptide Initiation Factors, genetics, metabolism, ultrastructure; Poly(A)-Binding Proteins; Protein Biosynthesis; {RNA}, biosynthesis, ultrastructure; {RNA}, Circular; {RNA}, Fungal, chemistry, metabolism, ultrastructure; {RNA}, Messenger, chemistry, metabolism, ultrastructure; {RNA}-Binding Proteins, metabolism, ultrastructure; Recombinant Fusion Proteins, metabolism; Saccharomyces cerevisiae, genetics; Saccharomyces cerevisiae Proteins},
nlm-id = {9802571},
owner = {NLM},
pii = {S1097-2765(00)80122-7},
pmid = {9702200},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2019-12-10},
}
@Article{Albert2002,
author = {Albert, R. and Barab{\'a}si, A.-L.},
title = {Statistical mechanics of complex networks},
journal = {Reviews of Modern Physics},
year = {2002},
volume = {74},
number = {1},
pages = {47--97},
}
@Article{Bowman2010,
author = {Bowman, G. R. and Pande, V. S.},
title = {Protein folded states are kinetic hubs},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
year = {2010},
volume = {107},
number = {24},
pages = {10890--10895},
month = jun,
}
@Article{Scala2001,
author = {Scala, A. and Nunes~Amaral, L. A. and Barth{\'e}l{\'e}my, M.},
title = {Small-world networks and the conformation space of a short lattice polymer chain},
journal = {Europhysics Letters},
year = {2001},
volume = {55},
number = {4},
pages = {594--600},
}
@Article{VanNoort2004,
author = {Van Noort, V. and Snel, B. and Huynen, M. A.},
title = {The yeast coexpression network has a small-world, scale-free architecture and can be explained by a simple model},
journal = {EMBO Reports},
year = {2004},
volume = {5},
number = {3},
pages = {280--284},
month = mar,
}
@Article{Watts1998,
author = {Watts, D. J. and Strogatz, S. H.},
title = {Collective dynamics of `small-world' networks},
journal = {Nature},
year = {1998},
volume = {393},
number = {6684},
pages = {440--442},
month = jun,
}
@Article{Wuchty2003,
author = {Wuchty, S.},
title = {Small worlds in {RNA} structures},
journal = {Nucleic Acids Research},
year = {2003},
volume = {31},
number = {3},
pages = {1108--1117},
month = feb,
}
@Article{Newman2001,
author = {Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.},
title = {Random graphs with arbitrary degree distributions and their applications},
journal = {Physical Review E},
year = {2001},
volume = {64},
number = {2},
pages = {026118},
month = aug,
}
@Article{Flamm2000,
author = {Flamm, C. and Fontana, W. and Hofacker, I. L. and Schuster, P.},
title = {{RNA} folding at elementary step resolution},
journal = {{RNA}},
year = {2000},
volume = {6},
number = {3},
pages = {325--338},
}
@Article{Cont2008,
author = {Cont, R. and Tanimura, E.},
title = {Small-world graphs: characterization and alternative constructions},
journal = {Advances in Applied Probability},
year = {2008},
volume = {40},
number = {4},
pages = {939--965},
}
@Article{Clote2015,
author = {Clote, Peter},
title = {Expected degree for {RNA} secondary structure networks},
journal = {Journal of Computational Chemistry},
year = {2015},
volume = {36},
number = {2},
pages = {103--117},
month = jan,
issn = {1096-987X},
abstract = {Consider the network of all secondary structures of a given {RNA} sequence, where nodes are connected when the corresponding structures have base pair distance one. The expected degree of the network is the average number of neighbors, where average may be computed with respect to the either the uniform or Boltzmann probability. Here, we describe the first algorithm, {RNA}expNumNbors, that can compute the expected number of neighbors, or expected network degree, of an input sequence. For {RNA} sequences from the Rfam database, the expected degree is significantly less than the constrained minimum free energy structure, defined to have minimum free energy (MFE) over all structures consistent with the Rfam consensus structure. The expected degree of structural {RNA}s, such as purine riboswitches, paradoxically appears to be smaller than that of random {RNA}, yet the difference between the degree of the MFE structure and the expected degree is larger than that of random {RNA}. Expected degree does not seem to correlate with standard structural diversity measures of {RNA}, such as positional entropy and ensemble defect. The program {RNA}expNumNbors is written in C, runs in cubic time and quadratic space, and is publicly available at http://bioinformatics.bc.edu/clotelab/{RNA}expNumNbors.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2015-10-28},
country = {United States},
doi = {10.1002/jcc.23776},
issn-linking = {0192-8651},
keywords = {Algorithms; Base Sequence; Databases, Factual; Nucleic Acid Conformation; {RNA}, chemistry; Software; Thermodynamics; {RNA} secondary structure; macromolecular network; network degree; small-world},
nlm-id = {9878362},
owner = {NLM},
pmid = {25382310},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2014-12-16},
}
@Article{Andronescu2008,
author = {Andronescu, Mirela and Bereg, Vera and Hoos, Holger H. and Condon, Anne},
title = {{RNA} {STRAND}: the {RNA} secondary structure and statistical analysis database},
journal = {BMC Bioinformatics},
year = {2008},
volume = {9},
pages = {340},
month = aug,
issn = {1471-2105},
abstract = {The ability to access, search and analyse secondary structures of a large set of known {RNA} molecules is very important for deriving improved {RNA} energy models, for evaluating computational predictions of {RNA} secondary structures and for a better understanding of {RNA} folding. Currently there is no database that can easily provide these capabilities for almost all {RNA} molecules with known secondary structures. In this paper we describe {RNA} STRAND - the {RNA} secondary STRucture and statistical ANalysis Database, a curated database containing known secondary structures of any type and organism. Our new database provides a wide collection of known {RNA} secondary structures drawn from public databases, searchable and downloadable in a common format. Comprehensive statistical information on the secondary structures in our database is provided using the {RNA} Secondary Structure Analyser, a new tool we have developed to analyse {RNA} secondary structures. The information thus obtained is valuable for understanding to which extent and with which probability certain structural motifs can appear. We outline several ways in which the data provided in {RNA} STRAND can facilitate research on {RNA} structure, including the improvement of {RNA} energy models and evaluation of secondary structure prediction programs. In order to keep up-to-date with new {RNA} secondary structure experiments, we offer the necessary tools to add solved {RNA} secondary structures to our database and invite researchers to contribute to {RNA} STRAND. {RNA} STRAND is a carefully assembled database of trusted {RNA} secondary structures, with easy on-line tools for searching, analyzing and downloading user selected entries, and is publicly available at http://www.rnasoft.ca/strand.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2008-10-17},
country = {England},
doi = {10.1186/1471-2105-9-340},
issn-linking = {1471-2105},
keywords = {Computer Graphics; Computer Simulation; Database Management Systems; Databases, Genetic; Information Storage and Retrieval, methods; Models, Chemical; Models, Molecular; Nucleic Acid Conformation; {RNA}, chemistry, ultrastructure; User-Computer Interface},
nlm-id = {100965194},
owner = {NLM},
pii = {1471-2105-9-340},
pmc = {PMC2536673},
pmid = {18700982},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Giegerich2004,
author = {Giegerich, Robert and Voss, Bj{\"o}rn and Rehmsmeier, Marc},
title = {Abstract shapes of {RNA}},
journal = {Nucleic Acids Research},
year = {2004},
volume = {32},
number = {16},
pages = {4843--4851},
issn = {1362-4962},
abstract = {The function of a non-protein-coding {RNA} is often determined by its structure. Since experimental determination of {RNA} structure is time-consuming and expensive, its computational prediction is of great interest, and efficient solutions based on thermodynamic parameters are known. Frequently, however, the predicted minimum free energy structures are not the native ones, leading to the necessity of generating suboptimal solutions. While this can be accomplished by a number of programs, the user is often confronted with large outputs of similar structures, although he or she is interested in structures with more fundamental differences, or, in other words, with different abstract shapes. Here, we formalize the concept of abstract shapes and introduce their efficient computation. Each shape of an {RNA} molecule comprises a class of similar structures and has a representative structure of minimal free energy within the class. Shape analysis is implemented in the program {RNA}shapes. We applied {RNA}shapes to the prediction of optimal and suboptimal abstract shapes of several {RNA}s. For a given energy range, the number of shapes is considerably smaller than the number of structures, and in all cases, the native structures were among the top shape representatives. This demonstrates that the researcher can quickly focus on the structures of interest, without processing up to thousands of near-optimal solutions. We complement this study with a large-scale analysis of the growth behaviour of structure and shape spaces. {RNA}shapes is available for download and as an online version on the Bielefeld Bioinformatics Server.},
chemicals = {5' Untranslated Regions, {RNA}, Small Nuclear, {RNA}, Untranslated, {RNA}, Viral, U2 small nuclear {RNA}, {RNA}, Transfer},
citation-subset = {IM},
completed = {2004-09-24},
country = {England},
doi = {10.1093/nar/gkh779},
issn-linking = {0305-1048},
keywords = {5' Untranslated Regions, chemistry; Base Sequence; Computational Biology, methods; HIV-1, genetics; Humans; Internet; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, Small Nuclear, chemistry; {RNA}, Transfer, chemistry; {RNA}, Untranslated, chemistry; {RNA}, Viral, chemistry; Software; Terminology as Topic},
nlm-id = {0411011},
owner = {NLM},
pii = {32/16/4843},
pmc = {PMC519098},
pmid = {15371549},
pubmodel = {Electronic-Print},
pubstatus = {epublish},
revised = {2019-12-10},
}
@Article{Reeder2005,
author = {Reeder, Jens and Giegerich, Robert},
title = {Consensus shapes: an alternative to the {Sankoff} algorithm for {RNA} consensus structure prediction},
journal = {Bioinformatics},
year = {2005},
volume = {21},
number = {17},
pages = {3516--3523},
month = sep,
issn = {1367-4803},
abstract = {The well-known Sankoff algorithm for simultaneous {RNA} sequence alignment and folding is currently considered an ideal, but computationally over-expensive method. Available tools implement this algorithm under various pragmatic restrictions. They are still expensive to use, and it is difficult to judge if the moderate quality of results is because of the underlying model or to its imperfect implementation. We propose to redefine the consensus structure prediction problem in a way that does not imply a multiple sequence alignment step. For a family of {RNA} sequences, our method explicitly and independently enumerates the near-optimal abstract shape space, and predicts as the consensus an abstract shape common to all sequences. For each sequence, it delivers the thermodynamically best structure which has this common shape. Since the shape space is much smaller than the structure space, and identification of common shapes can be done in linear time (in the number of shapes considered), the method is essentially linear in the number of sequences. Our evaluation shows that the new method compares favorably with available alternatives. The new method has been implemented in the program {RNA}cast and is available on the Bielefeld Bioinformatics Server. jreeder@TechFak.Uni-Bielefeld.DE, robert@TechFak.Uni-Bielefeld.DE SUPPLEMENTARY INFORMATION: Available at http://bibiserv.techfak.uni-bielefeld.de/rnacast/supplementary.html},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2005-12-07},
country = {England},
doi = {10.1093/bioinformatics/bti577},
issn-linking = {1367-4803},
keywords = {Algorithms; Base Sequence; Computer Simulation; Consensus Sequence; Models, Chemical; Models, Molecular; Molecular Sequence Data; Nucleic Acid Conformation; {RNA}, analysis, chemistry; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods; Sequence Homology, Nucleic Acid; Software},
nlm-id = {9808944},
owner = {NLM},
pii = {bti577},
pmid = {16020472},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2019-12-10},
}
@Article{Janssen2008,
author = {Janssen, Stefan and Reeder, Jens and Giegerich, Robert},
title = {Shape based indexing for faster search of {RNA} family databases},
journal = {BMC Bioinformatics},
year = {2008},
volume = {9},
pages = {131},
month = feb,
issn = {1471-2105},
abstract = {Most non-coding {RNA} families exert their function by means of a conserved, common secondary structure. The Rfam data base contains more than five hundred structurally annotated {RNA} families. Unfortunately, searching for new family members using covariance models (CMs) is very time consuming. Filtering approaches that use the sequence conservation to reduce the number of CM searches, are fast, but it is unknown to which sacrifice. We present a new filtering approach, which exploits the family specific secondary structure and significantly reduces the number of CM searches. The filter eliminates approximately 85\% of the queries and discards only 2.6\% true positives when evaluating Rfam against itself. First results also capture previously undetected non-coding {RNA}s in a recent human {RNA}z screen. The {RNA} shape index filter ({RNA}sifter) is based on the following rationale: An {RNA} family is characterised by structure, much more succinctly than by sequence content. Structures of individual family members, which naturally have different length and sequence composition, may exhibit structural variation in detail, but overall, they have a common shape in a more abstract sense. Given a fixed release of the Rfam data base, we can compute these abstract shapes for all families. This is called a shape index. If a query sequence belongs to a certain family, it must be able to fold into the family shape with reasonable free energy. Therefore, rather than matching the query against all families in the data base, we can first (and quickly) compute its feasible shape(s), and use the shape index to access only those families where a good match is possible due to a common shape with the query.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2008-04-23},
country = {England},
doi = {10.1186/1471-2105-9-131},
issn-linking = {1471-2105},
keywords = {Algorithms; Base Sequence; Database Management Systems; Databases, Genetic; Information Storage and Retrieval, methods; Molecular Sequence Data; {RNA}, genetics; Sequence Alignment, methods; Sequence Analysis, {RNA}, methods},
nlm-id = {100965194},
owner = {NLM},
pii = {1471-2105-9-131},
pmc = {PMC2277397},
pmid = {18312625},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
@Article{Burbano2007,
author = {Burbano, Hern{\'a}n A. and Andrade, Eugenio},
title = {Analysis of {tRNA} abstract shapes of precursor/derivative amino acids in {Archaea}},
journal = {Gene},
year = {2007},
volume = {396},
number = {1},
pages = {75--83},
month = jul,
issn = {0378-1119},
abstract = {Wong's theory of the genetic code's origin states that because of historical constraints, codon assignment depends on the relation between precursor and derivative amino acids, a result of the coevolutionary process between amino acids' biosynthetic pathways and t{RNA}s. Based on arguments supporting the assumption that natural selection favors more stable and thus functionally constrained structures, we tested whether precursor and derivative t{RNA}s are equally evolved by measuring their structural parameters, thermostability and molecular plasticity. We also estimated the extent to which precursor and derivative t{RNA}s differ within Archaea. We used Archaea sequences of both precursor and derivative t{RNA}s in order to examine the plastic repertoires or sets of suboptimal structures at a defined free energy interval. We grouped secondary structures according to their helix nesting and adjacency using abstract shapes analysis. This clustering enabled us to infer a consensus sequence for all shapes that fit the clover leaf secondary structure [Giegerich, R., et al., Nucleic Acids Res 2004; 32 (16): 4843-51.]. This consensus sequence was then folded in order to retrieve a set of suboptimal structures. For each pair of precursor and derivative t{RNA}s, we compared these plastic repertoires based on the number of secondary structures, the thermostability of the minimum free energy structure and two structural parameters (base pair propensity (P) and mean length of helical stem structures (S)), which were measured for every representative secondary structure [Schultes, E.A., et al., J Mol Evol 1999; 49 (1): 76-83.]. We found that derivative t{RNA}s have fewer numbers of shapes, higher thermostability and more stable parameters than precursor t{RNA}s, a fact in full agreement with Wong's coevolution theory of the genetic code.},
chemicals = {Amino Acids, {RNA}, Transfer},
citation-subset = {IM},
completed = {2007-08-01},
country = {Netherlands},
doi = {10.1016/j.gene.2007.02.024},
issn-linking = {0378-1119},
keywords = {Amino Acids, genetics; Archaea, genetics; Base Composition, genetics; Base Pairing; Base Sequence; Evolution, Molecular; Genetic Code; Molecular Sequence Data; {RNA}, Transfer, chemistry, genetics; Thermodynamics},
nlm-id = {7706761},
owner = {NLM},
pii = {S0378-1119(07)00113-8},
pmid = {17433860},
pubmodel = {Print-Electronic},
pubstatus = {ppublish},
revised = {2007-06-04},
}
@Article{Voss2006,
author = {Voss, Bj{\"o}rn and Giegerich, Robert and Rehmsmeier, Marc},
title = {Complete probabilistic analysis of {RNA} shapes},
journal = {BMC Biology},
year = {2006},
volume = {4},
pages = {5},
month = feb,
issn = {1741-7007},
abstract = {Soon after the first algorithms for {RNA} folding became available, it was recognised that the prediction of only one energetically optimal structure is insufficient to achieve reliable results. An in-depth analysis of the folding space as a whole appeared necessary to deduce the structural properties of a given {RNA} molecule reliably. Folding space analysis comprises various methods such as suboptimal folding, computation of base pair probabilities, sampling procedures and abstract shape analysis. Common to many approaches is the idea of partitioning the folding space into classes of structures, for which certain properties can be derived. In this paper we extend the approach of abstract shape analysis. We show how to compute the accumulated probabilities of all structures that share the same shape. While this implies a complete (non-heuristic) analysis of the folding space, the computational effort depends only on the size of the shape space, which is much smaller. This approach has been integrated into the tool {RNA} shapes, and we apply it to various {RNA}s. Analyses of conformational switches show the existence of two shapes with probabilities approximately 2/3 vs. 1/3, whereas the analysis of a micro{RNA} precursor reveals one shape with a probability near to 1.0. Furthermore, it is shown that a shape can outperform an energetically more favourable one by achieving a higher probability. From these results, and the fact that we use a complete and exact analysis of the folding space, we conclude that this approach opens up new and promising routes for investigating and understanding {RNA} secondary structure.},
chemicals = {{RNA}},
citation-subset = {IM},
completed = {2006-07-17},
country = {England},
doi = {10.1186/1741-7007-4-5},
issn-linking = {1741-7007},
keywords = {Algorithms; Computational Biology, methods; Models, Statistical; Models, Theoretical; Nucleic Acid Conformation; Probability; Programming Languages; {RNA}, chemistry},
nlm-id = {101190720},
owner = {NLM},
pii = {1741-7007-4-5},
pmc = {PMC1479382},
pmid = {16480488},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2018-11-13},
}
% Findeiss et al. 2017, Sensors 17(9): design of artificial riboswitches as biosensors.
% The title is stored without a terminal period; the bibliography style supplies punctuation.
@Article{Findeiss2017,
author = {Findei{\ss}, Sven and Etzel, Maja and Will, Sebastian and M{\"o}rl, Mario and Stadler, Peter F},
title = {Design of Artificial Riboswitches as Biosensors},
journal = {Sensors (Basel, Switzerland)},
year = {2017},
volume = {17},
number = {9},
pages = {E1990},
month = aug,
issn = {1424-8220},
abstract = {{RNA} aptamers readily recognize small organic molecules, polypeptides, as well as other nucleic acids in a highly specific manner. Many such aptamers have evolved as parts of regulatory systems in nature. Experimental selection techniques such as SELEX have been very successful in finding artificial aptamers for a wide variety of natural and synthetic ligands. Changes in structure and/or stability of aptamers upon ligand binding can propagate through larger {RNA} constructs and cause specific structural changes at distal positions. In turn, these may affect transcription, translation, splicing, or binding events. The {RNA} secondary structure model realistically describes both thermodynamic and kinetic aspects of {RNA} structure formation and refolding at a single, consistent level of modelling. Thus, this framework allows studying the function of natural riboswitches in silico. Moreover, it enables rationally designing artificial switches, combining essentially arbitrary sensors with a broad choice of read-out systems. Eventually, this approach sets the stage for constructing versatile biosensors.},
chemicals = {Aptamers, Nucleotide, Ligands, Riboswitch},
citation-subset = {IM},
completed = {2018-05-31},
country = {Switzerland},
doi = {10.3390/s17091990},
issn-linking = {1424-8220},
issue = {9},
keywords = {Aptamers, Nucleotide; Biosensing Techniques; Kinetics; Ligands; Riboswitch; {RNA} structure; aptamer; folding kinetics; ligand binding; rational design; refolding; thermodynamics},
nlm-id = {101204366},
owner = {NLM},
pii = {E1990},
pmc = {PMC5621056},
pmid = {28867802},
pubmodel = {Electronic},
pubstatus = {epublish},
revised = {2019-01-16},
}
% Grabbe et al. 2016, Nanomedicine 11(20): translating RNA-lipoplex cancer vaccines for melanoma into the clinic.
% The title is stored without a terminal period; the bibliography style supplies punctuation.
% "number" mirrors the export's "issue" field so that classic styles, which only read "number", print the issue.
@Article{Grabbe2016,
author = {Grabbe, Stephan and Haas, Heinrich and Diken, Mustafa and Kranz, Lena M and Langguth, Peter and Sahin, Ugur},
title = {Translating nanoparticulate-personalized cancer vaccines into clinical applications: case study with {RNA}-lipoplexes for the treatment of melanoma},
journal = {Nanomedicine (London, England)},
year = {2016},
volume = {11},
number = {20},
pages = {2723--2734},
month = oct,
issn = {1748-6963},
abstract = {The development of nucleic acid based vaccines against cancer has gained considerable momentum through the advancement of modern sequencing technologies and on novel {RNA}-based synthetic drug formats, which can be readily adapted following identification of every patient's tumor-specific mutations. Furthermore, affordable and individual 'on demand' production of molecularly optimized vaccines should allow their application in large groups of patients. This has resulted in the therapeutic concept of an active personalized cancer vaccine, which has been brought into clinical testing. Successful trials have been performed by intranodal administration of sterile isotonic solutions of synthetic {RNA} vaccines. The second generation of {RNA} vaccines which is currently being developed encompasses intravenously injectable {RNA} nanoparticle formulations (lipoplexes), made up from lipid excipients, denoted {RNA} . A first product that has made its way from bench to bedside is a therapeutic vaccine for intravenous administration based on a fixed set of four {RNA} lipoplex drug products, each encoding for one shared tumor antigen (Lipoplex Melanoma {RNA} Immunotherapy, 'Lipo-MERIT'). This article describes the steps for translating these novel {RNA} nanomedicines into clinical trials.},
chemicals = {Antigens, Neoplasm, Cancer Vaccines, Excipients, Liposomes, {RNA}, Messenger, {RNA}},
citation-subset = {IM},
completed = {2018-03-22},
country = {England},
doi = {10.2217/nnm-2016-0275},
issn-linking = {1743-5889},
issue = {20},
keywords = {Animals; Antigens, Neoplasm, genetics, immunology; Cancer Vaccines, immunology; Clinical Trials as Topic; Excipients; Humans; Immunotherapy, methods; Liposomes, chemistry; Melanoma, immunology, therapy; Nanomedicine; Nanoparticles, chemistry, therapeutic use; Precision Medicine; {RNA}, administration & dosage, chemistry, immunology; {RNA}, Messenger, administration & dosage, chemistry, pharmacology, therapeutic use; cancer; drug delivery; lipoplex; liposomes; m{RNA}; tumor immunotherapy},
nlm-id = {101278111},
owner = {NLM},
pmid = {27700619},
pubmodel = {Print},
pubstatus = {ppublish},
revised = {2018-03-22},
}
% Takahashi and Lucks 2013, Nucleic Acids Research 41(15): modular engineering of chimeric RNA transcription regulators.
% The page range uses "--" (en dash), as BibTeX expects; the acronym is case-protected with a single brace pair.
@Article{Takahashi2013,
author = {Takahashi, Melissa K. and Lucks, Julius B.},
title = {A modular strategy for engineering orthogonal chimeric {RNA} transcription regulators},
journal = {Nucleic Acids Research},
year = {2013},
volume = {41},
number = {15},
pages = {7577--7588},
doi = {10.1093/nar/gkt452},
}
% Wu et al. 2014, Science Translational Medicine 6(240): RNAi therapies.
% "RNAi" is case-protected as a whole word (no mid-word braces); "pages" keeps the non-numeric locator 240ps7 as given.
@Article{Wu2014,
author = {Wu, Sherry Y. and Lopez-Berestein, Gabriel and Calin, George A. and Sood, Anil K.},
title = {{RNAi} Therapies: Drugging the Undruggable},
journal = {Science Translational Medicine},
year = {2014},
volume = {6},
number = {240},
pages = {240ps7},
doi = {10.1126/scitranslmed.3008362},
}
@InProceedings{Bonnet2018,
author = {{\'{E}}douard Bonnet and Pawe{\l} Rz{{a}}{\.{z}}ewski and Florian Sikora},
title = {Designing {{RNA}} Secondary Structures Is Hard},
booktitle = {Research in Computational Molecular Biology - 22nd Annual International Conference, {RECOMB} 2018},
year = {2018},
editor = {Benjamin J. Raphael},