% Journal article: Sant'Anna et al. (2020), International Journal of Systematic
% and Evolutionary Microbiology 70(4), 2925--2936. Cleaned publisher export:
% bare DOI, en-dash page range, single merged keywords field, escaped percent
% signs in the abstract. The citation key is kept as exported so existing
% citations keep resolving.
% NOTE(review): surnames "Almeida" and "Passaglia" follow the PubMed indexing of
% this paper as recalled by the reviewer; the export had "F\'atima Almeida,
% Patr\'icia de" and "Pereira Passaglia, Luciane Maria" -- confirm against the
% publisher page.
@article{mbs:/content/journal/ijsem/10.1099/ijsem.0.004070,
  author    = {Sant’Anna, Fernando Hayashi and Reiter, Keli Cristine and Almeida, Patrícia de Fátima and Passaglia, Luciane Maria Pereira},
  title     = {Systematic review of descriptions of novel bacterial species: evaluation of the twenty-first century taxonomy through text mining},
  journal   = {International Journal of Systematic and Evolutionary Microbiology},
  year      = {2020},
  volume    = {70},
  number    = {4},
  pages     = {2925--2936},
  doi       = {10.1099/ijsem.0.004070},
  url       = {https://www.microbiologyresearch.org/content/journal/ijsem/10.1099/ijsem.0.004070},
  publisher = {Microbiology Society},
  issn      = {1466-5034},
  type      = {Journal Article},
  keywords  = {bacterial taxonomy, text mining, Proteobacteria, pubmed, bibliometrics},
  abstract  = {Although described bacterial species increased in the twenty-first century, they correspond to a tiny fraction of the actual number of species living on our planet. The volume of textual data of these descriptions constitutes valuable information for revealing trends that in turn could support strategies for improvement of bacterial taxonomy. In this study, a text mining approach was used to generate bibliometric data to verify the state-of-art of bacterial taxonomy. Around 9700 abstracts of bacterial classification containing the expression ‘sp. nov.’ and published between 2001 and 2018 were downloaded from PubMed and analysed. Most articles were from PR China and the Republic of Korea, and published in the International Journal of Systematic and Evolutionary Microbiology. From about 10 800 species names detected, 93.33 \% were considered valid according to the rules of the Bacterial Code, and they corresponded to 82.98 \% of the total number of species validated between 2001 and 2018. Streptomyces, Bacillus and Paenibacillus each had more than 200 species described in the period. However, almost 40 \% of all species were from the phylum Proteobacteria. Most bacteria were Gram-stain-negative, bacilli and isolated from soil. Thirteen species and one genus homonyms were found. With respect to methodologies of bacterial characterization, the use of terms related to 16S rRNA and polar lipids increased along these years, and terms related to genome metrics only began to appear from 2009 onward, although at a relatively lower frequency. Bacterial taxonomy is known as a conservative discipline, but it gradually changed in terms of players and practices. With the advent of the mandatory use of genomic analyses for species description, we are probably witnessing a turning point in the evolution of bacterial taxonomy.},
}