% Journal article entry (Access Microbiology, volume 1, issue 1A, 2019).
% The doi field stores the bare DOI (no resolver prefix); the landing page is in url.
% No page range is recorded for this item; it is identified by its eid instead.
@article{mbs:/content/journal/acmi/10.1099/acmi.ac2019.po0121,
  author    = {Kidman, Samuel and Ukor, Emem-Fong and Floto, Andres and Parkhill, Julian},
  title     = {Fitting linear mixed models to a highly-structured dataset effectively controls for population structure in bacterial genome-wide association studies},
  journal   = {Access Microbiology},
  year      = {2019},
  volume    = {1},
  number    = {1A},
  eid       = {264},
  doi       = {10.1099/acmi.ac2019.po0121},
  url       = {https://www.microbiologyresearch.org/content/journal/acmi/10.1099/acmi.ac2019.po0121},
  publisher = {Microbiology Society},
  issn      = {2516-8290},
  type      = {Journal Article},
  abstract  = {Cystic fibrosis is an inherited, autosomal recessive disease that causes an accumulation of viscous mucus within the lung, leading to chronic bacterial infections. Pseudomonas aeruginosa is a Gram-negative, opportunistic pathogen that can invade the cystic fibrosis lung, and is the most common bacteria isolated from cystic fibrosis patients. In the largest study of its kind, ∼4200 P. aeruginosa isolates were collected from nine cystic fibrosis patients. The isolates were whole-genome sequenced and screened for 14 virulence-related phenotypes. We aim to associate phenotype with genotype by fitting both linear models and linear mixed models association tests. During this study, linear mixed models were found to effectively control for strong population structure signals, allowing relevant associations to be uncovered. When sub-sampling the dataset into unstructured, single-patient groups, fitting linear mixed models were found to associate genotype with phenotype as effectively as fitting linear models. We also found that aggregating non-synonymous SNPs in the same gene to a single knockout mutation increases the power of the association tests to identify relevant associations. The future direction of this study will involve filtering the results to identify any phenotypically-relevant associations. These putative associations will then be experimentally verified.},
}