R可视化-ggseqlogo包绘制序列分析图

科技 08-15 来源：科研那点事儿

序列分析图（sequence logo）一般指以图形方式依次展示序列比对中各个位置上出现的残基及其频率，常用于描述序列特征，如DNA中的蛋白质结合位点或蛋白质中的功能单元。序列分析图中每个残基对应图形字符的大小与残基在该位置上出现的频率是成正比的！

安装、加载R包

# Setup: make sure ggseqlogo is available, then attach it.
# Prefer starting from a fresh R session over rm(list = ls()), which only
# removes user objects and leaves attached packages and options untouched.

# Install the package only when it is missing, so re-running the script does
# not reinstall it every time.
if (!requireNamespace("ggseqlogo", quietly = TRUE)) {
  install.packages("ggseqlogo")
}

# Load the package.
library(ggseqlogo)

数据

支持序列与矩阵两种格式的数据，以ggseqlogo包自带的示例数据ggseqlogo_sample为例：

# Load the sample data set bundled with ggseqlogo.
data(ggseqlogo_sample)

# DNA data.
df1 <- pfms_dna

# Amino-acid data.
df2 <- seqs_aa

# Sequence format.
df3 <- seqs_dna[["MA0011.1"]]

# Matrix format: base positions and how many times each base occurs at
# that position.
df4 <- pfms_dna[["MA0031.1"]]

ggseqlogo包绘图参数

1、基本参数查看

# Search the installed help pages for ggseqlogo.
??ggseqlogo

# Parameters of the main function, as quoted by the article. Kept as a
# comment because a usage signature with a top-level `...` is not runnable:
# ggseqlogo(data, facet = "wrap", scales = "free_x", ncol = NULL,
#           nrow = NULL, ...)

2、基本序列分析图绘制

# Basic sequence logos: one call on the DNA data in df1, one on the
# sequence-format data in df3.
ggseqlogo(df1)
ggseqlogo(df3)

3、方法的选择——“bits”(默认)和“probability”

# Draw the same sequences with both methods and stack the two plots in a
# single column for comparison.
p1 <- ggseqlogo(df3, method = "bits")
p2 <- ggseqlogo(df3, method = "probability")
cowplot::plot_grid(p1, p2, ncol = 1)

4、绘制多个图时通过facet与ncol控制

# Multiple logos from df1, laid out with facet = "wrap" in one column and
# then in two columns.
ggseqlogo(df1, facet = "wrap", ncol = 1)
ggseqlogo(df1, facet = "wrap", ncol = 2)

5、序列类型的指定

# The seq_type argument sets the sequence type. The default "auto" detects it
# automatically; "aa", "dna" and "rna" can be chosen explicitly.
ggseqlogo(df1, facet = "wrap", ncol = 2, seq_type = "dna")
ggseqlogo(df2, facet = "wrap", ncol = 2, seq_type = "aa")

6、配色方案

# Colour schemes are set through the col_scheme argument; see
# ?list_col_schemes for the available schemes.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
list_col_schemes(v = TRUE)

# Draw df1 with two of the named colour schemes.
ggseqlogo(df1, col_scheme = "clustalx")
ggseqlogo(df1, col_scheme = "taylor")

7、自定义配色方案——通过make_col_scheme函数实现，有离散型与连续型两种方式

# Discrete colour scheme: each character gets its own group and colour.
col1 <- make_col_scheme(
  chars = c("A", "G", "T", "C"),
  groups = c("g1", "g2", "g3", "g4"),
  cols = c("red", "green", "blue", "yellow")
)
ggseqlogo(df1, col_scheme = col1)

# Continuous colour scheme: each character is given a numeric value
# instead of a fixed colour.
col2 <- make_col_scheme(
  chars = c("A", "G", "T", "C"),
  values = 1:4,
  name = "group"
)
ggseqlogo(df1, col_scheme = col2)

8、字体设置

# Fonts are set through the font argument; see ?list_fonts for the
# built-in fonts.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
list_fonts(v = TRUE)

# Draw the same sequences in four fonts and arrange the plots on a
# two-column grid. Descriptive names are used instead of a/b/c/d so that
# the plot objects do not shadow base::c.
p_xkcd <- ggseqlogo(df3, font = "xkcd_regular")
p_roboto <- ggseqlogo(df3, font = "roboto_slab_regular")
p_helvetica <- ggseqlogo(df3, font = "helvetica_regular")
p_helvetica_light <- ggseqlogo(df3, font = "helvetica_light")
cowplot::plot_grid(
  p_xkcd, p_roboto, p_helvetica, p_helvetica_light,
  ncol = 2
)

9、字母宽度设置

# Letter width is set through the stack_width argument; compare 1 with 0.5
# in a single column.
p_wide <- ggseqlogo(df3, stack_width = 1)
p_narrow <- ggseqlogo(df3, stack_width = 0.5)
cowplot::plot_grid(p_wide, p_narrow, ncol = 1)

10、注释——与ggplot2注释原理一致

# Build the logo from ggplot2 layers so that annotations can be added to it.
p1 <- ggplot2::ggplot() +
  geom_logo(df3) +
  theme_logo()

# Display the plot.
p1

# Add a text annotation. The line break inside the label is written as \n
# so the string literal no longer spans two source lines; the label text
# itself is unchanged.
p1 +
  ggplot2::annotate(
    "text",
    x = 6, y = 1,
    label = "This is a text\n annotation!"
  )

# Add a line segment.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# thickness; `size` is kept here for older versions. Confirm the installed
# version before switching.
p1 +
  ggplot2::annotate(
    "segment",
    x = 1, xend = 3,
    y = 1.5, yend = 1.5,
    size = 3
  )

# Add a shape annotation: a semi-transparent green rectangle with a grey
# outline.
p1 +
  ggplot2::annotate(
    "rect",
    xmin = 6.5, xmax = 7.5,
    ymin = -0.05, ymax = 0.8,
    alpha = 0.2, col = "grey", fill = "green"
  )

绘图模板代码

# Template code: one logo combining the options shown in the article.
library(ggplot2)

# Custom colour scheme.
col1 <- make_col_scheme(
  chars = c("A", "G", "T", "C"),
  groups = c("g1", "g2", "g3", "g4"),
  cols = c("red", "green", "blue", "yellow")
)

ggplot() +
  geom_logo(
    df1$MA0018.2,            # data
    method = "bits",         # method
    seq_type = "dna",        # sequence type
    col_scheme = col1,       # colour scheme
    font = "xkcd_regular",   # font
    stack_width = 0.8        # letter width
  ) +
  # The label's line break is written as \n; the label text is unchanged.
  annotate(
    "text",
    x = 5.5, y = 2,
    color = "red",
    label = "This is a text\n annotation!"
  ) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
  # thickness; confirm the installed version before switching.
  annotate(
    "segment",
    x = 4.5, xend = 6.5,
    y = 1.6, yend = 1.6,
    size = 3
  ) +
  annotate(
    "rect",
    xmin = 2.5, xmax = 3.5,
    ymin = -0.05, ymax = 2.05,
    alpha = 0.2, col = "grey", fill = "green"
  ) +
  theme_logo()