# Install packages
# Install each dependency only when it is missing. requireNamespace() checks
# availability without attaching the package.
if (!requireNamespace("ggseqlogo", quietly = TRUE)) {
  install.packages("ggseqlogo")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Load packages
library(ggseqlogo)
library(ggplot2)
Seqlogo
Note
Hiplot website
This page is the tutorial for the source code version of the Hiplot Seqlogo
plugin. You can also use the Hiplot website to create the plot without writing any code. For more information, please see the following link:
A sequence logo is a graphic that depicts the sequence pattern of binding sites.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages:
ggseqlogo; ggplot2
Data Preparation
The loaded data contains the binding-site sequences of multiple transcription factors on multiple genes.
# Load data
# Read the tab-delimited input file; the first row holds the column names.
data <- read.delim("files/Hiplot/074-ggseqlogo-data.txt", header = TRUE)

# Convert data structure
# Drop columns that contain only NA. drop = FALSE keeps a data frame even when
# a single column remains, so as.list() below still yields one element per
# column rather than one element per value.
data <- data[
  ,
  !vapply(data, function(x) all(is.na(x)), logical(1)),
  drop = FALSE
]
# Turn the data frame into a named list with one vector per column.
data <- as.list(data)
# Remove NA entries from each vector, so the list elements may end up with
# different lengths.
data <- lapply(data, function(x) x[!is.na(x)])

# View data
str(data)
List of 12
$ MA0001.1: chr [1:97] "CCATATATAG" "CCATATATAG" "CCATAAATAG" "CCATAAATAG" ...
$ MA0002.1: chr [1:26] "AATTGTGGTTA" "ATCTGTGGTTA" "AATTGTGGTAA" "TTCTGCGGTTA" ...
$ MA0004.1: chr [1:20] "CACGTG" "CACGTG" "CACGTG" "CACGTG" ...
$ MA0005.1: chr [1:90] "CCTAATTGGGC" "CCTAATTTGGC" "CCTAATCGGGC" "CCTAATCGGGC" ...
$ MA0006.1: chr [1:24] "CGCGTG" "CGCGTG" "CGCGTG" "CGCGTG" ...
$ MA0007.1: chr [1:24] "AAAAGTACACCCTGTACCGACA" "CTAAGCACACCGTGTCCCAGTC" "TTAAGAACACTCTGTACGACAC" "AGTAGAACATAATGTTCCGACA" ...
$ MA0008.1: chr [1:25] "CAATTATT" "CAATTATT" "CAATTATT" "CAATTATT" ...
$ MA0009.1: chr [1:40] "CTAGGTGTGAA" "CTAGGTGTGAA" "CTAGGTGTGAA" "CTAGGTGTGAA" ...
$ MA0010.1: chr [1:9] "CTAATTGGCAAATG" "ATAATAAACAAAAC" "GACATAGACAAGAC" "GTCTTTCACAAATA" ...
$ MA0011.1: chr [1:12] "AACTATTT" "TGCTAGTT" "TCCTAGTT" "TTCTATTC" ...
$ MA0012.1: chr [1:12] "TAAACTTGTTG" "TAAACTAAAGC" "TCAACTAGGAT" "TAAACAAAACC" ...
$ MA0013.1: chr [1:6] "TTGTGAAAGAC" "AAGTAAACTAA" "TAATAAACAAA" "TAATAAACAAA" ...
Visualization
# Seqlogo
# Build the sequence-logo plot from the list `data`, laid out in 4 columns,
# using the "nucleotide" colour scheme for DNA sequences and the "bits"
# method. The theme() call centres the plot title horizontally.
p <- ggseqlogo(
  data,
  ncol = 4,
  col_scheme = "nucleotide",
  seq_type = "dna",
  method = "bits"
) +
  theme(plot.title = element_text(hjust = 0.5))

# Print the plot.
p

Each set of binding-site sequences is displayed as a sequence logo in its own panel of the chart, calculated using the bits method, which makes it easy to see which bases account for the largest proportion at each position of the different sequences.