Histostats

Authors

[Editor] Hu Zheng;

[Contributors]

Display the distribution of a variable as a histogram, together with the results of a statistical test.

Setup

  • System Requirements: Cross-platform (Linux/macOS/Windows)

  • Programming language: R

  • Dependent packages: ggstatsplot

# Install ggstatsplot only when its namespace cannot be loaded
if (isFALSE(requireNamespace("ggstatsplot", quietly = TRUE))) {
  install.packages("ggstatsplot")
}

# Attach ggstatsplot so its functions can be called without a prefix
library("ggstatsplot")

Data Preparation

# Load data
# Read the tab-delimited example table; the first line holds the column names.
data <- read.delim("files/Hiplot/067-gghistostats-data.txt", header = TRUE)

# Convert data structure
# Columns of interest: the plotted variable first, the grouping variable second.
axis <- c("budget", "genre")
# Make the grouping column a factor whose levels follow the order in which the
# values first appear in the file (instead of the default sorted order).
data[, axis[2]] <- factor(data[, axis[2]], levels = unique(data[, axis[2]]))

# View data
# Show the first rows to check the import and the conversion.
head(data)
                                                                 title year
1                       Lord of the Rings: The Return of the King, The 2003
2                   Lord of the Rings: The Fellowship of the Ring, The 2001
3                               Lord of the Rings: The Two Towers, The 2002
4                                                            Star Wars 1977
5                       Star Wars: Episode V - The Empire Strikes Back 1980
6 Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb 1964
  length budget rating  votes  mpaa  genre
1    251   94.0    9.0 103631 PG-13 Action
2    208   93.0    8.8 157608 PG-13 Action
3    223   94.0    8.8 114797 PG-13 Action
4    125   11.0    8.8 134640    PG Action
5    129   18.0    8.8 103706    PG Action
6     93    1.8    8.7  63471    PG Comedy

Visualization

# Histostats
# Build one histogram of `budget` for each level of `genre` and arrange the
# panels in a two-column grid.
p <- grouped_gghistostats(
  data = data, x = budget, grouping.var = genre,
  effsize.type = "unbiased",
  type = "parametric",
  centrality.k = 2,
  plotgrid.args = list(ncol = 2),
  # NOTE(review): "solid" looks like a line type rather than a centrality
  # statistic; it may have been meant as `linetype` inside
  # `centrality.line.args` -- confirm against the installed ggstatsplot version.
  centrality.parameter = "solid",
  # NOTE(review): newer ggplot2 releases expect `linewidth` instead of `size`
  # for line thickness -- confirm which one the installed version uses.
  centrality.line.args = list(size = 1, color = "black"),
  bar.fill = "#0D47A1",
  centrality.label.args = list(color = "#0D47A1", size = 3),
  test.value = 0,
  normal.curve = FALSE,
  normal.curve.args = list(size = 1)
)

# Print the assembled plot
p
Figure 1: Histostats