Feng Li
School of Statistics and Mathematics
Central University of Finance and Economics
"The simple graph has brought more information to the data analyst’s mind than any other device."
— John Tukey
The mpg data frame
mpg contains observations collected by the US Environmental Protection Agency on 38 models of car. You can see more details via ?mpg. Among the variables in mpg are:
- displ, a car's engine size, in litres.
- hwy, a car's fuel efficiency on the highway, in miles per gallon (mpg). A car with a low fuel efficiency consumes more fuel than a car with a high fuel efficiency when they travel the same distance.
Practice: look at the help document of plot (?plot).
library(ggplot2)  # loaded here so that the mpg data frame is available

# Scatterplot of highway mileage (hwy) against engine displacement (displ)
# with the fitted least-squares line on top.
# The columns are looked up through `data = mpg` rather than attach(mpg):
# attach() leaves the data frame on the search path for the rest of the
# session, where its column names can mask or be masked by other objects.
# The formula interface keeps the axis labels "displ" and "hwy".
plot(hwy ~ displ, data = mpg)
abline(lm(hwy ~ displ, data = mpg))
title("Regression of MPG on engine size")
# Histogram of highway mileage.
hist(mpg$hwy)

# Kernel density estimate of the same variable: density() returns the
# estimate as an object, which plot() then draws.
hwy_density <- density(mpg$hwy)
plot(hwy_density)
# Pie chart of the number of rows per manufacturer. table() already names
# each count with the manufacturer, and pie() uses those names as labels.
manufacturer_counts <- table(mpg$manufacturer)
pie(manufacturer_counts)
# Boxplot of highway MPG over all rows of mpg
boxplot(mpg$hwy, main = "Boxplot of MPG")

# Boxplot of highway MPG split by number of cylinders
# (title spelling corrected: "Mileage", not "Milage")
boxplot(hwy ~ cyl, data = mpg,
        main = "Car Mileage Data",
        xlab = "Number of Cylinders",
        ylab = "Miles Per Gallon")
# install.packages("corrplot")
library(corrplot)

# Correlation matrix of every pair of mtcars columns, drawn with corrplot();
# the coefficients are printed on the plot in grey.
data(mtcars)
cor_matrix <- cor(mtcars)
corrplot(cor_matrix, addCoef.col = "grey")
corrplot 0.92 loaded
# Base-graphics plot of the ausbeer object.
# NOTE(review): ausbeer is not defined in this file; presumably it is a
# dataset supplied by one of the packages attached here — confirm.
library(forecast)
library(fpp)
plot(ausbeer)
Registered S3 method overwritten by 'quantmod': method from as.zoo.data.frame zoo Loading required package: fma Loading required package: expsmooth Loading required package: lmtest Loading required package: zoo Attaching package: ‘zoo’ The following objects are masked from ‘package:base’: as.Date, as.Date.numeric Loading required package: tseries
library(ggplot2)

# Scatterplot of hwy against displ: ggplot() sets up the plot for the mpg
# data and geom_point() adds the layer of points.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))
With ggplot2, you begin a plot with the function ggplot(). ggplot() creates a coordinate system that you can add layers to.
geom_point() adds a layer of points to your plot, which creates a scatterplot. You can specify the color, size and shape of these points. Each geom function in ggplot2 takes a mapping argument.
Run ggplot(data = mpg). What do you see?
How many rows are in mpg? How many columns?
What does the drv variable describe? Read the help for ?mpg to find out.
Make a scatterplot of hwy vs cyl.
What happens if you make a scatterplot of class vs drv? Why is the plot not useful?
What happens for the outliers?
# Colour mapped to a variable: `class` is given inside aes().
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))

# Colour and size set to constants for every point: given outside aes().
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = 2, size = 3)

# Point shape set to a constant (shape code 18) for every point.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), shape = 18)
What’s gone wrong with this code? Why are the points not blue?
Which variables in mpg are categorical? Which variables are continuous?
Map a continuous variable to color. How does it behave differently for categorical vs. continuous variables?
What happens if you use something other than a variable name as the color, like aes(colour = displ < 5)?
# Exercise code for the question "Why are the points not blue?":
# `color = "blue"` is deliberately left inside aes() and must not be moved.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = "blue"))
# facet_wrap(): the first argument should be a formula, which you create
# with ~ followed by a variable name.
# Below, the displ/hwy scatterplot is split into one panel per value of
# `class`, with the panels laid out in two rows.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)
library(GGally)

# Pairwise plot matrix for columns 1 and 3 to 6 of mtcars.
mtcars_subset <- mtcars[, c(1, 3, 4, 5, 6)]
ggpairs(mtcars_subset)
Registered S3 method overwritten by 'GGally': method from +.gg ggplot2 Attaching package: ‘GGally’ The following object is masked from ‘package:fma’: pigs
The following chart displays the total number of cars in the mpg dataset, grouped by drv.
# Bar chart: number of rows of mpg for each value of drv.
ggplot(mpg) +
  geom_bar(aes(x = drv))

# The same bar chart with each bar filled according to its drv value.
ggplot(mpg) +
  geom_bar(aes(x = drv, fill = drv))

# Histogram of hwy using the default binning.
ggplot(mpg) +
  geom_histogram(aes(x = hwy))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Boxplots of hwy for each vehicle class, built once and shown twice:
# first as drawn, then with the axes swapped by coord_flip().
class_boxplot <- ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot()

class_boxplot
class_boxplot + coord_flip()
# ggplot2-style plot of ausbeer via autoplot().
# NOTE(review): ausbeer is not defined in this file; presumably it is a
# dataset supplied by one of the packages attached here — confirm.
library(forecast)
library(fpp)
library(ggplot2)
autoplot(ausbeer)
plotly
library(plotly)

# Simulated data: 1000 points, each with a random group label (A-E) and a
# random marker size (1-5). The seed makes the draws reproducible.
set.seed(123)
x <- rnorm(1000)
y <- rchisq(1000, df = 1, ncp = 0)
group <- sample(LETTERS[1:5], size = 1000, replace = TRUE)
size <- sample(1:5, size = 1000, replace = TRUE)
ds <- data.frame(x, y, group, size)

# Columns are mapped with formulas (~x, ~y, ...) so that plot_ly() reads them
# from `ds` instead of depending on same-named objects in the workspace.
# The trace type is stated explicitly, as in the other plot_ly() calls.
p <- plot_ly(ds, x = ~x, y = ~y, type = "scatter", mode = "markers",
             split = ~group, size = ~size) %>%
  layout(title = "Scatter Plot")

# NOTE(review): embed_notebook() may be deprecated in the installed plotly
# version — confirm; it is kept so the notebook display step is unchanged.
embed_notebook(p)
library(plotly)

# Interactive scatterplot of hwy against displ: colour by drv, marker size by
# cyl, and the manufacturer given as the text of each point.
plot_ly(
  mpg,
  x = ~displ,
  y = ~hwy,
  type = "scatter",
  mode = "markers",
  color = ~drv,
  size = ~cyl,
  text = ~paste("manufacturer: ", manufacturer)
)

# Interactive histogram of hwy.
plot_ly(mpg, type = "histogram", x = ~hwy)

# Interactive boxplots of hwy, one per number of cylinders.
plot_ly(mpg, type = "box", y = ~hwy, color = ~as.factor(cyl))
# World map coloured by 2014 GDP.
# NOTE(review): the columns GDP..BILLIONS., COUNTRY and CODE are assumed to be
# the names read.csv() produces from this file's header — confirm.
gdp <- read.csv("data/2014_world_gdp_with_codes.csv")

# Light grey boundary lines between the map regions
boundary_line <- list(color = toRGB("grey"), width = 0.5)

# Map projection and frame/coastline options
geo_options <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = "Mercator")
)

gdp_map <- plot_geo(gdp)
gdp_map <- add_trace(
  gdp_map,
  z = ~GDP..BILLIONS.,
  color = ~GDP..BILLIONS.,
  colors = "Blues",
  text = ~COUNTRY,
  locations = ~CODE,
  marker = list(line = boundary_line)
)
gdp_map <- colorbar(gdp_map, title = "GDP Billions US$", tickprefix = "$")
layout(gdp_map, title = "2014 Global GDP", geo = geo_options)
Shiny apps have two components: ui.R and server.R.
The ui.R script defines the user interface of the app. The server.R script contains the instructions that your computer needs to build your app. Put the two .R files in one folder and you can run the app in RStudio.
ui.R
# UI definition for the "Telephones by region" app.
# shiny is loaded explicitly, as in the other app's ui.R in this file, so
# the script does not rely on the package already being attached.
library(shiny)

# Rely on the 'WorldPhones' dataset in the datasets
# package (which generally comes preloaded).
library(datasets)

# Use a fluid Bootstrap layout
fluidPage(

  # Give the page a title
  titlePanel("Telephones by region"),

  # Generate a row with a sidebar
  sidebarLayout(

    # Define the sidebar with one input: a drop-down whose choices are the
    # column names of WorldPhones; the selection is read as input$region
    sidebarPanel(
      selectInput("region", "Region:",
                  choices = colnames(WorldPhones)),
      hr(),
      helpText("Data from AT&T (1961) The World's Telephones.")
    ),

    # Create a spot for the barplot, filled by output$phonePlot
    mainPanel(
      plotOutput("phonePlot")
    )
  )
)
server.R
# Server logic for the "Telephones by region" app.
# shiny is loaded explicitly, as in the other app's server.R in this file,
# so the script does not rely on the package already being attached.
library(shiny)

# Rely on the 'WorldPhones' dataset in the datasets
# package (which generally comes preloaded).
library(datasets)

# Define a server for the Shiny app
function(input, output) {

  # Fill in the spot we created for a plot
  output$phonePlot <- renderPlot({

    # Render a barplot of the WorldPhones column chosen in the "region"
    # input, multiplied by 1000, with the region name as the title
    barplot(WorldPhones[, input$region] * 1000,
            main = input$region,
            ylab = "Number of Telephones",
            xlab = "Year")
  })
}
ui.R
library(shiny)

# Sidebar controls: a numeric box for the mean (initial value 1) and a
# drop-down for the standard deviation (choices 1, 2, 3; 1 selected).
density_controls <- sidebarPanel(
  numericInput("mean", label = h3("Mean"), value = 1),
  selectInput(
    "sd",
    label = h3("Standard Deviation"),
    choices = list("1" = 1, "2" = 2, "3" = 3),
    selected = 1
  )
)

# Main area: two headings followed by the plot filled by output$normPlot.
density_display <- mainPanel(
  h2("This is the density plot of normal distribution."),
  h3("Note how it changes with mean and sd."),
  plotOutput("normPlot")
)

# Define UI: page title plus the sidebar layout assembled from the two
# panels above. This must stay the last expression of the script.
fluidPage(
  titlePanel("Please choose a mean and sd"),
  sidebarLayout(density_controls, density_display)
)
server.R
library(shiny)

# Define server logic required to plot density of normal distribution:
# draws 1000 normal random values with the chosen mean and standard
# deviation and plots their kernel density estimate.
function(input, output) {
  output$normPlot <- renderPlot({
    # The numeric box can be emptied by the user, which yields a missing
    # mean. req() stops this render quietly until a value is present,
    # instead of letting density() fail on the resulting NaN draws.
    req(input$mean)

    # as.numeric() is kept because the "sd" drop-down value may arrive as
    # text rather than as a number.
    draws <- rnorm(1000,
                   mean = as.numeric(input$mean),
                   sd = as.numeric(input$sd))

    # Fixed x-axis range; presumably so that the curve visibly moves and
    # widens as the inputs change.
    plot(density(draws), main = "normal density", xlim = c(-10, 10))
  })
}