# ! pip3 install matplotlib -U


import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
# Draw 1000 random samples from the standard normal distribution
# (mean 0, standard deviation 1).
r = norm.rvs(loc=0, scale=1, size=1000)

# 100 evenly spaced points between the 1st and 99th percentiles of the
# standard normal. `ppf` is the percent point function, i.e. the inverse CDF.
x = np.linspace(start=norm.ppf(0.01), stop=norm.ppf(0.99), num=100)


# Figure 1: the theoretical probability density function over that range.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, norm.pdf(x), 'blue', linewidth=5, alpha=0.6, label='norm pdf')
plt.show()


# Figure 2: histogram of the random samples, with a frameless legend.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(r, histtype='stepfilled', alpha=1, label='...')
ax.legend(loc='best', frameon=False)
plt.show()


# One data point per decade from 1950 through 2010.
years = list(range(1950, 2011, 10))
gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]

# Line chart with years on the x-axis and GDP on the y-axis:
# green solid line with a circular marker at every data point.
fig = plt.figure()
plt.plot(years, gdp, color='green', marker='o', ls='-')

# Title and y-axis label.
plt.title("Nominal GDP")
plt.ylabel("Billions of $")
plt.show()


from scipy import special
def drumhead_height(n, k, distance, angle, t):
    """Return the displacement of a vibrating circular drumhead.

    The value is the product of a time factor ``cos(t)``, an angular factor
    ``cos(n * angle)`` and a radial factor ``J_n(distance * z)``, where
    ``J_n`` is the Bessel function of the first kind of order ``n`` and
    ``z`` is the last of its first ``k`` positive zeros.

    Parameters
    ----------
    n : int
        Order of the Bessel function.
    k : int
        Number of zeros requested from ``special.jn_zeros``; the last one
        returned is used to scale ``distance``.
    distance : float or array_like
        Radial coordinate (the radial factor vanishes at ``distance == 1``).
    angle : float or array_like
        Angular coordinate in radians.
    t : float
        Time.

    Returns
    -------
    float or ndarray
        Displacement, broadcast over ``distance`` and ``angle``.
    """
    zeros = special.jn_zeros(n, k)
    kth_zero = zeros[-1]
    temporal = np.cos(t)
    angular = np.cos(n * angle)
    radial = special.jn(n, distance * kth_zero)
    return temporal * angular * radial
# Polar grid: 50 angles covering the full circle and 50 radii from the
# centre (0) to the rim (1), both end points included.
theta = np.linspace(0, 2*np.pi, 50)
radius = np.linspace(0, 1, 50)

# Cartesian coordinates of every grid point; row i holds radius[i],
# column j holds theta[j].
x = np.outer(radius, np.cos(theta))
y = np.outer(radius, np.sin(theta))

# Drumhead displacement on the same grid at t = 0.5 for mode (n=1, k=1).
# The column vector of radii broadcasts against theta to give a 50x50 array.
z = drumhead_height(1, 1, radius[:, np.newaxis], theta, 0.5)


# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# newer releases register it automatically, so the import is harmless there.
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Render the drumhead displacement z over the (x, y) mesh as a 3D surface.
fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it.
# Constructing Axes3D(fig) directly no longer adds the axes to the figure on
# current matplotlib releases, which results in an empty plot.
ax = fig.add_subplot(111, projection='3d')
# rstride/cstride of 1 draw every row and column of the mesh.
ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap=cm.jet)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()


# ! pip3 install seaborn -U


import seaborn as sns
import numpy as np
# Enable seaborn's default theme and the short colour codes ("b", "m", ...).
sns.set(color_codes=True)
# Fixed seed derived from the character codes of the section name, so the
# random sample below is reproducible.
np.random.seed(sum(ord(ch) for ch in "distributions"))


# 100 draws from the standard normal distribution, shown as a histogram.
x = np.random.normal(size=100)
sns.displot(data=x)


# Same sample: histogram without a KDE curve, plus a rug of tick marks.
sns.displot(data=x, kde=False, rug=True)


import pandas as pd
sns.set_theme(style="darkgrid")
iris = pd.read_csv("data/iris.csv")


# Square figure with equal axis scaling.
f, ax = plt.subplots(figsize=(8, 8))
ax.set_aspect("equal")

# Contour plot of the bivariate density of sepal width vs. sepal length,
# one set of contours per species, with the 'versicolor' rows left out.
sns.kdeplot(
    data=iris[iris["species"] != "versicolor"],
    x="sepal_width",
    y="sepal_length",
    hue="species",
    thresh=0.1,
)

<AxesSubplot:xlabel='sepal_width', ylabel='sepal_length'>


import pandas as pd
import numpy as np
# Parameters of a 2-D Gaussian: means (0, 1), unit variances, covariance 0.5.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
# Draw 200 correlated samples and label the two columns "x" and "y".
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])


# Joint plot of x against y using kernel density estimates (kind="kde").
sns.jointplot(x="x", y="y", data=df, kind="kde");


# Same KDE joint plot in magenta, with the raw samples drawn on the joint
# axes as blue star markers.
g = sns.jointplot(x="x", y="y", data=df, kind="kde", color="m")
g.plot_joint(plt.scatter, c="b", s=30, linewidth=1, marker="*")
# Make the first artist collection on the joint axes fully transparent.
# NOTE(review): which layer collections[0] refers to depends on the
# seaborn/matplotlib version -- confirm this hides the intended layer.
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$");


# Default pairwise-relationship grid for the iris data.
sns.pairplot(data=iris)


# Hand-built grid: KDE on the diagonal and 6-level KDE contours in the
# "Blues_d" colormap on the off-diagonal panels.
g = sns.PairGrid(data=iris)
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot, cmap="Blues_d", n_levels=6)


# Pair plot coloured by species, with KDE curves on the diagonal.
g = sns.pairplot(
    data=iris,
    hue="species",
    palette="Set2",
    diag_kind="kde",
    height=2.5,
)


sns.set(style="whitegrid", color_codes=True)
# Fixed seed derived from the character codes of the section name.
np.random.seed(sum(ord(ch) for ch in "categorical"))

titanic = pd.read_csv("data/titanic.csv")
tips = pd.read_csv("data/tips.csv")


# Strip plot: one point per bill, grouped by day.
sns.stripplot(data=tips, x="day", y="total_bill")


# Swarm plot: like the strip plot, but points are spread so they do not overlap.
sns.swarmplot(data=tips, x="day", y="total_bill")


# Horizontal violins of the bill per day, split by meal time.
sns.violinplot(data=tips, x="total_bill", y="day", hue="time")


# Violins with no inner annotation, overlaid with semi-transparent white
# swarm points showing the individual observations.
sns.violinplot(data=tips, x="day", y="total_bill", inner=None)
sns.swarmplot(data=tips, x="day", y="total_bill", color="w", alpha=0.5)


# One box-plot panel per day: bill by meal time, split by smoker status.
sns.catplot(
    data=tips,
    x="time",
    y="total_bill",
    hue="smoker",
    col="day",
    kind="box",
    height=4,
    aspect=0.5,
)


sns.set(color_codes=True)

# Fixed seed derived from the character codes of the section name.
np.random.seed(sum(ord(ch) for ch in "regression"))


# Scatter of tip against total bill with a fitted linear regression line.
sns.regplot(data=tips, x="total_bill", y="tip")


# Party size is discrete, so a little horizontal jitter is applied to the
# plotted points.
sns.lmplot(data=tips, x="size", y="tip", x_jitter=0.05)


# Separate regression fits for smokers and non-smokers.
sns.lmplot(data=tips, x="total_bill", y="tip", hue="smoker")


# Same fits, with distinct markers and the "Set1" palette per group.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="smoker",
    markers=["o", "x"],
    palette="Set1",
)


# One panel per meal time, each with a fit per smoker status.
sns.lmplot(data=tips, x="total_bill", y="tip", hue="smoker", col="time")


# Joint plot with a regression fit (kind="reg").
sns.jointplot(data=tips, x="total_bill", y="tip", kind="reg")


# Load the example flights dataset (long-form: one row per month/year pair)
# and pivot it to wide-form: one row per month, one column per year, with
# passenger counts as the cell values.
flights_long = pd.read_csv("data/flights.csv")
# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the
# positional form raises a TypeError there.
flights = flights_long.pivot(index="month", columns="year", values="passengers")
flights


sns.set_theme()
f, ax = plt.subplots(figsize=(9, 6))
# Heatmap of the month-by-year table: every cell is annotated with its value
# formatted as an integer, and cells are separated by thin lines.
sns.heatmap(
    data=flights,
    annot=True,
    fmt="d",
    linewidths=0.5,
    ax=ax,
)

<AxesSubplot:xlabel='year', ylabel='month'>

year	1949	1950	1951	1952	1953	1954	1955	1956	1957	1958	1959	1960
month
April	129	135	163	181	235	227	269	313	348	348	396	461
August	148	170	199	242	272	293	347	405	467	505	559	606
December	118	140	166	194	201	229	278	306	336	337	405	432
February	118	126	150	180	196	188	233	277	301	318	342	391
January	112	115	145	171	196	204	242	284	315	340	360	417
July	148	170	199	230	264	302	364	413	465	491	548	622
June	135	149	178	218	243	264	315	374	422	435	472	535
March	132	141	178	193	236	235	267	317	356	362	406	419
May	121	125	172	183	229	234	270	318	355	363	420	472
November	104	114	146	172	180	203	237	271	305	310	362	390
October	119	133	162	191	211	229	274	306	347	359	407	461
September	136	158	184	209	237	259	312	355	404	404	463	508

Statistical Data Visualization¶

Basic Plotting with `matplotlib`¶

Display the probability density function (pdf)¶

3D Plot¶

Statistical Data Visualization with `Seaborn`¶

Features that seaborn offers¶

Comparison with `matplotlib`¶

Visualizing the distribution of a dataset¶

Histograms¶

Kernel density estimation¶

Visualizing pairwise relationships in a dataset¶

Plotting with categorical data¶

Categorical scatterplots¶

Violinplots¶

Visualizing linear relationships¶

Lab¶

Statistical Data Visualization¶

Basic Plotting with matplotlib¶

Display the probability density function (pdf)¶

3D Plot¶

Statistical Data Visualization with Seaborn¶

Features that seaborn offers¶

Comparison with matplotlib¶

Visualizing the distribution of a dataset¶

Histograms¶

Kernel density estimation¶

Visualizing pairwise relationships in a dataset¶

Plotting with categorical data¶

Categorical scatterplots¶

Violinplots¶

Visualizing linear relationships¶

Lab¶

Basic Plotting with `matplotlib`¶

Statistical Data Visualization with `Seaborn`¶

Comparison with `matplotlib`¶