{
"cells": [
{
"cell_type": "markdown",
"id": "5442af61",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Introduction to Spark Visualization\n",
"\n",
"## Feng Li\n",
"\n",
"### Central University of Finance and Economics\n",
"\n",
"### [feng.li@cufe.edu.cn](feng.li@cufe.edu.cn)\n",
"### Course home page: [https://feng.li/distcomp](https://feng.li/distcomp)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8f3c23ab",
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"import findspark\n",
"findspark.init()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c179d02b",
"metadata": {
"scrolled": true,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/fli/.APP/spark/python/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n",
" warnings.warn(\n",
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
"23/06/08 17:14:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
]
}
],
"source": [
"import pyspark\n",
"import pyspark.pandas as ps\n",
"conf = pyspark.SparkConf().setAppName(\"Spark Visualization App\")\n",
"spark = pyspark.sql.SparkSession.builder.config(conf=conf).getOrCreate()\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "50d86587",
"metadata": {
"scrolled": false,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"fillpattern": {
"shape": ""
},
"hovertemplate": "variable=sales
index=%{x}
value=%{y}