{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "source": [ "# Tourism Demand Forecasting with Spark\n", "\n", "\n", "## Feng Li\n", "\n", "### Guanghua School of Management\n", "### Peking University\n", "\n", "\n", "### [feng.li@gsm.pku.edu.cn](mailto:feng.li@gsm.pku.edu.cn)\n", "### Course home page: [https://feng.li/bdcf](https://feng.li/bdcf)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "25/03/26 19:36:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" ] }, { "data": { "text/html": [ "\n", "
SparkSession - in-memory
\n", " \n", "SparkContext
\n", "\n", " \n", "\n", "v3.5.3
local[*]
Spark Forecasting
\n", " | date | \n", "TotalAll | \n", "AAll | \n", "BAll | \n", "CAll | \n", "DAll | \n", "EAll | \n", "FAll | \n", "GAll | \n", "AAAll | \n", "... | \n", "GBBBus | \n", "GBBOth | \n", "GBCHol | \n", "GBCVis | \n", "GBCBus | \n", "GBCOth | \n", "GBDHol | \n", "GBDVis | \n", "GBDBus | \n", "GBDOth | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1998-01-01 | \n", "45151.071280 | \n", "17515.502380 | \n", "10393.618016 | \n", "8633.359047 | \n", "3504.313346 | \n", "3121.619189 | \n", "1850.735773 | \n", "131.923529 | \n", "4977.209611 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "7.536223 | \n", "0.000000 | \n", "1.628948 | \n", "0.000000 | \n", "0.811856 | \n", "0.000000 | \n", "9.478051 | \n", "0.0 | \n", "
1 | \n", "1998-02-01 | \n", "17294.699551 | \n", "5880.367918 | \n", "3855.647839 | \n", "3580.051065 | \n", "1321.257992 | \n", "1826.610676 | \n", "757.079744 | \n", "73.684316 | \n", "1937.229611 | \n", "... | \n", "1.045797 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "5.296459 | \n", "0.000000 | \n", "0.522899 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
2 | \n", "1998-03-01 | \n", "20725.114184 | \n", "7086.444392 | \n", "4353.379282 | \n", "4717.676663 | \n", "1521.950007 | \n", "1868.381530 | \n", "900.796622 | \n", "276.485688 | \n", "2117.671851 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "2.945006 | \n", "1.425324 | \n", "9.924744 | \n", "3.100121 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
3 | \n", "1998-04-01 | \n", "25388.612353 | \n", "10530.639348 | \n", "5115.865530 | \n", "4924.575204 | \n", "1813.439177 | \n", "1952.612465 | \n", "801.444140 | \n", "250.036488 | \n", "2615.957465 | \n", "... | \n", "11.461824 | \n", "0.000000 | \n", "26.419176 | \n", "13.690603 | \n", "2.312088 | \n", "0.000000 | \n", "0.000000 | \n", "10.958005 | \n", "2.312088 | \n", "0.0 | \n", "
4 | \n", "1998-05-01 | \n", "20330.035211 | \n", "7430.373559 | \n", "3820.666426 | \n", "4219.283647 | \n", "1375.082095 | \n", "2616.965317 | \n", "551.377058 | \n", "316.287109 | \n", "2393.145511 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "23.789282 | \n", "67.846207 | \n", "1.282767 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
223 | \n", "2016-08-01 | \n", "24100.446632 | \n", "7227.612581 | \n", "4000.422405 | \n", "7536.939346 | \n", "1230.321700 | \n", "2868.505157 | \n", "355.515713 | \n", "881.129730 | \n", "2305.649511 | \n", "... | \n", "7.874448 | \n", "0.000000 | \n", "49.899947 | \n", "8.300225 | \n", "17.633478 | \n", "8.218562 | \n", "4.121631 | \n", "0.408894 | \n", "1.625820 | \n", "0.0 | \n", "
224 | \n", "2016-09-01 | \n", "24800.033759 | \n", "6778.363226 | \n", "4132.168965 | \n", "7123.112802 | \n", "1632.732153 | \n", "3327.064770 | \n", "525.302974 | \n", "1281.288869 | \n", "2061.246613 | \n", "... | \n", "0.421996 | \n", "0.000000 | \n", "80.058887 | \n", "37.306013 | \n", "109.024164 | \n", "46.447153 | \n", "11.661140 | \n", "3.557735 | \n", "4.448534 | \n", "0.0 | \n", "
225 | \n", "2016-10-01 | \n", "30039.106985 | \n", "8592.998250 | \n", "5719.297913 | \n", "8759.191781 | \n", "1900.476487 | \n", "3704.651986 | \n", "895.180382 | \n", "467.310186 | \n", "2267.174784 | \n", "... | \n", "35.375093 | \n", "0.273247 | \n", "52.156131 | \n", "2.093902 | \n", "50.283538 | \n", "3.319366 | \n", "0.754941 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
226 | \n", "2016-11-01 | \n", "27320.918908 | \n", "8663.240960 | \n", "5165.403172 | \n", "6804.359328 | \n", "1543.299435 | \n", "3698.431886 | \n", "852.313563 | \n", "593.870565 | \n", "2786.280116 | \n", "... | \n", "10.723090 | \n", "39.292496 | \n", "8.344998 | \n", "7.697995 | \n", "43.270319 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "5.180648 | \n", "0.0 | \n", "
227 | \n", "2016-12-01 | \n", "24604.310774 | \n", "7953.659899 | \n", "5000.483537 | \n", "6049.188583 | \n", "1378.418048 | \n", "2844.820274 | \n", "876.351928 | \n", "501.388506 | \n", "2676.459548 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "2.418446 | \n", "0.000000 | \n", "0.762140 | \n", "1.055685 | \n", "0.000000 | \n", "0.000000 | \n", "9.966514 | \n", "0.0 | \n", "
228 rows × 556 columns
\n", "\n", " | Parent_Group | \n", "AAAHol | \n", "AAAVis | \n", "AAABus | \n", "AAAOth | \n", "AABHol | \n", "AABVis | \n", "AABBus | \n", "AABOth | \n", "ABAHol | \n", "... | \n", "GBBBus | \n", "GBBOth | \n", "GBCHol | \n", "GBCVis | \n", "GBCBus | \n", "GBCOth | \n", "GBDHol | \n", "GBDVis | \n", "GBDBus | \n", "GBDOth | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "TotalAll | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
1 | \n", "AAll | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "BAll | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "CAll | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "DAll | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
550 | \n", "GBCOth | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
551 | \n", "GBDHol | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
552 | \n", "GBDVis | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "
553 | \n", "GBDBus | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
554 | \n", "GBDOth | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
555 rows × 305 columns
\n", "