不多解释,直接上代码;如缺少 Python 包,请自行下载安装。
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from pyspark.sql import SparkSession
import pandas as pd
import os

# Initialize the SparkSession with Hive support enabled.
spark = SparkSession.builder \.appName("select_hive_data_to_xlsx") \.master("yarn") \.config("spark.sql.warehouse.dir", "hdfs://1.1.1.1:4007/aaa/hive/warehouse") \.enableHiveSupport() \.config("spark.yarn.queue", "default") \.config("spark.executor.instances", "10") \.config("spark.executor.memory", "5g") \.config("spark.executor.cores", "4") \.config("spark.driver.memory", "6g") \.config("spark.driver.cores", "5") \.config("spark.hadoop.fs.defaultFS", "hdfs://1.1.1.1:4007") \.config("spark.hadoop.fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFile