"""Load CSG assiette data into Spark and count the rows of the parquet extract.

Side effects: starts (or reuses) a SparkSession, prints the Spark version,
reads the calibration CSV and the assiettes parquet file, registers the
parquet data as temp view ``csg`` and shows its row count.
"""
import findspark

findspark.init()  # must run before any pyspark import resolves

from pyspark.sql import SparkSession

import plotly  # kept from original file; presumably used further down — verify

spark = SparkSession.builder.appName("Basics").getOrCreate()
print(spark.version)

# Input paths — all defined up-front, before any read uses them.
# (Original script read `parquet_path` before it was defined: NameError.)
calib = r"C:\Users\Public\Documents\TRAVAIL\csg\data_out\CalibPOTE_2019.csv"
hdf_pote = (
    r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\extraction_assiettes_csg\*.hdf"
)  # NOTE(review): defined but never read in this chunk — confirm it is used elsewhere
parquet_path = r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\assiettes_csg.parquet"

# BUG FIX: original did `spark.read.csv(parquet_path)` (CSV reader on the parquet
# path, and before `parquet_path` existed) then immediately overwrote the result.
# Read the calibration CSV into its own DataFrame instead.
# NOTE(review): no header/schema options given, matching the original call — confirm
# whether the CSV has a header row.
df_calib = spark.read.csv(calib)

# Load the parquet extract and count its rows via Spark SQL.
df = spark.read.parquet(parquet_path)
df.createOrReplaceTempView("csg")
df_count = spark.sql("SELECT count(*) FROM csg")
df_count.show()