
Update write function

theenglishway 2 years ago
parent
commit
1df3a0b77d
3 changed files with 9 additions and 6 deletions
  1. .gitignore (+1 -0)
  2. src/main/scala/Main.scala (+1 -1)
  3. src/main/scala/Transform.scala (+7 -5)

+ 1 - 0
.gitignore

@@ -3,3 +3,4 @@ target/
 project/target/
 .bloop/
 .metals/
+output/

+ 1 - 1
src/main/scala/Main.scala

@@ -28,5 +28,5 @@ object Main extends App {
   println(stats.size)
 
   val analysis = GamesAnalysis(teams_output, games_output, stats_output)
-  analysis.writeToCsv("output")
+  GamesAnalysis.writeToCsv(analysis.merged, "output/merged_df")
 }

+ 7 - 5
src/main/scala/Transform.scala

@@ -1,6 +1,6 @@
 package transform
 
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{SparkSession, DataFrame}
 import org.apache.spark.sql.functions.{when, sum}
 
 case class GamesAnalysis(teams: os.Path, games: os.Path, stats: os.Path) {
@@ -60,11 +60,13 @@ case class GamesAnalysis(teams: os.Path, games: os.Path, stats: os.Path) {
     )
     .drop($"sgp.game_id", $"sgp.team_id")
 
-  def writeToCsv(output: String) = {
-    merged.write.option("header", true).mode("overwrite").csv(output)
-  }
-
   private def readInput(filePath: os.Path) = {
     spark.read.option("multiline", "true").json(filePath.toString())
   }
 }
+
+object GamesAnalysis {
+  def writeToCsv(df: DataFrame, output: String) = {
+    df.write.option("header", true).mode("overwrite").csv(output)
+  }
+}
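
For context on the refactor above: moving writeToCsv into the GamesAnalysis companion object decouples the CSV write from a particular GamesAnalysis instance, so any DataFrame can be written with the same options. A minimal usage sketch follows; the local SparkSession, the toy DataFrame, and the output/demo_df path are illustrative assumptions, not part of this commit.

import org.apache.spark.sql.SparkSession
import transform.GamesAnalysis

object WriteDemo extends App {
  // Illustrative local session; the project wires up its own SparkSession elsewhere.
  val spark = SparkSession.builder().master("local[*]").appName("write-demo").getOrCreate()
  import spark.implicits._

  // Any DataFrame can be passed now, not just analysis.merged.
  val demo = Seq(("A", 1), ("B", 2)).toDF("team", "wins")

  // Same write options as in the diff: header row, overwrite the output directory.
  GamesAnalysis.writeToCsv(demo, "output/demo_df")

  spark.stop()
}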