SoFunction
Updated on 2024-11-21

DataFrame: Converting Scala case classes to a DataFrame with Spark SQL

DataFrame: Converting Scala case classes to a DataFrame with Spark SQL

Updated January 29, 2019 14:33:24 by silentwolfyh
Today I'd like to share how to convert a Scala case class to a DataFrame with Spark SQL. It should be a useful reference, and I hope it helps you. Read on for the details.

As shown below:

import 
import 
import 
import 
import .{StructField, StructType}
import .{Row, SaveMode, DataFrame, SQLContext}
import .{SparkConf, SparkContext}
import org.
 
/**
 * Created by silentwolf on 2016/6/3.
 */
 
/**
 * Flat record holding the tag values for one user.
 *
 * It has one String identifier (`SUUID`) followed by 27 `Float` fields. The
 * field names are the column names selected from `resultTable` further down
 * in this file, so their spelling and order must stay as they are.
 */
case class UserTag(
  SUUID: String,
  MAN: Float,
  WOMAN: Float,
  AGE10_19: Float,
  AGE20_29: Float,
  AGE30_39: Float,
  AGE40_49: Float,
  AGE50_59: Float,
  GAME: Float,
  MOVIE: Float,
  MUSIC: Float,
  ART: Float,
  POLITICS_NEWS: Float,
  FINANCIAL: Float,
  EDUCATION_TRAINING: Float,
  HEALTH_CARE: Float,
  TRAVEL: Float,
  AUTOMOBILE: Float,
  HOUSE_PROPERTY: Float,
  CLOTHING_ACCESSORIES: Float,
  BEAUTY: Float,
  IT: Float,
  BABY_PRODUCT: Float,
  FOOD_SERVICE: Float,
  HOME_FURNISHING: Float,
  SPORTS: Float,
  OUTDOOR_ACTIVITIES: Float,
  MEDICINE: Float
)
 
/**
 * Driver object for the user-tag job shown in this listing.
 *
 * NOTE(review): many expressions below have lost their receiver or qualifier
 * (for example `val LOG = ()`, `import ._`, `( * 0.2).toFloat`), apparently
 * when the listing was extracted. The comments describe only what is still
 * visible and mark guesses as such; the code is left exactly as found.
 */
object UserTagTable {
 
 // As written this evaluates to Unit; presumably a logger was created here - TODO confirm.
 val LOG = ()
 
 // Base path built from two AppConfig values joined by a slash (AppConfig is defined elsewhere).
 val REP_HOME = s"${AppConfig.HDFS_MASTER}/${AppConfig.HDFS_REP}"
 
 /**
  * Entry point.
  *
  * Reads `args(0)` as the appkey and `args(1)` as the last date, loads a
  * DataFrame through `loadDataFrame`, maps the rows of a SQL query into
  * `UserTag` values, registers them as `resultTable`, and calls `save()`
  * on a final query over that table.
  *
  * @param args command-line arguments; only indices 0 and 1 are read
  */
 def main(args: Array[String]) {
 
 // NOTE(review): initializer lost; as written this is Unit and is never read below.
 var startTime = ()
 
 // NOTE(review): type and initializer lost; presumably a SparkConf - TODO confirm.
 val conf:  = ()
 
 // NOTE(review): no argument is visible here, so `conf` is not visibly passed in.
 val sc = new SparkContext()
 
 val sqlContext = new SQLContext(sc)
 
 // Assigned in the else branch below.
 var df1: DataFrame = null
 
 // NOTE(review): the left operand is missing; presumably the argument count - TODO confirm.
 if ( == 0) {
  // The usage text names three values, but only args(0) and args(1) are read below.
  println("Please enter: appkey , StartTime : 2016-04-10 , StartEnd : 2016-04-11")
 }
 else {
 
  var appkey = args(0)
 
  var lastdate = args(1)
 
  // The start day is the hard-coded literal "2016-04-10"; only the end day comes from args.
  df1 = loadDataFrame(sqlContext, appkey, "2016-04-10", lastdate)
 
  // NOTE(review): receiver lost; presumably registers df1 as the temp table "suuidTable"
  // that the query below selects from - TODO confirm.
  ("suuidTable")
 
  // NOTE(review): receivers lost; presumably these register SQL functions named "taginfo"
  // and "intToString" that wrap the two helper methods of this object - TODO confirm.
  ("taginfo", (a: String) => userTagInfo(a))
  ("intToString", (b: Long) => intToString(b))
  import ._
 
  // Key step: take the suuid from the temporary table and the JSON string returned by the custom function, and put them into UserTag.
  // The pattern only matches rows of the shape Row(String, String).
 (" select distinct(suuid) AS suuid,taginfo(suuid) from suuidTable group by suuid").map { case Row(suuid: String, taginfo: String) =>
  // NOTE(review): the parser call is lost; presumably parses the taginfo JSON string - TODO confirm.
  val taginfoObj = (taginfo)
  // 27 values follow the (missing) first argument. The string keys are the same keys,
  // with the same spelling and case, that userTagInfo writes (note "IT" and "baby_Product").
  UserTag(,
   ("man"),
   ("woman"),
   ("age10_19"),
   ("age20_29"),
   ("age30_39"),
   ("age40_49"),
   ("age50_59"),
   ("game"),
   ("movie"),
   ("music"),
   ("art"),
   ("politics_news"),
   ("financial"),
   ("education_training"),
   ("health_care"),
   ("travel"),
   ("automobile"),
   ("house_property"),
   ("clothing_accessories"),
   ("beauty"),
   ("IT"),
   ("baby_Product"),
   ("food_service"),
   ("home_furnishing"),
   ("sports"),
   ("outdoor_activities"),
   ("medicine")
  )}.toDF().registerTempTable("resultTable")
 
  // Selects every UserTag column from resultTable plus two constant columns, APPKEY and DATE.
  // NOTE(review): appkey and lastdate come from the command line and are interpolated
  // directly into the SQL text.
  val resultDF = (s"select '$appkey' AS APPKEY, '$lastdate' AS DATE,SUUID ,MAN,WOMAN,AGE10_19,AGE20_29,AGE30_39 ," +
  "AGE40_49 ,AGE50_59,GAME,MOVIE,MUSIC,ART,POLITICS_NEWS,FINANCIAL,EDUCATION_TRAINING,HEALTH_CARE,TRAVEL,AUTOMOBILE," +
  "HOUSE_PROPERTY,CLOTHING_ACCESSORIES,BEAUTY,IT,BABY_PRODUCT ,FOOD_SERVICE ,HOME_FURNISHING ,SPORTS ,OUTDOOR_ACTIVITIES ," +
  "MEDICINE from resultTable WHERE SUUID IS NOT NULL")
  // Saves with the options "table" -> "USER_TAGS" and "zkUrl".
  // NOTE(review): the writer receiver, the zkUrl value and the format name are lost;
  // the option names suggest a Phoenix/HBase target - TODO confirm.
  ().options(
  Map("table" -> "USER_TAGS", "zkUrl" -> (""))
  ).format("").save()
 
 }
 }
 
 /**
  * Helper wrapped by the "intToString" lambda in `main`.
  *
  * NOTE(review): the body is lost; as shown it is Unit, which does not match
  * the declared String result.
  *
  * @param suuid the Long value to convert
  */
 def intToString(suuid: Long): String = {
 ()
 }
 
 /**
  * Builds a JSON-object-shaped string with 27 numeric fields, from "man"
  * through "medicine", by string concatenation.
  *
  * NOTE(review): the source expression of every value is lost (`().toFloat`,
  * `( * 0.2).toFloat`, `( * 1).toFloat`); presumably generated values - TODO confirm.
  *
  * @param num1 not referenced anywhere in the visible body
  * @return the concatenated string, starting with "{" and ending with "}"
  */
 def userTagInfo(num1: String): String = {
 
 // Not referenced anywhere in the visible body.
 var de = new DecimalFormat("0.00")
 var mannum = ().toFloat
 var man = mannum
 // woman is the complement of man, so the two add up to 1.
 var woman = (1 - mannum).toFloat
 
 // Four age buckets, each some value scaled by 0.2.
 var age10_19num = ( * 0.2).toFloat
 var age20_29num = ( * 0.2).toFloat
 var age30_39num = ( * 0.2).toFloat
 var age40_49num = ( * 0.2).toFloat
 
 var age10_19 = age10_19num
 var age20_29 = age20_29num
 var age30_39 = age30_39num
 var age40_49 = age40_49num
 // The last bucket is the remainder, so the five age values add up to 1.
 var age50_59 = (1 - age10_19num - age20_29num - age30_39num - age40_49num).toFloat
 
 // Twenty interest values, each some value scaled by 1.
 var game = ( * 1).toFloat
 var movie = ( * 1).toFloat
 var music = ( * 1).toFloat
 var art = ( * 1).toFloat
 var politics_news = ( * 1).toFloat
 
 var financial = ( * 1).toFloat
 var education_training = ( * 1).toFloat
 var health_care = ( * 1).toFloat
 var travel = ( * 1).toFloat
 var automobile = ( * 1).toFloat
 
 var house_property = ( * 1).toFloat
 var clothing_accessories = ( * 1).toFloat
 var beauty = ( * 1).toFloat
 var IT = ( * 1).toFloat
 var baby_Product = ( * 1).toFloat
 
 var food_service = ( * 1).toFloat
 var home_furnishing = ( * 1).toFloat
 var sports = ( * 1).toFloat
 var outdoor_activities = ( * 1).toFloat
 var medicine = ( * 1).toFloat
 
 // Result expression: each key is a quoted literal and each value is appended with `+`.
 "{" + "\"man\"" + ":" + man + "," + "\"woman\"" + ":" + woman + "," + "\"age10_19\"" + ":" + age10_19 + "," + "\"age20_29\"" + ":" + age20_29 + "," +
  "\"age30_39\"" + ":" + age30_39 + "," + "\"age40_49\"" + ":" + age40_49 + "," + "\"age50_59\"" + ":" + age50_59 + "," + "\"game\"" + ":" + game + "," +
  "\"movie\"" + ":" + movie + "," + "\"music\"" + ":" + music + "," + "\"art\"" + ":" + art + "," + "\"politics_news\"" + ":" + politics_news + "," +
  "\"financial\"" + ":" + financial + "," + "\"education_training\"" + ":" + education_training + "," + "\"health_care\"" + ":" + health_care + "," +
  "\"travel\"" + ":" + travel + "," + "\"automobile\"" + ":" + automobile + "," + "\"house_property\"" + ":" + house_property + "," + "\"clothing_accessories\"" + ":" + clothing_accessories + "," +
  "\"beauty\"" + ":" + beauty + "," + "\"IT\"" + ":" + IT + "," + "\"baby_Product\"" + ":" + baby_Product + "," + "\"food_service\"" + ":" + food_service + "," +
  "\"home_furnishing\"" + ":" + home_furnishing + "," + "\"sports\"" + ":" + sports + "," + "\"outdoor_activities\"" + ":" + outdoor_activities + "," + "\"medicine\"" + ":" + medicine +
  "}";
 
 }
 
 /**
  * Loads data from `$REP_HOME/appstatistic` and keeps rows with a non-null
  * timestamp, the given appkey, and a `day` between startDay and endDay (both inclusive).
  *
  * NOTE(review): the reader call on `path` is lost, so `ctx` is not visibly
  * used and the input format cannot be told from here.
  *
  * @param ctx      SQL context supplied by the caller
  * @param appkey   value compared with the `appkey` column
  * @param startDay lower bound for the `day` column
  * @param endDay   upper bound for the `day` column
  * @return the filtered DataFrame
  */
 def loadDataFrame(ctx: SQLContext, appkey: String, startDay: String, endDay: String): DataFrame = {
 val path = s"$REP_HOME/appstatistic"
 (path)
  .filter(s"timestamp is not null and appkey='$appkey' and day>='$startDay' and day<='$endDay'")
 }
 
 
}

That is everything I have to share about converting a Scala case class to a DataFrame with Spark SQL. I hope it serves as a useful reference, and thank you for your continued support.

  • DataFrame
  • SparkSql
  • scala

Related articles

  • Example analysis python3 implementation of concurrent access to the horizontal slice table

    In this article, the editor has collected the key points and example code for using python3 to access a horizontally sliced table concurrently; interested readers can refer to the following.
    2018-09-09
  • Learn python from scratch series of new version of the import httplib module reported ImportError solution!

    When using the new version of python to open the old version of the code, there may be some errors or incompatibilities appear, today we will analyze one of the situations
    2014-05-05
  • Python PyYAML library parses YAML files.

    This article introduces the Python PyYAML library to parse the use of YAML files in detail, there is a need for friends can refer to reference, I hope to be able to help, I wish you more progress, an early promotion and salary increase!
    2023-11-11
  • About python crawler principle analysis

    This article introduces the principle of python crawler, today we want to explain to you in detail the principle of python crawler, what is python crawler, python crawler work the basic process of what content, I hope that this is being carried out on the python crawler to learn the students to help!
    2023-07-07
  • Explaining the Array module in Python in detail

    This article introduces the Array module in Python, Python array module is a predefined array, so its space in memory is much smaller than the standard list, but also can perform fast element-level operations, such as adding, deleting, indexing and slicing operations, if you need it, you can refer to the following
    2023-04-04
  • pytorch custom initialization of weights

    Today, I'd like to share a pytorch custom initialization of weights, with good reference value, I hope to help you. Together follow the editor over to see it
    2019-08-08
  • Example of Python batch deleting and renaming a folder

    Today I'm going to share an example of Python batch deleting and renaming a folder, with good reference value, I hope it will help you. Together follow the editor over to see it
    2018-07-07
  • Code to extract Abaqus field output data using python scripts

    This article introduces the use of python script to extract the Abaqus field output data, the use of python script to Abaqus data extraction, the python script to do the first step of the import process, this article through the example code explains in detail the need for friends can refer to the following
    2022-11-11
  • Python to ip address sorting, classification method details

    This article introduces the python ip address sorting, categorization method explained, the IP protocol known as the "InterNetwork Interconnection Protocol Internet Protocol", the IP protocol is the TCP/IP system of network layer protocols, need friends can refer to the following
    2023-07-07
  • Python's method of reading input values using raw_input

    This article introduces the Python raw_input read input value of the method, the beginner has a good learning value, need friends can refer to the following
    2014-08-08

Latest Comments