DataFrame: How to convert Scala case classes into a DataFrame with Spark SQL
As shown below:
// Listing: case class UserTag plus object UserTagTable, which maps Spark SQL rows to UserTag
// instances, converts them with toDF(), and saves the result using the options
// "table" -> "USER_TAGS" and "zkUrl".
//
// NOTE(review): this listing appears to have been damaged during extraction. It is collapsed onto a
// few physical lines, and package qualifiers / call receivers are missing (for example
// `import .{SparkConf, SparkContext}`, `if ( == 0)`, `().toFloat`, `format("")`). It does not compile
// as shown; restore the missing tokens from the original project before using it.
// NOTE(review): because the listing is flattened, the "Focus" line comment inside main swallows the
// rest of its physical line.
//
// Visible structure:
//   - UserTag: SUUID: String followed by 27 Float fields (MAN ... MEDICINE).
//   - REP_HOME: built from AppConfig.HDFS_MASTER and AppConfig.HDFS_REP.
//   - main: creates a SparkContext and an SQLContext. When the (elided) argument check equals 0 it
//     prints a usage message; otherwise it reads appkey = args(0) and lastdate = args(1) and calls
//     loadDataFrame with the start day hard-coded to "2016-04-10". It then selects suuid and
//     taginfo(suuid) from suuidTable, maps each Row(suuid, taginfo) to a UserTag, registers the
//     result as temp table "resultTable", and runs a final select that adds the literal APPKEY and
//     DATE columns and keeps rows WHERE SUUID IS NOT NULL before saving.
//     NOTE(review): the usage message mentions a StartTime argument, but only args(0) and args(1)
//     are read and args(1) is used as the end day -- confirm the intended arguments.
//     NOTE(review): appkey and lastdate are interpolated directly into SQL text here and in
//     loadDataFrame -- confirm these values are trusted.
//   - intToString(suuid: Long): String -- body is elided in this listing.
//   - userTagInfo(num1: String): String -- returns a JSON-formatted string assembled by string
//     concatenation with the keys "man" ... "medicine"; these are the same keys main passes when it
//     builds UserTag. The parameter num1 is not referenced in the visible text (TODO confirm).
//   - loadDataFrame(ctx, appkey, startDay, endDay): uses the path s"$REP_HOME/appstatistic" and
//     filters on timestamp is not null, appkey, and day between startDay and endDay (inclusive).
import import import import import .{StructField, StructType} import .{Row, SaveMode, DataFrame, SQLContext} import .{SparkConf, SparkContext} import org. /** * Created by silentwolf on 2016/6/3. */ case class UserTag(SUUID: String, MAN: Float, WOMAN: Float, AGE10_19: Float, AGE20_29: Float, AGE30_39: Float, AGE40_49: Float, AGE50_59: Float, GAME: Float, MOVIE: Float, MUSIC: Float, ART: Float, POLITICS_NEWS: Float, FINANCIAL: Float, EDUCATION_TRAINING: Float, HEALTH_CARE: Float, TRAVEL: Float, AUTOMOBILE: Float, HOUSE_PROPERTY: Float, CLOTHING_ACCESSORIES: Float, BEAUTY: Float, IT: Float, BABY_PRODUCT: Float, FOOD_SERVICE: Float, HOME_FURNISHING: Float, SPORTS: Float, OUTDOOR_ACTIVITIES: Float, MEDICINE: Float ) object UserTagTable { val LOG = () val REP_HOME = s"${AppConfig.HDFS_MASTER}/${AppConfig.HDFS_REP}" def main(args: Array[String]) { var startTime = () val conf: = () val sc = new SparkContext() val sqlContext = new SQLContext(sc) var df1: DataFrame = null if ( == 0) { println("Please enter: appkey , StartTime : 2016-04-10 , StartEnd : 2016-04-11") } else { var appkey = args(0) var lastdate = args(1) df1 = loadDataFrame(sqlContext, appkey, "2016-04-10", lastdate) ("suuidTable") ("taginfo", (a: String) => userTagInfo(a)) ("intToString", (b: Long) => intToString(b)) import ._ //***Focus***: take the suuid from the temporary table and the Json data from the customized function and put it into UserTag. 
(" select distinct(suuid) AS suuid,taginfo(suuid) from suuidTable group by suuid").map { case Row(suuid: String, taginfo: String) => val taginfoObj = (taginfo) UserTag(, ("man"), ("woman"), ("age10_19"), ("age20_29"), ("age30_39"), ("age40_49"), ("age50_59"), ("game"), ("movie"), ("music"), ("art"), ("politics_news"), ("financial"), ("education_training"), ("health_care"), ("travel"), ("automobile"), ("house_property"), ("clothing_accessories"), ("beauty"), ("IT"), ("baby_Product"), ("food_service"), ("home_furnishing"), ("sports"), ("outdoor_activities"), ("medicine") )}.toDF().registerTempTable("resultTable") val resultDF = (s"select '$appkey' AS APPKEY, '$lastdate' AS DATE,SUUID ,MAN,WOMAN,AGE10_19,AGE20_29,AGE30_39 ," + "AGE40_49 ,AGE50_59,GAME,MOVIE,MUSIC,ART,POLITICS_NEWS,FINANCIAL,EDUCATION_TRAINING,HEALTH_CARE,TRAVEL,AUTOMOBILE," + "HOUSE_PROPERTY,CLOTHING_ACCESSORIES,BEAUTY,IT,BABY_PRODUCT ,FOOD_SERVICE ,HOME_FURNISHING ,SPORTS ,OUTDOOR_ACTIVITIES ," + "MEDICINE from resultTable WHERE SUUID IS NOT NULL") ().options( Map("table" -> "USER_TAGS", "zkUrl" -> ("")) ).format("").save() } } def intToString(suuid: Long): String = { () } def userTagInfo(num1: String): String = { var de = new DecimalFormat("0.00") var mannum = ().toFloat var man = mannum var woman = (1 - mannum).toFloat var age10_19num = ( * 0.2).toFloat var age20_29num = ( * 0.2).toFloat var age30_39num = ( * 0.2).toFloat var age40_49num = ( * 0.2).toFloat var age10_19 = age10_19num var age20_29 = age20_29num var age30_39 = age30_39num var age40_49 = age40_49num var age50_59 = (1 - age10_19num - age20_29num - age30_39num - age40_49num).toFloat var game = ( * 1).toFloat var movie = ( * 1).toFloat var music = ( * 1).toFloat var art = ( * 1).toFloat var politics_news = ( * 1).toFloat var financial = ( * 1).toFloat var education_training = ( * 1).toFloat var health_care = ( * 1).toFloat var travel = ( * 1).toFloat var automobile = ( * 1).toFloat var house_property = ( * 1).toFloat var 
clothing_accessories = ( * 1).toFloat var beauty = ( * 1).toFloat var IT = ( * 1).toFloat var baby_Product = ( * 1).toFloat var food_service = ( * 1).toFloat var home_furnishing = ( * 1).toFloat var sports = ( * 1).toFloat var outdoor_activities = ( * 1).toFloat var medicine = ( * 1).toFloat "{" + "\"man\"" + ":" + man + "," + "\"woman\"" + ":" + woman + "," + "\"age10_19\"" + ":" + age10_19 + "," + "\"age20_29\"" + ":" + age20_29 + "," + "\"age30_39\"" + ":" + age30_39 + "," + "\"age40_49\"" + ":" + age40_49 + "," + "\"age50_59\"" + ":" + age50_59 + "," + "\"game\"" + ":" + game + "," + "\"movie\"" + ":" + movie + "," + "\"music\"" + ":" + music + "," + "\"art\"" + ":" + art + "," + "\"politics_news\"" + ":" + politics_news + "," + "\"financial\"" + ":" + financial + "," + "\"education_training\"" + ":" + education_training + "," + "\"health_care\"" + ":" + health_care + "," + "\"travel\"" + ":" + travel + "," + "\"automobile\"" + ":" + automobile + "," + "\"house_property\"" + ":" + house_property + "," + "\"clothing_accessories\"" + ":" + clothing_accessories + "," + "\"beauty\"" + ":" + beauty + "," + "\"IT\"" + ":" + IT + "," + "\"baby_Product\"" + ":" + baby_Product + "," + "\"food_service\"" + ":" + food_service + "," + "\"home_furnishing\"" + ":" + home_furnishing + "," + "\"sports\"" + ":" + sports + "," + "\"outdoor_activities\"" + ":" + outdoor_activities + "," + "\"medicine\"" + ":" + medicine + "}"; } def loadDataFrame(ctx: SQLContext, appkey: String, startDay: String, endDay: String): DataFrame = { val path = s"$REP_HOME/appstatistic" (path) .filter(s"timestamp is not null and appkey='$appkey' and day>='$startDay' and day<='$endDay'") } }
That is everything I have to share about converting a Scala class into a DataFrame with Spark SQL. I hope it serves as a useful reference, and thank you for your continued support.
Related articles
Example analysis: concurrent access to horizontally sharded tables in Python 3
In this article, the editor has collected the key points and example code for concurrently accessing horizontally sharded tables in Python 3; interested readers can refer to the following.
2018-09-09
Learn Python from scratch series: fixing the ImportError raised by `import httplib` in newer Python versions
When code written for an older version of Python is run under a newer version, errors or incompatibilities may appear; today we analyze one such case.
2014-05-05
Parsing YAML files with the Python PyYAML library
This article explains in detail how to use the Python PyYAML library to parse YAML files. Readers who need it can use it as a reference; I hope it helps, and I wish you continued progress and an early promotion and raise!
2023-11-11
An analysis of how Python web crawlers work
This article explains in detail how Python crawlers work: what a Python crawler is and the basic workflow it follows. I hope it helps those who are currently learning about Python crawlers!
2023-07-07
The array module in Python explained in detail
This article introduces Python's array module. It provides a predefined array type that takes much less memory than a standard list while still supporting fast element-level operations such as adding, deleting, indexing, and slicing. Readers who need it can refer to the following.
2023-04-04
Custom weight initialization in PyTorch
Today I'd like to share a method for custom weight initialization in PyTorch. It has good reference value, and I hope it helps you. Follow along with the editor to take a look.
2019-08-08
Example of batch deleting and renaming folders in Python
Today I'm going to share an example of batch deleting and renaming folders in Python. It has good reference value, and I hope it helps you. Follow along with the editor to take a look.
2018-07-07
Code for extracting Abaqus field output data with Python scripts
This article introduces how to extract Abaqus field output data with a Python script; the first step in such a script is the import stage. The article explains the process in detail through example code. Readers who need it can refer to the following.
2022-11-11
Sorting and classifying IP addresses in Python explained
This article explains how to sort and classify IP addresses in Python. IP, the Internet Protocol (the protocol for interconnecting networks), is the network-layer protocol of the TCP/IP suite. Readers who need it can refer to the following.
2023-07-07
Reading input values with raw_input in Python
This article introduces how to read input values with Python's raw_input. It is a good learning resource for beginners; readers who need it can refer to the following.
2014-08-08