from pyspark.sql import SparkSession
from pyspark.sql import Row, Column
from pyspark.sql.functions import expr
# Obtain the SparkSession used throughout this walkthrough (app name 'col-row').
# getOrCreate() returns an already-active session if one exists.
spark = SparkSession.builder.appName('col-row').getOrCreate()
# Row and column in Spark
# Bare expression: in a notebook cell this displays the session object.
spark
## Row
# Sample data built from positional Row objects; each one carries a name
# string followed by an integer age.
rows = [
    Row('Brooke', 20),
    Row('Denny', 31),
    Row('Jules', 30),
    Row('TD', 35),
    Row('Brooke', 25),
]
# Bare expression: echoes the list of Row objects when run as a notebook cell.
rows
[<Row('Brooke', 20)>,
<Row('Denny', 31)>,
<Row('Jules', 30)>,
<Row('TD', 35)>,
<Row('Brooke', 25)>]
# The Row objects are positional, so the column names are passed separately.
df = spark.createDataFrame(rows, ['name', 'age'])
# Print the DataFrame contents as a text table.
df.show()
[Stage 0:> (0 + 1) / 1]
+------+---+
|  name|age|
+------+---+
|Brooke| 20|
| Denny| 31|
| Jules| 30|
|    TD| 35|
|Brooke| 25|
+------+---+
## Column
# expr() turns a SQL expression string into a Column object; nothing is
# computed until the column is used in a DataFrame operation.
new_col = expr("age + 3")
# Bare expression: echoes the Column's repr when run as a notebook cell.
new_col
Column<'(age + 3)'>
# Append a derived column computed from the SQL-expression Column, then print
# the resulting table.
df.withColumn('age after 3 years', new_col).show()
+------+---+-----------------+
|  name|age|age after 3 years|
+------+---+-----------------+
|Brooke| 20|               23|
| Denny| 31|               34|
| Jules| 30|               33|
|    TD| 35|               38|
|Brooke| 25|               28|
+------+---+-----------------+
# Attribute access on the DataFrame yields a Column object (its repr is
# echoed when this runs as a notebook cell).
df.age
Column<'age'>
# Same idea as an expr()-built column, but using Column arithmetic directly:
# `df.age + 5` is itself a Column.
df.withColumn('age after 5 years', (df.age + 5)).show()
+------+---+-----------------+
|  name|age|age after 5 years|
+------+---+-----------------+
|Brooke| 20|               25|
| Denny| 31|               36|
| Jules| 30|               35|
|    TD| 35|               40|
|Brooke| 25|               30|
+------+---+-----------------+
# Shut down the Spark session now that the walkthrough is finished.
spark.stop()