from pyspark.sql import SparkSession
from pyspark.sql import Row, Column
from pyspark.sql.functions import expr
# Row and column in Spark
#
# Walkthrough converted from a notebook. Bare expressions such as `rows` or
# `new_col` echo their value only in a notebook/REPL; the output captured
# from the original notebook run is preserved below each cell as comments.

spark = SparkSession.builder.appName('col-row').getOrCreate()
spark

# --- Row ---------------------------------------------------------------
# Rows are built positionally here (no field names); the column names are
# supplied separately when the DataFrame is created.
rows = [Row('Brooke', 20), Row('Denny', 31), Row('Jules', 30), Row('TD', 35), Row('Brooke', 25)]
rows
# Output:
# [<Row('Brooke', 20)>,
#  <Row('Denny', 31)>,
#  <Row('Jules', 30)>,
#  <Row('TD', 35)>,
#  <Row('Brooke', 25)>]

df = spark.createDataFrame(rows, ['name', 'age'])
df.show()
# Output (after a transient "[Stage 0:> (0 + 1) / 1]" progress line):
# +------+---+
# |  name|age|
# +------+---+
# |Brooke| 20|
# | Denny| 31|
# | Jules| 30|
# |    TD| 35|
# |Brooke| 25|
# +------+---+

# --- Column ------------------------------------------------------------
# expr() parses a SQL expression string into a Column; it is not bound to
# any DataFrame until it is used in one (here via withColumn).
new_col = expr("age + 3")
new_col
# Output:
# Column<'(age + 3)'>

df.withColumn('age after 3 years', new_col).show()
# Output:
# +------+---+-----------------+
# |  name|age|age after 3 years|
# +------+---+-----------------+
# |Brooke| 20|               23|
# | Denny| 31|               34|
# | Jules| 30|               33|
# |    TD| 35|               38|
# |Brooke| 25|               28|
# +------+---+-----------------+

# Attribute access on a DataFrame also yields a Column.
df.age
# Output:
# Column<'age'>

# Column objects support arithmetic operators directly.
df.withColumn('age after 5 years', (df.age + 5)).show()
# Output:
# +------+---+-----------------+
# |  name|age|age after 5 years|
# +------+---+-----------------+
# |Brooke| 20|               25|
# | Denny| 31|               36|
# | Jules| 30|               35|
# |    TD| 35|               40|
# |Brooke| 25|               30|
# +------+---+-----------------+

spark.stop()