Spark Listener Metrics

How to log Spark internal metrics with Databand.

Logging Spark internal metrics

Databand can capture Spark internal metrics, such as the stage statistics shown in the Spark History UI (CPU usage, records read and written, and so on).

To capture Spark internal metrics, manually add the Databand Spark Listener to your Spark context. The first example below shows how to do this in Scala, and the second in Java:

import ai.databand.annotations.Task
import ai.databand.spark.DbndSparkListener
import org.apache.spark.sql.{Dataset, Row, SparkSession}

/** Sample Spark application that attaches the Databand Spark listener. */
object CreateReport {

    /**
      * Obtains (or creates) the Spark session named "CreateReportSparkScala"
      * and registers a [[DbndSparkListener]] on its Spark context.
      *
      * @param args command-line arguments; not read by this example
      */
    def main(args: Array[String]): Unit = {
        val session = SparkSession
            .builder()
            .appName("CreateReportSparkScala")
            .getOrCreate()

        // The listener is registered on the session's underlying SparkContext.
        session.sparkContext.addSparkListener(new DbndSparkListener)
    }

}
import ai.databand.annotations.Task;
import ai.databand.spark.DbndSparkListener;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

/**
 * Sample Spark application that attaches the Databand Spark listener.
 */
public class CreateReport {

    /**
     * Obtains (or creates) the Spark session named "CreateReportSparkJava"
     * and registers a {@link DbndSparkListener} on its Spark context.
     *
     * @param args command-line arguments; not read by this example
     */
    public static void main(String[] args) {
        final SparkSession session =
            SparkSession.builder().appName("CreateReportSparkJava").getOrCreate();

        // The listener is registered on the session's underlying SparkContext.
        session.sparkContext().addSparkListener(new DbndSparkListener());
        // ...
    }

}

Did this page help you?