The following Python function will capture the Dataproc job ID, if anyone is interested:

import pyspark

sc = pyspark.SparkContext.getOrCreate()

def extract_jobid(sc):
    # Access the underlying SparkConf
    spark_conf = sc.getConf()
    # Get the value of the spark.yarn.tags configuration
    yarn_tags = spark_conf.get("spark.yarn.tags")
    # Extract the job ID from yarn_tags using string processing,
    # assuming the relevant tag has the format "dataproc_job_<job_id>"
    job_id = None
    if yarn_tags:
        for tag in yarn_tags.split(","):
            if tag.startswith("dataproc_job_") and not tag.startswith("dataproc_job_attempt_timestamp_"):
                # Strip the prefix rather than splitting on "_", so job IDs
                # that themselves contain underscores come back intact
                job_id = tag[len("dataproc_job_"):]
                break
    return job_id

# Call the function and print the Dataproc job ID
print(extract_jobid(sc))
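
As a quick sanity check, the parsing logic can be exercised against a hand-written tag string without a cluster. The tag values below are made up for illustration; the real contents of spark.yarn.tags depend on your cluster and job:

# Hypothetical spark.yarn.tags value; the job ID and timestamp are invented
sample_tags = "dataproc_job_my_job_1234,dataproc_job_attempt_timestamp_1712345678"
for tag in sample_tags.split(","):
    if tag.startswith("dataproc_job_") and not tag.startswith("dataproc_job_attempt_timestamp_"):
        print(tag[len("dataproc_job_"):])  # prints: my_job_1234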