I am new to PySpark and am facing a few issues while executing jobs.
I am submitting a job to a standalone Spark instance with 2 executors configured correctly. Sometimes both executors work in parallel, use the allocated resources as expected, and the job completes successfully. But sometimes, on submitting the SAME job that previously ran fine, only one executor does the work while the other stays idle.
What could cause both executors to not participate every time?
Below is my code:
from flask import Blueprint
import pyspark
from pyspark import SparkContext
from pyspark.sql import SQLContext  # SQLContext lives in pyspark.sql, not pyspark

jl = Blueprint('HelloWorld', __name__, url_prefix='/')

@jl.route('/join')
def join_logic():
    # Resource settings for the standalone cluster, plus very long
    # network/heartbeat timeouts
    conf = pyspark.SparkConf().setAll([
        ('spark.executor.memory', '24g'),
        ('spark.executor.cores', '3'),
        ('spark.worker.memory', '56g'),
        ('spark.driver.memory', '24g'),
        ('spark.worker.cores', '6'),
        ('spark.network.timeout', '10000001'),
        ('spark.executor.heartbeatInterval', '10000000')])
    sc = SparkContext("spark://X.X.X.X:7077", "JOB_1", conf=conf)
    sqlContext = SQLContext(sc)

    # First table, loaded over JDBC
    df = sqlContext.read.format('jdbc').options(
        url='jdbc:mysql://x.x.x.x/schemaName?autoReconnect=true&useSSL=false',
        driver='com.mysql.jdbc.Driver',
        dbtable='table_name',
        user='root',
        password='xxxx').load()

    # Second table, loaded over JDBC
    df1 = sqlContext.read.format('jdbc').options(
        url='jdbc:mysql://X.X.X.X/schema_Name?autoReconnect=true&useSSL=false',
        driver='com.mysql.jdbc.Driver',
        dbtable='Table_Name',
        user='root',
        password='xxxx').load()

    # Left join on the shared column, then count the result
    result = df.join(df1, df.column == df1.column, 'left')
    res = result.count()
    sc.stop()
    return str(res)
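As far as I understand, a JDBC read with no partitioning options produces a single partition, so all of the data can land on one executor. Here is a sketch of the partitioned read I am considering; the partition column id, the bound values, and numPartitions are placeholders I made up, not my real schema. Would a read like this make both executors participate?

# Sketch only: the same read with JDBC partitioning options.
# 'id', the bounds, and numPartitions are hypothetical placeholders;
# a real numeric column and its actual min/max would be needed.
df = sqlContext.read.format('jdbc').options(
    url='jdbc:mysql://x.x.x.x/schemaName?autoReconnect=true&useSSL=false',
    driver='com.mysql.jdbc.Driver',
    dbtable='table_name',
    user='root',
    password='xxxx',
    partitionColumn='id',       # hypothetical numeric column to split on
    lowerBound='1',             # assumed minimum of that column
    upperBound='1000000',       # assumed maximum of that column
    numPartitions='6').load()   # issue 6 parallel JDBC queries

# Quick check of how many partitions the load produced
print(df.rdd.getNumPartitions())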