Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext

Помогите, пожалуйста, устранить ошибку в Jupyter Notebook. При выполнении команды:

# Minimal reproduction: build (or reuse) a SparkSession and take its SparkContext.
from pyspark.sql import SparkSession

# The builder chain below uses backslash line continuations, so comments cannot
# be placed between its lines; the settings are summarised here instead:
#   - master 'local[4]', application name 'Lesson_2', Spark UI port '4050';
#   - spark.executor.instances = 4 and spark.executor.cores = 1;
#   - spark.executor.memory is int(12000/4/1.1) == 2727, passed as '2727mb'.
# NOTE(review): with a local master the spark.executor.* options presumably have
# no effect (work runs inside the driver JVM) -- confirm against the Spark docs.
spark = SparkSession.builder\
        .master('local[4]')\
        .appName('Lesson_2')\
        .config('spark.ui.port', '4050')\
        .config('spark.executor.instances', 4)\
        .config('spark.executor.memory', f'{int(12000/4/1.1)}mb')\
        .config('spark.executor.cores', 1)\
        .getOrCreate()

# SparkContext that backs the session created above.
sc = spark.sparkContext

Появляется следующая ошибка:

    ---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-6-7663f4aba942> in <module>
      2 
      3 
----> 4 spark = SparkSession.builder\
      5         .master('local[4]')\
      6         .appName('Lesson_2')\

E:\Program Files\Anaconda\lib\site-packages\pyspark\sql\session.py in getOrCreate(self)
    226                             sparkConf.set(key, value)
    227                         # This SparkContext may be an existing one.
--> 228                         sc = SparkContext.getOrCreate(sparkConf)
    229                     # Do not update `SparkConf` for existing `SparkContext`, as it's shared
    230                     # by all sessions.

E:\Program Files\Anaconda\lib\site-packages\pyspark\context.py in getOrCreate(cls, conf)
    390         with SparkContext._lock:
    391             if SparkContext._active_spark_context is None:
--> 392                 SparkContext(conf=conf or SparkConf())
    393             return SparkContext._active_spark_context
    394 

E:\Program Files\Anaconda\lib\site-packages\pyspark\context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
    144         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    145         try:
--> 146             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
    147                           conf, jsc, profiler_cls)
    148         except:

E:\Program Files\Anaconda\lib\site-packages\pyspark\context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
    207 
    208         # Create the Java SparkContext through Py4J
--> 209         self._jsc = jsc or self._initialize_context(self._conf._jconf)
    210         # Reset the SparkConf to the one actually used by the SparkContext in JVM.
    211         self._conf = SparkConf(_jconf=self._jsc.sc().conf())

E:\Program Files\Anaconda\lib\site-packages\pyspark\context.py in _initialize_context(self, jconf)
    327         Initialize SparkContext in function to allow subclass specific initialization
    328         """
--> 329         return self._jvm.JavaSparkContext(jconf)
    330 
    331     @classmethod

E:\Program Files\Anaconda\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1571 
   1572         answer = self._gateway_client.send_command(command)
-> 1573         return_value = get_return_value(
   1574             answer, self._gateway_client, None, self._fqn)
   1575 

E:\Program Files\Anaconda\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--> 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)

Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.storage.StorageUtils$
    at org.apache.spark.storage.BlockManagerMasterEndpoint.<init>(BlockManagerMasterEndpoint.scala:110)
    at org.apache.spark.SparkEnv$.$anonfun$create$9(SparkEnv.scala:348)
    at org.apache.spark.SparkEnv$.registerOrLookupEndpoint$1(SparkEnv.scala:287)
    at org.apache.spark.SparkEnv$.create(SparkEnv.scala:336)
    at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:191)
    at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:460)
    at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
    at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
    at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
    at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:238)
    at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
    at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:833)

Использую Anaconda; отдельно установила Spark и Java, чтобы можно было работать локально.

Переменные среды добавила, они выглядят так:

JAVA_HOME: C:\Program Files\Java\jdk-17.0.1
SPARK_HOME: C:\Program Files\spark
Path: %SPARK_HOME%\bin
      %HADOOP_HOME%\bin
      %JAVA_HOME%\jdk-17.0.1\bin

После чтения чатов с аналогичной проблемой добавила ещё:

SPARK_LOCAL_IP: localhost
PYTHONPATH: C:\Program Files\spark\python\lib\py4j-0.10.9.2-src.zip

Но это не помогло. Подскажите, что не так? Как настроить Anaconda, чтобы со Spark можно было работать локально?


Ответы (0 шт):