Ошибка Tensorflow.Keras: _EagerConst: Dst tensor is not initialized

Question

Обучаю NLP-модель. Перевел ее на GPU, теперь она некорректно работает, выдавая ошибку:

Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.
  File "F:\SchoolAI\nlp.py", line 81, in train_model
    model.fit(X, y, verbose=1, epochs=1, batch_size=64)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

During handling of the above exception, another exception occurred:

  File "F:\nlp\ai.py", line 88, in train_model
    model.fit(X, y, verbose=1, epochs=1, batch_size=64)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

During handling of the above exception, another exception occurred:

  File "F:\nlp\ai.py", line 91, in train_model
    model.fit(X, y, verbose=1, epochs=1, batch_size=64)
  File "F:\nlp\ai.py", line 138, in <module>
    train_model(1)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

Как ее исправить? Я ниже привел нужные куски кода:

Код инициализации Tensorflow и датасета:

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer # type: ignore
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
import numpy as np
import random
import requests
import gc
import os
import string
from tensorflow.keras.optimizers import Adam  # type: ignore
# Cap TensorFlow at 40% of the GPU's memory.
# NOTE(review): this goes through the TF1 compat Session API; under TF2 eager
# execution Keras is never handed this session, so confirm the cap is applied.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.4)
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
session = tf.compat.v1.Session(config=config)
# Download the training text from the remote source.
# The timeout keeps an unresponsive server from hanging the script, and
# raise_for_status() stops an HTTP error page from being tokenized as the corpus.
resp = requests.get('скрыл', timeout=30)
resp.raise_for_status()
messages = resp.content.decode('cp1251').lower().split('\n')
optimizer = Adam(learning_rate=0.01)
# Create the tokenizer and build its vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(messages)
total_words = len(tokenizer.word_index) + 1
path='model_weights.h5'

#path = "drive/MyDrive/nlp/model_weights.h5" # for Google Colab
#print(os.listdir('drive/MyDrive/nlp')) # for Google Colab

# Expand every line into its n-gram prefixes: first 2 tokens, first 3, and so on.
input_sequences = []
for line in messages:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(token_list[:end] for end in range(2, len(token_list) + 1))

# Free the raw text; only the integer sequences are needed from here on.
del messages
gc.collect()

# Fail with a readable message instead of an opaque "max() arg is an empty sequence".
if not input_sequences:
    raise ValueError('no training sequences: no corpus line has at least two tokens')

# Length of the longest n-gram sequence.
max_sequence_length = max(len(seq) for seq in input_sequences)

# Token ids run from 1 to total_words - 1. int16 keeps the array small, but ids
# above 32767 do not fit in it (they wrap around or raise, depending on the
# NumPy version), so fall back to int32 for larger vocabularies.
sequence_dtype = 'int16' if total_words - 1 <= np.iinfo(np.int16).max else 'int32'

# Left-pad the sequences to a common length, then split off the last token of
# each row as the label to predict.
# NOTE(review): maxlen is one less than the longest sequence, so with
# pad_sequences' default truncating='pre' the longest sequences lose their
# first token -- confirm this is intended.
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length-1, padding='pre', dtype=sequence_dtype)
X, labels = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the labels.
# NOTE: this materializes a dense (num_samples, total_words) matrix, the largest
# array in the script. Integer labels with a sparse categorical loss would avoid
# it, but the loss passed to model.compile() would have to change to match.
y = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# Drop names that are no longer needed.
del input_sequences
del resp
gc.collect()

Архитектура модели:

with tf.device('/device:gpu:0'):
    # Front end: an embedding that maps each token id to a 100-dimensional vector.
    layer_stack = [tf.keras.layers.Embedding(total_words, 100, input_length=None, trainable=True)]

    # Four identical BiLSTM -> Dropout -> BatchNorm stages. The first three pass
    # the whole sequence on; the last one returns only its final output.
    for keep_sequence in (True, True, True, False):
        layer_stack += [
            tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=keep_sequence)),
            tf.keras.layers.Dropout(0.1),
            tf.keras.layers.BatchNormalization(),
        ]

    # Head: softmax over the vocabulary, one output unit per token id.
    layer_stack.append(tf.keras.layers.Dense(total_words, activation='softmax'))

    model = tf.keras.models.Sequential(layer_stack)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

Функция обучения:

# NOTE: train_model is deliberately not wrapped in ``with tf.device(...)``.
# A device scope around a ``def`` is only active while the function is being
# defined, not while it is later called, so it never affected training.
def train_model(epochs):
    """Train the global ``model`` on ``X``/``y``, saving weights after each epoch.

    Resumes from the weights file at ``path`` when it exists and can be
    loaded; otherwise training starts from the model's current weights.

    Args:
        epochs: Number of single-epoch ``model.fit`` rounds to run.

    Raises:
        Any error raised by ``model.fit`` or ``model.save_weights`` (for
        example a GPU out-of-memory error) propagates to the caller. It is
        no longer swallowed and retried, which previously hid the real
        failure behind a chain of repeated tracebacks.
    """
    resumed = False
    if os.path.isfile(path):
        try:
            model.load_weights(path)
            resumed = True
        except (OSError, ValueError) as err:
            # Unreadable or incompatible checkpoint: report it, then start fresh.
            print(f'Не удалось загрузить веса из {path}: {err}')
    if not resumed:
        print('Обучение с нуля...')

    # The in-memory weights are exactly what was last saved, so there is no
    # need to reload them from disk at the start of every epoch.
    for _ in range(epochs):
        model.fit(X, y, verbose=1, epochs=1, batch_size=64)
        model.save_weights(path)

Answer 1

Спасибо CrazyElf за помощь в решении проблемы!

Нашел решение - видеокарта была перегружена датасетом и вычислениями :)

Если у вашей видеокарты тоже не больше 3–4 ГБ видеопамяти, то при обучении нужно уменьшить размер пакета (batch_size) в несколько раз — например, с 64 до 16.

Попробуйте уменьшить batch_size, тут пишут, что это может быть признаком нехватки памяти: stackoverflow.com/a/40389498/8324991 – CrazyElf

Также можно добавить строки tf.compat.v1.disable_eager_execution(), tf.compat.v1.experimental.output_all_intermediates(True), если после уменьшения батча Tensorflow просит добавить эти строки:

import tensorflow as tf # импортируем TensorFlow

tf.compat.v1.disable_eager_execution() # turn off eager execution
tf.compat.v1.experimental.output_all_intermediates(True) # output all intermediates when control flow is used

И если памяти хватает, но обучение проходит медленно, то при токенизации датасета можно использовать тип int16, если датасет небольшой. Это может ускорить вычисления, так как числа будут не больше 32767 (65535 для uint16).

БЛОГ НА HUSL

Ошибка Tensorflow.Keras: _EagerConst: Dst tensor is not initialized

Ответы (1 шт):