При попытке обучить нейронную сеть получаю ошибку: "lambda x, y: x[name]) KeyError: 'fingerprint'"

Всем привет! Я пытаюсь построить нейронную сеть, которая будет принимать на вход данные анализа модекулы(ppm) и отдавать отпечаток молекулы(fingerprint len=512). csv файл с данными выглядит так:

   ppm_and_assign                                        fingerprint
0  [98.76, 687.0, 74.52, 666.0, 72.51, 704.0, 71....  1111010000101011011011111101001001010101111100...
1                                    [15.52, 1000.0]  0000000000000000000000000000000000000000000000...
...
10335  [153.05, 662.0, 136.76, 408.0, 128.98, 1000.0,...  0010000001000000010000001000100000001010110000...
10336  [157.65, 96.0, 129.94, 995.0, 2.0, 113.81, 100...  0110000001000000010000000000000000001000110010...

смещение - список разной длинны, отпечаток - бинарное число длинной 512 символов. при попытке обучить нейронную сеть этими двумя значениями, получаю ошибку:

lambda x, y: x[name])

KeyError: 'fingerprint'

как я понимаю ошибка в энкодинге данных отпечатка. Подскажите пожайлуста как исправить ошибку. Мой код:

import os
import keras
from keras import layers

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


import pandas as pd

import tensorflow as tf


dataframe = pd.read_csv('/home/oleksii/PycharmProjects/vae/ppm_fingerprint.csv', 
names=['ppm_and_assign', 'fingerprint'], skipinitialspace=True, skiprows=1, 
engine="python")

val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
train_dataframe = dataframe.drop(val_dataframe.index)

print(
    "Using %d samples for training and %d for validation"
    % (len(train_dataframe), len(val_dataframe))
)

def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("fingerprint")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds


train_ds = dataframe_to_dataset(train_dataframe)
val_ds = dataframe_to_dataset(val_dataframe)

print(train_ds)
print(val_ds)

for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)



from keras.layers import IntegerLookup
from keras.layers import Normalization
from keras.layers import StringLookup

def encode_numerical_feature(feature, name, dataset):
    '''
    Didn't work for fingerprint
    '''
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature



def encode_categorical_feature(feature, name, dataset, is_string):
    lookup_class = StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="binary")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)
    return encoded_feature



ppm_and_assign_1 = keras.Input(shape=(1,), name="ppm_and_assign", dtype="string")
fingerprint = keras.Input(shape=(1,), name="fingerprint", dtype="string")

all_inputs = [
    ppm_and_assign_1,
    fingerprint,
    ]



 ppm_and_assign_encoded = encode_categorical_feature(ppm_and_assign_1, 
 "ppm_and_assign", train_ds, True)

fingerprint_encoded = encode_numerical_feature(fingerprint, "fingerprint", train_ds)


all_features = layers.concatenate(
    [
        ppm_and_assign_encoded,
        fingerprint_encoded
    ]
)

x = layers.Dense(8, activation="relu")(all_features)
x = layers.Dropout(0.5)(x)
output = layers.Dense(1, activation="relu")(x)
model = keras.Model(all_inputs, output)
# model.compile("adam", "mean_squared_error", metrics=["accuracy"])
model.compile(
    # loss='categorical_crossentropy',
    loss='mean_squared_error',
    # loss=tf.keras.losses.mae,
          optimizer=tf.keras.optimizers.Adam(),
         # metrics=tf.keras.metrics.mae
              metrics=['accuracy']
              )


model.fit(train_ds, epochs=50, validation_data=val_ds)

Ответы (0 шт):