
[Python] Custom Encoder and Decoder Layers within Keras Model Show As Unbuilt

Discussion in 'Python' started by Stack, September 28, 2024 at 16:33.

  1. Stack

    Stack (Participating Member)

    I have a subclassed tensorflow.keras.Model Seq2Seq model with custom layers. However, when I run a test script that builds and compiles the model, model.summary() reports the custom encoder and decoder layers as unbuilt:

    Model: "retrosynthesis_seq2_seq_model"
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
    ┃ Layer (type) ┃ Output Shape ┃ Param # ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
    │ simple_encoder (SimpleEncoder) │ ? │ 0 (unbuilt) │
    ├─────────────────────────────────┼────────────────────────┼───────────────┤
    │ simple_decoder (SimpleDecoder) │ ? │ 0 (unbuilt) │
    ├─────────────────────────────────┼────────────────────────┼───────────────┤
    │ enc_state_h (Dense) │ (1, 128) │ 16,512 │
    ├─────────────────────────────────┼────────────────────────┼───────────────┤
    │ enc_state_c (Dense) │ (1, 128) │ 16,512 │
    └─────────────────────────────────┴────────────────────────┴───────────────┘
    Total params: 361,064 (1.38 MB)
    Trainable params: 361,064 (1.38 MB)
    Non-trainable params: 0 (0.00 B)
    Model output shape: (1, 20, 1000)


    From what I can tell, I have implemented the build() methods for the encoder and decoder layers correctly, yet they still show as unbuilt. I believe this unbuilt state is what then causes a TypeError: Unsupported integer size (0) serialization error when I attempt to save the model.
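
    For reference, the unbuilt state is visible not only in the summary but also on the layers themselves. A quick check along these lines (run against the model constructed in the test script below; the built, weights and name attributes are standard Keras, the loop itself is just illustrative) shows which sublayers report as built and how many weights they hold:

    # Sketch: after building the model exactly as in the test script below,
    # inspect each sublayer's `built` flag and weight count.
    for layer in (model.encoder, model.decoder, model.enc_state_h, model.enc_state_c):
        print(layer.name, "| built:", layer.built, "| weights:", len(layer.weights))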

    I have included the encoder, decoder and Seq2Seq model classes below, along with the test script for replication. I appreciate it's quite a lot of code, but pasting it into a single file and running it is enough to replicate the error:

    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Layer, Embedding, Bidirectional, LSTM, Dropout
    from tensorflow.keras.optimizers import Adam
    from typing import Optional, Tuple, Any

    """
    Encoder Layer
    """
    class SimpleEncoder(Layer):
    def __init__(self, vocab_size: int, embedding_dim: int, units: int, dropout_rate: float = 0.2, **kwargs):
    super(SimpleEncoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.units = units
    self.dropout_rate = dropout_rate

    self.embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True, name='simple_embedding')
    self.dense = Dense(units, activation='relu', name='simple_dense')
    self.dropout = Dropout(dropout_rate, name='simple_dropout')

    def build(self, input_shape):
    self.embedding.build(input_shape)

    embedding_output_shape = self.embedding.compute_output_shape(input_shape)
    self.dense.build(embedding_output_shape)

    dense_output_shape = self.dense.compute_output_shape(embedding_output_shape)
    self.dropout.build(dense_output_shape)

    super(SimpleEncoder, self).build(input_shape)

    def call(self, inputs: tf.Tensor, training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    x = self.embedding(inputs) # Shape: (batch_size, sequence_length, embedding_dim)

    encoder_output = self.dense(x) # Shape: (batch_size, sequence_length, units)

    encoder_output = self.dropout(encoder_output, training=training)

    state_h = tf.zeros_like(encoder_output[:, 0, :]) # Shape: (batch_size, units)
    state_c = tf.zeros_like(encoder_output[:, 0, :]) # Shape: (batch_size, units)

    return encoder_output, state_h, state_c

    def compute_mask(self, inputs: tf.Tensor, mask: Optional[tf.Tensor] = None) -> Optional[tf.Tensor]:
    return self.embedding.compute_mask(inputs, mask)

    def get_config(self) -> dict:
    config = super(SimpleEncoder, self).get_config()
    config.update({
    'vocab_size': self.vocab_size,
    'embedding_dim': self.embedding_dim,
    'units': self.units,
    'dropout_rate': self.dropout_rate,
    'embedding': tf.keras.layers.serialize(self.embedding),
    'dense': tf.keras.layers.serialize(self.dense),
    'dropout': tf.keras.layers.serialize(self.dropout),
    })
    return config

    @classmethod
    def from_config(cls, config: dict) -> 'SimpleEncoder':
    config['embedding'] = tf.keras.layers.deserialize(config['embedding'])
    config['dense'] = tf.keras.layers.deserialize(config['dense'])
    config['dropout'] = tf.keras.layers.deserialize(config['dropout'])
    return cls(**config)

    """
    Decoder Layer
    """
    class SimpleDecoder(Layer):
    def __init__(
    self,
    vocab_size: int,
    embedding_dim: int,
    units: int,
    dropout_rate: float = 0.2,
    **kwargs
    ):
    super(SimpleDecoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.units = units
    self.dropout_rate = dropout_rate

    self.embedding = Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
    mask_zero=True,
    name='decoder_embedding'
    )
    self.lstm = LSTM(
    units,
    return_sequences=True,
    return_state=True,
    name='decoder_lstm'
    )
    self.dropout = Dropout(dropout_rate, name='decoder_dropout')
    self.dense = Dense(vocab_size, activation='softmax', name='decoder_dense')

    def build(self, input_shape):
    decoder_input_shape, initial_states_shape = input_shape

    self.embedding.build(decoder_input_shape)

    embedding_output_shape = self.embedding.compute_output_shape(decoder_input_shape)
    self.lstm.build(embedding_output_shape)

    lstm_output_shape = self.lstm.compute_output_shape(embedding_output_shape)
    self.dropout.build(lstm_output_shape)

    dropout_output_shape = self.dropout.compute_output_shape(lstm_output_shape)
    self.dense.build(dropout_output_shape)

    super(SimpleDecoder, self).build(input_shape)

    def call(
    self,
    inputs: Tuple[tf.Tensor, tuple[tf.Tensor, tf.Tensor]],
    training: Optional[bool] = None,
    mask: Optional[tf.Tensor] = None
    ) -> tf.Tensor:
    decoder_input, initial_state = inputs

    if decoder_input is None or initial_state is None:
    raise ValueError('decoder_input and initial_state must be provided to the Decoder.')

    x = self.embedding(decoder_input)

    lstm_output, state_h, state_c = self.lstm(
    x,
    initial_state=initial_state,
    training=training,
    mask=None
    )

    lstm_output = self.dropout(lstm_output, training=training)

    output = self.dense(lstm_output)

    return output

    @staticmethod
    def compute_mask(inputs: Tuple, mask: Optional[tf.Tensor] = None) -> None:
    return None

    def get_config(self) -> dict:
    config = super(SimpleDecoder, self).get_config()
    config.update({
    'vocab_size': self.vocab_size,
    'embedding_dim': self.embedding_dim,
    'units': self.units,
    'dropout_rate': self.dropout_rate,
    'embedding': tf.keras.layers.serialize(self.embedding),
    'lstm': tf.keras.layers.serialize(self.lstm),
    'dropout': tf.keras.layers.serialize(self.dropout),
    'dense': tf.keras.layers.serialize(self.dense),
    })
    return config

    @classmethod
    def from_config(cls, config: dict) -> 'SimpleDecoder':
    config['embedding'] = tf.keras.layers.deserialize(config['embedding'])
    config['lstm'] = tf.keras.layers.deserialize(config['lstm'])
    config['dropout'] = tf.keras.layers.deserialize(config['dropout'])
    config['dense'] = tf.keras.layers.deserialize(config['dense'])
    return cls(**config)

    """
    Seq2Seq Model
    """
    class RetrosynthesisSeq2SeqModel(Model):
    def __init__(self, input_vocab_size: int, output_vocab_size: int, encoder_embedding_dim: int,
    decoder_embedding_dim: int, units: int, dropout_rate: float = 0.2, *args, **kwargs):
    super(RetrosynthesisSeq2SeqModel, self).__init__(*args, **kwargs)

    self.units: int = units

    self.encoder: SimpleEncoder = SimpleEncoder(
    input_vocab_size, encoder_embedding_dim, units, dropout_rate
    )

    self.decoder: SimpleDecoder = SimpleDecoder(
    output_vocab_size, decoder_embedding_dim, units, dropout_rate
    )

    self.input_vocab_size: int = input_vocab_size
    self.output_vocab_size: int = output_vocab_size

    self.enc_state_h: Dense = Dense(units, name='enc_state_h')
    self.enc_state_c: Dense = Dense(units, name='enc_state_c')

    self.encoder_data_processor: Optional[Any] = None
    self.decoder_data_processor: Optional[Any] = None

    self.dropout_rate: float = dropout_rate

    def build(self, input_shape):
    encoder_input_shape, decoder_input_shape = input_shape

    encoder_dummy = tf.zeros(encoder_input_shape)
    decoder_dummy = tf.zeros(decoder_input_shape)

    self.call((encoder_dummy, decoder_dummy), training=False)

    super(RetrosynthesisSeq2SeqModel, self).build(input_shape)

    def call(self, inputs: Tuple[tf.Tensor, tf.Tensor], training: Optional[bool] = None) -> tf.Tensor:
    encoder_input, decoder_input = inputs

    encoder_output, state_h, state_c = self.encoder.call(encoder_input, training=training)

    decoder_initial_state_h: tf.Tensor = self.enc_state_h(state_h)
    decoder_initial_state_c: tf.Tensor = self.enc_state_c(state_c)
    decoder_initial_state: Tuple[tf.Tensor, tf.Tensor] = (decoder_initial_state_h, decoder_initial_state_c)

    decoder_inputs = (
    decoder_input,
    decoder_initial_state
    )

    encoder_mask: Optional[tf.Tensor] = self.encoder.compute_mask(encoder_input)

    output: tf.Tensor = self.decoder.call(
    decoder_inputs,
    training=training,
    mask=encoder_mask
    )

    return output

    def get_config(self) -> dict:
    config = super(RetrosynthesisSeq2SeqModel, self).get_config()
    config.update({
    'units': self.units,
    'input_vocab_size': self.input_vocab_size,
    'output_vocab_size': self.output_vocab_size,
    'encoder_embedding_dim': self.encoder.embedding.output_dim,
    'decoder_embedding_dim': self.decoder.embedding.output_dim,
    'dropout_rate': self.dropout_rate,
    'encoder': tf.keras.layers.serialize(self.encoder),
    'decoder': tf.keras.layers.serialize(self.decoder),
    'enc_state_h': tf.keras.layers.serialize(self.enc_state_h),
    'enc_state_c': tf.keras.layers.serialize(self.enc_state_c)
    })
    return config

    @classmethod
    def from_config(cls, config: dict) -> 'RetrosynthesisSeq2SeqModel':
    config['encoder'] = tf.keras.layers.deserialize(config['encoder'])
    config['decoder'] = tf.keras.layers.deserialize(config['decoder'])
    config['enc_state_h'] = tf.keras.layers.deserialize(config['enc_state_h'])
    config['enc_state_c'] = tf.keras.layers.deserialize(config['enc_state_c'])
    return cls(**config)

    """
    Test Script
    """
    input_vocab_size = 1000
    output_vocab_size = 1000
    encoder_embedding_dim = 32
    decoder_embedding_dim = 64
    units = 128
    dropout_rate = 0.2

    model = RetrosynthesisSeq2SeqModel(
        input_vocab_size=input_vocab_size,
        output_vocab_size=output_vocab_size,
        encoder_embedding_dim=encoder_embedding_dim,
        decoder_embedding_dim=decoder_embedding_dim,
        units=units,
        dropout_rate=dropout_rate
    )

    encoder_seq_length = 20
    decoder_seq_length = 20
    model.build(input_shape=[(1, encoder_seq_length), (1, decoder_seq_length)])

    sample_encoder_input = np.random.randint(0, input_vocab_size, size=(1, 20))
    sample_decoder_input = np.random.randint(0, output_vocab_size, size=(1, 20))

    learning_rate: float = 0.0001
    optimizer: Adam = Adam(learning_rate=learning_rate, clipnorm=5.0)

    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.summary()

    output = model([sample_encoder_input, sample_decoder_input])
    print("Model output shape:", output.shape)

    model.save('minimal_seq2seq_model.keras')
    print("Model saved successfully.")

