1. Anuncie Aqui ! Entre em contato fdantas@4each.com.br

[Python] Human segmentation fails with Pytorch, not with Tensorflow Keras

Discussão em 'Python' iniciado por Stack, Setembro 10, 2024.

  1. Stack

    Stack Membro Participativo

    I probably missed something, but here is the same workflow with Pytorch and Tensorflow Keras.

    The results are here:

    The PyTorch version

    [​IMG]

    The Keras version:

    [​IMG]

    Hard to explain the whole process but this is what I do with PyTorch:

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = models.segmentation.deeplabv3_resnet50(weights="DEFAULT").to(device)

    # Mettre le modèle en mode évaluation
    model.eval()

    # Transformations similaires à celles utilisées dans Keras
    transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((img_size, img_size)),
    ])

    def t_read_image(imagepath: Image.Image):
    image = transform(imagepath)
    # make the image to be from -1 to 1
    print(image.min(), image.max())
    image = image * 2 - 1
    image = image.to(device)
    return image

    def t_infer(model, image):
    with torch.no_grad():
    output = model(image.unsqueeze(0))

    output = output['out']
    output = np.squeeze(output.cpu().numpy())

    output = output[1:]
    output = np.argmax(output, axis=0)
    return output

    def t_decode_segmentation_masks(mask, colormap, n_classes):
    r = np.zeros_like(mask).astype(np.uint8)
    g = np.zeros_like(mask).astype(np.uint8)
    b = np.zeros_like(mask).astype(np.uint8)
    for l in range(0, n_classes):
    idx = mask == l
    r[idx] = colormap[l, 0]
    g[idx] = colormap[l, 1]
    b[idx] = colormap[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb

    def t_get_overlay(image, colored_mask):
    image = image.cpu().numpy()
    image = (image - image.min()) / (image.max() - image.min()) * 255
    image = image.astype(np.uint8)
    image = np.transpose(image, (1, 2, 0))
    overlay = cv2.addWeighted(image, 0.35, colored_mask, 0.65, 0)
    return overlay

    def t_segmentation(input_image: Image.Image):
    image_tensor = t_read_image(input_image)
    prediction_mask = t_infer(image=image_tensor, model=model)
    prediction_colormap = t_decode_segmentation_masks(prediction_mask, colormap, 20)
    overlay = t_get_overlay(image_tensor, prediction_colormap)
    return (overlay, prediction_colormap)


    img = Image.open('./image.jpg')
    img = np.array(img)
    overlay, segs = t_segmentation(img)
    plt.imshow(overlay)
    plt.show()


    And the same thing with Keras

    model = from_pretrained_keras("keras-io/deeplabv3p-resnet50")

    def read_image(image):
    image = tf.convert_to_tensor(image)
    image.set_shape([None, None, 3])
    image = tf.image.resize(images=image, size=[img_size, img_size])
    image = image / 127.5 - 1
    return image

    def infer(model, image_tensor):
    predictions = model.predict(np.expand_dims((image_tensor), axis=0))
    predictions = np.squeeze(predictions)
    predictions = np.argmax(predictions, axis=2)
    return predictions

    def decode_segmentation_masks(mask, colormap, n_classes):
    r = np.zeros_like(mask).astype(np.uint8)
    g = np.zeros_like(mask).astype(np.uint8)
    b = np.zeros_like(mask).astype(np.uint8)
    for l in range(0, n_classes):
    idx = mask == l
    r[idx] = colormap[l, 0]
    g[idx] = colormap[l, 1]
    b[idx] = colormap[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb

    def get_overlay(image, colored_mask):
    image = tf.keras.preprocessing.image.array_to_img(image)
    image = np.array(image).astype(np.uint8)
    overlay = cv2.addWeighted(image, 0.35, colored_mask, 0.65, 0)
    return overlay

    def segmentation(input_image):
    image_tensor = read_image(input_image)
    prediction_mask = infer(image_tensor=image_tensor, model=model)
    prediction_colormap = decode_segmentation_masks(prediction_mask, colormap, 20)
    overlay = get_overlay(image_tensor, prediction_colormap)
    return (overlay, prediction_colormap)

    img = Image.open('./image.jpg')
    img = np.array(img)
    overlay, segs = segmentation(img)
    plt.imshow(overlay)
    plt.show()


    As you can see, this is the same model, and globally the same code. But the result is very different. What I would like is to get the same result than with Keras, using Pytorch.

    I share the notebook here: https://colab.research.google.com/drive/1mgWSRs4Z7lqag8vxBnq5vi65BohXNdMd?usp=sharing

    Thanks a lot.

    Continue reading...

Compartilhe esta Página