Convert yolov3-spp.weights to .tf model

Question

Hello, I'm trying to convert yolov3-spp.weights to a .tf model. I found out how to convert yolov3 and yolov3_tiny, but I couldn't convert yolov3_spp. Are there any solutions?

emichester · Accepted Answer · 2021-02-09 12:00:45Z

Modify the route-layer handling in your Darknet53 model like this:

elif (block["type"] == "route"):
            block["layers"] = block["layers"].split(',')
            start = int(block["layers"][0])

            if len(block["layers"]) > 1 and int(block["layers"][1]) > 0:
                end = int(block["layers"][1]) - i
                filters = output_filters[i + start] + output_filters[end]  # Index negatif :end - index
                inputs = tf.concat([outputs[i + start], outputs[i + end]], axis=-1)
            elif len(block["layers"]) > 1 and int(block["layers"][1]) < 0: # for SPP (Spatial Pyramid Pooling) models layers=-1,-3,-5,-6
                filters = sum( output_filters[i + int(offset)] for offset in block["layers"] )
                inputs = tf.concat( [outputs[i + int(offset)] for offset in block["layers"]], axis=-1 )
            else:
                filters = output_filters[i + start]
                inputs = outputs[i + start]

The key part is this branch:

            elif len(block["layers"]) > 1 and int(block["layers"][1]) < 0: # for SPP (Spatial Pyramid Pooling) models layers=-1,-3,-5,-6
                filters = sum( output_filters[i + int(offset)] for offset in block["layers"] )
                inputs = tf.concat( [outputs[i + int(offset)] for offset in block["layers"]], axis=-1 )

The filter count will be the sum of the output_filters of the referenced layers, and the input will be the tensor obtained by concatenating the outputs of those layers.

Find the full code here:

'''
https://machinelearningspace.com/yolov3-tensorflow-2-part-2/
https://stackoverflow.com/questions/45175469/typeerror-concat-got-multiple-values-for-argument-axis
'''
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, \
    Input, ZeroPadding2D, LeakyReLU, UpSampling2D

def parse_cfg(cfgfile):
    """
    Parse the configuration file for YOLOv3

    Blank or whitespace-only lines and comment lines (first non-blank
    character '#' or ';') are skipped. A "[name]" line starts a new block
    whose "type" attribute is the text between the brackets. Every other
    line must be "key=value"; only the first '=' separates key from value,
    and surrounding whitespace is stripped from both.

    Args:
        cfgfile : str : config file
    Returns:
        blocks : list -> [dict,...,dict] : all config attributes, one dict
                 per section, in file order (values are kept as strings)
    Raises:
        ValueError : a non-comment, non-header line contains no '='
    """
    blocks = []
    holder = {}
    with open(cfgfile, 'r') as file:
        for lineno, raw in enumerate(file, start=1):
            line = raw.strip()
            # Skip empty lines and comments, even when they are indented.
            if not line or line[0] in '#;':
                continue
            if line[0] == '[':
                # A new section begins: store the previous one first.
                if holder:
                    blocks.append(holder)
                    holder = {}
                # Take the text up to the closing bracket so that trailing
                # characters after ']' do not leak into the type name.
                holder['type'] = line[1:].partition(']')[0].strip()
                continue
            key, sep, value = line.partition('=')
            if not sep:
                raise ValueError(
                    "{}:{}: expected 'key=value', got {!r}".format(
                        cfgfile, lineno, line))
            holder[key.strip()] = value.strip()
    # The last section is never followed by a header, so store it here.
    blocks.append(holder)
    return blocks

def YOLOv3Net(cfgfile, model_size, num_classes):
    """
    Build a Keras model from a Darknet YOLOv3 configuration file.

    Handles the block types "convolutional", "upsample", "maxpool", "route",
    "shortcut" and "yolo". The "maxpool" blocks are required for the
    yolov3-spp and yolov3-tiny configurations. Convolution and batch-norm
    layers are named 'conv_<i>' and 'bnorm_<i>', where <i> is the index of the
    block after the leading [net] block, so that weights can be assigned by
    layer name.

    Args:
        cfgfile : str : config file
        model_size : tuple : input shape (height, width, channels)
                            e.g. (608, 608, 3)
        num_classes : int : number of classes
    Returns:
        model : tf.keras.Model : maps the input image to the predictions of
                all yolo blocks concatenated along axis 1; every row is
                (center x, center y, width, height, confidence, class scores)
    Raises:
        ValueError : the cfg contains an unsupported block type or
                     activation
    """

    blocks = parse_cfg(cfgfile)

    outputs = {}
    output_filters = []
    # Channel count of the current tensor; starts with the image channels.
    filters = model_size[-1]
    out_pred = []
    scale = 0

    # We define the input model using Keras function and divided by 255 to normalize it to the range of 0–1.
    inputs = input_image = Input(shape=model_size)
    inputs = inputs / 255.0

    # For every iteration, we check the type of the block which corresponds to the type of layer.
    # blocks[0] is the [net] block, so i is the layer index used by route/shortcut references.
    for i, block in enumerate(blocks[1:]):
        block_type = block["type"]

        # If it is a convolutional layer
        if block_type == "convolutional":
            activation = block["activation"]
            filters = int(block["filters"])
            kernel_size = int(block["size"])
            strides = int(block["stride"])
            batch_norm = "batch_normalize" in block
            if strides > 1:
                # Pad top/left only and use a 'valid' convolution below.
                inputs = ZeroPadding2D(((1, 0), (1, 0)))(inputs)
            inputs = Conv2D(filters,
                            kernel_size,
                            strides=strides,
                            padding='valid' if strides > 1 else 'same',
                            name='conv_' + str(i),
                            use_bias=not batch_norm)(inputs)
            if batch_norm:
                inputs = BatchNormalization(name='bnorm_' + str(i))(inputs)
            # Follow the activation named in the cfg instead of tying it to
            # the presence of batch normalisation.
            if activation == "leaky":
                inputs = LeakyReLU(alpha=0.1, name='leaky_' + str(i))(inputs)
            elif activation != "linear":
                raise ValueError(
                    "Unsupported activation '{}' in block {}".format(activation, i))

        elif block_type == "upsample":
            stride = int(block["stride"])
            inputs = UpSampling2D(stride)(inputs)

        # Max pooling: used by the SPP block (size 5/9/13, stride 1) and by yolov3-tiny.
        # Without it the SPP route would concatenate identical copies of one tensor.
        elif block_type == "maxpool":
            pool_stride = int(block.get("stride", 1))
            pool_size = int(block.get("size", pool_stride))
            inputs = tf.keras.layers.MaxPool2D(pool_size=pool_size,
                                               strides=pool_stride,
                                               padding='same')(inputs)

        # If it is a route layer
        elif block_type == "route":
            layer_refs = [int(ref) for ref in block["layers"].split(',')]
            # Negative references are relative to the current layer,
            # non-negative ones are absolute layer indices.
            sources = [i + ref if ref < 0 else ref for ref in layer_refs]
            filters = sum(output_filters[src] for src in sources)
            if len(sources) == 1:
                inputs = outputs[sources[0]]
            else:
                # e.g. layers=-1,61 or, for SPP models, layers=-1,-3,-5,-6
                inputs = tf.concat([outputs[src] for src in sources], axis=-1)

        elif block_type == "shortcut":
            from_ = int(block["from"])
            inputs = outputs[i - 1] + outputs[i + from_]

        # Yolo detection layer
        elif block_type == "yolo":
            mask = [int(m) for m in block["mask"].split(",")]
            flat_anchors = [int(a) for a in block["anchors"].split(",")]
            anchor_pairs = [(flat_anchors[k], flat_anchors[k + 1])
                            for k in range(0, len(flat_anchors), 2)]
            # Keep only the anchors selected by this detection scale.
            anchors = [anchor_pairs[m] for m in mask]
            n_anchors = len(anchors)

            # reshape the YOLOv3 output to the form of [None, B * grid size * grid size, 5 + C].
            # The B is the number of anchors and C is the number of classes.
            out_shape = inputs.get_shape().as_list()
            grid_h, grid_w = out_shape[1], out_shape[2]

            inputs = tf.reshape(inputs, [-1, n_anchors * grid_h * grid_w,
                                         5 + num_classes])

            # Then access all boxes attributes by this way:
            box_centers = inputs[:, :, 0:2]
            box_shapes = inputs[:, :, 2:4]
            confidence = inputs[:, :, 4:5]
            classes = inputs[:, :, 5:num_classes + 5]

            # Refine Bounding Boxes
            # Use the sigmoid function to convert box_centers, confidence,
            # and classes values into range of 0 – 1.
            box_centers = tf.sigmoid(box_centers)
            confidence = tf.sigmoid(confidence)
            classes = tf.sigmoid(classes)

            # Then convert box_shapes as the following:
            anchors = tf.tile(anchors, [grid_h * grid_w, 1])
            box_shapes = tf.exp(box_shapes) * tf.cast(anchors, dtype=tf.float32)

            # Use a meshgrid to convert the relative positions of the center boxes into
            # the real positions. x runs over the grid width and y over the grid height,
            # so the flattened (row-major) grid matches the reshape above for
            # non-square inputs as well.
            x = tf.range(grid_w, dtype=tf.float32)
            y = tf.range(grid_h, dtype=tf.float32)

            cx, cy = tf.meshgrid(x, y)
            cx = tf.reshape(cx, (-1, 1))
            cy = tf.reshape(cy, (-1, 1))
            cxy = tf.concat([cx, cy], axis=-1)
            cxy = tf.tile(cxy, [1, n_anchors])
            cxy = tf.reshape(cxy, [1, -1, 2])

            # Box centers are (x, y): scale x by the width stride and y by the height stride.
            strides = (input_image.shape[2] // grid_w,
                       input_image.shape[1] // grid_h)
            box_centers = (box_centers + cxy) * strides

            # Then, concatenate them all together.
            prediction = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)

            # YOLOv3 does 3 predictions across the scale. We do as it is.
            # Take the prediction result for each scale and concatenate it with the others.
            if scale:
                out_pred = tf.concat([out_pred, prediction], axis=1)
            else:
                out_pred = prediction
                scale = 1

        else:
            # Silently skipping a layer would build a different network
            # than the one the cfg describes.
            raise ValueError(
                "Unsupported block type '{}' in block {}".format(block_type, i))

        # Since the route and shortcut layers need output feature maps from previous layers,
        # so for every iteration, we always keep the track of the feature maps and output filters.
        outputs[i] = inputs
        output_filters.append(filters)

    # Finally, we can return our model.
    model = Model(input_image, out_pred)
    model.summary()
    return model

And the converter here:

#convert_weights.py
import numpy as np
from yolov3 import YOLOv3Net
from yolov3 import parse_cfg
import argparse

def load_weights(model,cfgfile,weightfile):
    """
    Copy the weights of a Darknet .weights file into a Keras model.

    The cfg file is parsed again and, for every "convolutional" block, the
    layers named 'conv_<i>' (and 'bnorm_<i>' when the block has
    "batch_normalize") are looked up in the model and filled from the file in
    the order the blocks appear.

    Args:
        model : Keras model whose layers follow the 'conv_<i>' / 'bnorm_<i>'
                naming scheme
        cfgfile : str : config file the model was built from
        weightfile : str : Darknet weights file
    Raises:
        ValueError : the weights file ends before all layers were filled
    """

    # Open the weights file
    with open(weightfile, 'rb') as fp:
        print("\t--->weightfile open")

        def read_values(dtype, count, what):
            # np.fromfile silently returns fewer items at end of file, so
            # check the size here to fail with a clear message.
            values = np.fromfile(fp, dtype=dtype, count=count)
            if values.size != count:
                raise ValueError(
                    "{}: unexpected end of file while reading {} "
                    "(wanted {} values, got {})".format(
                        weightfile, what, count, values.size))
            return values

        # Header: major, minor, revision (int32) followed by the 'seen'
        # counter, which is 64-bit from format version 0.2 on and 32-bit in
        # older files.
        major, minor, _revision = read_values(np.int32, 3, "header version")
        if major * 10 + minor >= 2 and major < 1000 and minor < 1000:
            read_values(np.int64, 1, "header 'seen' counter")
        else:
            read_values(np.int32, 1, "header 'seen' counter")

        blocks = parse_cfg(cfgfile)

        for i, block in enumerate(blocks[1:]):

            if (block["type"] == "convolutional"):
                conv_layer = model.get_layer('conv_' + str(i))
                print("layer: ",i+1,conv_layer)

                filters = conv_layer.filters
                k_size = conv_layer.kernel_size[0]
                in_dim = conv_layer.input_shape[-1]
                has_bn = "batch_normalize" in block

                if has_bn:

                    norm_layer = model.get_layer('bnorm_' + str(i))
                    print("layer: ",i+1,norm_layer)

                    bn_weights = read_values(
                        np.float32, 4 * filters, "batch-norm weights of layer {}".format(i))
                    # darknet [beta, gamma, mean, variance]
                    # tf [gamma, beta, mean, variance]
                    bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]

                else:
                    conv_bias = read_values(
                        np.float32, filters, "bias of layer {}".format(i))

                # darknet shape (out_dim, in_dim, height, width)
                conv_shape = (filters, in_dim, k_size, k_size)
                conv_weights = read_values(
                    np.float32, int(np.prod(conv_shape)), "kernel of layer {}".format(i))
                # tf shape (height, width, in_dim, out_dim)
                conv_weights = conv_weights.reshape(
                    conv_shape).transpose([2, 3, 1, 0])

                if has_bn:
                    norm_layer.set_weights(list(bn_weights))
                    conv_layer.set_weights([conv_weights])
                else:
                    conv_layer.set_weights([conv_weights, conv_bias])

        # Data left in the file means the cfg does not describe the whole
        # network stored in it; report it instead of ignoring it.
        leftover = len(fp.read())
        if leftover:
            print("\tWarning: {} bytes of the weights file were not used".format(leftover))

def parser():
    """
    Read the command line options of the converter.

    Both options are mandatory: -w/--weightfile and -c/--cfgfile.

    Returns:
        dict : parsed options with the keys 'weightfile' and 'cfgfile'
    """
    cli = argparse.ArgumentParser()
    option_specs = (
        ("-w", "--weightfile", "Path to weight file"),
        ("-c", "--cfgfile", "Path to cfg file"),
    )
    for short_flag, long_flag, help_text in option_specs:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)
    namespace = cli.parse_args()
    return vars(namespace)

def main():
    """
    Build the model from the cfg file, fill it from the Darknet weights file
    and store the result as 'weights/yolov3_weights.tf'.

    The input size (608, 608, 3) and the number of classes (1) are fixed
    here; the two file paths come from the command line.
    """
    cli_args = parser()
    weights_path, cfg_path = cli_args['weightfile'], cli_args['cfgfile']

    input_shape = (608, 608, 3)
    class_count = 1

    net = YOLOv3Net(cfg_path, input_shape, class_count)
    load_weights(net, cfg_path, weights_path)

    try:
        net.save_weights('weights/yolov3_weights.tf')
        print('\nThe file \'yolov3_weights.tf\' has been saved successfully.')
    except IOError:
        print("Couldn't write the file \'yolov3_weights.tf\'.")


if __name__ == "__main__":
    main()

PS: I got my original code from here and then I modified it.

Collectives™ on Stack Overflow

Convert yolov3-spp.weights to .tf model

1 Answer 1

Your Answer

Not the answer you're looking for? Browse other questions tagged
tensorflow
deep-learning
yolo
darknet
or ask your own question.

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Not the answer you're looking for? Browse other questions tagged tensorflowdeep-learningyolodarknet or ask your own question.

Linked

Related

Not the answer you're looking for? Browse other questions tagged
tensorflow
deep-learning
yolo
darknet
or ask your own question.