Hello, I'm trying to convert yolov3-spp.weights to a .tf model. I found out how to convert yolov3 and yolov3_tiny, but I couldn't convert yolov3_spp. Any solutions?
1 Answer
Modify the route-layer handling in your Darknet53 model like this:
elif (block["type"] == "route"):
block["layers"] = block["layers"].split(',')
start = int(block["layers"][0])
if len(block["layers"]) > 1 and int(block["layers"][1]) > 0:
end = int(block["layers"][1]) - i
filters = output_filters[i + start] + output_filters[end] # Index negatif :end - index
inputs = tf.concat([outputs[i + start], outputs[i + end]], axis=-1)
elif len(block["layers"]) > 1 and int(block["layers"][1]) < 0: # for SPP (Spatial Pyramid Pooling) models layers=-1,-3,-5,-6
filters = sum( output_filters[i + int(offset)] for offset in block["layers"] )
inputs = tf.concat( [outputs[i + int(offset)] for offset in block["layers"]], axis=-1 )
else:
filters = output_filters[i + start]
inputs = outputs[i + start]
The key part is here:
elif len(block["layers"]) > 1 and int(block["layers"][1]) < 0: # for SPP (Spatial Pyramid Pooling) models layers=-1,-3,-5,-6
filters = sum( output_filters[i + int(offset)] for offset in block["layers"] )
inputs = tf.concat( [outputs[i + int(offset)] for offset in block["layers"]], axis=-1 )
Here, filters becomes the sum of the output_filters of all referenced layers, and inputs becomes the concatenation (along the channel axis) of the outputs of those same layers.
Find the full code here:
'''
https://machinelearningspace.com/yolov3-tensorflow-2-part-2/
https://stackoverflow.com/questions/45175469/typeerror-concat-got-multiple-values-for-argument-axis
'''
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, \
Input, ZeroPadding2D, LeakyReLU, UpSampling2D
def parse_cfg(cfgfile):
    """
    Parse a Darknet configuration file for YOLOv3.

    Blank lines and comment lines (starting with '#' or ';') are skipped.
    Surrounding whitespace on lines, keys and values is ignored, so
    "filters = 32" and "[convolutional] " are parsed the same way as
    "filters=32" and "[convolutional]".

    Args:
        cfgfile : str : path to the config file
    Returns:
        blocks : list -> [dict,...,dict] : one dict per [section], in file
            order. The section name is stored under the key "type"; every
            "key=value" line of the section is stored as a string value.
            An empty (or comment-only) file yields an empty list.
    Raises:
        ValueError : a line is neither a section header nor a key=value pair
    """
    blocks = []
    holder = {}

    with open(cfgfile, 'r') as file:
        for lineno, raw_line in enumerate(file, start=1):
            line = raw_line.strip()

            # Nothing to record for blank lines and comments.
            if not line or line[0] in '#;':
                continue

            if line[0] == '[':
                # A new section starts: flush the one collected so far.
                if holder:
                    blocks.append(holder)
                # Take the text up to the closing bracket, so anything after
                # the bracket cannot leak into the section name.
                section = line[1:].split(']', 1)[0].strip()
                holder = {'type': section}
                continue

            # Split on the first '=' only, so values may themselves contain '='.
            key, separator, value = line.partition('=')
            if not separator:
                raise ValueError(
                    "%s:%d: expected 'key=value' or '[section]', got %r"
                    % (cfgfile, lineno, line))
            holder[key.strip()] = value.strip()

    # Flush the last section of the file.
    if holder:
        blocks.append(holder)
    return blocks
def YOLOv3Net(cfgfile, model_size, num_classes):
    """
    Build a YOLOv3-family network as a Keras model from a Darknet cfg file.

    Supported block types: convolutional, upsample, maxpool (needed by the
    SPP variant), route, shortcut and yolo. Blocks of any other type are
    passed through unchanged.

    Args:
        cfgfile : str : config file
        model_size : tuple : input shape as (height, width, channels),
            e.g. (608, 608, 3)
        num_classes : int : number of classes
    Returns:
        model : Keras Model taking an image batch and returning a tensor of
            shape [batch, total boxes, 5 + num_classes]; the last axis holds
            (center x, center y, width, height, confidence, class scores) and
            the boxes of all [yolo] blocks are concatenated along axis 1.
    Raises:
        ValueError : the cfg contains no [yolo] block
    """
    blocks = parse_cfg(cfgfile)

    outputs = {}            # layer index -> output tensor of that layer
    output_filters = []     # layer index -> channel count of that layer
    filters = model_size[-1]
    predictions = []        # one prediction tensor per [yolo] block

    # We define the input model using Keras function and divide by 255 to
    # normalize it to the range of 0-1.
    inputs = input_image = Input(shape=model_size)
    inputs = inputs / 255.0

    # blocks[0] is the [net] section; layer indices start at the block after it.
    for i, block in enumerate(blocks[1:]):
        block_type = block["type"]

        if block_type == "convolutional":
            activation = block["activation"]
            filters = int(block["filters"])
            kernel_size = int(block["size"])
            strides = int(block["stride"])
            # Presence of the key is what load_weights checks too; keep the
            # two in agreement so layer names and weight layout match.
            batch_norm = "batch_normalize" in block

            if strides > 1:
                # Pad top/left only and use 'valid' padding for strided convs.
                inputs = ZeroPadding2D(((1, 0), (1, 0)))(inputs)

            inputs = Conv2D(filters,
                            kernel_size,
                            strides=strides,
                            padding='valid' if strides > 1 else 'same',
                            name='conv_' + str(i),
                            use_bias=not batch_norm)(inputs)

            if batch_norm:
                inputs = BatchNormalization(name='bnorm_' + str(i))(inputs)
            # The activation is chosen by the cfg, not by the presence of
            # batch normalization. Only "leaky" is implemented; any other
            # value (e.g. "linear") leaves the output as is.
            if activation == "leaky":
                inputs = LeakyReLU(alpha=0.1, name='leaky_' + str(i))(inputs)

        elif block_type == "upsample":
            stride = int(block["stride"])
            inputs = UpSampling2D(stride)(inputs)

        elif block_type == "maxpool":
            # Required for SPP cfgs: without this the pooling blocks would be
            # no-ops and the following route would concatenate identical maps.
            pool_stride = int(block.get("stride", 1))
            pool_size = int(block.get("size", pool_stride))
            inputs = tf.keras.layers.MaxPooling2D(
                pool_size=pool_size,
                strides=pool_stride,
                padding='same',
                name='maxpool_' + str(i))(inputs)

        elif block_type == "route":
            # Negative entries are relative to the current layer, the others
            # are absolute layer indices. Any number of entries is accepted
            # (SPP uses four: layers=-1,-3,-5,-6).
            layer_ids = [int(entry) for entry in block["layers"].split(',')]
            layer_ids = [i + idx if idx < 0 else idx for idx in layer_ids]

            filters = sum(output_filters[idx] for idx in layer_ids)
            if len(layer_ids) == 1:
                inputs = outputs[layer_ids[0]]
            else:
                inputs = tf.concat([outputs[idx] for idx in layer_ids], axis=-1)

        elif block_type == "shortcut":
            from_ = int(block["from"])
            inputs = outputs[i - 1] + outputs[i + from_]

        elif block_type == "yolo":
            mask = [int(m) for m in block["mask"].split(",")]
            anchor_values = [int(a) for a in block["anchors"].split(",")]
            anchor_pairs = [(anchor_values[k], anchor_values[k + 1])
                            for k in range(0, len(anchor_values), 2)]
            # Keep only the anchors this detection scale is responsible for.
            anchors = [anchor_pairs[m] for m in mask]
            n_anchors = len(anchors)

            # Reshape the output to [None, B * grid h * grid w, 5 + C], where
            # B is the number of anchors and C is the number of classes.
            out_shape = inputs.get_shape().as_list()
            grid_h, grid_w = out_shape[1], out_shape[2]
            inputs = tf.reshape(
                inputs, [-1, n_anchors * grid_h * grid_w, 5 + num_classes])

            # Sigmoid maps centers, confidence and class scores into 0-1.
            box_centers = tf.sigmoid(inputs[:, :, 0:2])
            box_shapes = inputs[:, :, 2:4]
            confidence = tf.sigmoid(inputs[:, :, 4:5])
            classes = tf.sigmoid(inputs[:, :, 5:num_classes + 5])

            # Box sizes are exp(prediction) scaled by the anchor size.
            anchors = tf.tile(anchors, [grid_h * grid_w, 1])
            box_shapes = tf.exp(box_shapes) * tf.cast(anchors, dtype=tf.float32)

            # Cell offsets: x runs over the grid width and y over the grid
            # height, so the flattened mesh follows the row-major (h, w)
            # order of the reshape above, also for non-square inputs.
            x = tf.range(grid_w, dtype=tf.float32)
            y = tf.range(grid_h, dtype=tf.float32)
            cx, cy = tf.meshgrid(x, y)
            cx = tf.reshape(cx, (-1, 1))
            cy = tf.reshape(cy, (-1, 1))
            cxy = tf.concat([cx, cy], axis=-1)
            cxy = tf.tile(cxy, [1, n_anchors])
            cxy = tf.reshape(cxy, [1, -1, 2])

            # Pixels per grid cell, ordered (x, y) like box_centers.
            grid_strides = (input_image.shape[2] // grid_w,
                            input_image.shape[1] // grid_h)
            box_centers = (box_centers + cxy) * grid_strides

            predictions.append(
                tf.concat([box_centers, box_shapes, confidence, classes],
                          axis=-1))

        # Route and shortcut layers need the feature maps of earlier layers,
        # so every iteration records the current tensor and channel count.
        outputs[i] = inputs
        output_filters.append(filters)

    if not predictions:
        raise ValueError("cfg file %r contains no [yolo] block" % (cfgfile,))

    # Join the predictions of all detection scales along the box axis.
    if len(predictions) == 1:
        out_pred = predictions[0]
    else:
        out_pred = tf.concat(predictions, axis=1)

    model = Model(input_image, out_pred)
    model.summary()
    return model
And the converter here:
#convert_weights.py
import numpy as np
from yolov3 import YOLOv3Net
from yolov3 import parse_cfg
import argparse
def _read_array(fp, dtype, count, what):
    """Read exactly `count` items of `dtype` from `fp` or raise ValueError."""
    data = np.fromfile(fp, dtype=dtype, count=count)
    if data.size != count:
        raise ValueError(
            "weight file ended early while reading %s: expected %d values, "
            "got %d" % (what, count, data.size))
    return data


def load_weights(model, cfgfile, weightfile):
    """
    Copy the weights of a Darknet .weights file into a model built by YOLOv3Net.

    The cfg file is walked in order; for every convolutional block the
    matching 'conv_<i>' layer (and 'bnorm_<i>' layer when the block has
    batch_normalize) is looked up in `model` and filled from the file.

    Args:
        model : Keras model whose layers are named 'conv_<i>' / 'bnorm_<i>'
        cfgfile : str : config file the model was built from
        weightfile : str : path to the Darknet .weights file
    Raises:
        ValueError : the weight file holds fewer values than the cfg requires
    """
    blocks = parse_cfg(cfgfile)

    # Open the weights file
    with open(weightfile, 'rb') as fp:
        print("\t--->weightfile open")

        # Header: major, minor, revision (int32) followed by the 'seen'
        # counter, which is 64-bit from format version 0.2 on and 32-bit
        # before. For YOLOv3 files this is 5 int32 values in total.
        header = _read_array(fp, np.int32, 3, "the file header")
        major, minor = int(header[0]), int(header[1])
        wide_seen = (major * 10 + minor) >= 2 and major < 1000 and minor < 1000
        _read_array(fp, np.int64 if wide_seen else np.int32, 1,
                    "the file header")

        for i, block in enumerate(blocks[1:]):
            if block["type"] != "convolutional":
                continue

            conv_layer = model.get_layer('conv_' + str(i))
            print("layer: ", i + 1, conv_layer)

            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]
            # Take the input-channel count from the kernel itself (Keras
            # layout: height, width, in_dim, out_dim) so it always matches
            # the array that is set below.
            in_dim = int(conv_layer.kernel.shape[2])

            has_batch_norm = "batch_normalize" in block
            if has_batch_norm:
                norm_layer = model.get_layer('bnorm_' + str(i))
                print("layer: ", i + 1, norm_layer)
                # darknet stores [beta, gamma, mean, variance]
                bn_weights = _read_array(
                    fp, np.float32, 4 * filters,
                    "batch-norm weights of layer %d" % i)
                beta, gamma, mean, variance = bn_weights.reshape((4, filters))
            else:
                conv_bias = _read_array(
                    fp, np.float32, filters, "bias of layer %d" % i)

            # darknet shape (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, k_size, k_size)
            conv_weights = _read_array(
                fp, np.float32, int(np.prod(conv_shape)),
                "kernel of layer %d" % i)
            # tf shape (height, width, in_dim, out_dim)
            conv_weights = conv_weights.reshape(
                conv_shape).transpose([2, 3, 1, 0])

            if has_batch_norm:
                # tf [gamma, beta, mean, variance]
                norm_layer.set_weights([gamma, beta, mean, variance])
                conv_layer.set_weights([conv_weights])
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

        # Unread data means cfg and weight file do not belong together;
        # report it but keep the weights loaded so far.
        leftover = len(fp.read())
        if leftover:
            print("warning: %d unread bytes left in weight file" % leftover)
def parser():
    """
    Read the command-line options of the converter.

    Returns:
        dict : parsed options keyed by long option name
            ('weightfile' and 'cfgfile'); both options are required.
    """
    cli = argparse.ArgumentParser()

    # (short flag, long flag, help text) of every required path option.
    required_paths = (
        ("-w", "--weightfile", "Path to weight file"),
        ("-c", "--cfgfile", "Path to cfg file"),
    )
    for short_flag, long_flag, description in required_paths:
        cli.add_argument(short_flag, long_flag, required=True,
                         help=description)

    namespace = cli.parse_args()
    return vars(namespace)
def main():
    """
    Convert a Darknet weight file into TensorFlow weights.

    Reads the weight and cfg paths from the command line, builds the model,
    loads the Darknet weights into it and saves them to
    'weights/yolov3_weights.tf'.

    The input size and the number of classes are taken from the cfg file
    ([net] height/width/channels and the first [yolo] block's classes), so
    the model matches the weights being converted. When the cfg does not
    provide them, the previous fixed values (608, 608, 3) and 1 are used.
    """
    args = parser()
    weightfile = args['weightfile']
    cfgfile = args['cfgfile']

    blocks = parse_cfg(cfgfile)
    net = blocks[0] if blocks and blocks[0].get('type') == 'net' else {}
    model_size = (int(net.get('height', 608)),
                  int(net.get('width', 608)),
                  int(net.get('channels', 3)))
    num_classes = next(
        (int(block['classes']) for block in blocks
         if block.get('type') == 'yolo' and 'classes' in block),
        1)

    model = YOLOv3Net(cfgfile, model_size, num_classes)
    load_weights(model, cfgfile, weightfile)

    try:
        model.save_weights('weights/yolov3_weights.tf')
        print('\nThe file \'yolov3_weights.tf\' has been saved successfully.')
    except IOError:
        print("Couldn't write the file \'yolov3_weights.tf\'.")
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
PS: I got my original code from here and then I modified it.