Source code for micromind.networks.yolo

YOLOv8 building blocks.

Authors:
    - Matteo Beltrami, 2023
    - Francesco Paissan, 2023

This file contains the definition of the building blocks of the yolov8 network.
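Model architecture has been taken from the public YOLOv8 reference by
Ultralytics (https://github.com/ultralytics/ultralytics); the exact link is
missing from this copy.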
import torch
import torch.nn as nn
import torch.nn.functional as F

from micromind.utils.yolo import autopad, dist2bbox, make_anchors

class Upsample:
    """Callable that upsamples a tensor with nearest-neighbour interpolation.

    Arguments
    ---------
    scale_factor : int or float
        Scale factor forwarded to ``F.interpolate``.
    mode : str
        Interpolation mode. Only ``"nearest"`` is accepted.
    """

    def __init__(self, scale_factor, mode="nearest"):
        # Any other interpolation mode is rejected at construction time.
        assert mode == "nearest"
        self.mode = mode
        self.scale_factor = scale_factor

    def __call__(self, x: torch.Tensor):
        """Upsamples the input.

        Arguments
        ---------
        x : torch.Tensor
            Tensor with 3 to 5 dimensions.

        Returns
        -------
        Upsampled tensor : torch.Tensor
        """
        rank = len(x.shape)
        assert 2 < rank <= 5, "Input tensor must have 3 to 5 dimensions"
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
class Conv(nn.Module):
    """Implements YOLOv8's convolutional block.

    The block chains a bias-free 2D convolution, a batch normalization layer
    and a SiLU activation.

    Arguments
    ---------
    c1 : int
        Input channels of the convolutional block.
    c2 : int
        Output channels of the convolutional block.
    kernel_size : int
        Kernel size for the convolutional block.
    stride : int
        Stride for the convolutional block.
    padding : int
        Padding for the convolutional block. It is passed to ``autopad``
        together with the kernel size and the dilation.
    dilation : int
        Dilation for the convolutional block.
    groups : int
        Groups for the convolutional block.
    """

    def __init__(
        self, c1, c2, kernel_size=1, stride=1, padding=None, dilation=1, groups=1
    ):
        super().__init__()
        self.conv = nn.Conv2d(
            c1,
            c2,
            kernel_size=kernel_size,
            stride=stride,
            padding=autopad(kernel_size, padding, dilation),
            dilation=dilation,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03)
        self.silu = nn.SiLU()

    def forward(self, x):
        """Executes YOLOv8 convolutional block.

        Arguments
        ---------
        x : torch.Tensor
            Input to the convolutional block.

        Returns
        -------
        Output of the convolutional block : torch.Tensor
        """
        x = self.conv(x)
        x = self.bn(x)
        x = self.silu(x)
        return x
class Bottleneck(nn.Module):
    """Implements YOLOv8's bottleneck block.

    Two ``Conv`` blocks are applied in sequence; the input is added back to
    their output when a residual connection is enabled.

    Arguments
    ---------
    c1 : int
        Input channels of the bottleneck block.
    c2 : int
        Output channels of the bottleneck block.
    shortcut : bool
        Decides whether to perform a shortcut in the bottleneck block. The
        shortcut is only active when ``c1 == c2``.
    groups : int
        Groups used by the second convolutional block.
    kernels : list
        Kernel sizes of the first and second convolutional blocks.
    channel_factor : float
        Decides the number of channels of the intermediate result between
        the two convolutional blocks (``int(c2 * channel_factor)``).
    """

    def __init__(
        self,
        c1,
        c2,
        shortcut: bool,
        groups=1,
        kernels: list = (3, 3),
        channel_factor=0.5,
    ):
        super().__init__()
        hidden_channels = int(c2 * channel_factor)
        first_kernel = kernels[0]
        second_kernel = kernels[1]
        self.cv1 = Conv(
            c1, hidden_channels, kernel_size=first_kernel, stride=1, padding=None
        )
        self.cv2 = Conv(
            hidden_channels,
            c2,
            kernel_size=second_kernel,
            stride=1,
            padding=None,
            groups=groups,
        )
        # The skip connection needs matching input/output channel counts.
        self.residual = shortcut if c1 == c2 else False

    def forward(self, x):
        """Executes YOLOv8 bottleneck block.

        Arguments
        ---------
        x : torch.Tensor
            Input to the bottleneck block.

        Returns
        -------
        Output of the bottleneck block : torch.Tensor
        """
        out = self.cv2(self.cv1(x))
        if not self.residual:
            return out
        return x + out
class C2f(nn.Module):
    """Implements YOLOv8's C2f block.

    The input is projected to ``2 * self.c`` channels and split in two
    halves. The second half is then fed through a chain of bottleneck
    blocks, and every intermediate result is concatenated along the channel
    dimension before the final projection to ``c2`` channels.

    Arguments
    ---------
    c1 : int
        Input channels of the C2f block.
    c2 : int
        Output channels of the C2f block.
    n : int
        Number of bottleneck blocks executed in the C2f block.
    shortcut : bool
        Decides whether to perform a shortcut in the bottleneck blocks.
    groups : int
        Groups for the C2f block.
    e : float
        Ratio between the hidden channels (``self.c``) and ``c2``.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, groups=1, e=0.5):
        super().__init__()
        self.c = int(c2 * e)
        self.cv1 = Conv(c1, 2 * self.c, 1)
        # cv2 receives the two halves of cv1 plus one tensor per bottleneck.
        self.cv2 = Conv((2 + n) * self.c, c2, 1)
        self.bottleneck = nn.ModuleList(
            [
                Bottleneck(
                    self.c,
                    self.c,
                    shortcut,
                    groups,
                    kernels=[(3, 3), (3, 3)],
                    channel_factor=1.0,
                )
                for _ in range(n)
            ]
        )

    def forward(self, x):
        """Executes YOLOv8 C2f block.

        Arguments
        ---------
        x : torch.Tensor
            Input to the C2f block.

        Returns
        -------
        Output of the C2f block : torch.Tensor
        """
        x = self.cv1(x)
        y = list(torch.chunk(x, chunks=2, dim=1))
        # Each bottleneck consumes the most recent tensor, so the blocks are
        # chained rather than applied in parallel.
        for block in self.bottleneck:
            y.append(block(y[-1]))
        # A single concatenation yields the same tensor as concatenating the
        # pieces pairwise, without the intermediate copies.
        return self.cv2(torch.cat(y, dim=1))
class SPPF(nn.Module):
    """Implements YOLOv8's SPPF block.

    The input is projected to ``c1 // 2`` channels, max-pooled three times in
    a row, and the projection plus the three pooled tensors are concatenated
    along the channel dimension before the final projection to ``c2``.

    Arguments
    ---------
    c1 : int
        Input channels of the SPPF block.
    c2 : int
        Output channels of the SPPF block.
    k : int
        Kernel size for the SPPF block Maxpooling operations. An odd value
        keeps the spatial size unchanged, which the concatenation requires.
    """

    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2
        self.cv1 = Conv(c1, c_, 1, 1, padding=None)
        self.cv2 = Conv(c_ * 4, c2, 1, 1, padding=None)
        # Padding follows the kernel size so that stride-1 pooling preserves
        # the spatial resolution for any odd k (k // 2 == 2 for the default).
        self.maxpool = nn.MaxPool2d(
            kernel_size=k, stride=1, padding=k // 2, dilation=1, ceil_mode=False
        )

    def forward(self, x):
        """Executes YOLOv8 SPPF block.

        Arguments
        ---------
        x : torch.Tensor
            Input to the SPPF block.

        Returns
        -------
        Output of the SPPF block : torch.Tensor
        """
        x = self.cv1(x)
        x2 = self.maxpool(x)
        x3 = self.maxpool(x2)
        x4 = self.maxpool(x3)
        y = torch.cat((x, x2, x3, x4), dim=1)
        return self.cv2(y)
class DFL(nn.Module):
    """Implements YOLOv8's DFL block.

    A frozen 1x1 convolution whose weights are ``0, 1, ..., c1 - 1`` is
    applied to a softmax over ``c1`` bins, i.e. it returns the expected bin
    index for each of the four groups of ``c1`` channels.

    Arguments
    ---------
    c1 : int
        Input channels of the DFL block (number of bins per group).
    """

    def __init__(self, c1=16):
        super().__init__()
        self.c1 = c1
        self.conv = nn.Conv2d(c1, 1, kernel_size=1, bias=False)
        # Freeze the kernel first so the in-place copy below is allowed.
        self.conv.weight.requires_grad_(False)
        bin_indices = torch.arange(c1).float().reshape(1, c1, 1, 1)
        self.conv.weight.copy_(bin_indices)

    @torch.no_grad()  # TODO: check when training
    def forward(self, x):
        """Executes YOLOv8 DFL block.

        Arguments
        ---------
        x : torch.Tensor
            Input to the DFL block, with shape (batch, 4 * c1, anchors).

        Returns
        -------
        Output of the DFL block, with shape (batch, 4, anchors) : torch.Tensor
        """
        batch, _, anchors = x.shape
        # (batch, 4, c1, anchors) -> (batch, c1, 4, anchors): bins on dim 1.
        bins_first = x.reshape(batch, 4, self.c1, anchors).transpose(2, 1)
        probabilities = F.softmax(bins_first, dim=1)
        expectation = self.conv(probabilities)
        return expectation.reshape(batch, 4, anchors)
class Darknet(nn.Module):
    """Implements YOLOv8's convolutional backbone.

    The backbone is made of five stages (``b1`` to ``b5``). The outputs of
    ``b2``, ``b3`` and ``b5`` are returned.

    Arguments
    ---------
    w : float
        Width multiple of the Darknet.
    r : float
        Ratio multiple of the Darknet.
    d : float
        Depth multiple of the Darknet.
    """

    def __init__(self, w, r, d):
        super().__init__()
        # Channel counts of the successive stages.
        ch_64 = int(64 * w)
        ch_128 = int(128 * w)
        ch_256 = int(256 * w)
        ch_512 = int(512 * w)
        ch_last = int(512 * w * r)
        # Number of bottlenecks in the C2f blocks.
        n_short = round(3 * d)
        n_long = round(6 * d)

        self.b1 = nn.Sequential(
            Conv(c1=3, c2=ch_64, kernel_size=3, stride=2, padding=1),
            Conv(ch_64, ch_128, kernel_size=3, stride=2, padding=1),
        )
        self.b2 = nn.Sequential(
            C2f(c1=ch_128, c2=ch_128, n=n_short, shortcut=True),
            Conv(ch_128, ch_256, kernel_size=3, stride=2, padding=1),
            C2f(c1=ch_256, c2=ch_256, n=n_long, shortcut=True),
        )
        self.b3 = nn.Sequential(
            Conv(ch_256, ch_512, kernel_size=3, stride=2, padding=1),
            C2f(c1=ch_512, c2=ch_512, n=n_long, shortcut=True),
        )
        self.b4 = nn.Sequential(
            Conv(ch_512, ch_last, kernel_size=3, stride=2, padding=1),
            C2f(c1=ch_last, c2=ch_last, n=n_short, shortcut=True),
        )
        self.b5 = SPPF(ch_last, ch_last, 5)

    def forward(self, x):
        """Executes YOLOv8 convolutional backbone.

        Arguments
        ---------
        x : torch.Tensor
            Input to the Darknet.

        Returns
        -------
        Three intermediate representations with different resolutions : tuple
        """
        stage_2 = self.b2(self.b1(x))
        stage_3 = self.b3(stage_2)
        stage_5 = self.b5(self.b4(stage_3))
        return (stage_2, stage_3, stage_5)
class Yolov8Neck(nn.Module):
    """Implements YOLOv8's neck.

    The neck first merges the three backbone feature maps top-down
    (upsampling and concatenating), then bottom-up (strided convolution and
    concatenating), and returns one feature map per level.

    Arguments
    ---------
    filters : list
        Number of channels of the three inputs ``p3``, ``p4`` and ``p5``, in
        this order. The three outputs have the same channel counts.
    up : list
        Scale factors of the two upsampling operations.
    d : float
        Depth multiple; every C2f block uses ``round(3 * d)`` bottlenecks.
    """

    def __init__(self, filters=(256, 512, 768), up=(2, 2), d=1):
        super().__init__()
        self.up1 = Upsample(up[0], mode="nearest")
        self.up2 = Upsample(up[1], mode="nearest")
        self.n1 = C2f(
            c1=int(filters[1] + filters[2]),
            c2=int(filters[1]),
            n=round(3 * d),
            shortcut=False,
        )
        self.n2 = C2f(
            c1=int(filters[0] + filters[1]),
            c2=int(filters[0]),
            n=round(3 * d),
            shortcut=False,
        )
        self.n3 = Conv(
            c1=int(filters[0]), c2=int(filters[0]), kernel_size=3, stride=2, padding=1
        )
        self.n4 = C2f(
            c1=int(filters[0] + filters[1]),
            c2=int(filters[1]),
            n=round(3 * d),
            shortcut=False,
        )
        self.n5 = Conv(
            c1=int(filters[1]), c2=int(filters[1]), kernel_size=3, stride=2, padding=1
        )
        self.n6 = C2f(
            c1=int(filters[1] + filters[2]),
            c2=int(filters[2]),
            n=round(3 * d),
            shortcut=False,
        )

    def forward(self, p3, p4, p5):
        """Executes YOLOv8 neck.

        Arguments
        ---------
        p3 : torch.Tensor
            Feature map with ``filters[0]`` channels.
        p4 : torch.Tensor
            Feature map with ``filters[1]`` channels.
        p5 : torch.Tensor
            Feature map with ``filters[2]`` channels.

        Returns
        -------
        Three intermediate representations with different resolutions : list
        """
        # Top-down path: upsample the deeper map and merge with the shallower.
        x = self.up1(p5)
        x = torch.cat((x, p4), dim=1)
        x = self.n1(x)

        h1 = self.up2(x)
        h1 = torch.cat((h1, p3), dim=1)
        head_1 = self.n2(h1)

        # Bottom-up path: downsample and merge with the deeper maps.
        h2 = self.n3(head_1)
        h2 = torch.cat((h2, x), dim=1)
        head_2 = self.n4(h2)

        h3 = self.n5(head_2)
        h3 = torch.cat((h3, p5), dim=1)
        head_3 = self.n6(h3)
        return [head_1, head_2, head_3]
class DetectionHead(nn.Module):
    """Implements YOLOv8's detection head.

    For every input level, one branch (``cv2``) predicts ``4 * reg_max``
    box-distribution channels and another branch (``cv3``) predicts ``nc``
    class channels. Box distributions are decoded with ``DFL`` and
    ``dist2bbox``; class scores go through a sigmoid.

    Arguments
    ---------
    nc : int
        Number of classes to predict.
    filters : tuple
        Number of channels of the three inputs of the detection head.
    """

    def __init__(self, nc=80, filters=()):
        super().__init__()
        self.reg_max = 16
        self.nc = nc
        self.nl = len(filters)  # number of detection levels
        self.no = nc + self.reg_max * 4  # output channels per anchor
        self.stride = torch.tensor([8.0, 16.0, 32.0], dtype=torch.float16)
        # Hidden channels of the box (c2) and class (c3) branches.
        c2 = max((16, filters[0] // 4, self.reg_max * 4))
        c3 = max(filters[0], min(self.nc, 104))
        self.cv2 = nn.ModuleList(
            nn.Sequential(
                Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)
            )
            for x in filters
        )
        self.cv3 = nn.ModuleList(
            nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1))
            for x in filters
        )
        self.dfl = DFL(self.reg_max)

    def forward(self, x):
        """Executes YOLOv8 detection head.

        Arguments
        ---------
        x : list
            Input to the detection head, one feature map per level.

        Returns
        -------
        Output of the detection head : tuple
            The decoded predictions (boxes concatenated with class scores
            along dim 1) and the list of raw per-level head outputs.
        """
        # Build a new list instead of overwriting the caller's input in place.
        feats = [
            torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), dim=1)
            for i in range(self.nl)
        ]
        self.anchors, self.strides = (
            xl.transpose(0, 1) for xl in make_anchors(feats, self.stride, 0.5)
        )

        # Flatten the spatial dimensions and stack all levels along dim 2.
        batch = feats[0].shape[0]
        x_cat = torch.cat([f.reshape(batch, self.no, -1) for f in feats], dim=2)
        box = x_cat[:, : self.reg_max * 4]
        cls = x_cat[:, self.reg_max * 4 :]
        dbox = (
            dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1)
            * self.strides
        )
        z = torch.cat((dbox, cls.sigmoid()), dim=1)
        return z, feats
class YOLOv8(nn.Module):
    """Implements YOLOv8 network.

    The network chains the Darknet backbone, the neck and the detection
    head.

    Arguments
    ---------
    w : float
        Width multiple of the Darknet.
    r : float
        Ratio multiple of the Darknet.
    d : float
        Depth multiple of the Darknet.
    num_classes : int
        Number of classes to predict.
    """

    def __init__(self, w, r, d, num_classes=80):
        super().__init__()
        # Channel counts of the three feature maps returned by the backbone;
        # the neck keeps the same counts on its three outputs.
        filters = (int(256 * w), int(512 * w), int(512 * w * r))
        self.net = Darknet(w, r, d)
        # The neck expects channel counts, not the raw width/ratio multiples.
        self.fpn = Yolov8Neck(filters=list(filters), d=d)
        self.head = DetectionHead(num_classes, filters=filters)

    def forward(self, x):
        """Executes YOLOv8 network.

        Arguments
        ---------
        x : torch.Tensor
            Input to the YOLOv8 network.

        Returns
        -------
        Output of the YOLOv8 network : tuple
            Whatever the detection head returns.
        """
        x = self.net(x)
        x = self.fpn(*x)
        x = self.head(x)
        return x