The mmdet3d-gaussian from zhanggefan

使用mmrotated gwd loss导致nan的问题

章老师你好，我使用mmrotated.models.losses.GDLoss作为尺寸回归的补充，训练几个iters后所有loss都会变成nan。我对比了mmrotated里面gwd_loss和你的实现，除了增加一个height维度的尺寸，没有发现太多不同，什么原因导致nan？具体配置参数如下。

mmrotated commit b78bab24dc35657fe4e349d 最新版
mmrotated GDLoss配置参数loss_gauss_iou=dict(type='GDLoss', loss_type='gwd', loss_weight=5.0)

@HEADS.register_module()
class Anchor3DHeadGaussianLoss(Anchor3DHead):
    """Anchor3DHead variant with an auxiliary Gaussian-distance box loss.

    In addition to the losses returned by ``self.loss_single``, the boxes of
    positive anchors are decoded with ``self.bbox_coder`` and a subset of
    their components (indices ``[0, 1, 3, 4, 6]``) is compared between
    prediction and target using ``loss_gauss_iou``.

    Args:
        loss_gauss_iou (dict): Config of the Gaussian-distance loss; it is
            passed to ``build_loss``.
        **kwargs: Forwarded unchanged to ``Anchor3DHead.__init__``.
    """

    def __init__(self,
                 loss_gauss_iou=dict(
                     type='GDLoss', loss_type='gwd', loss_weight=5.0),
                 **kwargs):
        super(Anchor3DHeadGaussianLoss, self).__init__(**kwargs)
        self.loss_gauss_iou = build_loss(loss_gauss_iou)
        # Switch for the auxiliary loss; always enabled at construction time.
        self.use_iou_loss = True

    def loss_gaussian_single(
        self,
        cls_score,
        bbox_pred,
        dir_cls_preds,
        labels,
        label_weights,
        bbox_targets,
        bbox_weights,
        dir_targets,
        dir_weights,
        anchor_list,
        num_total_samples,
    ):
        """Compute the Gaussian-distance loss for a single feature level.

        The signature mirrors the argument list used for ``loss_single`` in
        :meth:`loss` (plus ``anchor_list``) so both can be driven by
        ``multi_apply``; several arguments are therefore accepted but unused.

        Args:
            cls_score (torch.Tensor): Class score in single-level. Unused.
            bbox_pred (torch.Tensor): Bbox prediction in single-level. It is
                a 4-D tensor whose dim 1 is moved last before being
                flattened to ``(-1, self.box_code_size)``.
            dir_cls_preds (torch.Tensor): Predictions of direction class
                in single-level. Unused.
            labels (torch.Tensor): Labels of class. Entries in
                ``[0, self.num_classes)`` are treated as positives.
            label_weights (torch.Tensor): Weights of class loss. Unused.
            bbox_targets (torch.Tensor): Encoded targets of bbox
                predictions.
            bbox_weights (torch.Tensor): Weights of bbox loss. Only used as
                the dtype/device template for the per-sample loss weights.
            dir_targets (torch.Tensor): Targets of direction predictions.
                Unused.
            dir_weights (torch.Tensor): Weights of direction loss. Unused.
            anchor_list (torch.Tensor): Anchors of this level without a
                per-sample batch dimension; they are tiled ``batch_size``
                times to line up with the flattened predictions.
            num_total_samples (int): The number of valid samples. Unused.

        Returns:
            tuple: A 1-tuple holding the loss of this level. The element is
                ``None`` when ``self.use_iou_loss`` is disabled, and the
                (zero-valued) sum of an empty prediction tensor when the
                level has no positive anchors.
        """
        # Return None for every level when the loss is disabled so that the
        # per-level results never mix None and tensors.
        if not self.use_iou_loss:
            return None,

        batch_size = bbox_pred.shape[0]
        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(-1, self.box_code_size)
        bbox_targets = bbox_targets.reshape(-1, self.box_code_size)

        # Positive anchors carry a foreground label in [0, num_classes).
        labels = labels.reshape(-1)
        pos_inds = ((labels >= 0)
                    & (labels < self.num_classes)).nonzero(
                        as_tuple=False).reshape(-1)
        num_pos = len(pos_inds)

        pos_bbox_pred = bbox_pred[pos_inds]
        if num_pos == 0:
            # Sum of an empty tensor: evaluates to zero while remaining a
            # function of the predictions.
            return pos_bbox_pred.sum(),

        pos_bbox_targets = bbox_targets[pos_inds]

        # The incoming anchors are level-based (no per-sample batch
        # dimension), so repeat them once per sample before indexing.
        anchors = anchor_list.reshape(-1, self.box_code_size).repeat(
            batch_size, 1)[pos_inds]

        # Original author's note: encoded gt and pred boxes may be flipped
        # with respect to each other, which does not influence the overlap.
        # NOTE(review): if the coder exponentiates raw size deltas, extreme
        # early predictions could yield very large or non-finite sizes here;
        # worth checking when debugging NaN losses - TODO confirm.
        gt_bboxes = self.bbox_coder.decode(anchors, pos_bbox_targets)
        pred_bboxes = self.bbox_coder.decode(anchors, pos_bbox_pred)

        # The loss expects one weight per sample, i.e. shape [N].
        iou_gau_weight = self.train_cfg.get('iou_gau_weight', 1.0)
        sample_weights = bbox_weights.new_full((num_pos, ),
                                               1.0) * iou_gau_weight

        # Keep components 0, 1, 3, 4, 6 of each decoded box
        # (presumably x, y, two planar sizes and yaw - TODO confirm).
        bev_dims = [0, 1, 3, 4, 6]
        loss_iou = self.loss_gauss_iou(
            pred_bboxes[..., bev_dims],
            gt_bboxes[..., bev_dims],
            sample_weights,
            avg_factor=max(num_pos, 1))

        return loss_iou,

    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             dir_cls_preds,
             gt_bboxes,
             gt_labels,
             input_metas,
             gt_bboxes_ignore=None):
        """Calculate losses.

        Args:
            cls_scores (list[torch.Tensor]): Multi-level class scores.
            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
            dir_cls_preds (list[torch.Tensor]): Multi-level direction
                class predictions.
            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes
                of each sample.
            gt_labels (list[torch.Tensor]): Gt labels of each sample.
            input_metas (list[dict]): Contain pcd and img's meta info.
            gt_bboxes_ignore (None | list[torch.Tensor]): Forwarded to
                ``self.anchor_target_3d`` as ``gt_bboxes_ignore_list``.

        Returns:
            dict[str, list[torch.Tensor]] | None: Per-level losses, or
                ``None`` when ``self.anchor_target_3d`` returns ``None``.

                - loss_cls (list[torch.Tensor]): Classification losses.
                - loss_bbox (list[torch.Tensor]): Box regression losses.
                - loss_dir (list[torch.Tensor]): Direction classification \
                    losses. Only present when the first level's direction \
                    loss is not ``None``.
                - losses_gaussian (list[torch.Tensor]): Gaussian-distance \
                    losses. Only present when the first level's value is \
                    not ``None``.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels
        device = cls_scores[0].device
        anchor_list = self.get_anchors(
            featmap_sizes, input_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.anchor_target_3d(
            anchor_list,
            gt_bboxes,
            input_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            num_classes=self.num_classes,
            label_channels=label_channels,
            sampling=self.sampling)

        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         dir_targets_list, dir_weights_list, num_total_pos,
         num_total_neg) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)

        # Classification / regression / direction losses per level.
        losses_cls, losses_bbox, losses_dir = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            dir_cls_preds,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            dir_targets_list,
            dir_weights_list,
            num_total_samples=num_total_samples)

        losses_dict = dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
        )

        if losses_dir[0] is not None:
            losses_dict.update(dict(loss_dir=losses_dir))

        # Per-level anchors, needed to decode boxes for the Gaussian loss.
        multi_level_anchors = self.anchor_generator.grid_anchors(
            featmap_sizes, device=device)

        losses_gaussian, = multi_apply(
            self.loss_gaussian_single,
            cls_scores,
            bbox_preds,
            dir_cls_preds,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            dir_targets_list,
            dir_weights_list,
            multi_level_anchors,
            num_total_samples=num_total_samples)

        if losses_gaussian[0] is not None:
            losses_dict.update(dict(losses_gaussian=losses_gaussian))

        return losses_dict

zhanggefan / mmdet3d-gaussian Goto Github PK

mmdet3d-gaussian's People

Contributors

Stargazers

Watchers

Forkers

mmdet3d-gaussian's Issues

使用mmrotated gwd loss导致nan的问题

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent