
Comments (8)

jpainam commented on June 7, 2024
"""
Training file
Usage:
    --config [PATH to configuration file for desired dataset]
"""

parser = argparse.ArgumentParser(description='Semantic-Aware Scene Recognition Evaluation')
parser.add_argument('--config', metavar='DIR', help='Configuration file path')

def train_model(model, train_loader, optimizer, scheduler):
    # Extract batch size
    batch_size = CONFIG['TRAINING']['BATCH_SIZE']['TRAIN']
    # Start data time
    data_time_start = time.time()
    for epoch in range(CONFIG['TRAINING']['EPOCH']):
        batch_time = utils.AverageMeter()
        losses = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top2 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        print("-" * 65)
        print(f'Training for epoch {epoch}')
        for i, mini_batch in enumerate(train_loader):
            start_time = time.time()
            RGB_image = mini_batch['Image']
            semantic_mask = mini_batch['Semantic']
            semantic_scores = mini_batch['Semantic Scores']
            sceneLabelGT = mini_batch['Scene Index']
            if USE_CUDA:
                RGB_image, semantic_mask, semantic_scores = RGB_image.cuda(), semantic_mask.cuda(), semantic_scores.cuda()
                sceneLabelGT = sceneLabelGT.cuda()
            # Create tensor of probabilities from semantic_mask
            semanticTensor = utils.make_one_hot(semantic_mask, semantic_scores, C=CONFIG['DATASET']['N_CLASSES_SEM'])

            # Model Forward
            optimizer.zero_grad()
            outputSceneLabel, feature_conv, outputSceneLabelRGB, outputSceneLabelSEM = model(RGB_image, semanticTensor)

            # Compute Loss
            loss = model.loss(outputSceneLabel, sceneLabelGT)
            loss.backward()
            optimizer.step()

            # Measure Top1, Top2 and Top5 accuracy
            prec1, prec2, prec5 = utils.accuracy(outputSceneLabel.data, sceneLabelGT, topk=(1, 2, 5))

            # Update values
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top2.update(prec2.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            if i % CONFIG['TRAINING']['PRINT_FREQ'] == 0:
                print('Training Epoch {} \t'
                      'Loss {loss.val:.3f} (avg: {loss.avg:.3f})\t'
                      'Prec@1 {top1.val:.3f} (avg: {top1.avg:.3f})\t'
                      'Prec@2 {top2.val:.3f} (avg: {top2.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} (avg: {top5.avg:.3f})'.
                      format(epoch, loss=losses,
                             top1=top1, top2=top2, top5=top5))
        scheduler.step()
        print('Training Epoch {} \t'
              'Loss {loss.val:.3f} (avg: {loss.avg:.3f})\t'
              'Prec@1 {top1.val:.3f} (avg: {top1.avg:.3f})\t'
              'Prec@2 {top2.val:.3f} (avg: {top2.avg:.3f})\t'
              'Prec@5 {top5.val:.3f} (avg: {top5.avg:.3f})'.
              format(epoch, loss=losses,
                     top1=top1, top2=top2, top5=top5))



global USE_CUDA, classes, CONFIG

# Decode CONFIG file information
args = parser.parse_args()
CONFIG = yaml.safe_load(open(args.config, 'r'))
USE_CUDA = torch.cuda.is_available()
log_name = osp.join(CONFIG['EXP']['ID'], 'train.log')
log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
sys.stdout = Logger(osp.join(CONFIG['EXP']['OUTPUT_DIR'], log_name))

if __name__ == '__main__':

    print('-' * 65)
    print("Training started starting...")
    print('-' * 65)
    # Instantiate network
    if CONFIG['MODEL']['ONLY_RGB']:
        print('Training ONLY RGB branch')
        print('Selected RGB backbone architecture: ' + CONFIG['MODEL']['ARCH'])
        model = RGBBranch(arch=CONFIG['MODEL']['ARCH'], scene_classes=CONFIG['DATASET']['N_CLASSES_SCENE'])
    elif CONFIG['MODEL']['ONLY_SEM']:
        print('Training ONLY SEM branch')
        model = SemBranch(scene_classes=CONFIG['DATASET']['N_CLASSES_SCENE'], semantic_classes=CONFIG['DATASET']['N_CLASSES_SEM'])
    else:
        print('Training complete model')
        print('Selected RGB backbone architecture: ' + CONFIG['MODEL']['ARCH'])
        model = SASceneNet(arch=CONFIG['MODEL']['ARCH'], scene_classes=CONFIG['DATASET']['N_CLASSES_SCENE'], semantic_classes=CONFIG['DATASET']['N_CLASSES_SEM'])

    # Move model to GPU and set it to training mode
    if USE_CUDA:
        model.cuda()
    cudnn.benchmark = USE_CUDA
    model.train()

    print('-' * 65)
    print('Loading dataset {}...'.format(CONFIG['DATASET']['NAME']))

    traindir = os.path.join(CONFIG['DATASET']['ROOT'], CONFIG['DATASET']['NAME'])
    valdir = os.path.join(CONFIG['DATASET']['ROOT'], CONFIG['DATASET']['NAME'])

    train_loader, val_loader, classes = get_data(traindir, valdir, CONFIG)
    # Print dataset information
    print('Train set. Size {}. Batch size {}. Nbatches {}'
          .format(len(train_loader) * CONFIG['TRAINING']['BATCH_SIZE']['TRAIN'], CONFIG['TRAINING']['BATCH_SIZE']['TRAIN'], len(train_loader)))
    print('Train set number of scenes: {}' .format(len(classes)))
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer_ft = optim.SGD(model_parameters, lr=0.001, weight_decay=1e-4, momentum=0.9, nesterov=True)

    # Decay LR by a factor of 0.1 every 80 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=80, gamma=0.1)
    train_model(model, train_loader, optimizer_ft, exp_lr_scheduler)

But this code never gets past the for i, mini_batch in enumerate(train_loader): line, even after I reduce the batch size to 32.
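One way to narrow this down is to take the DataLoader workers out of the picture; a minimal debugging sketch, assuming a standard torch.utils.data.DataLoader underneath get_data and using train_dataset as a placeholder for whatever dataset it wraps:

from torch.utils.data import DataLoader

# Rebuild the loader with num_workers=0 so every __getitem__ call runs in the
# main process and any hang or exception surfaces directly instead of
# deadlocking silently in a worker.
debug_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)

sample = next(iter(debug_loader))  # should return promptly if the dataset is healthy
print(sample['Image'].shape)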


alexlopezcifuentes commented on June 7, 2024

Hi!

The training code is not explicitly released yet. However, you can easily train the method yourself.

We provide full implementations of the three networks used:

RGB Branch: https://github.com/vpulab/Semantic-Aware-Scene-Recognition/blob/master/RGBBranch.py
Semantic Branch: https://github.com/vpulab/Semantic-Aware-Scene-Recognition/blob/master/SemBranch.py
Semantic-Aware: https://github.com/vpulab/Semantic-Aware-Scene-Recognition/blob/master/SASceneNet.py

In addition, we provide links to all the training data used, and the full training procedure is explained in the paper (https://www.sciencedirect.com/science/article/pii/S0031320320300613).

With this in mind, you can simply use a classification training script from PyTorch (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html) and train the whole method.
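For context, the core of that tutorial's training loop boils down to a few lines; a minimal sketch, where model, train_loader, and num_epochs are placeholders for your own setup:

import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()              # clear accumulated gradients
        outputs = model(images)            # forward pass
        loss = criterion(outputs, labels)  # classification loss
        loss.backward()                    # backward pass
        optimizer.step()                   # parameter update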

Alex.


jpainam commented on June 7, 2024

You mentioned a two-stage training procedure in your paper. To avoid a gap between the results of my implementation and yours, I wanted to use the same training details. I'll try to reproduce them and share the training loop here tomorrow so you can confirm them.

Thank you for your quick answer.


jpainam commented on June 7, 2024

Hi, I was able to trace the execution and saw that it hangs at the ImgAug transformations:

        # ----------------------------- #
        #     ImgAug Transformations    #
        # ----------------------------- #
        # Transformations for train set
        self.seq = iaa.Sequential([
            # Small gaussian blur with random sigma between 0 and 0.5.
            iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
            # Strengthen or weaken the contrast in each image.
            iaa.ContrastNormalization((0.75, 1.5)),
            # Add gaussian noise.
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            # Make some images brighter and some darker.
            iaa.Multiply((0.8, 1.2), per_channel=0.2),
        ], random_order=True)  # apply augmenters in random order

In particular, it hangs at this line, where the transformations are applied:

img = np.squeeze(self.seq.augment_images(np.expand_dims(img, axis=0)))

So I commented it out. These are the results I get:

Validation results: Loss 1.006, Prec@1 73.582, Prec@2 83.881, Prec@5 93.358, Mean Class Accuracy 73.696

Compared to yours:

Validation results: Loss 0.509, Prec@1 87.015, Prec@2 93.881, Prec@5 98.582, Mean Class Accuracy 86.943

Any comments on the training loop would be welcome.
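To check whether the augmenter itself is the bottleneck, it can be timed on a single image outside the DataLoader; a minimal sketch using imgaug's single-image API (note that newer imgaug releases rename ContrastNormalization to LinearContrast):

import time
import numpy as np
import imgaug.augmenters as iaa

seq = iaa.Sequential([
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
    iaa.ContrastNormalization((0.75, 1.5)),
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
], random_order=True)

img = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)  # dummy HxWx3 image
start = time.time()
aug = seq.augment_image(img)  # single-image call, no expand/squeeze round-trip
print('One augmentation took {:.4f}s'.format(time.time() - start))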


alexlopezcifuentes commented on June 7, 2024

Are you first training both the RGB and Semantic Branches independently, and then training the Attention Module with those branches frozen? That's quite important...

What batch size are you using?

I would start by checking that your results for the RGB Branch and the Semantic Branch trained independently are close to the ones we reported in the paper, then use those trained weights to train the Attention Module.
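For reference, the second stage could look like the sketch below; rgb_branch and sem_branch are assumed attribute names that may differ in the actual SASceneNet definition:

import torch.optim as optim

# Freeze both branches (attribute names are assumptions; adapt to the model).
for p in model.rgb_branch.parameters():
    p.requires_grad = False
for p in model.sem_branch.parameters():
    p.requires_grad = False

# Optimize only the remaining (attention-module) parameters.
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable, lr=0.001, momentum=0.9, nesterov=True, weight_decay=1e-4)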


jpainam commented on June 7, 2024

I directly trained the SASceneNet model based on this link; it already includes the attention mechanism:

e7 = e6 * self.sigmoid(y5)

You mean I should freeze the parameters of the attention layers for some epochs and then unfreeze them later?


alexlopezcifuentes commented on June 7, 2024

Please carefully read Section 3.6 (Training procedure and loss) of the paper.


jpainam commented on June 7, 2024

.....

