I ran the command below and got the error shown in the following log.
deepspeed --num_nodes 1 --num_gpus 4 pretrain_gpt2.py --model-parallel-size 4 --num-layers 24 --hidden-size 1024 --num-attention-heads 16 --batch-size 8 --seq-length 1024 --max-position-embeddings 1024 --train-iters 100000 --resume-dataloader --train-data wikipedia --lazy-loader --tokenizer-type GPT2BPETokenizer --split 949,50,1 --distributed-backend nccl --lr 0.00015 --no-load-optim --lr-decay-style cosine --weight-decay 1e-2 --clip-grad 1.0 --warmup .01 --checkpoint-activations --deepspeed-activation-checkpointing --fp16 --deepspeed --deepspeed_config /home/sdl/DeepSpeedExamples/Megatron-LM/scripts/ds_zero2_config.json
[2021-02-14 15:50:02,533] [WARNING] [runner.py:117:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
[2021-02-14 15:50:02,574] [INFO] [runner.py:355:main] cmd = /home/sdl/anaconda3/envs/deepspeed/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgM119 --master_addr=127.0.0.1 --master_port=29500 pretrain_gpt2.py --model-parallel-size 4 --num-layers 24 --hidden-size 1024 --num-attention-heads 16 --batch-size 8 --seq-length 1024 --max-position-embeddings 1024 --train-iters 100000 --resume-dataloader --train-data wikipedia --lazy-loader --tokenizer-type GPT2BPETokenizer --split 949,50,1 --distributed-backend nccl --lr 0.00015 --no-load-optim --lr-decay-style cosine --weight-decay 1e-2 --clip-grad 1.0 --warmup .01 --checkpoint-activations --deepspeed-activation-checkpointing --fp16 --deepspeed --deepspeed_config /home/sdl/DeepSpeedExamples/Megatron-LM/scripts/ds_zero2_config.json
[2021-02-14 15:50:04,024] [INFO] [launch.py:78:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3]}
[2021-02-14 15:50:04,024] [INFO] [launch.py:87:main] nnodes=1, num_local_procs=4, node_rank=0
[2021-02-14 15:50:04,024] [INFO] [launch.py:99:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3]})
[2021-02-14 15:50:04,024] [INFO] [launch.py:100:main] dist_world_size=4
[2021-02-14 15:50:04,024] [INFO] [launch.py:103:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3
[2021-02-14 15:50:06,570] [INFO] [distributed.py:40:init_distributed] Initializing torch distributed with backend: nccl
using world size: 4 and model-parallel size: 4
> using dynamic loss scaling
[2021-02-14 15:50:06,582] [INFO] [distributed.py:40:init_distributed] Initializing torch distributed with backend: nccl
[2021-02-14 15:50:06,582] [INFO] [distributed.py:40:init_distributed] Initializing torch distributed with backend: nccl
[2021-02-14 15:50:06,583] [INFO] [distributed.py:40:init_distributed] Initializing torch distributed with backend: nccl
> initializing model parallel with size 4
[2021-02-14 15:50:17,741] [INFO] [checkpointing.py:629:_configure_using_config_file] {'partition_activations': True, 'contiguous_memory_optimization': False, 'cpu_checkpointing': False, 'number_checkpoints': None, 'synchronize_checkpoint_boundary': False, 'profile': False}
[2021-02-14 15:50:17,742] [INFO] [checkpointing.py:629:_configure_using_config_file] {'partition_activations': True, 'contiguous_memory_optimization': False, 'cpu_checkpointing': False, 'number_checkpoints': None, 'synchronize_checkpoint_boundary': False, 'profile': False}
Pretrain GPT2 model
arguments:
pretrained_bert .............. False
attention_dropout ............ 0.1
num_attention_heads .......... 16
hidden_size .................. 1024
intermediate_size ............ None
num_layers ................... 24
layernorm_epsilon ............ 1e-05
hidden_dropout ............... 0.1
max_position_embeddings ...... 1024
vocab_size ................... 30522
deep_init .................... False
make_vocab_size_divisible_by . 128
cpu_optimizer ................ False
cpu_torch_adam ............... False
fp16 ......................... True
fp32_embedding ............... False
fp32_layernorm ............... False
fp32_tokentypes .............. False
fp32_allreduce ............... False
hysteresis ................... 2
loss_scale ................... None
loss_scale_window ............ 1000
min_scale .................... 1
batch_size ................... 8
weight_decay ................. 0.01
checkpoint_activations ....... True
checkpoint_num_layers ........ 1
deepspeed_activation_checkpointing True
clip_grad .................... 1.0
train_iters .................. 100000
log_interval ................. 100
exit_interval ................ None
seed ......................... 1234
reset_position_ids ........... False
reset_attention_mask ......... False
lr_decay_iters ............... None
lr_decay_style ............... cosine
lr ........................... 0.00015
warmup ....................... 0.01
[2021-02-14 15:50:17,742] [INFO] [checkpointing.py:629:_configure_using_config_file] {'partition_activations': True, 'contiguous_memory_optimization': False, 'cpu_checkpointing': False, 'number_checkpoints': None, 'synchronize_checkpoint_boundary': False, 'profile': False}
save ......................... None
save_interval ................ 5000
no_save_optim ................ False
no_save_rng .................. False
load ......................... None
no_load_optim ................ True
no_load_rng .................. False
finetune ..................... False
resume_dataloader ............ True
distributed_backend .......... nccl
local_rank ................... 0
eval_batch_size .............. None
eval_iters ................... 100
eval_interval ................ 1000
eval_seq_length .............. None
eval_max_preds_per_seq ....... None
overlapping_eval ............. 32
cloze_eval ................... False
eval_hf ...................... False
load_openai .................. False
temperature .................. 1.0
top_p ........................ 0.0
top_k ........................ 0
out_seq_length ............... 256
model_parallel_size .......... 4
shuffle ...................... False
train_data ................... ['wikipedia']
use_npy_data_loader .......... False
train_data_path ..............
val_data_path ................
test_data_path ...............
input_data_sizes_file ........ sizes.txt
delim ........................ ,
text_key ..................... sentence
eval_text_key ................ None
valid_data ................... None
split ........................ 949,50,1
test_data .................... None
lazy_loader .................. True
loose_json ................... False
presplit_sentences ........... False
num_workers .................. 2
tokenizer_model_type ......... bert-large-uncased
tokenizer_path ............... tokenizer.model
tokenizer_type ............... GPT2BPETokenizer
cache_dir .................... None
use_tfrecords ................ False
seq_length ................... 1024
max_preds_per_seq ............ None
deepspeed .................... True
deepspeed_config ............. /home/sdl/DeepSpeedExamples/Megatron-LM/scripts/ds_zero2_config.json
deepscale .................... False
deepscale_config ............. None
deepspeed_mpi ................ False
cuda ......................... True
rank ......................... 0
world_size ................... 4
dynamic_loss_scale ........... True
[2021-02-14 15:50:17,742] [INFO] [checkpointing.py:629:_configure_using_config_file] {'partition_activations': True, 'contiguous_memory_optimization': False, 'cpu_checkpointing': False, 'number_checkpoints': None, 'synchronize_checkpoint_boundary': False, 'profile': False}
[2021-02-14 15:50:17,743] [INFO] [checkpointing.py:256:model_parallel_cuda_manual_seed] > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234
configuring data
> padded vocab (size: 50257) with 431 dummy tokens (new size: 50688)
> found end-of-document token: 50256
building GPT2 model ...
> number of parameters on model parallel rank 3: 89714688
Optimizer = FusedAdam
> number of parameters on model parallel rank 1: 89714688
Optimizer = FusedAdam
> number of parameters on model parallel rank 2: 89714688
Optimizer = FusedAdam
Using /home/sdl/.cache/torch_extensions as PyTorch extensions root...
> number of parameters on model parallel rank 0: 89714688
Detected CUDA files, patching ldflags
Emitting ninja build file /home/sdl/.cache/torch_extensions/fused_adam/build.ninja...
Building extension module fused_adam...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Using /home/sdl/.cache/torch_extensions as PyTorch extensions root...
Optimizer = FusedAdam
learning rate decaying cosine
DeepSpeed is enabled.
[2021-02-14 15:50:30,238] [INFO] [logging.py:60:log_dist] [Rank 0] DeepSpeed info: version=0.3.10, git-hash=unknown, git-branch=unknown
Using /home/sdl/.cache/torch_extensions as PyTorch extensions root...
Using /home/sdl/.cache/torch_extensions as PyTorch extensions root...
[1/2] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/csrc/includes -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/torch/csrc/api/include -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/TH -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/sdl/anaconda3/envs/deepspeed/include/python3.6m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75 -std=c++14 -c /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o
FAILED: multi_tensor_adam.cuda.o
/usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/csrc/includes -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/torch/csrc/api/include -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/TH -isystem /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/sdl/anaconda3/envs/deepspeed/include/python3.6m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -lineinfo -O3 --use_fast_math -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75 -std=c++14 -c /home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o
/usr/include/c++/7/bits/basic_string.tcc: In instantiation of ‘static std::basic_string<_CharT, _Traits, _Alloc>::_Rep* std::basic_string<_CharT, _Traits, _Alloc>::_Rep::_S_create(std::basic_string<_CharT, _Traits, _Alloc>::size_type, std::basic_string<_CharT, _Traits, _Alloc>::size_type, const _Alloc&) [with _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>; std::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]’:
/usr/include/c++/7/bits/basic_string.tcc:578:28: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct(_InIterator, _InIterator, const _Alloc&, std::forward_iterator_tag) [with _FwdIterator = const char16_t*; _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>]’
/usr/include/c++/7/bits/basic_string.h:5042:20: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct_aux(_InIterator, _InIterator, const _Alloc&, std::__false_type) [with _InIterator = const char16_t*; _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>]’
/usr/include/c++/7/bits/basic_string.h:5063:24: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct(_InIterator, _InIterator, const _Alloc&) [with _InIterator = const char16_t*; _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>]’
/usr/include/c++/7/bits/basic_string.tcc:656:134: required from ‘std::basic_string<_CharT, _Traits, _Alloc>::basic_string(const _CharT*, std::basic_string<_CharT, _Traits, _Alloc>::size_type, const _Alloc&) [with _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>; std::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]’
/usr/include/c++/7/bits/basic_string.h:6688:95: required from here
/usr/include/c++/7/bits/basic_string.tcc:1067:16: error: cannot call member function ‘void std::basic_string<_CharT, _Traits, _Alloc>::_Rep::_M_set_sharable() [with _CharT = char16_t; _Traits = std::char_traits<char16_t>; _Alloc = std::allocator<char16_t>]’ without object
__p->_M_set_sharable();
~~~~~~~~~^~
/usr/include/c++/7/bits/basic_string.tcc: In instantiation of ‘static std::basic_string<_CharT, _Traits, _Alloc>::_Rep* std::basic_string<_CharT, _Traits, _Alloc>::_Rep::_S_create(std::basic_string<_CharT, _Traits, _Alloc>::size_type, std::basic_string<_CharT, _Traits, _Alloc>::size_type, const _Alloc&) [with _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>; std::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]’:
/usr/include/c++/7/bits/basic_string.tcc:578:28: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct(_InIterator, _InIterator, const _Alloc&, std::forward_iterator_tag) [with _FwdIterator = const char32_t*; _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>]’
/usr/include/c++/7/bits/basic_string.h:5042:20: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct_aux(_InIterator, _InIterator, const _Alloc&, std::__false_type) [with _InIterator = const char32_t*; _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>]’
/usr/include/c++/7/bits/basic_string.h:5063:24: required from ‘static _CharT* std::basic_string<_CharT, _Traits, _Alloc>::_S_construct(_InIterator, _InIterator, const _Alloc&) [with _InIterator = const char32_t*; _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>]’
/usr/include/c++/7/bits/basic_string.tcc:656:134: required from ‘std::basic_string<_CharT, _Traits, _Alloc>::basic_string(const _CharT*, std::basic_string<_CharT, _Traits, _Alloc>::size_type, const _Alloc&) [with _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>; std::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]’
/usr/include/c++/7/bits/basic_string.h:6693:95: required from here
/usr/include/c++/7/bits/basic_string.tcc:1067:16: error: cannot call member function ‘void std::basic_string<_CharT, _Traits, _Alloc>::_Rep::_M_set_sharable() [with _CharT = char32_t; _Traits = std::char_traits<char32_t>; _Alloc = std::allocator<char32_t>]’ without object
ninja: build stopped: subcommand failed.
Traceback (most recent call last):
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1539, in _run_ninja_build
env=env)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/subprocess.py", line 438, in run
output=stdout, stderr=stderr)
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "pretrain_gpt2.py", line 716, in <module>
main()
File "pretrain_gpt2.py", line 664, in main
model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
File "pretrain_gpt2.py", line 176, in setup_model_and_optimizer
dist_init_required=False
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/__init__.py", line 119, in initialize
config_params=config_params)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 171, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 514, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 583, in _configure_basic_optimizer
optimizer = FusedAdam(model_parameters, **optimizer_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 180, in load
return self.jit_load(verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 216, in jit_load
verbose=verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 997, in load
keep_intermediates=keep_intermediates)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1202, in _jit_compile
with_cuda=with_cuda)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1300, in _write_ninja_file_and_build_library
error_prefix="Error building extension '{}'".format(name))
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1555, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'fused_adam'
Loading extension module fused_adam...
Traceback (most recent call last):
File "pretrain_gpt2.py", line 716, in <module>
main()
File "pretrain_gpt2.py", line 664, in main
model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
File "pretrain_gpt2.py", line 176, in setup_model_and_optimizer
dist_init_required=False
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/__init__.py", line 119, in initialize
config_params=config_params)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 171, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 514, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 583, in _configure_basic_optimizer
optimizer = FusedAdam(model_parameters, **optimizer_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 180, in load
Loading extension module fused_adam...
return self.jit_load(verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 216, in jit_load
verbose=verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 997, in load
Traceback (most recent call last):
File "pretrain_gpt2.py", line 716, in <module>
keep_intermediates=keep_intermediates)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1213, in _jit_compile
return _import_module_from_library(name, build_directory, is_python_module)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1560, in _import_module_from_library
file, path, description = imp.find_module(module_name, [path])
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/imp.py", line 297, in find_module
main()
File "pretrain_gpt2.py", line 664, in main
raise ImportError(_ERR_MSG.format(name), name=name)
ImportError: No module named 'fused_adam'
model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
File "pretrain_gpt2.py", line 176, in setup_model_and_optimizer
Loading extension module fused_adam...
dist_init_required=False
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/__init__.py", line 119, in initialize
config_params=config_params)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 171, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 514, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 583, in _configure_basic_optimizer
Traceback (most recent call last):
File "pretrain_gpt2.py", line 716, in <module>
optimizer = FusedAdam(model_parameters, **optimizer_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 180, in load
main()
File "pretrain_gpt2.py", line 664, in main
return self.jit_load(verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 216, in jit_load
verbose=verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 997, in load
model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
File "pretrain_gpt2.py", line 176, in setup_model_and_optimizer
dist_init_required=False
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/__init__.py", line 119, in initialize
keep_intermediates=keep_intermediates)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1213, in _jit_compile
config_params=config_params)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 171, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 514, in _configure_optimizer
return _import_module_from_library(name, build_directory, is_python_module)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1560, in _import_module_from_library
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/runtime/engine.py", line 583, in _configure_basic_optimizer
file, path, description = imp.find_module(module_name, [path])
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/imp.py", line 297, in find_module
optimizer = FusedAdam(model_parameters, **optimizer_parameters)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
raise ImportError(_ERR_MSG.format(name), name=name)
ImportError: No module named 'fused_adam'
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 180, in load
return self.jit_load(verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/deepspeed/ops/op_builder/builder.py", line 216, in jit_load
verbose=verbose)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 997, in load
keep_intermediates=keep_intermediates)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1213, in _jit_compile
return _import_module_from_library(name, build_directory, is_python_module)
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/site-packages/torch/utils/cpp_extension.py", line 1560, in _import_module_from_library
file, path, description = imp.find_module(module_name, [path])
File "/home/sdl/anaconda3/envs/deepspeed/lib/python3.6/imp.py", line 297, in find_module
raise ImportError(_ERR_MSG.format(name), name=name)
ImportError: No module named 'fused_adam'