When I try to use a new, large dataset (15M pairs) to test code-clone detection on different models, I get an error related to multiprocessing during encoding. Any ideas or suggestions? I suspect it is caused by the size of the dataset, with the CPU freezing while processing it. I tried reducing the batch size and max_length, but the problem persists. The system I'm using is Linux.
Error Message:
Killed
[usr]$ Process ForkPoolWorker-3:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 131, in worker
put((job, i, result))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 409, in _send_bytes
self._send(buf)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib64/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 136, in worker
put((job, i, (False, wrapped)))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Process ForkPoolWorker-2:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 131, in worker
put((job, i, result))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib64/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 136, in worker
put((job, i, (False, wrapped)))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Process ForkPoolWorker-1:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 131, in worker
put((job, i, result))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib64/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 136, in worker
put((job, i, (False, wrapped)))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Process ForkPoolWorker-4:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 131, in worker
put((job, i, result))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib64/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib64/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python3.9/multiprocessing/pool.py", line 136, in worker
put((job, i, (False, wrapped)))
File "/usr/lib64/python3.9/multiprocessing/queues.py", line 377, in put
self._writer.send_bytes(obj)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 204, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 408, in _send_bytes
self._send(header)
File "/usr/lib64/python3.9/multiprocessing/connection.py", line 372, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe