import torch
import pandas as pd
import polars as pl
import depthcharge as dc
import natsort
import re
import numpy as np
import pyarrow as pa
from pyarrow import int32
from depthcharge.data import SpectrumDataset
from torch.utils.data import DataLoader
from depthcharge.transformers import SpectrumTransformerEncoder
from depthcharge.encoders import FloatEncoder
from depthcharge.data import CustomField
# Build a SpectrumDataset from a single spectrum file and print every batch.

# NOTE(review): the variable name says mzML but the path points at an MGF
# file; the name is kept unchanged in case other cells reference it.
mzml_file = ["20190118_Q2_MD_ColQ2-51_AlexanderBull_P15_Fluide_4microscans.mgf"]

# Handed to SpectrumDataset as its ``parse_kwargs`` argument.
parse_kwargs = {
    "progress": False,
    "preprocessing_fn": [
        dc.data.preprocessing.set_mz_range(min_mz=0),
        dc.data.preprocessing.filter_intensity(max_num_peaks=200),
        dc.data.preprocessing.scale_intensity(scaling="root"),
        dc.data.preprocessing.scale_to_unit_norm,
    ],
    # Extra columns read from each spectrum's "params" mapping.
    "custom_fields": [
        # CustomField("Seq", lambda x: x["params"]["seq"], pa.string()),
        CustomField("RT", lambda x: x["params"]["rtinseconds"], pa.float64()),
        CustomField("charge", lambda x: x["params"]["charge"], pa.list_(int32())),
    ],
}

dataset = SpectrumDataset(mzml_file, batch_size=8, parse_kwargs=parse_kwargs)

# The dataset is iterable-style: DataLoader raises ValueError if ``shuffle``
# or ``sampler`` is specified for an IterableDataset, so neither is passed.
# ``batch_size=None`` turns off the DataLoader's own batching; the dataset was
# already constructed with batch_size=8.
# NOTE(review): if shuffled batches are required, the shuffling has to happen
# on the dataset side - TODO confirm which option SpectrumDataset exposes.
loader = DataLoader(dataset, batch_size=None)

for batch in loader:
    print(batch)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[26], line 38
33 dataset = SpectrumDataset(mzml_file, batch_size=8, parse_kwargs=parse_kwargs)
36 from torch.utils.data import DataLoader
---> 38 loader = DataLoader(dataset, batch_size=None,shuffle=True)
40 for batch in loader:
41 print(batch)
File ~/miniconda3/envs/ttt/lib/python3.11/site-packages/torch/utils/data/dataloader.py:313, in DataLoader.__init__(self, dataset, batch_size, shuffle, sampler, batch_sampler, num_workers, collate_fn, pin_memory, drop_last, timeout, worker_init_fn, multiprocessing_context, generator, prefetch_factor, persistent_workers, pin_memory_device)
--> 308 raise ValueError(
309 f"DataLoader with IterableDataset: expected unspecified shuffle option, but got shuffle={shuffle}")
311 if sampler is not None:
312 # See NOTE [ Custom Samplers and IterableDataset ]
313 raise ValueError(
314 f"DataLoader with IterableDataset: expected unspecified sampler option, but got sampler={sampler}")
315 elif batch_sampler is not None:
316 # See NOTE [ Custom Samplers and IterableDataset ]
317 raise ValueError(
318 "DataLoader with IterableDataset: expected unspecified "
319 f"batch_sampler option, but got batch_sampler={batch_sampler}")
ValueError: DataLoader with IterableDataset: expected unspecified shuffle option, but got shuffle=True