plugins:
  source:
    - module: intake_parquet
sources:
  out_2:
    driver: parquet
    args:
      urlpath: gs://bucket_name/out_2.parq
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-28-2831bbb95f63> in <module>()
----> 1 cat.out_2.read()
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/intake_parquet/source.py in read(self)
75 Create single pandas dataframe from the whole data-set
76 """
---> 77 self._load_metadata()
78 return self._df.compute()
79
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/intake/source/base.py in _load_metadata(self)
124 """load metadata only if needed"""
125 if self._schema is None:
--> 126 self._schema = self._get_schema()
127 self.datashape = self._schema.datashape
128 self.dtype = self._schema.dtype
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/intake_parquet/source.py in _get_schema(self)
58 def _get_schema(self):
59 if self._df is None:
---> 60 self._df = self._to_dask()
61 dtypes = {k: str(v) for k, v in self._df._meta.dtypes.items()}
62 self._schema = base.Schema(datashape=None,
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/intake_parquet/source.py in _to_dask(self)
107 urlpath = self._get_cache(self._urlpath)[0]
108 self._df = dd.read_parquet(urlpath,
--> 109 storage_options=self._storage_options, **self._kwargs)
110 self._load_metadata()
111 return self._df
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/dask/dataframe/io/parquet/core.py in read_parquet(path, columns, filters, categories, index, storage_options, engine, gather_statistics, split_row_groups, read_from_paths, chunksize, **kwargs)
343 read_from_paths=read_from_paths,
344 engine=engine,
--> 345 **kwargs,
346 )
347
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/dask/dataframe/io/parquet/fastparquet.py in read_metadata(cls, fs, paths, categories, index, gather_statistics, filters, **kwargs)
206 # correspond to a row group (populated below).
207 parts, pf, gather_statistics, fast_metadata, base_path = _determine_pf_parts(
--> 208 fs, paths, gather_statistics, **kwargs
209 )
210
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/dask/dataframe/io/parquet/fastparquet.py in _determine_pf_parts(fs, paths, gather_statistics, **kwargs)
145 pf.cats = paths_to_cats(fns, scheme)
146 parts = paths.copy()
--> 147 elif fs.isdir(paths[0]):
148 # This is a directory, check for _metadata, then _common_metadata
149 paths = fs.glob(paths[0] + fs.sep + "*")
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
116 def wrapper(*args, **kwargs):
117 self = obj or args[0]
--> 118 return maybe_sync(func, self, *args, **kwargs)
119
120 return wrapper
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/fsspec/asyn.py in maybe_sync(func, self, *args, **kwargs)
95 if inspect.iscoroutinefunction(func):
96 # run the awaitable on the loop
---> 97 return sync(loop, func, *args, **kwargs)
98 else:
99 # just call the blocking function
/home/user/miniconda3/envs/intake/lib/python3.7/site-packages/fsspec/asyn.py in sync(loop, func, callback_timeout, *args, **kwargs)
63 else:
64 while not e.is_set():
---> 65 e.wait(10)
66 if error[0]:
67 typ, exc, tb = error[0]
/home/user/miniconda3/envs/intake/lib/python3.7/threading.py in wait(self, timeout)
550 signaled = self._flag
551 if not signaled:
--> 552 signaled = self._cond.wait(timeout)
553 return signaled
554
/home/user/miniconda3/envs/intake/lib/python3.7/threading.py in wait(self, timeout)
298 else:
299 if timeout > 0:
--> 300 gotit = waiter.acquire(True, timeout)
301 else:
302 gotit = waiter.acquire(False)
KeyboardInterrupt:
out_2.parq
├── _common_metadata
├── _metadata
├── part.0.parquet
├── part.1.parquet
├── part.2.parquet
├── part.3.parquet
└── part.4.parquet
Python 3.7.9
intake: 0.6.0
intake_parquet: 0.2.3
fsspec: 0.8.3
fastparquet: 0.4.2
dask: 2020.12.0