Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change temp view name to uppercase, fix various build issues #536

Merged
merged 24 commits into from
Apr 26, 2024
Merged
Prev Previous commit
Next Next commit
update
goodwanghan authored Apr 8, 2024
commit 5c868004d358804b2a7c3f100a8baa843cd04087
8 changes: 4 additions & 4 deletions fugue_dask/_utils.py
Original file line number Diff line number Diff line change
@@ -53,7 +53,7 @@ def hash_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
if num < 1:
return df
if num == 1:
- return df.repartition(1)
+ return df.repartition(npartitions=1)
df = df.reset_index(drop=True).clear_divisions()
idf, ct = _add_hash_index(df, num, cols)
return _postprocess(idf, ct, num)
@@ -76,7 +76,7 @@ def even_repartition(df: dd.DataFrame, num: int, cols: List[Any]) -> dd.DataFram
the number of partitions will be the number of groups.
"""
if num == 1:
- return df.repartition(1)
+ return df.repartition(npartitions=1)
if len(cols) == 0 and num <= 0:
return df
df = df.reset_index(drop=True).clear_divisions()
@@ -111,7 +111,7 @@ def rand_repartition(
if num < 1:
return df
if num == 1:
- return df.repartition(1)
+ return df.repartition(npartitions=1)
df = df.reset_index(drop=True).clear_divisions()
if len(cols) == 0:
idf, ct = _add_random_index(df, num=num, seed=seed)
@@ -124,7 +124,7 @@ def rand_repartition(
def _postprocess(idf: dd.DataFrame, ct: int, num: int) -> dd.DataFrame:
parts = min(ct, num)
if parts <= 1:
- return idf.repartition(1)
+ return idf.repartition(npartitions=1)
divisions = list(np.arange(ct, step=math.ceil(ct / parts)))
divisions.append(ct - 1)
return idf.repartition(divisions=divisions, force=True)