Fix Archiver #151
11 errors, 34 pass in 56s
2 files 2 suites 56s ⏱️
45 tests 34 ✅ 0 💤 0 ❌ 11 🔥
90 runs 68 ✅ 1 💤 0 ❌ 21 🔥
Results for commit 1c58f21.
Annotations
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[classification_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 10s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 10s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
@pytest.fixture(scope="session", autouse=True)
def create_coco_dataset():
dataset_name = "coco_test"
url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
output_zip = WORK_DIR / "COCO_people_subset.zip"
if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists():
> gdown.download(url, output_zip, quiet=False)
tests/integration/conftest.py:168:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
url = 'https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT'
output = PosixPath('tests/data/COCO_people_subset.zip'), quiet = False
proxy = None, speed = None, use_cookies = True, verify = True, id = None
fuzzy = False, resume = False, format = None
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
log_messages = {}
def download(
url=None,
output=None,
quiet=False,
proxy=None,
speed=None,
use_cookies=True,
verify=True,
id=None,
fuzzy=False,
resume=False,
format=None,
user_agent=None,
log_messages=None,
):
"""Download file from URL.
Parameters
----------
url: str
URL. Google Drive URL is also supported.
output: str
Output filename/directory. Default is basename of URL.
If output ends with separator '/' basename will be kept and the
parameter will be treated as parenting directory.
quiet: bool
Suppress terminal output. Default is False.
proxy: str
Proxy.
speed: float
Download byte size per second (e.g., 256KB/s = 256 * 1024).
use_cookies: bool
Flag to use cookies. Default is True.
verify: bool or string
Either a bool, in which case it controls whether the server's TLS
certificate is verified, or a string, in which case it must be a path
to a CA bundle to use. Default is True.
id: str
Google Drive's file ID.
fuzzy: bool
Fuzzy extraction of Google Drive's file Id. Default is False.
resume: bool
Resume interrupted downloads while skipping completed ones.
Default is False.
format: str, optional
Format of Google Docs, Spreadsheets and Slides. Default is:
- Google Docs: 'docx'
- Google Spreadsheet: 'xlsx'
- Google Slides: 'pptx'
user_agent: str, optional
User-agent to use in the HTTP request.
log_messages: dict, optional
Log messages to customize. Currently it supports:
- 'start': the message to show the start of the download
- 'output': the message to show the output filename
Returns
-------
output: str
Output filename.
"""
if not (id is None) ^ (url is None):
raise ValueError("Either url or id has to be specified")
if id is not None:
url = "https://drive.google.com/uc?id={id}".format(id=id)
if user_agent is None:
# We need to use different user agent for file download c.f., folder
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36" # NOQA: E501
if log_messages is None:
log_messages = {}
url_origin = url
sess, cookies_file = _get_session(
proxy=proxy,
use_cookies=use_cookies,
user_agent=user_agent,
return_cookies_file=True,
)
gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)
if fuzzy and gdrive_file_id:
# overwrite the url with fuzzy match of a file id
url = "https://drive.google.com/uc?id={id}".format(id=gdrive_file_id)
url_origin = url
is_gdrive_download_link = True
while True:
res = sess.get(url, stream=True, verify=verify)
if not (gdrive_file_id and is_gdrive_download_link):
break
if url == url_origin and res.status_code == 500:
# The file could be Google Docs or Spreadsheets.
url = "https://drive.google.com/open?id={id}".format(id=gdrive_file_id)
continue
if res.headers["Content-Type"].startswith("text/html"):
m = re.search("<title>(.+)</title>", res.text)
if m and m.groups()[0].endswith(" - Google Docs"):
url = (
"https://docs.google.com/document/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="docx" if format is None else format,
)
)
continue
elif m and m.groups()[0].endswith(" - Google Sheets"):
url = (
"https://docs.google.com/spreadsheets/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="xlsx" if format is None else format,
)
)
continue
elif m and m.groups()[0].endswith(" - Google Slides"):
url = (
"https://docs.google.com/presentation/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="pptx" if format is None else format,
)
)
continue
elif (
"Content-Disposition" in res.headers
and res.headers["Content-Disposition"].endswith("pptx")
and format not in {None, "pptx"}
):
url = (
"https://docs.google.com/presentation/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="pptx" if format is None else format,
)
)
continue
if use_cookies:
cookie_jar = MozillaCookieJar(cookies_file)
for cookie in sess.cookies:
cookie_jar.set_cookie(cookie)
cookie_jar.save()
if "Content-Disposition" in res.headers:
# This is the file
break
# Need to redirect with confirmation
try:
url = get_url_from_gdrive_confirmation(res.text)
except FileURLRetrievalError as e:
message = (
"Failed to retrieve file url:\n\n{}\n\n"
"You may still be able to access the file from the browser:"
"\n\n\t{}\n\n"
"but Gdown can't. Please check connections and permissions."
).format(
indent("\n".join(textwrap.wrap(str(e))), prefix="\t"),
url_origin,
)
raise FileURLRetrievalError(message)
filename_from_url = None
last_modified_time = None
if gdrive_file_id and is_gdrive_download_link:
filename_from_url = _get_filename_from_response(response=res)
last_modified_time = _get_modified_time_from_response(response=res)
if filename_from_url is None:
filename_from_url = osp.basename(url)
if output is None:
output = filename_from_url
output_is_path = isinstance(output, str)
if output_is_path and output.endswith(osp.sep):
if not osp.exists(output):
os.makedirs(output)
output = osp.join(output, filename_from_url)
if output_is_path:
if resume and os.path.isfile(output):
if not quiet:
print(f"Skipping already downloaded file {output}", file=sys.stderr)
return output
existing_tmp_files = []
for file in os.listdir(osp.dirname(output) or "."):
if file.startswith(osp.basename(output)) and file.endswith(".part"):
existing_tmp_files.append(osp.join(osp.dirname(output), file))
if resume and existing_tmp_files:
if len(existing_tmp_files) != 1:
print(
"There are multiple temporary files to resume:",
file=sys.stderr,
)
print("\n")
for file in existing_tmp_files:
print("\t", file, file=sys.stderr)
print("\n")
print(
"Please remove them except one to resume downloading.",
file=sys.stderr,
)
return
tmp_file = existing_tmp_files[0]
else:
resume = False
# mkstemp is preferred, but does not work on Windows
# https://github.com/wkentaro/gdown/issues/153
tmp_file = tempfile.mktemp(
suffix=".part",
prefix=osp.basename(output),
dir=osp.dirname(output),
)
f = open(tmp_file, "ab")
else:
tmp_file = None
f = output
if tmp_file is not None and f.tell() != 0:
start_size = f.tell()
headers = {"Range": "bytes={}-".format(start_size)}
res = sess.get(url, headers=headers, stream=True, verify=verify)
else:
start_size = 0
if not quiet:
print(log_messages.get("start", "Downloading...\n"), file=sys.stderr, end="")
if resume:
print("Resume:", tmp_file, file=sys.stderr)
if url_origin != url:
print("From (original):", url_origin, file=sys.stderr)
print("From (redirected):", url, file=sys.stderr)
else:
print("From:", url, file=sys.stderr)
print(
log_messages.get(
"output", f"To: {osp.abspath(output) if output_is_path else output}\n"
),
file=sys.stderr,
end="",
)
try:
total = res.headers.get("Content-Length")
if total is not None:
total = int(total) + start_size
if not quiet:
pbar = tqdm.tqdm(total=total, unit="B", initial=start_size, unit_scale=True)
t_start = time.time()
for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
> f.write(chunk)
E AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/gdown/download.py:369: AttributeError
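
All 11 errors trace back to the same setup failure: the session-scoped create_coco_dataset fixture passes a pathlib.Path as the output argument to gdown.download, and the gdown version installed on the runners only recognizes str outputs as paths (output_is_path = isinstance(output, str)); any other object is assumed to be an already-open file handle, so the library ends up calling .write() on the PosixPath. Below is a minimal sketch of a possible fix in tests/integration/conftest.py, assuming the fixture body shown in the traceback above (WORK_DIR and the remainder of the fixture come from the existing conftest and are elided here):

    import gdown
    import pytest

    @pytest.fixture(scope="session", autouse=True)
    def create_coco_dataset():
        dataset_name = "coco_test"
        url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
        output_zip = WORK_DIR / "COCO_people_subset.zip"
        if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists():
            # Convert the Path to str so gdown's isinstance(output, str) check
            # treats it as a filename rather than a file-like object.
            gdown.download(url, str(output_zip), quiet=False)
        # ... rest of the fixture unchanged ...

If a newer gdown release accepts pathlib.Path outputs, pinning such a version would also work, but converting to str keeps the fixture compatible with whatever gdown version the CI runners install.
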
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[segmentation_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[detection_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[keypoint_bbox_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[resnet_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[coco_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_simple_models[efficient_coco_model] (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
@pytest.fixture(scope="session", autouse=True)
def create_coco_dataset():
dataset_name = "coco_test"
url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
output_zip = WORK_DIR / "COCO_people_subset.zip"
if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists():
> gdown.download(url, output_zip, quiet=False)
tests/integration/conftest.py:168:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
url = 'https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT'
output = PosixPath('tests/data/COCO_people_subset.zip'), quiet = False
proxy = None, speed = None, use_cookies = True, verify = True, id = None
fuzzy = False, resume = False, format = None
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
log_messages = {}
def download(
url=None,
output=None,
quiet=False,
proxy=None,
speed=None,
use_cookies=True,
verify=True,
id=None,
fuzzy=False,
resume=False,
format=None,
user_agent=None,
log_messages=None,
):
"""Download file from URL.
Parameters
----------
url: str
URL. Google Drive URL is also supported.
output: str
Output filename/directory. Default is basename of URL.
If output ends with separator '/' basename will be kept and the
parameter will be treated as parenting directory.
quiet: bool
Suppress terminal output. Default is False.
proxy: str
Proxy.
speed: float
Download byte size per second (e.g., 256KB/s = 256 * 1024).
use_cookies: bool
Flag to use cookies. Default is True.
verify: bool or string
Either a bool, in which case it controls whether the server's TLS
certificate is verified, or a string, in which case it must be a path
to a CA bundle to use. Default is True.
id: str
Google Drive's file ID.
fuzzy: bool
Fuzzy extraction of Google Drive's file Id. Default is False.
resume: bool
Resume interrupted downloads while skipping completed ones.
Default is False.
format: str, optional
Format of Google Docs, Spreadsheets and Slides. Default is:
- Google Docs: 'docx'
- Google Spreadsheet: 'xlsx'
- Google Slides: 'pptx'
user_agent: str, optional
User-agent to use in the HTTP request.
log_messages: dict, optional
Log messages to customize. Currently it supports:
- 'start': the message to show the start of the download
- 'output': the message to show the output filename
Returns
-------
output: str
Output filename.
"""
if not (id is None) ^ (url is None):
raise ValueError("Either url or id has to be specified")
if id is not None:
url = "https://drive.google.com/uc?id={id}".format(id=id)
if user_agent is None:
# We need to use different user agent for file download c.f., folder
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36" # NOQA: E501
if log_messages is None:
log_messages = {}
url_origin = url
sess, cookies_file = _get_session(
proxy=proxy,
use_cookies=use_cookies,
user_agent=user_agent,
return_cookies_file=True,
)
gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)
if fuzzy and gdrive_file_id:
# overwrite the url with fuzzy match of a file id
url = "https://drive.google.com/uc?id={id}".format(id=gdrive_file_id)
url_origin = url
is_gdrive_download_link = True
while True:
res = sess.get(url, stream=True, verify=verify)
if not (gdrive_file_id and is_gdrive_download_link):
break
if url == url_origin and res.status_code == 500:
# The file could be Google Docs or Spreadsheets.
url = "https://drive.google.com/open?id={id}".format(id=gdrive_file_id)
continue
if res.headers["Content-Type"].startswith("text/html"):
m = re.search("<title>(.+)</title>", res.text)
if m and m.groups()[0].endswith(" - Google Docs"):
url = (
"https://docs.google.com/document/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="docx" if format is None else format,
)
)
continue
elif m and m.groups()[0].endswith(" - Google Sheets"):
url = (
"https://docs.google.com/spreadsheets/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="xlsx" if format is None else format,
)
)
continue
elif m and m.groups()[0].endswith(" - Google Slides"):
url = (
"https://docs.google.com/presentation/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="pptx" if format is None else format,
)
)
continue
elif (
"Content-Disposition" in res.headers
and res.headers["Content-Disposition"].endswith("pptx")
and format not in {None, "pptx"}
):
url = (
"https://docs.google.com/presentation/d/{id}/export"
"?format={format}".format(
id=gdrive_file_id,
format="pptx" if format is None else format,
)
)
continue
if use_cookies:
cookie_jar = MozillaCookieJar(cookies_file)
for cookie in sess.cookies:
cookie_jar.set_cookie(cookie)
cookie_jar.save()
if "Content-Disposition" in res.headers:
# This is the file
break
# Need to redirect with confirmation
try:
url = get_url_from_gdrive_confirmation(res.text)
except FileURLRetrievalError as e:
message = (
"Failed to retrieve file url:\n\n{}\n\n"
"You may still be able to access the file from the browser:"
"\n\n\t{}\n\n"
"but Gdown can't. Please check connections and permissions."
).format(
indent("\n".join(textwrap.wrap(str(e))), prefix="\t"),
url_origin,
)
raise FileURLRetrievalError(message)
filename_from_url = None
last_modified_time = None
if gdrive_file_id and is_gdrive_download_link:
filename_from_url = _get_filename_from_response(response=res)
last_modified_time = _get_modified_time_from_response(response=res)
if filename_from_url is None:
filename_from_url = osp.basename(url)
if output is None:
output = filename_from_url
output_is_path = isinstance(output, str)
if output_is_path and output.endswith(osp.sep):
if not osp.exists(output):
os.makedirs(output)
output = osp.join(output, filename_from_url)
if output_is_path:
if resume and os.path.isfile(output):
if not quiet:
print(f"Skipping already downloaded file {output}", file=sys.stderr)
return output
existing_tmp_files = []
for file in os.listdir(osp.dirname(output) or "."):
if file.startswith(osp.basename(output)) and file.endswith(".part"):
existing_tmp_files.append(osp.join(osp.dirname(output), file))
if resume and existing_tmp_files:
if len(existing_tmp_files) != 1:
print(
"There are multiple temporary files to resume:",
file=sys.stderr,
)
print("\n")
for file in existing_tmp_files:
print("\t", file, file=sys.stderr)
print("\n")
print(
"Please remove them except one to resume downloading.",
file=sys.stderr,
)
return
tmp_file = existing_tmp_files[0]
else:
resume = False
# mkstemp is preferred, but does not work on Windows
# https://github.com/wkentaro/gdown/issues/153
tmp_file = tempfile.mktemp(
suffix=".part",
prefix=osp.basename(output),
dir=osp.dirname(output),
)
f = open(tmp_file, "ab")
else:
tmp_file = None
f = output
if tmp_file is not None and f.tell() != 0:
start_size = f.tell()
headers = {"Range": "bytes={}-".format(start_size)}
res = sess.get(url, headers=headers, stream=True, verify=verify)
else:
start_size = 0
if not quiet:
print(log_messages.get("start", "Downloading...\n"), file=sys.stderr, end="")
if resume:
print("Resume:", tmp_file, file=sys.stderr)
if url_origin != url:
print("From (original):", url_origin, file=sys.stderr)
print("From (redirected):", url, file=sys.stderr)
else:
print("From:", url, file=sys.stderr)
print(
log_messages.get(
"output", f"To: {osp.abspath(output) if output_is_path else output}\n"
),
file=sys.stderr,
end="",
)
try:
total = res.headers.get("Content-Length")
if total is not None:
total = int(total) + start_size
if not quiet:
pbar = tqdm.tqdm(total=total, unit="B", initial=start_size, unit_scale=True)
t_start = time.time()
for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
> f.write(chunk)
E AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/gdown/download.py:369: AttributeError
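All of these setup errors come from the same call: gdown.download() treats output as a filesystem path only when isinstance(output, str) is true, so the PosixPath passed by create_coco_dataset falls through to the file-like-object branch (tmp_file = None; f = output) and f.write(chunk) raises because PosixPath has no write() method. A minimal fixture-side workaround, sketched under the assumption that WORK_DIR is tests/data as shown in the traceback (illustrative only, not necessarily the fix applied in this commit), is to hand gdown a plain string:
import gdown
import pytest
from pathlib import Path
WORK_DIR = Path("tests/data")  # matches PosixPath('tests/data/...') seen in the traceback
@pytest.fixture(scope="session", autouse=True)
def create_coco_dataset():
    url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
    output_zip = WORK_DIR / "COCO_people_subset.zip"
    if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists():
        # str() makes gdown take its path-handling branch (output_is_path = True)
        # instead of treating the Path object as a writable file.
        gdown.download(url, str(output_zip), quiet=False)
    # ...rest of the fixture (extraction, yield) unchanged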
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_multi_input (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
(Full traceback identical to the one shown above for the first failing test: the create_coco_dataset fixture passes a PosixPath to gdown.download(), which fails at f.write(chunk) in gdown/download.py:369.)
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_custom_tasks (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
(Full traceback identical to the one shown above for the first failing test: the create_coco_dataset fixture passes a PosixPath to gdown.download(), which fails at f.write(chunk) in gdown/download.py:369.)
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
All 2 runs with error: test_parsing_loader (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
artifacts/Test Results [windows-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
(Full traceback identical to the one shown above for the first failing test: the create_coco_dataset fixture passes a PosixPath to gdown.download(), which fails at f.write(chunk) in gdown/download.py:369.)
Check failure on line 0 in tests.integration.test_sanity
github-actions / Test Results
1 out of 2 runs with error: test_tuner (tests.integration.test_sanity)
artifacts/Test Results [ubuntu-latest] (Python )/pytest.xml [took 0s]
Raw output
failed on setup with "AttributeError: 'PosixPath' object has no attribute 'write'. Did you mean: 'drive'?"
(Full traceback identical to the one shown above for the first failing test: the create_coco_dataset fixture passes a PosixPath to gdown.download(), which fails at f.write(chunk) in gdown/download.py:369.)
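The download() source in the traceback also suggests a second way around the error: when output is not a str, gdown sets f = output and streams chunks straight into it, so passing an already-open binary file object skips the path handling entirely. A sketch under that assumption (it relies on the untruncated tail of download() leaving a caller-provided file object open, which is not visible in this output):
import gdown
from pathlib import Path
WORK_DIR = Path("tests/data")  # same value as in the traceback
url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
output_zip = WORK_DIR / "COCO_people_subset.zip"
# Open the target ourselves; gdown then follows its `f = output` branch
# and writes the streamed chunks into this handle.
with open(output_zip, "wb") as f:
    gdown.download(url, f, quiet=False)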