cloudpathlib.CloudPath¶
Base class for cloud storage file URIs, in the style of the Python standard library's
pathlib
module. Instances represent a path
in cloud storage with filesystem path semantics, and convenient methods allow for basic
operations like joining, reading, writing, iterating over contents, etc. CloudPath
almost
entirely mimics the pathlib.Path
interface, so most familiar properties and methods should be available and behave in the
expected way.
Analogous to the way pathlib.Path
works, instantiating CloudPath
will instead create an
instance of an appropriate subclass that implements a particular cloud storage service, such as
S3Path
. This dispatching behavior is based on the URI scheme part of a cloud
storage URI (e.g., "s3://"
).
Source code in cloudpathlib/cloudpath.py
class CloudPath(metaclass=CloudPathMeta):
"""Base class for cloud storage file URIs, in the style of the Python standard library's
[`pathlib` module](https://docs.python.org/3/library/pathlib.html). Instances represent a path
in cloud storage with filesystem path semantics, and convenient methods allow for basic
operations like joining, reading, writing, iterating over contents, etc. `CloudPath` almost
entirely mimics the [`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path)
interface, so most familiar properties and methods should be available and behave in the
expected way.
Analogous to the way `pathlib.Path` works, instantiating `CloudPath` will instead create an
instance of an appropriate subclass that implements a particular cloud storage service, such as
[`S3Path`](../s3path). This dispatching behavior is based on the URI scheme part of a cloud
storage URI (e.g., `"s3://"`).
"""
_cloud_meta: CloudImplementation
cloud_prefix: str
def __init__(
self,
cloud_path: Union[str, Self, "CloudPath"],
client: Optional["Client"] = None,
) -> None:
# handle if local file gets opened. must be set at the top of the method in case any code
# below raises an exception, this prevents __del__ from raising an AttributeError
self._handle: Optional[IO] = None
self._client: Optional["Client"] = None
self.is_valid_cloudpath(cloud_path, raise_on_error=True)
self._cloud_meta.validate_completeness()
# versions of the raw string that provide useful methods
self._str = str(cloud_path)
self._url = urlparse(self._str)
self._path = PurePosixPath(f"/{self._no_prefix}")
# setup client
if client is None:
if isinstance(cloud_path, CloudPath):
self._client = cloud_path.client
else:
self._client = client
if client is not None and not isinstance(client, self._cloud_meta.client_class):
raise ClientMismatchError(
f"Client of type [{client.__class__}] is not valid for cloud path of type "
f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
f"None to use default client for this cloud path class."
)
# track if local has been written to, if so it may need to be uploaded
self._dirty = False
@property
def client(self):
if getattr(self, "_client", None) is None:
self._client = self._cloud_meta.client_class.get_default_client()
return self._client
def __del__(self) -> None:
# make sure that file handle to local path is closed
if self._handle is not None and self._local.exists():
self._handle.close()
# ensure file removed from cache when cloudpath object deleted
client = getattr(self, "_client", None)
if getattr(client, "file_cache_mode", None) == FileCacheMode.cloudpath_object:
self.clear_cache()
def __getstate__(self) -> Dict[str, Any]:
state = self.__dict__.copy()
# don't pickle client
if "_client" in state:
del state["_client"]
return state
def __setstate__(self, state: Dict[str, Any]) -> None:
self.__dict__.update(state)
@property
def _no_prefix(self) -> str:
return self._str[len(self.cloud_prefix) :]
@property
def _no_prefix_no_drive(self) -> str:
return self._str[len(self.cloud_prefix) + len(self.drive) :]
@overload
@classmethod
def is_valid_cloudpath(
cls, path: "CloudPath", raise_on_error: bool = ...
) -> TypeGuard[Self]: ...
@overload
@classmethod
def is_valid_cloudpath(cls, path: str, raise_on_error: bool = ...) -> bool: ...
@classmethod
def is_valid_cloudpath(
cls, path: Union[str, "CloudPath"], raise_on_error: bool = False
) -> Union[bool, TypeGuard[Self]]:
valid = str(path).lower().startswith(cls.cloud_prefix.lower())
if raise_on_error and not valid:
raise InvalidPrefixError(
f"'{path}' is not a valid path since it does not start with '{cls.cloud_prefix}'"
)
return valid
def __repr__(self) -> str:
return f"{self.__class__.__name__}('{self}')"
def __str__(self) -> str:
return self._str
def __hash__(self) -> int:
return hash((type(self).__name__, str(self)))
def __eq__(self, other: Any) -> bool:
return isinstance(other, type(self)) and str(self) == str(other)
def __fspath__(self) -> str:
if self.is_file():
self._refresh_cache()
return str(self._local)
def __lt__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return self.parts < other.parts
def __le__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return self.parts <= other.parts
def __gt__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return self.parts > other.parts
def __ge__(self, other: Any) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return self.parts >= other.parts
# ====================== NOT IMPLEMENTED ======================
# as_posix - no cloud equivalent; not needed since we assume url separator
# chmod - permission changing should be explicitly done per client with methods
# that make sense for the client permission options
# cwd - no cloud equivalent
# expanduser - no cloud equivalent
# group - should be implemented with client-specific permissions
# home - no cloud equivalent
# is_block_device - no cloud equivalent
# is_char_device - no cloud equivalent
# is_fifo - no cloud equivalent
# is_mount - no cloud equivalent
# is_reserved - no cloud equivalent
# is_socket - no cloud equivalent
# is_symlink - no cloud equivalent
# lchmod - no cloud equivalent
# lstat - no cloud equivalent
# owner - no cloud equivalent
# root - drive already has the bucket and anchor/prefix has the scheme, so nothing to store here
# symlink_to - no cloud equivalent
# link_to - no cloud equivalent
# hardlink_to - no cloud equivalent
# ====================== REQUIRED, NOT GENERIC ======================
# Methods that must be implemented, but have no generic application
@property
@abc.abstractmethod
def drive(self) -> str:
"""For example "bucket" on S3 or "container" on Azure; needs to be defined for each class"""
pass
@abc.abstractmethod
def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
"""Should be implemented using the client API without requiring a dir is downloaded"""
pass
@abc.abstractmethod
def touch(self, exist_ok: bool = True) -> None:
"""Should be implemented using the client API to create and update modified time"""
pass
def as_url(self, presign: bool = False, expire_seconds: int = 60 * 60) -> str:
if presign:
url = self.client._generate_presigned_url(self, expire_seconds=expire_seconds)
else:
url = self.client._get_public_url(self)
return url
# ====================== IMPLEMENTED FROM SCRATCH ======================
# Methods with their own implementations that work generically
def __rtruediv__(self, other: Any) -> None:
raise ValueError(
"Cannot change a cloud path's root since all paths are absolute; create a new path instead."
)
@property
def anchor(self) -> str:
return self.cloud_prefix
def as_uri(self) -> str:
return str(self)
def exists(self) -> bool:
return self.client._exists(self)
def is_dir(self, follow_symlinks=True) -> bool:
return self.client._is_file_or_dir(self) == "dir"
def is_file(self, follow_symlinks=True) -> bool:
return self.client._is_file_or_dir(self) == "file"
@property
def fspath(self) -> str:
return self.__fspath__()
@classmethod
def from_uri(cls, uri: str) -> Self:
return cls(uri)
def _glob_checks(self, pattern: Union[str, os.PathLike]) -> str:
if isinstance(pattern, os.PathLike):
if isinstance(pattern, CloudPath):
str_pattern = str(pattern.relative_to(self))
else:
str_pattern = os.fspath(pattern)
else:
str_pattern = str(pattern)
if ".." in str_pattern:
raise CloudPathNotImplementedError(
"Relative paths with '..' not supported in glob patterns."
)
if str_pattern.startswith(self.cloud_prefix) or str_pattern.startswith("/"):
raise CloudPathNotImplementedError("Non-relative patterns are unsupported")
if self.drive == "":
raise CloudPathNotImplementedError(
".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()"
)
return str_pattern
def _build_subtree(self, recursive):
# build a tree structure for all files out of default dicts
Tree: Callable = lambda: defaultdict(Tree)
def _build_tree(trunk, branch, nodes, is_dir):
"""Utility to build a tree from nested defaultdicts with a generator
of nodes (parts) of a path."""
next_branch = next(nodes, None)
if next_branch is None:
trunk[branch] = Tree() if is_dir else None # leaf node
else:
_build_tree(trunk[branch], next_branch, nodes, is_dir)
file_tree = Tree()
for f, is_dir in self.client._list_dir(self, recursive=recursive):
parts = str(f.relative_to(self)).split("/")
# skip self
if len(parts) == 1 and parts[0] == ".":
continue
nodes = (p for p in parts)
_build_tree(file_tree, next(nodes, None), nodes, is_dir)
return dict(file_tree) # freeze as normal dict before passing in
def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]:
file_tree = self._build_subtree(recursive)
root = _CloudPathSelectable(
self.name,
[], # nothing above self will be returned, so initial parents is empty
file_tree,
)
for p in selector.select_from(root):
# select_from returns self.name/... so strip before joining
yield (self / str(p)[len(self.name) + 1 :])
def glob(
self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
pattern = self._glob_checks(pattern)
pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(
tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)
yield from self._glob(
selector,
"/" in pattern
or "**"
in pattern, # recursive listing needed if explicit ** or any sub folder in pattern
)
def rglob(
self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
pattern = self._glob_checks(pattern)
pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(
("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)
yield from self._glob(selector, True)
def iterdir(self) -> Generator[Self, None, None]:
for f, _ in self.client._list_dir(self, recursive=False):
if f != self: # iterdir does not include itself in pathlib
yield f
@staticmethod
def _walk_results_from_tree(root, tree, top_down=True):
"""Utility to yield tuples in the form expected by `.walk` from the file
tree constructed by `_build_substree`.
"""
dirs = []
files = []
for item, branch in tree.items():
files.append(item) if branch is None else dirs.append(item)
if top_down:
yield root, dirs, files
for dir in dirs:
yield from CloudPath._walk_results_from_tree(root / dir, tree[dir], top_down=top_down)
if not top_down:
yield root, dirs, files
def walk(
self,
top_down: bool = True,
on_error: Optional[Callable] = None,
follow_symlinks: bool = False,
) -> Generator[Tuple[Self, List[str], List[str]], None, None]:
try:
file_tree = self._build_subtree(recursive=True) # walking is always recursive
yield from self._walk_results_from_tree(self, file_tree, top_down=top_down)
except Exception as e:
if on_error is not None:
on_error(e)
else:
raise
@overload
def open(
self,
mode: "OpenTextMode" = "r",
buffering: int = -1,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "TextIOWrapper": ...
@overload
def open(
self,
mode: "OpenBinaryMode",
buffering: Literal[0],
encoding: None = None,
errors: None = None,
newline: None = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "FileIO": ...
@overload
def open(
self,
mode: "OpenBinaryModeUpdating",
buffering: Literal[-1, 1] = -1,
encoding: None = None,
errors: None = None,
newline: None = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "BufferedRandom": ...
@overload
def open(
self,
mode: "OpenBinaryModeWriting",
buffering: Literal[-1, 1] = -1,
encoding: None = None,
errors: None = None,
newline: None = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "BufferedWriter": ...
@overload
def open(
self,
mode: "OpenBinaryModeReading",
buffering: Literal[-1, 1] = -1,
encoding: None = None,
errors: None = None,
newline: None = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "BufferedReader": ...
@overload
def open(
self,
mode: "OpenBinaryMode",
buffering: int = -1,
encoding: None = None,
errors: None = None,
newline: None = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "BinaryIO": ...
@overload
def open(
self,
mode: str,
buffering: int = -1,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: Optional[bool] = None,
force_overwrite_to_cloud: Optional[bool] = None,
) -> "IO[Any]": ...
def open(
self,
mode: str = "r",
buffering: int = -1,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: Optional[bool] = None, # extra kwarg not in pathlib
force_overwrite_to_cloud: Optional[bool] = None, # extra kwarg not in pathlib
) -> "IO[Any]":
# if trying to call open on a directory that exists
if self.exists() and not self.is_file():
raise CloudPathIsADirectoryError(
f"Cannot open directory, only files. Tried to open ({self})"
)
if mode == "x" and self.exists():
raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")
# TODO: consider streaming from client rather than DLing entire file to cache
self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)
# create any directories that may be needed if the file is new
if not self._local.exists():
self._local.parent.mkdir(parents=True, exist_ok=True)
original_mtime = 0.0
else:
original_mtime = self._local.stat().st_mtime
buffer = self._local.open(
mode=mode,
buffering=buffering,
encoding=encoding,
errors=errors,
newline=newline,
)
# write modes need special on closing the buffer
if any(m in mode for m in ("w", "+", "x", "a")):
# dirty, handle, patch close
wrapped_close = buffer.close
# since we are pretending this is a cloud file, upload it to the cloud
# when the buffer is closed
def _patched_close_upload(*args, **kwargs) -> None:
wrapped_close(*args, **kwargs)
# we should be idempotent and not upload again if
# we already ran our close method patch
if not self._dirty:
return
# original mtime should match what was in the cloud; because of system clocks or rounding
# by the cloud provider, the new version in our cache is "older" than the original version;
# explicitly set the new modified time to be after the original modified time.
if self._local.stat().st_mtime < original_mtime:
new_mtime = original_mtime + 1
os.utime(self._local, times=(new_mtime, new_mtime))
self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)
self._dirty = False
buffer.close = _patched_close_upload # type: ignore
# keep reference in case we need to close when __del__ is called on this object
self._handle = buffer
# opened for write, so mark dirty
self._dirty = True
# if we don't want any cache around, remove the cache
# as soon as the file is closed
if self.client.file_cache_mode == FileCacheMode.close_file:
# this may be _patched_close_upload, in which case we need to
# make sure to call that first so the file gets uploaded
wrapped_close_for_cache = buffer.close
def _patched_close_empty_cache(*args, **kwargs):
wrapped_close_for_cache(*args, **kwargs)
# remove local file as last step on closing
self.clear_cache()
buffer.close = _patched_close_empty_cache # type: ignore
return buffer
def replace(self, target: Self) -> Self:
if type(self) is not type(target):
raise TypeError(
f"The target based to rename must be an instantiated class of type: {type(self)}"
)
if self.is_dir():
raise CloudPathIsADirectoryError(
f"Path {self} is a directory; rename/replace the files recursively."
)
if target == self:
# Request is to replace/rename this with the same path - nothing to do
return self
if target.exists():
target.unlink()
self.client._move_file(self, target)
return target
def rename(self, target: Self) -> Self:
# for cloud services replace == rename since we don't just rename,
# we actually move files
return self.replace(target)
def rmdir(self) -> None:
if self.is_file():
raise CloudPathNotADirectoryError(
f"Path {self} is a file; call unlink instead of rmdir."
)
try:
next(self.iterdir())
raise DirectoryNotEmptyError(
f"Directory not empty: '{self}'. Use rmtree to delete recursively."
)
except StopIteration:
pass
self.client._remove(self)
def samefile(self, other_path: Union[str, os.PathLike]) -> bool:
# all cloud paths are absolute and the paths are used for hash
return self == other_path
def unlink(self, missing_ok: bool = True) -> None:
# Note: missing_ok defaults to False in pathlib, but changing the default now would be a breaking change.
if self.is_dir():
raise CloudPathIsADirectoryError(
f"Path {self} is a directory; call rmdir instead of unlink."
)
self.client._remove(self, missing_ok)
def write_bytes(self, data: bytes) -> int:
"""Open the file in bytes mode, write to it, and close the file.
NOTE: vendored from pathlib since we override open
https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
"""
# type-check for the buffer interface before truncating the file
view = memoryview(data)
with self.open(mode="wb") as f:
return f.write(view)
def write_text(
self,
data: str,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> int:
"""Open the file in text mode, write to it, and close the file.
NOTE: vendored from pathlib since we override open
https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L1146-L1155
"""
if not isinstance(data, str):
raise TypeError("data must be str, not %s" % data.__class__.__name__)
with self.open(mode="w", encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)
def read_bytes(self) -> bytes:
with self.open(mode="rb") as f:
return f.read()
def read_text(
self,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> str:
with self.open(mode="r", encoding=encoding, errors=errors, newline=newline) as f:
return f.read()
def is_junction(self):
return False # only windows paths can be junctions, not cloudpaths
# ====================== DISPATCHED TO POSIXPATH FOR PURE PATHS ======================
# Methods that are dispatched to exactly how pathlib.PurePosixPath would calculate it on
# self._path for pure paths (does not matter if file exists);
# see the next session for ones that require a real file to exist
def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any:
"""Some functions we can just dispatch to the pathlib version
We want to do this explicitly so we don't have to support all
of pathlib and subclasses can override individually if necessary.
"""
path_version = self._path.__getattribute__(func)
# Path functions should be called so the results are calculated
if callable(path_version):
path_version = path_version(*args, **kwargs)
# Paths should always be resolved and then converted to the same client + class as this one
if isinstance(path_version, PurePosixPath):
# always resolve since cloud paths must be absolute
path_version = _resolve(path_version)
return self._new_cloudpath(path_version)
# When sequence of PurePosixPath, we want to convert to sequence of CloudPaths
if (
isinstance(path_version, collections.abc.Sequence)
and len(path_version) > 0
and isinstance(path_version[0], PurePosixPath)
):
sequence_class = (
type(path_version) if not isinstance(path_version, _PathParents) else tuple
)
return sequence_class( # type: ignore
self._new_cloudpath(_resolve(p)) for p in path_version if _resolve(p) != p.root
)
# when pathlib returns something else, we probably just want that thing
# cases this should include: str, empty sequence, sequence of str, ...
else:
return path_version
def __truediv__(self, other: Union[str, PurePosixPath]) -> Self:
if not isinstance(other, (str, PurePosixPath)):
raise TypeError(f"Can only join path {repr(self)} with strings or posix paths.")
return self._dispatch_to_path("__truediv__", other)
def joinpath(self, *pathsegments: Union[str, os.PathLike]) -> Self:
return self._dispatch_to_path("joinpath", *pathsegments)
def absolute(self) -> Self:
return self
def is_absolute(self) -> bool:
return True
def resolve(self, strict: bool = False) -> Self:
return self
def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath:
# We don't dispatch regularly since this never returns a cloud path (since it is relative, and cloud paths are
# absolute)
if not isinstance(other, CloudPath):
raise ValueError(f"{self} is a cloud path, but {other} is not")
if self.cloud_prefix != other.cloud_prefix:
raise ValueError(
f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path"
)
kwargs = dict(walk_up=walk_up)
if sys.version_info < (3, 12):
kwargs.pop("walk_up")
return self._path.relative_to(other._path, **kwargs) # type: ignore[call-arg]
def is_relative_to(self, other: Self) -> bool:
try:
self.relative_to(other)
return True
except ValueError:
return False
@property
def name(self) -> str:
return self._dispatch_to_path("name")
def full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> bool:
if sys.version_info < (3, 13):
raise NotImplementedError("full_match requires Python 3.13 or higher")
# strip scheme from start of pattern before testing
if pattern.startswith(self.anchor + self.drive):
pattern = pattern[len(self.anchor + self.drive) :]
# remove drive, which is kept on normal dispatch to pathlib
return PurePosixPath(self._no_prefix_no_drive).full_match( # type: ignore[attr-defined]
pattern, case_sensitive=case_sensitive
)
def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool:
# strip scheme from start of pattern before testing
if path_pattern.startswith(self.anchor + self.drive + "/"):
path_pattern = path_pattern[len(self.anchor + self.drive + "/") :]
kwargs = dict(case_sensitive=case_sensitive)
if sys.version_info < (3, 12):
kwargs.pop("case_sensitive")
return self._dispatch_to_path("match", path_pattern, **kwargs)
@property
def parser(self) -> Self:
if sys.version_info < (3, 13):
raise NotImplementedError("parser requires Python 3.13 or higher")
return self._dispatch_to_path("parser")
@property
def parent(self) -> Self:
return self._dispatch_to_path("parent")
@property
def parents(self) -> Sequence[Self]:
return self._dispatch_to_path("parents")
@property
def parts(self) -> Tuple[str, ...]:
parts = self._dispatch_to_path("parts")
if parts[0] == "/":
parts = parts[1:]
return (self.anchor, *parts)
@property
def stem(self) -> str:
return self._dispatch_to_path("stem")
@property
def suffix(self) -> str:
return self._dispatch_to_path("suffix")
@property
def suffixes(self) -> List[str]:
return self._dispatch_to_path("suffixes")
def with_stem(self, stem: str) -> Self:
try:
return self._dispatch_to_path("with_stem", stem)
except AttributeError:
# with_stem was only added in python 3.9, so we fallback for compatibility
return self.with_name(stem + self.suffix)
def with_name(self, name: str) -> Self:
return self._dispatch_to_path("with_name", name)
def with_segments(self, *pathsegments) -> Self:
"""Create a new CloudPath with the same client out of the given segments.
The first segment will be interpreted as the bucket/container name.
"""
return self._new_cloudpath("/".join(pathsegments))
def with_suffix(self, suffix: str) -> Self:
return self._dispatch_to_path("with_suffix", suffix)
# ====================== DISPATCHED TO LOCAL CACHE FOR CONCRETE PATHS ======================
# Items that can be executed on the cached file on the local filesystem
def _dispatch_to_local_cache_path(self, func: str, *args, **kwargs) -> Any:
self._refresh_cache()
path_version = self._local.__getattribute__(func)
# Path functions should be called so the results are calculated
if callable(path_version):
path_version = path_version(*args, **kwargs)
# Paths should always be resolved and then converted to the same client + class as this one
if isinstance(path_version, (PosixPath, WindowsPath)):
# always resolve since cloud paths must be absolute
path_version = path_version.resolve()
return self._new_cloudpath(path_version)
# when pathlib returns a string, etc. we probably just want that thing
else:
return path_version
def stat(self, follow_symlinks: bool = True) -> os.stat_result:
"""Note: for many clients, we may want to override so we don't incur
network costs since many of these properties are available as
API calls.
"""
warn(
f"stat not implemented as API call for {self.__class__} so file must be downloaded to "
f"calculate stats; this may take a long time depending on filesize"
)
return self._dispatch_to_local_cache_path("stat", follow_symlinks=follow_symlinks)
# =========== public cloud methods, not in pathlib ===============
def download_to(self, destination: Union[str, os.PathLike]) -> Path:
destination = Path(destination)
if not self.exists():
raise CloudPathNotExistsError(f"Cannot download because path does not exist: {self}")
if self.is_file():
if destination.is_dir():
destination = destination / self.name
return self.client._download_file(self, destination)
else:
destination.mkdir(exist_ok=True)
for f in self.iterdir():
rel = str(self)
if not rel.endswith("/"):
rel = rel + "/"
rel_dest = str(f)[len(rel) :]
f.download_to(destination / rel_dest)
return destination
def rmtree(self) -> None:
"""Delete an entire directory tree."""
if self.is_file():
raise CloudPathNotADirectoryError(
f"Path {self} is a file; call unlink instead of rmtree."
)
self.client._remove(self)
def upload_from(
self,
source: Union[str, os.PathLike],
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Upload a file or directory to the cloud path."""
source = Path(source)
if source.is_dir():
for p in source.iterdir():
(self / p.name).upload_from(p, force_overwrite_to_cloud=force_overwrite_to_cloud)
return self
else:
if self.exists() and self.is_dir():
dst = self / source.name
else:
dst = self
dst._upload_file_to_cloud(source, force_overwrite_to_cloud=force_overwrite_to_cloud)
return dst
@overload
def copy(
self,
destination: Self,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self: ...
@overload
def copy(
self,
destination: Path,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Path: ...
@overload
def copy(
self,
destination: str,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Union[Path, "CloudPath"]: ...
def copy(self, destination, force_overwrite_to_cloud=None):
"""Copy self to destination folder of file, if self is a file."""
if not self.exists() or not self.is_file():
raise ValueError(
f"Path {self} should be a file. To copy a directory tree use the method copytree."
)
# handle string version of cloud paths + local paths
if isinstance(destination, (str, os.PathLike)):
destination = anypath.to_anypath(destination)
if not isinstance(destination, CloudPath):
return self.download_to(destination)
# if same client, use cloud-native _move_file on client to avoid downloading
if self.client is destination.client:
if destination.exists() and destination.is_dir():
destination = destination / self.name
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]
if (
not force_overwrite_to_cloud
and destination.exists()
and destination.stat().st_mtime >= self.stat().st_mtime
):
raise OverwriteNewerCloudError(
f"File ({destination}) is newer than ({self}). "
f"To overwrite "
f"pass `force_overwrite_to_cloud=True`."
)
return self.client._move_file(self, destination, remove_src=False)
else:
if not destination.exists() or destination.is_file():
return destination.upload_from(
self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
)
else:
return (destination / self.name).upload_from(
self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
)
@overload
def copytree(
self,
destination: Self,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Self: ...
@overload
def copytree(
self,
destination: Path,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Path: ...
@overload
def copytree(
self,
destination: str,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Union[Path, "CloudPath"]: ...
def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None):
"""Copy self to a directory, if self is a directory."""
if not self.is_dir():
raise CloudPathNotADirectoryError(
f"Origin path {self} must be a directory. To copy a single file use the method copy."
)
# handle string version of cloud paths + local paths
if isinstance(destination, (str, os.PathLike)):
destination = anypath.to_anypath(destination)
if destination.exists() and destination.is_file():
raise CloudPathFileExistsError(
f"Destination path {destination} of copytree must be a directory."
)
contents = list(self.iterdir())
if ignore is not None:
ignored_names = ignore(self._no_prefix_no_drive, [x.name for x in contents])
else:
ignored_names = set()
destination.mkdir(parents=True, exist_ok=True)
for subpath in contents:
if subpath.name in ignored_names:
continue
if subpath.is_file():
subpath.copy(
destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
)
elif subpath.is_dir():
subpath.copytree(
destination / subpath.name,
force_overwrite_to_cloud=force_overwrite_to_cloud,
ignore=ignore,
)
return destination
def clear_cache(self):
"""Removes cache if it exists"""
if self._local.exists():
if self._local.is_file():
self._local.unlink()
else:
shutil.rmtree(self._local)
# =========== private cloud methods ===============
@property
def _local(self) -> Path:
"""Cached local version of the file."""
return self.client._local_cache_dir / self._no_prefix
def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:
"""Use the scheme, client, cache dir of this cloudpath to instantiate
a new cloudpath of the same type with the path passed.
Used to make results of iterdir and joins have a unified client + cache.
"""
path = str(path)
# strip initial "/" if path has one
if path.startswith("/"):
path = path[1:]
# add prefix/anchor if it is not already
if not path.startswith(self.cloud_prefix):
path = f"{self.cloud_prefix}{path}"
return self.client.CloudPath(path)
def _refresh_cache(self, force_overwrite_from_cloud: Optional[bool] = None) -> None:
try:
stats = self.stat()
except NoStatError:
# nothing to cache if the file does not exist; happens when creating
# new files that will be uploaded
return
if force_overwrite_from_cloud is None:
force_overwrite_from_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "False"
).lower() in ["1", "true"]
# if not exist or cloud newer
if (
force_overwrite_from_cloud
or not self._local.exists()
or (self._local.stat().st_mtime < stats.st_mtime)
):
# ensure there is a home for the file
self._local.parent.mkdir(parents=True, exist_ok=True)
self.download_to(self._local)
# force cache time to match cloud times
os.utime(self._local, times=(stats.st_mtime, stats.st_mtime))
if self._dirty:
raise OverwriteDirtyFileError(
f"Local file ({self._local}) for cloud path ({self}) has been changed by your code, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)
# if local newer but not dirty, it was updated
# by a separate process; do not overwrite unless forced to
if self._local.stat().st_mtime > stats.st_mtime:
raise OverwriteNewerLocalError(
f"Local file ({self._local}) for cloud path ({self}) is newer on disk, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)
def _upload_local_to_cloud(
self,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads cache file at self._local to the cloud"""
# We should never try to be syncing entire directories; we should only
# cache and upload individual files.
if self._local.is_dir():
raise ValueError("Only individual files can be uploaded to the cloud")
uploaded = self._upload_file_to_cloud(
self._local, force_overwrite_to_cloud=force_overwrite_to_cloud
)
# force cache time to match cloud times
stats = self.stat()
os.utime(self._local, times=(stats.st_mtime, stats.st_mtime))
# reset dirty and handle now that this is uploaded
self._dirty = False
self._handle = None
return uploaded
def _upload_file_to_cloud(
self,
local_path: Path,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads file at `local_path` to the cloud if there is not a newer file
already there.
"""
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]
if force_overwrite_to_cloud:
# If we are overwriting no need to perform any checks, so we can save time
self.client._upload_file(
local_path,
self,
)
return self
try:
stats = self.stat()
except NoStatError:
stats = None
# if cloud does not exist or local is newer, do the upload
if not stats or (local_path.stat().st_mtime > stats.st_mtime):
self.client._upload_file(
local_path,
self,
)
return self
# cloud is newer and we are not overwriting
raise OverwriteNewerCloudError(
f"Local file ({self._local}) for cloud path ({self}) is newer in the cloud disk, but "
f"is being requested to be uploaded to the cloud. Either (1) redownload changes from the cloud or "
f"(2) pass `force_overwrite_to_cloud=True` to "
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1."
)
# =========== pydantic integration special methods ===============
@classmethod
def __get_pydantic_core_schema__(cls, _source_type: Any, _handler):
"""Pydantic special method. See
https://docs.pydantic.dev/2.0/usage/types/custom/"""
try:
from pydantic_core import core_schema
return core_schema.no_info_after_validator_function(
cls.validate,
core_schema.any_schema(),
)
except ImportError:
return None
@classmethod
def validate(cls, v: str) -> Self:
"""Used as a Pydantic validator. See
https://docs.pydantic.dev/2.0/usage/types/custom/"""
return cls(v)
@classmethod
def __get_validators__(cls) -> Generator[Callable[[Any], Self], None, None]:
"""Pydantic special method. See
https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
yield cls._validate
@classmethod
def _validate(cls, value: Any) -> Self:
"""Used as a Pydantic validator. See
https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
return cls(value)
Attributes¶
anchor: str
property
readonly
¶
The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)
client
property
readonly
¶
drive: str
property
readonly
¶
The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)
fspath: str
property
readonly
¶
name: str
property
readonly
¶
The final path component, if any. (Docstring copied from pathlib.Path)
parent: Self
property
readonly
¶
The logical parent of the path. (Docstring copied from pathlib.Path)
parents: Sequence[Self]
property
readonly
¶
A sequence of this path's logical parents. (Docstring copied from pathlib.Path)
parser: Self
property
readonly
¶
parts: Tuple[str, ...]
property
readonly
¶
An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)
stem: str
property
readonly
¶
The final path component, minus its last suffix. (Docstring copied from pathlib.Path)
suffix: str
property
readonly
¶
The final component's last suffix, if any.
This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)
suffixes: List[str]
property
readonly
¶
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)
Methods¶
__init__(self, cloud_path: Union[str, Self, CloudPath], client: Optional[Client] = None) -> None
special
¶
Source code in cloudpathlib/cloudpath.py
def __init__(
self,
cloud_path: Union[str, Self, "CloudPath"],
client: Optional["Client"] = None,
) -> None:
# handle if local file gets opened. must be set at the top of the method in case any code
# below raises an exception, this prevents __del__ from raising an AttributeError
self._handle: Optional[IO] = None
self._client: Optional["Client"] = None
self.is_valid_cloudpath(cloud_path, raise_on_error=True)
self._cloud_meta.validate_completeness()
# versions of the raw string that provide useful methods
self._str = str(cloud_path)
self._url = urlparse(self._str)
self._path = PurePosixPath(f"/{self._no_prefix}")
# setup client
if client is None:
if isinstance(cloud_path, CloudPath):
self._client = cloud_path.client
else:
self._client = client
if client is not None and not isinstance(client, self._cloud_meta.client_class):
raise ClientMismatchError(
f"Client of type [{client.__class__}] is not valid for cloud path of type "
f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
f"None to use default client for this cloud path class."
)
# track if local has been written to, if so it may need to be uploaded
self._dirty = False
absolute(self) -> Self
¶
Return an absolute version of this path by prepending the current working directory. No normalization or symlink resolution is performed.
Use resolve() to get the canonical path to a file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def absolute(self) -> Self:
return self
as_uri(self) -> str
¶
Return the path as a 'file' URI. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def as_uri(self) -> str:
return str(self)
as_url(self, presign: bool = False, expire_seconds: int = 3600) -> str
¶
Source code in cloudpathlib/cloudpath.py
def as_url(self, presign: bool = False, expire_seconds: int = 60 * 60) -> str:
if presign:
url = self.client._generate_presigned_url(self, expire_seconds=expire_seconds)
else:
url = self.client._get_public_url(self)
return url
clear_cache(self)
¶
Removes cache if it exists
Source code in cloudpathlib/cloudpath.py
def clear_cache(self):
"""Removes cache if it exists"""
if self._local.exists():
if self._local.is_file():
self._local.unlink()
else:
shutil.rmtree(self._local)
copy(self, destination, force_overwrite_to_cloud = None)
¶
Copy self to destination folder of file, if self is a file.
Source code in cloudpathlib/cloudpath.py
def copy(self, destination, force_overwrite_to_cloud=None):
"""Copy self to destination folder of file, if self is a file."""
if not self.exists() or not self.is_file():
raise ValueError(
f"Path {self} should be a file. To copy a directory tree use the method copytree."
)
# handle string version of cloud paths + local paths
if isinstance(destination, (str, os.PathLike)):
destination = anypath.to_anypath(destination)
if not isinstance(destination, CloudPath):
return self.download_to(destination)
# if same client, use cloud-native _move_file on client to avoid downloading
if self.client is destination.client:
if destination.exists() and destination.is_dir():
destination = destination / self.name
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]
if (
not force_overwrite_to_cloud
and destination.exists()
and destination.stat().st_mtime >= self.stat().st_mtime
):
raise OverwriteNewerCloudError(
f"File ({destination}) is newer than ({self}). "
f"To overwrite "
f"pass `force_overwrite_to_cloud=True`."
)
return self.client._move_file(self, destination, remove_src=False)
else:
if not destination.exists() or destination.is_file():
return destination.upload_from(
self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
)
else:
return (destination / self.name).upload_from(
self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
)
copytree(self, destination, force_overwrite_to_cloud = None, ignore = None)
¶
Copy self to a directory, if self is a directory.
Source code in cloudpathlib/cloudpath.py
def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None):
"""Copy self to a directory, if self is a directory."""
if not self.is_dir():
raise CloudPathNotADirectoryError(
f"Origin path {self} must be a directory. To copy a single file use the method copy."
)
# handle string version of cloud paths + local paths
if isinstance(destination, (str, os.PathLike)):
destination = anypath.to_anypath(destination)
if destination.exists() and destination.is_file():
raise CloudPathFileExistsError(
f"Destination path {destination} of copytree must be a directory."
)
contents = list(self.iterdir())
if ignore is not None:
ignored_names = ignore(self._no_prefix_no_drive, [x.name for x in contents])
else:
ignored_names = set()
destination.mkdir(parents=True, exist_ok=True)
for subpath in contents:
if subpath.name in ignored_names:
continue
if subpath.is_file():
subpath.copy(
destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
)
elif subpath.is_dir():
subpath.copytree(
destination / subpath.name,
force_overwrite_to_cloud=force_overwrite_to_cloud,
ignore=ignore,
)
return destination
download_to(self, destination: Union[str, os.PathLike]) -> Path
¶
Source code in cloudpathlib/cloudpath.py
def download_to(self, destination: Union[str, os.PathLike]) -> Path:
destination = Path(destination)
if not self.exists():
raise CloudPathNotExistsError(f"Cannot download because path does not exist: {self}")
if self.is_file():
if destination.is_dir():
destination = destination / self.name
return self.client._download_file(self, destination)
else:
destination.mkdir(exist_ok=True)
for f in self.iterdir():
rel = str(self)
if not rel.endswith("/"):
rel = rel + "/"
rel_dest = str(f)[len(rel) :]
f.download_to(destination / rel_dest)
return destination
exists(self) -> bool
¶
Whether this path exists. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def exists(self) -> bool:
return self.client._exists(self)
from_uri(uri: str) -> Self
classmethod
¶
Source code in cloudpathlib/cloudpath.py
@classmethod
def from_uri(cls, uri: str) -> Self:
return cls(uri)
full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> bool
¶
Source code in cloudpathlib/cloudpath.py
def full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> bool:
if sys.version_info < (3, 13):
raise NotImplementedError("full_match requires Python 3.13 or higher")
# strip scheme from start of pattern before testing
if pattern.startswith(self.anchor + self.drive):
pattern = pattern[len(self.anchor + self.drive) :]
# remove drive, which is kept on normal dispatch to pathlib
return PurePosixPath(self._no_prefix_no_drive).full_match( # type: ignore[attr-defined]
pattern, case_sensitive=case_sensitive
)
glob(self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None) -> Generator[Self, NoneType, NoneType]
¶
Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def glob(
self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
pattern = self._glob_checks(pattern)
pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(
tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)
yield from self._glob(
selector,
"/" in pattern
or "**"
in pattern, # recursive listing needed if explicit ** or any sub folder in pattern
)
is_absolute(self) -> bool
¶
True if the path is absolute (has both a root and, if applicable, a drive). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def is_absolute(self) -> bool:
return True
is_dir(self, follow_symlinks = True) -> bool
¶
Whether this path is a directory. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def is_dir(self, follow_symlinks=True) -> bool:
return self.client._is_file_or_dir(self) == "dir"
is_file(self, follow_symlinks = True) -> bool
¶
Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def is_file(self, follow_symlinks=True) -> bool:
return self.client._is_file_or_dir(self) == "file"
is_junction(self)
¶
Source code in cloudpathlib/cloudpath.py
def is_junction(self):
return False # only windows paths can be junctions, not cloudpaths
is_relative_to(self, other: Self) -> bool
¶
Return True if the path is relative to another path or False. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def is_relative_to(self, other: Self) -> bool:
try:
self.relative_to(other)
return True
except ValueError:
return False
is_valid_cloudpath(path: Union[str, CloudPath], raise_on_error: bool = False) -> Union[bool, TypeGuard[Self]]
classmethod
¶
Source code in cloudpathlib/cloudpath.py
@classmethod
def is_valid_cloudpath(
cls, path: Union[str, "CloudPath"], raise_on_error: bool = False
) -> Union[bool, TypeGuard[Self]]:
valid = str(path).lower().startswith(cls.cloud_prefix.lower())
if raise_on_error and not valid:
raise InvalidPrefixError(
f"'{path}' is not a valid path since it does not start with '{cls.cloud_prefix}'"
)
return valid
iterdir(self) -> Generator[Self, NoneType, NoneType]
¶
Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def iterdir(self) -> Generator[Self, None, None]:
for f, _ in self.client._list_dir(self, recursive=False):
if f != self: # iterdir does not include itself in pathlib
yield f
joinpath(self, *pathsegments: Union[str, os.PathLike]) -> Self
¶
Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def joinpath(self, *pathsegments: Union[str, os.PathLike]) -> Self:
return self._dispatch_to_path("joinpath", *pathsegments)
match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool
¶
Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool:
# strip scheme from start of pattern before testing
if path_pattern.startswith(self.anchor + self.drive + "/"):
path_pattern = path_pattern[len(self.anchor + self.drive + "/") :]
kwargs = dict(case_sensitive=case_sensitive)
if sys.version_info < (3, 12):
kwargs.pop("case_sensitive")
return self._dispatch_to_path("match", path_pattern, **kwargs)
mkdir(self, parents: bool = False, exist_ok: bool = False) -> None
¶
Create a new directory at this given path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
@abc.abstractmethod
def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
"""Should be implemented using the client API without requiring a dir is downloaded"""
pass
open(self, mode: str = 'r', buffering: int = -1, encoding: Optional[str] = None, errors: Optional[str] = None, newline: Optional[str] = None, force_overwrite_from_cloud: Optional[bool] = None, force_overwrite_to_cloud: Optional[bool] = None) -> IO[Any]
¶
Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def open(
self,
mode: str = "r",
buffering: int = -1,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: Optional[bool] = None, # extra kwarg not in pathlib
force_overwrite_to_cloud: Optional[bool] = None, # extra kwarg not in pathlib
) -> "IO[Any]":
# if trying to call open on a directory that exists
if self.exists() and not self.is_file():
raise CloudPathIsADirectoryError(
f"Cannot open directory, only files. Tried to open ({self})"
)
if mode == "x" and self.exists():
raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")
# TODO: consider streaming from client rather than DLing entire file to cache
self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)
# create any directories that may be needed if the file is new
if not self._local.exists():
self._local.parent.mkdir(parents=True, exist_ok=True)
original_mtime = 0.0
else:
original_mtime = self._local.stat().st_mtime
buffer = self._local.open(
mode=mode,
buffering=buffering,
encoding=encoding,
errors=errors,
newline=newline,
)
# write modes need special on closing the buffer
if any(m in mode for m in ("w", "+", "x", "a")):
# dirty, handle, patch close
wrapped_close = buffer.close
# since we are pretending this is a cloud file, upload it to the cloud
# when the buffer is closed
def _patched_close_upload(*args, **kwargs) -> None:
wrapped_close(*args, **kwargs)
# we should be idempotent and not upload again if
# we already ran our close method patch
if not self._dirty:
return
# original mtime should match what was in the cloud; because of system clocks or rounding
# by the cloud provider, the new version in our cache is "older" than the original version;
# explicitly set the new modified time to be after the original modified time.
if self._local.stat().st_mtime < original_mtime:
new_mtime = original_mtime + 1
os.utime(self._local, times=(new_mtime, new_mtime))
self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)
self._dirty = False
buffer.close = _patched_close_upload # type: ignore
# keep reference in case we need to close when __del__ is called on this object
self._handle = buffer
# opened for write, so mark dirty
self._dirty = True
# if we don't want any cache around, remove the cache
# as soon as the file is closed
if self.client.file_cache_mode == FileCacheMode.close_file:
# this may be _patched_close_upload, in which case we need to
# make sure to call that first so the file gets uploaded
wrapped_close_for_cache = buffer.close
def _patched_close_empty_cache(*args, **kwargs):
wrapped_close_for_cache(*args, **kwargs)
# remove local file as last step on closing
self.clear_cache()
buffer.close = _patched_close_empty_cache # type: ignore
return buffer
read_bytes(self) -> bytes
¶
Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def read_bytes(self) -> bytes:
with self.open(mode="rb") as f:
return f.read()
read_text(self, encoding: Optional[str] = None, errors: Optional[str] = None, newline: Optional[str] = None) -> str
¶
Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def read_text(
self,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> str:
with self.open(mode="r", encoding=encoding, errors=errors, newline=newline) as f:
return f.read()
relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath
¶
Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not a subpath of the other path), raise ValueError. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath:
# We don't dispatch regularly since this never returns a cloud path (since it is relative, and cloud paths are
# absolute)
if not isinstance(other, CloudPath):
raise ValueError(f"{self} is a cloud path, but {other} is not")
if self.cloud_prefix != other.cloud_prefix:
raise ValueError(
f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path"
)
kwargs = dict(walk_up=walk_up)
if sys.version_info < (3, 12):
kwargs.pop("walk_up")
return self._path.relative_to(other._path, **kwargs) # type: ignore[call-arg]
rename(self, target: Self) -> Self
¶
Rename this path to the target path.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def rename(self, target: Self) -> Self:
# for cloud services replace == rename since we don't just rename,
# we actually move files
return self.replace(target)
replace(self, target: Self) -> Self
¶
Rename this path to the target path, overwriting if that path exists.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def replace(self, target: Self) -> Self:
if type(self) is not type(target):
raise TypeError(
f"The target based to rename must be an instantiated class of type: {type(self)}"
)
if self.is_dir():
raise CloudPathIsADirectoryError(
f"Path {self} is a directory; rename/replace the files recursively."
)
if target == self:
# Request is to replace/rename this with the same path - nothing to do
return self
if target.exists():
target.unlink()
self.client._move_file(self, target)
return target
resolve(self, strict: bool = False) -> Self
¶
Make the path absolute, resolving all symlinks on the way and also normalizing it. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def resolve(self, strict: bool = False) -> Self:
return self
rglob(self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None) -> Generator[Self, NoneType, NoneType]
¶
Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def rglob(
self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
pattern = self._glob_checks(pattern)
pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(
("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)
yield from self._glob(selector, True)
rmdir(self) -> None
¶
Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def rmdir(self) -> None:
if self.is_file():
raise CloudPathNotADirectoryError(
f"Path {self} is a file; call unlink instead of rmdir."
)
try:
next(self.iterdir())
raise DirectoryNotEmptyError(
f"Directory not empty: '{self}'. Use rmtree to delete recursively."
)
except StopIteration:
pass
self.client._remove(self)
rmtree(self) -> None
¶
Delete an entire directory tree.
Source code in cloudpathlib/cloudpath.py
def rmtree(self) -> None:
"""Delete an entire directory tree."""
if self.is_file():
raise CloudPathNotADirectoryError(
f"Path {self} is a file; call unlink instead of rmtree."
)
self.client._remove(self)
samefile(self, other_path: Union[str, os.PathLike]) -> bool
¶
Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def samefile(self, other_path: Union[str, os.PathLike]) -> bool:
# all cloud paths are absolute and the paths are used for hash
return self == other_path
stat(self, follow_symlinks: bool = True) -> stat_result
¶
Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def stat(self, follow_symlinks: bool = True) -> os.stat_result:
"""Note: for many clients, we may want to override so we don't incur
network costs since many of these properties are available as
API calls.
"""
warn(
f"stat not implemented as API call for {self.__class__} so file must be downloaded to "
f"calculate stats; this may take a long time depending on filesize"
)
return self._dispatch_to_local_cache_path("stat", follow_symlinks=follow_symlinks)
touch(self, exist_ok: bool = True) -> None
¶
Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
@abc.abstractmethod
def touch(self, exist_ok: bool = True) -> None:
"""Should be implemented using the client API to create and update modified time"""
pass
unlink(self, missing_ok: bool = True) -> None
¶
Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def unlink(self, missing_ok: bool = True) -> None:
# Note: missing_ok defaults to False in pathlib, but changing the default now would be a breaking change.
if self.is_dir():
raise CloudPathIsADirectoryError(
f"Path {self} is a directory; call rmdir instead of unlink."
)
self.client._remove(self, missing_ok)
upload_from(self, source: Union[str, os.PathLike], force_overwrite_to_cloud: Optional[bool] = None) -> Self
¶
Upload a file or directory to the cloud path.
Source code in cloudpathlib/cloudpath.py
def upload_from(
self,
source: Union[str, os.PathLike],
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Upload a file or directory to the cloud path."""
source = Path(source)
if source.is_dir():
for p in source.iterdir():
(self / p.name).upload_from(p, force_overwrite_to_cloud=force_overwrite_to_cloud)
return self
else:
if self.exists() and self.is_dir():
dst = self / source.name
else:
dst = self
dst._upload_file_to_cloud(source, force_overwrite_to_cloud=force_overwrite_to_cloud)
return dst
validate(v: str) -> Self
classmethod
¶
Used as a Pydantic validator. See https://docs.pydantic.dev/2.0/usage/types/custom/
Source code in cloudpathlib/cloudpath.py
@classmethod
def validate(cls, v: str) -> Self:
"""Used as a Pydantic validator. See
https://docs.pydantic.dev/2.0/usage/types/custom/"""
return cls(v)
walk(self, top_down: bool = True, on_error: Optional[Callable] = None, follow_symlinks: bool = False) -> Generator[Tuple[Self, List[str], List[str]], NoneType, NoneType]
¶
Source code in cloudpathlib/cloudpath.py
def walk(
self,
top_down: bool = True,
on_error: Optional[Callable] = None,
follow_symlinks: bool = False,
) -> Generator[Tuple[Self, List[str], List[str]], None, None]:
try:
file_tree = self._build_subtree(recursive=True) # walking is always recursive
yield from self._walk_results_from_tree(self, file_tree, top_down=top_down)
except Exception as e:
if on_error is not None:
on_error(e)
else:
raise
with_name(self, name: str) -> Self
¶
Return a new path with the file name changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def with_name(self, name: str) -> Self:
return self._dispatch_to_path("with_name", name)
with_segments(self, *pathsegments) -> Self
¶
Create a new CloudPath with the same client out of the given segments. The first segment will be interpreted as the bucket/container name.
Source code in cloudpathlib/cloudpath.py
def with_segments(self, *pathsegments) -> Self:
"""Create a new CloudPath with the same client out of the given segments.
The first segment will be interpreted as the bucket/container name.
"""
return self._new_cloudpath("/".join(pathsegments))
with_stem(self, stem: str) -> Self
¶
Return a new path with the stem changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def with_stem(self, stem: str) -> Self:
try:
return self._dispatch_to_path("with_stem", stem)
except AttributeError:
# with_stem was only added in python 3.9, so we fallback for compatibility
return self.with_name(stem + self.suffix)
with_suffix(self, suffix: str) -> Self
¶
Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def with_suffix(self, suffix: str) -> Self:
return self._dispatch_to_path("with_suffix", suffix)
write_bytes(self, data: bytes) -> int
¶
Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def write_bytes(self, data: bytes) -> int:
"""Open the file in bytes mode, write to it, and close the file.
NOTE: vendored from pathlib since we override open
https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
"""
# type-check for the buffer interface before truncating the file
view = memoryview(data)
with self.open(mode="wb") as f:
return f.write(view)
write_text(self, data: str, encoding: Optional[str] = None, errors: Optional[str] = None, newline: Optional[str] = None) -> int
¶
Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/cloudpath.py
def write_text(
self,
data: str,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> int:
"""Open the file in text mode, write to it, and close the file.
NOTE: vendored from pathlib since we override open
https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L1146-L1155
"""
if not isinstance(data, str):
raise TypeError("data must be str, not %s" % data.__class__.__name__)
with self.open(mode="w", encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)