cloudpathlib.CloudPath¶

Base class for cloud storage file URIs, in the style of the Python standard library's pathlib module. Instances represent a path in cloud storage with filesystem path semantics, and convenient methods allow for basic operations like joining, reading, writing, iterating over contents, etc. CloudPath almost entirely mimics the pathlib.Path interface, so most familiar properties and methods should be available and behave in the expected way.

Analogous to the way pathlib.Path works, instantiating CloudPath will instead create an instance of an appropriate subclass that implements a particular cloud storage service, such as S3Path. This dispatching behavior is based on the URI scheme part of a cloud storage URI (e.g., "s3://").

Source code in cloudpathlib/cloudpath.py

class CloudPath(metaclass=CloudPathMeta):
    """Base class for cloud storage file URIs, in the style of the Python standard library's
    [`pathlib` module](https://docs.python.org/3/library/pathlib.html). Instances represent a path
    in cloud storage with filesystem path semantics, and convenient methods allow for basic
    operations like joining, reading, writing, iterating over contents, etc. `CloudPath` almost
    entirely mimics the [`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path)
    interface, so most familiar properties and methods should be available and behave in the
    expected way.

    Analogous to the way `pathlib.Path` works, instantiating `CloudPath` will instead create an
    instance of an appropriate subclass that implements a particular cloud storage service, such as
    [`S3Path`](../s3path). This dispatching behavior is based on the URI scheme part of a cloud
    storage URI (e.g., `"s3://"`).
    """

    _cloud_meta: CloudImplementation
    cloud_prefix: str

    def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
        """Build a cloud path from a URI string or from another cloud path.

        Args:
            cloud_path: URI string (or existing `CloudPath`) that must start with this class's
                `cloud_prefix`.
            client: Client to attach. When `None`, the client of `cloud_path` is reused if it
                is a `CloudPath`; otherwise the client class's default client is used.

        Raises:
            InvalidPrefixError: If `cloud_path` does not start with `cloud_prefix`.
            ClientMismatchError: If the client is not an instance of the client class
                registered in `_cloud_meta`.
        """
        self.is_valid_cloudpath(cloud_path, raise_on_error=True)

        # keep the raw string together with parsed views of it
        raw = str(cloud_path)
        self._str = raw
        self._url = urlparse(raw)
        self._path = PurePosixPath(f"/{self._no_prefix}")

        # resolve which client backs this path
        expected_client_class = self._cloud_meta.client_class
        if client is None:
            client = (
                cloud_path.client
                if isinstance(cloud_path, CloudPath)
                else expected_client_class.get_default_client()
            )
        if not isinstance(client, expected_client_class):
            raise ClientMismatchError(
                f"Client of type [{client.__class__}] is not valid for cloud path of type "
                f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
                f"None to use default client for this cloud path class."
            )
        self.client: Client = client

        # set when the local copy is opened for writing and may need uploading
        self._dirty = False

        # file object of the local copy while it is open for writing
        self._handle = None

    def __del__(self):
        """Close the file handle to the local path, if one is still held, on collection."""
        # `_handle` is only assigned at the end of `__init__`. When construction fails
        # earlier (e.g. the prefix check raises), the attribute does not exist, and the
        # finalizer must not raise AttributeError on top of the original error.
        handle = getattr(self, "_handle", None)
        if handle is not None:
            handle.close()

    @property
    def _no_prefix(self) -> str:
        """The raw path string with the leading `cloud_prefix` characters removed."""
        prefix_length = len(self.cloud_prefix)
        return self._str[prefix_length:]

    @property
    def _no_prefix_no_drive(self) -> str:
        """The raw path string with the `cloud_prefix` and the `drive` characters removed."""
        skipped = len(self.cloud_prefix) + len(self.drive)
        return self._str[skipped:]

    @classmethod
    def is_valid_cloudpath(cls, path: Union[str, "CloudPath"], raise_on_error=False) -> bool:
        """Check, case-insensitively, that `path` starts with this class's `cloud_prefix`.

        Args:
            path: String or cloud path to check; it is converted with `str` first.
            raise_on_error: When true, raise instead of returning `False`.

        Returns:
            `True` if the prefix matches, otherwise `False`.

        Raises:
            InvalidPrefixError: If the prefix does not match and `raise_on_error` is true.
        """
        candidate = str(path).lower()
        if candidate.startswith(cls.cloud_prefix.lower()):
            return True

        if raise_on_error:
            raise InvalidPrefixError(
                f"'{path}' is not a valid path since it does not start with '{cls.cloud_prefix}'"
            )

        return False

    def __repr__(self) -> str:
        """Return `ClassName('<path string>')`."""
        class_name = self.__class__.__name__
        return f"{class_name}('{self}')"

    def __str__(self) -> str:
        """Return the raw path string this object was created with."""
        raw = self._str
        return raw

    def __hash__(self) -> int:
        """Hash on the class name together with the path string."""
        identity = (type(self).__name__, str(self))
        return hash(identity)

    def __eq__(self, other: Any) -> bool:
        """Equal when `other` is an instance of this object's type with the same string."""
        if not isinstance(other, type(self)):
            return False
        return str(self) == str(other)

    def __fspath__(self):
        """Return the local cache path as a string; for files the cache is refreshed first."""
        if self.is_file():
            self._refresh_cache(force_overwrite_from_cloud=False)
        local_path = self._local
        return str(local_path)

    def __lt__(self, other: Any) -> bool:
        """Order by `parts`; `NotImplemented` unless `other` is an instance of this type."""
        if isinstance(other, type(self)):
            return self.parts < other.parts
        return NotImplemented

    def __le__(self, other: Any) -> bool:
        """Order by `parts`; `NotImplemented` unless `other` is an instance of this type."""
        if isinstance(other, type(self)):
            return self.parts <= other.parts
        return NotImplemented

    def __gt__(self, other: Any) -> bool:
        """Order by `parts`; `NotImplemented` unless `other` is an instance of this type."""
        if isinstance(other, type(self)):
            return self.parts > other.parts
        return NotImplemented

    def __ge__(self, other: Any) -> bool:
        """Order by `parts`; `NotImplemented` unless `other` is an instance of this type."""
        if isinstance(other, type(self)):
            return self.parts >= other.parts
        return NotImplemented

    # ====================== NOT IMPLEMENTED ======================
    # absolute - no cloud equivalent; all cloud paths are absolute already
    # as_posix - no cloud equivalent; not needed since we assume url separator
    # chmod - permission changing should be explicitly done per client with methods
    #           that make sense for the client permission options
    # cwd - no cloud equivalent
    # expanduser - no cloud equivalent
    # group - should be implemented with client-specific permissions
    # home - no cloud equivalent
    # is_absolute - no cloud equivalent; all cloud paths are absolute already
    # is_block_device - no cloud equivalent
    # is_char_device - no cloud equivalent
    # is_fifo - no cloud equivalent
    # is_mount - no cloud equivalent
    # is_reserved - no cloud equivalent
    # is_socket - no cloud equivalent
    # is_symlink - no cloud equivalent
    # lchmod - no cloud equivalent
    # lstat - no cloud equivalent
    # owner - no cloud equivalent
    # relative_to - cloud paths are absolute
    # resolve - all cloud paths are absolute, so no resolving
    # root - drive already has the bucket and anchor/prefix has the scheme, so nothing to store here
    # symlink_to - no cloud equivalent

    # ====================== REQUIRED, NOT GENERIC ======================
    # Methods that must be implemented, but have no generic application
    @property
    @abc.abstractmethod
    def drive(self) -> str:
        """Drive component of the path, e.g. the "bucket" on S3 or the "container" on Azure.

        Abstract: every concrete class has to define it.
        """

    @abc.abstractmethod
    def is_dir(self) -> bool:
        """Whether this path is a directory.

        Abstract: implementations should not require the directory to be downloaded.
        """

    @abc.abstractmethod
    def is_file(self) -> bool:
        """Whether this path is a file.

        Abstract: implementations should not require the file to be downloaded.
        """

    @abc.abstractmethod
    def mkdir(self, parents: bool = False, exist_ok: bool = False):
        """Create this path as a directory.

        Abstract: implementations should use the client API and not require a directory to
        be downloaded.
        """

    @abc.abstractmethod
    def touch(self):
        """Create the file and update its modified time.

        Abstract: implementations should use the client API.
        """

    # ====================== IMPLEMENTED FROM SCRATCH ======================
    # Methods with their own implementations that work generically
    def __rtruediv__(self, other):
        """Reject `other / cloud_path`: a cloud path cannot be re-rooted under something else.

        Raises:
            ValueError: Always.
        """
        message = (
            "Cannot change a cloud path's root since all paths are absolute; create a new path instead."
        )
        raise ValueError(message)

    @property
    def anchor(self) -> str:
        """The anchor of a cloud path is its `cloud_prefix`."""
        prefix = self.cloud_prefix
        return prefix

    def as_uri(self) -> str:
        """Return the string form of this cloud path."""
        uri = str(self)
        return uri

    def exists(self) -> bool:
        """Ask the client whether this path exists."""
        client = self.client
        return client._exists(self)

    @property
    def fspath(self) -> str:
        """Property form of `__fspath__()`."""
        local_path_string = self.__fspath__()
        return local_path_string

    def glob(self, pattern: str) -> Iterable["CloudPath"]:
        """Yield the paths listed under this path whose drive-relative part matches `pattern`.

        A leading `cloud_prefix` and a leading "<drive>/" are removed from the pattern. A
        pattern starting with "**/" switches the client listing to recursive mode and the
        "**/" is dropped. Each listed path is then matched with `fnmatch.fnmatch` against
        its string without prefix and drive.
        """
        prefix = self.cloud_prefix
        if pattern.startswith(prefix):
            pattern = pattern[len(prefix) :]

        drive_with_separator = self.drive + "/"
        if pattern.startswith(drive_with_separator):
            pattern = pattern[len(drive_with_separator) :]

        # a leading "**/" requests a recursive listing
        recursive = pattern.startswith("**/")
        if recursive:
            pattern = pattern.split("/", 1)[-1]

        for candidate in self.client._list_dir(self, recursive=recursive):
            if fnmatch.fnmatch(candidate._no_prefix_no_drive, pattern):
                yield candidate

    def iterdir(self) -> Iterable["CloudPath"]:
        """Yield what the client lists for this path, non-recursively."""
        yield from self.client._list_dir(self, recursive=False)

    def open(
        self,
        mode="r",
        buffering=-1,
        encoding=None,
        errors=None,
        newline=None,
        force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
        force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
    ) -> IO:
        """Open the locally cached copy of this file, syncing with the cloud around it.

        The cache is refreshed from the cloud before the local file is opened. When `mode`
        can modify the file ("w", "+", "x" or "a"), the returned handle's `close` is patched
        so that closing it uploads the cached file to the cloud.

        Args:
            mode: Passed to `open` on the local cache path.
            buffering: Passed to `open` on the local cache path.
            encoding: Passed to `open` on the local cache path.
            errors: Passed to `open` on the local cache path.
            newline: Passed to `open` on the local cache path.
            force_overwrite_from_cloud: Passed to `_refresh_cache`.
            force_overwrite_to_cloud: Passed to `_upload_local_to_cloud` when the handle is
                closed.

        Returns:
            The open file object for the local cache file.

        Raises:
            CloudPathIsADirectoryError: If the path exists and is not a file.
            CloudPathFileExistsError: If an exclusive-creation mode is requested and the
                path already exists.
        """
        # if trying to call open on a directory that exists
        if self.exists() and not self.is_file():
            raise CloudPathIsADirectoryError(
                f"Cannot open directory, only files. Tried to open ({self})"
            )

        # exclusive creation also arrives as "xb", "xt", "x+", ..., so test for the flag
        # instead of comparing against the bare "x"
        if "x" in mode and self.exists():
            raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

        # TODO: consider streaming from client rather than DLing entire file to cache
        self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

        # create any directories that may be needed if the file is new
        if not self._local.exists():
            self._local.parent.mkdir(parents=True, exist_ok=True)
            original_mtime = 0
        else:
            original_mtime = self._local.stat().st_mtime

        buffer = self._local.open(
            mode=mode,
            buffering=buffering,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )

        # write modes need special handling when the buffer is closed
        if any(m in mode for m in ("w", "+", "x", "a")):
            # dirty, handle, patch close
            original_close = buffer.close

            # since we are pretending this is a cloud file, upload it to the cloud
            # when the buffer is closed
            def _patched_close(*args, **kwargs):
                # Closing an already-closed file must be a no-op, as for regular files.
                # Without this guard a second close would attempt another upload, which
                # fails because the local and cloud mtimes were equalised by the first
                # one. While the path is still dirty (upload did not complete) the upload
                # is retried as before.
                if buffer.closed and not self._dirty:
                    return

                original_close(*args, **kwargs)

                # original mtime should match what was in the cloud; because of system clocks or rounding
                # by the cloud provider, the new version in our cache is "older" than the original version;
                # explicitly set the new modified time to be after the original modified time.
                if self._local.stat().st_mtime < original_mtime:
                    new_mtime = original_mtime + 1
                    os.utime(self._local, times=(new_mtime, new_mtime))

                self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

            buffer.close = _patched_close

            # keep reference in case we need to close when __del__ is called on this object
            self._handle = buffer

            # opened for write, so mark dirty
            self._dirty = True

        return buffer

    def replace(self, target: "CloudPath") -> "CloudPath":
        """Move this file to `target`, removing an existing `target` first.

        Args:
            target: Destination path; its type must be exactly the type of this path.

        Returns:
            `target`.

        Raises:
            TypeError: If `target` is of a different type than this path.
        """
        own_type = type(self)
        if own_type != type(target):
            raise TypeError(
                f"The target based to rename must be an instantiated class of type: {own_type}"
            )

        # clear the destination before moving onto it
        if target.exists():
            target.unlink()

        client = self.client
        client._move_file(self, target)
        return target

    def rename(self, target: "CloudPath") -> "CloudPath":
        """Same as `replace`: on cloud services a rename is an actual move of the file."""
        moved = self.replace(target)
        return moved

    def rglob(self, pattern: str) -> Iterable["CloudPath"]:
        """Recursive glob: delegates to `glob` with "**/" put in front of `pattern`."""
        recursive_pattern = "**/" + pattern
        return self.glob(recursive_pattern)

    def rmdir(self):
        """Remove this directory through the client, provided it has no contents.

        Raises:
            CloudPathNotADirectoryError: If this path is a file.
            DirectoryNotEmptyError: If `iterdir()` yields at least one entry.
        """
        if self.is_file():
            raise CloudPathNotADirectoryError(
                f"Path {self} is a file; call unlink instead of rmdir."
            )

        # probing for a first entry is enough to know whether the directory is empty
        try:
            next(self.iterdir())
        except StopIteration:
            pass
        else:
            raise DirectoryNotEmptyError(
                f"Directory not empty: '{self}'. Use rmtree to delete recursively."
            )

        self.client._remove(self)

    def samefile(self, other_path: "CloudPath") -> bool:
        """Two cloud paths are the same file when they compare equal (all paths are absolute)."""
        is_same = self == other_path
        return is_same

    def unlink(self):
        """Remove this path through the client.

        Raises:
            CloudPathIsADirectoryError: If this path is a directory.
        """
        if not self.is_dir():
            self.client._remove(self)
            return

        raise CloudPathIsADirectoryError(
            f"Path {self} is a directory; call rmdir instead of unlink."
        )

    def write_bytes(self, data: bytes):
        """Open the file in bytes mode, write `data` to it, and close the file.

        Returns whatever the file object's `write` returns.

        NOTE: vendored from pathlib since we override open
        https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
        """
        # building the memoryview first rejects non-buffer input before the file is truncated
        buffer_view = memoryview(data)
        with self.open(mode="wb") as handle:
            written = handle.write(buffer_view)
        return written

    def write_text(self, data: str, encoding=None, errors=None):
        """Open the file in text mode, write `data` to it, and close the file.

        Returns whatever the file object's `write` returns.

        Raises:
            TypeError: If `data` is not a `str`.

        NOTE: vendored from pathlib since we override open
        https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252
        """
        if isinstance(data, str):
            with self.open(mode="w", encoding=encoding, errors=errors) as handle:
                written = handle.write(data)
            return written

        raise TypeError("data must be str, not %s" % data.__class__.__name__)

    # ====================== DISPATCHED TO POSIXPATH FOR PURE PATHS ======================
    # Methods that are dispatched to exactly how pathlib.PurePosixPath would calculate it on
    # self._path for pure paths (does not matter if file exists);
    # see the next section for ones that require a real file to exist
    def _dispatch_to_path(self, func, *args, **kwargs):
        """Evaluate attribute or method `func` of the pure POSIX path behind this cloud path.

        Some functions we can just dispatch to the pathlib version. We want to do this
        explicitly so we don't have to support all of pathlib and subclasses can override
        individually if necessary.

        Args:
            func: Name of the attribute or method to look up on `self._path`.
            *args: Positional arguments used when the attribute is callable.
            **kwargs: Keyword arguments used when the attribute is callable.

        Returns:
            A cloud path (via `_new_cloudpath`) when the result is a `PurePosixPath`; a list
            of cloud paths when the result is a non-empty sequence of `PurePosixPath`
            objects; otherwise the pathlib result unchanged.
        """
        path_version = self._path.__getattribute__(func)

        # Path functions should be called so the results are calculated
        if callable(path_version):
            path_version = path_version(*args, **kwargs)

        # Paths should always be resolved and then converted to the same client + class as this one
        if isinstance(path_version, PurePosixPath):
            # always resolve since cloud paths must be absolute
            path_version = _resolve(path_version)
            return self._new_cloudpath(path_version)

        # Sequences of paths (e.g. `parents`) are converted element by element. The length
        # check is required: empty results such as `suffixes == []`, `suffix == ""` or an
        # empty `parents` are sequences too, and indexing them raised IndexError.
        if (
            isinstance(path_version, collections.abc.Sequence)
            and len(path_version) > 0
            and isinstance(path_version[0], PurePosixPath)
        ):
            return [
                self._new_cloudpath(_resolve(p)) for p in path_version if _resolve(p) != p.root
            ]

        # when pathlib returns a string, etc. we probably just want that thing
        return path_version

    def __truediv__(self, other):
        """Join this path with the string `other` using the `/` operator.

        Raises:
            TypeError: If `other` is not a `str`.
        """
        if isinstance(other, str):
            return self._dispatch_to_path("__truediv__", other)

        raise TypeError(f"Can only join path {repr(self)} with strings.")

    def joinpath(self, *args):
        """Dispatch `joinpath(*args)` to the underlying pure POSIX path."""
        joined = self._dispatch_to_path("joinpath", *args)
        return joined

    @property
    def name(self):
        """Dispatch `name` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("name")
        return result

    def match(self, path_pattern):
        """Dispatch `match` to the underlying pure POSIX path.

        A pattern that starts with "<anchor><drive>/" has that part removed before matching.
        """
        leading = self.anchor + self.drive + "/"
        if path_pattern.startswith(leading):
            path_pattern = path_pattern[len(leading) :]

        return self._dispatch_to_path("match", path_pattern)

    @property
    def parent(self):
        """Dispatch `parent` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("parent")
        return result

    @property
    def parents(self):
        """Dispatch `parents` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("parents")
        return result

    @property
    def parts(self):
        """Tuple of the anchor followed by the path components.

        The components come from the underlying pure POSIX path; its leading "/" entry is
        dropped.
        """
        components = self._dispatch_to_path("parts")
        if components[0] == "/":
            components = components[1:]

        return (self.anchor, *components)

    @property
    def stem(self):
        """Dispatch `stem` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("stem")
        return result

    @property
    def suffix(self):
        """Dispatch `suffix` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("suffix")
        return result

    @property
    def suffixes(self):
        """Dispatch `suffixes` to the underlying pure POSIX path."""
        result = self._dispatch_to_path("suffixes")
        return result

    def with_name(self, name):
        """Dispatch `with_name(name)` to the underlying pure POSIX path."""
        renamed = self._dispatch_to_path("with_name", name)
        return renamed

    def with_suffix(self, suffix):
        """Dispatch `with_suffix(suffix)` to the underlying pure POSIX path."""
        changed = self._dispatch_to_path("with_suffix", suffix)
        return changed

    # ====================== DISPATCHED TO LOCAL CACHE FOR CONCRETE PATHS ======================
    # Items that can be executed on the cached file on the local filesystem
    def _dispatch_to_local_cache_path(self, func, *args, **kwargs):
        """Refresh the cache, then evaluate attribute or method `func` on the local cache path.

        Args:
            func: Name of the attribute or method to look up on `self._local`.
            *args: Positional arguments used when the attribute is callable.
            **kwargs: Keyword arguments used when the attribute is callable.

        Returns:
            A cloud path (via `_new_cloudpath`) built from the resolved result when the
            result is a `PosixPath` or `WindowsPath`; otherwise the result unchanged.
        """
        self._refresh_cache()

        attribute = self._local.__getattribute__(func)

        # methods are called so that the result is calculated
        result = attribute(*args, **kwargs) if callable(attribute) else attribute

        # anything that is not a concrete path (strings, stat results, ...) is handed back as is
        if not isinstance(result, (PosixPath, WindowsPath)):
            return result

        # concrete paths are resolved and converted to the same client + class as this one
        return self._new_cloudpath(result.resolve())

    def stat(self):
        """Return the stat result of the locally cached copy of this file.

        This is a regular method rather than a property: every caller in this class invokes
        it as `self.stat()`, which cannot work on a property that already evaluates to the
        stat result.

        Note: for many clients, we may want to override so we don't incur
        network costs since many of these properties are available as
        API calls.
        """
        # NOTE(review): `_dispatch_to_local_cache_path` starts with `_refresh_cache()`, which
        # itself calls `self.stat()`; this generic fallback therefore looks like it only
        # terminates when a subclass overrides `stat` -- confirm.
        warn(
            f"stat not implemented as API call for {self.__class__} so file must be downloaded to "
            f"calculate stats; this may take a long time depending on filesize"
        )
        return self._dispatch_to_local_cache_path("stat")

    def read_bytes(self):
        """Refresh the cache and return the result of `read_bytes` on the local cache file."""
        content = self._dispatch_to_local_cache_path("read_bytes")
        return content

    def read_text(self, encoding=None, errors=None):
        """Refresh the cache and return the result of `read_text` on the local cache file.

        Args:
            encoding: Forwarded to `pathlib.Path.read_text`; `None` keeps its default.
            errors: Forwarded to `pathlib.Path.read_text`; `None` keeps its default.
        """
        return self._dispatch_to_local_cache_path("read_text", encoding=encoding, errors=errors)

    # ===========  public cloud methods, not in pathlib ===============
    def download_to(self, destination: Union[str, os.PathLike]) -> Path:
        """Download this file, or everything `iterdir()` yields under it, to a local path.

        Args:
            destination: Local target. For a file, an existing directory means the file is
                placed inside it under this path's `name`. For anything else, `destination`
                is created as a directory (its parent must already exist) and each child is
                downloaded below it.

        Returns:
            What the client's `_download_file` returns for a file; otherwise `destination`.
        """
        destination = Path(destination)
        if self.is_file():
            if destination.is_dir():
                destination = destination / self.name
            return self.client._download_file(self, destination)

        destination.mkdir(exist_ok=True)

        # the prefix stripped from each child is the same for all of them, so build it
        # once instead of once per child
        own_prefix = str(self)
        if not own_prefix.endswith("/"):
            own_prefix = own_prefix + "/"

        for child in self.iterdir():
            relative = str(child)[len(own_prefix) :]
            child.download_to(destination / relative)

        return destination

    def rmtree(self):
        """Delete an entire directory tree.

        Raises:
            CloudPathNotADirectoryError: If this path is a file.
        """
        if not self.is_file():
            self.client._remove(self)
            return

        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmtree."
        )

    def upload_from(
        self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False
    ) -> "CloudPath":
        """Upload a file or directory to the cloud path.

        Args:
            source: Local file or directory. A directory is uploaded entry by entry, each
                one to `self / <entry name>`.
            force_overwrite_to_cloud: Passed on to `_upload_file_to_cloud`.

        Returns:
            `self` for a directory source. For a file source, the cloud path written to:
            `self / source.name` when this path exists and is a directory, otherwise `self`.
        """
        source = Path(source)

        if not source.is_dir():
            # single file: place it inside this path when this path is an existing directory
            if self.exists() and self.is_dir():
                target = self / source.name
            else:
                target = self

            target._upload_file_to_cloud(source, force_overwrite_to_cloud=force_overwrite_to_cloud)
            return target

        for entry in source.iterdir():
            (self / entry.name).upload_from(entry, force_overwrite_to_cloud=force_overwrite_to_cloud)

        return self

    def copy(
        self,
        destination: Union[str, os.PathLike, "CloudPath"],
        force_overwrite_to_cloud: bool = False,
    ) -> Union[Path, "CloudPath"]:
        """Copy self to destination folder or file, if self is a file.

        Args:
            destination: Local path, cloud path, or a string/`os.PathLike` that is converted
                with `anypath.to_anypath`.
            force_overwrite_to_cloud: Skips the "destination is newer" check for same-client
                copies and is passed on to `upload_from` otherwise.

        Raises:
            ValueError: If this path does not exist or is not a file.
            OverwriteNewerCloudError: For same-client copies without forcing, if the
                destination exists and its mtime is not older than this file's.
        """
        if not (self.exists() and self.is_file()):
            raise ValueError(
                f"Path {self} should be a file. To copy a directory tree use the method copytree."
            )

        # handle string version of cloud paths + local paths
        if isinstance(destination, (str, os.PathLike)):
            destination = anypath.to_anypath(destination)

        # local destination: a plain download
        if not isinstance(destination, CloudPath):
            return self.download_to(destination)

        # different client: go through the local copy of this file and upload it
        if self.client is not destination.client:
            if destination.exists() and not destination.is_file():
                upload_target = destination / self.name
            else:
                upload_target = destination
            return upload_target.upload_from(
                self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
            )

        # same client: use cloud-native _move_file on client to avoid downloading
        target = destination
        if target.exists() and target.is_dir():
            target = target / self.name

        if (
            not force_overwrite_to_cloud
            and target.exists()
            and target.stat().st_mtime >= self.stat().st_mtime
        ):
            raise OverwriteNewerCloudError(
                f"File ({target}) is newer than ({self}). "
                f"To overwrite "
                f"pass `force_overwrite_to_cloud=True`."
            )

        return self.client._move_file(self, target, remove_src=False)

    def copytree(
        self,
        destination: Union[str, os.PathLike, "CloudPath"],
        force_overwrite_to_cloud: bool = False,
    ) -> Union[Path, "CloudPath"]:
        """Copy self to a directory, if self is a directory.

        Args:
            destination: Local path, cloud path, or a string/`os.PathLike` that is converted
                with `anypath.to_anypath`. It is created (with parents) if needed.
            force_overwrite_to_cloud: Passed on to `copy` / `copytree` for every entry.

        Returns:
            The destination directory.

        Raises:
            CloudPathNotADirectoryError: If this path is not a directory.
            CloudPathFileExistsError: If `destination` exists and is a file.
        """
        if not self.is_dir():
            raise CloudPathNotADirectoryError(
                f"Origin path {self} must be a directory. To copy a single file use the method copy."
            )

        # handle string version of cloud paths + local paths
        if isinstance(destination, (str, os.PathLike)):
            destination = anypath.to_anypath(destination)

        if destination.exists() and destination.is_file():
            # the message needs to be an f-string so that the destination is interpolated
            raise CloudPathFileExistsError(
                f"Destination path {destination} of copytree must be a directory."
            )

        destination.mkdir(parents=True, exist_ok=True)

        for subpath in self.iterdir():
            if subpath.is_file():
                subpath.copy(
                    destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
                )
            elif subpath.is_dir():
                subpath.copytree(
                    destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
                )

        return destination

    # ===========  private cloud methods ===============
    @property
    def _local(self):
        """Cached local version of the file: the client's cache dir joined with `_no_prefix`."""
        cache_dir = self.client._local_cache_dir
        return cache_dir / self._no_prefix

    def _new_cloudpath(self, path):
        """Create a cloud path for `path` through this path's client.

        Used so that results of iterdir and joins share a unified client + cache. One
        leading "/" is removed from `path` and the `cloud_prefix` is prepended when missing.
        """
        raw = str(path)

        # drop a single leading "/"
        if raw.startswith("/"):
            raw = raw[1:]

        # make sure the result carries the prefix/anchor
        prefix = self.cloud_prefix
        if not raw.startswith(prefix):
            raw = f"{prefix}{raw}"

        return self.client.CloudPath(raw)

    def _refresh_cache(self, force_overwrite_from_cloud=False):
        """Bring the local cache file for this path up to date with the cloud version.

        Returns without doing anything when `self.stat()` raises `NoStatError`. Otherwise the
        file is downloaded when there is no local copy, when the local copy's mtime is older
        than the cloud's, or when `force_overwrite_from_cloud` is true; after a download the
        local mtime is set to the cloud mtime.

        Args:
            force_overwrite_from_cloud: Download even if a local copy exists that is not
                older than the cloud version.

        Raises:
            OverwriteDirtyFileError: If this path is marked dirty (`self._dirty`).
            OverwriteNewerLocalError: If the local file's mtime is newer than the cloud's.
        """
        try:
            stats = self.stat()
        except NoStatError:
            # nothing to cache if the file does not exist; happens when creating
            # new files that will be uploaded
            return

        # if not exist or cloud newer
        if (
            not self._local.exists()
            or (self._local.stat().st_mtime < stats.st_mtime)
            or force_overwrite_from_cloud
        ):
            # ensure there is a home for the file
            self._local.parent.mkdir(parents=True, exist_ok=True)
            self.download_to(self._local)

            # force cache time to match cloud times
            os.utime(self._local, times=(stats.st_mtime, stats.st_mtime))

        # NOTE(review): this check runs after the download step above, so a dirty path whose
        # download condition holds (including a forced refresh) is downloaded first and then
        # still raises here -- confirm that this order is intended.
        if self._dirty:
            raise OverwriteDirtyFileError(
                f"Local file ({self._local}) for cloud path ({self}) has been changed by your code, but "
                f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
                f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
                f"overwrite."
            )

        # if local newer but not dirty, it was updated
        # by a separate process; do not overwrite unless forced to
        if self._local.stat().st_mtime > stats.st_mtime:
            raise OverwriteNewerLocalError(
                f"Local file ({self._local}) for cloud path ({self}) is newer on disk, but "
                f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
                f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
                f"overwrite."
            )

    def _upload_local_to_cloud(self, force_overwrite_to_cloud: bool = False):
        """Uploads cache file at self._local to the cloud

        After the upload, the cache file's mtime is set to the cloud mtime and the dirty flag
        and stored handle are reset.

        Returns:
            What `_upload_file_to_cloud` returns.

        Raises:
            ValueError: If the cache path is a directory.
        """
        cache_file = self._local

        # only individual files are ever cached and uploaded, never whole directories
        if cache_file.is_dir():
            raise ValueError("Only individual files can be uploaded to the cloud")

        result = self._upload_file_to_cloud(
            cache_file, force_overwrite_to_cloud=force_overwrite_to_cloud
        )

        # force cache time to match cloud times
        cloud_mtime = self.stat().st_mtime
        os.utime(self._local, times=(cloud_mtime, cloud_mtime))

        # the local copy is in sync again, so nothing is pending anymore
        self._dirty = False
        self._handle = None

        return result

    def _upload_file_to_cloud(self, local_path, force_overwrite_to_cloud: bool = False):
        """Uploads file at `local_path` to the cloud if there is not a newer file
        already there.

        Args:
            local_path: Local file to upload; its `stat().st_mtime` is compared with the
                cloud mtime.
            force_overwrite_to_cloud: Upload regardless of the modified times.

        Returns:
            `self`, after the upload.

        Raises:
            OverwriteNewerCloudError: If the cloud file exists, the local file is not newer
                than it, and `force_overwrite_to_cloud` is false.
        """
        try:
            stats = self.stat()
        except NoStatError:
            stats = None

        # if cloud does not exist or local is newer or we are overwriting, do the upload
        if (
            not stats  # cloud does not exist
            or (local_path.stat().st_mtime > stats.st_mtime)
            or force_overwrite_to_cloud
        ):
            self.client._upload_file(
                local_path,
                self,
            )

            return self

        # cloud is newer and we are not overwriting; report the file that was actually
        # passed in, which is not necessarily the cache file at `self._local`
        raise OverwriteNewerCloudError(
            f"Local file ({local_path}) for cloud path ({self}) is newer in the cloud disk, but "
            f"is being requested to be uploaded to the cloud. Either (1) redownload changes from the cloud or "
            f"(2) pass `force_overwrite_to_cloud=True` to "
            f"overwrite."
        )

    # ===========  pydantic integration special methods ===============
    @classmethod
    def __get_validators__(cls):
        """Pydantic special method that yields this class's validator. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        validator = cls._validate
        yield validator

    @classmethod
    def _validate(cls, value: Any):
        """Used as a Pydantic validator: builds an instance of this class from `value`. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        instance = cls(value)
        return instance

Attributes¶

`anchor: str` `property` `readonly` ¶

The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)

`drive: str` `property` `readonly` ¶

The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)

`fspath: str` `property` `readonly` ¶

`name` `property` `readonly` ¶

The final path component, if any. (Docstring copied from pathlib.Path)

`parent` `property` `readonly` ¶

The logical parent of the path. (Docstring copied from pathlib.Path)

`parents` `property` `readonly` ¶

A sequence of this path's logical parents. (Docstring copied from pathlib.Path)

`parts` `property` `readonly` ¶

An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)

`stat` `property` `readonly` ¶

Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)

`stem` `property` `readonly` ¶

The final path component, minus its last suffix. (Docstring copied from pathlib.Path)

`suffix` `property` `readonly` ¶

The final component's last suffix, if any.

This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)

`suffixes` `property` `readonly` ¶

A list of the final component's suffixes, if any.

These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)

Methods¶

`__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)` `special` ¶

Source code in cloudpathlib/cloudpath.py

def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Build a cloud path from a URI string or from another cloud path.

    Args:
        cloud_path: URI string (or existing `CloudPath`) that must start with this class's
            `cloud_prefix`.
        client: Client to attach. When `None`, the client of `cloud_path` is reused if it
            is a `CloudPath`; otherwise the client class's default client is used.

    Raises:
        ClientMismatchError: If the client is not an instance of the client class
            registered in `_cloud_meta`.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # keep the raw string together with parsed views of it
    raw = str(cloud_path)
    self._str = raw
    self._url = urlparse(raw)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # resolve which client backs this path
    expected_client_class = self._cloud_meta.client_class
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else expected_client_class.get_default_client()
        )
    if not isinstance(client, expected_client_class):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # set when the local copy is opened for writing and may need uploading
    self._dirty = False

    # file object of the local copy while it is open for writing
    self._handle = None

`as_uri(self) -> str` ¶

Return the path as a 'file' URI. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def as_uri(self) -> str:
    """Return the string form of this cloud path."""
    uri = str(self)
    return uri

`copy(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath]` ¶

Copy self to destination folder or file, if self is a file.

Source code in cloudpathlib/cloudpath.py

def copy(
    self,
    destination: Union[str, os.PathLike, "CloudPath"],
    force_overwrite_to_cloud: bool = False,
) -> Union[Path, "CloudPath"]:
    """Copy this file to ``destination``, which may be a folder or a file.

    Args:
        destination: Where to copy to. Strings and ``os.PathLike`` objects are first
            converted with ``anypath.to_anypath``.
        force_overwrite_to_cloud: Skip the "destination is newer" check when both paths
            share a client; otherwise forwarded to ``upload_from``.

    Returns:
        The result of ``download_to`` for a non-cloud destination, of the client's
        ``_move_file`` when both paths share a client, or of ``upload_from`` otherwise.

    Raises:
        ValueError: If this path does not exist or is not a file.
        OverwriteNewerCloudError: If both paths share a client, the destination exists
            with a modified time at least as recent as this file's, and
            ``force_overwrite_to_cloud`` is false.
    """
    if not (self.exists() and self.is_file()):
        raise ValueError(
            f"Path {self} should be a file. To copy a directory tree use the method copytree."
        )

    # normalise string versions of cloud paths and local paths
    if isinstance(destination, (str, os.PathLike)):
        destination = anypath.to_anypath(destination)

    # anything that is not a cloud path is treated as a download target
    if not isinstance(destination, CloudPath):
        return self.download_to(destination)

    if self.client is not destination.client:
        # different clients: upload from this path's local file path
        if destination.exists() and not destination.is_file():
            upload_target = destination / self.name
        else:
            upload_target = destination
        return upload_target.upload_from(
            self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
        )

    # same client: use the client's _move_file so nothing has to be downloaded
    if destination.exists() and destination.is_dir():
        destination = destination / self.name

    if not force_overwrite_to_cloud and destination.exists():
        if destination.stat().st_mtime >= self.stat().st_mtime:
            raise OverwriteNewerCloudError(
                f"File ({destination}) is newer than ({self}). "
                f"To overwrite "
                f"pass `force_overwrite_to_cloud=True`."
            )

    return self.client._move_file(self, destination, remove_src=False)

`copytree(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath]` ¶

Copy self to a directory, if self is a directory.

Source code in cloudpathlib/cloudpath.py

def copytree(
    self,
    destination: Union[str, os.PathLike, "CloudPath"],
    force_overwrite_to_cloud: bool = False,
) -> Union[Path, "CloudPath"]:
    """Copy self to a directory, if self is a directory.

    Args:
        destination: Directory to copy into. Strings and ``os.PathLike`` objects are
            first converted with ``anypath.to_anypath``. It is created (with parents)
            if it does not exist.
        force_overwrite_to_cloud: Forwarded to every nested ``copy`` / ``copytree`` call.

    Returns:
        The destination path object.

    Raises:
        CloudPathNotADirectoryError: If this path is not a directory.
        CloudPathFileExistsError: If ``destination`` exists and is a file.
    """
    if not self.is_dir():
        raise CloudPathNotADirectoryError(
            f"Origin path {self} must be a directory. To copy a single file use the method copy."
        )

    # handle string version of cloud paths + local paths
    if isinstance(destination, (str, os.PathLike)):
        destination = anypath.to_anypath(destination)

    if destination.exists() and destination.is_file():
        # f-string so the offending path is interpolated into the message
        raise CloudPathFileExistsError(
            f"Destination path {destination} of copytree must be a directory."
        )

    destination.mkdir(parents=True, exist_ok=True)

    # files are copied directly; sub-directories recurse
    for subpath in self.iterdir():
        if subpath.is_file():
            subpath.copy(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )
        elif subpath.is_dir():
            subpath.copytree(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )

    return destination

`download_to(self, destination: Union[str, os.PathLike]) -> Path` ¶

Source code in cloudpathlib/cloudpath.py

def download_to(self, destination: Union[str, os.PathLike]) -> Path:
    """Download this file or directory to a local ``destination``.

    For a file, the client's ``_download_file`` is called; when ``destination`` is an
    existing directory the file keeps its own name inside it. For anything else,
    ``destination`` is created as a directory and every entry from ``iterdir`` is
    downloaded beneath it under its path relative to this one.

    Returns:
        The result of ``_download_file`` for a file, otherwise ``destination`` as a ``Path``.
    """
    target = Path(destination)

    if not self.is_file():
        target.mkdir(exist_ok=True)

        # string prefix that every child shares; strip it to get the relative part
        prefix = str(self)
        if not prefix.endswith("/"):
            prefix += "/"

        for child in self.iterdir():
            relative = str(child)[len(prefix) :]
            child.download_to(target / relative)

        return target

    if target.is_dir():
        target = target / self.name
    return self.client._download_file(self, target)

`exists(self) -> bool` ¶

Whether this path exists. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def exists(self) -> bool:
    """Ask this path's client whether the path exists."""
    found = self.client._exists(self)
    return found

`glob(self, pattern: str) -> Iterable[CloudPath]` ¶

Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield entries listed under this path whose key matches ``pattern``.

    A leading cloud prefix and a leading ``"<drive>/"`` are removed from ``pattern``.
    A leading ``"**/"`` is removed as well and switches the client listing to
    recursive. Each listed entry is then tested with ``fnmatch.fnmatch`` against its
    ``_no_prefix_no_drive`` value.
    """
    # drop the cloud prefix from the pattern if it is included
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # drop the "drive" from the pattern if it is included
    drive_part = self.drive + "/"
    if pattern.startswith(drive_part):
        pattern = pattern[len(drive_part) :]

    # a leading "**/" marks the pattern as recursive
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern[len("**/") :]

    for entry in self.client._list_dir(self, recursive=recursive):
        if not fnmatch.fnmatch(entry._no_prefix_no_drive, pattern):
            continue
        yield entry

`is_dir(self) -> bool` ¶

Whether this path is a directory. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

@abc.abstractmethod
def is_dir(self) -> bool:
    """Abstract hook: report whether this path is a directory.

    Implementations should not require the directory to be downloaded.
    """
    ...

`is_file(self) -> bool` ¶

Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

@abc.abstractmethod
def is_file(self) -> bool:
    """Abstract hook: report whether this path is a file.

    Implementations should not require the file to be downloaded.
    """
    ...

`is_valid_cloudpath(path: Union[str, CloudPath], raise_on_error = False) -> bool` `classmethod` ¶

Source code in cloudpathlib/cloudpath.py

@classmethod
def is_valid_cloudpath(cls, path: Union[str, "CloudPath"], raise_on_error=False) -> bool:
    """Check, case-insensitively, that ``path`` starts with this class's ``cloud_prefix``.

    Args:
        path: String or cloud path to check; it is converted with ``str`` first.
        raise_on_error: When true, raise instead of returning ``False``.

    Returns:
        Whether the prefix matches.

    Raises:
        InvalidPrefixError: If the prefix does not match and ``raise_on_error`` is true.
    """
    expected_prefix = cls.cloud_prefix.lower()
    valid = str(path).lower().startswith(expected_prefix)

    if valid or not raise_on_error:
        return valid

    raise InvalidPrefixError(
        f"'{path}' is not a valid path since it does not start with '{cls.cloud_prefix}'"
    )

`iterdir(self) -> Iterable[CloudPath]` ¶

Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def iterdir(self) -> Iterable["CloudPath"]:
    """Yield the entries the client lists for this path, non-recursively."""
    yield from self.client._list_dir(self, recursive=False)

`joinpath(self, *args)` ¶

Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def joinpath(self, *args):
    """Forward ``joinpath`` and its arguments to ``_dispatch_to_path``."""
    joined = self._dispatch_to_path("joinpath", *args)
    return joined

`match(self, path_pattern)` ¶

Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def match(self, path_pattern):
    """Test this path against ``path_pattern`` via ``_dispatch_to_path``.

    If the pattern starts with this path's anchor followed by its drive and a slash,
    that leading part is removed before the pattern is forwarded.
    """
    # strip scheme and drive from the start of the pattern before testing
    leading = self.anchor + self.drive + "/"
    if path_pattern.startswith(leading):
        path_pattern = path_pattern[len(leading) :]

    return self._dispatch_to_path("match", path_pattern)

`mkdir(self, parents: bool = False, exist_ok: bool = False)` ¶

Create a new directory at this given path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

@abc.abstractmethod
def mkdir(self, parents: bool = False, exist_ok: bool = False):
    """Abstract hook: create a directory at this path.

    Implementations should use the client API and not require a directory download.
    """
    ...

`open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO` ¶

Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the locally cached copy of this path and return the file object.

    The cache is refreshed through ``_refresh_cache`` before the local file is opened.
    For modes containing ``w``, ``+``, ``x`` or ``a`` the returned buffer's ``close``
    is replaced so that closing it also calls ``_upload_local_to_cloud``.

    Args:
        mode, buffering, encoding, errors, newline: Passed through to ``self._local.open``.
        force_overwrite_from_cloud: Forwarded to ``_refresh_cache``.
        force_overwrite_to_cloud: Forwarded to ``_upload_local_to_cloud`` on close.

    Raises:
        CloudPathIsADirectoryError: If the path exists and is not a file.
        CloudPathFileExistsError: If ``mode`` is exactly ``"x"`` and the path exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    # NOTE(review): only the exact mode "x" is checked here; combined modes such as
    # "xb" or "x+" are not caught by this branch -- confirm whether that is intended.
    if mode == "x" and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    # create any directories that may be needed if the file is new
    if not self._local.exists():
        self._local.parent.mkdir(parents=True, exist_ok=True)
        # no local file yet, so any mtime after the write compares as newer
        original_mtime = 0
    else:
        original_mtime = self._local.stat().st_mtime

    buffer = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # write modes need special handling on closing the buffer
    if any(m in mode for m in ("w", "+", "x", "a")):
        # dirty, handle, patch close
        original_close = buffer.close

        # since we are pretending this is a cloud file, upload it to the cloud
        # when the buffer is closed
        def _patched_close(*args, **kwargs):
            # close the real file first so the local copy is complete before uploading
            original_close(*args, **kwargs)

            # original mtime should match what was in the cloud; because of system clocks or rounding
            # by the cloud provider, the new version in our cache is "older" than the original version;
            # explicitly set the new modified time to be after the original modified time.
            if self._local.stat().st_mtime < original_mtime:
                new_mtime = original_mtime + 1
                os.utime(self._local, times=(new_mtime, new_mtime))

            self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

        buffer.close = _patched_close

        # keep reference in case we need to close when __del__ is called on this object
        self._handle = buffer

        # opened for write, so mark dirty
        self._dirty = True

    return buffer

`read_bytes(self)` ¶

Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def read_bytes(self):
    """Forward ``read_bytes`` to ``_dispatch_to_local_cache_path`` and return its result."""
    content = self._dispatch_to_local_cache_path("read_bytes")
    return content

`read_text(self)` ¶

Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def read_text(self):
    """Forward ``read_text`` to ``_dispatch_to_local_cache_path`` and return its result."""
    content = self._dispatch_to_local_cache_path("read_text")
    return content

`rename(self, target: CloudPath) -> CloudPath` ¶

Rename this path to the target path.

The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.

Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target``; identical to ``replace``.

    For cloud services a rename is not an in-place operation: the file is actually
    moved, so this simply delegates to ``replace`` and returns its result.
    """
    moved = self.replace(target)
    return moved

`replace(self, target: CloudPath) -> CloudPath` ¶

Rename this path to the target path, overwriting if that path exists.

The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.

Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target``, removing ``target`` first if it exists.

    Args:
        target: Destination path; must be of exactly the same class as this path.

    Returns:
        ``target``.

    Raises:
        TypeError: If ``target`` is not of exactly the same type as this path.
    """
    if type(self) is not type(target):
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {type(self)}"
        )

    # Replacing a path with itself is a no-op. Without this guard the target, which is
    # also the source, would be unlinked before the move below is attempted.
    if target == self:
        return target

    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target

`rglob(self, pattern: str) -> Iterable[CloudPath]` ¶

Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Return ``glob`` results for ``pattern`` prefixed with ``"**/"``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)

`rmdir(self)` ¶

Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def rmdir(self):
    """Remove this directory through the client; it must have no entries.

    Raises:
        CloudPathNotADirectoryError: If this path is a file.
        DirectoryNotEmptyError: If ``iterdir`` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    # pull at most one entry; the sentinel tells "nothing listed" apart from any value
    nothing = object()
    first_entry = next(self.iterdir(), nothing)
    if first_entry is not nothing:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)

`rmtree(self)` ¶

Delete an entire directory tree.

Source code in cloudpathlib/cloudpath.py

def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: If this path is a file.
    """
    path_is_file = self.is_file()
    if path_is_file:
        message = f"Path {self} is a file; call unlink instead of rmtree."
        raise CloudPathNotADirectoryError(message)

    self.client._remove(self)

`samefile(self, other_path: CloudPath) -> bool` ¶

Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def samefile(self, other_path: "CloudPath") -> bool:
    """Return whether ``other_path`` compares equal to this path."""
    # all cloud paths are absolute and the paths are used for hash,
    # so plain equality is the comparison used here
    is_same = self == other_path
    return is_same

`touch(self)` ¶

Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

@abc.abstractmethod
def touch(self):
    """Abstract hook: create the file and update its modified time.

    Implementations should use the client API.
    """
    ...

`unlink(self)` ¶

Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def unlink(self):
    """Remove this file through the client.

    Raises:
        CloudPathIsADirectoryError: If this path is a directory.
    """
    path_is_dir = self.is_dir()
    if path_is_dir:
        message = f"Path {self} is a directory; call rmdir instead of unlink."
        raise CloudPathIsADirectoryError(message)

    self.client._remove(self)

`upload_from(self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False) -> CloudPath` ¶

Upload a file or directory to the cloud path.

Source code in cloudpathlib/cloudpath.py

def upload_from(
    self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False
) -> "CloudPath":
    """Upload a file or directory to the cloud path.

    Args:
        source: Local file or directory to upload.
        force_overwrite_to_cloud: Forwarded to ``_upload_file_to_cloud`` and to the
            nested ``upload_from`` calls made for directory entries.

    Returns:
        This path when ``source`` is a directory; otherwise the cloud path the file was
        uploaded to, which is ``self / source.name`` when this path is an existing
        directory and this path itself in all other cases.
    """
    local = Path(source)

    if not local.is_dir():
        # a single file lands inside this path when it is an existing directory
        if self.exists() and self.is_dir():
            target = self / local.name
        else:
            target = self

        target._upload_file_to_cloud(local, force_overwrite_to_cloud=force_overwrite_to_cloud)
        return target

    # a directory is uploaded entry by entry, recursing through upload_from
    for entry in local.iterdir():
        (self / entry.name).upload_from(entry, force_overwrite_to_cloud=force_overwrite_to_cloud)

    return self

`with_name(self, name)` ¶

Return a new path with the file name changed. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def with_name(self, name):
    """Forward ``with_name`` and ``name`` to ``_dispatch_to_path``."""
    renamed = self._dispatch_to_path("with_name", name)
    return renamed

`with_suffix(self, suffix)` ¶

Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def with_suffix(self, suffix):
    """Forward ``with_suffix`` and ``suffix`` to ``_dispatch_to_path``."""
    changed = self._dispatch_to_path("with_suffix", suffix)
    return changed

`write_bytes(self, data: bytes)` ¶

Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def write_bytes(self, data: bytes):
    """Open the file in bytes mode, write ``data`` to it, and close the file.

    Returns whatever the file object's ``write`` call returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
    """
    # building the memoryview first type-checks for the buffer interface
    # before the file is opened (and truncated) for writing
    payload = memoryview(data)
    with self.open(mode="wb") as handle:
        written = handle.write(payload)
    return written

`write_text(self, data: str, encoding = None, errors = None)` ¶

Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/cloudpath.py

def write_text(self, data: str, encoding=None, errors=None):
    """Open the file in text mode, write ``data`` to it, and close the file.

    Returns whatever the file object's ``write`` call returns.

    Raises:
        TypeError: If ``data`` is not a ``str``.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252
    """
    data_is_text = isinstance(data, str)
    if not data_is_text:
        raise TypeError("data must be str, not %s" % data.__class__.__name__)

    with self.open(mode="w", encoding=encoding, errors=errors) as handle:
        written = handle.write(data)
    return written

cloudpathlib.CloudPath¶

Attributes¶

anchor: str property readonly ¶

drive: str property readonly ¶

fspath: str property readonly ¶

name property readonly ¶

parent property readonly ¶

parents property readonly ¶

parts property readonly ¶

stat property readonly ¶

stem property readonly ¶

suffix property readonly ¶

suffixes property readonly ¶

Methods¶

__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None) special ¶

as_uri(self) -> str ¶

copy(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath] ¶

copytree(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath] ¶

download_to(self, destination: Union[str, os.PathLike]) -> Path ¶

exists(self) -> bool ¶

glob(self, pattern: str) -> Iterable[CloudPath] ¶

is_dir(self) -> bool ¶

is_file(self) -> bool ¶

is_valid_cloudpath(path: Union[str, CloudPath], raise_on_error = False) -> bool classmethod ¶

iterdir(self) -> Iterable[CloudPath] ¶

joinpath(self, *args) ¶

match(self, path_pattern) ¶

mkdir(self, parents: bool = False, exist_ok: bool = False) ¶

open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO ¶

read_bytes(self) ¶

read_text(self) ¶

rename(self, target: CloudPath) -> CloudPath ¶

replace(self, target: CloudPath) -> CloudPath ¶

rglob(self, pattern: str) -> Iterable[CloudPath] ¶

rmdir(self) ¶

rmtree(self) ¶

samefile(self, other_path: CloudPath) -> bool ¶

touch(self) ¶

unlink(self) ¶

upload_from(self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False) -> CloudPath ¶

with_name(self, name) ¶

with_suffix(self, suffix) ¶

write_bytes(self, data: bytes) ¶

write_text(self, data: str, encoding = None, errors = None) ¶

`anchor: str` `property` `readonly` ¶

`drive: str` `property` `readonly` ¶

`fspath: str` `property` `readonly` ¶

`name` `property` `readonly` ¶

`parent` `property` `readonly` ¶

`parents` `property` `readonly` ¶

`parts` `property` `readonly` ¶

`stat` `property` `readonly` ¶

`stem` `property` `readonly` ¶

`suffix` `property` `readonly` ¶

`suffixes` `property` `readonly` ¶

`__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)` `special` ¶

`as_uri(self) -> str` ¶

`copy(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath]` ¶

`copytree(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath]` ¶

`download_to(self, destination: Union[str, os.PathLike]) -> Path` ¶

`exists(self) -> bool` ¶

`glob(self, pattern: str) -> Iterable[CloudPath]` ¶

`is_dir(self) -> bool` ¶

`is_file(self) -> bool` ¶

`is_valid_cloudpath(path: Union[str, CloudPath], raise_on_error = False) -> bool` `classmethod` ¶

`iterdir(self) -> Iterable[CloudPath]` ¶

`joinpath(self, *args)` ¶

`match(self, path_pattern)` ¶

`mkdir(self, parents: bool = False, exist_ok: bool = False)` ¶

`open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO` ¶

`read_bytes(self)` ¶

`read_text(self)` ¶

`rename(self, target: CloudPath) -> CloudPath` ¶

`replace(self, target: CloudPath) -> CloudPath` ¶

`rglob(self, pattern: str) -> Iterable[CloudPath]` ¶

`rmdir(self)` ¶

`rmtree(self)` ¶

`samefile(self, other_path: CloudPath) -> bool` ¶

`touch(self)` ¶

`unlink(self)` ¶

`upload_from(self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False) -> CloudPath` ¶

`with_name(self, name)` ¶

`with_suffix(self, suffix)` ¶

`write_bytes(self, data: bytes)` ¶

`write_text(self, data: str, encoding = None, errors = None)` ¶