Skip to content

cloudpathlib.GSPath

Class for representing and operating on Google Cloud Storage URIs, in the style of the Python standard library's pathlib module. Instances represent a path in GS with filesystem path semantics, and convenient methods allow for basic operations like joining, reading, writing, iterating over contents, etc. This class almost entirely mimics the pathlib.Path interface, so most familiar properties and methods should be available and behave in the expected way.

The GSClient class handles authentication with GCP. If a client instance is not explicitly specified on GSPath instantiation, a default client is used. See GSClient's documentation for more details.

Source code in cloudpathlib/gs/gspath.py
class GSPath(CloudPath):
    """Class for representing and operating on Google Cloud Storage URIs, in the style of the
    Python standard library's [`pathlib` module](https://docs.python.org/3/library/pathlib.html).
    Instances represent a path in GS with filesystem path semantics, and convenient methods allow
    for basic operations like joining, reading, writing, iterating over contents, etc. This class
    almost entirely mimics the [`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path)
    interface, so most familiar properties and methods should be available and behave in the
    expected way.

    The [`GSClient`](../gsclient/) class handles authentication with GCP. If a client instance is
    not explicitly specified on `GSPath` instantiation, a default client is used. See `GSClient`'s
    documentation for more details.
    """

    # URI scheme prefix that identifies a Google Cloud Storage path
    cloud_prefix: str = "gs://"
    client: "GSClient"

    @property
    def drive(self) -> str:
        """Return the bucket name, which serves as this path's drive."""
        return self.bucket

    def is_dir(self) -> bool:
        """Return True when the client classifies this path as ``"dir"``."""
        return self.client._is_file_or_dir(self) == "dir"

    def is_file(self) -> bool:
        """Return True when the client classifies this path as ``"file"``."""
        return self.client._is_file_or_dir(self) == "file"

    def mkdir(self, parents=False, exist_ok=False):
        """Do nothing; both arguments are accepted and ignored."""
        # not possible to make empty directory on cloud storage
        pass

    def touch(self):
        """Create an empty object at this path, or move the existing one onto itself.

        For a path that does not exist yet, an empty local file is created in a
        temporary directory and uploaded through the client. The temporary
        directory is removed even if the upload raises.
        """
        if self.exists():
            # NOTE(review): moving the object onto itself presumably refreshes its
            # modification time -- confirm against the client implementation.
            self.client._move_file(self, self)
        else:
            # the context manager guarantees cleanup on both success and failure
            with TemporaryDirectory() as tmp_dir:
                empty_file = Path(tmp_dir) / "empty"
                empty_file.touch()

                self.client._upload_file(empty_file, self)

    def stat(self):
        """Return an ``os.stat_result`` built from the client's metadata for this path.

        Only the device (set to ``cloud_prefix``), size and mtime slots are
        populated; every other slot is ``None``.

        Raises:
            NoStatError: if the client returns no metadata for this path.
        """
        meta = self.client._get_metadata(self)
        if meta is None:
            raise NoStatError(
                f"No stats available for {self}; it may be a directory or not exist."
            )

        # fall back to 0 when the metadata carries no "updated" entry
        try:
            mtime = meta["updated"].timestamp()
        except KeyError:
            mtime = 0

        return os.stat_result(
            (
                None,  # mode
                None,  # ino
                self.cloud_prefix,  # dev,
                None,  # nlink,
                None,  # uid,
                None,  # gid,
                meta.get("size", 0),  # size,
                None,  # atime,
                mtime,  # mtime,
                None,  # ctime,
            )
        )

    @property
    def bucket(self) -> str:
        """Return the first ``/``-separated component of the prefix-less path."""
        return self._no_prefix.split("/", 1)[0]

    @property
    def blob(self) -> str:
        """Return the object key: the path without prefix and drive, minus one leading slash."""
        key = self._no_prefix_no_drive

        # key should never have starting slash for
        # use with google-cloud-storage, etc.
        if key.startswith("/"):
            key = key[1:]

        return key

    @property
    def etag(self):
        """Return the ``"etag"`` entry of the client's metadata for this path."""
        return self.client._get_metadata(self).get("etag")

Attributes

anchor: str inherited property readonly

The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)

blob: str property readonly

bucket: str property readonly

cloud_prefix: str

drive: str property readonly

The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)

etag property readonly

fspath: str inherited property readonly

name inherited property readonly

The final path component, if any. (Docstring copied from pathlib.Path)

parent inherited property readonly

The logical parent of the path. (Docstring copied from pathlib.Path)

parents inherited property readonly

A sequence of this path's logical parents. (Docstring copied from pathlib.Path)

parts inherited property readonly

An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)

stem inherited property readonly

The final path component, minus its last suffix. (Docstring copied from pathlib.Path)

suffix inherited property readonly

The final component's last suffix, if any.

This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)

suffixes inherited property readonly

A list of the final component's suffixes, if any.

These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)

Methods

__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None) inherited special

Source code in cloudpathlib/gs/gspath.py
def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Validate ``cloud_path``, store its parsed forms, and attach a client.

    When ``client`` is None, the client of ``cloud_path`` is reused if it is
    itself a ``CloudPath``; otherwise the client class's default client is used.

    Raises:
        ClientMismatchError: if the resolved client is not an instance of the
            client class registered for this path class.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # several views of the raw string, each offering useful methods
    raw = str(cloud_path)
    self._str = raw
    self._url = urlparse(raw)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # resolve the client to use
    expected_cls = self._cloud_meta.client_class
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else expected_cls.get_default_client()
        )

    if not isinstance(client, expected_cls):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # set once the local copy has been written to and may need uploading
    self._dirty = False

    # holds the file handle when the local file gets opened
    self._handle = None

as_uri(self) -> str inherited

Return the path as a 'file' URI. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def as_uri(self) -> str:
    """Return the string form of this path, which is used as its URI."""
    uri = str(self)
    return uri

copy(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath] inherited

Copy self to destination folder or file, if self is a file.

Source code in cloudpathlib/gs/gspath.py
def copy(
    self,
    destination: Union[str, os.PathLike, "CloudPath"],
    force_overwrite_to_cloud: bool = False,
) -> Union[Path, "CloudPath"]:
    """Copy self to destination folder or file, if self is a file.

    Strings and ``os.PathLike`` destinations are converted with
    ``anypath.to_anypath``. A non-cloud destination is handled by
    ``download_to``; a cloud destination sharing this path's client is copied
    through the client; any other cloud destination goes through ``upload_from``.

    Raises:
        ValueError: if self does not exist or is not a file.
        OverwriteNewerCloudError: same-client copy only, when the destination
            exists, is at least as new as self, and ``force_overwrite_to_cloud``
            is false.
    """
    source_is_file = self.exists() and self.is_file()
    if not source_is_file:
        raise ValueError(
            f"Path {self} should be a file. To copy a directory tree use the method copytree."
        )

    # normalise string / os.PathLike destinations into path objects
    if isinstance(destination, (str, os.PathLike)):
        destination = anypath.to_anypath(destination)

    # anything that is not a cloud path is treated as a download target
    if not isinstance(destination, CloudPath):
        return self.download_to(destination)

    if self.client is not destination.client:
        # different client: upload from the local file path of self
        if not destination.exists() or destination.is_file():
            upload_target = destination
        else:
            upload_target = destination / self.name
        return upload_target.upload_from(
            self.fspath, force_overwrite_to_cloud=force_overwrite_to_cloud
        )

    # same client: use the client's _move_file to avoid downloading
    if destination.exists() and destination.is_dir():
        destination = destination / self.name

    if not force_overwrite_to_cloud and destination.exists():
        if destination.stat().st_mtime >= self.stat().st_mtime:
            raise OverwriteNewerCloudError(
                f"File ({destination}) is newer than ({self}). "
                f"To overwrite "
                f"pass `force_overwrite_to_cloud=True`."
            )

    return self.client._move_file(self, destination, remove_src=False)

copytree(self, destination: Union[str, os.PathLike, CloudPath], force_overwrite_to_cloud: bool = False) -> Union[pathlib.Path, CloudPath] inherited

Copy self to a directory, if self is a directory.

Source code in cloudpathlib/gs/gspath.py
def copytree(
    self,
    destination: Union[str, os.PathLike, "CloudPath"],
    force_overwrite_to_cloud: bool = False,
) -> Union[Path, "CloudPath"]:
    """Copy self to a directory, if self is a directory.

    Each entry yielded by ``iterdir`` is copied to ``destination / entry.name``:
    files with ``copy`` and directories with a recursive ``copytree`` call.

    Returns:
        The destination path object.

    Raises:
        CloudPathNotADirectoryError: if self is not a directory.
        CloudPathFileExistsError: if the destination exists and is a file.
    """
    if not self.is_dir():
        raise CloudPathNotADirectoryError(
            f"Origin path {self} must be a directory. To copy a single file use the method copy."
        )

    # handle string version of cloud paths + local paths
    if isinstance(destination, (str, os.PathLike)):
        destination = anypath.to_anypath(destination)

    if destination.exists() and destination.is_file():
        # f-string so the offending destination actually appears in the message
        raise CloudPathFileExistsError(
            f"Destination path {destination} of copytree must be a directory."
        )

    destination.mkdir(parents=True, exist_ok=True)

    for subpath in self.iterdir():
        if subpath.is_file():
            subpath.copy(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )
        elif subpath.is_dir():
            subpath.copytree(
                destination / subpath.name, force_overwrite_to_cloud=force_overwrite_to_cloud
            )

    return destination

download_to(self, destination: Union[str, os.PathLike]) -> Path inherited

Source code in cloudpathlib/gs/gspath.py
def download_to(self, destination: Union[str, os.PathLike]) -> Path:
    """Download this path to the local ``destination`` and return the local path.

    A file is downloaded through the client; if ``destination`` is an existing
    directory the file keeps its own name inside it. For anything else,
    ``destination`` is created as a directory and every entry from ``iterdir``
    is downloaded beneath it recursively.
    """
    target = Path(destination)

    if not self.is_file():
        target.mkdir(exist_ok=True)
        for child in self.iterdir():
            # string prefix of this path, always ending in a slash
            prefix = str(self)
            prefix = prefix if prefix.endswith("/") else prefix + "/"

            # what remains after the prefix is the child's relative location
            relative = str(child)[len(prefix) :]
            child.download_to(target / relative)

        return target

    if target.is_dir():
        target = target / self.name
    return self.client._download_file(self, target)

exists(self) -> bool inherited

Whether this path exists. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def exists(self) -> bool:
    """Return the client's answer to whether this path exists."""
    client = self.client
    return client._exists(self)

glob(self, pattern: str) -> Iterable[CloudPath] inherited

Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield the client's listing entries whose key matches ``pattern``.

    A leading cloud prefix and a leading ``<drive>/`` are removed from the
    pattern first. A pattern starting with ``**/`` triggers a recursive listing
    and has that marker removed. Matching is done with ``fnmatch`` against each
    entry's path without prefix and drive.
    """
    # drop the cloud prefix from the pattern when present
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # drop the "drive" from the pattern when present
    drive_part = self.drive + "/"
    if pattern.startswith(drive_part):
        pattern = pattern[len(drive_part) :]

    # a leading "**/" marks the pattern as recursive
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern.split("/", 1)[-1]

    for candidate in self.client._list_dir(self, recursive=recursive):
        if not fnmatch.fnmatch(candidate._no_prefix_no_drive, pattern):
            continue
        yield candidate

is_dir(self) -> bool

Whether this path is a directory. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def is_dir(self) -> bool:
    """Return True when the client classifies this path as ``"dir"``."""
    kind = self.client._is_file_or_dir(self)
    return kind == "dir"

is_file(self) -> bool

Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def is_file(self) -> bool:
    """Return True when the client classifies this path as ``"file"``."""
    kind = self.client._is_file_or_dir(self)
    return kind == "file"

iterdir(self) -> Iterable[CloudPath] inherited

Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def iterdir(self) -> Iterable["CloudPath"]:
    """Yield each entry of the client's non-recursive listing of this path."""
    yield from self.client._list_dir(self, recursive=False)

joinpath(self, *args) inherited

Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def joinpath(self, *args):
    """Forward ``joinpath`` and its arguments to ``_dispatch_to_path``."""
    joined = self._dispatch_to_path("joinpath", *args)
    return joined

match(self, path_pattern) inherited

Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def match(self, path_pattern):
    """Match this path against ``path_pattern`` via ``_dispatch_to_path``.

    A leading ``<anchor><drive>/`` is removed from the pattern before testing.
    """
    leading = self.anchor + self.drive + "/"
    if path_pattern.startswith(leading):
        path_pattern = path_pattern[len(leading) :]

    return self._dispatch_to_path("match", path_pattern)

mkdir(self, parents = False, exist_ok = False)

Create a new directory at this given path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def mkdir(self, parents=False, exist_ok=False):
    """Do nothing; both arguments are accepted and ignored."""
    # an empty directory cannot be created on cloud storage
    return None

open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO inherited

Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the local cached copy of this path and return the file object.

    The cache is refreshed first via ``_refresh_cache``. For modes containing
    ``w``, ``+``, ``x`` or ``a`` the returned buffer's ``close`` is replaced so
    that closing it also calls ``_upload_local_to_cloud``.

    Args:
        mode, buffering, encoding, errors, newline: passed to ``self._local.open``.
        force_overwrite_from_cloud: passed to ``_refresh_cache``.
        force_overwrite_to_cloud: passed to ``_upload_local_to_cloud`` on close.

    Raises:
        CloudPathIsADirectoryError: if the path exists and is not a file.
        CloudPathFileExistsError: if ``mode`` is exactly ``"x"`` and the path exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    # only the exact mode string "x" is checked here
    if mode == "x" and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    # create any directories that may be needed if the file is new
    if not self._local.exists():
        self._local.parent.mkdir(parents=True, exist_ok=True)
        original_mtime = 0
    else:
        # remembered so the patched close can detect an mtime that went backwards
        original_mtime = self._local.stat().st_mtime

    buffer = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # write modes need special handling when the buffer is closed
    if any(m in mode for m in ("w", "+", "x", "a")):
        # dirty, handle, patch close
        original_close = buffer.close

        # since we are pretending this is a cloud file, upload it to the cloud
        # when the buffer is closed
        def _patched_close(*args, **kwargs):
            # close the real buffer first so the local file is complete before upload
            original_close(*args, **kwargs)

            # original mtime should match what was in the cloud; because of system clocks or rounding
            # by the cloud provider, the new version in our cache is "older" than the original version;
            # explicitly set the new modified time to be after the original modified time.
            if self._local.stat().st_mtime < original_mtime:
                new_mtime = original_mtime + 1
                os.utime(self._local, times=(new_mtime, new_mtime))

            self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

        buffer.close = _patched_close

        # keep reference in case we need to close when __del__ is called on this object
        self._handle = buffer

        # opened for write, so mark dirty
        self._dirty = True

    return buffer

read_bytes(self) inherited

Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def read_bytes(self):
    """Forward ``read_bytes`` to ``_dispatch_to_local_cache_path``."""
    payload = self._dispatch_to_local_cache_path("read_bytes")
    return payload

read_text(self) inherited

Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def read_text(self):
    """Forward ``read_text`` to ``_dispatch_to_local_cache_path``."""
    text = self._dispatch_to_local_cache_path("read_text")
    return text

rename(self, target: CloudPath) -> CloudPath inherited

Rename this path to the target path.

The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.

Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target`` by delegating to ``replace``."""
    # on cloud services a rename is really a move of the file,
    # so rename and replace are the same operation
    moved = self.replace(target)
    return moved

replace(self, target: CloudPath) -> CloudPath inherited

Rename this path to the target path, overwriting if that path exists.

The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.

Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target``, unlinking ``target`` first if it exists.

    Returns:
        ``target``.

    Raises:
        TypeError: if ``target`` is not exactly the same type as self.
    """
    same_kind = type(self) is type(target)
    if not same_kind:
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {type(self)}"
        )

    # clear out whatever currently lives at the target
    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target

rglob(self, pattern: str) -> Iterable[CloudPath] inherited

Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Call ``glob`` with ``pattern`` prefixed by ``**/``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)

rmdir(self) inherited

Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def rmdir(self):
    """Remove this path through the client, provided ``iterdir`` yields nothing.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
        DirectoryNotEmptyError: if ``iterdir`` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    # a unique sentinel distinguishes "no entries" from any real entry
    nothing = object()
    first_entry = next(self.iterdir(), nothing)
    if first_entry is not nothing:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)

rmtree(self) inherited

Delete an entire directory tree.

Source code in cloudpathlib/gs/gspath.py
def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
    """
    path_is_file = self.is_file()
    if path_is_file:
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmtree."
        )

    self.client._remove(self)

samefile(self, other_path: CloudPath) -> bool inherited

Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def samefile(self, other_path: "CloudPath") -> bool:
    """Return the result of comparing this path with ``other_path`` for equality."""
    # every cloud path is absolute and the path itself is what gets hashed,
    # so plain equality is sufficient
    is_same = self == other_path
    return is_same

stat(self)

Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def stat(self):
    """Return an ``os.stat_result`` built from the client's metadata for this path.

    Only the device (set to ``cloud_prefix``), size and mtime slots are
    populated; every other slot is ``None``.

    Raises:
        NoStatError: if the client returns no metadata for this path.
    """
    metadata = self.client._get_metadata(self)
    if metadata is None:
        raise NoStatError(
            f"No stats available for {self}; it may be a directory or not exist."
        )

    # fall back to 0 when the metadata carries no "updated" entry
    try:
        modified = metadata["updated"].timestamp()
    except KeyError:
        modified = 0

    # slot order: mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime
    slots = [None] * 10
    slots[2] = self.cloud_prefix
    slots[6] = metadata.get("size", 0)
    slots[8] = modified

    return os.stat_result(tuple(slots))

touch(self)

Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def touch(self):
    """Create an empty object at this path, or move the existing one onto itself.

    For a path that does not exist yet, an empty local file is created in a
    temporary directory and uploaded through the client. The temporary
    directory is removed even if the upload raises.
    """
    if self.exists():
        # NOTE(review): moving the object onto itself presumably refreshes its
        # modification time -- confirm against the client implementation.
        self.client._move_file(self, self)
    else:
        # the context manager guarantees cleanup on both success and failure
        with TemporaryDirectory() as tmp_dir:
            empty_file = Path(tmp_dir) / "empty"
            empty_file.touch()

            self.client._upload_file(empty_file, self)

unlink(self) inherited

Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def unlink(self):
    """Remove this path through the client.

    Raises:
        CloudPathIsADirectoryError: if this path is a directory.
    """
    path_is_dir = self.is_dir()
    if path_is_dir:
        raise CloudPathIsADirectoryError(
            f"Path {self} is a directory; call rmdir instead of unlink."
        )

    self.client._remove(self)

upload_from(self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False) -> CloudPath inherited

Upload a file or directory to the cloud path.

Source code in cloudpathlib/gs/gspath.py
def upload_from(
    self, source: Union[str, os.PathLike], force_overwrite_to_cloud: bool = False
) -> "CloudPath":
    """Upload a file or directory to the cloud path.

    A local directory is uploaded entry by entry to ``self / entry.name`` and
    self is returned. A single file is uploaded to self, or to
    ``self / source.name`` when self exists and is a directory; the path that
    was uploaded to is returned.
    """
    local = Path(source)

    if not local.is_dir():
        # single file: pick the cloud path that receives it
        if self.exists() and self.is_dir():
            target = self / local.name
        else:
            target = self

        target._upload_file_to_cloud(local, force_overwrite_to_cloud=force_overwrite_to_cloud)

        return target

    # directory: recurse into each entry
    for entry in local.iterdir():
        (self / entry.name).upload_from(entry, force_overwrite_to_cloud=force_overwrite_to_cloud)

    return self

with_name(self, name) inherited

Return a new path with the file name changed. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def with_name(self, name):
    """Forward ``with_name`` and ``name`` to ``_dispatch_to_path``."""
    renamed = self._dispatch_to_path("with_name", name)
    return renamed

with_suffix(self, suffix) inherited

Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def with_suffix(self, suffix):
    """Forward ``with_suffix`` and ``suffix`` to ``_dispatch_to_path``."""
    resuffixed = self._dispatch_to_path("with_suffix", suffix)
    return resuffixed

write_bytes(self, data: bytes) inherited

Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def write_bytes(self, data: bytes):
    """Open the file in bytes mode, write to it, and close the file.

    Returns whatever the opened file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
    """
    # building the memoryview first rejects objects without the buffer
    # interface before the file gets truncated
    payload = memoryview(data)
    with self.open(mode="wb") as handle:
        return handle.write(payload)

write_text(self, data: str, encoding = None, errors = None) inherited

Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)

Source code in cloudpathlib/gs/gspath.py
def write_text(self, data: str, encoding=None, errors=None):
    """Open the file in text mode, write to it, and close the file.

    Returns whatever the opened file's ``write`` returns.

    Raises:
        TypeError: if ``data`` is not a ``str``.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252
    """
    if isinstance(data, str):
        with self.open(mode="w", encoding=encoding, errors=errors) as handle:
            return handle.write(data)

    raise TypeError("data must be str, not %s" % data.__class__.__name__)