File and classes that inherit from it. File is a special
DataModel that is generated
automatically when creating a DataChain from files, for example with
DataChain.from_storage. File
classes include various metadata fields about the underlying file, as well as methods to
read from the files and otherwise work with the file contents.
def export(
    self,
    output: str,
    placement: ExportPlacement = "fullpath",
    use_cache: bool = True,
) -> None:
    """Export file to new location.

    The destination is computed by ``get_destination_path(output, placement)``;
    its parent directory is created when needed and the file is then written
    with ``self.save``.

    Args:
        output: Base output location, forwarded to ``get_destination_path``.
        placement: Export placement strategy, forwarded to
            ``get_destination_path``.
        use_cache: When truthy, turns caching on for this instance before
            saving. A falsy value leaves the current setting untouched.
    """
    # Caching is only ever switched on here; use_cache=False does not disable
    # caching that was already enabled on the instance.
    if use_cache:
        self._caching_enabled = use_cache

    dst = self.get_destination_path(output, placement)
    dst_dir = os.path.dirname(dst)
    # A destination without a directory component (e.g. output="") has an
    # empty dirname, and os.makedirs("") raises FileNotFoundError.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    self.save(dst)
def get_destination_path(self, output: str, placement: ExportPlacement) -> str:
    """
    Build the full export destination for this file under ``output``.

    The relative part depends on ``placement``:

    * ``"filename"`` - the URL-unquoted ``self.name``;
    * ``"etag"`` - ``self.etag`` followed by ``self.get_file_suffix()``;
    * ``"fullpath"`` - the URL-unquoted ``self.get_full_name()``, prefixed
      with the source netloc when the source has a scheme other than
      ``file``.

    Raises:
        NotImplementedError: for the ``"checksum"`` placement.
        ValueError: for any other unrecognised placement.
    """
    if placement == "checksum":
        raise NotImplementedError("Checksum placement not implemented yet")

    if placement == "filename":
        relative = unquote(self.name)
    elif placement == "etag":
        relative = f"{self.etag}{self.get_file_suffix()}"
    elif placement == "fullpath":
        relative = unquote(self.get_full_name())
        parsed = urlparse(self.source)
        # Non-local sources get the netloc as a leading path segment.
        if parsed.scheme and parsed.scheme != "file":
            relative = posixpath.join(parsed.netloc, relative)
    else:
        raise ValueError(f"Unsupported file export placement: {placement}")

    return posixpath.join(output, relative)  # type: ignore[union-attr]
def get_local_path(self) -> Optional[str]:
    """Look up this file in the catalog cache and return what it reports.

    The result is whatever ``self._catalog.cache.get_path`` returns for this
    file's uid; per the annotation that is a path string, or None when the
    file is not cached.

    Raises:
        RuntimeError: if no catalog has been attached to this file.
    """
    catalog = self._catalog
    if catalog is None:
        raise RuntimeError(
            "cannot resolve local file path because catalog is not setup"
        )
    lookup = catalog.cache.get_path
    return lookup(self.get_uid())
@contextmanager
def open(self, mode: Literal["rb", "r"] = "rb"):
    """Open the file and return a file object.

    When ``self.location`` is set, the stream comes from
    ``VFileRegistry.resolve`` and is yielded as-is; ``mode`` is not applied
    on that path. Otherwise the object is opened through the catalog client
    for ``self.source``, after an explicit download when caching is enabled.

    Args:
        mode: ``"rb"`` yields the underlying binary stream; ``"r"`` wraps it
            in ``io.TextIOWrapper`` (regular, non-virtual files only).

    Yields:
        The opened file object.
    """
    if self.location:
        with VFileRegistry.resolve(self, self.location) as f:  # type: ignore[arg-type]
            yield f
        # A @contextmanager generator must yield exactly once. Without this
        # return, control fell through and opened/yielded the regular object
        # as well after the caller's ``with`` body finished.
        return

    uid = self.get_uid()
    client = self._catalog.get_client(self.source)
    if self._caching_enabled:
        client.download(uid, callback=self._download_cb)
    with client.open_object(
        uid, use_cache=self._caching_enabled, cb=self._download_cb
    ) as f:
        yield io.TextIOWrapper(f) if mode == "r" else f
@classmethod
def open(cls, file: "File", location: list[dict]):
    """Stream file from tar archive based on location in archive.

    Args:
        file: The file being opened; its ``_catalog``, ``_caching_enabled``
            and ``name`` are used.
        location: List of location entries. Only a single entry is
            supported; it must provide ``"offset"``, ``"size"`` and
            ``"parent"`` (keyword arguments for building the parent ``File``).

    Returns:
        A ``FileSlice`` built from the opened parent archive object, the
        offset, the size and ``file.name``.

    Raises:
        VFileError: if more than one location is given, or if ``"offset"``,
            ``"size"`` or ``"parent"`` is missing or None.
    """
    # NOTE(review): VFileError is assumed to be an Exception subclass defined
    # elsewhere in this file. These errors used to be constructed but never
    # raised, so invalid locations slipped through to the code below.
    if len(location) > 1:
        raise VFileError(file, "multiple 'location's are not supported yet")

    loc = location[0]

    if (offset := loc.get("offset", None)) is None:
        raise VFileError(file, "'offset' is not specified")

    if (size := loc.get("size", None)) is None:
        raise VFileError(file, "'size' is not specified")

    if (parent := loc.get("parent", None)) is None:
        raise VFileError(file, "'parent' is not specified")

    # The parent entry describes the containing archive; it shares the
    # catalog of the file being opened.
    tar_file = File(**parent)
    tar_file._set_stream(file._catalog)

    tar_file_uid = tar_file.get_uid()
    client = file._catalog.get_client(tar_file_uid.storage)
    fd = client.open_object(tar_file_uid, use_cache=file._caching_enabled)
    return FileSlice(fd, offset, size, file.name)
@contextmanager
def open(self, mode: Literal["rb", "r"] = "r"):
    """Open the file through the parent class, defaulting to text mode.

    Identical to the parent ``open`` except that ``mode`` defaults to
    ``"r"``; the stream produced by the parent is yielded unchanged.
    """
    parent_cm = super().open(mode=mode)
    with parent_cm as handle:
        yield handle