From 43d42293e58ef7c251d2078e76f69a858749472a Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Tue, 17 Dec 2024 10:41:31 -0500 Subject: [PATCH 1/4] Remove unnecessary cim mount cache Currently we have a map which maintains a mapping of CIM & containerd ID to the volume at which a CIM is mounted for the given container. This was required before the layer refactoring work when we needed to get the volume path from the layer cim path. However, this isn't needed anymore. As of now, this map doesn't provide much value and makes the code a bit complicated. Moreover, we will need to rewrite some of this code anyway when we do the work required for handling `shim delete` cleanups properly (https://github.com/containerd/containerd/issues/9727). Signed-off-by: Amit Barve --- internal/layers/wcow_mount.go | 8 ++--- internal/wclayer/cim/mount.go | 64 +++++++---------------------------- 2 files changed, 16 insertions(+), 56 deletions(-) diff --git a/internal/layers/wcow_mount.go b/internal/layers/wcow_mount.go index a5f706f940..fa8a5c0777 100644 --- a/internal/layers/wcow_mount.go +++ b/internal/layers/wcow_mount.go @@ -209,19 +209,17 @@ func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string return nil, nil, err } - volume, err := cimlayer.MountCimLayer(ctx, l.layers[0].cimPath, containerID) + volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID) if err != nil { return nil, nil, fmt.Errorf("mount layer cim: %w", err) } defer func() { if err != nil { - _ = cimlayer.UnmountCimLayer(ctx, l.layers[0].cimPath, containerID) + _ = cimlayer.UnmountCimLayer(ctx, volume) } }() - // Use the layer path for GUID rather than the mounted volume path, so that the generated layerID - // remains same. - layerID, err := cimlayer.LayerID(l.layers[0].cimPath, containerID) + layerID, err := cimlayer.LayerID(volume) if err != nil { return nil, nil, err } diff --git a/internal/wclayer/cim/mount.go b/internal/wclayer/cim/mount.go index e4b193533f..f3ddc2260b 100644 --- a/internal/wclayer/cim/mount.go +++ b/internal/wclayer/cim/mount.go @@ -7,27 +7,19 @@ import ( "fmt" "os" "strings" - "sync" "github.com/Microsoft/go-winio/pkg/guid" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" cimfs "github.com/Microsoft/hcsshim/pkg/cimfs" ) -// a cache of cim layer to its mounted volume - The mount manager plugin currently doesn't have an option of -// querying a mounted cim to get the volume at which it is mounted, so we maintain a cache of that here -var ( - cimMounts map[string]string = make(map[string]string) - cimMountMapLock sync.Mutex - // A random GUID used as a namespace for generating cim mount volume GUIDs: 6827367b-c388-4e9b-95ec-961c6d2c936c - cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}} -) +var cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}} -// MountCimLayer mounts the cim at path `cimPath` and returns the mount location of that cim. This method -// uses the `CimMountFlagCacheFiles` mount flag when mounting the cim. The containerID is used to generated -// the volumeID for the volume at which this CIM is mounted. containerID is used so that if the shim process -// crashes for any reason, the mounted cim can be correctly cleaned up during `shim delete` call. -func MountCimLayer(ctx context.Context, cimPath, containerID string) (string, error) { +// MountForkedCimLayer mounts the cim at path `cimPath` and returns the mount location of +// that cim. The containerID is used to generate the volumeID for the volume at which +// this CIM is mounted. containerID is used so that if the shim process crashes for any +// reason, the mounted cim can be correctly cleaned up during `shim delete` call. +func MountForkedCimLayer(ctx context.Context, cimPath, containerID string) (string, error) { volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID)) if err != nil { return "", fmt.Errorf("generated cim mount GUID: %w", err) @@ -37,40 +29,12 @@ func MountCimLayer(ctx context.Context, cimPath, containerID string) (string, er if err != nil { return "", err } - - cimMountMapLock.Lock() - defer cimMountMapLock.Unlock() - cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)] = vol - return vol, nil } -// Unmount unmounts the cim at mounted for given container. -func UnmountCimLayer(ctx context.Context, cimPath, containerID string) error { - cimMountMapLock.Lock() - defer cimMountMapLock.Unlock() - if vol, ok := cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)]; !ok { - return fmt.Errorf("cim %s not mounted", cimPath) - } else { - delete(cimMounts, fmt.Sprintf("%s_%s", containerID, cimPath)) - err := cimfs.Unmount(vol) - if err != nil { - return err - } - } - return nil -} - -// GetCimMountPath returns the volume at which a cim is mounted. If the cim is not mounted returns error -func GetCimMountPath(cimPath, containerID string) (string, error) { - cimMountMapLock.Lock() - defer cimMountMapLock.Unlock() - - if vol, ok := cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)]; !ok { - return "", fmt.Errorf("cim %s not mounted", cimPath) - } else { - return vol, nil - } +// Unmounts the cim mounted at the given volume +func UnmountCimLayer(ctx context.Context, volume string) error { + return cimfs.Unmount(volume) } func CleanupContainerMounts(containerID string) error { @@ -89,12 +53,10 @@ func CleanupContainerMounts(containerID string) error { return nil } -func LayerID(cimPath, containerID string) (string, error) { - cimMountMapLock.Lock() - defer cimMountMapLock.Unlock() - if vol, ok := cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)]; !ok { - return "", fmt.Errorf("cim %s not mounted", cimPath) - } else if !strings.HasPrefix(vol, "\\\\?\\Volume{") || !strings.HasSuffix(vol, "}\\") { +// LayerID provides a unique GUID for each mounted CIM volume. +func LayerID(vol string) (string, error) { + // since each mounted volume has a unique GUID, just return the same GUID as ID + if !strings.HasPrefix(vol, "\\\\?\\Volume{") || !strings.HasSuffix(vol, "}\\") { return "", fmt.Errorf("volume path %s is not in the expected format", vol) } else { return strings.TrimSuffix(strings.TrimPrefix(vol, "\\\\?\\Volume{"), "}\\"), nil From 3b5cb61886fc0d2fa8638ae385a9031f9cbd00c4 Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Tue, 17 Dec 2024 10:41:31 -0500 Subject: [PATCH 2/4] Block CIM types and new CimFS API wrappers CimFS now supports a new format for storing CIMs, named BlockCIM. A block CIM format can store the entire CIM on a block device (like a VHD) or a file formatted like a block device. This commit adds Go wrappers for the new CimFS APIs that allow creation, merging and mounting of such Block CIMs. Some new flags required when creating and mounting these CIMs are added and some deprecated flags have been removed. New type has been introduced to represent a block CIM. Unit tests have been added to test the newly added CimFS functionality. Lastly, CimFS flags aren't a part of the hcs schema (only the CimMount request is), those flags are moved from the hcs/schema2 package to the cimfs package. Signed-off-by: Amit Barve --- .../hcs/schema2/{cim_mount.go => cimfs.go} | 8 - internal/winapi/cimfs.go | 11 + internal/winapi/zsyscall_windows.go | 146 +++++ pkg/cimfs/cim_test.go | 611 ++++++++++++++++-- pkg/cimfs/cim_writer_windows.go | 142 +++- pkg/cimfs/cimfs.go | 83 ++- pkg/cimfs/doc.go | 90 ++- pkg/cimfs/mount_cim.go | 55 ++ 8 files changed, 1071 insertions(+), 75 deletions(-) rename internal/hcs/schema2/{cim_mount.go => cimfs.go} (70%) diff --git a/internal/hcs/schema2/cim_mount.go b/internal/hcs/schema2/cimfs.go similarity index 70% rename from internal/hcs/schema2/cim_mount.go rename to internal/hcs/schema2/cimfs.go index 81865e7ea4..52fb62a829 100644 --- a/internal/hcs/schema2/cim_mount.go +++ b/internal/hcs/schema2/cimfs.go @@ -9,14 +9,6 @@ package hcsschema -const ( - CimMountFlagNone uint32 = 0x0 - CimMountFlagChildOnly uint32 = 0x1 - CimMountFlagEnableDax uint32 = 0x2 - CimMountFlagCacheFiles uint32 = 0x4 - CimMountFlagCacheRegions uint32 = 0x8 -) - type CimMount struct { ImagePath string `json:"ImagePath,omitempty"` FileSystemName string `json:"FileSystemName,omitempty"` diff --git a/internal/winapi/cimfs.go b/internal/winapi/cimfs.go index 21664577b7..6c026d9822 100644 --- a/internal/winapi/cimfs.go +++ b/internal/winapi/cimfs.go @@ -32,10 +32,16 @@ type CimFsFileMetadata struct { EACount uint32 } +type CimFsImagePath struct { + ImageDir *uint16 + ImageName *uint16 +} + //sys CimMountImage(imagePath string, fsName string, flags uint32, volumeID *g) (hr error) = cimfs.CimMountImage? //sys CimDismountImage(volumeID *g) (hr error) = cimfs.CimDismountImage? //sys CimCreateImage(imagePath string, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) = cimfs.CimCreateImage? +//sys CimCreateImage2(imagePath string, flags uint32, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) = cimfs.CimCreateImage2? //sys CimCloseImage(cimFSHandle FsHandle) = cimfs.CimCloseImage? //sys CimCommitImage(cimFSHandle FsHandle) (hr error) = cimfs.CimCommitImage? @@ -45,3 +51,8 @@ type CimFsFileMetadata struct { //sys CimDeletePath(cimFSHandle FsHandle, path string) (hr error) = cimfs.CimDeletePath? //sys CimCreateHardLink(cimFSHandle FsHandle, newPath string, oldPath string) (hr error) = cimfs.CimCreateHardLink? //sys CimCreateAlternateStream(cimFSHandle FsHandle, path string, size uint64, cimStreamHandle *StreamHandle) (hr error) = cimfs.CimCreateAlternateStream? +//sys CimAddFsToMergedImage(cimFSHandle FsHandle, path string) (hr error) = cimfs.CimAddFsToMergedImage? +//sys CimAddFsToMergedImage2(cimFSHandle FsHandle, path string, flags uint32) (hr error) = cimfs.CimAddFsToMergedImage2? +//sys CimMergeMountImage(numCimPaths uint32, backingImagePaths *CimFsImagePath, flags uint32, volumeID *g) (hr error) = cimfs.CimMergeMountImage? +//sys CimTombstoneFile(cimFSHandle FsHandle, path string) (hr error) = cimfs.CimTombstoneFile? +//sys CimCreateMergeLink(cimFSHandle FsHandle, newPath string, oldPath string) (hr error) = cimfs.CimCreateMergeLink? diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go index ecdded312e..2abdc2e072 100644 --- a/internal/winapi/zsyscall_windows.go +++ b/internal/winapi/zsyscall_windows.go @@ -53,6 +53,8 @@ var ( procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA") procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA") procCM_Locate_DevNodeW = modcfgmgr32.NewProc("CM_Locate_DevNodeW") + procCimAddFsToMergedImage = modcimfs.NewProc("CimAddFsToMergedImage") + procCimAddFsToMergedImage2 = modcimfs.NewProc("CimAddFsToMergedImage2") procCimCloseImage = modcimfs.NewProc("CimCloseImage") procCimCloseStream = modcimfs.NewProc("CimCloseStream") procCimCommitImage = modcimfs.NewProc("CimCommitImage") @@ -60,9 +62,13 @@ var ( procCimCreateFile = modcimfs.NewProc("CimCreateFile") procCimCreateHardLink = modcimfs.NewProc("CimCreateHardLink") procCimCreateImage = modcimfs.NewProc("CimCreateImage") + procCimCreateImage2 = modcimfs.NewProc("CimCreateImage2") + procCimCreateMergeLink = modcimfs.NewProc("CimCreateMergeLink") procCimDeletePath = modcimfs.NewProc("CimDeletePath") procCimDismountImage = modcimfs.NewProc("CimDismountImage") + procCimMergeMountImage = modcimfs.NewProc("CimMergeMountImage") procCimMountImage = modcimfs.NewProc("CimMountImage") + procCimTombstoneFile = modcimfs.NewProc("CimTombstoneFile") procCimWriteStream = modcimfs.NewProc("CimWriteStream") procSetJobCompartmentId = modiphlpapi.NewProc("SetJobCompartmentId") procClosePseudoConsole = modkernel32.NewProc("ClosePseudoConsole") @@ -181,6 +187,54 @@ func _CMLocateDevNode(pdnDevInst *uint32, pDeviceID *uint16, uFlags uint32) (hr return } +func CimAddFsToMergedImage(cimFSHandle FsHandle, path string) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(path) + if hr != nil { + return + } + return _CimAddFsToMergedImage(cimFSHandle, _p0) +} + +func _CimAddFsToMergedImage(cimFSHandle FsHandle, path *uint16) (hr error) { + hr = procCimAddFsToMergedImage.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimAddFsToMergedImage.Addr(), uintptr(cimFSHandle), uintptr(unsafe.Pointer(path))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func CimAddFsToMergedImage2(cimFSHandle FsHandle, path string, flags uint32) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(path) + if hr != nil { + return + } + return _CimAddFsToMergedImage2(cimFSHandle, _p0, flags) +} + +func _CimAddFsToMergedImage2(cimFSHandle FsHandle, path *uint16, flags uint32) (hr error) { + hr = procCimAddFsToMergedImage2.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimAddFsToMergedImage2.Addr(), uintptr(cimFSHandle), uintptr(unsafe.Pointer(path)), uintptr(flags)) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func CimCloseImage(cimFSHandle FsHandle) (err error) { err = procCimCloseImage.Find() if err != nil { @@ -321,6 +375,59 @@ func _CimCreateImage(imagePath *uint16, oldFSName *uint16, newFSName *uint16, ci return } +func CimCreateImage2(imagePath string, flags uint32, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(imagePath) + if hr != nil { + return + } + return _CimCreateImage2(_p0, flags, oldFSName, newFSName, cimFSHandle) +} + +func _CimCreateImage2(imagePath *uint16, flags uint32, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) { + hr = procCimCreateImage2.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimCreateImage2.Addr(), uintptr(unsafe.Pointer(imagePath)), uintptr(flags), uintptr(unsafe.Pointer(oldFSName)), uintptr(unsafe.Pointer(newFSName)), uintptr(unsafe.Pointer(cimFSHandle))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func CimCreateMergeLink(cimFSHandle FsHandle, newPath string, oldPath string) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(newPath) + if hr != nil { + return + } + var _p1 *uint16 + _p1, hr = syscall.UTF16PtrFromString(oldPath) + if hr != nil { + return + } + return _CimCreateMergeLink(cimFSHandle, _p0, _p1) +} + +func _CimCreateMergeLink(cimFSHandle FsHandle, newPath *uint16, oldPath *uint16) (hr error) { + hr = procCimCreateMergeLink.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimCreateMergeLink.Addr(), uintptr(cimFSHandle), uintptr(unsafe.Pointer(newPath)), uintptr(unsafe.Pointer(oldPath))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func CimDeletePath(cimFSHandle FsHandle, path string) (hr error) { var _p0 *uint16 _p0, hr = syscall.UTF16PtrFromString(path) @@ -360,6 +467,21 @@ func CimDismountImage(volumeID *g) (hr error) { return } +func CimMergeMountImage(numCimPaths uint32, backingImagePaths *CimFsImagePath, flags uint32, volumeID *g) (hr error) { + hr = procCimMergeMountImage.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimMergeMountImage.Addr(), uintptr(numCimPaths), uintptr(unsafe.Pointer(backingImagePaths)), uintptr(flags), uintptr(unsafe.Pointer(volumeID))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func CimMountImage(imagePath string, fsName string, flags uint32, volumeID *g) (hr error) { var _p0 *uint16 _p0, hr = syscall.UTF16PtrFromString(imagePath) @@ -389,6 +511,30 @@ func _CimMountImage(imagePath *uint16, fsName *uint16, flags uint32, volumeID *g return } +func CimTombstoneFile(cimFSHandle FsHandle, path string) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(path) + if hr != nil { + return + } + return _CimTombstoneFile(cimFSHandle, _p0) +} + +func _CimTombstoneFile(cimFSHandle FsHandle, path *uint16) (hr error) { + hr = procCimTombstoneFile.Find() + if hr != nil { + return + } + r0, _, _ := syscall.SyscallN(procCimTombstoneFile.Addr(), uintptr(cimFSHandle), uintptr(unsafe.Pointer(path))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func CimWriteStream(cimStreamHandle StreamHandle, buffer uintptr, bufferSize uint32) (hr error) { hr = procCimWriteStream.Find() if hr != nil { diff --git a/pkg/cimfs/cim_test.go b/pkg/cimfs/cim_test.go index c1e2bc4028..7e194421c8 100644 --- a/pkg/cimfs/cim_test.go +++ b/pkg/cimfs/cim_test.go @@ -5,19 +5,19 @@ package cimfs import ( "bytes" - "context" "errors" "fmt" "io" + "syscall" "os" "path/filepath" "testing" "time" - "github.com/Microsoft/go-winio" + winio "github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio/pkg/guid" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + vhd "github.com/Microsoft/go-winio/vhd" "golang.org/x/sys/windows" ) @@ -29,6 +29,30 @@ type tuple struct { isDir bool } +// a test interface for representing both forked & block CIMs +type testCIM interface { + // returns a full CIM path + cimPath() string +} + +type testForkedCIM struct { + imageDir string + parentName string + imageName string +} + +func (t *testForkedCIM) cimPath() string { + return filepath.Join(t.imageDir, t.imageName) +} + +type testBlockCIM struct { + BlockCIM +} + +func (t *testBlockCIM) cimPath() string { + return filepath.Join(t.BlockPath, t.CimName) +} + // A utility function to create a file/directory and write data to it in the given cim. func createCimFileUtil(c *CimFsWriter, fileTuple tuple) error { // create files inside the cim @@ -60,6 +84,97 @@ func createCimFileUtil(c *CimFsWriter, fileTuple tuple) error { return nil } +// openNewCIM creates a new CIM and returns a writer to that CIM. The caller MUST close +// the writer. +func openNewCIM(t *testing.T, newCIM testCIM) *CimFsWriter { + t.Helper() + + var ( + writer *CimFsWriter + err error + ) + + switch val := newCIM.(type) { + case *testForkedCIM: + writer, err = Create(val.imageDir, val.parentName, val.imageName) + case *testBlockCIM: + writer, err = CreateBlockCIM(val.BlockPath, val.CimName, val.Type) + } + if err != nil { + t.Fatalf("failed while creating a cim: %s", err) + } + t.Cleanup(func() { + writer.Close() + // add 3 second sleep before test cleanup remove the cim directory + // otherwise, that removal fails due to some handles still being open + time.Sleep(3 * time.Second) + }) + return writer +} + +// compareContent takes in path to a directory (which is usually a volume at which a CIM is +// mounted) and ensures that every file/directory in the `testContents` shows up exactly +// as it is under that directory. +func compareContent(t *testing.T, root string, testContents []tuple) { + t.Helper() + + for _, ft := range testContents { + if ft.isDir { + _, err := os.Stat(filepath.Join(root, ft.filepath)) + if err != nil { + t.Fatalf("stat directory %s from cim: %s", ft.filepath, err) + } + } else { + f, err := os.Open(filepath.Join(root, ft.filepath)) + if err != nil { + t.Fatalf("open file %s: %s", filepath.Join(root, ft.filepath), err) + } + defer f.Close() + + // it is a file - read contents + fileContents, err := io.ReadAll(f) + if err != nil { + t.Fatalf("failure while reading file %s from cim: %s", ft.filepath, err) + } else if !bytes.Equal(fileContents, ft.fileContents) { + t.Fatalf("contents of file %s don't match", ft.filepath) + } + } + } +} + +func writeCIM(t *testing.T, writer *CimFsWriter, testContents []tuple) { + t.Helper() + for _, ft := range testContents { + err := createCimFileUtil(writer, ft) + if err != nil { + t.Fatalf("failed to create the file %s inside the cim:%s", ft.filepath, err) + } + } + if err := writer.Close(); err != nil { + t.Fatalf("cim close: %s", err) + } +} + +func mountCIM(t *testing.T, testCIM testCIM, mountFlags uint32) string { + t.Helper() + // mount and read the contents of the cim + volumeGUID, err := guid.NewV4() + if err != nil { + t.Fatalf("generate cim mount GUID: %s", err) + } + + mountvol, err := Mount(testCIM.cimPath(), volumeGUID, mountFlags) + if err != nil { + t.Fatalf("mount cim : %s", err) + } + t.Cleanup(func() { + if err := Unmount(mountvol); err != nil { + t.Logf("CIM unmount failed: %s", err) + } + }) + return mountvol +} + // This test creates a cim, writes some files to it and then reads those files back. // The cim created by this test has only 3 files in the following tree // / @@ -78,72 +193,476 @@ func TestCimReadWrite(t *testing.T) { } tempDir := t.TempDir() + testCIM := &testForkedCIM{ + imageDir: tempDir, + parentName: "", + imageName: "test.cim", + } + + writer := openNewCIM(t, testCIM) + writeCIM(t, writer, testContents) + mountvol := mountCIM(t, testCIM, CimMountFlagNone) + compareContent(t, mountvol, testContents) +} + +func TestBlockCIMInvalidCimName(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + blockPath := "C:\\Windows" + cimName := "" + _, err := CreateBlockCIM(blockPath, cimName, BlockCIMTypeSingleFile) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s`", err, os.ErrInvalid) + } +} + +func TestBlockCIMInvalidBlockPath(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + blockPath := "" + cimName := "foo.bcim" + _, err := CreateBlockCIM(blockPath, cimName, BlockCIMTypeSingleFile) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s", os.ErrInvalid, err) + } +} + +func TestBlockCIMInvalidType(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + blockPath := "" + cimName := "foo.bcim" + _, err := CreateBlockCIM(blockPath, cimName, BlockCIMTypeNone) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s", os.ErrInvalid, err) + } +} + +func TestCIMMergeInvalidType(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + mergedCIM := &BlockCIM{ + Type: 0, + BlockPath: "C:\\fake\\path", + CimName: "fakename.cim", + } + // doesn't matter what we pass in the source CIM array as long as it has 2+ elements + err := MergeBlockCIMs(mergedCIM, []*BlockCIM{mergedCIM, mergedCIM}) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s", os.ErrInvalid, err) + } +} + +func TestCIMMergeInvalidSourceType(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + mergedCIM := &BlockCIM{ + Type: BlockCIMTypeDevice, + BlockPath: "C:\\fake\\path", + CimName: "fakename.cim", + } + + sCIMs := []*BlockCIM{ + { + Type: BlockCIMTypeDevice, + BlockPath: "C:\\fake\\path", + CimName: "fakename.cim", + }, + { + Type: BlockCIMTypeSingleFile, + BlockPath: "C:\\fake\\path", + CimName: "fakename.cim", + }, + } - cimName := "test.cim" - cimPath := filepath.Join(tempDir, cimName) - c, err := Create(tempDir, "", cimName) + // doesn't matter what we pass in the source CIM array as long as it has 2+ elements + err := MergeBlockCIMs(mergedCIM, sCIMs) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s", os.ErrInvalid, err) + } +} + +func TestCIMMergeInvalidLength(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + mergedCIM := &BlockCIM{ + Type: 0, + BlockPath: "C:\\fake\\path", + CimName: "fakename.cim", + } + err := MergeBlockCIMs(mergedCIM, []*BlockCIM{mergedCIM}) + if !errors.Is(err, os.ErrInvalid) { + t.Fatalf("expected error `%s`, got `%s", os.ErrInvalid, err) + } +} + +func TestBlockCIMEmpty(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + root := t.TempDir() + blockPath := filepath.Join(root, "layer.bcim") + cimName := "layer.cim" + w, err := CreateBlockCIM(blockPath, cimName, BlockCIMTypeSingleFile) if err != nil { - t.Fatalf("failed while creating a cim: %s", err) + t.Fatalf("unexpected error: %s", err) } - defer func() { - // destroy cim sometimes fails if tried immediately after accessing & unmounting the cim so - // give some time and then remove. - time.Sleep(3 * time.Second) - if err := DestroyCim(context.Background(), cimPath); err != nil { - t.Fatalf("destroy cim failed: %s", err) + err = w.Close() + if err != nil { + t.Fatalf("unexpected error: %s", err) + } +} + +func TestBlockCIMSingleFileReadWrite(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + root := t.TempDir() + testCIM := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "layer.bcim"), + CimName: "layer.cim", + }, + } + + testContents := []tuple{ + {"foobar.txt", []byte("foobar test data"), false}, + {"foo", []byte(""), true}, + {"foo\\bar.txt", []byte("bar test data"), false}, + } + + writer := openNewCIM(t, testCIM) + writeCIM(t, writer, testContents) + mountvol := mountCIM(t, testCIM, CimMountSingleFileCim) + compareContent(t, mountvol, testContents) +} + +// creates a block device for storing a blockCIM. returns a volume path to the block +// device that can be used for writing the CIM. +func createBlockDevice(t *testing.T, dir string) string { + t.Helper() + // create a VHD for storing our block CIM + vhdPath := filepath.Join(dir, "layer.vhdx") + if err := vhd.CreateVhdx(vhdPath, 1, 1); err != nil { + t.Fatalf("failed to create VHD: %s", err) + } + + diskHandle, err := vhd.OpenVirtualDisk(vhdPath, vhd.VirtualDiskAccessNone, vhd.OpenVirtualDiskFlagNone) + if err != nil { + t.Fatalf("failed to open VHD: %s", err) + } + t.Cleanup(func() { + closeErr := syscall.CloseHandle(diskHandle) + if closeErr != nil { + t.Logf("failed to close VHD handle: %s", closeErr) } - }() + }) - for _, ft := range testContents { - err := createCimFileUtil(c, ft) - if err != nil { - t.Fatalf("failed to create the file %s inside the cim:%s", ft.filepath, err) + if err = vhd.AttachVirtualDisk(diskHandle, vhd.AttachVirtualDiskFlagNone, &vhd.AttachVirtualDiskParameters{Version: 2}); err != nil { + t.Fatalf("failed to attach VHD: %s", err) + } + t.Cleanup(func() { + detachErr := vhd.DetachVirtualDisk(diskHandle) + if detachErr != nil { + t.Logf("failed to detach VHD: %s", detachErr) } + }) + + physicalPath, err := vhd.GetVirtualDiskPhysicalPath(diskHandle) + if err != nil { + t.Fatalf("failed to get physical path of VHD: %s", err) } - if err := c.Close(); err != nil { - t.Fatalf("cim close: %s", err) + return physicalPath +} + +func TestBlockCIMBlockDeviceReadWrite(t *testing.T) { + if !IsBlockCimSupported() { + t.Skip("blockCIM not supported on this OS version") + } + + root := t.TempDir() + + physicalPath := createBlockDevice(t, root) + + testCIM := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeDevice, + BlockPath: physicalPath, + CimName: "layer.cim", + }, + } + + testContents := []tuple{ + {"foobar.txt", []byte("foobar test data"), false}, + {"foo", []byte(""), true}, + {"foo\\bar.txt", []byte("bar test data"), false}, + } + + writer := openNewCIM(t, testCIM) + writeCIM(t, writer, testContents) + mountvol := mountCIM(t, testCIM, CimMountBlockDeviceCim) + compareContent(t, mountvol, testContents) +} + +func TestMergedBlockCIMs(rootT *testing.T) { + if !IsBlockCimSupported() { + rootT.Skipf("BlockCIM not supported") + } + + // A slice of 3 slices, 1 slice for contents of each CIM + testContents := [][]tuple{ + {{"foo.txt", []byte("foo1"), false}}, + {{"bar.txt", []byte("bar"), false}}, + {{"foo.txt", []byte("foo2"), false}}, + } + // create 3 separate block CIMs + nCIMs := len(testContents) + + // test merging for both SingleFile & BlockDevice type of block CIMs + type testBlock struct { + name string + blockType BlockCIMType + mountFlag uint32 + blockPathGenerator func(t *testing.T, dir string) string + } + + tests := []testBlock{ + { + name: "single file", + blockType: BlockCIMTypeSingleFile, + mountFlag: CimMountSingleFileCim, + blockPathGenerator: func(t *testing.T, dir string) string { + t.Helper() + return filepath.Join(dir, "layer.bcim") + }, + }, + { + name: "block device", + blockType: BlockCIMTypeDevice, + mountFlag: CimMountBlockDeviceCim, + blockPathGenerator: func(t *testing.T, dir string) string { + t.Helper() + return createBlockDevice(t, dir) + }, + }, + } + + for _, test := range tests { + rootT.Run(test.name, func(t *testing.T) { + sourceCIMs := make([]*BlockCIM, 0, nCIMs) + for i := 0; i < nCIMs; i++ { + root := t.TempDir() + blockPath := test.blockPathGenerator(t, root) + tc := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: test.blockType, + BlockPath: blockPath, + CimName: "layer.cim", + }} + writer := openNewCIM(t, tc) + writeCIM(t, writer, testContents[i]) + sourceCIMs = append(sourceCIMs, &tc.BlockCIM) + } + + mergedBlockPath := test.blockPathGenerator(t, t.TempDir()) + // prepare a merged CIM + mergedCIM := &BlockCIM{ + Type: test.blockType, + BlockPath: mergedBlockPath, + CimName: "merged.cim", + } + + if err := MergeBlockCIMs(mergedCIM, sourceCIMs); err != nil { + t.Fatalf("failed to merge block CIMs: %s", err) + } + + // mount and read the contents of the cim + volumeGUID, err := guid.NewV4() + if err != nil { + t.Fatalf("generate cim mount GUID: %s", err) + } + + mountvol, err := MountMergedBlockCIMs(mergedCIM, sourceCIMs, test.mountFlag, volumeGUID) + if err != nil { + t.Fatalf("failed to mount merged block CIMs: %s\n", err) + } + defer func() { + if err := Unmount(mountvol); err != nil { + t.Logf("CIM unmount failed: %s", err) + } + }() + // since we are merging, only 1 foo.txt (from the 1st CIM) should + // show up + compareContent(t, mountvol, []tuple{testContents[0][0], testContents[1][0]}) + }) + } +} + +func TestTombstoneInMergedBlockCIMs(rootT *testing.T) { + if !IsBlockCimSupported() { + rootT.Skipf("BlockCIM not supported") + } + + root := rootT.TempDir() + + testContents := []tuple{ + {"foobar.txt", []byte("foobar test data"), false}, + {"foo", []byte(""), true}, + {"foo\\bar.txt", []byte("bar test data"), false}, + } + + cim1 := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "1.bcim"), + CimName: "test.cim", + }, + } + writer := openNewCIM(rootT, cim1) + writeCIM(rootT, writer, testContents) + + cim2 := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "2.bcim"), + CimName: "test.cim", + }, + } + + cim2writer := openNewCIM(rootT, cim2) + + if err := cim2writer.AddTombstone("foobar.txt"); err != nil { + rootT.Fatalf("failed to add tombstone: %s", err) + } + if err := cim2writer.Close(); err != nil { + rootT.Fatalf("failed to close the CIM: %s", err) + } + + mergedCIM := &BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "merged.cim"), + CimName: "merged.cim", + } + + sourceCIMs := []*BlockCIM{&cim2.BlockCIM, &cim1.BlockCIM} + if err := MergeBlockCIMs(mergedCIM, sourceCIMs); err != nil { + rootT.Fatalf("failed to merge block CIMs: %s", err) } // mount and read the contents of the cim volumeGUID, err := guid.NewV4() if err != nil { - t.Fatalf("generate cim mount GUID: %s", err) + rootT.Fatalf("generate cim mount GUID: %s", err) } - mountvol, err := Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles) + mountvol, err := MountMergedBlockCIMs(mergedCIM, sourceCIMs, CimMountSingleFileCim, volumeGUID) if err != nil { - t.Fatalf("mount cim : %s", err) + rootT.Fatalf("failed to mount merged block CIMs: %s\n", err) } defer func() { if err := Unmount(mountvol); err != nil { - t.Fatalf("unmount failed: %s", err) + rootT.Logf("CIM unmount failed: %s", err) } }() - for _, ft := range testContents { - if ft.isDir { - _, err := os.Stat(filepath.Join(mountvol, ft.filepath)) - if err != nil { - t.Fatalf("stat directory %s from cim: %s", ft.filepath, err) - } - } else { - f, err := os.Open(filepath.Join(mountvol, ft.filepath)) - if err != nil { - t.Fatalf("open file %s: %s", filepath.Join(mountvol, ft.filepath), err) - } - defer f.Close() + // verify that foobar.txt doesn't show up + _, err = os.Stat(filepath.Join(mountvol, "foobar.txt")) + if err == nil || !os.IsNotExist(err) { + rootT.Fatalf("expected 'file not found' error, got: %s", err) + } +} - fileContents := make([]byte, len(ft.fileContents)) +func TestMergedLinksInMergedBlockCIMs(rootT *testing.T) { + if !IsBlockCimSupported() { + rootT.Skipf("BlockCIM not supported") + } - // it is a file - read contents - rc, err := f.Read(fileContents) - if err != nil && !errors.Is(err, io.EOF) { - t.Fatalf("failure while reading file %s from cim: %s", ft.filepath, err) - } else if rc != len(ft.fileContents) { - t.Fatalf("couldn't read complete file contents for file: %s, read %d bytes, expected: %d", ft.filepath, rc, len(ft.fileContents)) - } else if !bytes.Equal(fileContents[:rc], ft.fileContents) { - t.Fatalf("contents of file %s don't match", ft.filepath) - } + root := rootT.TempDir() + + testContents := []tuple{ + {"foobar.txt", []byte("foobar test data"), false}, + {"foo", []byte(""), true}, + {"foo\\bar.txt", []byte("bar test data"), false}, + } + + cim1 := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "1.bcim"), + CimName: "test.cim", + }, + } + writer := openNewCIM(rootT, cim1) + writeCIM(rootT, writer, testContents) + + cim2 := &testBlockCIM{ + BlockCIM: BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "2.bcim"), + CimName: "test.cim", + }, + } + + cim2writer := openNewCIM(rootT, cim2) + + if err := cim2writer.AddMergedLink("foobar.txt", "b_link.txt"); err != nil { + rootT.Fatalf("failed to add merged link: %s", err) + } + if err := cim2writer.AddMergedLink("b_link.txt", "a_link.txt"); err != nil { + rootT.Fatalf("failed to add merged link: %s", err) + } + if err := cim2writer.Close(); err != nil { + rootT.Fatalf("failed to close the CIM: %s", err) + } + + mergedCIM := &BlockCIM{ + Type: BlockCIMTypeSingleFile, + BlockPath: filepath.Join(root, "merged.cim"), + CimName: "merged.cim", + } + + sourceCIMs := []*BlockCIM{&cim2.BlockCIM, &cim1.BlockCIM} + if err := MergeBlockCIMs(mergedCIM, sourceCIMs); err != nil { + rootT.Fatalf("failed to merge block CIMs: %s", err) + } + + // mount and read the contents of the cim + volumeGUID, err := guid.NewV4() + if err != nil { + rootT.Fatalf("generate cim mount GUID: %s", err) + } + + mountvol, err := MountMergedBlockCIMs(mergedCIM, sourceCIMs, CimMountSingleFileCim, volumeGUID) + if err != nil { + rootT.Fatalf("failed to mount merged block CIMs: %s\n", err) + } + defer func() { + if err := Unmount(mountvol); err != nil { + rootT.Logf("CIM unmount failed: %s", err) } + }() + + // read contents of "a_link.txt", they should match that of "foobar.txt" + data, err := os.ReadFile(filepath.Join(mountvol, "a_link.txt")) + if err != nil { + rootT.Logf("read file failed: %s", err) + } + if !bytes.Equal(data, testContents[0].fileContents) { + rootT.Logf("file contents don't match!") } } diff --git a/pkg/cimfs/cim_writer_windows.go b/pkg/cimfs/cim_writer_windows.go index 8e88216bfc..4204e87773 100644 --- a/pkg/cimfs/cim_writer_windows.go +++ b/pkg/cimfs/cim_writer_windows.go @@ -35,7 +35,8 @@ type CimFsWriter struct { } // Create creates a new cim image. The CimFsWriter returned can then be used to do -// operations on this cim. +// operations on this cim. If `oldFSName` is provided the new image is "forked" from the +// CIM with name `oldFSName` located under `imagePath`. func Create(imagePath string, oldFSName string, newFSName string) (_ *CimFsWriter, err error) { var oldNameBytes *uint16 // CimCreateImage API call has different behavior if the value of oldNameBytes / newNameBytes @@ -62,6 +63,41 @@ func Create(imagePath string, oldFSName string, newFSName string) (_ *CimFsWrite return &CimFsWriter{handle: handle, name: filepath.Join(imagePath, fsName)}, nil } +// Create creates a new block CIM and opens it for writing. The CimFsWriter +// returned can then be used to add/remove files to/from this CIM. +func CreateBlockCIM(blockPath, name string, blockType BlockCIMType) (_ *CimFsWriter, err error) { + if !IsBlockCimSupported() { + return nil, fmt.Errorf("block CIM not supported on this OS version") + } + if blockPath == "" || name == "" { + return nil, fmt.Errorf("both blockPath & name must be non empty: %w", os.ErrInvalid) + } + + // When creating block CIMs we always want them to be consistent CIMs i.e a CIMs + // created from the same layer tar will always be identical. + var createFlags uint32 = CimCreateFlagConsistentCim + switch blockType { + case BlockCIMTypeDevice: + createFlags |= CimCreateFlagBlockDeviceCim + case BlockCIMTypeSingleFile: + createFlags |= CimCreateFlagSingleFileCim + default: + return nil, fmt.Errorf("invalid block CIM type `%d`: %w", blockType, os.ErrInvalid) + } + + var newNameUTF16 *uint16 + newNameUTF16, err = windows.UTF16PtrFromString(name) + if err != nil { + return nil, err + } + + var handle winapi.FsHandle + if err := winapi.CimCreateImage2(blockPath, createFlags, nil, newNameUTF16, &handle); err != nil { + return nil, fmt.Errorf("failed to create block CIM at path %s,%s: %w", blockPath, name, err) + } + return &CimFsWriter{handle: handle, name: name}, nil +} + // CreateAlternateStream creates alternate stream of given size at the given path inside the cim. This will // replace the current active stream. Always, finish writing current active stream and then create an // alternate stream. @@ -160,7 +196,7 @@ func (c *CimFsWriter) Write(p []byte) (int, error) { return len(p), nil } -// AddLink adds a hard link from `oldPath` to `newPath` in the image. +// AddLink adds a hard link at `newPath` that points to `oldPath`. func (c *CimFsWriter) AddLink(oldPath string, newPath string) error { err := c.closeStream() if err != nil { @@ -173,21 +209,41 @@ func (c *CimFsWriter) AddLink(oldPath string, newPath string) error { return err } -// Unlink deletes the file at `path` from the image. +// AddMergedLink adds a hard link at `newPath` that points to `oldPath` in the +// image. However unlike AddLink this link is resolved at merge time. This allows us to +// create links to files that are in other CIMs. +func (c *CimFsWriter) AddMergedLink(oldPath string, newPath string) error { + err := c.closeStream() + if err != nil { + return err + } + err = winapi.CimCreateMergeLink(c.handle, newPath, oldPath) + if err != nil { + err = &LinkError{Cim: c.name, Op: "addMergedLink", Old: oldPath, New: newPath, Err: err} + } + return err +} + +// Unlink deletes the file at `path` from the image. Note that the file MUST have been +// already added to the image. func (c *CimFsWriter) Unlink(path string) error { err := c.closeStream() if err != nil { return err } - //TODO(ambarve): CimDeletePath currently returns an error if the file isn't found but we ideally want - // to put a tombstone at that path so that when cims are merged it removes that file from the lower - // layer - err = winapi.CimDeletePath(c.handle, path) - if err != nil && !os.IsNotExist(err) { - err = &PathError{Cim: c.name, Op: "unlink", Path: path, Err: err} + return winapi.CimDeletePath(c.handle, path) +} + +// Adds a tombstone at given path. This ensures that when the the CIMs are merged, the +// file at this path from lower layers won't show up in a mounted CIM. In case of Unlink, +// the file from the lower layers still shows up after merge. +func (c *CimFsWriter) AddTombstone(path string) error { + err := c.closeStream() + if err != nil { return err } - return nil + + return winapi.CimTombstoneFile(c.handle, path) } func (c *CimFsWriter) commit() error { @@ -203,22 +259,22 @@ func (c *CimFsWriter) commit() error { } // Close closes the CimFS filesystem. -func (c *CimFsWriter) Close() error { +func (c *CimFsWriter) Close() (err error) { if c.handle == 0 { return fmt.Errorf("invalid writer") } - if err := c.commit(); err != nil { + if err = c.commit(); err != nil { return &OpError{Cim: c.name, Op: "commit", Err: err} } - if err := winapi.CimCloseImage(c.handle); err != nil { - return &OpError{Cim: c.name, Op: "close", Err: err} - } + err = winapi.CimCloseImage(c.handle) c.handle = 0 - return nil + return err } -// DestroyCim finds out the region files, object files of this cim and then delete -// the region files, object files and the .cim file itself. +// DestroyCim finds out the region files, object files of this cim and then delete the +// region files, object files and the .cim file itself. Note that any other +// CIMs that were forked off of this CIM would become unusable after this operation. This +// should not be used for block CIMs, os.Remove is sufficient for block CIMs. func DestroyCim(ctx context.Context, cimPath string) (retErr error) { regionFilePaths, err := getRegionFilePaths(ctx, cimPath) if err != nil { @@ -289,3 +345,53 @@ func GetCimUsage(ctx context.Context, cimPath string) (uint64, error) { } return totalUsage, nil } + +// MergeBlockCIMs creates a new merged BlockCIM from the provided source BlockCIMs. CIM +// at index 0 is considered to be topmost CIM and the CIM at index `length-1` is +// considered the base CIM. (i.e file with the same path in CIM at index 0 will shadow +// files with the same path at all other CIMs) When mounting this merged CIM the source +// CIMs MUST be provided in the exact same order. +func MergeBlockCIMs(mergedCIM *BlockCIM, sourceCIMs []*BlockCIM) (err error) { + if !IsMergedCimSupported() { + return fmt.Errorf("merged CIMs aren't supported on this OS version") + } else if len(sourceCIMs) < 2 { + return fmt.Errorf("need at least 2 source CIMs, got %d: %w", len(sourceCIMs), os.ErrInvalid) + } + + var mergeFlag uint32 + switch mergedCIM.Type { + case BlockCIMTypeDevice: + mergeFlag = CimMergeFlagBlockDevice + case BlockCIMTypeSingleFile: + mergeFlag = CimMergeFlagSingleFile + default: + return fmt.Errorf("invalid block CIM type `%d`: %w", mergedCIM.Type, os.ErrInvalid) + } + + for _, sCIM := range sourceCIMs { + if sCIM.Type != mergedCIM.Type { + return fmt.Errorf("source CIM (%s) type doesn't match with merged CIM type: %w", sCIM.String(), os.ErrInvalid) + } + } + + cim, err := CreateBlockCIM(mergedCIM.BlockPath, mergedCIM.CimName, mergedCIM.Type) + if err != nil { + return fmt.Errorf("create merged CIM: %w", err) + } + defer func() { + cErr := cim.Close() + if err == nil { + err = cErr + } + }() + + // CimAddFsToMergedImage expects that topmost CIM is added first and the bottom + // most CIM is added last. + for _, sCIM := range sourceCIMs { + fullPath := filepath.Join(sCIM.BlockPath, sCIM.CimName) + if err := winapi.CimAddFsToMergedImage2(cim.handle, fullPath, mergeFlag); err != nil { + return fmt.Errorf("add cim to merged image: %w", err) + } + } + return nil +} diff --git a/pkg/cimfs/cimfs.go b/pkg/cimfs/cimfs.go index 21cdf109bc..f301764387 100644 --- a/pkg/cimfs/cimfs.go +++ b/pkg/cimfs/cimfs.go @@ -4,6 +4,8 @@ package cimfs import ( + "path/filepath" + "github.com/Microsoft/hcsshim/osversion" "github.com/sirupsen/logrus" ) @@ -13,5 +15,84 @@ func IsCimFSSupported() bool { if err != nil { logrus.WithError(err).Warn("get build revision") } - return osversion.Build() == 20348 && rv >= 2031 + build := osversion.Build() + // CimFS support is backported to LTSC2022 starting with revision 2031 and should + // otherwise be available on all builds >= V25H1Server + return build >= osversion.V25H1Server || (build == osversion.V21H2Server && rv >= 2031) +} + +// IsBlockCimSupported returns true if block formatted CIMs (i.e block device CIM & +// single file CIM) are supported on the current OS build. +func IsBlockCimSupported() bool { + build := osversion.Build() + // TODO(ambarve): Currently we are checking against a higher build number since there is no + // official build with block CIM support yet. Once we have that build, we should + // update the build number here. + return build >= 27766 +} + +func IsMergedCimSupported() bool { + // The merged CIM support was originally added before block CIM support. However, + // some of the merged CIM features that we use (e.g. merged hard links) were added + // later along with block CIM support. So use the same check as block CIM here. + return IsBlockCimSupported() +} + +type BlockCIMType uint32 + +const ( + BlockCIMTypeNone BlockCIMType = iota + BlockCIMTypeSingleFile + BlockCIMTypeDevice + + CimMountFlagNone uint32 = 0x0 + CimMountFlagEnableDax uint32 = 0x2 + CimMountBlockDeviceCim uint32 = 0x10 + CimMountSingleFileCim uint32 = 0x20 + + CimCreateFlagNone uint32 = 0x0 + CimCreateFlagDoNotExpandPEImages uint32 = 0x1 + CimCreateFlagFixedSizeChunks uint32 = 0x2 + CimCreateFlagBlockDeviceCim uint32 = 0x4 + CimCreateFlagSingleFileCim uint32 = 0x8 + CimCreateFlagConsistentCim uint32 = 0x10 + + CimMergeFlagNone uint32 = 0x0 + CimMergeFlagSingleFile uint32 = 0x1 + CimMergeFlagBlockDevice uint32 = 0x2 +) + +// BlockCIM represents a CIM stored in a block formatted way. +// +// A CIM usually is made up of a .cim file and multiple region & objectID +// files. Currently, all of these files are stored together in the same directory. To +// refer to such a CIM, we provide the path to the `.cim` file and the corresponding +// region & objectID files are assumed to be present right next to it. In this case the +// directory on the host's filesystem which holds one or more such CIMs is the container +// for those CIMs. +// +// Using multiple files for a single CIM can be very limiting. (For example, if you want +// to do a remote mount for a CIM layer, you now need to mount multiple files for a single +// layer). In such cases having a single container which contains all of the CIM related +// data is a great option. For this reason, CimFS has added support for a new type of a +// CIM named BlockCIM. A BlockCIM is a CIM for which the container used to store all of +// the CIM files is a block device or a binary file formatted like a block device. Such a +// block device (or a binary file) doesn't have a separate filesystem (like NTFS or FAT32) +// on it. Instead it is formatted in such a way that CimFS driver can read the blocks and +// find out which CIMs are present on that block device. The CIMs stored on a raw block +// device are sometimes referred to as block device CIMs and CIMs stored on the block +// formatted single file are referred as single file CIMs. +type BlockCIM struct { + Type BlockCIMType + // BlockPath is a path to the block device or the single file which contains the + // CIM. + BlockPath string + // Since a block device CIM or a single file CIM can container multiple CIMs, we + // refer to an individual CIM using its name. + CimName string +} + +// added for logging convenience +func (b *BlockCIM) String() string { + return filepath.Join(b.BlockPath, b.CimName) } diff --git a/pkg/cimfs/doc.go b/pkg/cimfs/doc.go index 9b5476cb6c..bb9ce57717 100644 --- a/pkg/cimfs/doc.go +++ b/pkg/cimfs/doc.go @@ -1,3 +1,89 @@ -// This package provides simple go wrappers on top of the win32 CIMFS mount APIs. -// The mounting/unmount of cim layers is done by the cim mount functions the internal/wclayer/cim package. +/* +This package provides simple go wrappers on top of the win32 CIMFS APIs. + +Details about CimFS & related win32 APIs can be found here: +https://learn.microsoft.com/en-us/windows/win32/api/_cimfs/ + +Details about how CimFS is being used in containerd can be found here: +https://github.com/containerd/containerd/issues/8346 + +CIM types: +Currently we support 2 types of CIMs: + - Standard/classic (for the lack of a better term) CIMs. + - Block CIMs. + +Standard CIMs store all the contents of a CIM in one or more region & objectID files. This +means a single CIM is made up of a `.cim` file, one or more region files and one or more +objectID files. All of these files MUST be present in the same directory in order for that +CIM to work. Block CIMs store all the data of a CIM in a single block device. A VHD can be +such a block device. For convenience CimFS also allows using a block formatted file as a +block device. + +Standard CIMs can be created with the `func Create(imagePath string, oldFSName string, +newFSName string) (_ *CimFsWriter, err error)` function defined in this package, whereas +block CIMs can be created with the `func CreateBlockCIM(blockPath, oldName, newName +string, blockType BlockCIMType) (_ *CimFsWriter, err error)` function. + +Forking & Merging CIMs: +In container world, CIMs are used for storing container image layers. Usually, one layer +is stored in one CIM. This means we need a way to combine multiple CIMs to create the +rootfs of a container. This can be achieved either by forking the CIMs or merging the +CIMs. + +Forking CIMs: +Forking means every time a CIM is created for a non-base layer, we fork it off of a parent +layer CIM. This ensures that contents that are written to this CIM are merged with that of +parent layer CIMs at the time of CIM creation itself. When such a CIM is mounted we get a +combined view of the contents of this CIM as well as the parent CIM from which this CIM +was forked. However, this means that all the CIMs MUST be stored in the same directory in +order for forked CIMs to work. And every non-base layer CIM is dependent on all of its +parent layer CIMs. + +Merging CIMs: +If we create one or more CIMs without forking them at the time of creation, we can still +merge those CIMs later to create a new special type of CIM called merged CIM. When +mounted, this merged CIM provides a view of the combined contents of all the layers that +were merged. The advantage of this approach is that each layer CIM (also referred to as +source CIMs in the context of merging CIMs) can be created & stored independent of its +parent CIMs. (Currently we only support merging block CIMs). + +In order to create a merged CIM we need at least 2 non-forked block CIMs (we can not merge +forked & non-forked CIMs), these CIMs are also referred to as source CIMs. We first create +a new CIM (for storing the merge) via the `CreateBlockCIM` API, then call +`CimAddFsToMergedImage2` repeatedly to add the source CIMs one by one to the merged +CIM. Closing the handle on this new CIM commits it automatically. The order in which +source CIMs are added matters. A source CIM that was added before another source CIM takes +precedence when merging the CIM contents. Crating this merged CIM only combines the +metadata of all the source CIMs, however the actual data isn't copied to the merged +CIM. This is why when mounting the merged CIM, we still need to provide paths to the +source CIMs. + +`CimMergeMountImage` is used to mount a merged CIM. This API expects an array of paths of +the merged CIM and all the source CIMs. Note that the array MUST include the merged CIM +path at the 0th index and all the source CIMs in the same order in which they were added +at the time of creation of the merged CIM. For example, if we merged CIMs 1.cim & 2.cim by +first adding 1.cim (via CimAddFsToMergedImage) and then adding 2.cim, then the array +should be [merged.cim, 1.cim, 2.cim] + +Merged CIM specific APIs. + +`CimTombstoneFile`: is used for creating a tombstone file in a CIM. Tombstone file is +similar to a whiteout file used in case of overlayFS. A tombstone's primary use case is +for merged CIMs. When multiple source CIMs are merged, a tombstone file/directory ensures +that any files with the same path in the lower layers (i.e source CIMs that are added +after the CIM that has a tombstone) do not show up in the mounted filesystem view. For +example, imagine 1.cim has a file at path `foo/bar.txt` and 2.cim has a tombstone at path +`foo/bar.txt`. If a merged CIM is created by first adding 2.cim (via +CimAddFsToMergedImage) and then adding 1.cim and then when that merged CIM is mounted, +`foo/bar.txt` will not show up in the mounted filesystem. A tombstone isn't required when +using forked CIMs, because we can just call `CimDeletePath` to remove a file from the +lower layers in that case. However, that doesn't work for merged CIMs since at the time of +writing one of the source CIMs, we can't delete files from other source CIMs. + +`CimCreateMergeLink`: is used to create a file link that is resolved at the time of +merging CIMs. This is required if we want to create a hardlink in one source CIM that +points to a file in another source CIM. Such a hardlink can not be resolved at the time of +writing the source CIM. It can only be resolved at the time of merge. This API allows us +to create such cross layer hard links. +*/ package cimfs diff --git a/pkg/cimfs/mount_cim.go b/pkg/cimfs/mount_cim.go index ea7341b2f0..8588d63b34 100644 --- a/pkg/cimfs/mount_cim.go +++ b/pkg/cimfs/mount_cim.go @@ -5,12 +5,14 @@ package cimfs import ( "fmt" + "os" "path/filepath" "strings" "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/winapi" "github.com/pkg/errors" + "golang.org/x/sys/windows" ) type MountError struct { @@ -63,3 +65,56 @@ func Unmount(volumePath string) error { return nil } + +// MountMergedBlockCIMs mounts the given merged BlockCIM (usually created with +// `MergeBlockCIMs`) at a volume with given GUID. The `sourceCIMs` MUST be identical +// to the `sourceCIMs` passed to `MergeBlockCIMs` when creating this merged CIM. +func MountMergedBlockCIMs(mergedCIM *BlockCIM, sourceCIMs []*BlockCIM, mountFlags uint32, volumeGUID guid.GUID) (string, error) { + if !IsMergedCimSupported() { + return "", fmt.Errorf("merged CIMs aren't supported on this OS version") + } else if len(sourceCIMs) < 2 { + return "", fmt.Errorf("need at least 2 source CIMs, got %d: %w", len(sourceCIMs), os.ErrInvalid) + } + + switch mergedCIM.Type { + case BlockCIMTypeDevice: + mountFlags |= CimMountBlockDeviceCim + case BlockCIMTypeSingleFile: + mountFlags |= CimMountSingleFileCim + default: + return "", fmt.Errorf("invalid block CIM type `%d`", mergedCIM.Type) + } + + for _, sCIM := range sourceCIMs { + if sCIM.Type != mergedCIM.Type { + return "", fmt.Errorf("source CIM (%s) type doesn't match with merged CIM type: %w", sCIM.String(), os.ErrInvalid) + } + } + + // win32 mount merged CIM API expects an array of all CIMs. 0th entry in the array + // should be the merged CIM. All remaining entries should be the source CIM paths + // in the same order that was used while creating the merged CIM. + allcims := append([]*BlockCIM{mergedCIM}, sourceCIMs...) + cimsToMerge := []winapi.CimFsImagePath{} + for _, bcim := range allcims { + // Trailing backslashes cause problems-remove those + imageDir, err := windows.UTF16PtrFromString(strings.TrimRight(bcim.BlockPath, `\`)) + if err != nil { + return "", fmt.Errorf("convert string to utf16: %w", err) + } + cimName, err := windows.UTF16PtrFromString(bcim.CimName) + if err != nil { + return "", fmt.Errorf("convert string to utf16: %w", err) + } + + cimsToMerge = append(cimsToMerge, winapi.CimFsImagePath{ + ImageDir: imageDir, + ImageName: cimName, + }) + } + + if err := winapi.CimMergeMountImage(uint32(len(cimsToMerge)), &cimsToMerge[0], mountFlags, &volumeGUID); err != nil { + return "", &MountError{Cim: filepath.Join(mergedCIM.BlockPath, mergedCIM.CimName), Op: "MountMerged", Err: err} + } + return fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String()), nil +} From 1f4af71c7094661756a3402c6f612027eac10b4f Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Tue, 17 Dec 2024 10:41:38 -0500 Subject: [PATCH 3/4] Add LayerWriter for block CIMs This commit adds a layer writer that can be used for extracting an image layer tar into a Block CIM format. Existing forked CIM layer writer was renamed to a common base type `cimLayerWriter`. Forked CIM layer writer & Block CIM layer writer both now extend this common base type to write layers in that specific format. This commit also removes some code that used `time.Now()` as the default timestamps for some files that it creates within the layer CIM. These timestamps cause differences in the layer CIMs generated from the same layer tar. This change fixes that. Signed-off-by: Amit Barve --- internal/wclayer/cim/block_cim_writer.go | 135 +++++++++++++++++ internal/wclayer/cim/cim_writer_test.go | 55 +++++++ .../wclayer/cim/{LayerWriter.go => common.go} | 142 +++++++----------- internal/wclayer/cim/file_writer.go | 3 + internal/wclayer/cim/forked_cim_writer.go | 78 ++++++++++ internal/wclayer/cim/pending.go | 7 + internal/wclayer/cim/process.go | 17 +-- pkg/ociwclayer/cim/import.go | 46 +++++- 8 files changed, 375 insertions(+), 108 deletions(-) create mode 100644 internal/wclayer/cim/block_cim_writer.go create mode 100644 internal/wclayer/cim/cim_writer_test.go rename internal/wclayer/cim/{LayerWriter.go => common.go} (66%) create mode 100644 internal/wclayer/cim/forked_cim_writer.go diff --git a/internal/wclayer/cim/block_cim_writer.go b/internal/wclayer/cim/block_cim_writer.go new file mode 100644 index 0000000000..1e7da68c05 --- /dev/null +++ b/internal/wclayer/cim/block_cim_writer.go @@ -0,0 +1,135 @@ +//go:build windows + +package cim + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/Microsoft/go-winio" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/pkg/cimfs" +) + +// A BlockCIMLayerWriter implements the CIMLayerWriter interface to allow writing +// container image layers in the blocked cim format. +type BlockCIMLayerWriter struct { + *cimLayerWriter + // the layer that we are writing + layer *cimfs.BlockCIM + // parent layers + parentLayers []*cimfs.BlockCIM + // added files maintains a map of all files that have been added to this layer + addedFiles map[string]struct{} +} + +var _ CIMLayerWriter = &BlockCIMLayerWriter{} + +// NewBlockCIMLayerWriter writes the layer files in the block CIM format. +func NewBlockCIMLayerWriter(ctx context.Context, layer *cimfs.BlockCIM, parentLayers []*cimfs.BlockCIM) (_ *BlockCIMLayerWriter, err error) { + if !cimfs.IsBlockCimSupported() { + return nil, fmt.Errorf("BlockCIM not supported on this build") + } else if layer.Type != cimfs.BlockCIMTypeSingleFile { + // we only support writing single file CIMs for now because in layer + // writing process we still need to write some files (registry hives) + // outside the CIM. We currently use the parent directory of the CIM (i.e + // the parent directory of block path in this case) for this. This can't + // be reliably done with the block device CIM since the block path + // provided will be a volume path. However, once we get rid of hive rollup + // step during layer import we should be able to support block device + // CIMs. + return nil, ErrBlockCIMWriterNotSupported + } + + parentLayerPaths := make([]string, 0, len(parentLayers)) + for _, pl := range parentLayers { + if pl.Type != layer.Type { + return nil, ErrBlockCIMParentTypeMismatch + } + parentLayerPaths = append(parentLayerPaths, filepath.Dir(pl.BlockPath)) + } + + cim, err := cimfs.CreateBlockCIM(layer.BlockPath, layer.CimName, layer.Type) + if err != nil { + return nil, fmt.Errorf("error in creating a new cim: %w", err) + } + defer func() { + if err != nil { + cErr := cim.Close() + if cErr != nil { + log.G(ctx).WithError(err).Warnf("failed to close cim after error: %s", cErr) + } + } + }() + + // std file writer writes registry hives outside the CIM for 2 reasons. 1. We can + // merge the hives of this layer with the parent layer hives and then write the + // merged hives into the CIM. 2. When importing child layer of this layer, we + // have access to the merges hives of this layer. + sfw, err := newStdFileWriter(filepath.Dir(layer.BlockPath), parentLayerPaths) + if err != nil { + return nil, fmt.Errorf("error in creating new standard file writer: %w", err) + } + + return &BlockCIMLayerWriter{ + layer: layer, + parentLayers: parentLayers, + addedFiles: make(map[string]struct{}), + cimLayerWriter: &cimLayerWriter{ + ctx: ctx, + cimWriter: cim, + stdFileWriter: sfw, + layerPath: filepath.Dir(layer.BlockPath), + parentLayerPaths: parentLayerPaths, + }, + }, nil +} + +// Add adds a file to the layer with given metadata. +func (cw *BlockCIMLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error { + cw.addedFiles[name] = struct{}{} + return cw.cimLayerWriter.Add(name, fileInfo, fileSize, securityDescriptor, extendedAttributes, reparseData) +} + +// Remove removes a file that was present in a parent layer from the layer. +func (cw *BlockCIMLayerWriter) Remove(name string) error { + // set active write to nil so that we panic if layer tar is incorrectly formatted. + cw.activeWriter = nil + err := cw.cimWriter.AddTombstone(name) + if err != nil { + return fmt.Errorf("failed to remove file : %w", err) + } + return nil +} + +// AddLink adds a hard link to the layer. Note that the link added here is evaluated only +// at the CIM merge time. So an invalid link will not throw an error here. +func (cw *BlockCIMLayerWriter) AddLink(name string, target string) error { + // set active write to nil so that we panic if layer tar is incorrectly formatted. + cw.activeWriter = nil + + // when adding links to a block CIM, we need to know if the target file is present + // in this same block CIM or if it is coming from one of the parent layers. If the + // file is in the same CIM we add a standard hard link. If the file is not in the + // same CIM we add a special type of link called merged link. This merged link is + // resolved when all the individual block CIM layers are merged. In order to + // reliably know if the target is a part of the CIM or not, we wait until all + // files are added and then lookup the added entries in a map to make the + // decision. + pendingLinkOp := func(c *cimfs.CimFsWriter) error { + if _, ok := cw.addedFiles[target]; ok { + // target was added in this layer - add a normal link. Once a + // hardlink is added that hardlink also becomes a valid target for + // other links so include it in the map. + cw.addedFiles[name] = struct{}{} + return c.AddLink(target, name) + } else { + // target is from a parent layer - add a merged link + return c.AddMergedLink(target, name) + } + } + cw.pendingOps = append(cw.pendingOps, pendingCimOpFunc(pendingLinkOp)) + return nil + +} diff --git a/internal/wclayer/cim/cim_writer_test.go b/internal/wclayer/cim/cim_writer_test.go new file mode 100644 index 0000000000..0abc19da74 --- /dev/null +++ b/internal/wclayer/cim/cim_writer_test.go @@ -0,0 +1,55 @@ +//go:build windows + +package cim + +import ( + "context" + "errors" + "testing" + + "github.com/Microsoft/hcsshim/pkg/cimfs" +) + +func TestSingleFileWriterTypeMismatch(t *testing.T) { + if !cimfs.IsBlockCimSupported() { + t.Skipf("BlockCIM not supported") + } + + layer := &cimfs.BlockCIM{ + Type: cimfs.BlockCIMTypeSingleFile, + BlockPath: "", + CimName: "", + } + + parent := &cimfs.BlockCIM{ + Type: cimfs.BlockCIMTypeDevice, + BlockPath: "", + CimName: "", + } + + _, err := NewBlockCIMLayerWriter(context.TODO(), layer, []*cimfs.BlockCIM{parent}) + if !errors.Is(err, ErrBlockCIMParentTypeMismatch) { + t.Fatalf("expected error `%s`, got `%s`", ErrBlockCIMParentTypeMismatch, err) + } +} + +func TestSingleFileWriterInvalidBlockType(t *testing.T) { + if !cimfs.IsBlockCimSupported() { + t.Skipf("BlockCIM not supported") + } + + layer := &cimfs.BlockCIM{ + BlockPath: "", + CimName: "", + } + + parent := &cimfs.BlockCIM{ + BlockPath: "", + CimName: "", + } + + _, err := NewBlockCIMLayerWriter(context.TODO(), layer, []*cimfs.BlockCIM{parent}) + if !errors.Is(err, ErrBlockCIMWriterNotSupported) { + t.Fatalf("expected error `%s`, got `%s`", ErrBlockCIMWriterNotSupported, err) + } +} diff --git a/internal/wclayer/cim/LayerWriter.go b/internal/wclayer/cim/common.go similarity index 66% rename from internal/wclayer/cim/LayerWriter.go rename to internal/wclayer/cim/common.go index 9315971b64..391a5aaeda 100644 --- a/internal/wclayer/cim/LayerWriter.go +++ b/internal/wclayer/cim/common.go @@ -10,39 +10,14 @@ import ( "strings" "github.com/Microsoft/go-winio" - "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/wclayer" "github.com/Microsoft/hcsshim/pkg/cimfs" - "go.opencensus.io/trace" ) -// A CimLayerWriter implements the wclayer.LayerWriter interface to allow writing container -// image layers in the cim format. -// A cim layer consist of cim files (which are usually stored in the `cim-layers` directory and -// some other files which are stored in the directory of that layer (i.e the `path` directory). -type CimLayerWriter struct { - ctx context.Context - s *trace.Span - // path to the layer (i.e layer's directory) as provided by the caller. - // Even if a layer is stored as a cim in the cim directory, some files associated - // with a layer are still stored in this path. - layerPath string - // parent layer paths - parentLayerPaths []string - // Handle to the layer cim - writes to the cim file - cimWriter *cimfs.CimFsWriter - // Handle to the writer for writing files in the local filesystem - stdFileWriter *stdFileWriter - // reference to currently active writer either cimWriter or stdFileWriter - activeWriter io.Writer - // denotes if this layer has the UtilityVM directory - hasUtilityVM bool - // some files are written outside the cim during initial import (via stdFileWriter) because we need to - // make some modifications to these files before writing them to the cim. The pendingOps slice - // maintains a list of such delayed modifications to the layer cim. These modifications are applied at - // the very end of layer import process. - pendingOps []pendingCimOp -} +var ( + ErrBlockCIMWriterNotSupported = fmt.Errorf("writing block device CIM isn't supported") + ErrBlockCIMParentTypeMismatch = fmt.Errorf("parent layer block CIM type doesn't match with extraction layer") +) type hive struct { name string @@ -60,6 +35,24 @@ var ( } ) +// CIMLayerWriter is an interface that supports writing a new container image layer to the +// CIM format +type CIMLayerWriter interface { + // Add adds a file to the layer with given metadata. + Add(string, *winio.FileBasicInfo, int64, []byte, []byte, []byte) error + // AddLink adds a hard link to the layer. The target must already have been added. + AddLink(string, string) error + // AddAlternateStream adds an alternate stream to a file + AddAlternateStream(string, uint64) error + // Remove removes a file that was present in a parent layer from the layer. + Remove(string) error + // Write writes data to the current file. The data must be in the format of a Win32 + // backup stream. + Write([]byte) (int, error) + // Close finishes the layer writing process and releases any resources. + Close(context.Context) error +} + func isDeltaOrBaseHive(path string) bool { for _, hv := range hives { if strings.EqualFold(path, filepath.Join(wclayer.HivesPath, hv.delta)) || @@ -79,8 +72,33 @@ func isStdFile(path string) bool { path == wclayer.BcdFilePath || path == wclayer.BootMgrFilePath) } +// cimLayerWriter is a base struct that is further extended by forked cim writer & blocked +// cim writer to provide full functionality of writing layers. +type cimLayerWriter struct { + ctx context.Context + // Handle to the layer cim - writes to the cim file + cimWriter *cimfs.CimFsWriter + // Handle to the writer for writing files in the local filesystem + stdFileWriter *stdFileWriter + // reference to currently active writer either cimWriter or stdFileWriter + activeWriter io.Writer + // denotes if this layer has the UtilityVM directory + hasUtilityVM bool + // path to the layer (i.e layer's directory) as provided by the caller. + // Even if a layer is stored as a cim in the cim directory, some files associated + // with a layer are still stored in this path. + layerPath string + // parent layer paths + parentLayerPaths []string + // some files are written outside the cim during initial import (via stdFileWriter) because we need to + // make some modifications to these files before writing them to the cim. The pendingOps slice + // maintains a list of such delayed modifications to the layer cim. These modifications are applied at + // the very end of layer import process. + pendingOps []pendingCimOp +} + // Add adds a file to the layer with given metadata. -func (cw *CimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error { +func (cw *cimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error { if name == wclayer.UtilityVMPath { cw.hasUtilityVM = true } @@ -108,7 +126,7 @@ func (cw *CimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSi } // AddLink adds a hard link to the layer. The target must already have been added. -func (cw *CimLayerWriter) AddLink(name string, target string) error { +func (cw *cimLayerWriter) AddLink(name string, target string) error { // set active write to nil so that we panic if layer tar is incorrectly formatted. cw.activeWriter = nil if isStdFile(target) { @@ -130,7 +148,7 @@ func (cw *CimLayerWriter) AddLink(name string, target string) error { // AddAlternateStream creates another alternate stream at the given // path. Any writes made after this call will go to that stream. -func (cw *CimLayerWriter) AddAlternateStream(name string, size uint64) error { +func (cw *cimLayerWriter) AddAlternateStream(name string, size uint64) error { if isStdFile(name) { // As of now there is no known case of std file having multiple data streams. // If such a file is encountered our assumptions are wrong. Error out. @@ -144,21 +162,14 @@ func (cw *CimLayerWriter) AddAlternateStream(name string, size uint64) error { return nil } -// Remove removes a file that was present in a parent layer from the layer. -func (cw *CimLayerWriter) Remove(name string) error { - // set active write to nil so that we panic if layer tar is incorrectly formatted. - cw.activeWriter = nil - return cw.cimWriter.Unlink(name) -} - // Write writes data to the current file. The data must be in the format of a Win32 // backup stream. -func (cw *CimLayerWriter) Write(b []byte) (int, error) { +func (cw *cimLayerWriter) Write(b []byte) (int, error) { return cw.activeWriter.Write(b) } // Close finishes the layer writing process and releases any resources. -func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) { +func (cw *cimLayerWriter) Close(ctx context.Context) (retErr error) { if err := cw.stdFileWriter.Close(ctx); err != nil { return err } @@ -170,7 +181,7 @@ func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) { } }() - // UVM based containers aren't supported with CimFS, don't process the UVM layer + // We don't support running UtilityVM with CIM layers yet. processUtilityVM := false if len(cw.parentLayerPaths) == 0 { @@ -190,50 +201,3 @@ func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) { } return nil } - -func NewCimLayerWriter(ctx context.Context, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (_ *CimLayerWriter, err error) { - if !cimfs.IsCimFSSupported() { - return nil, fmt.Errorf("CimFs not supported on this build") - } - - ctx, span := trace.StartSpan(ctx, "hcsshim::NewCimLayerWriter") - defer func() { - if err != nil { - oc.SetSpanStatus(span, err) - span.End() - } - }() - span.AddAttributes( - trace.StringAttribute("path", layerPath), - trace.StringAttribute("cimPath", cimPath), - trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerCimPaths, ", ")), - trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerPaths, ", "))) - - parentCim := "" - if len(parentLayerPaths) > 0 { - if filepath.Dir(cimPath) != filepath.Dir(parentLayerCimPaths[0]) { - return nil, fmt.Errorf("parent cim can not be stored in different directory") - } - // We only need to provide parent CIM name, it is assumed that both parent CIM - // and newly created CIM are present in the same directory. - parentCim = filepath.Base(parentLayerCimPaths[0]) - } - - cim, err := cimfs.Create(filepath.Dir(cimPath), parentCim, filepath.Base(cimPath)) - if err != nil { - return nil, fmt.Errorf("error in creating a new cim: %w", err) - } - - sfw, err := newStdFileWriter(layerPath, parentLayerPaths) - if err != nil { - return nil, fmt.Errorf("error in creating new standard file writer: %w", err) - } - return &CimLayerWriter{ - ctx: ctx, - s: span, - layerPath: layerPath, - parentLayerPaths: parentLayerPaths, - cimWriter: cim, - stdFileWriter: sfw, - }, nil -} diff --git a/internal/wclayer/cim/file_writer.go b/internal/wclayer/cim/file_writer.go index 497bbbbb9a..9e5e8dd456 100644 --- a/internal/wclayer/cim/file_writer.go +++ b/internal/wclayer/cim/file_writer.go @@ -86,5 +86,8 @@ func (sfw *stdFileWriter) Close(ctx context.Context) error { if err := sfw.closeActiveFile(); err != nil { return fmt.Errorf("failed to close active file %s : %w", sfw.activeFile.Name(), err) } + if err := sfw.root.Close(); err != nil { + return fmt.Errorf("failed to close root dir: %w", err) + } return nil } diff --git a/internal/wclayer/cim/forked_cim_writer.go b/internal/wclayer/cim/forked_cim_writer.go new file mode 100644 index 0000000000..7da052b515 --- /dev/null +++ b/internal/wclayer/cim/forked_cim_writer.go @@ -0,0 +1,78 @@ +//go:build windows + +package cim + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/pkg/cimfs" +) + +// A ForkedCimLayerWriter implements the wclayer.LayerWriter interface to allow writing container +// image layers in the cim format. +// A cim layer consist of cim files (which are usually stored in the `cim-layers` directory and +// some other files which are stored in the directory of that layer (i.e the `path` directory). +type ForkedCimLayerWriter struct { + *cimLayerWriter +} + +var _ CIMLayerWriter = &ForkedCimLayerWriter{} + +func NewForkedCimLayerWriter(ctx context.Context, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (_ *ForkedCimLayerWriter, err error) { + if !cimfs.IsCimFSSupported() { + return nil, fmt.Errorf("CimFs not supported on this build") + } + + parentCim := "" + if len(parentLayerPaths) > 0 { + // We only need to provide parent CIM name, it is assumed that both parent CIM + // and newly created CIM are present in the same directory. + parentCim = filepath.Base(parentLayerCimPaths[0]) + } + + cim, err := cimfs.Create(filepath.Dir(cimPath), parentCim, filepath.Base(cimPath)) + if err != nil { + return nil, fmt.Errorf("error in creating a new cim: %w", err) + } + defer func() { + if err != nil { + cErr := cim.Close() + if cErr != nil { + log.G(ctx).WithError(err).Warnf("failed to close cim after error: %s", cErr) + } + cErr = cimfs.DestroyCim(ctx, cimPath) + if cErr != nil { + log.G(ctx).WithError(err).Warnf("failed to cleanup cim after error: %s", cErr) + } + } + }() + + sfw, err := newStdFileWriter(layerPath, parentLayerPaths) + if err != nil { + return nil, fmt.Errorf("error in creating new standard file writer: %w", err) + } + return &ForkedCimLayerWriter{ + cimLayerWriter: &cimLayerWriter{ + parentLayerPaths: parentLayerPaths, + ctx: ctx, + cimWriter: cim, + stdFileWriter: sfw, + layerPath: layerPath, + }, + }, nil +} + +// Remove removes a file that was present in a parent layer from the layer. +func (cw *ForkedCimLayerWriter) Remove(name string) error { + // set active write to nil so that we panic if layer tar is incorrectly formatted. + cw.activeWriter = nil + err := cw.cimWriter.Unlink(name) + if err == nil || os.IsNotExist(err) { + return nil + } + return fmt.Errorf("failed to remove file: %w", err) +} diff --git a/internal/wclayer/cim/pending.go b/internal/wclayer/cim/pending.go index d13bdff850..f2185a0998 100644 --- a/internal/wclayer/cim/pending.go +++ b/internal/wclayer/cim/pending.go @@ -16,6 +16,13 @@ type pendingCimOp interface { apply(cw *cimfs.CimFsWriter) error } +type pendingCimOpFunc func(cw *cimfs.CimFsWriter) error + +func (f pendingCimOpFunc) apply(cw *cimfs.CimFsWriter) error { + return f(cw) + +} + // add op represents a pending operation of adding a new file inside the cim type addOp struct { // path inside the cim at which the file should be added diff --git a/internal/wclayer/cim/process.go b/internal/wclayer/cim/process.go index 8fdb3bad3f..ace81122bc 100644 --- a/internal/wclayer/cim/process.go +++ b/internal/wclayer/cim/process.go @@ -7,7 +7,6 @@ import ( "fmt" "os" "path/filepath" - "time" "github.com/Microsoft/go-winio" "github.com/Microsoft/hcsshim/internal/wclayer" @@ -34,10 +33,6 @@ func processBaseLayerHives(layerPath string) ([]pendingCimOp, error) { } hivesDirInfo := &winio.FileBasicInfo{ - CreationTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastWriteTime: windows.NsecToFiletime(time.Now().UnixNano()), - ChangeTime: windows.NsecToFiletime(time.Now().UnixNano()), FileAttributes: windows.FILE_ATTRIBUTE_DIRECTORY, } pendingOps = append(pendingOps, &addOp{ @@ -71,10 +66,6 @@ func processLayoutFile(layerPath string) ([]pendingCimOp, error) { } layoutFileInfo := &winio.FileBasicInfo{ - CreationTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastWriteTime: windows.NsecToFiletime(time.Now().UnixNano()), - ChangeTime: windows.NsecToFiletime(time.Now().UnixNano()), FileAttributes: windows.FILE_ATTRIBUTE_NORMAL, } @@ -89,7 +80,7 @@ func processLayoutFile(layerPath string) ([]pendingCimOp, error) { // Some of the layer files that are generated during the processBaseLayer call must be added back // inside the cim, some registry file links must be updated. This function takes care of all those // steps. This function opens the cim file for writing and updates it. -func (cw *CimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM bool) (err error) { +func (cw *cimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM bool) (err error) { if processUtilityVM { if err = processUtilityVMLayer(ctx, cw.layerPath); err != nil { return fmt.Errorf("process utilityVM layer: %w", err) @@ -113,7 +104,7 @@ func (cw *CimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM // processNonBaseLayer takes care of the processing required for a non base layer. As of now // the only processing required for non base layer is to merge the delta registry hives of the // non-base layer with it's parent layer. -func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilityVM bool) (err error) { +func (cw *cimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilityVM bool) (err error) { for _, hv := range hives { baseHive := filepath.Join(wclayer.HivesPath, hv.base) deltaHive := filepath.Join(wclayer.HivesPath, hv.delta) @@ -134,10 +125,6 @@ func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilit pathInCim: baseHive, hostPath: filepath.Join(cw.layerPath, baseHive), fileInfo: &winio.FileBasicInfo{ - CreationTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()), - LastWriteTime: windows.NsecToFiletime(time.Now().UnixNano()), - ChangeTime: windows.NsecToFiletime(time.Now().UnixNano()), FileAttributes: windows.FILE_ATTRIBUTE_NORMAL, }, }) diff --git a/pkg/ociwclayer/cim/import.go b/pkg/ociwclayer/cim/import.go index d8f4a1aa95..83d7a82cdf 100644 --- a/pkg/ociwclayer/cim/import.go +++ b/pkg/ociwclayer/cim/import.go @@ -18,7 +18,9 @@ import ( "github.com/Microsoft/go-winio/backuptar" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/wclayer/cim" + "github.com/Microsoft/hcsshim/pkg/cimfs" "github.com/Microsoft/hcsshim/pkg/ociwclayer" + "github.com/sirupsen/logrus" "golang.org/x/sys/windows" ) @@ -30,13 +32,20 @@ import ( // `parentLayerPaths` are paths to the parent layer directories. Ordered from highest to lowest. // // This function returns the total size of the layer's files, in bytes. -func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (int64, error) { - err := os.MkdirAll(layerPath, 0) +func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (_ int64, err error) { + log.G(ctx).WithFields(logrus.Fields{ + "layer path": layerPath, + "layer cim path": cimPath, + "parent layer paths": strings.Join(parentLayerPaths, ", "), + "parent layer CIM paths": strings.Join(parentLayerCimPaths, ", "), + }).Debug("Importing cim layer from tar") + + err = os.MkdirAll(layerPath, 0) if err != nil { return 0, err } - w, err := cim.NewCimLayerWriter(ctx, layerPath, cimPath, parentLayerPaths, parentLayerCimPaths) + w, err := cim.NewForkedCimLayerWriter(ctx, layerPath, cimPath, parentLayerPaths, parentLayerCimPaths) if err != nil { return 0, err } @@ -52,7 +61,36 @@ func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath, cimPath return n, nil } -func writeCimLayerFromTar(ctx context.Context, r io.Reader, w *cim.CimLayerWriter) (int64, error) { +// ImportSingleFileCimLayerFromTar reads a layer from an OCI layer tar stream and extracts +// it into the SingleFileCIM format. +func ImportSingleFileCimLayerFromTar(ctx context.Context, r io.Reader, layer *cimfs.BlockCIM, parentLayers []*cimfs.BlockCIM) (_ int64, err error) { + log.G(ctx).WithFields(logrus.Fields{ + "layer": layer, + "parent layers": fmt.Sprintf("%v", parentLayers), + }).Debug("Importing single file cim layer from tar") + + err = os.MkdirAll(filepath.Dir(layer.BlockPath), 0) + if err != nil { + return 0, err + } + + w, err := cim.NewBlockCIMLayerWriter(ctx, layer, parentLayers) + if err != nil { + return 0, err + } + + n, err := writeCimLayerFromTar(ctx, r, w) + cerr := w.Close(ctx) + if err != nil { + return 0, err + } + if cerr != nil { + return 0, cerr + } + return n, nil +} + +func writeCimLayerFromTar(ctx context.Context, r io.Reader, w cim.CIMLayerWriter) (int64, error) { tr := tar.NewReader(r) buf := bufio.NewWriter(w) size := int64(0) From 97956fb2f55a3adbe2ae520ebef5ec4e36bc807d Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Tue, 17 Dec 2024 10:41:43 -0500 Subject: [PATCH 4/4] Use Block CIM layers for container RootFS This commit adds the ability to parse block CIM layer mounts and to mount the merged block CIMs to be used as a rootfs for a container. Signed-off-by: Amit Barve --- internal/layers/helpers.go | 7 +- internal/layers/wcow_mount.go | 166 +++++++++++++++++++++++--------- internal/layers/wcow_parse.go | 95 +++++++++++++++++- internal/resources/resources.go | 24 +++++ internal/wclayer/cim/mount.go | 89 ++++++++++++++++- 5 files changed, 325 insertions(+), 56 deletions(-) diff --git a/internal/layers/helpers.go b/internal/layers/helpers.go index 2a67a7fb1b..89569ad464 100644 --- a/internal/layers/helpers.go +++ b/internal/layers/helpers.go @@ -75,8 +75,11 @@ const ( // parent layer CIMs parentLayerCimPathsFlag = "parentCimPaths=" - LegacyMountType string = "windows-layer" - CimFSMountType string = "CimFS" + legacyMountType string = "windows-layer" + forkedCIMMountType string = "CimFS" + blockCIMMountType string = "BlockCIM" + blockCIMTypeFlag string = "blockCIMType=" + mergedCIMPathFlag string = "mergedCIMPath=" ) // getOptionAsArray finds if there is an option which has the given prefix and if such an diff --git a/internal/layers/wcow_mount.go b/internal/layers/wcow_mount.go index fa8a5c0777..9df9f199eb 100644 --- a/internal/layers/wcow_mount.go +++ b/internal/layers/wcow_mount.go @@ -12,12 +12,14 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sys/windows" "github.com/Microsoft/hcsshim/computestorage" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/uvm/scsi" @@ -37,6 +39,11 @@ func MountWCOWLayers(ctx context.Context, containerID string, vm *uvm.UtilityVM, return mountProcessIsolatedForkedCimLayers(ctx, containerID, l) } return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with forked cim layers") + case *wcowBlockCIMLayers: + if vm == nil { + return mountProcessIsolatedBlockCIMLayers(ctx, containerID, l) + } + return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with block cim layers") default: return nil, nil, fmt.Errorf("invalid layer type %T", wl) } @@ -171,53 +178,43 @@ func mountProcessIsolatedWCIFSLayers(ctx context.Context, l *wcowWCIFSLayers) (_ }, nil } -// wcowHostForkedCIMLayerCloser is used to cleanup forked CIM layers mounted on the host for process isolated -// containers -type wcowHostForkedCIMLayerCloser struct { - scratchLayerData - containerID string -} - -func (l *wcowHostForkedCIMLayerCloser) Release(ctx context.Context) error { - mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) - if err != nil { - return err - } - - if err = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS); err != nil { - return err - } - - if err = cimlayer.CleanupContainerMounts(l.containerID); err != nil { - return err - } - return wclayer.DeactivateLayer(ctx, l.scratchLayerPath) -} +// Handles the common processing for mounting all 3 types of cimfs layers. This involves +// mounting the scratch, attaching the filter and preparing the return values. +// `volume` is the path to the volume at which read only layer CIMs are mounted. +func mountProcessIsolatedCimLayersCommon(ctx context.Context, containerID string, volume string, s *scratchLayerData) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedCimLayersCommon") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("scratch path", s.scratchLayerPath), + trace.StringAttribute("mounted CIM volume", volume)) -func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { - if err = wclayer.ActivateLayer(ctx, l.scratchLayerPath); err != nil { - return nil, nil, err - } + rcl := &resources.ResourceCloserList{} defer func() { if err != nil { - _ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath) + if rErr := rcl.Release(ctx); rErr != nil { + log.G(ctx).WithError(err).Warnf("mount process isolated cim layers common, undo failed with: %s", rErr) + } } }() - mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) - if err != nil { + if err = wclayer.ActivateLayer(ctx, s.scratchLayerPath); err != nil { return nil, nil, err } + rcl.AddFunc(func(uCtx context.Context) error { + return wclayer.DeactivateLayer(uCtx, s.scratchLayerPath) + }) - volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID) + mountPath, err := wclayer.GetLayerMountPath(ctx, s.scratchLayerPath) if err != nil { - return nil, nil, fmt.Errorf("mount layer cim: %w", err) + return nil, nil, err } - defer func() { - if err != nil { - _ = cimlayer.UnmountCimLayer(ctx, volume) - } - }() + log.G(ctx).WithFields(logrus.Fields{ + "scratch": s.scratchLayerPath, + "mounted path": mountPath, + }).Debug("scratch activated") layerID, err := cimlayer.LayerID(volume) if err != nil { @@ -239,22 +236,97 @@ func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string if err = computestorage.AttachOverlayFilter(ctx, mountPath, layerData); err != nil { return nil, nil, err } + rcl.AddFunc(func(uCtx context.Context) error { + return computestorage.DetachOverlayFilter(uCtx, mountPath, hcsschema.UnionFS) + }) + + log.G(ctx).WithField("layer data", layerData).Debug("unionFS filter attached") + + return &MountedWCOWLayers{ + RootFS: mountPath, + MountedLayerPaths: []MountedWCOWLayer{{ + LayerID: layerID, + MountedPath: volume, + }}, + }, rcl, nil +} + +func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedForkedCimLayers") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + rcl := &resources.ResourceCloserList{} + defer func() { + if err != nil { + if rErr := rcl.Release(ctx); rErr != nil { + log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr) + } + } + }() + + volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID) + if err != nil { + return nil, nil, fmt.Errorf("mount forked layer cim: %w", err) + } + rcl.AddFunc(func(uCtx context.Context) error { + return cimlayer.UnmountCimLayer(uCtx, volume) + }) + + mountedLayers, closer, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData) + if err != nil { + return nil, nil, err + } + return mountedLayers, rcl.Add(closer), nil +} + +func mountProcessIsolatedBlockCIMLayers(ctx context.Context, containerID string, l *wcowBlockCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedBlockCIMLayers") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + var volume string + + rcl := &resources.ResourceCloserList{} defer func() { if err != nil { - _ = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS) + if rErr := rcl.Release(ctx); rErr != nil { + log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr) + } } }() - return &MountedWCOWLayers{ - RootFS: mountPath, - MountedLayerPaths: []MountedWCOWLayer{{ - LayerID: layerID, - MountedPath: volume, - }}, - }, &wcowHostForkedCIMLayerCloser{ - containerID: containerID, - scratchLayerData: l.scratchLayerData, - }, nil + log.G(ctx).WithFields(logrus.Fields{ + "scratch": l.scratchLayerPath, + "merged layer": l.mergedLayer, + "parent layers": l.parentLayers, + }).Debug("mounting process isolated block CIM layers") + + if len(l.parentLayers) > 1 { + volume, err = cimlayer.MergeMountBlockCIMLayer(ctx, l.mergedLayer, l.parentLayers, containerID) + } else { + volume, err = cimlayer.MountBlockCIMLayer(ctx, l.parentLayers[0], containerID) + } + if err != nil { + return nil, nil, fmt.Errorf("mount block CIM layers: %w", err) + } + rcl.AddFunc(func(uCtx context.Context) error { + return cimlayer.UnmountCimLayer(uCtx, volume) + }) + + log.G(ctx).WithField("volume", volume).Debug("mounted blockCIM layers for process isolated container") + + mountedLayers, layerCloser, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData) + if err != nil { + return nil, nil, fmt.Errorf("failed mount CIM layers common: %w", err) + } + rcl.Add(layerCloser) + + return mountedLayers, rcl, nil } type wcowIsolatedWCIFSLayerCloser struct { diff --git a/internal/layers/wcow_parse.go b/internal/layers/wcow_parse.go index 541766358c..4e38305ed2 100644 --- a/internal/layers/wcow_parse.go +++ b/internal/layers/wcow_parse.go @@ -5,15 +5,18 @@ package layers import ( "context" + "encoding/json" "fmt" "os" "path/filepath" + "strings" "github.com/containerd/containerd/api/types" "github.com/Microsoft/hcsshim/internal/copyfile" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/uvmfolder" + "github.com/Microsoft/hcsshim/pkg/cimfs" ) // WCOW image layers is a tagging interface that all WCOW layers MUST implement. This is @@ -67,6 +70,17 @@ type wcowForkedCIMLayers struct { layers []forkedCIMLayer } +// Represents CIM layers where each layer is stored in a block device or in a single file +// and multiple such layer CIMs are merged before mounting them. Currently can only be +// used for process isolated containers. +type wcowBlockCIMLayers struct { + scratchLayerData + // parent layers in order [layerN (top-most), layerN-1,..layer0 (base)] + parentLayers []*cimfs.BlockCIM + // a merged layer is prepared by combining all parent layers + mergedLayer *cimfs.BlockCIM +} + func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) { parentLayerPaths, err := getOptionAsArray(m, parentLayerPathsFlag) if err != nil { @@ -94,8 +108,77 @@ func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) { }, nil } -// ParseWCOWLayers parses the layers provided by containerd into the format understood by hcsshim and prepares -// them for mounting. +// TODO(ambarve): The code to parse a mount type should be in a separate package/module +// somewhere and then should be consumed by both hcsshim & containerd from there. +func parseBlockCIMMount(m *types.Mount) (*wcowBlockCIMLayers, error) { + var ( + parentPaths []string + layerType cimfs.BlockCIMType + mergedCIMPath string + ) + + for _, option := range m.Options { + if val, ok := strings.CutPrefix(option, parentLayerCimPathsFlag); ok { + err := json.Unmarshal([]byte(val), &parentPaths) + if err != nil { + return nil, err + } + } else if val, ok = strings.CutPrefix(option, blockCIMTypeFlag); ok { + if val == "device" { + layerType = cimfs.BlockCIMTypeDevice + } else if val == "file" { + layerType = cimfs.BlockCIMTypeSingleFile + } else { + return nil, fmt.Errorf("invalid block CIM type `%s`", val) + } + } else if val, ok = strings.CutPrefix(option, mergedCIMPathFlag); ok { + mergedCIMPath = val + } + } + + if len(parentPaths) == 0 { + return nil, fmt.Errorf("need at least 1 parent layer") + } + if layerType == cimfs.BlockCIMTypeNone { + return nil, fmt.Errorf("BlockCIM type not provided") + } + if mergedCIMPath == "" && len(parentPaths) > 1 { + return nil, fmt.Errorf("merged CIM path not provided") + } + + var ( + parentLayers []*cimfs.BlockCIM + mergedLayer *cimfs.BlockCIM + ) + + if len(parentPaths) > 1 { + // for single parent layers merge won't be done + mergedLayer = &cimfs.BlockCIM{ + Type: layerType, + BlockPath: filepath.Dir(mergedCIMPath), + CimName: filepath.Base(mergedCIMPath), + } + } + + for _, p := range parentPaths { + parentLayers = append(parentLayers, &cimfs.BlockCIM{ + Type: layerType, + BlockPath: filepath.Dir(p), + CimName: filepath.Base(p), + }) + } + + return &wcowBlockCIMLayers{ + scratchLayerData: scratchLayerData{ + scratchLayerPath: m.Source, + }, + parentLayers: parentLayers, + mergedLayer: mergedLayer, + }, nil +} + +// ParseWCOWLayers parses the layers provided by containerd into the format understood by +// hcsshim and prepares them for mounting. func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, error) { if err := validateRootfsAndLayers(rootfs, layerFolders); err != nil { return nil, err @@ -112,7 +195,7 @@ func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, m := rootfs[0] switch m.Type { - case LegacyMountType: + case legacyMountType: parentLayers, err := getOptionAsArray(m, parentLayerPathsFlag) if err != nil { return nil, err @@ -123,8 +206,10 @@ func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, }, layerPaths: parentLayers, }, nil - case CimFSMountType: + case forkedCIMMountType: return parseForkedCimMount(m) + case blockCIMMountType: + return parseBlockCIMMount(m) default: return nil, fmt.Errorf("invalid windows mount type: '%s'", m.Type) } @@ -146,7 +231,7 @@ func GetWCOWUVMBootFilesFromLayers(ctx context.Context, rootfs []*types.Mount, l } else { m := rootfs[0] switch m.Type { - case LegacyMountType: + case legacyMountType: parentLayers, err = getOptionAsArray(m, parentLayerPathsFlag) if err != nil { return nil, err diff --git a/internal/resources/resources.go b/internal/resources/resources.go index cc08b3a566..a111506f62 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -168,3 +168,27 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all } return nil } + +type ResourceCloserList struct { + closers []ResourceCloser +} + +func (l *ResourceCloserList) Add(rOp ResourceCloser) *ResourceCloserList { + l.closers = append(l.closers, rOp) + return l +} + +func (l *ResourceCloserList) AddFunc(rOp ResourceCloserFunc) *ResourceCloserList { + l.closers = append(l.closers, rOp) + return l +} + +func (l *ResourceCloserList) Release(ctx context.Context) error { + // MUST release in the reverse order + for i := len(l.closers) - 1; i >= 0; i-- { + if oErr := l.closers[i].Release(ctx); oErr != nil { + return oErr + } + } + return nil +} diff --git a/internal/wclayer/cim/mount.go b/internal/wclayer/cim/mount.go index f3ddc2260b..56d0d0ac7d 100644 --- a/internal/wclayer/cim/mount.go +++ b/internal/wclayer/cim/mount.go @@ -6,11 +6,15 @@ import ( "context" "fmt" "os" + "path/filepath" "strings" "github.com/Microsoft/go-winio/pkg/guid" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" cimfs "github.com/Microsoft/hcsshim/pkg/cimfs" + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) var cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}} @@ -25,13 +29,88 @@ func MountForkedCimLayer(ctx context.Context, cimPath, containerID string) (stri return "", fmt.Errorf("generated cim mount GUID: %w", err) } - vol, err := cimfs.Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles) + vol, err := cimfs.Mount(cimPath, volumeGUID, 0) if err != nil { return "", err } return vol, nil } +// MountBlockCIMLayer mounts the given block cim and returns the mount +// location of that cim. The containerID is used to generate the volumeID for the volume +// at which this CIM is mounted. containerID is used so that if the shim process crashes +// for any reason, the mounted cim can be correctly cleaned up during `shim delete` call. +func MountBlockCIMLayer(ctx context.Context, layer *cimfs.BlockCIM, containerID string) (_ string, err error) { + ctx, span := oc.StartSpan(ctx, "MountBlockCIMLayer") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("layer", layer.String())) + + var mountFlags uint32 + switch layer.Type { + case cimfs.BlockCIMTypeDevice: + mountFlags |= cimfs.CimMountBlockDeviceCim + case cimfs.BlockCIMTypeSingleFile: + mountFlags |= cimfs.CimMountSingleFileCim + default: + return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid) + } + + volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID)) + if err != nil { + return "", fmt.Errorf("generated cim mount GUID: %w", err) + } + + cimPath := filepath.Join(layer.BlockPath, layer.CimName) + + log.G(ctx).WithFields(logrus.Fields{ + "flags": mountFlags, + "volume": volumeGUID.String(), + }).Debug("mounting block layer CIM") + + vol, err := cimfs.Mount(cimPath, volumeGUID, mountFlags) + if err != nil { + return "", err + } + return vol, nil +} + +// MergeMountBlockCIMLayer mounts the given merged block cim and returns the mount +// location of that cim. The containerID is used to generate the volumeID for the volume +// at which this CIM is mounted. containerID is used so that if the shim process crashes +// for any reason, the mounted cim can be correctly cleaned up during `shim delete` call. +// parentLayers MUST be in the base to topmost order. I.e base layer should be at index 0 +// and immediate parent MUST be at the last index. +func MergeMountBlockCIMLayer(ctx context.Context, mergedLayer *cimfs.BlockCIM, parentLayers []*cimfs.BlockCIM, containerID string) (_ string, err error) { + _, span := oc.StartSpan(ctx, "MergeMountBlockCIMLayer") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("merged layer", mergedLayer.String()), + trace.StringAttribute("parent layers", fmt.Sprintf("%v", parentLayers))) + + var mountFlags uint32 + switch mergedLayer.Type { + case cimfs.BlockCIMTypeDevice: + mountFlags |= cimfs.CimMountBlockDeviceCim + case cimfs.BlockCIMTypeSingleFile: + mountFlags |= cimfs.CimMountSingleFileCim + default: + return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid) + } + + volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID)) + if err != nil { + return "", fmt.Errorf("generated cim mount GUID: %w", err) + } + return cimfs.MountMergedBlockCIMs(mergedLayer, parentLayers, mountFlags, volumeGUID) +} + // Unmounts the cim mounted at the given volume func UnmountCimLayer(ctx context.Context, volume string) error { return cimfs.Unmount(volume) @@ -44,6 +123,12 @@ func CleanupContainerMounts(containerID string) error { } volPath := fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String()) + + log.L.WithFields(logrus.Fields{ + "volume": volPath, + "containerID": containerID, + }).Debug("cleanup container CIM mounts") + if _, err := os.Stat(volPath); err == nil { err = cimfs.Unmount(volPath) if err != nil {