/******************************************************************************/ /* */ /* X r d O s s C a c h e . c c */ /* */ /* (c) 2003 by the Board of Trustees of the Leland Stanford, Jr., University */ /* All Rights Reserved */ /* Produced by Andrew Hanushevsky for Stanford University under contract */ /* DE-AC02-76-SFO0515 with the Department of Energy */ /* */ /* This file is part of the XRootD software suite. */ /* */ /* XRootD is free software: you can redistribute it and/or modify it under */ /* the terms of the GNU Lesser General Public License as published by the */ /* Free Software Foundation, either version 3 of the License, or (at your */ /* option) any later version. */ /* */ /* XRootD is distributed in the hope that it will be useful, but WITHOUT */ /* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */ /* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */ /* License for more details. */ /* */ /* You should have received a copy of the GNU Lesser General Public License */ /* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */ /* COPYING (GPL license). If not, see . */ /* */ /* The copyright holder's institutional names and contributor's names may not */ /* be used to endorse or promote products derived from this software without */ /* specific prior written permission of the institution or contributor. */ /******************************************************************************/ #include #include #include #include #include #include #include #include #include #include "XrdOss/XrdOssCache.hh" #include "XrdOss/XrdOssOpaque.hh" #include "XrdOss/XrdOssPath.hh" #include "XrdOss/XrdOssSpace.hh" #include "XrdOss/XrdOssTrace.hh" #include "XrdSys/XrdSysHeaders.hh" #include "XrdSys/XrdSysPlatform.hh" /******************************************************************************/ /* G l o b a l s a n d S t a t i c M e m b e r s */ /******************************************************************************/ extern XrdSysError OssEroute; extern XrdOucTrace OssTrace; XrdOssCache_Group *XrdOssCache_Group::fsgroups = 0; long long XrdOssCache_Group::PubQuota = -1; XrdSysMutex XrdOssCache::Mutex; long long XrdOssCache::fsTotal = 0; long long XrdOssCache::fsLarge = 0; long long XrdOssCache::fsTotFr = 0; long long XrdOssCache::fsFree = 0; long long XrdOssCache::fsSize = 0; XrdOssCache_FS *XrdOssCache::fsfirst = 0; XrdOssCache_FS *XrdOssCache::fslast = 0; XrdOssCache_FSData *XrdOssCache::fsdata = 0; double XrdOssCache::fuzAlloc= 0.0; long long XrdOssCache::minAlloc= 0; int XrdOssCache::fsCount = 0; int XrdOssCache::ovhAlloc= 0; int XrdOssCache::Quotas = 0; int XrdOssCache::Usage = 0; /******************************************************************************/ /* X r d O s s C a c h e _ F S D a t a M e t h o d s */ /******************************************************************************/ XrdOssCache_FSData::XrdOssCache_FSData(const char *fsp, STATFS_t &fsbuff, dev_t fsID) { path = strdup(fsp); size = static_cast(fsbuff.f_blocks) * static_cast(fsbuff.FS_BLKSZ); frsz = static_cast(fsbuff.f_bavail) * static_cast(fsbuff.FS_BLKSZ); XrdOssCache::fsTotal += size; XrdOssCache::fsTotFr += frsz; XrdOssCache::fsCount++; if (size > XrdOssCache::fsLarge) XrdOssCache::fsLarge= size; if (frsz > XrdOssCache::fsFree) XrdOssCache::fsFree = frsz; fsid = fsID; updt = time(0); next = 0; stat = 0; seen = 0; } /******************************************************************************/ /* X r d O s s C a c h e _ F S C o n s t r u c t o r */ /******************************************************************************/ // Cache_FS objects are only created during configuration. No locks are needed. XrdOssCache_FS::XrdOssCache_FS(int &retc, const char *fsGrp, const char *fsPath, FSOpts fsOpts) { static const mode_t theMode = S_IRWXU | S_IRWXG; STATFS_t fsbuff; struct stat sfbuff; XrdOssCache_FSData *fdp; XrdOssCache_FS *fsp; // Prefill in case of failure // path = group = 0; // Verify that this is not a duplicate // fsp = XrdOssCache::fsfirst; while(fsp && (strcmp(fsp->path,fsPath)||strcmp(fsp->fsgroup->group,fsGrp))) if ((fsp = fsp->next) == XrdOssCache::fsfirst) {fsp = 0; break;} if (fsp) {retc = EEXIST; return;} // Set the groupname and the path which is the supplied path/group name // if (!(fsOpts & isXA)) path = strdup(fsPath); else {path = XrdOssPath::genPath(fsPath, fsGrp, suffix); if (mkdir(path, theMode) && errno != EEXIST) {retc=errno; return;} } plen = strlen(path); group = strdup(fsGrp); fsgroup= 0; opts = fsOpts; retc = ENOMEM; // Find the filesystem for this object // if (FS_Stat(fsPath, &fsbuff) || stat(fsPath, &sfbuff)) {retc=errno; return;} // Find the matching filesystem data // fdp = XrdOssCache::fsdata; while(fdp) {if (fdp->fsid == sfbuff.st_dev) break; fdp = fdp->next;} // If we didn't find the filesystem, then create one // if (!fdp) {if (!(fdp = new XrdOssCache_FSData(fsPath,fsbuff,sfbuff.st_dev))) return; else {fdp->next = XrdOssCache::fsdata; XrdOssCache::fsdata = fdp;} } // Complete the filesystem block (failure now is not an option) // fsdata = fdp; retc = 0; // Link this filesystem into the filesystem chain // if (!XrdOssCache::fsfirst) {next = this; XrdOssCache::fsfirst = this; XrdOssCache::fslast = this; } else {next = XrdOssCache::fslast->next; XrdOssCache::fslast->next = this; XrdOssCache::fslast = this; } // Check if this is the first group allocation // fsgroup = XrdOssCache_Group::fsgroups; while(fsgroup && strcmp(group, fsgroup->group)) fsgroup = fsgroup->next; if (!fsgroup && (fsgroup = new XrdOssCache_Group(group, this))) {fsgroup->next = XrdOssCache_Group::fsgroups; XrdOssCache_Group::fsgroups=fsgroup; } } /******************************************************************************/ /* A d d */ /******************************************************************************/ // Add is only called during configuration. No locks are needed. It merely // adds an unnamed file system partition. This allows us to track space. int XrdOssCache_FS::Add(const char *fsPath) { STATFS_t fsbuff; struct stat sfbuff; XrdOssCache_FSData *fdp; // Find the filesystem for this object // if (FS_Stat(fsPath, &fsbuff) || stat(fsPath, &sfbuff)) return -errno; // Find the matching filesystem data // fdp = XrdOssCache::fsdata; while(fdp) {if (fdp->fsid == sfbuff.st_dev) break; fdp = fdp->next;} if (fdp) return 0; // Create new filesystem data that will not be linked to any filesystem // if (!(fdp = new XrdOssCache_FSData(fsPath,fsbuff,sfbuff.st_dev))) return -ENOMEM; fdp->next = XrdOssCache::fsdata; XrdOssCache::fsdata = fdp; return 0; } /******************************************************************************/ /* f r e e S p a c e */ /******************************************************************************/ long long XrdOssCache_FS::freeSpace(long long &Size, const char *path) { STATFS_t fsbuff; long long fSpace; // Free space for a specific path // if (path) {if (FS_Stat(path, &fsbuff)) return -1; Size = static_cast(fsbuff.f_blocks) * static_cast(fsbuff.FS_BLKSZ); return static_cast(fsbuff.f_bavail) * static_cast(fsbuff.FS_BLKSZ); } // Free space for the whole system // XrdOssCache::Mutex.Lock(); fSpace = XrdOssCache::fsFree; Size = XrdOssCache::fsSize; XrdOssCache::Mutex.UnLock(); return fSpace; } /******************************************************************************/ long long XrdOssCache_FS::freeSpace(XrdOssCache_Space &Space, const char *path) { STATFS_t fsbuff; // Free space for a specific path // if (!path || FS_Stat(path, &fsbuff)) return -1; Space.Total = static_cast(fsbuff.f_blocks) * static_cast(fsbuff.FS_BLKSZ); Space.Free = static_cast(fsbuff.f_bavail) * static_cast(fsbuff.FS_BLKSZ); Space.Inodes= static_cast(fsbuff.f_files); Space.Inleft= static_cast(fsbuff.FS_FFREE); return Space.Free; } /******************************************************************************/ /* g e t S p a c e */ /******************************************************************************/ int XrdOssCache_FS::getSpace(XrdOssCache_Space &Space, const char *sname) { XrdOssCache_Group *fsg = XrdOssCache_Group::fsgroups; // Try to find the space group name // while(fsg && strcmp(sname, fsg->group)) fsg = fsg->next; if (!fsg) return 0; // Return the space // return getSpace(Space, fsg); } /******************************************************************************/ int XrdOssCache_FS::getSpace(XrdOssCache_Space &Space, XrdOssCache_Group *fsg) { static unsigned int seenVal = 0; XrdOssCache_FS *fsp; XrdOssCache_FSData *fsd; int pnum = 0; // Initialize some fields // Space.Total = 0; Space.Free = 0; // Prepare to accumulate the stats. Note that a file system may appear in // multiple cache groups. The code below only counts those once. // XrdOssCache::Mutex.Lock(); seenVal++; Space.Usage = fsg->Usage; Space.Quota = fsg->Quota; if ((fsp = XrdOssCache::fsfirst)) do {if (fsp->fsgroup == fsg && fsp->fsdata->seen != seenVal) {fsd = fsp->fsdata; pnum++; fsd->seen = seenVal; Space.Total += fsd->size; Space.Free += fsd->frsz; if (fsd->frsz > Space.Maxfree) Space.Maxfree = fsd->frsz; if (fsd->size > Space.Largest) Space.Largest = fsd->size; } fsp = fsp->next; } while(fsp != XrdOssCache::fsfirst); XrdOssCache::Mutex.UnLock(); // All done // return pnum; } /******************************************************************************/ /* A d j u s t */ /******************************************************************************/ void XrdOssCache::Adjust(dev_t devid, off_t size) { EPNAME("Adjust") XrdOssCache_FSData *fsdp; XrdOssCache_Group *fsgp; // Search for matching filesystem // fsdp = XrdOssCache::fsdata; while(fsdp && fsdp->fsid != devid) fsdp = fsdp->next; if (!fsdp) {DEBUG("dev " <group)) fsgp = fsgp->next; // Process the result // if (fsdp) {DEBUG("free=" <frsz <<'-' <st_mode & S_IFMT) != S_IFLNK) Adjust(buf->st_dev, size); else {char lnkbuff[MAXPATHLEN+64]; int lnklen = readlink(Path, lnkbuff, sizeof(lnkbuff)-1); if (lnklen > 0) {XrdOssPath::Trim2Base(lnkbuff+lnklen-1); Adjust(lnkbuff, size); } } return; } // Search for matching logical partition // fsp = fsfirst; while(fsp && strcmp(fsp->path, Path)) if ((fsp = fsp->next) == fsfirst) {fsp = 0; break;} // Process the result // if (fsp) Adjust(fsp, size); else {DEBUG("cahe path " <fsdata; DEBUG("used=" <fsgroup->Usage <<'+' <group)) cgp = cgp->next; if (!cgp) return -ENOENT; // Find a cache that will fit this allocation request. We start with the next // entry past the last one we selected and go full round looking for a // compatable entry (enough space and in the right space group). // fsp_sel = 0; maxfree = 0; fsp = cgp->curr->next; fspend = fsp; // End when we hit the start again do { if (strcmp(aInfo.cgName, fsp->group) || (aInfo.cgPath && (aInfo.cgPlen > fsp->plen || strncmp(aInfo.cgPath,fsp->path,aInfo.cgPlen)))) continue; curfree = fsp->fsdata->frsz; if (size > curfree) continue; if (fuzAlloc > 0.999) {fsp_sel = fsp; break;} else if (!fuzAlloc || !fsp_sel) {if (curfree > maxfree) {fsp_sel = fsp; maxfree = curfree;}} else {diffree = (!(curfree + maxfree) ? 0.0 : static_cast(XRDABS(maxfree - curfree)) / static_cast( maxfree + curfree)); if (diffree > fuzAlloc) {fsp_sel = fsp; maxfree = curfree;} } } while((fsp = fsp->next) != fspend); // Check if we can realy fit this file. If so, update current scan pointer // if (!fsp_sel) return -ENOSPC; cgp->curr = fsp_sel; // Construct the target filename // Info.Path = fsp_sel->path; Info.Plen = fsp_sel->plen; Info.Sfx = fsp_sel->suffix; aInfo.cgPsfx = XrdOssPath::genPFN(Info, aInfo.cgPFbf, aInfo.cgPFsz, (fsp_sel->opts & XrdOssCache_FS::isXA ? 0 : aInfo.Path)); // Verify that target name was constructed // if (!(*aInfo.cgPFbf)) return -ENAMETOOLONG; // Simply open the file in the local filesystem, creating it if need be. // if (aInfo.aMode) {madeDir = 0; do {do {datfd = open(aInfo.cgPFbf,O_CREAT|O_TRUNC|O_WRONLY,aInfo.aMode);} while(datfd < 0 && errno == EINTR); if (datfd >= 0 || errno != ENOENT || madeDir) break; *Info.Slash='\0'; rc=mkdir(aInfo.cgPFbf,theMode); *Info.Slash='/'; madeDir = 1; } while(!rc); if (datfd < 0) return (errno ? -errno : -ENOSYS); } // All done (temporarily adjust down the free space)x // DEBUG("free=" <fsdata->frsz <<'-' <group, fsp->path); if (pP) *pP = '/'; Eroute.Say(buff); fsp = fsp->next; } while(fsp != fsfirst); } /******************************************************************************/ /* P a r s e */ /******************************************************************************/ char *XrdOssCache::Parse(const char *token, char *cbuff, int cblen) { char *Path; // Check for default // if (!token || *token == ':') {strlcpy(cbuff, OSS_CGROUP_DEFAULT, cblen); return 0; } // Get the correct cache group and partition path // if (!(Path = (char *) index(token, ':'))) strlcpy(cbuff, token, cblen); else {int n = Path - token; if (n >= cblen) n = cblen-1; strncpy(cbuff, token, n); cbuff[n] = '\0'; Path++; } // All done // return Path; } /******************************************************************************/ /* S c a n */ /******************************************************************************/ void *XrdOssCache::Scan(int cscanint) { EPNAME("CacheScan") XrdOssCache_FSData *fsdp; XrdOssCache_Group *fsgp; const struct timespec naptime = {cscanint, 0}; long long frsz, llT; // llT is a dummy temporary int retc, dbgMsg, dbgNoMsg, dbgDoMsg; // Try to prevent floodingthe log with scan messages // if (cscanint > 60) dbgMsg = cscanint/60; else dbgMsg = 1; dbgNoMsg = dbgMsg; // Loop scanning the cache // while(1) {if (cscanint > 0) nanosleep(&naptime, 0); dbgDoMsg = !dbgNoMsg--; if (dbgDoMsg) dbgNoMsg = dbgMsg; // Get the cache context lock // Mutex.Lock(); // Scan through all filesystems skip filesystem that have been // recently adjusted to avoid fs statstics latency problems. // fsSize = 0; fsTotFr= 0; fsFree = 0; fsdp = fsdata; while(fsdp) {retc = 0; if ((fsdp->stat & XrdOssFSData_REFRESH) || !(fsdp->stat & XrdOssFSData_ADJUSTED) || cscanint <= 0) {frsz = XrdOssCache_FS::freeSpace(llT,fsdp->path); if (frsz < 0) OssEroute.Emsg("CacheScan", errno , "state file system ",(char *)fsdp->path); else {fsdp->frsz = frsz; fsdp->stat &= ~(XrdOssFSData_REFRESH | XrdOssFSData_ADJUSTED); if (dbgDoMsg) {DEBUG("New free=" <frsz <<" path=" <path);} } } else fsdp->stat |= XrdOssFSData_REFRESH; if (!retc) {if (fsdp->frsz > fsFree) {fsFree = fsdp->frsz; fsSize = fsdp->size;} fsTotFr += fsdp->frsz; } fsdp = fsdp->next; } // Unlock the cache and if we have quotas check them out // Mutex.UnLock(); if (cscanint <= 0) return (void *)0; if (Quotas) XrdOssSpace::Quotas(); // Update usage information if we are keeping track of it if (Usage && XrdOssSpace::Readjust()) {fsgp = XrdOssCache_Group::fsgroups; Mutex.Lock(); while(fsgp) {fsgp->Usage = XrdOssSpace::Usage(fsgp->GRPid); fsgp = fsgp->next; } Mutex.UnLock(); } } // Keep the compiler happy // return (void *)0; }