| 1 | /*************************************************************************** |
|---|
| 2 | * |
|---|
| 3 | * Copyright (C) 2001 International Business Machines |
|---|
| 4 | * All rights reserved. |
|---|
| 5 | * |
|---|
| 6 | * This file is part of the GPFS mmfslinux kernel module. |
|---|
| 7 | * |
|---|
| 8 | * Redistribution and use in source and binary forms, with or without |
|---|
| 9 | * modification, are permitted provided that the following conditions |
|---|
| 10 | * are met: |
|---|
| 11 | * |
|---|
| 12 | * 1. Redistributions of source code must retain the above copyright notice, |
|---|
| 13 | * this list of conditions and the following disclaimer. |
|---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright |
|---|
| 15 | * notice, this list of conditions and the following disclaimer in the |
|---|
| 16 | * documentation and/or other materials provided with the distribution. |
|---|
| 17 | * 3. The name of the author may not be used to endorse or promote products |
|---|
| 18 | * derived from this software without specific prior written |
|---|
| 19 | * permission. |
|---|
| 20 | * |
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|---|
| 22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|---|
| 23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|---|
| 24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|---|
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|---|
| 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
|---|
| 27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 31 | * |
|---|
| 32 | *************************************************************************** */ |
|---|
| 33 | /* @(#)13 1.45.1.2 src/avs/fs/mmfs/ts/kernext/ibm-linux/cxiIOBuffer-plat.h, mmfs, avs_rgpfs24, rgpfs24s007a 10/25/06 17:16:36 */ |
|---|
| 34 | /* |
|---|
| 35 | * Abstraction of an I/O buffer, Linux implementation |
|---|
| 36 | * |
|---|
| 37 | * Contents: |
|---|
| 38 | * struct cxiKernelIOBufferDesc_t |
|---|
| 39 | * struct cxiIOBufferAttachment_t |
|---|
| 40 | * InitBufferAttachment |
|---|
| 41 | * struct cxiContiguousBuffer_t |
|---|
| 42 | * InitContiguousBuffer |
|---|
| 43 | * EXTERNC int kxPinKernelIOBuffer |
|---|
| 44 | * Methods for manipulating cxiIOBuffer_t's |
|---|
| 45 | * #define __CXI_BUFFERS_ARE_CONTIGUOUS |
|---|
| 46 | * GetDiskInfoX |
|---|
| 47 | */ |
|---|
| 48 | |
|---|
| 49 | #ifndef _h_cxiIOBuffer_plat |
|---|
| 50 | #define _h_cxiIOBuffer_plat |
|---|
| 51 | |
|---|
| 52 | #ifndef _h_cxiIOBuffer |
|---|
| 53 | #error Platform header (XXX-plat.h) should not be included directly |
|---|
| 54 | #endif |
|---|
| 55 | |
|---|
| 56 | /* Address of the first byte past the end of memory addressable by |
|---|
| 57 | processes (PAGE_OFFSET), and routine to get this value from the kernel. |
|---|
| 58 | Stacks are below this address. */ |
|---|
| 59 | EXTERNC UIntPtr KernelBoundary; |
|---|
| 60 | EXTERNC int kxGetKernelBoundary(UIntPtr* kBoundP); |
|---|
| 61 | |
|---|
| 62 | /* forward declarations */ |
|---|
| 63 | struct page; |
|---|
| 64 | |
|---|
| 65 | /* User address space range used for page pool. */ |
|---|
| 66 | #ifdef GPFS_ARCH_I386 |
|---|
| 67 | #define LINUX_PAGE_POOL_BASE 0x44000000 |
|---|
| 68 | #endif |
|---|
| 69 | #ifdef GPFS_ARCH_POWER |
|---|
| 70 | #define LINUX_PAGE_POOL_BASE 0x54000000 |
|---|
| 71 | #endif |
|---|
| 72 | #ifdef GPFS_ARCH_IA64 |
|---|
| 73 | /* Use shared memory region after TM pool */ |
|---|
| 74 | /* TM pool address + 4GB */ |
|---|
| 75 | #define LINUX_PAGE_POOL_BASE 0x6000010100000000 |
|---|
| 76 | #endif |
|---|
| 77 | #ifdef GPFS_ARCH_PPC64 |
|---|
| 78 | /* TM pool address + 2GB */ |
|---|
| 79 | #define LINUX_PAGE_POOL_BASE 0x0000002080000000UL |
|---|
| 80 | #endif |
|---|
| 81 | #ifdef GPFS_ARCH_X86_64 |
|---|
| 82 | /* TM pool address + 4GB */ |
|---|
| 83 | #define LINUX_PAGE_POOL_BASE 0x0000005100000000UL |
|---|
| 84 | #endif |
|---|
| 85 | |
|---|
| 86 | |
|---|
| 87 | #define POOL_MMAP_CHUNK_SIZE 0x04000000 |
|---|
| 88 | |
|---|
| 89 | /* Address where token manager malloc pool begins */ |
|---|
| 90 | #ifdef GPFS_ARCH_I386 |
|---|
| 91 | # define TM_POOL_START 0x30000000 |
|---|
| 92 | #endif |
|---|
| 93 | #ifdef GPFS_ARCH_POWER |
|---|
| 94 | # define TM_POOL_START 0x40000000 |
|---|
| 95 | #endif |
|---|
| 96 | #ifdef GPFS_ARCH_IA64 |
|---|
| 97 | # define TM_POOL_START 0x6000010000000000 |
|---|
| 98 | #endif |
|---|
| 99 | #ifdef GPFS_ARCH_PPC64 |
|---|
| 100 | /* this is the top portion of the brk area. We count on the |
|---|
| 101 | daemon never growing big enough to get here. There's only |
|---|
| 102 | 2TB of address space available in userspace on ppc64, so a |
|---|
| 103 | completely safe spot is hard to find */ |
|---|
| 104 | # define TM_POOL_START 0x0000002000000000UL |
|---|
| 105 | #endif |
|---|
| 106 | #ifdef GPFS_ARCH_X86_64 |
|---|
| 107 | # define TM_POOL_START 0x0000005000000000UL |
|---|
| 108 | #endif |
|---|
| 109 | |
|---|
| 110 | /* Buffers in user address space must be aligned to a boundary of this size |
|---|
| 111 | in order to perform an I/O request. */ |
|---|
| 112 | #define IOBUF_ALIGN_SIZE 512 |
|---|
| 113 | |
|---|
| 114 | /* Kernel data structure associated with an I/O buffer. I/O buffers that |
|---|
| 115 | are pinned (or attached) point to one of these structures from their |
|---|
| 116 | kernelIOBufferDescP field. It describes the physical pages occupied by |
|---|
| 117 | the I/O buffer using Linux kiobufs. These are linked together in a |
|---|
| 118 | global list anchored in the kernel so that pinned storage can be released |
|---|
| 119 | when the GPFS daemon terminates abnormally. Each I/O buffer has one |
|---|
| 120 | cxiKernelIOBufferDesc_t on this global list. However, since one |
|---|
| 121 | cxiKernelIOBufferDesc_t can map at most PAGES_PER_KIBD pages, large I/O |
|---|
| 122 | buffers require multiple cxiKernelIOBufferDesc_t's. */ |
|---|
| 123 | struct cxiKernelIOBufferDesc_t |
|---|
| 124 | { |
|---|
| 125 | /* Daemon address for beginning of I/O buffer. This address must be |
|---|
| 126 | aligned on a page boundary. */ |
|---|
| 127 | char* kibdVaddr; |
|---|
| 128 | |
|---|
| 129 | /* Number of pages described by this cxiKernelIOBufferDesc_t. */ |
|---|
| 130 | int kibdPages; |
|---|
| 131 | |
|---|
| 132 | /* Number of pages described by this chain of cxiKernelIOBufferDesc_t |
|---|
| 133 | * Only valid for the first cxiKernelIOBufferDesc_t in the chain. |
|---|
| 134 | */ |
|---|
| 135 | int kibdTotalPages; |
|---|
| 136 | |
|---|
| 137 | /* List pointer. Used for a chain of cxiKernelIOBufferDesc_t's. */ |
|---|
| 138 | struct cxiKernelIOBufferDesc_t* kibdNextP; |
|---|
| 139 | |
|---|
| 140 | /* An I/O buffer is described by a chain of cxiKernelIOBufferDesc_t, |
|---|
| 141 | * of which the head descriptor is placed on a global list. Thus these |
|---|
| 142 | * fields are only valid for the first cxiKernelIOBufferDesc_t in the |
|---|
| 143 | * chain of descriptors. |
|---|
| 144 | */ |
|---|
| 145 | struct cxiKernelIOBufferDesc_t* gblNextP; |
|---|
| 146 | struct cxiKernelIOBufferDesc_t* gblPrevP; |
|---|
| 147 | |
|---|
| 148 | #define PAGES_PER_KIBD (64*1024/PAGE_SIZE) /* 64K */ |
|---|
| 149 | char* maplist[PAGES_PER_KIBD]; |
|---|
| 150 | }; |
|---|
| 151 | |
|---|
/* cxiMemoryMapping_t -- record of one mapping within the daemon address
 * space.
 *
 * A group of these records is allocated as an array in the shared segment;
 * the vindex of a memory descriptor points to the element that belongs
 * to it.
 */
struct cxiMemoryMapping_t
{
  char  *vaddr;    /* where the area is mapped in the daemon */
#ifdef SSEG_SWIZZLE_PTRS
  char  *kvaddr;   /* NOTE(review): presumably the kernel-side address of the
                      same area -- confirm */
#endif
  int    kBytes;   /* length of the area, in kilobytes */
  short  vindex;   /* slot in the shared segment mapping array */
};

/* Reset *mmP to its initial state: NULL address(es), a length of zero
 * kilobytes and a vindex of -1.
 */
static inline void
InitMemoryMapping(struct cxiMemoryMapping_t *mmP)
{
  mmP->vindex = -1;
  mmP->kBytes = 0;
#ifdef SSEG_SWIZZLE_PTRS
  mmP->kvaddr = NULL;
#endif
  mmP->vaddr = NULL;
}
|---|
| 177 | |
|---|
| 178 | static inline Boolean |
|---|
| 179 | IsMemoryMappingFree(struct cxiMemoryMapping_t *mmP) |
|---|
| 180 | { |
|---|
| 181 | if (mmP->kBytes == 0) |
|---|
| 182 | return true; |
|---|
| 183 | |
|---|
| 184 | return false; |
|---|
| 185 | } |
|---|
| 186 | |
|---|
| 187 | typedef struct cxiMemoryMapping_t cxiMemoryMapping_t; |
|---|
| 188 | |
|---|
| 189 | /* Initialization and termination routines. Called at module load |
|---|
| 190 | and unload, respectively. */ |
|---|
| 191 | EXTERNC void KibdModuleInit(); |
|---|
| 192 | EXTERNC void KibdModuleTerm(); |
|---|
| 193 | |
|---|
| 194 | /* Create a cxiKernelIOBufferDesc_t object (or list of cxiKernelIOBufferDesc_t |
|---|
| 195 | objects) describing an I/O buffer in the user address space of the |
|---|
| 196 | calling process and link it onto the list of all such objects. Pins |
|---|
| 197 | the user-level buffer. The buffer virtual address must be on a page |
|---|
| 198 | boundary. The length can be arbitrarily large, but must be a multiple |
|---|
| 199 | of the page size. Returns 0 if successful, non-zero if unsuccessful. |
|---|
| 200 | */ |
|---|
| 201 | EXTERNC int cxiKibdPin(char* vaddr, int len, |
|---|
| 202 | struct cxiKernelIOBufferDesc_t** kibdPP); |
|---|
| 203 | |
|---|
| 204 | /* Remove a cxiKernelIOBufferDesc_t object from the list of all |
|---|
| 205 | such objects, destroy it and all chained cxiKernelIOBufferDesc_t objects |
|---|
| 206 | associated with it, and unpin the associated user-level buffer. */ |
|---|
| 207 | EXTERNC void cxiKibdUnpin(struct cxiKernelIOBufferDesc_t* kibdP); |
|---|
| 208 | |
|---|
| 209 | /* Free all cxiKernelIOBufferDesc_t's, and unpin their underlying storage. */ |
|---|
| 210 | EXTERNC void cxiKibdUnpinAll(); |
|---|
| 211 | |
|---|
| 212 | #ifdef MMAP_DIO |
|---|
| 213 | |
|---|
| 214 | /* Create a cxiKernelIOBufferDesc_t object for a page in user address space |
|---|
| 215 | that is already pinned. The page will be mapped into kernel address |
|---|
| 216 | space. This is used by mmap routines that want to do direct I/O from |
|---|
| 217 | user page to disk. The cxiKernelIOBufferDesc_t that this routine |
|---|
| 218 | creates can be passed to cxiKDoIO just like one that was created by |
|---|
| 219 | cxiKibdPin. */ |
|---|
| 220 | EXTERNC int cxiKibdPinmm(struct page *pageP, |
|---|
| 221 | struct cxiKernelIOBufferDesc_t** kibdPP); |
|---|
| 222 | |
|---|
| 223 | /* Free a cxiKernelIOBufferDesc_t that was created by cxiKibdPinmm. */ |
|---|
| 224 | EXTERNC void cxiKibdUnpinmm(struct page *pageP, |
|---|
| 225 | struct cxiKernelIOBufferDesc_t* kibdP); |
|---|
| 226 | #endif /* MMAP_DIO */ |
|---|
| 227 | |
|---|
| 228 | |
|---|
/* Handle that describes a particular cxiIOBuffer_t that has been attached.
   On Linux, this is a pointer to a cxiKernelIOBufferDesc_t. */
struct cxiIOBufferAttachment_t
{
  /* Kernel descriptor of the attached buffer; NULL after
     InitBufferAttachment. */
  struct cxiKernelIOBufferDesc_t* kDescP;
};


/* Initialize a cxiIOBufferAttachment_t: sets kDescP to NULL so that the
   handle refers to no kernel descriptor. */
static inline void InitBufferAttachment(struct cxiIOBufferAttachment_t* baP)
{
  baP->kDescP = NULL;
}
|---|
| 242 | |
|---|
| 243 | |
|---|
| 244 | |
|---|
| 245 | /* Result of making a read-only copy of a portion of an I/O buffer. On |
|---|
| 246 | Linux, this must record the base address of the copy buffer, if one was |
|---|
| 247 | required. If data was mapped in place, the cxiContiguousBuffer_t records |
|---|
| 248 | which page was kmapped. */ |
|---|
| 249 | struct cxiContiguousBuffer_t |
|---|
| 250 | { |
|---|
| 251 | /* Base of storage allocated with vmalloc / kmalloc, or NULL if data is |
|---|
| 252 | referenced in place. */ |
|---|
| 253 | char* mallocedBaseP; |
|---|
| 254 | |
|---|
| 255 | /* True if storage pointed to be mallocedBaseP was allocated using |
|---|
| 256 | kmalloc. If false, then vmalloc was used. */ |
|---|
| 257 | Boolean usedKmalloc; |
|---|
| 258 | |
|---|
| 259 | /* Pointer used to remember which page to unmap, or NULL if data was copied |
|---|
| 260 | to mallocedBaseP by mapContiguousRO. */ |
|---|
| 261 | void* pageP; |
|---|
| 262 | }; |
|---|
| 263 | |
|---|
| 264 | |
|---|
| 265 | /* Initialize a cxiContiguousBuffer_t */ |
|---|
| 266 | static inline void InitContiguousBuffer(struct cxiContiguousBuffer_t* cbP) |
|---|
| 267 | { |
|---|
| 268 | cbP->mallocedBaseP = NULL; |
|---|
| 269 | cbP->usedKmalloc = false; |
|---|
| 270 | cbP->pageP = NULL; |
|---|
| 271 | } |
|---|
| 272 | |
|---|
| 273 | |
|---|
| 274 | /* Kernel calls used by cxiK... routines to call the Kibd... routines */ |
|---|
| 275 | EXTERNC int kxPinKernelIOBuffer(char* vaddr, int len, |
|---|
| 276 | struct cxiKernelIOBufferDesc_t** pinnedPP); |
|---|
| 277 | EXTERNC int kxUnpinKernelIOBuffer(struct cxiKernelIOBufferDesc_t* pinnedP); |
|---|
| 278 | EXTERNC int kxUnpinAllKernelIOBuffers(); |
|---|
| 279 | |
|---|
| 280 | /* Methods for manipulating cxiIOBuffer_t's */ |
|---|
| 281 | |
|---|
| 282 | /* Return true if the fields describing the IOBuffer are self-consistent */ |
|---|
| 283 | #define IOBUFFER_IS_CONSISTENT(IOBP) (true) |
|---|
| 284 | |
|---|
| 285 | /* Pin the pages belonging to this I/O buffer */ |
|---|
| 286 | EXTERNC void KPinIOBuffer(struct cxiIOBuffer_t* iobP); |
|---|
| 287 | |
|---|
| 288 | /* Unpin the pages belonging to this I/O buffer */ |
|---|
| 289 | EXTERNC void KUnpinIOBuffer(struct cxiIOBuffer_t* iobP); |
|---|
| 290 | |
|---|
| 291 | /* Split the kernel buffer descriptor into two adjacent I/O buffers */ |
|---|
| 292 | EXTERNC void KSplitIOBuffer(struct cxiIOBuffer_t* iobP, int frontPages, |
|---|
| 293 | struct cxiIOBuffer_t* rearBufP); |
|---|
| 294 | |
|---|
| 295 | /* Merge the kernel buffer descriptors of two adjacent I/O buffers. The |
|---|
| 296 | I/O buffer p should be destroyed after this call, since its pages will |
|---|
| 297 | be merged into the buffer *iobP. */ |
|---|
| 298 | EXTERNC void KMergeIOBuffer(struct cxiIOBuffer_t* iobP, struct cxiIOBuffer_t* p); |
|---|
| 299 | |
|---|
| 300 | /* Read or write the given sectors from dev. Data should be placed into |
|---|
| 301 | the I/O buffer beginning at byte offset bufOffset. Returns EOK |
|---|
| 302 | on success, negative values on error. All of the data to be |
|---|
| 303 | transferred will be in the first cxiKernelIOBufferDesc_t. */ |
|---|
| 304 | EXTERNC int cxiKDoIO(struct cxiKernelIOBufferDesc_t* kibdP, |
|---|
| 305 | Boolean isWrite, cxiDev_t dev, UInt64 startSector, |
|---|
| 306 | int nSectors, int sectorSize, int bufOffset); |
|---|
| 307 | |
|---|
| 308 | |
|---|
| 309 | /* On Linux, I/O buffers can be accessed at contiguous virtual addresses |
|---|
| 310 | from the daemon process, but not from kernel code */ |
|---|
| 311 | #ifndef _KERNEL |
|---|
| 312 | #define __CXI_BUFFERS_ARE_CONTIGUOUS |
|---|
| 313 | #endif |
|---|
| 314 | |
|---|
| 315 | /* Routine to set up the disk block size and get disk parameters */ |
|---|
| 316 | EXTERNC int GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP); |
|---|
| 317 | |
|---|
| 318 | #endif /* _h_cxiIOBuffer_plat */ |
|---|
| 319 | |
|---|