/*
* memtoy: segment.c - manage memory segments
*
* create/destroy/map/unmap - anonymous, file and SysV shmem segments
* touch [read or write] - ranges of segments
* mbind - ranges of segments
* show mappings or locations of segment pages
*/
/*
* Copyright (c) 2005 Hewlett-Packard, Inc
* All rights reserved.
*/
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config.h"
#if HAVE_NUMA_H
#include <numa.h>
#endif
#ifdef HAVE_NUMA_V2
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <numa.h>
#include <numaif.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "memtoy.h"
#include "segment.h"
/*
 * One entry of the segment table (gcp->seglist).
 * An entry whose seg_type is SEGT_NONE is treated as a free slot by
 * get_seg_slot() and segment_get().
 */
struct segment {
char *seg_name; /* heap-allocated name used for lookup; freed by free_seg_slot() */
void *seg_start; /* address of the mapping, or MAP_FAILED while not mapped */
size_t seg_length; /* length of the mapping, in bytes */
off_t seg_offset; /* memory mapped files: offset of the mapping */
char *seg_path; /* memory mapped files: heap-allocated path name */
seg_type_t seg_type; /* SEGT_ANON, SEGT_FILE, SEGT_SHM or SEGT_NONE */
int seg_slot; /* index of this entry in gcp->seglist */
int seg_flags; /* shared|private; 0 selects the default at map time */
int seg_prot; /* PROT_READ/PROT_WRITE bits used when mapping */
int seg_fd; /* saved file descriptor, or SEG_FD_NONE */
int seg_shmid; /* id returned by shmget(), or SHM_ID_NONE */
};
#define MAX_SEGMENTS 63		/* arbitrary max */
#define SEG_FD_NONE (-1)	/* seg_fd value: no file descriptor saved */
#define SHM_ID_NONE (-1)	/* seg_shmid value: no shm segment obtained */
#define SEG_ERR (0)		/* failure return of the segment functions */
#define SEG_OK (1)		/* success return of the segment functions */
/*
 * Byte distance of ADDR from the start of SEGP's mapping.
 * Both arguments are parenthesized so that any pointer expression
 * (e.g. "&seg") can be passed safely.
 */
#define SEG_OFFSET(SEGP, ADDR) ((char *)(ADDR) - (char *)((SEGP)->seg_start))
/*
* =========================================================================
*/
/*
 * segment_init() -- allocate the zero-filled segment table for gcp.
 *
 * The table has room for MAX_SEGMENTS entries plus one extra slot that
 * always stays NULL and terminates the list.  An allocation failure is
 * reported through die().
 */
void segment_init(struct global_context *gcp)
{
	const size_t table_slots = MAX_SEGMENTS + 1;

	gcp->seglist = calloc(table_slots, sizeof(segment_t *));
	if (gcp->seglist == NULL) {
		die(4, "%s: can't alloc segment table\n", gcp->program_name);
	}

	/* no pre-selected free slot yet */
	gcp->seg_avail = NULL;
}
/*
 * new_segment() -- allocate one zero-filled segment descriptor.
 *
 * Returns the new descriptor, or NULL after printing a message on
 * stderr when the allocation fails.
 */
static segment_t *new_segment(void)
{
	segment_t *fresh = calloc(1, sizeof(*fresh));

	if (fresh != NULL)
		return fresh;

	fprintf(stderr, "%s: failed to allocate segment\n",
		glctx.program_name);
	return NULL;
}
/*
 * get_seg_slot() -- allocate a segment table slot for a new segment
 *
 * Order of preference:
 *   1. the slot remembered in gcp->seg_avail (consumed by this call),
 *   2. the first table entry whose type is SEGT_NONE,
 *   3. a newly allocated entry in the first never-used table position.
 *
 * Returns the slot, or NULL if the table is full or the allocation of
 * a new entry fails (a message has been printed in either case).
 */
static segment_t *get_seg_slot(void)
{
	glctx_t *gcp = &glctx;
	segment_t *segp, **segpp;

	/*
	 * consume saved slot, if any
	 */
	segp = gcp->seg_avail;
	if (segp != NULL) {
		gcp->seg_avail = NULL;
		return segp;
	}

	/*
	 * simple linear scan for first available slot
	 */
	for (segpp = gcp->seglist; (segp = *segpp) != NULL; ++segpp) {
		if (segp->seg_type == SEGT_NONE)
			return segp;
	}

	if (segpp >= &gcp->seglist[MAX_SEGMENTS]) {
		fprintf(stderr, "%s: segment table full\n", gcp->program_name);
		return NULL;
	}

	/*
	 * previously unused slot; new_segment() reports its own failure,
	 * so just propagate NULL instead of dereferencing it.
	 */
	segp = new_segment();
	if (segp == NULL)
		return NULL;

	segp->seg_slot = segpp - gcp->seglist;
	*segpp = segp;
	return segp;
}
/*
 * unmap_segment() -- drop the mapping of a segment, if it has one.
 *
 * Anonymous and file segments are munmap()ed, shm segments are
 * detached with shmdt().  Afterwards seg_start is MAP_FAILED, which is
 * how this file marks a segment as "not mapped".
 */
static void unmap_segment(segment_t * segp)
{
	void *addr = segp->seg_start;

	if (addr == MAP_FAILED)
		return;		/* already unmapped */

	if (segp->seg_type == SEGT_SHM) {
		shmdt(addr);
	} else if (segp->seg_type == SEGT_ANON ||
		   segp->seg_type == SEGT_FILE) {
		munmap(addr, segp->seg_length);
	}
	/* any other type: shouldn't happen; nothing to release */

	segp->seg_start = MAP_FAILED;
}
/*
 * free_seg_slot() -- release everything a table entry owns and recycle it.
 *
 * Frees the name and path strings, closes the saved descriptor of a
 * file segment, removes the shm segment of a shm segment, then zeroes
 * the entry while keeping its slot number.  The entry is remembered in
 * gcp->seg_avail unless another free entry is already remembered there.
 *
 * Note: this does not unmap the segment; callers do that first if needed.
 */
static void free_seg_slot(segment_t * segp)
{
	const int kept_slot = segp->seg_slot;

	/* free(NULL) is a no-op, so no guards are needed */
	free(segp->seg_name);
	free(segp->seg_path);

	switch (segp->seg_type) {
	case SEGT_FILE:
		if (segp->seg_fd != SEG_FD_NONE)
			close(segp->seg_fd);
		break;
	case SEGT_SHM:
		if (segp->seg_shmid != SHM_ID_NONE)
			shmctl(segp->seg_shmid, IPC_RMID, NULL);
		break;
	default:
		break;
	}

	/* clear out the segment, but preserve slot # */
	memset(segp, 0, sizeof(*segp));
	segp->seg_slot = kept_slot;

	if (glctx.seg_avail == NULL)
		glctx.seg_avail = segp;
}
/*
 * segment_cleanup() -- called from memtoy "at exit" cleanup().
 *
 * Walks the NULL-terminated segment table and frees the slot of every
 * shm segment, which removes the underlying shared memory.  Entries of
 * other types are left alone.  Safe to call before the table exists.
 */
void segment_cleanup(struct global_context *gcp)
{
	segment_t **cursor = gcp->seglist;

	if (cursor == NULL)
		return;

	while (*cursor != NULL) {
		segment_t *seg = *cursor++;

		if (seg->seg_type == SEGT_SHM)
			free_seg_slot(seg);	/* to remove shared mem */
	}
}
/*
 * round_up_to_pagesize() -- round size up to the next multiple of
 * glctx.pagesize.  The mask arithmetic assumes the page size is a power
 * of two.
 */
static size_t round_up_to_pagesize(size_t size)
{
	const size_t low_bits = glctx.pagesize - 1;

	size += low_bits;
	return size & ~low_bits;
}
/*
 * round_down_to_pagesize() -- round size down to a multiple of
 * glctx.pagesize by clearing the low-order bits.  The mask arithmetic
 * assumes the page size is a power of two.
 */
static size_t round_down_to_pagesize(size_t size)
{
	const size_t low_bits = glctx.pagesize - 1;
	const size_t aligned = size & ~low_bits;

	return aligned;
}
/*
 * get_node() -- fetch numa node id of page at vaddr
 * [from Ray Bryant's [SGI] memory migration tests]
 *
 * Returns the node id reported by get_mempolicy(), or -1 if the call
 * fails.
 */
static int get_node(void *vaddr)
{
	int node;

	if (get_mempolicy(&node, NULL, 0, vaddr,
			  MPOL_F_NODE | MPOL_F_ADDR) != 0)
		return -1;

	return node;
}
/*
* =========================================================================
*/
/*
 * map_anon_segment() -- mmap() an anonymous segment of seg_length bytes.
 *
 * Uses seg_flags (MAP_PRIVATE when 0) and seg_prot.  On success the
 * mapping address is stored in seg_start and SEG_OK is returned; on
 * failure a message is printed and SEG_ERR is returned.
 */
static int map_anon_segment(segment_t * segp)
{
	glctx_t *gcp = &glctx;
	char *memp;
	int flags = segp->seg_flags;

	if (!flags)
		flags = MAP_PRIVATE;	/* default */

	/*
	 * fd is -1 rather than 0: some implementations require -1 for
	 * MAP_ANONYMOUS mappings, and Linux ignores it.
	 */
	memp = mmap(NULL, segp->seg_length, segp->seg_prot,
		    flags | MAP_ANONYMOUS, -1, 0);
	if (memp == MAP_FAILED) {
		int err = errno;
		fprintf(stderr, "%s: anonymous mmap failed - %s\n",
			__FUNCTION__, strerror(err));
		return SEG_ERR;
	}

	/* %lx wants unsigned long, so convert the addresses explicitly */
	vprint("%s: mmap()ed anon seg %s at 0x%lx-0x%lx\n",
	       gcp->program_name, segp->seg_name,
	       (unsigned long)memp,
	       (unsigned long)(memp + segp->seg_length - 1));

	segp->seg_start = memp;
	return SEG_OK;
}
/*
 * open_file() -- open and validate file when registering a file segment.
 * remember fd in segment struct.
 *
 * Only regular files are accepted.  The file is opened with the widest
 * access the process has; seg_prot loses PROT_WRITE or PROT_READ when
 * the corresponding access is not available.
 *
 * Returns SEG_OK with seg_fd set, or SEG_ERR after printing a message
 * and releasing the segment's table slot.
 */
static int open_file(segment_t * segp)
{
	glctx_t *gcp = &glctx;
	const char *path = segp->seg_path;
	struct stat stbuf;
	int fd, oflags, err;

	if (stat(path, &stbuf) < 0) {
		err = errno;
		fprintf(stderr, "%s: can't stat %s - %s\n",
			gcp->program_name, path, strerror(err));
		goto fail;
	}

	/*
	 * TODO: for now, just regular files. later?
	 */
	if (!S_ISREG(stbuf.st_mode)) {
		fprintf(stderr, "%s: %s - is not a regular file\n",
			gcp->program_name, path);
		goto fail;
	}

	/*
	 * Open file with maximal privileges; adjust segment mapping
	 * protections if permissions don't allow full R/W access.
	 */
	if (access(path, R_OK | W_OK) == 0) {
		oflags = O_RDWR;
	} else if (access(path, R_OK) == 0) {
		oflags = O_RDONLY;
		segp->seg_prot &= ~PROT_WRITE;
	} else if (access(path, W_OK) == 0) {
		oflags = O_WRONLY;
		segp->seg_prot &= ~PROT_READ;
	} else {
		fprintf(stderr, "%s: can't access %s\n",
			gcp->program_name, path);
		goto fail;
	}

	fd = open(path, oflags);
	if (fd < 0) {
		err = errno;
		fprintf(stderr, "%s: can't open %s - %s\n",
			gcp->program_name, path, strerror(err));
		goto fail;
	}

	segp->seg_fd = fd;
	return SEG_OK;

fail:
	/* every failure gives the slot (and its strings) back */
	free_seg_slot(segp);
	return SEG_ERR;
}
/*
 * file_size() -- re-fetch file size at map time, just in case it's changed.
 *
 * Returns the st_size reported by fstat() for fd, or BOGUS_SIZE when
 * fstat() fails.
 */
static size_t file_size(int fd)
{
	struct stat stbuf;

	if (fstat(fd, &stbuf) == 0)
		return stbuf.st_size;

	return BOGUS_SIZE;
}
/*
 * map_file_segment() -- map a [range of a] registered file segment.
 *
 * seg_offset is rounded down and seg_length rounded up to a page
 * boundary; a seg_length of 0 means "from seg_offset to the end of the
 * file".  Uses seg_flags (MAP_PRIVATE when 0) and seg_prot.
 *
 * Returns SEG_OK with seg_start set, or SEG_ERR after printing a
 * message.  The segment stays registered on failure.
 */
static int map_file_segment(segment_t * segp)
{
	glctx_t *gcp = &glctx;
	char *memp;
	size_t size;
	int fd;
	int flags = segp->seg_flags;

	if (!flags)
		flags = MAP_PRIVATE;	/* default */

	if ((fd = segp->seg_fd) == SEG_FD_NONE) {
		fprintf(stderr, "%s: file %s not open\n",
			gcp->program_name, segp->seg_path);
		return SEG_ERR;
	}

	/* file_size() signals an fstat() failure with BOGUS_SIZE */
	size = file_size(fd);
	if (size == (size_t)BOGUS_SIZE) {
		fprintf(stderr, "%s: can't get size of file %s\n",
			gcp->program_name, segp->seg_path);
		return SEG_ERR;
	}

	/*
	 * page align offset/length; verify fit in file
	 */
	segp->seg_offset = round_down_to_pagesize(segp->seg_offset);
	if ((size_t)segp->seg_offset > size) {
		fprintf(stderr, "%s: offset 0x%lx beyond end of file %s\n",
			gcp->program_name,
			(unsigned long)segp->seg_offset, segp->seg_path);
		return SEG_ERR;
	}

	if (segp->seg_length == 0)
		segp->seg_length = round_up_to_pagesize(size) -
		    segp->seg_offset;
	else
		segp->seg_length = round_up_to_pagesize(segp->seg_length);

	memp = mmap(NULL, segp->seg_length,
		    segp->seg_prot, flags, fd, segp->seg_offset);
	if (memp == MAP_FAILED) {
		int err = errno;
		fprintf(stderr, "%s: mmap of %s failed - %s\n",
			__FUNCTION__, segp->seg_path, strerror(err));
		return SEG_ERR;
	}

	/* %lx wants unsigned long, so convert the addresses explicitly */
	vprint("%s: mmap()ed file seg %s at 0x%lx-0x%lx\n",
	       gcp->program_name, segp->seg_name,
	       (unsigned long)memp,
	       (unsigned long)(memp + segp->seg_length - 1));

	segp->seg_start = memp;
	return SEG_OK;
}
/*
 * get_shm_segment() -- create [shmget] a new shared memory segment
 *
 * Requests a private, read/write segment of seg_length bytes and stores
 * its id in seg_shmid.  Returns SEG_OK, or SEG_ERR after printing a
 * message and releasing the segment's table slot.
 */
static int get_shm_segment(segment_t * segp)
{
	glctx_t *gcp = &glctx;
	int id = shmget(IPC_PRIVATE, segp->seg_length, SHM_R | SHM_W);

	if (id != -1) {
		segp->seg_shmid = id;
		vprint("%s: shm seg %s id: %d\n",
		       gcp->program_name, segp->seg_name, segp->seg_shmid);
		return SEG_OK;
	}

	/* grab errno before any other library call can change it */
	int err = errno;

	fprintf(stderr, "%s: failed to get shm segment %s - %s\n",
		gcp->program_name, segp->seg_name, strerror(err));
	free_seg_slot(segp);
	return SEG_ERR;
}
/*
 * map_shm_segment() -- attach [shmat] a shared memory segment
 *
 * Attaches seg_shmid at an address chosen by the system and stores the
 * address in seg_start.  Returns SEG_OK, or SEG_ERR after printing a
 * message.
 */
static int map_shm_segment(segment_t * segp)
{
	glctx_t *gcp = &glctx;
	char *base;

	segp->seg_start = shmat(segp->seg_shmid, NULL, 0);
	/*
	 * shmat() fails with (void *)-1; this file compares it against
	 * MAP_FAILED, which is also its "not mapped" marker for seg_start.
	 */
	if (segp->seg_start == MAP_FAILED) {
		int err = errno;
		fprintf(stderr, "%s: failed to attach shm segment %s: %s\n",
			gcp->program_name, segp->seg_name, strerror(err));
		return SEG_ERR;
	}

	/*
	 * Do the address arithmetic on a char pointer (arithmetic on
	 * void * is a compiler extension) and hand %lx real unsigned longs.
	 */
	base = segp->seg_start;
	vprint("%s: mmap()ed shm seg %s at 0x%lx-0x%lx\n",
	       gcp->program_name, segp->seg_name,
	       (unsigned long)base,
	       (unsigned long)(base + segp->seg_length - 1));
	return SEG_OK;
}
/*
* =========================================================================
* segment API
*/
/*
 * segment_get(name) - lookup named segment
 * TODO: move to segment private functions?
 *
 * Returns the in-use table entry whose seg_name equals name, or NULL
 * if there is none.
 *
 * Side effect: while scanning, the first free entry seen is remembered
 * in gcp->seg_avail (if nothing is remembered yet).  If the lookup
 * fails, no free entry is known and the table still has room, a new
 * entry is allocated in advance and remembered there.
 */
segment_t *segment_get(char *name)
{
	glctx_t *gcp = &glctx;
	segment_t **slot = gcp->seglist;
	segment_t *seg;

	while ((seg = *slot) != NULL) {
		if (seg->seg_type != SEGT_NONE) {
			if (strcmp(name, seg->seg_name) == 0)
				return seg;
		} else if (gcp->seg_avail == NULL) {
			gcp->seg_avail = seg;
		}
		++slot;
	}

	/* not found: nothing more to do if a free entry is already known */
	if (gcp->seg_avail != NULL || slot >= &gcp->seglist[MAX_SEGMENTS])
		return NULL;

	/*
	 * prealloc an available segment
	 */
	seg = new_segment();
	*slot = seg;
	if (seg != NULL) {
		seg->seg_slot = slot - gcp->seglist;
		gcp->seg_avail = seg;
	}
	return NULL;
}
/*
 * segment_register: register an anon, file or shm segment based on args.
 * for anon and shm, 'name' = segment name.
 * for file, 'name' = path name; segment name = basename(path)
 *
 * The segment is registered unmapped (seg_start == MAP_FAILED) with
 * read/write protection.  File segments are opened and shm segments are
 * created [shmget] here; both release the slot again if that step fails.
 *
 * 'name' itself is never modified: basename() is applied to a scratch
 * copy because the POSIX version may alter its argument.
 *
 * returns: !0 on success; 0 on failure
 */
int segment_register(seg_type_t type, char *name, range_t * range, int flags)
{
	glctx_t *gcp = &glctx;
	segment_t *segp;
	char *path, *scratch, *segname = NULL;

	path = strdup(name);	/* save a copy */
	scratch = strdup(name);	/* basename() may scribble on this one */
	if (path != NULL && scratch != NULL)
		segname = strdup(basename(scratch));
	free(scratch);
	if (path == NULL || segname == NULL) {
		fprintf(stderr, "%s: out of memory registering segment %s\n",
			gcp->program_name, name);
		goto fail;
	}

	segp = segment_get(segname);	/* ensure unique name */
	if (segp != NULL) {
		fprintf(stderr, "%s: segment %s already exists\n",
			gcp->program_name, segp->seg_name);
		goto fail;
	}

	segp = get_seg_slot();
	if (segp == NULL)
		goto fail;

	segp->seg_name = segname;	/* slot owns the string from here on */
	segp->seg_start = MAP_FAILED;
	segp->seg_length = round_up_to_pagesize(range->length);
	segp->seg_offset = round_down_to_pagesize(range->offset);
	segp->seg_type = type;
	segp->seg_flags = flags;	/* possibly 0 */
	segp->seg_prot = PROT_READ | PROT_WRITE;	/* default */
	segp->seg_fd = SEG_FD_NONE;
	segp->seg_shmid = SHM_ID_NONE;

	switch (type) {
	case SEGT_FILE:
		segp->seg_path = path;	/* slot owns the path as well */
		return open_file(segp);
	case SEGT_SHM:
		free(path);
		return get_shm_segment(segp);
	case SEGT_ANON:
	default:
		free(path);
		break;
	}
	return SEG_OK;

fail:
	free(path);
	free(segname);
	return SEG_ERR;
}
/* column titles for the per-segment lines printed by show_one_segment() */
static char *segment_header =
    " _____address______ ____length____ ____offset____ prot share name\n";
/* one-character tag per segment type, indexed by seg_type */
static char seg_type[] = { '.', 'a', 'f', 's' };

/*
 * show_one_segment() -- print one line describing segp on stdout,
 * preceded by the column header when 'header' is true.
 *
 * File segments are listed by path, all others by segment name.
 * Unmapped segments show "*** not-mapped ***" in place of the address.
 * Always returns SEG_OK.
 */
static int show_one_segment(segment_t * segp, bool header)
{
	char *protection, *share, *name;
	char type_ch = '?';	/* shown for a type outside seg_type[] */

	switch (segp->seg_prot & (PROT_READ | PROT_WRITE)) {
	case PROT_READ | PROT_WRITE:
		protection = "rw";
		break;
	case PROT_READ:
		protection = "r-";
		break;
	case PROT_WRITE:
		protection = "-w";
		break;
	default:
		protection = "--";
		break;
	}

	if (segp->seg_flags)
		share = (segp->seg_flags & MAP_SHARED) ? "shared " : "private";
	else
		share = "default";

	name = (segp->seg_type == SEGT_FILE) ? segp->seg_path : segp->seg_name;

	/* never index past the end of the tag table */
	if ((size_t)segp->seg_type < sizeof(seg_type))
		type_ch = seg_type[segp->seg_type];

	if (header)
		puts(segment_header);

	if (segp->seg_start != MAP_FAILED) {
		/*
		 * "0x%p" would print a second "0x" wherever %p already
		 * emits one; a fixed 16-digit hex field keeps the address
		 * as wide as its header and the not-mapped placeholder.
		 */
		printf("%c 0x%016lx 0x%012zx 0x%012lx %s %s %s\n",
		       type_ch,
		       (unsigned long)segp->seg_start,
		       segp->seg_length,
		       (unsigned long)segp->seg_offset,
		       protection, share, name);
	} else {
		printf("%c *** not-mapped *** 0x%012zx 0x%012lx %s %s %s\n",
		       type_ch,
		       segp->seg_length,
		       (unsigned long)segp->seg_offset,
		       protection, share, name);
	}
	return SEG_OK;
}
/*
 * segment_show() -- show specified segment, or all, if none specified.
 *
 * With a name: prints that segment without a header, or reports
 * "no such segment" and returns SEG_ERR.
 * With NULL: prints every in-use segment, with the header before the
 * first one only, and returns SEG_OK.
 */
int segment_show(char *name)
{
	glctx_t *gcp = &glctx;
	segment_t **slot;
	segment_t *seg;
	bool want_header = true;

	if (name == NULL) {
		/*
		 * show all
		 */
		for (slot = gcp->seglist; *slot != NULL; ++slot) {
			if ((*slot)->seg_type == SEGT_NONE)
				continue;
			show_one_segment(*slot, want_header);
			want_header = false;	/* first time only */
		}
		return SEG_OK;
	}

	seg = segment_get(name);
	if (seg == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	show_one_segment(seg, false);
	return SEG_OK;
}
/*
 * segment_remove() - remove the specified segment, if exists.
 *
 * The segment is unmapped and its table slot is released.  Returns
 * SEG_OK, or SEG_ERR with a message if no segment has that name.
 */
int segment_remove(char *name)
{
	segment_t *victim = segment_get(name);

	if (victim != NULL) {
		unmap_segment(victim);
		free_seg_slot(victim);
		return SEG_OK;
	}

	fprintf(stderr, "%s: no such segment: %s\n",
		glctx.program_name, name);
	return SEG_ERR;
}
/*
 * segment_touch() - "touch" [read or write] each page of specified range
 * -- from offset to offset+length -- to fault in or to
 * test protection.
 * NOTE: offset is relative to start of mapping, not start of file!
 *
 * The offset is rounded down and the length rounded up to a page
 * boundary; a length of 0, or one reaching past the end of the
 * segment, means "to the end of the segment".  The segment must be
 * mapped.  Prints the number of pages touched and the elapsed time.
 *
 * Returns SEG_OK, or SEG_ERR with a message if the segment does not
 * exist, is not mapped, or the offset lies beyond it.
 */
int segment_touch(char *name, range_t * range, int rw)
{
	glctx_t *gcp = &glctx;
	segment_t *segp;
	off_t offset;
	size_t length, maxlength;
	unsigned long *memp;
	struct timeval t_start, t_end;

	segp = segment_get(name);
	if (segp == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	/* without a mapping, seg_start is MAP_FAILED -- nothing to touch */
	if (segp->seg_start == MAP_FAILED) {
		fprintf(stderr, "%s: segment %s not mapped\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	offset = round_down_to_pagesize(range->offset);
	if ((size_t)offset >= segp->seg_length) {
		fprintf(stderr, "%s: offset %ld is past end of segment %s\n",
			gcp->program_name, (long)offset, name);
		return SEG_ERR;
	}

	memp = (unsigned long *)((char *)segp->seg_start + offset);
	maxlength = segp->seg_length - offset;

	length = range->length;
	if (length)
		length = round_up_to_pagesize(length);

	/*
	 * note: we silently truncate to max length [end of segment]
	 */
	if (length == 0 || length > maxlength)
		length = maxlength;

	gettimeofday(&t_start, NULL);
	touch_memory(rw, memp, length);
	gettimeofday(&t_end, NULL);

	/* the page count is a size_t, so print it with %zu */
	printf("%s: touched %zu pages in %6.3f secs\n",
	       gcp->program_name, (size_t)(length / gcp->pagesize),
	       (float)(tv_diff_usec(&t_start, &t_end)) / 1000000.0);
	return SEG_OK;
}
/*
 * segment_unmap() - unmap the specified segment, if any, from seg_start
 * to seg_start+seg_length. Leave the segment in the
 * table;
 *
 * Anonymous and file segments are munmap()ed and shm segments are
 * detached, all through unmap_segment(), so a shm segment is no longer
 * flagged "not mapped" while it is in fact still attached.
 * Unmapping a segment that is not mapped is a silent success.
 *
 * Returns SEG_OK, or SEG_ERR with a message if no segment has that name.
 */
int segment_unmap(char *name)
{
	glctx_t *gcp = &glctx;
	segment_t *segp;

	segp = segment_get(name);
	if (segp == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	/* no-op when already unmapped; leaves seg_start == MAP_FAILED */
	unmap_segment(segp);
	return SEG_OK;
}
/*
 * segment_map() -- [re] map() a previously unmapped segment
 * no-op if already mapped.
 * range only applies to mapped file.
 *
 * Non-zero flags replace the segment's saved flags before mapping.
 * Returns the result of the type-specific map function; SEG_OK (with a
 * message) if the segment is already mapped; SEG_ERR if the segment
 * does not exist or has an unrecognized type.
 */
int segment_map(char *name, range_t * range, int flags)
{
	glctx_t *gcp = &glctx;
	segment_t *seg = segment_get(name);

	if (seg == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	if (seg->seg_start != MAP_FAILED) {
		fprintf(stderr, "%s: segment %s already mapped\n",
			gcp->program_name, name);
		return SEG_OK;	/* treat as success */
	}

	if (flags != 0)
		seg->seg_flags = flags;

	if (seg->seg_type == SEGT_ANON)
		return map_anon_segment(seg);

	if (seg->seg_type == SEGT_SHM)
		return map_shm_segment(seg);

	if (seg->seg_type != SEGT_FILE)
		return SEG_ERR;	/* unrecognized type -- shouldn't happen */

	/* file segment: an explicit range overrides the saved one */
	if (range != NULL) {
		seg->seg_offset = range->offset;
		seg->seg_length = range->length;
	}
	return map_file_segment(seg);
}
/*
 * segment_mbind() - set memory policy for a range of specified segment
 *
 * NOTE: offset is relative to start of mapping, not start of file
 *
 * The offset is rounded down and the length rounded up to a page
 * boundary; a length of 0, or one reaching past the end of the
 * segment, means "to the end of the segment".
 *
 * Returns SEG_OK, or SEG_ERR with a message if the segment does not
 * exist, is not mapped, the offset lies beyond it, or mbind() fails.
 */
int
segment_mbind(char *name, range_t * range, int policy,
	      nodemask_t * nodemask, int flags)
{
	glctx_t *gcp = &glctx;
	segment_t *seg = segment_get(name);
	char *bind_start;
	off_t offset;
	size_t length, room;

	if (seg == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	if (seg->seg_start == MAP_FAILED) {
		fprintf(stderr, "%s: segment %s not mapped\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	offset = round_down_to_pagesize(range->offset);
	if (offset >= seg->seg_length) {
		fprintf(stderr, "%s: offset %ld is past end of segment %s\n",
			gcp->program_name, offset, name);
		return SEG_ERR;
	}

	bind_start = (char *)seg->seg_start + offset;
	room = seg->seg_length - offset;

	/*
	 * note: we silently truncate to max length [end of segment]
	 */
	length = range->length;
	if (length != 0)
		length = round_up_to_pagesize(length);
	if (length == 0 || length > room)
		length = room;

	if (mbind(bind_start, length, policy, nodemask->n,
		  NUMA_NUM_NODES, flags) == -1) {
		int err = errno;
		fprintf(stderr, "%s: mbind() of segment %s failed - %s\n",
			gcp->program_name, name, strerror(err));
		return SEG_ERR;
	}
	return SEG_OK;
}
/*
 * segment_location() - report node location of specified range of segment
 *
 * NOTE: offset is relative to start of mapping, not start of file
 *
 * The offset is rounded down and the length rounded up to a page
 * boundary; a length of 0, or one reaching past the end of the
 * segment, means "to the end of the segment".  After the segment's
 * mapping line, the node of each page is printed, PG_PER_LINE pages
 * per line, each line labelled with the [hex] number of its first page.
 * A pending signal ends the listing early.
 *
 * Returns SEG_OK, or SEG_ERR with a message if the segment does not
 * exist, is not mapped, the offset lies beyond it, or the node of a
 * page cannot be determined.
 */
#define PG_PER_LINE 8
#define PPL_MASK (PG_PER_LINE - 1)
int segment_location(char *name, range_t * range)
{
	glctx_t *gcp = &glctx;
	segment_t *segp;
	char *apage, *end;
	off_t offset;
	size_t length, maxlength;
	int pgid, i;
	bool need_nl;

	segp = segment_get(name);
	if (segp == NULL) {
		fprintf(stderr, "%s: no such segment: %s\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	if (segp->seg_start == MAP_FAILED) {
		fprintf(stderr, "%s: segment %s not mapped\n",
			gcp->program_name, name);
		return SEG_ERR;
	}

	offset = round_down_to_pagesize(range->offset);
	if ((size_t)offset >= segp->seg_length) {
		fprintf(stderr, "%s: offset %ld is past end of segment %s\n",
			gcp->program_name, (long)offset, name);
		return SEG_ERR;
	}

	apage = (char *)segp->seg_start + offset;
	maxlength = segp->seg_length - offset;

	length = range->length;
	if (length)
		length = round_up_to_pagesize(length);

	/*
	 * note: we silently truncate to max length [end of segment]
	 */
	if (length == 0 || length > maxlength)
		length = maxlength;

	end = apage + length;
	pgid = offset / gcp->pagesize;

	show_one_segment(segp, false);	/* show mapping, no header */

	printf("page offset ");
	for (i = 0; i < PG_PER_LINE; ++i)
		printf(" +%02d", i);
	printf("\n");

	if (pgid & PPL_MASK) {
		/*
		 * start partial line
		 */
		int pgid2 = pgid & ~PPL_MASK;

		printf("%12x: ", (unsigned int)pgid2);
		while (pgid2 < pgid) {
			/* skipped cell: as wide as a " %3d" node entry */
			printf("%4s", "");
			++pgid2;
		}
		need_nl = true;
	} else
		need_nl = false;

	for (; apage < end; apage += gcp->pagesize, ++pgid) {
		int node;

		node = get_node(apage);
		if (node < 0) {
			/* SEG_OFFSET() is a pointer difference: print as long */
			fprintf(stderr, "\n%s: "
				"failed to get node for segment %s, offset 0x%lx\n",
				gcp->program_name, name,
				(unsigned long)SEG_OFFSET(segp, apage));
			return SEG_ERR;
		}

		if ((pgid & PPL_MASK) == 0) {
			if (need_nl)
				printf("\n");
			printf("%12x: ", (unsigned int)pgid);	/* start a new line */
			need_nl = true;
		}
		printf(" %3d", node);

		if (signalled(gcp)) {
			reset_signal();
			break;
		}
	}
	printf("\n");
	return SEG_OK;
}
#endif