Issue 4-13, March 31, 1999

Be Engineering Insights: Doing File I/O From A Device Driver

By Dmitriy Budko

Many BeOS developers ask if it is possible to do file or /dev/* I/O from a kernel driver. This is a very reasonable question. On other OSes it's complicated: one has to use special, unfamiliar functions such as the Windows 95/98 IFSMgr_Ring0_FileIO set (OpenCreateFile(), ReadFile(), WriteAbsoluteDisk(), etc.) or the Windows NT ZwCreateFile(), ZwReadFile(), etc. A small example from the Microsoft Windows NT DDK:

ntStatus = ZwCreateFile( &NtFileHandle,
  SYNCHRONIZE | FILE_READ_DATA,
  &ObjectAttributes,
  &IoStatus,
  NULL,   // alloc size = none
  FILE_ATTRIBUTE_NORMAL,
  FILE_SHARE_READ,
  FILE_OPEN,
  FILE_SYNCHRONOUS_IO_NONALERT,
  NULL, // eabuffer
  0 );  // ealength

Can you understand without extensive comments what is going on here?

Or from the NT DDK documentation:

NTSTATUS ZwReadFile(
  IN HANDLE FileHandle,
  IN HANDLE Event OPTIONAL,
  IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
  IN PVOID ApcContext OPTIONAL,
  OUT PIO_STATUS_BLOCK IoStatusBlock,
  OUT PVOID Buffer,
  IN ULONG Length,
  IN PLARGE_INTEGER ByteOffset OPTIONAL,
  IN PULONG Key OPTIONAL
  );

Under BeOS it's much easier: a driver can call the standard POSIX low-level I/O functions: open(), close(), read(), write(), etc.

Here is a simple driver that uses these functions and provides very simple encryption capabilities. It publishes a "secure" device in /dev/misc/cryptodevice. Programs can read from and write to it as if it were a normal file, but the data is scrambled and stored in the normal /boot/home/cryptod_storage file. The source code, makefile, installation script, and PPC and x86 BeIDE projects are at

ftp://ftp.be.com/pub/samples/drivers/cryptodevice.zip

#include <OS.h>
#include <KernelExport.h>
#include <Drivers.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

/* Descriptor of the backing file.  Assigned by init_driver(), closed by
   uninit_driver(), and used by every read/write hook in this driver. */
int fh;
/* Path of the regular file that stores the scrambled data. */
const char*  file_name  = "/boot/home/cryptod_storage";
/* Repeating XOR key applied by encrypt()/decrypt(). */
const char*  key_string = "VERY lousy encryption";

/*
 * XOR len bytes of buf, in place, with the repeating key_string.
 *
 * The key byte for a data byte is selected by the byte's absolute
 * position (pos plus its index in buf), truncated to unsigned and taken
 * modulo the key length.  Because the operation is a plain XOR, running
 * it a second time with the same pos restores the original bytes.
 */
static void
encrypt(uchar* buf, size_t len, off_t pos)
{
  const size_t  key_len = strlen(key_string);
  /* Truncated position of the current byte; advancing it by one per byte
     yields the same value as truncating (pos + index) each time. */
  unsigned  stream_off = (unsigned)pos;
  size_t  remaining = len;

  while (remaining-- > 0)
  {
    *buf ^= key_string[stream_off % key_len];
    buf++;
    stream_off++;
  }
}

/*
 * Undo encrypt() on len bytes of buf, in place.  pos must be the same
 * position that was used when the bytes were encrypted.
 */
static void
decrypt(uchar* buf, size_t len, off_t pos)
{
  /* The cipher is a plain XOR, so applying it again restores the data. */
  encrypt(buf, len, pos);
}

/*
 * open() hook.
 *
 * The driver keeps no per-open state.  The cookie is set to NULL so that
 * the later hooks receive a defined (though unused) value rather than
 * whatever the caller's variable happened to contain.
 *
 * Always returns B_OK.
 */
static status_t
cryptod_open (const char *name, uint32 flags, void **cookie)
{
  (void)name;
  (void)flags;

  dprintf("cryptod: open()\n");

  if (cookie != NULL)
    *cookie = NULL;
  return B_OK;
}

/*
 * close() hook.  There is nothing to tear down per open, so the call is
 * only logged.  Always returns B_OK.
 */
static status_t
cryptod_close (void *cookie)
{
  (void)cookie;  /* no per-open state */

  dprintf("cryptod: close()\n");
  return B_OK;
}

/*
 * free() hook.  No cookie was allocated in cryptod_open(), so there is
 * nothing to release; the call is only logged.  Always returns B_OK.
 */
static status_t
cryptod_free (void *cookie)
{
  (void)cookie;  /* nothing was allocated for it */

  dprintf("cryptod: free()\n");
  return B_OK;
}

/*
 * read() hook: read up to *len bytes at offset pos from the backing file
 * into buf and unscramble them.
 *
 * On return *len holds the number of bytes actually delivered (0 on
 * error).  Returns B_OK, B_DEV_SEEK_ERROR or B_DEV_READ_ERROR.
 *
 * NOTE(review): fh and its file position are shared by all callers, so
 * the lseek()/read() pair is not atomic if two threads use the device at
 * the same time -- confirm whether the hooks need to be serialized.
 */
static status_t
cryptod_read (void *cookie, off_t pos, void *buf, size_t *len)
{
  ssize_t  got;

  (void)cookie;

  /* *len is a size_t; cast it so the argument matches the conversion. */
  dprintf("cryptod: read(%Ld, %lu)\n", pos, (unsigned long)*len);

  if (lseek(fh, pos, SEEK_SET) < 0)
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Keep the result in a signed variable: storing it straight into the
     unsigned *len would hide the -1 error return behind a conversion. */
  got = read(fh, buf, *len);
  if (got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }

  *len = (size_t)got;
  /* Only the bytes that were really read are unscrambled. */
  decrypt((uchar*)buf, *len, pos);
  return B_OK;
}

static status_t
cryptod_write (void *cookie, off_t pos, const void *buf,
  size_t *len)
{
  dprintf("cryptod: write(%Ld, %Ld)\n", pos, *len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  encrypt((uchar*)buf, *len, pos);

  if (-1 == (*len = write(fh, buf, *len)))
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }
  return B_OK;
}

/*
 * readv() hook: read from offset pos of the backing file into the count
 * buffers described by vec, then unscramble what was read.
 *
 * On return *len holds the number of bytes delivered (0 on error).
 * Returns B_OK, B_DEV_SEEK_ERROR or B_DEV_READ_ERROR.
 *
 * Only the bytes that readv() actually delivered are decrypted.  After a
 * short read the remaining buffer space is left untouched instead of
 * being XOR-ed with the key.
 *
 * NOTE(review): fh and its file position are shared by all callers, so
 * the lseek()/readv() pair is not atomic if two threads use the device
 * at the same time -- confirm whether the hooks need to be serialized.
 */
static status_t
cryptod_readv (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  size_t  i;
  size_t  left;
  off_t  cur_pos;
  ssize_t  got;

  (void)cookie;

  /* count and *len are size_t; cast them to match the conversions. */
  dprintf("cryptod: readv(%Ld, %lu, %lu)\n", pos,
    (unsigned long)count, (unsigned long)*len);

  if (lseek(fh, pos, SEEK_SET) < 0)
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Keep the result signed so that the -1 error return is tested
     directly rather than after conversion to size_t. */
  got = readv(fh, vec, count);
  if (got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }
  *len = (size_t)got;

  /* Walk the buffers in order, decrypting at most `got` bytes in total. */
  left = (size_t)got;
  cur_pos = pos;
  for (i = 0; i < count && left > 0; i++)
  {
    const size_t  span = vec[i].iov_len < left ? vec[i].iov_len : left;

    decrypt((uchar*)vec[i].iov_base, span, cur_pos);
    cur_pos += (off_t)span;
    left -= span;
  }
  return B_OK;
}

static status_t
cryptod_writev (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  size_t  i;
  off_t  cur_pos;

  dprintf("cryptod: writev(%Ld, %d, %d)\n", pos, count, *len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  if (-1 == (*len = writev(fh, vec, count)))
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }

  for(cur_pos=pos,i=0; i<count; i++)
  {
    encrypt((uchar*)vec[i].iov_base, vec[i].iov_len, cur_pos);
    cur_pos += vec[i].iov_len;
  }
  return B_OK;
}

/*
 * ioctl() hook.  No control requests are implemented, so every request
 * is rejected with B_DEV_INVALID_IOCTL.
 */
static status_t
cryptod_control(void *cookie, uint32 msg, void *buf,
  size_t len)
{
  /* None of the arguments is examined. */
  (void)cookie;
  (void)msg;
  (void)buf;
  (void)len;

  return B_DEV_INVALID_IOCTL;
}

/*
 * Hook table handed out by find_device().  The initializer is positional,
 * so the order of the entries must match the order of the fields in
 * device_hooks.  The two NULL entries leave select/deselect unimplemented.
 */
static device_hooks cryptod_device = {
  cryptod_open,
  cryptod_close,
  cryptod_free,
  cryptod_control,
  cryptod_read,
  cryptod_write,
  NULL,      /* select */
  NULL,      /* deselect */
  cryptod_readv,
  cryptod_writev
};

/*
 * NULL-terminated list of device names returned by publish_devices().
 * The single entry is the name under which the device is published
 * (it appears as /dev/misc/cryptodevice).
 */
static char *cryptod_name[] = {
  "misc/cryptodevice",
  NULL
};

/*
 * Driver entry point: open (creating it if necessary) the backing file
 * and keep its descriptor in fh for the read/write hooks.
 *
 * Returns B_OK on success, B_ERROR if the file cannot be opened.
 */
status_t
init_driver(void)
{
  dprintf("cryptod: init_driver(), %s, %s\n",
    __DATE__, __TIME__);

  /* O_CREAT requires the third (mode) argument; without it the
     permission bits of a newly created file are indeterminate.  The
     store is made readable and writable by its owner only. */
  fh = open(file_name, O_RDWR | O_CREAT, 0600);
  if (fh < 0)
    return B_ERROR;
  return B_OK;
}

/*
 * Driver exit point: log the call and close the backing file that
 * init_driver() opened.
 */
void
uninit_driver(void)
{
  dprintf("cryptod: uninit_driver()\n");

  /* Release the descriptor held in fh. */
  close(fh);
}

/*
 * Return the NULL-terminated list of device names this driver publishes.
 */
const char **
publish_devices(void)
{
  /* The array decays to a pointer to its first element, which is the
     same address as &cryptod_name; the cast only adds the const. */
  const char **names = (const char **)cryptod_name;

  return names;
}

device_hooks *
find_device(const char *name)
{
  return &cryptod_device;
}

The driver just passes all read/write requests to the file system. Everything should be obvious to any C/DOS/POSIX programmer except two functions: readv() and writev(). They are common extensions to POSIX and are used to read or write a contiguous portion of a file from or to many buffers in one system (or file system) call. In many cases these functions provide better performance than multiple calls to read()/write().

And from Linux man pages, with a few changes:

#include <sys/uio.h>

int readv(
  int fd, const struct iovec *vector, size_t count);

int writev(
  int fd, const struct iovec *vector, size_t count);

struct iovec {
  __ptr_t iov_base; /* Starting address. */
  size_t iov_len; /* Length in bytes. */
  };

Description

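readv() reads data from file descriptor fd, and puts the result in the buffers described by vector. The number of buffers is specified by count. The buffers are filled in the order specified. Operates just like read() except that data is put in vector instead of a contiguous buffer.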

writev() writes data to file descriptor fd from the buffers described by vector. The number of buffers is specified by count. The buffers are used in the order specified. Operates just like write() except that data is taken from vector instead of a contiguous buffer.

Return Value

On success readv() returns the number of bytes read. On success writev() returns the number of bytes written. On error, -1 is returned, and errno is set appropriately.

If you want to initialize and mount a file system on the encrypted device then you will have to:

  1. Use a raw device or partition like /dev/disk/ide/ata/1/master/0/0_1 as the backing storage for the data.

  2. Change the published device name to /dev/disk/foo/bar.

  3. Implement ioctl() handlers for the standard requests for a mass storage device. See old RAMDrive as an example:

    ftp://ftp.be.com/pub/samples/drivers/obsolete/ramdrive.zip

Unfortunately, in current versions of the BeOS you cannot mount a file system over such a device if it uses a file on a file system as the backing storage. A deadlock will occur. This will be fixed in a future version of the BeOS.


Developers' Workshop: Floating Point and Shared Libraries - Life's Different Down Under

By Steven Olson

As a media driver writer I find myself working with many different audio devices. Of the milestones along the path to driver completion, the one I enjoy the most is getting a new audio card and corresponding driver to make some noise. Typically, this implies writing predetermined (as opposed to random) data directly to buffers on the device from the driver. A finished driver normally just passes data from a media node to the hardware, but this means that the interface to the node must be completed first. At the start of a project, I just want to see if the registers are documented correctly; I don't care about the final interface between the driver and the corresponding node.

Writing the values of a sine wave into the audio device's buffer would be a quick way to see if the registers are set correctly. However, because drivers operate in kernel space, they can't be linked against the shared math libraries (more on this later). Consequently, you can't use the sin() function to determine values for the tone.

There are several methods you can use to get around this limitation. If you can stand listening to multiple tones, you could generate a signal using any one of a myriad of waveforms that don't require the math libraries. These include square, triangle, and sawtooth waves. If you're feeling ambitious, you can even use parabolas. The following code shows one way to do this:

/* Fill buffer with a tone built from parabolic arcs of alternating sign.
   i, x, half_period and NUM_FRAMES are declared outside this excerpt. */
short buffer[2*NUM_FRAMES];  /* two samples are stored per frame */
short gain = 320;            /* scale factor applied to each arc */
short sign = -1;             /* polarity; flipped every half period */

for ( i = 0; i < NUM_FRAMES; i++) {
  /* At every multiple of half_period (including i == 0) flip the
     polarity and restart the position within the arc. */
  if ( ! ( i % half_period) ) {
    sign = -sign;
    x = 0;
  }
  /* x * (x - half_period) is zero at both ends of the half period, so
     consecutive arcs join at zero.  The same value goes into both slots
     of the frame (presumably the left and right channels -- confirm). */
  buffer[2*i] = buffer[2*i+1] =
    (short) (sign * gain * x * (x - half_period));
  x++;
}

Try starting with a half_period of 10. Be careful when changing gain and half_period values, so you don't end up clipping the signal.

Why not use a Taylor series expansion instead? After all, sin x = x - x^3/3! + x^5/5! - ... and the signal harmonics will be much smaller than with any of the other waveforms. The Taylor series is not used because it requires floating point calculations, and using floating point in the kernel, while permitted in rare circumstances, is discouraged as a general rule.

Floating Point in the Kernel

Floating point calculations in the kernel are discouraged when using the BeOS. Their use, when absolutely required, should be restricted to simple addition, subtraction, multiplication, and division. Why? Because they're time consuming, and floating point exceptions in the kernel are a pain. The kernel is designed to be small and fast, with a limited set of export functions; it's not a general purpose playground (that's what user mode is for).

Also, remember that there's a difference between using floating point routines and calling math functions that are part of the shared libraries. On rare occasions, floating point calculations are allowed, but calling a function in the shared libraries is never supported in kernel mode; e.g., calling the standard library function sin() from a driver. If you really need standard library and floating point support, have your node do the calculations in user mode and then pass the results to your driver. Your user mode node has the advantage of floating point support as well as the ability to link against the shared libraries that accompany the BeOS.

Shared Libraries in the Kernel

Kernel components in general, and drivers in particular, link against the kernel—not against the shared libraries that are a part of the BeOS. What if you need C runtime support in a driver? No problem, but realize that the functions are implemented and exported by the kernel and not by the shared libraries. This can be an issue if you're not aware of how the functions are implemented.

Consider the malloc() function. When malloc() is called by a user mode program, the function is implemented by the shared libraries and the memory allocated is not page locked. However, when malloc() is called in a driver, the function is implemented by the kernel and the memory allocated is page locked. Another example is acquire_sem(). When called from user mode, acquire_sem() is interruptible; when called in kernel mode, it is non-interruptible. The moral of the story is to be aware of the implementation differences between the libraries and their corresponding kernel equivalents.

For obvious reasons, not all the shared library procedures have been implemented and exported from the kernel. What do you do if your driver needs to link against a shared library? You could copy the source code directly into your driver (if you have access to it). If not, you may want to consider linking statically instead of dynamically. However, this requires access to static libraries. Note that static linking will NOT work on shared (dynamic) libraries and that the libraries that ship with BeOS are dynamic. Static libraries have a .a extension, while dynamic (shared) libraries have a .so extension.

The Missing Link

When you actually link a driver, with or without floating point calculations, make sure that you're linking against the kernel and not the shared libraries. This is done by using the -nostdlib option with gcc (Intel) and the -nodefaults option with Metrowerks (PowerPC). This helps ensure that you won't link in things you shouldn't. I recommend using the templates that come with BeIDE to make sure that your link options are set correctly.

And Finally....

Because floating point exceptions were not disabled on the Pentium III during the transition to kernel mode, an "unbreaking" was made for the next release of the BeOS. Therefore, this article applies only to releases after BeOS Release 4.0.


Is There Life After the PC?

By Jean-Louis Gassée

Before I answer the question—and possibly get into more trouble than I did with last week's column—I'd like to thank the sharp-eyed readers who kindly took me to task for creating more confusion than usual. I made an abrupt transition between VOIP (Voice Over IP) and voice recognition: "The relationship of BeOS to VOIP wasn't obvious, but the need to answer the Media OS challenge was clear."

"Wasn't obvious" was a poor choice of words. It left the impression that there was a connection to be discovered; there isn't. Please accept my apologies for the misleading turn of phrase. My only excuse is that I wrote the column on the road, at 2 a.m. Paris time. I won't blame tap water or Parisian waiters - just jet-lag.

The real topic of the column, the difficulties with available voice-recognition products, generated interesting mail beyond the gentle reminders. In particular, one reader asked if I wasn't too harsh in my criticism. In substance some readers asked whether, as we see the beginning of the "after the PC" era, voice isn't a good way to communicate with all the Web-connected appliances we read about in the news? Perhaps voice input can't help with dictating the Great Immigrant Novel but, with a more limited vocabulary, could recognition become robust enough to help control appliances?

Simplifying the problem sounds good. Let's start with the "after the PC" Web-connected appliances. I don't believe that PCs as we know them are forever and I also don't believe that they'll be "replaced" by appliances. What I think we'll see is a broader range of computing devices. Some will continue to look like the PCs we use everyday; others will assume different shapes and roles, just as most devices around us do as technology and tastes evolve.

The automobile market comes to mind. The word "appliance" implies a more specialized, single-purpose device than the protean PC, and we've all read about how the next generation of refrigerators or microwave ovens will have a screen and a Web connection. I prefer that to digitized reproductions of grand masters on the walls. In the case of the refrigerator, the story continues with my waving the egg carton before the built-in bar code reader, thus updating the e-shopping list maintained on the screen, adding a few items, pressing the one-click ordering button, and, voilà, groceries appear a few hours later.

This is where voice input, with a limited vocabulary, might come in handier than scrolling through lists and menus. Go to the cereals aisle, pick three boxes of Frosties. Limiting the vocabulary works, as evidenced by a number of successful applications for physically challenged individuals or eye surgeons who use voice input as the "third hand" to control a stereoscopic microscope as they operate.

I'd like voice recognition to work on a broader range of devices, and I hope it will become robust enough for consumer appliances. But, for the short to medium term, I'm skeptical. I'm more optimistic about the prospects of networks in the home. Web-connected appliances aren't going to have an isolated connection; they'll be part of one of two forms of home networks, RF or hardwired.

In either case, the refrigerator, the microwave oven, or the MP3 player will be part of a network. And on this network, you'll find personal computers as we know them or specialized Web terminals—with the right applet to compensate for the UI limitation of the appliances. If I could use a mouse and a keyboard to program a VCR by merely clicking on the program guide, I'd watch more TV. Your idea of progress may vary. (In any event, others agree that this is an opportunity. Two companies, Replay and Tivo, skip the VCR and use a hard disk instead.)

I realize that home networks will not happen overnight, but they don't require any technical breakthrough, at least for the hardwired ones, and the benefits look so attractive and so close that I'd be surprised if they didn't become an important part of life with or without PCs.

Creative Commons License
Legal Notice
This work is licensed under a Creative Commons Attribution-Non commercial-No Derivative Works 3.0 License.