Sie sind auf Seite 1von 53

Read the file fs/nfs

Hello All,
I am trying to do some socket programming in kernel space by writing a client/server utility. I
want very simple functionality, such as transferring a short "hello"/"hi" message. Below is the code so
far, but it has some problems.
Server Side
----------sock1.c (First File)
-------------------#include
#include
#include
#include
#include
#include
MODULE_DESCRIPTION("Sockets Test Module");
MODULE_AUTHOR("root (root@localhost.localdomain)");
MODULE_LICENSE("GPL");
/* Basic socket functions (defined in accept.c; returns the number of accepted connections) */
extern int AcceptConnections(struct socket *);
/* MainSocket is shared by all threads, therefore it has to be a global variable */
struct socket *MainSocket = NULL;
/* The port to use for the connection */
#define PORT 5060
/*
 * StartListening - create a TCP socket, bind it to Port on any local
 * address and put it into the listening state.
 *
 * On success the socket is published in the global MainSocket and 1 is
 * returned. On any failure the socket (if one was created) is released,
 * MainSocket is left untouched and 0 is returned.
 */
int StartListening(const int Port)
{
	struct socket *sock;
	struct sockaddr_in sin;
	int error;

	/* First create a socket: PF_INET / SOCK_STREAM / IPPROTO_TCP */
	error = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (error < 0) {
		printk(KERN_ERR "Error during creation of socket; terminating \n");
		/* sock is not valid here; it must not be used any further. */
		return 0;
	}

	/* Now bind the socket; INADDR_ANY accepts messages on any address. */
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	sin.sin_port = htons((unsigned short)Port);
	error = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
	if (error < 0) {
		printk(KERN_ERR "kHTTPd: Error binding socket. This means that some other \n");
		printk(KERN_ERR " daemon is (or was a short time ago) using port %i.\n", Port);
		sock_release(sock);	/* do not leak the unbound socket */
		return 0;
	}

	sock->sk->sk_reuse = 1;

	/* Now start listening on the socket (backlog of 48). */
	error = sock->ops->listen(sock, 48);
	if (error != 0) {
		printk(KERN_ERR "kHTTPd: Error listening on socket \n");
		/* A socket that is not listening is useless to the caller. */
		sock_release(sock);
		return 0;
	}

	MainSocket = sock;
	return 1;
}
/*
 * StopListening - release the listening socket held in MainSocket, if any.
 * The global is cleared before the socket is released.
 */
void StopListening(void)
{
	struct socket *listener = MainSocket;

	if (listener == NULL)
		return;

	MainSocket = NULL;
	sock_release(listener);
}
static int sock1_init_module(void)
{
printk( KERN_DEBUG "Module sock1 init\n" );
StartListening(PORT);
AcceptConnections(MainSocket);
return 0;
}
/* Module unload hook: log the exit, then tear down the listening socket. */
static void sock1_exit_module(void)
{
	printk(KERN_DEBUG "Module sock1 exit\n");
	StopListening();
}
module_init(sock1_init_module);
module_exit(sock1_exit_module);
accept.c (Second File)
---------------------#include
#include
/* Accept connections */
/*
Purpose:
AcceptConnections puts all "accepted" connections in the
"WaitForHeader" queue.
Return value:
The number of accepted connections
*/
int AcceptConnections(struct socket *Socket)
{
struct socket *NewSock;
int error;
struct kiocb iocb;
if (Socket==NULL) return 0;
error = 0;
while (error>=0)
{
NewSock = sock_alloc();
if (NewSock==NULL)
break;
NewSock->type = Socket->type;
NewSock->ops = Socket->ops;
error = Socket->ops->accept(Socket,NewSock,O_NONBLOCK);
if (errorsk->sk_state==TCP_CLOSE)
{
sock_release(NewSock);
continue;
}
// printk("Connection from %s", inet_ntoa (remote.sin_addr.s_addr));
}
}
Client Side
----------sock_client1 (First File)
-------------------------

#include
#include
#include
MODULE_DESCRIPTION("Simple Testing Client Module in Kernel Space");
MODULE_AUTHOR("root (root@localhost.localdomain)");
MODULE_LICENSE("GPL");
struct socket *MainSocket = NULL;
#define PORT 5060
/*
 * Connect_to_Server - create a TCP socket and connect it to PORT.
 *
 * Returns 1 and publishes the socket in MainSocket on success. Returns 0 on
 * failure; in that case the socket is released and MainSocket is untouched.
 *
 * NOTE(review): the destination address is INADDR_ANY, as in the original
 * code; replace it with the real server address if the server is remote.
 */
int Connect_to_Server( void )
{
	struct sockaddr_in server;
	struct socket *socket;
	int ret;

	ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &socket);
	if (ret < 0) {
		printk(KERN_ERR "Error during creation of socket; terminating \n");
		/* socket is not valid here; it must not be used any further. */
		return 0;
	}

	server.sin_family = AF_INET;
	server.sin_addr.s_addr = INADDR_ANY;
	/* sin_port is in network byte order, matching the server's htons(). */
	server.sin_port = htons(PORT);

	/*
	 * A socket obtained from sock_create() has no struct file attached,
	 * so socket->file must not be dereferenced; pass plain flags (0 =
	 * blocking connect) instead.
	 */
	ret = socket->ops->connect(socket, (struct sockaddr *)&server,
				   sizeof(server), 0);
	if (ret < 0) {
		printk("Cannot connect to the server \n");
		sock_release(socket);
		return 0;
	}

	MainSocket = socket;
	return 1;
}
/*
 * Disconnect_from_Server - release the client socket held in MainSocket,
 * if any. The global is cleared before the socket is released.
 */
void Disconnect_from_Server(void)
{
	struct socket *conn = MainSocket;

	if (conn == NULL)
		return;

	MainSocket = NULL;
	sock_release(conn);
}
static int sock_client1_init_module(void)

{
Connect_to_Server();
return 0;
}
/* Module unload hook: log the exit, then drop the server connection. */
static void sock_client1_exit_module(void)
{
	printk(KERN_DEBUG "Module sock_client1 exit\n");
	Disconnect_from_Server();
}
module_init(sock_client1_init_module);
module_exit(sock_client1_exit_module);

Hi,
I have implemented sockets in kernel 2.6 and made simple functions like userspace. (I made for TCP
but I think UDP can also be done in a similar manner)
Here is the complete code. I think the function names are self explanatory. First to use sockets, you
have to create a struct socket objects.
For a server, use set_up_server_socket followed by server_accept_connection, and for the client use
set_up_client_socket. To send and receive messages, use the SendBuffer and RecvBuffer functions.
/*
 * SendBuffer - send "Length" bytes starting at "Buffer" through "sock".
 *
 * The buffer lives in kernel memory, so the address limit is switched to
 * KERNEL_DS around sock_sendmsg() and restored afterwards.
 *
 * Returns the value of sock_sendmsg() converted to size_t.
 * NOTE(review): sock_sendmsg()'s int result is returned through size_t, so
 * a negative error code reaches the caller as a very large value.
 */
size_t SendBuffer(struct socket *sock, const char *Buffer, size_t Length)
{
	struct iovec vec;	/* base address + length of the payload */
	struct msghdr hdr;
	mm_segment_t saved_fs;	/* address limit to restore after the send */
	int sent;

	/* One I/O vector element describing the caller's buffer. */
	vec.iov_base = (char *)Buffer;
	vec.iov_len = (__kernel_size_t)Length;

	/* No destination name and no control data. */
	hdr.msg_name = 0;
	hdr.msg_namelen = 0;
	hdr.msg_control = NULL;
	hdr.msg_controllen = 0;
	hdr.msg_iov = &vec;
	hdr.msg_iovlen = 1;
	hdr.msg_flags = MSG_NOSIGNAL;

	/* Save the current address limit and switch to KERNEL_DS. */
	saved_fs = get_fs();
	set_fs(KERNEL_DS);

	/* Actual sending of the message */
	sent = sock_sendmsg(sock, &hdr, (size_t)Length);

	/* Restore the previous address limit, whatever it was. */
	set_fs(saved_fs);

	return sent;
}
/*
 * RecvBuffer - receive up to "Length" bytes from "sock" into "Buffer".
 *
 * The caller must provide kernel memory for Buffer, e.g.
 *     Buffer = (char *) get_free_page(GFP_KERNEL);
 * or a kmalloc'ed area (or, for very small buffers, kernel stack space).
 * The call blocks until data is available (flags are 0, not MSG_DONTWAIT).
 *
 * Returns the value of sock_recvmsg() converted to size_t.
 * NOTE(review): Buffer is declared const although data is written through
 * it, and sock_recvmsg()'s int result is returned through size_t, so a
 * negative error code reaches the caller as a very large value.
 */
size_t RecvBuffer(struct socket *sock, const char *Buffer, size_t Length)
{
	struct iovec vec;
	struct msghdr hdr;
	mm_segment_t saved_fs;
	int received;

	/* One I/O vector element describing the destination buffer. */
	vec.iov_base = (void *) &Buffer[0];
	vec.iov_len = (size_t)Length;

	/* No peer name wanted, no control data, no flags. */
	hdr.msg_name = 0;
	hdr.msg_namelen = 0;
	hdr.msg_control = NULL;
	hdr.msg_controllen = 0;
	hdr.msg_flags = 0;
	hdr.msg_iov = &vec;
	hdr.msg_iovlen = 1;

	/* Receive the message with the address limit set to KERNEL_DS. */
	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	received = sock_recvmsg(sock, &hdr, Length, 0);	/* wait if there is no message */
	set_fs(saved_fs);

	return received;
}

/*
Sets up a server-side socket
1. Create a new socket
2. Bind the address to the socket
3. Start listening on the socket
*/
struct socket* set_up_server_socket(int port_no) {
struct socket *sock;
struct sockaddr_in sin;
int error;

/* First create a socket */


error = sock_create(PF_INET,SOCK_STREAM,IPPROTO_TCP,&sock) ;
if (error<0)
printk("Error during creation of socket; terminating\n");

/* Now bind the socket */


sin.sin_family = AF_INET;
sin.sin_addr.s_addr = INADDR_ANY;

sin.sin_port = htons(port_no);
error = sock->ops->bind(sock,(struct sockaddr*)&sin,sizeof(sin));
if (error<0)
{
printk("Error binding socket \n");
return 0;
}

/* Now, start listening on the socket */


error=sock->ops->listen(sock,32);
if (error!=0)
printk("Error listening on socket \n");
/* Now start accepting */
// Accepting is performed by the function server_accept_connection
return sock;
}
/*
Accepts a new connection (server calls this function)
1. Create a new socket
2. Call socket->ops->accept
3. return the newly created socket
*/
struct socket* server_accept_connection(struct socket *sock) {
struct socket * newsock;
int error;
/* Before accept: Clone the socket */
error = sock_create(PF_INET,SOCK_STREAM,IPPROTO_TCP,&newso ck);
if (error<0)
printk("Error during creation of the other socket; terminating\n");
newsock->type = sock->type;
newsock->ops=sock->ops;
/* Do the actual accept */
error = newsock->ops->accept(sock,newsock,0);

if (error<0) {
printk("Error accepting socket\n") ;
return 0;
}
return newsock;
}

struct socket * set_up_client_socket(unsigned int IP_addr, int port_no)


{
struct socket * clientsock;
struct sockaddr_in sin;
int error, i;
/* First create a socket */
error = sock_create(PF_INET,SOCK_STREAM,IPPROTO_TCP,&clien tsock);
if (error<0) {
printk("Error during creation of socket; terminating\n");
return 0;
}
/* Now bind and connect the socket */
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(IP_addr);
sin.sin_port = htons(port_no);
for(i=0;i<10;i++) {
error = clientsock->ops->connect(clientsock,(struct sockaddr*)&sin,sizeof(sin),0);
if (error<0) {
printk("Error connecting client socket to server: %i, retrying .. %d \n",error, i);
if(i==10-1) {
printk("Giving Up!\n"); return 0;
}
}
else break; //connected
}
return clientsock;
}

Home

Kernel Korner - Network Programming in the


Kernel
By Pradeep Padala and Ravi Parimi on Fri, 2005-08-26 01:00. Software
Take a tour of the kernel's networking functionality by writing a network client that runs in kernel
space.
All Linux distributions provide a wide range of network applications, from daemons that provide a
variety of services such as WWW, mail and SSH to client programs that access one or more of these
services. These programs are written in user mode and use the system calls provided by the kernel to
perform various operations like network read and write. Although this is the traditional method of
writing programs, there is another interesting way to develop these applications by implementing them
in the kernel. The TUX Web server is a good example of an application that runs inside the kernel and
serves static content. In this article, we explain the basics of writing network applications within the
kernel and their advantages and disadvantages. As an example, we explain the implementation of an in-kernel FTP client.

Advantages and Disadvantages of In-Kernel Implementations


Why would one want to implement applications within the kernel? Here are a few advantages:

When a user-space program makes a system call, there is some overhead associated with the user-space/kernel-space transition. By programming all functionality in the kernel, we can make
gains in performance.

The data corresponding to any application that sends or receives packets is copied from user
mode to kernel mode and vice versa. By implementing network applications within the kernel, it
is possible to reduce such overhead and increase efficiency by not copying data to user mode.

In specific research and high-performance computing environments, there is a need for


achieving data transfers at great speeds. Kernel applications find use in such situations.

On the other hand, in-kernel implementations have certain disadvantages:

Security is a primary concern within the kernel, and a large class of user-mode applications are
not suitable to be run directly in the kernel. Consequently, special care needs to be taken while
designing in-kernel applications. For example, reading and writing to files within the kernel is
usually a bad idea, but most applications require some kind of file I/O.

Large applications cannot be implemented in the kernel due to memory constraints.

Network Programming Basics


Network programming is usually done with sockets. A socket serves as a communication end point
between two processes. In this article, we describe network programming with TCP/IP sockets.
Server programs create sockets, bind to well-known ports, listen and accept connections from clients.
Servers are usually designed to accept multiple connections from clients-they either fork a new process
to serve each client request (concurrent servers) or completely serve one request before accepting more

connections (iterative servers). Client programs, on the other hand, create sockets to connect to servers
and exchange information.

FTP Client-Server Interaction


Let's take a quick look at how an FTP client and server are implemented in user mode. We discuss only
active FTP in this article. The differences between active and passive FTP are not relevant to our
discussion of network programming here.

Socket Programming Basics


Here is a brief explanation of the design of an FTP client and server. The server program creates a
socket using the socket() system call. It then binds on a well-known port using bind() and waits
for connections from clients using the listen() system call. The server then accepts incoming
requests from clients using accept() and forks a new process (or thread) to serve each incoming
client request.
The client program creates a control socket using socket() and next calls connect() to establish
a connection with the server. It then creates a separate socket for data transfer using socket() and
binds to an unprivileged port (>1024) using bind(). The client now listen()s on this port for data
transfer from the server. The server now has enough knowledge to honor a data transfer request from
the client. Finally, the client uses accept() to accept connections from the server to send and receive
data. For sending and receiving data, the client and server use the write() and read() or
sendmsg() and recvmsg() system calls. The client issues close() on all open sockets to tear
down its connection to the server. Figure 1 sums it up.

Figure 1. The FTP protocol uses two sockets: one for control messages and one for data.

FTP Commands
Here is a list of a few of the FTP commands we used. Because our program provides only a basic
implementation of the protocol, we discuss only the relevant commands:

The client sends a USER <username>\r\n command to the server to begin the
authentication process.

To send the password, the client uses PASS <password>\r\n.

In some cases, the client sends a PORT command to inform the server of its preferred port for
data transfer. In such cases, the client sends PORT <a1,a2,a3,a4,p1,p2>\r\n. The
RFC for FTP requires that the a1-a4 constitute the 32-bit IP address of the client, and p1-p2
constitute the 16-bit port number. For example, if the client's IP address is 10.10.1.2 and it
chooses port 12001 for data transfer, the client sends PORT 10,10,1,2,46,225.

Some FTP clients request, by default, that data be transferred in binary format, while others
explicitly ask the server to enable data transfer in binary mode. Such clients send a TYPE

I\r\n command to the server to request this.


Figure 2 is a diagram that shows a few FTP commands and their responses from the server.

Figure 2. The client issues FTP commands over the control connection to set up the file transfer.

Socket Programming in the Kernel


Writing programs in the kernel is different from doing the same in user space.
We explain a few issues concerned with writing a network application in the kernel. Refer to Greg
Kroah-Hartman's article "Things You Never Should Do in the Kernel" (see the on-line Resources).
First, let's examine how a system call in user space completes its task. For example, look at the
socket() system call:
sockfd = socket(AF_INET,SOCK_STREAM,0);

When a program executes a system call, it traps into the kernel via an interrupt and hands over control
to the kernel. Among other things, the kernel performs various tasks, such as saving contents of
registers, making changes to address space boundaries and checking for errors with system call
parameters. Eventually, the sys_socket() function in the kernel is responsible for creating the
socket of specified address and family type, finding an unused file descriptor and returning this number
back to user space. Browsing through the kernel's code we can trace the path followed by this function
(Figure 3).

Figure 3. Behind the scenes of a system call: when user space executes socket(), the kernel does
necessary housekeeping and then returns a new file descriptor.

Design of an FTP Client


We now explain the design and implementation of a kernel FTP client. Please follow through the code
available at the Linux Journal FTP site (see Resources) as you read through the article. The main
functionality of this client is written in the form of a kernel module that adds a system call dynamically
that user-space programs can invoke to start the FTP client process. The module allows only the root
user to read a file using FTP. The user-space program that calls the system call in this module should
be used with extreme caution. For example, it is easy to imagine the catastrophic results when root
runs:
./a.out 10.0.0.1 10.0.0.2 foo_file /dev/hda1/*

and overwrites /dev/hda1 with a downloaded file from 10.0.0.1.

Exporting sys_call_table
We first need to configure the Linux kernel to allow us to add new system calls via a kernel module
dynamically. Starting with version 2.6, the symbol sys_call_table is no longer exported by the
kernel. For our module to be able to add a system call dynamically, we need to add the following lines
to arch/i386/kernel/i386_ksyms.c in the kernel source (assuming you are using a Pentium-class
machine):
extern void *sys_call_table;
EXPORT_SYMBOL(sys_call_table);

After recompiling the kernel and booting the machine into it, we are all set to run the FTP client. Refer
to the Kernel Rebuild HOWTO (see Resources) for details on compiling a kernel.

Module Basics
Let's examine the code for the module first. In the code snippets in the article, we omit error-checking
and other irrelevant details for clarity. The complete code is available from the LJ FTP site (see
Resources):
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
/* For socket etc */
#include <linux/net.h>
#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <asm/uaccess.h>
#include <linux/file.h>
#include <linux/socket.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
...
/*
 * ftp_init - module entry point.
 * Logs a start-up message and stores my_sys_call in slot SYSCALL_NUM of
 * sys_call_table. Always returns 0.
 */
int ftp_init(void)
{
	printk(KERN_INFO FTP_STRING "Starting ftp client module\n");

	sys_call_table[SYSCALL_NUM] = my_sys_call;

	return 0;
}
/*
 * ftp_exit - module exit point.
 * Logs a clean-up message and stores sys_ni_syscall in slot SYSCALL_NUM of
 * sys_call_table, replacing the handler ftp_init() put there.
 */
void ftp_exit(void)
{
	printk(KERN_INFO FTP_STRING "Cleaning up ftp client module, bye !\n");

	sys_call_table[SYSCALL_NUM] = sys_ni_syscall;
}
...

The program begins with the customary include directives. Notable among the header files are
linux/kernel.h for KERN_ALERT and linux/slab.h, which contains definitions for kmalloc() and
linux/smp_lock.h that define kernel-locking routines. System calls are handled in the kernel by
functions with the same names in user space but are prefixed with sys_. For example, the
sys_socket function in the kernel handles the task of the socket() system call. In this module,
we are using system call number 223 for our new system call. This method is not foolproof and will not
work on SMP machines. Upon unloading the module, we unregister our system call.

The System Call


The workhorse of the module is the new system call that performs an FTP read. The system call takes a
structure as a parameter. The structure is self-explanatory and is given below:
/*
 * Argument block passed from user space to the FTP system call.
 * The kernel copies the whole structure with copy_from_user(), so the
 * layout must be the same in the module and in the user-space program.
 */
struct params {
/* Destination IP address (four bytes; presumably one octet each - confirm) */
unsigned char destip[4];
/* Source IP address (four bytes; presumably one octet each - confirm) */
unsigned char srcip[4];
/* Source file - file to be downloaded from
the server (at most 64 bytes) */
char src[64];
/* Destination file - local file where the
downloaded file is copied (at most 64 bytes) */
char dst[64];
char user[16]; /* Username, sent in the FTP USER command */
char pass[64]; /* Password, sent in the FTP PASS command */
};

The system call is given below. We explain the relevant details in next few paragraphs:
/*
 * my_sys_call - system call that performs an FTP read (opening part).
 *
 * pm points to a struct params in user space. The structure is copied into
 * the kernel, the caller is required to be root, and a control connection
 * to the FTP server is opened and logged in with USER/PASS.
 *
 * Returns -EFAULT when pm cannot be read, -ENOMEM when the buffers cannot
 * be allocated and -1 when the caller is not root.
 */
asmlinkage int my_sys_call
(struct params __user *pm)
{
	struct sockaddr_in saddr, daddr;
	struct socket *control = NULL;
	struct socket *data = NULL;
	struct socket *new_sock = NULL;
	int r = -1;
	char *response;
	char *reply;
	struct params pmk;

	/* Check the whole structure, not just sizeof(pm) (a pointer). */
	if (unlikely(!access_ok(VERIFY_READ,
				pm, sizeof(*pm))))
		return -EFAULT;
	if (copy_from_user(&pmk, pm,
			   sizeof(struct params)))
		return -EFAULT;
	if (current->uid != 0)
		return r;

	/*
	 * Allocate the buffers only after the checks above, so that the
	 * early returns cannot leak them.
	 */
	response = kmalloc(SNDBUF, GFP_KERNEL);
	reply = kmalloc(RCVBUF, GFP_KERNEL);
	if (!response || !reply) {
		kfree(response);	/* kfree(NULL) is a no-op */
		kfree(reply);
		return -ENOMEM;
	}

	/* Create the control socket and connect it to the FTP server. */
	r = sock_create(PF_INET, SOCK_STREAM,
			IPPROTO_TCP, &control);
	memset(&servaddr,0, sizeof(servaddr));
	servaddr.sin_family = AF_INET;
	servaddr.sin_port = htons(PORT);
	servaddr.sin_addr.s_addr =
		htonl(create_address(128, 196, 40, 225));
	r = control->ops->connect(control,
				  (struct sockaddr *) &servaddr,
				  sizeof(servaddr), O_RDWR);

	/* Read the server greeting, then log in with USER and PASS. */
	read_response(control, response);
	sprintf(temp, "USER %s\r\n", pmk.user);
	send_reply(control, temp);
	read_response(control, response);
	sprintf(temp, "PASS %s\r\n", pmk.pass);
	send_reply(control, temp);
	read_response(control, response);

We start out by declaring pointers to a few socket structures. kmalloc() is the kernel equivalent
of malloc() and is used to allocate memory for our character arrays. The arrays response and reply
will contain the responses from the server and the replies sent to it.
The first step is to read the parameters from user mode to kernel mode. This is customarily done with
access_ok and copy_from_user/copy_to_user calls. access_ok checks whether the user-space pointer is valid to be referenced. copy_from_user then copies the data from user mode into a kernel buffer (pmk in the code above). For
reading simple variables like char and int, use __get_user.
Now that we have the user-specified parameters, the next step is to create a control socket and establish
a connection with the FTP server. sock_create() does this for us-its arguments are similar to those
we pass to the user-level socket() system call. The struct sockaddr_in variable servaddr
is now filled in with all the necessary information-address family, destination port and IP address of the
server. Each socket structure has a member that is a pointer to a structure of type struct
proto_ops. This structure contains a list of function pointers to all the operations that can be
performed on a socket. We use the connect() function of this structure to establish a connection to
the server. Our functions read_response() and send_reply() transfer data between the client
and server (these functions are explained later):
/* Create the data socket, bind it to the client address and EPH_PORT, and listen. */
r = sock_create(PF_INET, SOCK_STREAM,
		IPPROTO_TCP, &data);
memset(&claddr,0, sizeof(claddr));
claddr.sin_family = AF_INET;
claddr.sin_port = htons(EPH_PORT);
claddr.sin_addr.s_addr = htonl(
		create_address(srcip));
r = data->ops->bind(data,
		    (struct sockaddr *)&claddr,
		    sizeof (claddr));
/* Backlog of 1 */
r = data->ops->listen(data, 1);

Now, a data socket is created to transfer data between the client and server. We fill in another struct
sockaddr_in variable claddr with information about the client-protocol family, local
unprivileged port that our client would bind to and, of course, the IP address. Next, the socket is bound
to the ephemeral port EPH_PORT. The function listen() lets the kernel know that this socket can
accept incoming connections:
/* a and p give byte-wise access to the bound address and port in claddr. */
/* NOTE(review): the code that formats the PORT command into reply from
   a and p is not shown in this excerpt. */
a = (char *)&claddr.sin_addr;
p = (char *)&claddr.sin_port;
/* Send the command held in reply over the control socket and read the answer. */
send_reply(control, reply);
read_response(control, response);
/* Build "RETR <src>\r\n" in reply and send it over the control socket. */
strcpy(reply, "RETR ");
strcat(reply, src);
strcat(reply, "\r\n");
send_reply(control, reply);
read_response(control, response);

As explained previously, a PORT command is issued to the FTP server to let it know the port for data
transfer. This command is sent over the control socket and not over the data socket:
/* Allocate an empty socket and give it the data socket's type and ops. */
new_sock = sock_alloc();
new_sock->type = data->type;
new_sock->ops = data->ops;
/* Accept a connection on the listening data socket into new_sock (flags 0). */
r = data->ops->accept(data, new_sock, 0);
/* Query an address of new_sock into address/len; per the article text this
   is the address at the other end of the connection. */
new_sock->ops->getname(new_sock,
(struct sockaddr *)address, &len, 2);

Now, the client is ready to accept data from the server. We create a new socket and assign it the same
type and ops as our data socket. The accept() function pulls the first pending connection in the
listen queue and creates a new socket with the same connection properties as data. The new socket
thus created handles all data transfer between the client and server. The getname() function gets the
address at the other end of the socket. The last three lines in the above segment of code are useful only
for printing information about the server:
/* Write the data arriving on new_sock into the file named by pmk.dst;
   a negative result jumps to the err3 label (not shown in this excerpt). */
if((total_written = write_to_file(pmk.dst,
new_sock, response)) < 0)
goto err3;

The function write_to_file deals with opening a file in the kernel and writing data from the
socket back into the file. Writing to sockets works like this:
void send_reply(struct socket *sock, char *str)
{
send_sync_buf(sock, str, strlen(str),

MSG_DONTWAIT);
}
/*
 * send_sync_buf - send length bytes from buf over sock via sock_sendmsg().
 *
 * flags is stored in msg_flags. The address limit is switched to KERNEL_DS
 * before sending because buf is kernel memory. Returns the number of bytes
 * written when that is non-zero, otherwise the last sock_sendmsg() result.
 * Part of the body is elided ("...") in the article.
 */
int send_sync_buf(struct socket *sock, const char *buf,
		  const size_t length, unsigned long flags)
{
	struct msghdr msg;
	struct iovec iov;
	int len, written = 0, left = length;
	mm_segment_t oldmm;

	/* No destination name, one I/O vector element, no control data. */
	msg.msg_name = 0;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = flags;

	oldmm = get_fs();
	set_fs(KERNEL_DS);

repeat_send:
	/* Describe the part of buf that has not been sent yet. */
	msg.msg_iov->iov_len = left;
	msg.msg_iov->iov_base = (char *) buf + written;
	len = sock_sendmsg(sock, &msg, left);
	...
	return written ? written : len;
}

The send_reply() function calls send_sync_buf(), which does the real job of sending the
message by calling sock_sendmsg(). The function sock_sendmsg() takes a pointer to struct
socket, the message to be sent and the message length. The message is represented by the structure
msghdr. One of the important members of this structure is iov (io vector). The iovector has two
members, iov_base and iov_len:
/* One element of an I/O vector: a buffer address and its length. */
struct iovec
{
/* Should point to message buffer */
void *iov_base;
/* Message length (size of the buffer at iov_base) */
__kernel_size_t iov_len;
};

These members are filled with appropriate values, and sock_sendmsg() is called to send the
message.
The macro set_fs is used to set the FS register to point to the kernel data segment. This allows
sock_sendmsg() to find the data in the kernel data segment instead of the user-space data segment.
The macro get_fs saves the old value of FS. After a call to sock_sendmsg(), the saved value of
FS is restored.
Reading from the socket works similarly:

/*
 * read_response - read data from sock with sock_recvmsg().
 * Returns the value of sock_recvmsg(). The setup of msg and max_size and
 * the use of str are elided ("...") in this excerpt.
 */
int read_response(struct socket *sock, char *str)


{
...
/* Receive up to max_size bytes, with flags 0. */
len = sock_recvmsg(sock, &msg,
max_size, 0);
...
return len;
}

The read_response() function is similar to send_reply(). After filling the msghdr structure
appropriately, it uses sock_recvmsg() to read data from a socket and returns the number of bytes
read.

A User-Space Program
Now, let's take a look at a user-space program that invokes our system call to transfer a file. We explain
the relevant details for calling a new system call:
...
/* System call number used for my_sys_call. */
#define __NR_my_sys_call 223
/* Generate a stub my_sys_call() that takes one argument, struct params *p. */
_syscall1(long long int, my_sys_call,
struct params *, p);
/* Fill in a struct params and pass it to the new system call. */
int main(int argc, char **argv)
{
struct params pm;
/* fill pm with appropriate values */
...
/* Invoke the system call through the generated stub. */
r = my_sys_call(&pm);
...
}

#define __NR_my_sys_call 223 assigns a number to our system call. _syscall1() is a


macro that creates a stub for the system call. It shows the type and number of arguments that our
system call expects. With this in place, my_sys_call can be invoked just like any other system call.
Upon running the program, with correct values for the source and destination files, a file from a remote
FTP server is downloaded onto the client machine. Here is a transcript of a sample run:
# make
make -C /lib/modules/2.6.9/build SUBDIRS=/home/ppadala/ftp modules
make[1]: Entering directory `/home/ppadala/linux-2.6.9'
CC [M] /home/ppadala/ftp/ftp.o
Building modules, stage 2.
MODPOST
CC
/home/ppadala/ftp/ftp.mod.o
LD [M] /home/ppadala/ftp/ftp.ko
make[1]: Leaving directory `/home/ppadala/linux-2.6.9'
# gcc do_ftp.c
# ./a.out <local host's IP address> 152.2.210.80 /README /tmp/README anonymous

anon@cs.edu
Connection from 152.2.210.80
return = 215 (length of file copied)

Conclusions
We have seen a basic implementation of an FTP client within the kernel. This article explains various
issues of socket programming in the kernel. Interested readers can follow these ideas to write various
network applications, such as an HTTP client or even a Web server in the kernel. Kernel applications,
such as the TUX Web server are used for high-performance content serving and are well suited for
environments that demand data transfer at high rates. Careful attention has to be paid to the design,
implementation and security issues of such applications.
Resources for this article: www.linuxjournal.com/article/8453.
Pradeep Padala is a PhD student at the University of Michigan. His general interests are in distributed
systems with specific emphasis on scheduling and fault tolerance. He is the author of the NCurses
Programming HOWTO and contributes to various open-source projects. More about him can be found
on his Web site at www.eecs.umich.edu/~ppadala.
Ravi Parimi has a Master's degree in Computer Engineering and currently works in Silicon Valley,
California. His main interests are in operating systems, networking and Internet security. He has been
using Linux since 1998 and aspires to be a kernel hacker. In his free time, he pursues Vedic studies and
Chess.

Home

Driving Me Nuts - Things You Never Should Do


in the Kernel
By Greg Kroah-Hartman on Wed, 2005-04-06 01:00.
How do you read and write files from a kernel module? Wait, make that "how would you read and
write files from a kernel module if that weren't a bad thing to do?"
On Linux kernel programming mailing lists oriented toward new developers (see the on-line
Resources), a number of common questions are asked. Almost every time one of these questions is
asked, the response always is, "Don't do that!", leaving the bewildered questioner wondering what kind
of strange development community they have stumbled into. This is the first in an occasional series of
articles that attempts to explain why it generally is not a good idea to do these kinds of things. Then, in
order to make up for the chastising, we break all of the rules and show you exactly how to do them
anyway.

Read a File
The most common question asked in this don't-do-that category is, "How do I read a file from within
my kernel module?" Most new kernel developers are coming from user-space programming
environments or other operating systems where reading a file is a natural and essential part of bringing
configuration information into a program. From within the Linux kernel, however, reading data out of a
file for configuration information is considered to be forbidden. This is due to a vast array of different
problems that could result if a developer tries to do this.
The most common problem is interpreting the data. Writing a file interpreter from within the kernel is a
process ripe for problems, and any errors in that interpreter can cause devastating crashes. Also, any
errors in the interpreter could cause buffer overflows. These might allow unprivileged users to take
over a machine or get access to protected data, such as password files.
Trying to protect the kernel from dumb programming errors is not the most important reason for not
allowing drivers to read files. The biggest issue is policy. Linux kernel programmers try to flee from
the word policy as fast as they can. They almost never want to force the kernel to force a policy on to
user space that can possibly be avoided. Having a module read a file from a filesystem at a specific
location forces the policy of the location of that file to be set. If a Linux distributor decides the easiest
way to handle all configuration files for the system is to place them in the /var/black/hole/of/configs,
this kernel module has to be modified to support this change. This is unacceptable to the Linux kernel
community.
Another big issue with trying to read a file from within the kernel is trying to figure out exactly where
the file is. Linux supports filesystem namespaces, which allow every process to contain its own view of
the filesystem. This allows some programs to see only portions of the entire filesystem, while others
see the filesystem in different locations. This is a powerful feature, and trying to determine that your
module lives in the proper filesystem namespace is an impossible task.
If these big issues are not enough, the final problem of how to get the configuration into the kernel is
also a policy decision. By forcing the kernel module to read a file every time, the author is forcing that
decision. However, some distributions might decide it is better to store system configurations in a local
database and have helper programs funnel that data into the kernel at the proper time. Or, they might
want to connect to an external machine in some manner to determine the proper configuration at that
moment. Whatever method the user decides to employ to store configuration data, by forcing it to be in
a specific file, he or she is forcing that policy decision on the user, which is a bad idea.

But How Do I Configure Things?


After finally understanding the Linux kernel programmer's aversion to policy decisions and thinking
that those idealists are out of their mind, you still are left with the real problem of how to get
configuration data into a kernel module. How can this be done without incurring the wrath of an angry
e-mail flame war?
A common way of sending data to a specific kernel module is to use a char device and the ioctl system
call. This allows the author to send almost any kind of data to the kernel, with the user-space program
sending the data at the proper time in the initialization process. The ioctl command, however, has been
determined to have a lot of nasty side effects, and creating new ioctls in the kernel generally is frowned
on. Also, trying properly to handle a 32-bit user-space program making an ioctl call into a 64-bit kernel
and converting all of the data types in the correct manner is a horrible task to undertake.

Because ioctls are not allowed, the /proc filesystem can be used to get configuration data into the
kernel. By writing data to a file in the filesystem created by the kernel module, the kernel module has
direct access to it. Recently, though, the proc filesystem has been clamped down on by the kernel
developers, as it was horribly abused by programmers over time to contain almost any type of data.
Slowly this filesystem is being cleaned up to contain only process information, such as the names of
filesystem states.
For a more structured filesystem, the sysfs filesystem provides a way for any device and any driver to
create files to which configuration data may be sent. This interface is preferred over ioctls and using
/proc. See previous articles in this column for how to create and use sysfs files within a kernel module.

I Want to Do This Anyway


Now that you understand the reasoning behind forbidding the ability to read a file from a kernel
module, you of course can skip the rest of this article. It does not concern you, as you are off busily
converting your kernel module to use sysfs.
Still here? Okay, so you still want to know how to read a file from a kernel module, and no amount of
persuading can convince you otherwise. You promise never to try to do this in code that will be
submitted for inclusion into the main kernel tree and that I never described how to do this, right?
Actually, reading a file is quite simple, once one minor issue is resolved. A number of the kernel
system calls are exported for module use; these system calls start with sys_. So, for the read system
call, the function sys_read should be used.
The common approach to reading a file is to try code that looks like the following:
fd = sys_open(filename, O_RDONLY, 0);
if (fd >= 0) {
/* read the file here */
sys_close(fd);
}

However, when this is tried within a kernel module, the sys_open() call usually returns the error
-EFAULT. This causes the author to post the question to a mailing list, which elicits the "don't read a
file from the kernel" response described above.
The main thing the author forgot to take into consideration is the kernel expects the pointer passed to
the sys_open() function call to be coming from user space. So, it makes a check of the pointer to verify
it is in the proper address space in order to try to convert it to a kernel pointer that the rest of the kernel
can use. So, when we are trying to pass a kernel pointer to the function, the error -EFAULT occurs.

Fixing the Address Space


To handle this address space mismatch, use the functions get_fs() and set_fs(). These functions modify
the current process address limits to whatever the caller wants. In the case of sys_open(), we want to
tell the kernel that pointers from within the kernel address space are safe, so we call:
set_fs(KERNEL_DS);

The only two valid options for the set_fs() function are KERNEL_DS and USER_DS, roughly standing
for kernel data segment and user data segment, respectively.
To determine what the current address limits are before modifying them, call the get_fs() function.
Then, when the kernel module is done abusing the kernel API, it can restore the proper address limits.
So, with this knowledge, the proper way to write the above code snippet is:
old_fs = get_fs();
set_fs(KERNEL_DS);
fd = sys_open(filename, O_RDONLY, 0);
if (fd >= 0) {
/* read the file here */
sys_close(fd);
}
set_fs(old_fs);

An example of an entire module that reads the file /etc/shadow and dumps it out to the kernel system
log, proving that this can be a dangerous thing to do, can be seen below:
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <asm/uaccess.h>
/*
 * Print the contents of @filename to the kernel log.
 *
 * The address limit is switched to KERNEL_DS for the duration of the call so
 * that sys_open()/sys_read() accept kernel pointers, and is restored before
 * returning. If the file cannot be opened nothing is printed.
 */
static void read_file(char *filename)
{
	mm_segment_t saved_fs;
	char ch[1];
	int fd;

	saved_fs = get_fs();
	set_fs(KERNEL_DS);

	fd = sys_open(filename, O_RDONLY, 0);
	if (fd < 0)
		goto restore;

	/* Emit the log-level prefix once, then the file one byte at a time. */
	printk(KERN_DEBUG);
	for (;;) {
		if (sys_read(fd, ch, 1) != 1)
			break;
		printk("%c", ch[0]);
	}
	printk("\n");
	sys_close(fd);

restore:
	set_fs(saved_fs);
}
/* Module entry point: dumps /etc/shadow to the kernel log and reports success. */
static int __init init(void)
{
read_file("/etc/shadow");
return 0;
}
/* Module exit point: intentionally empty, there is nothing to release. */
static void __exit exit(void)
{ }
MODULE_LICENSE("GPL");
module_init(init);
module_exit(exit);

But What about Writing?


Now, armed with this newfound knowledge of how to abuse the kernel system call API and annoy a
kernel programmer at the drop of a hat, you really can push your luck and write to a file from within
the kernel. Fire up your favorite editor, and pound out something like the following:
/* Save the current address limit so it can be restored afterwards. */
old_fs = get_fs();
/* Allow kernel-space pointers to be passed to the system calls below. */
set_fs(KERNEL_DS);
fd = sys_open(filename, O_WRONLY|O_CREAT, 0644);
if (fd >= 0) {
	/* sys_write() takes the descriptor first, then the buffer and length. */
	sys_write(fd, data, strlen(data));
	sys_close(fd);
}
set_fs(old_fs);

The code seems to build properly, with no compile time warnings, but when you try to load the
module, you get this odd error:
insmod: error inserting 'evil.ko': -1 Unknown symbol in module

This means that a symbol your module is trying to use has not been exported and is not available in the
kernel. By looking at the kernel log, you can determine what symbol that is:
evil: Unknown symbol sys_write

So, even though the function sys_write is present in the syscalls.h header file, it is not exported for use
in a kernel module. Actually, on three different platforms this symbol is exported, but who really uses a
parisc architecture anyway? To work around this, we need to take advantage of the kernel functions
that are available to kernel modules. By reading the code of how the sys_write function is
implemented, the lack of the exported symbol can be thwarted. The following kernel module shows
how this can be done by not using the sys_write call:
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <asm/uaccess.h>
/*
 * Write the NUL-terminated string @data to @filename, creating the file
 * (mode 0644) if it does not exist.
 *
 * sys_write() is not exported to modules, so the write is performed by
 * looking up the struct file behind the descriptor with fget() and calling
 * vfs_write() on it directly. The address limit is raised to KERNEL_DS for
 * the duration of the call and restored before returning. Failures to open
 * the file are silently ignored.
 */
static void write_file(char *filename, char *data)
{
	struct file *file;
	loff_t pos = 0;
	int fd;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	fd = sys_open(filename, O_WRONLY|O_CREAT, 0644);
	if (fd >= 0) {
		/* fget() takes a reference that fput() drops again. */
		file = fget(fd);
		if (file) {
			vfs_write(file, data, strlen(data), &pos);
			fput(file);
		}
		sys_close(fd);
	}
	set_fs(old_fs);
}
/* Module entry point: writes a one-line test file to /tmp/test and reports success. */
static int __init init(void)
{
write_file("/tmp/test", "Evil file.\n");
return 0;
}
/* Module exit point: intentionally empty, there is nothing to release. */
static void __exit exit(void)
{ }
MODULE_LICENSE("GPL");
module_init(init);
module_exit(exit);

As you can see, by using the functions fget, fput and vfs_write, we can implement our own sys_write
functionality.

I Never Told You about This


In conclusion, reading and writing a file from within the kernel is a bad, bad thing to do. Never do it.
Ever. Both modules from this article, along with a Makefile for compiling them, are available from the
Linux Journal FTP site, but we expect to see no downloads in the logs. And, I never told you how to do
it either. You picked it up from someone else, who learned it from his sister's best friend, who heard
about how to do it from her coworker.

See the attached file. This is part of a network file system I hope to release in a
few weeks. Most
of it are wrappers I use for networking (connect, listen, accept...). Not as
polished as Stevens's examples
from user land but it gets the job done. Take a look at thug_receive() and
thug_send() for example
of sock_sendmsg() and sock_recvmsg().
Comments from anyone would be greatly appreciated.
-Etay

************************************************
"When in doubt, use brute force."
Thompson
Etay Meiri
cl1@netvision.net.il
************************************************
/*
*
*
*
*
*
*/

Ken

/*
 * Networking code for thug
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/malloc.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>
#include <asm/fcntl.h>
#include <net/scm.h>
#include <asm/uaccess.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <asm/errno.h>

#include "thugd.h"
void fput(struct file *file);
/* Return the socket embedded in @inode (the u.socket_i member). */
struct socket *socki_lookup(struct inode *inode)
{
	struct socket *embedded = &inode->u.socket_i;

	return embedded;
}
/*
 * Create a TCP socket and connect it to @sock_addr.
 *
 * Returns the connected socket, or NULL if the socket could not be created
 * or the connect failed (the socket is released in the latter case).
 */
struct socket* thug_connect(struct sockaddr_in* sock_addr)
{
	int retval;
	struct socket *sock;

	retval = sock_create(AF_INET,SOCK_STREAM,0,&sock);
	if (retval < 0) {
		printk("thug_connect: error creating socket\n");
		return NULL;
	}

	retval = sock->ops->connect(sock,(struct sockaddr *)sock_addr,
				    sizeof(struct sockaddr_in),0);
	if (retval < 0) {
		printk("thug_connect: error on connect: %d\n",retval);
		sock_release(sock);
		return NULL;
	}

	thug_print("Connection ok\n");
	return sock;
}

/*
 * Create a TCP socket bound to INADDR_ANY:THUG_PORT and put it into the
 * listening state with a backlog of 5.
 *
 * Returns the listening socket, or NULL on any failure (the socket is
 * released if bind or listen fails).
 */
struct socket* thug_listen(void)
{
	struct socket *sock;
	struct sockaddr_in servaddr;

	if (sock_create(AF_INET,SOCK_STREAM,0,&sock) < 0) {
		printk("thug_listen: error creating socket\n");
		return NULL;
	}

	memset(&servaddr,0,sizeof(servaddr));
	servaddr.sin_family = AF_INET;
	servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
	servaddr.sin_port = htons(THUG_PORT);

	if (sock->ops->bind(sock, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) {
		printk("thug_listen: bind error\n");
		sock_release(sock);
		return NULL;
	}

	if (sock->ops->listen(sock,5) < 0) {
		printk("thug_listen: listen error\n");
		sock_release(sock);
		return NULL;
	}

	return sock;
}

/*
 * Accept one connection on the listening socket @sock.
 *
 * A fresh socket is allocated, given the listener's type and ops, and handed
 * to the protocol's accept(). getname() is then called for the peer address;
 * its result is only used as a success check.
 *
 * Returns the new socket on success. Note the two different failure
 * conventions callers must handle: NULL if sock_alloc() fails, and
 * ERR_PTR(err) if accept() or getname() fails (the new socket is released).
 */
struct socket* thug_accept(struct socket* sock)
{
	struct inode* inode;
	struct socket* newsock;
	int err;
	struct sockaddr_in sin;
	int slen;

	if (!(newsock = sock_alloc())) {
		printk("thug_accept: error creating socket(2)\n");
		return NULL;
	}
	inode = newsock->inode;
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*if (sock->ops->dup(newsock,sock) < 0) {
		printk("thug_accept: error dupping socket\n");
		sock_release(newsock);
		return NULL;
	}*/
	if ((err = newsock->ops->accept(sock,newsock,0)) < 0) {
		sock_release(newsock);
		return ERR_PTR(err);
	}

	slen = sizeof(sin);
	if ((err = newsock->ops->getname(newsock,(struct sockaddr*)&sin,&slen,1)) < 0) {
		printk("thug_accept: error on getname: %d\n",err);
		sock_release(newsock);
		return ERR_PTR(err);
	}

	/* Re-derive the socket from the inode saved before accept(). */
	newsock = socki_lookup(inode);
	return newsock;
}

/*
 * Receive up to @len bytes from @sock into the kernel buffer @buf with a
 * single sock_recvmsg() call.
 *
 * The address limit is raised to KERNEL_DS around the call because @buf is a
 * kernel pointer. Returns whatever sock_recvmsg() returns: the number of
 * bytes received, or a negative error (which is also logged).
 */
int thug_receive(struct socket* sock,unsigned char* buf,int len)
{
	struct msghdr msg;
	struct iovec iov;
	mm_segment_t oldfs;
	int size = 0;

	iov.iov_base = buf;
	iov.iov_len = len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	/* Initialise every field, as thug_send() does, so no stack garbage is passed down. */
	msg.msg_controllen = 0;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_flags = 0;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	size = sock_recvmsg(sock,&msg,len,0);
	set_fs(oldfs);

	if (size < 0)
		printk("thug_read_from_socket: sock_recvmsg error: %d\n",size);
	return size;
}
/*
 * Read exactly @len bytes from @sock into @buf, calling thug_receive()
 * repeatedly until the buffer is full.
 *
 * Returns @len on success, -EIO if thug_receive() returns 0 before all bytes
 * arrived, or the negative error from thug_receive().
 */
int thug_read_from_socket(struct socket* sock,unsigned char* buf,int len)
{
	int result;
	int received = 0;

	while (received < len) {
		result = thug_receive(sock,buf + received,len - received);
		if (result == 0)
			return -EIO;
		else if (result < 0)
			return result;
		received += result;
	}

	return received;
}

/*
 * Send up to @len bytes from the kernel buffer @buf on @sock with a single
 * sock_sendmsg() call.
 *
 * The address limit is raised to KERNEL_DS around the call because @buf is a
 * kernel pointer. Returns whatever sock_sendmsg() returns: the number of
 * bytes sent, or a negative error (which is also logged).
 */
int thug_send(struct socket* sock,unsigned char* buf,int len)
{
	struct msghdr msg;
	struct iovec iov;
	mm_segment_t oldfs;
	int size = 0;

	iov.iov_base = buf;
	iov.iov_len = len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_flags = 0;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	size = sock_sendmsg(sock,&msg,len);
	set_fs(oldfs);

	if (size < 0)
		printk("thug_write_to_socket: sock_sendmsg error: %d\n",size);
	return size;
}

/*
 * Write exactly @len bytes from @buf to @sock, calling thug_send()
 * repeatedly until everything has been sent.
 *
 * Returns @len on success, -EIO if thug_send() returns 0, or the negative
 * error from thug_send().
 */
int thug_write_to_socket(struct socket* sock,unsigned char* buf,int len)
{
	int result;
	int sent = 0;

	while (sent < len) {
		result = thug_send(sock,buf + sent,len - sent);
		if (result == 0)
			return -EIO;
		else if (result < 0)
			return result;
		/* Advance inside the loop; otherwise a short send would spin forever. */
		sent += result;
	}

	return sent;
}

/*
 * Send a single 0 byte (the "ack" marker) on @sock.
 * Returns 0 if exactly one byte was written, -1 otherwise.
 */
int send_ack(struct socket* sock)
{
	unsigned char ack = 0;
	int written = thug_write_to_socket(sock,&ack,1);

	if (written == 1)
		return 0;

	printk("Error sending ack");
	return -1;
}
/*
 * Send a single 1 byte (the "break" marker) on @sock.
 * Returns 0 if exactly one byte was written, -1 otherwise.
 */
int send_break(struct socket* sock)
{
	unsigned char buf = 1;

	if (thug_write_to_socket(sock,&buf,1) != 1) {
		printk("Error sending break");
		return -1;
	}

	return 0;
}

/*
 * Read one byte from @sock and check that it is the ack marker (0).
 * Returns 0 on ack, -1 if the read failed or a different byte arrived.
 */
int get_ack(struct socket* sock)
{
	unsigned char buf;

	if (thug_read_from_socket(sock,&buf,1) != 1) {
		printk("Error getting ack");
		return -1;
	}
	if (buf != 0)
		return -1;

	return 0;
}

/*
 * Decode a 4-byte big-endian value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 */
void decodeui(unsigned int* dest,char* src)
{
	/*
	 * Read the bytes as unsigned: where plain char is signed, a byte >= 0x80
	 * is negative and left-shifting it is undefined behaviour.
	 */
	const unsigned char *bytes = (const unsigned char *)src;

	if (!dest || !src)
		return;

	*dest = ((unsigned int)bytes[0] << 24) |
		((unsigned int)bytes[1] << 16) |
		((unsigned int)bytes[2] << 8) |
		(unsigned int)bytes[3];
}
/*
 * Encode @src as 4 big-endian bytes at @dest.
 * Does nothing if @dest is NULL.
 */
void encodeui(char* dest,unsigned int src)
{
	if (dest) {
		dest[0] = (src & 0xFF000000) >> 24;
		dest[1] = (src & 0xFF0000) >> 16;
		dest[2] = (src & 0xFF00) >> 8;
		dest[3] = src & 0xFF;
	}
}
/*
 * Store @src at @dest as two bytes, most significant byte first.
 * A NULL @dest is ignored.
 */
void encodeus(char* dest,unsigned short src)
{
	if (!dest)
		return;

	dest[0] = (src >> 8) & 0xFF;
	dest[1] = src & 0xFF;
}
/*
 * Decode a 2-byte big-endian value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 */
void decodeus(unsigned short* dest,char* src)
{
	/* Unsigned bytes avoid left-shifting a negative char (undefined behaviour). */
	const unsigned char *bytes = (const unsigned char *)src;

	if (!dest || !src)
		return;

	*dest = (unsigned short)((bytes[0] << 8) | bytes[1]);
}
/*
 * Decode a 4-byte big-endian two's-complement value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 *
 * The wire format is 32 bits wide, so the value is sign-extended from bit 31.
 * This makes a negative value written by encodel() read back as the same
 * negative value even where long is 64 bits wide.
 */
void decodel(long* dest,char* src)
{
	/* Unsigned bytes avoid left-shifting a negative char (undefined behaviour). */
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int raw;

	if (!dest || !src)
		return;

	raw = ((unsigned int)bytes[0] << 24) |
	      ((unsigned int)bytes[1] << 16) |
	      ((unsigned int)bytes[2] << 8) |
	      (unsigned int)bytes[3];

	/* Go through int so that bit 31 is treated as the sign bit. */
	*dest = (int)raw;
}
/*
 * Store the low 32 bits of @src at @dest as four bytes, most significant
 * byte first. A NULL @dest is ignored.
 */
void encodel(char* dest,long src)
{
	unsigned long bits = (unsigned long)src;
	int shift;
	char *out = dest;

	if (!dest)
		return;

	for (shift = 24; shift >= 0; shift -= 8)
		*out++ = (bits >> shift) & 0xFF;
}
/*
 * Decode a 4-byte big-endian value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 */
void decodeul(unsigned long* dest,char* src)
{
	/* Unsigned bytes avoid left-shifting a negative char (undefined behaviour). */
	const unsigned char *bytes = (const unsigned char *)src;

	if (!dest || !src)
		return;

	*dest = ((unsigned long)bytes[0] << 24) |
		((unsigned long)bytes[1] << 16) |
		((unsigned long)bytes[2] << 8) |
		(unsigned long)bytes[3];
}

/*
 * Store the low 32 bits of @src at @dest as four bytes, most significant
 * byte first. A NULL @dest is ignored.
 */
void encodeul(char* dest,unsigned long src)
{
	int shift;
	char *out = dest;

	if (!dest)
		return;

	for (shift = 24; shift >= 0; shift -= 8)
		*out++ = (src >> shift) & 0xFF;
}
/*
 * Decode an 8-byte big-endian value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 *
 * The bytes are accumulated in an unsigned 64-bit value: shifting a promoted
 * char (an int) by 32 or more bits is undefined and would not produce the
 * upper half of the result.
 */
void decodell(long long* dest,char* src)
{
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned long long raw = 0;
	int i;

	if (!dest || !src)
		return;

	for (i = 0; i < 8; i++)
		raw = (raw << 8) | bytes[i];

	*dest = (long long)raw;
}

/*
 * Encode @src as 8 big-endian bytes at @dest.
 * Does nothing if @dest is NULL.
 */
void encodell(char* dest,long long src)
{
	if (dest) {
		dest[0] = (src & 0xFF00000000000000ULL) >> 56;
		dest[1] = (src & 0x00FF000000000000ULL) >> 48;
		dest[2] = (src & 0x0000FF0000000000ULL) >> 40;
		dest[3] = (src & 0x000000FF00000000ULL) >> 32;
		dest[4] = (src & 0x00000000FF000000ULL) >> 24;
		dest[5] = (src & 0x0000000000FF0000ULL) >> 16;
		dest[6] = (src & 0x000000000000FF00ULL) >> 8;
		dest[7] = src & 0x00000000000000FFULL;
	}
}

/*
 * Decode a 4-byte big-endian two's-complement value at @src into *@dest.
 * Does nothing if either pointer is NULL.
 */
void decodei(int* dest,char* src)
{
	/* Unsigned bytes avoid left-shifting a negative char (undefined behaviour). */
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned int raw;

	if (!dest || !src)
		return;

	raw = ((unsigned int)bytes[0] << 24) |
	      ((unsigned int)bytes[1] << 16) |
	      ((unsigned int)bytes[2] << 8) |
	      (unsigned int)bytes[3];
	*dest = (int)raw;
}

/*
 * Encode @src as 4 big-endian bytes at @dest.
 * Does nothing if @dest is NULL.
 */
void encodei(char* dest,int src)
{
	if (dest) {
		dest[0] = (src & 0xFF000000) >> 24;
		dest[1] = (src & 0xFF0000) >> 16;
		dest[2] = (src & 0xFF00) >> 8;
		dest[3] = src & 0xFF;
	}
}

/* must be called with the socket locked */


int thug_request(struct mfs_sb_info* thug_sb, unsigned int msglen)
{
unsigned short len;
unsigned long flags;
int error = 0;
int mask;
sigset_t old_set;
spin_lock_irqsave(&current->sigmask_lock, flags);
old_set = current->blocked;
mask = sigmask(SIGKILL) | sigmask(SIGSTOP);
siginitsetinv(&current->blocked,mask);
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
if ((error = thug_write_to_socket(thug_sb->sock, thug_sb->packet,
msglen)) != msglen) {
printk("thug_request: error sending request to server\n");
goto out;
}
memset(thug_sb->packet, 0, thug_sb->packet_len);
if ((error = thug_read_from_socket(thug_sb->sock, thug_sb->packet, 2)) !=
2) {
}

printk("thug_request: error reading reply length from socket\n");


goto out;

decodeus(&len, thug_sb->packet);
if ((error = thug_read_from_socket(thug_sb->sock, thug_sb->packet + 2,
len)) != len) {
printk("thug_request: error reading reply body from socket
(%d)\n",len);
goto out;
}
out:

spin_lock_irqsave(&current->sigmask_lock, flags);
current->blocked = old_set;
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
return error;

/*
 * Serialise @attr into @dest as consecutive big-endian fields:
 * ia_valid (4 bytes), ia_mode (2), ia_uid (4), ia_gid (4), ia_size (8),
 * ia_atime (4), ia_mtime (4), ia_ctime (4), ia_attr_flags (4).
 */
void encode_iattr(char* dest, struct iattr* attr)
{
	char *cursor = dest;

	encodeui(cursor, attr->ia_valid);
	cursor += 4;
	encodeus(cursor, attr->ia_mode);
	cursor += 2;
	encodeui(cursor, attr->ia_uid);
	cursor += 4;
	encodeui(cursor, attr->ia_gid);
	cursor += 4;
	encodell(cursor, attr->ia_size);
	cursor += 8;
	encodel(cursor, attr->ia_atime);
	cursor += 4;
	encodel(cursor, attr->ia_mtime);
	cursor += 4;
	encodel(cursor, attr->ia_ctime);
	cursor += 4;
	encodeui(cursor, attr->ia_attr_flags);
}
/*
 * Deserialise an iattr from @src, reading the fields at the same offsets
 * encode_iattr() writes them: ia_valid at 0, ia_mode at 4, ia_uid at 6,
 * ia_gid at 10, ia_size at 14, ia_atime at 22, ia_mtime at 26, ia_ctime at 30
 * and ia_attr_flags at 34.
 */
void decode_iattr(struct iattr* dest, char* src)
{
	decodeui(&dest->ia_valid, src);
	decodeus(&dest->ia_mode, src + 4);
	decodeui(&dest->ia_uid, src + 6);
	decodeui(&dest->ia_gid, src + 10);
	decodell(&dest->ia_size, src + 14);
	decodel(&dest->ia_atime, src + 22);
	decodel(&dest->ia_mtime, src + 26);
	decodel(&dest->ia_ctime, src + 30);
	/* Mirrors the final field written by encode_iattr(). */
	decodeui(&dest->ia_attr_flags, src + 34);
}

Kernel Korner - Network Programming in the


Kernel
By Pradeep Padala and Ravi Parimi on Fri, 2005-08-26 01:00. Software
Take a tour of the kernel's networking functionality by writing a network client that runs in kernel
space.
All Linux distributions provide a wide range of network applications—from daemons that provide a
variety of services such as WWW, mail and SSH to client programs that access one or more of these
services. These programs are written in user mode and use the system calls provided by the kernel to
perform various operations like network read and write. Although this is the traditional method of
writing programs, there is another interesting way to develop these applications by implementing them
in the kernel. The TUX Web server is a good example of an application that runs inside the kernel and
serves static content. In this article, we explain the basics of writing network applications within the
kernel and their advantages and disadvantages. As an example, we explain the implementation of an in-kernel FTP client.

Advantages and Disadvantages of In-Kernel Implementations


Why would one want to implement applications within the kernel? Here are a few advantages:

When a user-space program makes a system call, there is some overhead associated with the user-space/kernel-space transition. By programming all functionality in the kernel, we can make
gains in performance.

The data corresponding to any application that sends or receives packets is copied from user
mode to kernel mode and vice versa. By implementing network applications within the kernel, it
is possible to reduce such overhead and increase efficiency by not copying data to user mode.

In specific research and high-performance computing environments, there is a need for


achieving data transfers at great speeds. Kernel applications find use in such situations.

On the other hand, in-kernel implementations have certain disadvantages:

Security is a primary concern within the kernel, and a large class of user-mode applications are
not suitable to be run directly in the kernel. Consequently, special care needs to be taken while
designing in-kernel applications. For example, reading and writing to files within the kernel is
usually a bad idea, but most applications require some kind of file I/O.

Large applications cannot be implemented in the kernel due to memory constraints.

Network Programming Basics


Network programming is usually done with sockets. A socket serves as a communication end point
between two processes. In this article, we describe network programming with TCP/IP sockets.
Server programs create sockets, bind to well-known ports, listen and accept connections from clients.
Servers are usually designed to accept multiple connections from clients—they either fork a new process
to serve each client request (concurrent servers) or completely serve one request before accepting more
connections (iterative servers). Client programs, on the other hand, create sockets to connect to servers
and exchange information.

FTP Client-Server Interaction


Let's take a quick look at how an FTP client and server are implemented in user mode. We discuss only
active FTP in this article. The differences between active and passive FTP are not relevant to our
discussion of network programming here.

Socket Programming Basics


Here is a brief explanation of the design of an FTP client and server. The server program creates a
socket using the socket() system call. It then binds on a well-known port using bind() and waits
for connections from clients using the listen() system call. The server then accepts incoming
requests from clients using accept() and forks a new process (or thread) to serve each incoming
client request.
The client program creates a control socket using socket() and next calls connect() to establish
a connection with the server. It then creates a separate socket for data transfer using socket() and
binds to an unprivileged port (>1024) using bind(). The client now listen()s on this port for data
transfer from the server. The server now has enough knowledge to honor a data transfer request from
the client. Finally, the client uses accept() to accept connections from the server to send and receive
data. For sending and receiving data, the client and server use the write() and read() or
sendmsg() and recvmsg() system calls. The client issues close() on all open sockets to tear
down its connection to the server. Figure 1 sums it up.

Figure 1. The FTP protocol uses two sockets: one for control messages and one for data.

FTP Commands
Here is a list of a few of the FTP commands we used. Because our program provides only a basic
implementation of the protocol, we discuss only the relevant commands:

The client sends a USER <username>\r\n command to the server to begin the
authentication process.

To send the password, the client uses PASS <password>\r\n.

In some cases, the client sends a PORT command to inform the server of its preferred port for
data transfer. In such cases, the client sends PORT <a1,a2,a3,a4,p1,p2>\r\n. The
RFC for FTP requires that the a1-a4 constitute the 32-bit IP address of the client, and p1-p2
constitute the 16-bit port number. For example, if the client's IP address is 10.10.1.2 and it
chooses port 12001 for data transfer, the client sends PORT 10,10,1,2,46,225.

Some FTP clients request, by default, that data be transferred in binary format, while others
explicitly ask the server to enable data transfer in binary mode. Such clients send a TYPE
I\r\n command to the server to request this.

Figure 2 is a diagram that shows a few FTP commands and their responses from the server.

Figure 2. The client issues FTP commands over the control connection to set up the file transfer.

Socket Programming in the Kernel


Writing programs in the kernel is different from doing the same in user space.
We explain a few issues concerned with writing a network application in the kernel. Refer to Greg
Kroah-Hartman's article "Things You Never Should Do in the Kernel" (see the on-line Resources).
First, let's examine how a system call in user space completes its task. For example, look at the
socket() system call:
sockfd = socket(AF_INET,SOCK_STREAM,0);

When a program executes a system call, it traps into the kernel via an interrupt and hands over control
to the kernel. Among other things, the kernel performs various tasks, such as saving contents of
registers, making changes to address space boundaries and checking for errors with system call
parameters. Eventually, the sys_socket() function in the kernel is responsible for creating the
socket of specified address and family type, finding an unused file descriptor and returning this number
back to user space. Browsing through the kernel's code we can trace the path followed by this function
(Figure 3).

Figure 3. Behind the scenes of a system call: when user space executes socket(), the kernel does
necessary housekeeping and then returns a new file descriptor.

Design of an FTP Client


We now explain the design and implementation of a kernel FTP client. Please follow through the code
available at the Linux Journal FTP site (see Resources) as you read through the article. The main
functionality of this client is written in the form of a kernel module that adds a system call dynamically
that user-space programs can invoke to start the FTP client process. The module allows only the root
user to read a file using FTP. The user-space program that calls the system call in this module should

be used with extreme caution. For example, it is easy to imagine the catastrophic results when root
runs:
./a.out 10.0.0.1 10.0.0.2 foo_file /dev/hda1/*

and overwrites /dev/hda1 with a downloaded file from 10.0.0.1.

Exporting sys_call_table
We first need to configure the Linux kernel to allow us to add new system calls via a kernel module
dynamically. Starting with version 2.6, the symbol sys_call_table is no longer exported by the
kernel. For our module to be able to add a system call dynamically, we need to add the following lines
to arch/i386/kernel/i386_ksyms.c in the kernel source (assuming you are using a Pentium-class
machine):
extern void *sys_call_table;
EXPORT_SYMBOL(sys_call_table);

After recompiling the kernel and booting the machine into it, we are all set to run the FTP client. Refer
to the Kernel Rebuild HOWTO (see Resources) for details on compiling a kernel.

Module Basics
Let's examine the code for the module first. In the code snippets in the article, we omit error-checking
and other irrelevant details for clarity. The complete code is available from the LJ FTP site (see
Resources):
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
/* For socket etc */
#include <linux/net.h>
#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <asm/uaccess.h>
#include <linux/file.h>
#include <linux/socket.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
...
/*
 * Module initialisation: logs a start-up message and installs my_sys_call
 * in slot SYSCALL_NUM of sys_call_table. Always returns 0.
 */
int ftp_init(void)
{
printk(KERN_INFO FTP_STRING
"Starting ftp client module\n");
sys_call_table[SYSCALL_NUM] = my_sys_call;
return 0;

}
/*
 * Module cleanup: logs a farewell message and points slot SYSCALL_NUM of
 * sys_call_table at sys_ni_syscall, unregistering the FTP system call.
 */
void ftp_exit(void)
{
printk(KERN_INFO FTP_STRING
"Cleaning up ftp client module, bye !\n");
sys_call_table[SYSCALL_NUM] = sys_ni_syscall;
}
...

The program begins with the customary include directives. Notable among the header files are
linux/kernel.h for KERN_ALERT and linux/slab.h, which contains definitions for kmalloc() and
linux/smp_lock.h that define kernel-locking routines. System calls are handled in the kernel by
functions with the same names in user space but are prefixed with sys_. For example, the
sys_socket function in the kernel handles the task of the socket() system call. In this module,
we are using system call number 223 for our new system call. This method is not foolproof and will not
work on SMP machines. Upon unloading the module, we unregister our system call.

The System Call


The workhorse of the module is the new system call that performs an FTP read. The system call takes a
structure as a parameter. The structure is self-explanatory and is given below:
/* Arguments passed from user space to the FTP system call. */
struct params {
	unsigned char destip[4];	/* Destination IP address */
	unsigned char srcip[4];		/* Source IP address */
	char src[64];			/* Source file: the file to download from the server */
	char dst[64];			/* Destination file: local file receiving the download */
	char user[16];			/* Username */
	char pass[64];			/* Password */
};

The system call is given below. We explain the relevant details in next few paragraphs:
/*
 * The FTP system call (excerpt; the listing continues in the snippets that
 * follow). Copies the parameter block from user space, checks that the
 * caller is root, then opens the control connection and logs in with
 * USER/PASS.
 */
asmlinkage int my_sys_call
(struct params __user *pm)
{
	struct sockaddr_in saddr, daddr;
	struct socket *control= NULL;
	struct socket *data = NULL;
	struct socket *new_sock = NULL;
	int r = -1;
	char *response;
	char *reply;
	struct params pmk;

	/* Validate the whole structure, not just sizeof(pointer) bytes. */
	if(unlikely(!access_ok(VERIFY_READ,
			       pm, sizeof(*pm))))
		return -EFAULT;
	if(copy_from_user(&pmk, pm,
			  sizeof(struct params)))
		return -EFAULT;
	if(current->uid != 0)
		return r;

	/* Allocate only after the checks above so the early returns cannot leak. */
	response = kmalloc(SNDBUF, GFP_KERNEL);
	reply = kmalloc(RCVBUF, GFP_KERNEL);

	r = sock_create(PF_INET, SOCK_STREAM,
			IPPROTO_TCP, &control);
	memset(&servaddr,0, sizeof(servaddr));
	servaddr.sin_family = AF_INET;
	servaddr.sin_port = htons(PORT);
	servaddr.sin_addr.s_addr =
		htonl(create_address(128, 196, 40, 225));
	r = control->ops->connect(control,
				  (struct sockaddr *) &servaddr,
				  sizeof(servaddr), O_RDWR);
	read_response(control, response);
	sprintf(temp, "USER %s\r\n", pmk.user);
	send_reply(control, temp);
	read_response(control, response);
	sprintf(temp, "PASS %s\r\n", pmk.pass);
	send_reply(control, temp);
	read_response(control, response);

We start out by declaring pointers to a few socket structures. kmalloc() is the kernel equivalent
of malloc() and is used to allocate memory for our character arrays. The arrays response and reply
will contain the responses received from the server and the replies sent to it.
The first step is to read the parameters from user mode to kernel mode. This is customarily done with
access_ok and copy_from_user/copy_to_user calls. access_ok checks whether the user-space pointer is valid to be referenced. copy_from_user is used to copy data from user mode into a kernel buffer. For
reading simple variables like char and int, use __get_user.
Now that we have the user-specified parameters, the next step is to create a control socket and establish
a connection with the FTP server. sock_create() does this for us—its arguments are similar to those
we pass to the user-level socket() system call. The struct sockaddr_in variable servaddr
is now filled in with all the necessary information—address family, destination port and IP address of the
server. Each socket structure has a member that is a pointer to a structure of type struct
proto_ops. This structure contains a list of function pointers to all the operations that can be
performed on a socket. We use the connect() function of this structure to establish a connection to
the server. Our functions read_response() and send_reply() transfer data between the client
and server (these functions are explained later):
/* Create the data socket and describe the local endpoint it will use. */
r = sock_create(PF_INET, SOCK_STREAM,
		IPPROTO_TCP, &data);
memset(&claddr,0, sizeof(claddr));

claddr.sin_family = AF_INET;
claddr.sin_port = htons(EPH_PORT);
claddr.sin_addr.s_addr= htonl(
	create_address(srcip));
/* Bind to the ephemeral port and start listening for the server's connection. */
r = data->ops->bind(data,
		    (struct sockaddr *)&claddr,
		    sizeof (claddr));
r = data->ops->listen(data, 1);

Now, a data socket is created to transfer data between the client and server. We fill in another struct
sockaddr_in variable claddr with information about the client-protocol family, local
unprivileged port that our client would bind to and, of course, the IP address. Next, the socket is bound
to the ephemeral port EPH_PORT. The function listen() lets the kernel know that this socket can
accept incoming connections:
a = (char *)&claddr.sin_addr;
p = (char *)&claddr.sin_port;
send_reply(control, reply);
read_response(control, response);
strcpy(reply, "RETR ");
strcat(reply, src);
strcat(reply, "\r\n");
send_reply(control, reply);
read_response(control, response);

As explained previously, a PORT command is issued to the FTP server to let it know the port for data
transfer. This command is sent over the control socket and not over the data socket:
new_sock = sock_alloc();
new_sock->type = data->type;
new_sock->ops = data->ops;
r = data->ops->accept(data, new_sock, 0);
new_sock->ops->getname(new_sock,
(struct sockaddr *)address, &len, 2);

Now, the client is ready to accept data from the server. We create a new socket and assign it the same
type and ops as our data socket. The accept() function pulls the first pending connection in the
listen queue and creates a new socket with the same connection properties as data. The new socket
thus created handles all data transfer between the client and server. The getname() function gets the
address at the other end of the socket. The last three lines in the above segment of code are useful only
for printing information about the server:
if((total_written = write_to_file(pmk.dst,
new_sock, response)) < 0)

goto err3;

The function write_to_file deals with opening a file in the kernel and writing data from the
socket back into the file. Writing to sockets works like this:
/*
 * send_reply - send a NUL-terminated string over a socket.
 * @sock: socket to write to.
 * @str:  string to send; strlen(str) bytes are handed on, so the
 *        terminating NUL is not included.
 *
 * Passes the string to send_sync_buf() with MSG_DONTWAIT. The value
 * returned by send_sync_buf() is discarded.
 */
void send_reply(struct socket *sock, char *str)
{
	const size_t nbytes = strlen(str);

	send_sync_buf(sock, str, nbytes, MSG_DONTWAIT);
}
/*
 * send_sync_buf - send a kernel buffer over a socket with sock_sendmsg().
 * @sock:   socket to send on.
 * @buf:    start of the data to send.
 * @length: number of bytes to send.
 * @flags:  stored in msg.msg_flags.
 *
 * Excerpt: the code between the sock_sendmsg() call and the return
 * statement is elided ("...") in this listing.
 *
 * Returns written when it is non-zero, otherwise len (the last
 * sock_sendmsg() result).
 */
int send_sync_buf
(struct socket *sock, const char *buf,
const size_t length, unsigned long flags)
{
struct msghdr msg;
struct iovec iov;
int len, written = 0, left = length;
mm_segment_t oldmm;
/* No address and no control data; a single iovec carries the payload. */
msg.msg_name
= 0;
msg.msg_namelen = 0;
msg.msg_iov
= &iov;
msg.msg_iovlen
= 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags
= flags;
/* Save the current FS value, then select the kernel data segment so that
   sock_sendmsg() looks for buf in kernel space rather than user space. */
oldmm = get_fs(); set_fs(KERNEL_DS);
repeat_send:
/* Describe the part of buf that has not been written yet. */
msg.msg_iov->iov_len = left;
msg.msg_iov->iov_base = (char *) buf +
written;
len = sock_sendmsg(sock, &msg, left);
...
return written ? written : len;
}

The send_reply() function calls send_sync_buf(), which does the real job of sending the
message by calling sock_sendmsg(). The function sock_sendmsg() takes a pointer to struct
socket, the message to be sent and the message length. The message is represented by the structure
msghdr. One of the important members of this structure is iov (io vector). The iovector has two
members, iov_base and iov_len:
/* One element of an I/O vector: a buffer given by its start and length. */
struct iovec
{
/* Start of the message buffer */
void *iov_base;
/* Length of the message buffer */
__kernel_size_t iov_len;
};

These members are filled with appropriate values, and sock_sendmsg() is called to send the
message.
The macro set_fs is used to set the FS register to point to the kernel data segment. This allows
sock_sendmsg() to find the data in the kernel data segment instead of the user-space data segment.
The macro get_fs saves the old value of FS. After a call to sock_sendmsg(), the saved value of
FS is restored.
Reading from the socket works similarly:
/*
 * read_response - read data from a socket with sock_recvmsg().
 * @sock: socket to read from.
 * @str:  NOTE(review): presumably the buffer that receives the data; the
 *        msghdr setup that would show this is elided ("...") here.
 *
 * Returns len, which is assigned from sock_recvmsg(); the accompanying
 * text describes this as the number of bytes read.
 */
int read_response(struct socket *sock, char *str)
{
...
len = sock_recvmsg(sock, &msg,
max_size, 0);
...
return len;
}

The read_response() function is similar to send_reply(). After filling the msghdr structure
appropriately, it uses sock_recvmsg() to read data from a socket and returns the number of bytes
read.

A User-Space Program
Now, let's take a look at a user-space program that invokes our system call to transfer a file. We explain
the relevant details for calling a new system call:
...
/* Number assigned to the new system call. */
#define __NR_my_sys_call 223
/*
 * Generate a one-argument stub named my_sys_call that takes a
 * struct params pointer.
 * NOTE(review): the stub's return type is long long int while the kernel
 * side is declared as returning int - confirm which is intended.
 */
_syscall1(long long int, my_sys_call,
struct params *, p);
/* Driver program: fills a struct params and invokes the new system call. */
int main(int argc, char **argv)
{
struct params pm;
/* fill pm with appropriate values */
...
r = my_sys_call(&pm);
...
}

#define __NR_my_sys_call 223 assigns a number to our system call. _syscall1() is a


macro that creates a stub for the system call. It shows the type and number of arguments that our
system call expects. With this in place, my_sys_call can be invoked just like any other system call.
Upon running the program, with correct values for the source and destination files, a file from a remote
FTP server is downloaded onto the client machine. Here is a transcript of a sample run:

# make
make -C /lib/modules/2.6.9/build SUBDIRS=/home/ppadala/ftp modules
make[1]: Entering directory `/home/ppadala/linux-2.6.9'
CC [M] /home/ppadala/ftp/ftp.o
Building modules, stage 2.
MODPOST
CC
/home/ppadala/ftp/ftp.mod.o
LD [M] /home/ppadala/ftp/ftp.ko
make[1]: Leaving directory `/home/ppadala/linux-2.6.9'
# gcc do_ftp.c
# ./a.out <local host's IP address> 152.2.210.80 /README /tmp/README anonymous
anon@cs.edu
Connection from 152.2.210.80
return = 215 (length of file copied)

Conclusions
We have seen a basic implementation of an FTP client within the kernel. This article explains various
issues of socket programming in the kernel. Interested readers can follow these ideas to write various
network applications, such as an HTTP client or even a Web server in the kernel. Kernel applications,
such as the TUX Web server, are used for high-performance content serving and are well suited for
environments that demand data transfer at high rates. Careful attention has to be paid to the design,
implementation and security issues of such applications.
Resources for this article: www.linuxjournal.com/article/8453.
Pradeep Padala is a PhD student at the University of Michigan. His general interests are in distributed
systems with specific emphasis on scheduling and fault tolerance. He is the author of the NCurses
Programming HOWTO and contributes to various open-source projects. More about him can be found
on his Web site at www.eecs.umich.edu/~ppadala.
Ravi Parimi has a Master's degree in Computer Engineering and currently works in Silicon Valley,
California. His main interests are in operating systems, networking and Internet security. He has been
using Linux since 1998 and aspires to be a kernel hacker. In his free time, he pursues Vedic studies and
Chess.

Kernel Korner - Network Programming in the


Kernel
By Pradeep Padala and Ravi Parimi on Fri, 2005-08-26 01:00. Software
Take a tour of the kernel's networking functionality by writing a network client that runs in kernel
space.
All Linux distributions provide a wide range of network applications-from daemons that provide a
variety of services such as WWW, mail and SSH to client programs that access one or more of these
services. These programs are written in user mode and use the system calls provided by the kernel to
perform various operations like network read and write. Although this is the traditional method of
writing programs, there is another interesting way to develop these applications by implementing them
in the kernel. The TUX Web server is a good example of an application that runs inside the kernel and
serves static content. In this article, we explain the basics of writing network applications within the
kernel and their advantages and disadvantages. As an example, we explain the implementation of an in-kernel FTP client.

Advantages and Disadvantages of In-Kernel Implementations


Why would one want to implement applications within the kernel? Here are a few advantages:

When a user-space program makes a system call, there is some overhead associated with the user-space/kernel-space transition. By programming all functionality in the kernel, we can make
gains in performance.

The data corresponding to any application that sends or receives packets is copied from user
mode to kernel mode and vice versa. By implementing network applications within the kernel, it
is possible to reduce such overhead and increase efficiency by not copying data to user mode.

In specific research and high-performance computing environments, there is a need for


achieving data transfers at great speeds. Kernel applications find use in such situations.

On the other hand, in-kernel implementations have certain disadvantages:

Security is a primary concern within the kernel, and a large class of user-mode applications are
not suitable to be run directly in the kernel. Consequently, special care needs to be taken while
designing in-kernel applications. For example, reading and writing to files within the kernel is
usually a bad idea, but most applications require some kind of file I/O.

Large applications cannot be implemented in the kernel due to memory constraints.

Network Programming Basics


Network programming is usually done with sockets. A socket serves as a communication end point
between two processes. In this article, we describe network programming with TCP/IP sockets.
Server programs create sockets, bind to well-known ports, listen and accept connections from clients.
Servers are usually designed to accept multiple connections from clients-they either fork a new process
to serve each client request (concurrent servers) or completely serve one request before accepting more
connections (iterative servers). Client programs, on the other hand, create sockets to connect to servers
and exchange information.

FTP Client-Server Interaction


Let's take a quick look at how an FTP client and server are implemented in user mode. We discuss only
active FTP in this article. The differences between active and passive FTP are not relevant to our
discussion of network programming here.

Socket Programming Basics


Here is a brief explanation of the design of an FTP client and server. The server program creates a
socket using the socket() system call. It then binds on a well-known port using bind() and waits
for connections from clients using the listen() system call. The server then accepts incoming
requests from clients using accept() and forks a new process (or thread) to serve each incoming
client request.
The client program creates a control socket using socket() and next calls connect() to establish
a connection with the server. It then creates a separate socket for data transfer using socket() and
binds to an unprivileged port (>1024) using bind(). The client now listen()s on this port for data
transfer from the server. The server now has enough knowledge to honor a data transfer request from
the client. Finally, the client uses accept() to accept connections from the server to send and receive
data. For sending and receiving data, the client and server use the write() and read() or
sendmsg() and recvmsg() system calls. The client issues close() on all open sockets to tear
down its connection to the server. Figure 1 sums it up.

Figure 1. The FTP protocol uses two sockets: one for control messages and one for data.

FTP Commands
Here is a list of a few of the FTP commands we used. Because our program provides only a basic
implementation of the protocol, we discuss only the relevant commands:

The client sends a USER <username>\r\n command to the server to begin the
authentication process.

To send the password, the client uses PASS <password>\r\n.

In some cases, the client sends a PORT command to inform the server of its preferred port for
data transfer. In such cases, the client sends PORT <a1,a2,a3,a4,p1,p2>\r\n. The
RFC for FTP requires that the a1-a4 constitute the 32-bit IP address of the client, and p1-p2
constitute the 16-bit port number. For example, if the client's IP address is 10.10.1.2 and it
chooses port 12001 for data transfer, the client sends PORT 10,10,1,2,46,225.

Some FTP clients request, by default, that data be transferred in binary format, while others
explicitly ask the server to enable data transfer in binary mode. Such clients send a TYPE
I\r\n command to the server to request this.

Figure 2 is a diagram that shows a few FTP commands and their responses from the server.

Figure 2. The client issues FTP commands over the control connection to set up the file transfer.

Socket Programming in the Kernel


Writing programs in the kernel is different from doing the same in user space.
We explain a few issues concerned with writing a network application in the kernel. Refer to Greg
Kroah-Hartman's article "Things You Never Should Do in the Kernel" (see the on-line Resources).
First, let's examine how a system call in user space completes its task. For example, look at the
socket() system call:
sockfd = socket(AF_INET,SOCK_STREAM,0);

When a program executes a system call, it traps into the kernel via an interrupt and hands over control
to the kernel. Among other things, the kernel performs various tasks, such as saving contents of
registers, making changes to address space boundaries and checking for errors with system call
parameters. Eventually, the sys_socket() function in the kernel is responsible for creating the
socket of specified address and family type, finding an unused file descriptor and returning this number
back to user space. Browsing through the kernel's code we can trace the path followed by this function
(Figure 3).

Figure 3. Behind the scenes of a system call: when user space executes socket(), the kernel does
necessary housekeeping and then returns a new file descriptor.

Design of an FTP Client


We now explain the design and implementation of a kernel FTP client. Please follow through the code
available at the Linux Journal FTP site (see Resources) as you read through the article. The main
functionality of this client is written in the form of a kernel module that adds a system call dynamically
that user-space programs can invoke to start the FTP client process. The module allows only the root
user to read a file using FTP. The user-space program that calls the system call in this module should

be used with extreme caution. For example, it is easy to imagine the catastrophic results when root
runs:
./a.out 10.0.0.1 10.0.0.2 foo_file /dev/hda1/*

and overwrites /dev/hda1 with a downloaded file from 10.0.0.1.

Exporting sys_call_table
We first need to configure the Linux kernel to allow us to add new system calls via a kernel module
dynamically. Starting with version 2.6, the symbol sys_call_table is no longer exported by the
kernel. For our module to be able to add a system call dynamically, we need to add the following lines
to arch/i386/kernel/i386_ksyms.c in the kernel source (assuming you are using a Pentium-class
machine):
extern void *sys_call_table;
EXPORT_SYMBOL(sys_call_table);

After recompiling the kernel and booting the machine into it, we are all set to run the FTP client. Refer
to the Kernel Rebuild HOWTO (see Resources) for details on compiling a kernel.

Module Basics
Let's examine the code for the module first. In the code snippets in the article, we omit error-checking
and other irrelevant details for clarity. The complete code is available from the LJ FTP site (see
Resources):
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
/* For socket etc */
#include <linux/net.h>
#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <asm/uaccess.h>
#include <linux/file.h>
#include <linux/socket.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
...
/*
 * ftp_init - log a start-up banner and install the new system call.
 *
 * Stores my_sys_call in slot SYSCALL_NUM of sys_call_table.
 * Always returns 0.
 */
int ftp_init(void)
{
	printk(KERN_INFO FTP_STRING "Starting ftp client module\n");

	sys_call_table[SYSCALL_NUM] = my_sys_call;

	return 0;
}
/*
 * ftp_exit - log a farewell message and remove the new system call.
 *
 * Puts sys_ni_syscall back into slot SYSCALL_NUM of sys_call_table.
 */
void ftp_exit(void)
{
	printk(KERN_INFO FTP_STRING "Cleaning up ftp client module, bye !\n");

	sys_call_table[SYSCALL_NUM] = sys_ni_syscall;
}
...

The program begins with the customary include directives. Notable among the header files are
linux/kernel.h for KERN_INFO, linux/slab.h, which contains the definition of kmalloc(), and
linux/smp_lock.h, which defines kernel-locking routines. System calls are handled in the kernel by
functions with the same names in user space but are prefixed with sys_. For example, the
sys_socket function in the kernel handles the task of the socket() system call. In this module,
we are using system call number 223 for our new system call. This method is not foolproof and will not
work on SMP machines. Upon unloading the module, we unregister our system call.

The System Call


The workhorse of the module is the new system call that performs an FTP read. The system call takes a
structure as a parameter. The structure is self-explanatory and is given below:
/* Request parameters that user space passes to the FTP system call. */
struct params {
/* Destination IP address (4 bytes) */
unsigned char destip[4];
/* Source IP address (4 bytes); presumably the local address used for
   the data socket - TODO confirm */
unsigned char srcip[4];
/* Source file - file to be downloaded from
the server */
char src[64];
/* Destination file - local file where the
downloaded file is copied */
char dst[64];
char user[16]; /* Username, sent in the USER command */
char pass[64]; /* Password, sent in the PASS command */
};

The system call is given below. We explain the relevant details in next few paragraphs:
/*
 * my_sys_call - entry point of the in-kernel FTP client (opening excerpt).
 * @pm: user-space pointer to the request parameters.
 *
 * This listing shows only the start of the function: copying the
 * parameters in, the root check, and the login exchange on the control
 * connection. Return values of the socket calls are not checked here.
 *
 * Returns -EFAULT if the parameters cannot be read from user space,
 * -EPERM if the caller is not root, and -ENOMEM if the message buffers
 * cannot be allocated.
 */
asmlinkage int my_sys_call
(struct params __user *pm)
{
	struct sockaddr_in saddr, daddr;
	struct socket *control = NULL;
	struct socket *data = NULL;
	struct socket *new_sock = NULL;
	int r = -1;
	char *response = NULL;
	char *reply = NULL;
	struct params pmk;

	/* Check the whole structure; sizeof(pm) would cover only a pointer. */
	if (unlikely(!access_ok(VERIFY_READ, pm, sizeof(struct params))))
		return -EFAULT;
	if (copy_from_user(&pmk, pm, sizeof(struct params)))
		return -EFAULT;
	/* Only root may use this call; -EPERM is the same value as the old -1. */
	if (current->uid != 0)
		return -EPERM;

	/* Allocate only after the checks above so early returns cannot leak. */
	response = kmalloc(SNDBUF, GFP_KERNEL);
	reply = kmalloc(RCVBUF, GFP_KERNEL);
	if (!response || !reply) {
		kfree(response);
		kfree(reply);
		return -ENOMEM;
	}

	/* Control connection to the FTP server. */
	r = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &control);
	/* NOTE(review): servaddr and temp are not declared in this excerpt. */
	memset(&servaddr, 0, sizeof(servaddr));
	servaddr.sin_family = AF_INET;
	servaddr.sin_port = htons(PORT);
	servaddr.sin_addr.s_addr =
		htonl(create_address(128, 196, 40, 225));
	r = control->ops->connect(control,
				  (struct sockaddr *) &servaddr,
				  sizeof(servaddr), O_RDWR);
	/* Read the server's greeting before logging in. */
	read_response(control, response);

	/* Log in: each command is followed by reading the server's answer. */
	sprintf(temp, "USER %s\r\n", pmk.user);
	send_reply(control, temp);
	read_response(control, response);
	sprintf(temp, "PASS %s\r\n", pmk.pass);
	send_reply(control, temp);
	read_response(control, response);

We start out by declaring pointers to a few socket structures. kmalloc() is the kernel equivalent
of malloc() and is used to allocate memory for our character arrays. The arrays response and reply
will contain the responses from the server and the replies sent to it.
The first step is to read the parameters from user mode to kernel mode. This is customarily done with
access_ok and copy_from_user/copy_to_user calls. access_ok checks whether the user-space pointer is valid to be referenced. copy_from_user is used to read data from user mode. For
reading simple variables like char and int, use __get_user.
Now that we have the user-specified parameters, the next step is to create a control socket and establish
a connection with the FTP server. sock_create() does this for us-its arguments are similar to those
we pass to the user-level socket() system call. The struct sockaddr_in variable servaddr
is now filled in with all the necessary information-address family, destination port and IP address of the
server. Each socket structure has a member that is a pointer to a structure of type struct
proto_ops. This structure contains a list of function pointers to all the operations that can be
performed on a socket. We use the connect() function of this structure to establish a connection to
the server. Our functions read_response() and send_reply() transfer data between the client
and server (these functions are explained later):
/* Create the data socket that will receive the file contents. */
r = sock_create(PF_INET, SOCK_STREAM,
IPPROTO_TCP, &data);
memset(&claddr, 0, sizeof(claddr));

/* Local (client-side) endpoint: ephemeral port on the client's address. */
claddr.sin_family = AF_INET;
claddr.sin_port = htons(EPH_PORT);
claddr.sin_addr.s_addr = htonl(
create_address(srcip));
r = data->ops->bind(data,
(struct sockaddr *)&claddr,
sizeof(claddr));
/* Mark the socket as able to accept incoming connections (backlog 1). */
r = data->ops->listen(data, 1);

Now, a data socket is created to transfer data between the client and server. We fill in another struct
sockaddr_in variable claddr with information about the client-protocol family, local
unprivileged port that our client would bind to and, of course, the IP address. Next, the socket is bound
to the ephemeral port EPH_PORT. The function listen() lets the kernel know that this socket can
accept incoming connections:
a = (char *)&claddr.sin_addr;
p = (char *)&claddr.sin_port;
send_reply(control, reply);
read_response(control, response);
strcpy(reply, "RETR ");
strcat(reply, src);
strcat(reply, "\r\n");
send_reply(control, reply);
read_response(control, response);

As explained previously, a PORT command is issued to the FTP server to let it know the port for data
transfer. This command is sent over the control socket and not over the data socket:
new_sock = sock_alloc();
new_sock->type = data->type;
new_sock->ops = data->ops;
r = data->ops->accept(data, new_sock, 0);
new_sock->ops->getname(new_sock,
(struct sockaddr *)address, &len, 2);

Now, the client is ready to accept data from the server. We create a new socket and assign it the same
type and ops as our data socket. The accept() function pulls the first pending connection in the
listen queue and creates a new socket with the same connection properties as data. The new socket
thus created handles all data transfer between the client and server. The getname() function gets the
address at the other end of the socket. The last three lines in the above segment of code are useful only
for printing information about the server:
if((total_written = write_to_file(pmk.dst,
new_sock, response)) < 0)

goto err3;

The function write_to_file deals with opening a file in the kernel and writing data from the
socket back into the file. Writing to sockets works like this:
/*
 * send_reply - send a NUL-terminated string over a socket.
 * @sock: socket to write to.
 * @str:  string to send; strlen(str) bytes are handed on, so the
 *        terminating NUL is not included.
 *
 * Passes the string to send_sync_buf() with MSG_DONTWAIT. The value
 * returned by send_sync_buf() is discarded.
 */
void send_reply(struct socket *sock, char *str)
{
	const size_t nbytes = strlen(str);

	send_sync_buf(sock, str, nbytes, MSG_DONTWAIT);
}
/*
 * send_sync_buf - send a kernel buffer over a socket with sock_sendmsg().
 * @sock:   socket to send on.
 * @buf:    start of the data to send.
 * @length: number of bytes to send.
 * @flags:  stored in msg.msg_flags.
 *
 * Excerpt: the code between the sock_sendmsg() call and the return
 * statement is elided ("...") in this listing.
 *
 * Returns written when it is non-zero, otherwise len (the last
 * sock_sendmsg() result).
 */
int send_sync_buf
(struct socket *sock, const char *buf,
const size_t length, unsigned long flags)
{
struct msghdr msg;
struct iovec iov;
int len, written = 0, left = length;
mm_segment_t oldmm;
/* No address and no control data; a single iovec carries the payload. */
msg.msg_name
= 0;
msg.msg_namelen = 0;
msg.msg_iov
= &iov;
msg.msg_iovlen
= 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags
= flags;
/* Save the current FS value, then select the kernel data segment so that
   sock_sendmsg() looks for buf in kernel space rather than user space. */
oldmm = get_fs(); set_fs(KERNEL_DS);
repeat_send:
/* Describe the part of buf that has not been written yet. */
msg.msg_iov->iov_len = left;
msg.msg_iov->iov_base = (char *) buf +
written;
len = sock_sendmsg(sock, &msg, left);
...
return written ? written : len;
}

The send_reply() function calls send_sync_buf(), which does the real job of sending the
message by calling sock_sendmsg(). The function sock_sendmsg() takes a pointer to struct
socket, the message to be sent and the message length. The message is represented by the structure
msghdr. One of the important members of this structure is iov (io vector). The iovector has two
members, iov_base and iov_len:
/* One element of an I/O vector: a buffer given by its start and length. */
struct iovec
{
/* Start of the message buffer */
void *iov_base;
/* Length of the message buffer */
__kernel_size_t iov_len;
};

These members are filled with appropriate values, and sock_sendmsg() is called to send the
message.
The macro set_fs is used to set the FS register to point to the kernel data segment. This allows
sock_sendmsg() to find the data in the kernel data segment instead of the user-space data segment.
The macro get_fs saves the old value of FS. After a call to sock_sendmsg(), the saved value of
FS is restored.
Reading from the socket works similarly:
/*
 * read_response - read data from a socket with sock_recvmsg().
 * @sock: socket to read from.
 * @str:  NOTE(review): presumably the buffer that receives the data; the
 *        msghdr setup that would show this is elided ("...") here.
 *
 * Returns len, which is assigned from sock_recvmsg(); the accompanying
 * text describes this as the number of bytes read.
 */
int read_response(struct socket *sock, char *str)
{
...
len = sock_recvmsg(sock, &msg,
max_size, 0);
...
return len;
}

The read_response() function is similar to send_reply(). After filling the msghdr structure
appropriately, it uses sock_recvmsg() to read data from a socket and returns the number of bytes
read.

A User-Space Program
Now, let's take a look at a user-space program that invokes our system call to transfer a file. We explain
the relevant details for calling a new system call:
...
/* Number assigned to the new system call. */
#define __NR_my_sys_call 223
/*
 * Generate a one-argument stub named my_sys_call that takes a
 * struct params pointer.
 * NOTE(review): the stub's return type is long long int while the kernel
 * side is declared as returning int - confirm which is intended.
 */
_syscall1(long long int, my_sys_call,
struct params *, p);
/* Driver program: fills a struct params and invokes the new system call. */
int main(int argc, char **argv)
{
struct params pm;
/* fill pm with appropriate values */
...
r = my_sys_call(&pm);
...
}

#define __NR_my_sys_call 223 assigns a number to our system call. _syscall1() is a


macro that creates a stub for the system call. It shows the type and number of arguments that our
system call expects. With this in place, my_sys_call can be invoked just like any other system call.
Upon running the program, with correct values for the source and destination files, a file from a remote
FTP server is downloaded onto the client machine. Here is a transcript of a sample run:

# make
make -C /lib/modules/2.6.9/build SUBDIRS=/home/ppadala/ftp modules
make[1]: Entering directory `/home/ppadala/linux-2.6.9'
CC [M] /home/ppadala/ftp/ftp.o
Building modules, stage 2.
MODPOST
CC
/home/ppadala/ftp/ftp.mod.o
LD [M] /home/ppadala/ftp/ftp.ko
make[1]: Leaving directory `/home/ppadala/linux-2.6.9'
# gcc do_ftp.c
# ./a.out <local host's IP address> 152.2.210.80 /README /tmp/README anonymous
anon@cs.edu
Connection from 152.2.210.80
return = 215 (length of file copied)

Conclusions
We have seen a basic implementation of an FTP client within the kernel. This article explains various
issues of socket programming in the kernel. Interested readers can follow these ideas to write various
network applications, such as an HTTP client or even a Web server in the kernel. Kernel applications,
such as the TUX Web server, are used for high-performance content serving and are well suited for
environments that demand data transfer at high rates. Careful attention has to be paid to the design,
implementation and security issues of such applications.
Resources for this article: www.linuxjournal.com/article/8453.
Pradeep Padala is a PhD student at the University of Michigan. His general interests are in distributed
systems with specific emphasis on scheduling and fault tolerance. He is the author of the NCurses
Programming HOWTO and contributes to various open-source projects. More about him can be found
on his Web site at www.eecs.umich.edu/~ppadala.
Ravi Parimi has a Master's degree in Computer Engineering and currently works in Silicon Valley,
California. His main interests are in operating systems, networking and Internet security. He has been
using Linux since 1998 and aspires to be a kernel hacker. In his free time, he pursues Vedic studies and
Chess.

Das könnte Ihnen auch gefallen