/*
 * Linux scanlogd v1.0 by Solar Designer.  You're allowed to do whatever you
 * like with this software (including re-distribution in any form, with or
 * without modification), provided that credit is given where it is due, and
 * any modified versions are marked as such.  There's absolutely no warranty.
 */

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <syslog.h>
#include <sys/times.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#if (linux)
#define __BSD_SOURCE
#endif
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>

/*
 * Port scan detection thresholds: at least COUNT ports need to be scanned
 * from the same source, with no longer than DELAY ticks between ports.
 *
 * DELAY is compared against differences of times(2) return values, so it
 * is expressed in clock ticks.  CLK_TCK is an obsolescent name for the
 * tick rate that newer C libraries no longer provide; fall back to asking
 * sysconf() for it when the macro is missing.
 */
#ifndef CLK_TCK
#define CLK_TCK				(sysconf(_SC_CLK_TCK))
#endif
#define SCAN_COUNT_THRESHOLD		10
#define SCAN_DELAY_THRESHOLD		(CLK_TCK * 5)

/*
 * Log flood detection thresholds: temporarily stop logging if more than
 * COUNT port scans are detected with no longer than DELAY between them.
 *
 * DELAY is in clock ticks, like the host timestamps it is compared with.
 * CLK_TCK is an obsolescent name for the tick rate that newer C libraries
 * no longer provide; fall back to sysconf() when the macro is missing.
 */
#ifndef CLK_TCK
#define CLK_TCK				(sysconf(_SC_CLK_TCK))
#endif
#define LOG_COUNT_THRESHOLD		5
#define LOG_DELAY_THRESHOLD		(CLK_TCK * 20)

/*
 * syslog(3) settings.  SYSLOG_IDENT and SYSLOG_FACILITY are handed to
 * openlog() at startup, and every message is sent at SYSLOG_LEVEL.  You
 * might want to adjust these so that the messages end up in a small,
 * append-only log file of their own.
 */
#define SYSLOG_IDENT			"scanlogd"
#define SYSLOG_FACILITY			LOG_DAEMON
#define SYSLOG_LEVEL			LOG_ALERT

/*
 * Keep track of up to LIST_SIZE source addresses, using a hash table of
 * HASH_SIZE entries for faster lookups, but limiting hash collisions to
 * HASH_MAX source addresses per the same hash value.
 *
 * HASH_SIZE is derived from HASH_LOG (the number of bits in a hash value),
 * so change HASH_LOG rather than HASH_SIZE.  List entries are recycled in
 * order once all LIST_SIZE of them have been handed out.
 */
#define LIST_SIZE			0x400
#define HASH_LOG			11
#define HASH_SIZE			(1 << HASH_LOG)
#define HASH_MAX			0x10

/*
 * Receive buffer for one packet read from the raw TCP socket.
 *
 * Only the ip member is accessed through this layout.  The TCP header is
 * located at run time from the IP header length field, so the tcp member
 * and the padding exist just to make the buffer large enough: the total is
 * 60 bytes (presumably the largest possible IP header) plus one TCP header.
 */
struct header {
	struct ip ip;
	struct tcphdr tcp;
	char space[60 - sizeof(struct ip)];
};

/*
 * Information we keep per each source address.
 *
 * An entry whose saddr is 0.0.0.0 is not in use.  Port numbers are stored
 * exactly as found in the TCP header; do_log() converts them with ntohs().
 * The "if fixed" fields hold the value seen in every packet so far, and
 * are zeroed as soon as a packet with a different value arrives.
 */
struct host {
	struct host *next;		/* Next entry with the same hash */
	clock_t timestamp;		/* Last update time, from times(2) */
	time_t start;			/* Entry creation time, from time(2) */
	struct in_addr saddr, daddr;	/* Source and destination addresses */
	unsigned short sport;		/* Source port, if fixed */
	int count;			/* Number of ports in the list; set to
					 * SCAN_COUNT_THRESHOLD (one more than
					 * the list can hold) once the scan
					 * has been logged */
	unsigned short ports[SCAN_COUNT_THRESHOLD - 1];	/* List of ports */
	unsigned char flags_or;		/* TCP flags OR mask */
	unsigned char flags_and;	/* TCP flags AND mask */
	unsigned char ttl;		/* TTL, if fixed */
};

/*
 * State information.
 *
 * All of it is zeroed at startup, which leaves every list entry unused
 * (source address 0.0.0.0) and every hash chain empty.
 */
struct {
	struct host list[LIST_SIZE];	/* List of source addresses */
	struct host *hash[HASH_SIZE];	/* Hash: pointers into the list */
	int index;			/* Next list entry to be (re-)used;
					 * wraps around at LIST_SIZE */
} state;

/*
 * Convert an IP address into a hash table index.
 *
 * The raw 32-bit address value is folded onto itself in HASH_LOG-bit steps
 * (XOR of the value with itself shifted right by HASH_LOG, 2 * HASH_LOG,
 * ... bits), and the low HASH_LOG bits of the result are returned.  The
 * return value is always in the range 0 .. HASH_SIZE - 1.
 *
 * The folding is done in unsigned arithmetic all the way through, so that
 * no out-of-range value is ever converted to a signed int.
 */
int hashfunc(struct in_addr addr)
{
	unsigned int rest = addr.s_addr;
	unsigned int folded = rest;

	while ((rest >>= HASH_LOG) != 0)
		folded ^= rest;

	return (int)(folded & (unsigned int)(HASH_SIZE - 1));
}

/*
 * Log this port scan.
 *
 * Builds one syslog message (at SYSLOG_LEVEL) from the entry pointed to by
 * info: the source address (with the source port, if it stayed the same),
 * the destination address (if it stayed the same), the destination ports
 * recorded so far, a six-letter summary of the TCP flags, the TTL (if it
 * stayed the same), and the local time at which the entry was created.
 *
 * Every formatting step is bounded by the size of its buffer, and a failed
 * or truncated step just ends that part of the message early.
 */
void do_log(struct host *info)
{
	char s_saddr[32];
	char s_daddr[32 + 8 * SCAN_COUNT_THRESHOLD];
	char s_flags[8];
	char s_ttl[16];
	char s_time[32];
	struct tm *started;
	size_t used;
	int index, size, listed;
	unsigned char mask;

/* Source address and port number, if fixed */
	if (info->sport)
		snprintf(s_saddr, sizeof(s_saddr), "%s:%u",
			inet_ntoa(info->saddr),
			(unsigned int)ntohs(info->sport));
	else
		snprintf(s_saddr, sizeof(s_saddr), "%s",
			inet_ntoa(info->saddr));

/* Destination address, if fixed */
	if (info->daddr.s_addr)
		size = snprintf(s_daddr, sizeof(s_daddr), "%s ports ",
			inet_ntoa(info->daddr));
	else
		size = snprintf(s_daddr, sizeof(s_daddr), "ports ");
	if (size < 0) {
		s_daddr[0] = 0;
		size = 0;
	}
	used = (size_t)size;
	if (used > sizeof(s_daddr) - 1)
		used = sizeof(s_daddr) - 1;

/* Scanned port numbers. Never read more entries than the list can hold,
 * and never let the write position run past the end of the buffer. */
	listed = info->count;
	if (listed > (int)(sizeof(info->ports) / sizeof(info->ports[0])))
		listed = (int)(sizeof(info->ports) / sizeof(info->ports[0]));
	for (index = 0; index < listed; index++) {
		size = snprintf(s_daddr + used, sizeof(s_daddr) - used,
			"%u, ", (unsigned int)ntohs(info->ports[index]));
		if (size < 0) {
			s_daddr[used] = 0;
			break;
		}
		used += (size_t)size;
		if (used > sizeof(s_daddr) - 1)
			break;	/* Truncated; the buffer is full */
	}

/* TCP flags: lowercase letters for "always clear", uppercase for "always
 * set", and question marks for "sometimes set". */
	for (index = 0; index < 6; index++) {
		mask = 1 << index;
		if ((info->flags_or & mask) == (info->flags_and & mask)) {
			s_flags[index] = "fsrpau"[index];
			if (info->flags_or & mask)
				s_flags[index] = (char)toupper(
					(unsigned char)s_flags[index]);
		} else
			s_flags[index] = '?';
	}
	s_flags[index] = 0;

/* TTL, if fixed */
	if (info->ttl)
		snprintf(s_ttl, sizeof(s_ttl), ", TTL %u",
			(unsigned int)info->ttl);
	else
		s_ttl[0] = 0;

/* Scan start time; localtime() may fail, and strftime() leaves the buffer
 * undefined when it returns 0. */
	started = localtime(&info->start);
	if (!started || !strftime(s_time, sizeof(s_time), "%X", started))
		snprintf(s_time, sizeof(s_time), "(unknown)");

/* Log it all */
	syslog(SYSLOG_LEVEL,
		"From %s to %s..., flags %s%s, started at %s",
		s_saddr, s_daddr, s_flags, s_ttl, s_time);
}

/*
 * Rate-limited front end to do_log().
 *
 * Scans that arrive no more than LOG_DELAY_THRESHOLD ticks after the
 * previously counted one form a burst.  The first LOG_COUNT_THRESHOLD
 * scans of a burst are logged in full, the next one produces a single
 * "more follow" notice, and anything after that is dropped silently until
 * the burst ends.
 */
void safe_log(struct host *info)
{
	static clock_t burst_time = 0;	/* Timestamp of the last counted scan */
	static int burst_len = 0;	/* Scans seen in the current burst */
	clock_t stamp = info->timestamp;

/* A long enough pause, or a tick counter that went backwards, starts a
 * new burst. */
	if (stamp - burst_time > LOG_DELAY_THRESHOLD || stamp < burst_time)
		burst_len = 0;

	burst_len++;

/* Already announced the flood: stay quiet, and don't move the reference
 * time either. */
	if (burst_len > LOG_COUNT_THRESHOLD + 1)
		return;

	burst_time = stamp;

	if (burst_len == LOG_COUNT_THRESHOLD + 1)
		syslog(SYSLOG_LEVEL, "More possible port scans follow.\n");
	else
		do_log(info);
}

/*
 * Process a TCP packet.
 */
void process_packet(struct header *packet, int size)
{
	struct ip *ip;
	struct tcphdr *tcp;
	struct in_addr addr;
	unsigned short port;
	unsigned char flags;
	struct tms buf;
	clock_t now;
	struct host *current, *last, **head;
	int hash, index, count;

/* Get the IP and TCP headers */
	ip = &packet->ip;
	tcp = (struct tcphdr *)((char *)packet + ((int)ip->ip_hl << 2));

/* Sanity check */
	if ((char *)tcp + sizeof(struct tcphdr) > (char *)packet + size)
		return;

/* Get the source address, destination port, and TCP flags */
	addr = ip->ip_src;
	port = tcp->th_dport;
	flags = tcp->th_flags;

/* We're using IP address 0.0.0.0 for a special purpose here, so don't let
 * them spoof us. */
	if (!addr.s_addr) return;

/* Use times(2) here not to depend on someone setting the time while we're
 * running; we need to be careful with possible return value overflows. */
	now = times(&buf);

/* Do we know this source address already? */
	count = 0;
	last = NULL;
	if ((current = *(head = &state.hash[hash = hashfunc(addr)])))
	do {
		if (current->saddr.s_addr == addr.s_addr) break;
		count++;
		if (current->next) last = current;
	} while ((current = current->next));

/* We know this address, and the entry isn't too old. Update it. */
	if (current)
	if (now - current->timestamp <= SCAN_DELAY_THRESHOLD &&
	    now >= current->timestamp) {
/* Just update the TCP flags if we've seen this port already */
		for (index = 0; index < current->count; index++)
		if (current->ports[index] == port) {
			current->flags_or |= flags;
			current->flags_and &= flags;
			return;
		}

/* ACK to a new port? This could be an outgoing connection. */
		if (flags & TH_ACK) return;

/* Packet to a new port, and not ACK: update the timestamp */
		current->timestamp = now;

/* Logged this scan already? Then leave. */
		if (current->count == SCAN_COUNT_THRESHOLD) return;

/* Update the TCP flags */
		current->flags_or |= flags;
		current->flags_and &= flags;

/* Zero out the destination address, source port and TTL if not fixed. */
		if (current->daddr.s_addr != ip->ip_dst.s_addr)
			current->daddr.s_addr = 0;
		if (current->sport != tcp->th_sport)
			current->sport = 0;
		if (current->ttl != ip->ip_ttl)
			current->ttl = 0;

/* Got enough destination ports to decide that this is a scan? Then log it. */
		if (current->count == SCAN_COUNT_THRESHOLD - 1) {
			safe_log(current);
			current->count++;
			return;
		}

/* Remember the new port */
		current->ports[current->count++] = port;

		return;
	}

/* We know this address, but the entry is outdated. Mark it unused, and
 * remove from the hash table. We'll allocate a new entry instead since
 * this one might get re-used too soon. */
	if (current) {
		current->saddr.s_addr = 0;

		if (last)
			last->next = last->next->next;
		else if (*head)
			*head = (*head)->next;
		last = NULL;
	}

/* We don't need an ACK from a new source address */
	if (flags & TH_ACK) return;

/* Got too many source addresses with the same hash value? Then remove the
 * oldest one from the hash table, so that they can't take too much of our
 * CPU time even with carefully chosen spoofed IP addresses. */
	if (count >= HASH_MAX && last) last->next = NULL;

/* We're going to re-use the oldest list entry, so remove it from the hash
 * table first (if it is really already in use, and isn't removed from the
 * hash table already because of the HASH_MAX check above). */

/* First, find it */
	if (state.list[state.index].saddr.s_addr)
		head = &state.hash[hashfunc(state.list[state.index].saddr)];
	else
		head = &last;
	last = NULL;
	if ((current = *head))
	do {
		if (current == &state.list[state.index]) break;
		last = current;
	} while ((current = current->next));

/* Then, remove it */
	if (current) {
		if (last)
			last->next = last->next->next;
		else if (*head)
			*head = (*head)->next;
	}

/* Get our list entry */
	current = &state.list[state.index++];
	if (state.index >= LIST_SIZE) state.index = 0;

/* Link it into the hash table */
	head = &state.hash[hash];
	current->next = *head;
	*head = current;

/* And fill in the fields */
	current->timestamp = now;
	current->start = time(NULL);
	current->saddr = addr;
	current->daddr = ip->ip_dst;
	current->sport = tcp->th_sport;
	current->count = 1;
	current->ports[0] = port;
	current->flags_or = current->flags_and = flags;
	current->ttl = ip->ip_ttl;
}

/*
 * Program entry point.
 *
 * Opens a raw TCP socket (which is why this has to be started as root),
 * forks into the background, and then reads packets from the socket
 * forever, handing each one to process_packet().  Returns 1 if the socket
 * or the child process can't be created; the parent returns 0 once the
 * child is running.
 */
int main()
{
	int raw, size;
	struct header packet;

/* Get a raw socket. We could drop root right after that. */
	if ((raw = socket(AF_INET, SOCK_RAW, IPPROTO_TCP)) < 0) {
		perror("socket");
		return 1;
	}

/* Become a daemon: the child carries on, the parent exits */
	switch (fork()) {
	case -1:
		perror("fork");
		return 1;

	case 0:
		break;

	default:
		return 0;
	}

/* Don't die when the terminal we were started from goes away */
	signal(SIGHUP, SIG_IGN);

/* Initialize the state. All source IP addresses are set to 0.0.0.0, which
 * means the list entries aren't in use yet. */
	memset(&state, 0, sizeof(state));

/* Set the identification string and facility for our syslog messages */
	openlog(SYSLOG_IDENT, 0, SYSLOG_FACILITY);

/* Let's start. The length is compared as a signed value: a failed read
 * returns -1, which must not be mistaken for a huge unsigned size. */
	while (1) {
		size = read(raw, &packet, sizeof(packet));
		if (size >= (int)sizeof(packet.ip))
			process_packet(&packet, size);
	}
}
