x1.cpp is a utility for extracting lists of hostnames from the Northern Light search engine. It uses wget and has been tested on Linux.
050e801933c4356b808efa3fc09830292199ef555a08e5ad9a3052c6a9048fc7
/* X1 - get hosts fast & easy.
Installation: gcc x1.cpp -o x1
Requirements: Linux, but chances are good that
it will work on other Unix-like OSes too.
Example: x1 .jp 1000 hosts.list
will get you 1000 Japanese hosts and save them
one per line to hosts.list. Remember, it
automatically removes duplicate hosts, so
you might not get a total of 1000 hosts.
Have a lot of fun!!
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <sys/wait.h>
#include <unistd.h>
/* Scratch file that receives the generated query URLs (passed to wget -i). */
#define urltemp "a.temp.a"
/* Scratch file that wget writes the downloaded pages into (wget -O). */
#define wgetfile "b.temp.b"
/* Scratch file holding the extracted host names before they are sorted. */
#define tempdomainlist "c.temp.c"
/* Writes the search-query URLs for `domain` to urltemp, one URL per ten
   requested hosts. Returns 1 on success, 0 on failure. */
int createurls(char *maxhosts, char *domain);
/* Prints `description` followed by a success or failure tag. Returns 1 when
   status == 1 and 0 otherwise; `skipable` only selects the failure message. */
int checkok(char *description, int status, bool skipable);
/* Runs wget on the URL list in urltemp, storing its output in wgetfile.
   Returns 0 when the command is judged to have failed, 1 otherwise. */
int downloadpages();
/* Deletes temporary files. Returns 1 if the deletions succeeded, 0 otherwise. */
int cleartemp();
/* Returns 1 if `expression` occurs anywhere inside `word`, 0 otherwise. */
int contained(char *word, char *expression);
/* Rewrites `word` in place: cuts it at a '/' found three characters after
   a '.', then drops the first 13 characters. */
void clean(char *word);
/* Scans wgetfile for whitespace-separated tokens that contain both an
   http link prefix and `expression`, cleans them and writes them one per
   line to tempdomainlist. Returns 1 on success, 0 on failure. */
int extract(char *expression);
/* Sorts tempdomainlist with duplicate lines removed (sort -u) into
   `outputfilename`. Returns 0 when the command is judged to have failed,
   1 otherwise. */
int removeduplicates(char *outputfilename);
/*
 * Program entry point.
 *
 * Expects three arguments: domain, maxhosts and outputfile. Runs the
 * pipeline (create URL list, download, extract host names, remove
 * duplicates) and stops at the first step that fails. Removing the
 * temporary files afterwards is best effort and never aborts the run.
 *
 * Returns EXIT_SUCCESS when the host list was written, EXIT_FAILURE on a
 * usage error or when any mandatory step failed, so that calling scripts
 * can detect the failure.
 */
int main(int argc, char *argv[])
{
    if (argc < 4) {
        /* argv[0] may legally be missing when argc is 0. */
        printf("Usage: %s domain maxhosts outputfile\n",
               (argc > 0 && argv[0] != NULL) ? argv[0] : "x1");
        return EXIT_FAILURE;
    }

    if (!checkok("Creating url list...", createurls(argv[2], argv[1]), false))
        return EXIT_FAILURE;
    if (!checkok("Downloading URLs from northernlight...", downloadpages(), false))
        return EXIT_FAILURE;
    if (!checkok("Extracting server names...", extract(argv[1]), false))
        return EXIT_FAILURE;
    if (!checkok("Removing duplicates...", removeduplicates(argv[3]), false))
        return EXIT_FAILURE;

    /* Left-over temporary files are not worth failing the whole run for. */
    checkok("Deleting temporary files...", cleartemp(), true);

    printf("Ok.. Everything worked fine. Saved to %s. Have a nice day!\n\n(c) 2000 by Killer-Whale(killer.whale@gmx.net)\n", argv[3]);
    return EXIT_SUCCESS;
}
/*
 * Writes the list of search-query URLs to the urltemp file, one per line.
 *
 * maxhosts: decimal number of hosts wanted; one URL is generated for every
 *           full group of ten (a non-numeric value yields no URLs).
 * domain:   text appended to the "URL:" search term, e.g. ".jp".
 *
 * The k-th URL carries the page list "k+...+2+1+" in its nth parameter.
 * Each URL is streamed straight to the file rather than assembled in a
 * fixed-size buffer, so neither a long domain nor a large page count can
 * overflow anything.
 *
 * Returns 1 when the file was written completely, 0 when it could not be
 * opened or a write failed.
 */
int createurls(char *maxhosts, char *domain)
{
    static const char base[] =
        "http://www.northernlight.com/nlquery.fcg?ho=mantle&po=5222&qr=*+URL%3A";
    long pages = strtol(maxhosts, NULL, 10) / 10;
    FILE *urlfile;
    int ok;

    /* Start from a fresh file; a missing old file is not an error. */
    unlink(urltemp);
    urlfile = fopen(urltemp, "w");
    if (urlfile == NULL)
        return 0;

    for (long k = 1; k <= pages; k++) {
        /* base contains a '%', so it must never be used as a format string. */
        fputs(base, urlfile);
        fputs(domain, urlfile);
        fputs("&cb=0&nth=", urlfile);
        for (long i = k; i > 0; i--)
            fprintf(urlfile, "%ld+", i);
        fputs("&orl=\n", urlfile);
    }

    ok = !ferror(urlfile);
    if (fclose(urlfile) != 0)
        ok = 0;
    return ok;
}
/*
 * Reports the outcome of one pipeline step on stdout.
 *
 * description: text printed first, without a trailing newline.
 * status:      result of the step; exactly 1 means success.
 * skipable:    on failure, selects " Skipping..." instead of "Exiting...".
 *
 * Returns 1 when status is 1, otherwise 0.
 */
int checkok(char *description, int status, bool skipable)
{
    const int succeeded = (status == 1);

    printf("%s", description);

    if (!succeeded) {
        fputs(" [Failed]", stdout);
        fputs(skipable ? " Skipping...\n" : "\nExiting...\n", stdout);
        return 0;
    }

    fputs(" [Success]\n", stdout);
    return 1;
}
/*
 * Downloads every URL listed in urltemp with wget, concatenating the
 * pages into wgetfile.
 *
 * Returns 0 when wget could not be run at all: system() failed, the shell
 * reported 126/127 (not executable / not found), or the command was killed
 * by a signal. Any other wget exit status returns 1, so that a few failed
 * pages do not throw away the ones that were fetched.
 */
int downloadpages()
{
    char wgetcommand[64];
    int length;
    int status;

    length = snprintf(wgetcommand, sizeof wgetcommand,
                      "wget -i %s -O %s", urltemp, wgetfile);
    if (length < 0 || (size_t)length >= sizeof wgetcommand)
        return 0;

    /* system() returns a wait status, not the exit code itself. */
    status = system(wgetcommand);
    if (status == -1 || !WIFEXITED(status))
        return 0;
    if (WEXITSTATUS(status) == 126 || WEXITSTATUS(status) == 127)
        return 0;
    return 1;
}
int cleartemp()
{ if((unlink(urltemp) != 0) || (unlink(wgetfile) != 0))
return 0;
return 1;
}
/*
 * Tests whether `expression` occurs anywhere inside `word`.
 *
 * Both arguments must be NUL-terminated strings. An empty expression is
 * contained in every word. There is no limit on the length of either
 * string.
 *
 * Returns 1 if the substring is found, 0 otherwise.
 */
int contained(char *word, char *expression)
{
    return strstr(word, expression) != NULL ? 1 : 0;
}
/*
 * Reduces a link token to its host part, in place.
 *
 * Step 1: at the first '.' that is followed by exactly two characters and
 *         then a '/', the word is cut at that '/' (this drops the path
 *         after a two-letter top-level domain).
 * Step 2: the first 13 characters, the length of href="http://, are
 *         removed. The caller is expected to pass a token that starts
 *         with that prefix.
 *
 * A word that is no longer than the prefix once step 1 is done becomes
 * the empty string. No byte beyond the string's terminator is read or
 * written.
 */
void clean(char *word)
{
    const size_t prefix_len = sizeof("href=\"http://") - 1;
    size_t len = strlen(word);

    /* i + 3 < len keeps word[i + 3] inside the string proper. */
    for (size_t i = 0; i + 3 < len; i++) {
        if (word[i] == '.' && word[i + 3] == '/') {
            word[i + 3] = '\0';
            len = i + 3;
            break;
        }
    }

    if (len <= prefix_len) {
        word[0] = '\0';
        return;
    }
    /* The regions overlap, so memmove is required; +1 carries the NUL. */
    memmove(word, word + prefix_len, len - prefix_len + 1);
}
/*
 * Pulls host names out of the downloaded pages.
 *
 * Reads wgetfile token by token (whitespace separated). Every token that
 * contains both href="http:// and `expression` is passed through clean()
 * and written on its own line to tempdomainlist. Tokens that end up empty
 * after cleaning are skipped.
 *
 * tempdomainlist is truncated first, so results of an earlier run never
 * leak into this one.
 *
 * Returns 1 on success, 0 if a file cannot be opened or an I/O error
 * occurred.
 */
int extract(char *expression)
{
    char link_prefix[] = "href=\"http://";
    char oneword[500];
    FILE *pages;
    FILE *hosts;
    int ok;

    pages = fopen(wgetfile, "r");
    if (pages == NULL)
        return 0;

    hosts = fopen(tempdomainlist, "w");
    if (hosts == NULL) {
        fclose(pages);
        return 0;
    }

    /* The width 499 must stay one below sizeof oneword: the input is
       downloaded data and may contain arbitrarily long tokens. */
    while (fscanf(pages, "%499s", oneword) == 1) {
        if (!contained(oneword, link_prefix) || !contained(oneword, expression))
            continue;
        clean(oneword);
        if (oneword[0] != '\0')
            fprintf(hosts, "%s\n", oneword);
    }

    ok = !ferror(pages) && !ferror(hosts);
    fclose(pages);
    if (fclose(hosts) != 0)
        ok = 0;
    return ok;
}
/*
 * Sorts tempdomainlist, drops duplicate lines and writes the result to
 * `outputfilename` by running "sort -u -o outputfilename tempdomainlist".
 *
 * sort is started directly with an argument vector instead of through a
 * shell command line, so the output name may have any length and may
 * contain spaces or shell metacharacters.
 *
 * Returns 1 if sort ran and exited with status 0, otherwise 0.
 */
int removeduplicates(char *outputfilename)
{
    int status = 0;
    pid_t child = fork();

    if (child < 0)
        return 0;

    if (child == 0) {
        execlp("sort", "sort", "-u", "-o", outputfilename, tempdomainlist,
               (char *)NULL);
        /* Only reached when sort could not be started. */
        _exit(127);
    }

    if (waitpid(child, &status, 0) < 0)
        return 0;
    return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 1 : 0;
}