#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Append STR to the NUL-terminated string in DST (total capacity CAP bytes),
 * wrapped in single quotes so that /bin/sh treats it as one literal word.
 * Embedded single quotes are emitted as '\'' (close quote, escaped quote,
 * reopen quote).
 *
 * Returns 0 on success. Returns -1 if the result would not fit; in that
 * case DST is left exactly as it was on entry.
 */
static int append_shell_quoted (char *dst, size_t cap, const char *str)
{
    size_t start = strlen (dst);
    size_t used = start;
    const char *p;

    /* Every check keeps one byte in reserve for the terminating NUL. */
    if (used + 1 >= cap) {
        goto too_long;
    }
    dst[used++] = '\'';

    for (p = str; *p != '\0'; p++) {
        if (*p == '\'') {
            if (used + 4 >= cap) {
                goto too_long;
            }
            memcpy (dst + used, "'\\''", 4);
            used += 4;
        } else {
            if (used + 1 >= cap) {
                goto too_long;
            }
            dst[used++] = *p;
        }
    }

    if (used + 1 >= cap) {
        goto too_long;
    }
    dst[used++] = '\'';
    dst[used] = '\0';
    return 0;

too_long:
    dst[start] = '\0';
    return -1;
}

/*
 * Run COMMAND through system() and report a non-zero status on stderr.
 * Returns the raw system() status so callers may act on it if they wish.
 */
static int run_shell (const char *command)
{
    int status = system (command);

    if (status != 0) {
        fprintf (stderr, "command failed (status %d): %s\n", status, command);
    }
    return status;
}

/*
 * Usage: ./web KEYWORD PAGES
 *
 * 1. Downloads Baidu search result pages for KEYWORD with curl, saving them
 *    as baidu0.html .. baidu<PAGES>.html (pn = page index * 10).
 * 2. Runs "sh filter.sh".
 * 3. Reads merge.txt line by line and downloads each line's URL with curl
 *    into 1.html, 2.html, ...
 *
 * Exits with status 1 on bad arguments, an over-long keyword, or when
 * merge.txt cannot be opened; returns 0 otherwise.
 */
int main (int argc, char **argv)
{
    enum { CMD_MAX = 4096, URL_MAX = 1024, LINE_MAX_LEN = 512 };

    char command[CMD_MAX];
    char search_url[URL_MAX];
    char line[LINE_MAX_LEN];
    char *end;
    long pages;
    int page;
    int index;
    int n;
    FILE *stream;

    /* Both the keyword and the page count are required. */
    if (argc < 3) {
        printf ("使用方法: ./web 关键词 下载结果页的页数(限制在9页内)\n");
        exit (1);
    }

    /* strtol instead of atoi so that non-numeric input is rejected. */
    pages = strtol (argv[2], &end, 10);
    if (end == argv[2] || *end != '\0') {
        printf ("使用方法: ./web 关键词 下载结果页的页数(限制在9页内)\n");
        exit (1);
    }
    if (pages < 0 || pages > 9) {
        printf ("下载页数在10页内\n");
        exit (1);
    }

    /*
     * Download the search result pages.
     * NOTE(review): the bound is inclusive, so PAGES + 1 files are fetched
     * (baidu0.html .. baidu<PAGES>.html). This matches the original loop;
     * confirm that it is the intended meaning of the page count.
     */
    for (page = 0; page <= (int) pages; page++) {
        /* NOTE(review): the keyword is not URL-encoded, as in the original. */
        n = snprintf (search_url, sizeof search_url, "%s%s%s%d",
                      "http://www.baidu.com/s?wd=", argv[1], "&pn=", page * 10);
        if (n < 0 || (size_t) n >= sizeof search_url) {
            fprintf (stderr, "keyword is too long\n");
            exit (1);
        }

        /* curl -o baidu<page>.html '<search url>' */
        snprintf (command, sizeof command, "curl -o baidu%d.html ", page);
        if (append_shell_quoted (command, sizeof command, search_url) != 0) {
            fprintf (stderr, "keyword is too long\n");
            exit (1);
        }
        (void) run_shell (command);
    }

    (void) run_shell ("sh filter.sh");

    stream = fopen ("merge.txt", "r");
    if (stream == NULL) {
        perror ("errno");
        exit (1);
    }

    /*
     * Download every URL listed in merge.txt.
     * Assumes one bare (unquoted) URL per line -- TODO confirm against
     * filter.sh. Each URL is shell-quoted here because the file content is
     * derived from downloaded pages and must not be interpreted by the shell.
     */
    index = 1; /* number used to name the downloaded page */
    while (fgets (line, sizeof line, stream) != NULL) {
        size_t len = strcspn (line, "\r\n");

        if (line[len] == '\0') {
            /*
             * No line terminator in the buffer: either this is the last
             * line of the file, the newline is the very next byte, or the
             * line is longer than the buffer and has to be skipped.
             */
            int c = fgetc (stream);

            if (c != EOF && c != '\n') {
                while ((c = fgetc (stream)) != EOF && c != '\n') {
                    /* discard the remainder of the over-long line */
                }
                fprintf (stderr, "skipping over-long line in merge.txt\n");
                continue;
            }
        }
        line[len] = '\0';

        if (len == 0) {
            continue; /* nothing to download on a blank line */
        }

        /* curl -o <index>.html '<url>' */
        snprintf (command, sizeof command, "curl -o %d.html ", index);
        if (append_shell_quoted (command, sizeof command, line) != 0) {
            fprintf (stderr, "skipping URL that does not fit the command buffer\n");
            continue;
        }
        (void) run_shell (command);
        index += 1;
    }

    if (ferror (stream)) {
        perror ("merge.txt");
    }

    /* stream is a FILE *, so it must be released with fclose, not close. */
    fclose (stream);
    return (0);
}
/* Source: 曲径通幽论坛 forum (http://www.groad.net/bbs/) | Powered by Discuz! X3.2 */