`
virusea
  • 浏览: 1749 次
  • 性别: Icon_minigender_1
最近访客 更多访客>>
社区版块
存档分类
最新评论

Java 抓取本地网页信息并进行多线程访问

 
阅读更多

package crawls.crawls;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.Scanner;

/**
 * Console client for the line-based query protocol spoken over a TCP socket.
 *
 * <p>The client sends a greeting, echoes the server's banner lines, then relays
 * queries typed on standard input and prints the replies. Result lines arrive
 * as a single line whose entries are separated by {@code "!"}; each entry is
 * printed on its own line.
 *
 * <p>The connecting constructor starts the thread itself, so creating an
 * instance is enough to run a session.
 */
public class crawlsClient extends Thread {
    /** Reply that marks the end of the session instead of a result set. */
    private static final String END_OF_QUERY = "查询已结束";
    /** Number of banner lines echoed right after the greeting is sent. */
    private static final int BANNER_LINES = 4;
    /** Maximum number of follow-up query rounds after the initial query. */
    private static final int MAX_SUB_QUERIES = 6;

    Scanner b = new Scanner(System.in);
    private PrintWriter pw;
    private BufferedReader br;
    private Socket ss;
    private String name;

    /** Creates an unconnected client; running it is a no-op. */
    public crawlsClient() {
    }

    /**
     * Connects to the server and immediately starts the session thread.
     *
     * @param a    host name or address of the server
     * @param s    TCP port of the server
     * @param name display name sent in the greeting
     */
    public crawlsClient(String a, int s, String name) {
        this.name = name;
        try {
            ss = new Socket(a, s);
            br = new BufferedReader(new InputStreamReader(ss.getInputStream()));
            pw = new PrintWriter(new OutputStreamWriter(ss.getOutputStream()), true);
        } catch (IOException e) {
            // Covers UnknownHostException and every other connection failure.
            e.printStackTrace();
        }
        // Started even after a failed connect so callers can still join();
        // run() detects the missing connection and returns at once.
        start();
    }

    /**
     * Runs one complete session and always releases the connection afterwards,
     * whether the session ended normally, early, or with an error.
     */
    @Override
    public void run() {
        try {
            if (pw == null || br == null) {
                // No connection (failed or never opened): nothing to talk to.
                return;
            }
            converse();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeConnection();
        }
    }

    /** Drives the protocol; returns early as soon as either side stops talking. */
    private void converse() throws IOException {
        pw.println("你好,我是客户端:" + name);
        for (int i = 0; i < BANNER_LINES; i++) {
            if (!echoLine()) {
                return;
            }
        }

        // Initial query: one status/header line followed by the result line.
        if (!sendUserInput() || !echoLine() || !echoRecords()) {
            return;
        }

        for (int round = 0; round < MAX_SUB_QUERIES; round++) {
            // Prompt from the server, then the user's follow-up query.
            if (!echoLine() || !sendUserInput()) {
                return;
            }
            String reply = br.readLine();
            if (reply == null || END_OF_QUERY.equals(reply)) {
                return;
            }
            System.out.println(reply);
            if (!echoRecords()) {
                return;
            }
        }
    }

    /** Prints the next server line; returns false if the server closed the stream. */
    private boolean echoLine() throws IOException {
        String line = br.readLine();
        if (line == null) {
            return false;
        }
        System.out.println(line);
        return true;
    }

    /** Prints each "!"-separated entry of the next server line; false at end of stream. */
    private boolean echoRecords() throws IOException {
        String line = br.readLine();
        if (line == null) {
            return false;
        }
        for (String entry : line.split("!")) {
            System.out.println(entry);
        }
        return true;
    }

    /** Forwards one line typed by the user; returns false if standard input is exhausted. */
    private boolean sendUserInput() {
        if (!b.hasNextLine()) {
            return false;
        }
        pw.println(b.nextLine());
        return true;
    }

    /** Closes the streams and the socket, tolerating a partially opened connection. */
    private void closeConnection() {
        if (pw != null) {
            pw.close();
        }
        try {
            if (br != null) {
                br.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (ss != null) {
                ss.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

/--------------------------------------------------/
package crawls.crawls;

import java.io.BufferedReader;
import java.io.File;
import java.io.PrintWriter;
import java.net.Socket;
import java.util.ArrayList;
import java.util.Scanner;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.OutputStreamWriter;
import java.io.OutputStreamWriter;


/**
 * Per-connection worker of the timetable query server.
 *
 * <p>Each instance serves exactly one client socket on its own thread: it reads
 * the client's greeting, sends a banner, answers one initial query against the
 * data file and then up to {@link #MAX_SUB_QUERIES} follow-up queries, each of
 * which narrows the previous result set.
 *
 * <p>All connection state is held per instance so that several clients can be
 * served at the same time without sharing streams or result lists.
 */
public class crawlSData extends Thread {
    /** Text file holding one comma-separated record per line. */
    private static final File file = new File("D:/tom/data.txt");
    /** Reply sent when a query matches no record. */
    private static final String NO_MATCH = "没有这个查询条件";
    /** Maximum number of follow-up queries served per connection. */
    private static final int MAX_SUB_QUERIES = 6;

    private Socket ss;
    private BufferedReader br;
    private PrintWriter pw;

    /** Creates a worker without a connection; running it is a no-op. */
    public crawlSData() {
    }

    /**
     * Wraps the client socket's streams and immediately starts the worker thread.
     *
     * @param ss accepted client socket; closed by this worker when the session ends
     */
    public crawlSData(Socket ss) {
        this.ss = ss;
        try {
            br = new BufferedReader(new InputStreamReader(ss.getInputStream()));
            pw = new PrintWriter(new OutputStreamWriter(ss.getOutputStream()), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        start(); // the session begins as soon as the worker is created
    }

    /**
     * Serves the session and always closes the connection afterwards, including
     * on the early "no match" exits, so the client never waits on a dead session.
     */
    @Override
    public void run() {
        try {
            if (br == null || pw == null) {
                return; // streams could not be opened; nothing to serve
            }
            serveClient();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeConnection();
        }
    }

    /** Runs the greeting, the initial query and the follow-up query rounds. */
    private void serveClient() throws IOException {
        String clientLine = br.readLine(); // greeting: "<text>:<client name>"
        if (clientLine == null) {
            return;
        }
        String[] clientLines = clientLine.split(":");
        String greeting = clientLines.length > 0 ? clientLines[0] : "";
        String clientName = clientLines.length > 1 ? clientLines[1] : "";
        System.out.println(greeting + ":" + clientName);

        pw.println("2014年7月22日-2014年7月28日国内航班表");
        pw.println("你好!" + clientName + ",有如下五个地方时间表");
        pw.println("上海,北京,武汉,广州,长沙");
        pw.println("请输入条件之一:例如:07-22,周六,吉祥航空,HO1251,出发21:50,出发虹桥国际机场T2,到达00:20,到达首都国际机场T3,520起");

        String console = br.readLine(); // initial query
        if (console == null) {
            return;
        }
        ArrayList<String> matches = loadMatchingRecords(console);
        if (matches.isEmpty()) {
            pw.println(NO_MATCH);
            return;
        }
        pw.println("时间  星期 航空公司 公司编号 出发时间   出发地点        到达时间    到达地点       预计价格");
        pw.println(withMarkers(matches));

        for (int remaining = MAX_SUB_QUERIES; remaining >= 1; remaining--) {
            pw.println("请输入子查询条件:");
            String d = br.readLine();
            if (d == null) {
                return; // client disconnected
            }
            if (d.equals("结束")) {
                pw.println("查询已结束");
                break;
            }
            ArrayList<String> narrowed = filter(matches, d);
            if (narrowed.isEmpty()) {
                pw.println(NO_MATCH);
                return;
            }
            pw.println("时间  星期 航空公司 公司编号 出发时间   起飞地点        到达时间    到达地点       预计价格");
            pw.println(withMarkers(narrowed));
            matches = narrowed; // the next round searches within this result
        }
    }

    /**
     * Reads the data file and returns every record that has a field equal to
     * the query. Leading whitespace of each line is stripped and blank lines
     * are skipped.
     */
    private static ArrayList<String> loadMatchingRecords(String query) throws IOException {
        ArrayList<String> result = new ArrayList<String>();
        BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line;
            while ((line = data.readLine()) != null) {
                String record = stripLeadingWhitespace(line);
                if (record.length() > 0 && hasField(record, query)) {
                    result.add(record);
                }
            }
        } finally {
            data.close();
        }
        return result;
    }

    /** Returns the records of {@code source} that have a field equal to the query. */
    private static ArrayList<String> filter(ArrayList<String> source, String query) {
        ArrayList<String> result = new ArrayList<String>();
        for (String record : source) {
            if (hasField(record, query)) {
                result.add(record);
            }
        }
        return result;
    }

    /** True if any comma-separated field of the record equals the query exactly. */
    private static boolean hasField(String record, String query) {
        for (String field : record.split(",")) {
            if (field.equals(query)) {
                return true; // one hit is enough; the record is listed once
            }
        }
        return false;
    }

    /**
     * Prefixes every record with "!" so the receiver can split the single
     * result line back into records. The list is sent in its default
     * {@code toString()} form, which is the established wire format.
     */
    private static ArrayList<String> withMarkers(ArrayList<String> records) {
        ArrayList<String> marked = new ArrayList<String>(records.size());
        for (String record : records) {
            marked.add("!" + record);
        }
        return marked;
    }

    /** Returns the text without its leading whitespace characters. */
    private static String stripLeadingWhitespace(String text) {
        int start = 0;
        while (start < text.length() && Character.isWhitespace(text.charAt(start))) {
            start++;
        }
        return text.substring(start);
    }

    /** Closes the streams and the socket, tolerating a partially opened connection. */
    private void closeConnection() {
        try {
            if (br != null) {
                br.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (pw != null) {
            pw.close();
        }
        try {
            if (ss != null) {
                ss.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
/--------------------------------------------------/
package crawls.crawls;

import java.io.IOException;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.Scanner;

/**
 * Entry point of the query server: listens on TCP port 8080 and hands every
 * accepted connection to a new {@code crawlSData} worker.
 */
public class crawlServer {

    /**
     * Accepts clients until the listening socket fails, then releases the port.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ServerSocket server = null;
        try {
            server = new ServerSocket(8080);
            System.out.println("等待客户端连接:......");
            while (true) {
                Socket st = server.accept();
                // Constructing the worker is enough: it starts its own thread.
                new crawlSData(st);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the listening port once the accept loop has failed.
            if (server != null) {
                try {
                    server.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

    }

}

/--------------------------------------------------/
package crawls.crawls;

/**
 * Launcher for a single query client that connects to a server on the local
 * machine.
 */
public class crawlsRun {

    /**
     * Creates the client; its constructor opens the connection and starts the
     * session thread, so no further call is needed here.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String host = "localhost";
        final int port = 8080;
        final String clientName = "中国国航";
        new crawlsClient(host, port, clientName);
    }

}


/--------------------------------------------------/
package crawls.crawls;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Scanner;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class CrawlUtil {
private static File file=new File("D:/tom/data.txt");
private static ArrayList<String> list=new ArrayList<String>();
private static ArrayList<String> list1=new ArrayList<String>();
private static PrintWriter writer;
private static BufferedReader reader;
private static int count;
private static int no=count++;
public static void saveData(){
File search =new File("C:/Users/virusea/Desktop/2.htm");
Document doc;
try {
doc = Jsoup.parse(search,"GBK");
Elements eles = doc.select("#searchPannel").select(".searchresult_content").select(".search_table_header").select("tbody");
Elements tds1 = doc.select("#searchPannel").select("#searchControlPannel").select(".current").select(".calendar_date");
for (Element ele : eles) {
Elements tds = ele.select("tr").select("td");
Elements planeTime = tds.get(0).select("div");
String datanow=tds1.get(0).text().trim();
    String plan=planeTime.get(0).select("span").text().trim();
    String planumber=planeTime.get(0).select("strong").text().trim();
    String startime=tds.get(1).select("div").get(0).text().trim();
    String startadd=tds.get(1).select("div").get(1).text().trim();
    String arrivrtime=tds.get(3).text().trim().substring(0,5);
    String arrivradd=tds.get(3).html();
    String arrivradds=arrivradd.substring(arrivradd.lastIndexOf("</div>") + 6);
    String price=tds.get(7).text().trim().substring(1);
    list.add(" "+datanow+","+plan+","+planumber+", 出发时间:"+startime+", 出发地点:"+startadd+", 到达时间:"+arrivrtime+", 到达地点"+arrivradds+","+price);    
}
writer=new PrintWriter(new OutputStreamWriter(new FileOutputStream(file)),true);
//writer.clear();

} catch (IOException e) {
e.printStackTrace();
}
//writer.close();
}
public static void readData() throws IOException{
reader=new BufferedReader(new InputStreamReader(new FileInputStream(file)));
for(String lists:list)
{
writer.println(lists);
}
Scanner console= new Scanner(System.in);
System.out.println("请输入查询条件:");
String input=console.nextLine();
if(reader==null)
{

}else{
try {
while(reader.read()!=-1){
String b1=reader.readLine();
if(b1==null){
System.out.println("被查询内容为空!");
}else{
String[] a1=b1.split(",");
for(String b3:a1){
if(b3.equals(input))
{
System.out.println(Arrays.toString(a1));
count++;
System.out.println("共查询到数据数:"+count+"条");
}

}

}

}
} catch (IOException e) {
e.printStackTrace();
}
}
reader.close();
}
public static void main(String[] args) throws IOException {
saveData();
readData();
}

}

/--------------------------------------------------/

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics