`
soulwzy
  • 浏览: 15017 次
  • 性别: Icon_minigender_1
  • 来自: 厦门
最近访客 更多访客>>
社区版块
存档分类
最新评论

2b公交抓取程序,第一版(JAVA)

阅读更多
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawler for Xiamen real-time bus data.
 *
 * <p>Provides two operations against the mybus.xiamentd.com pages: listing the
 * station names of a line ({@link #grabStation(String)}) and timing how long a
 * bus takes to travel between two stop ordinals by polling the real-time page.
 *
 * @author Administrator
 */
public class PaPA {
	/** Station names collected by the most recent successful {@link #grabStation(String)} call. */
	static List<String> stationList = new ArrayList<String>();

	/** The nearest bus is still on its way to the queried stop. */
	static final int STAT_MOVING = 1;
	/** The nearest bus has arrived at the queried stop. */
	static final int STAT_READY = 2;
	/** No recognizable state was found on the page. */
	static final int STAT_NON = 0;

	/** Delay between two polls of the real-time page, in milliseconds. */
	private static final long POLL_INTERVAL_MS = 2000L;

	/**
	 * Fetches the line-detail page and extracts the station names from it.
	 *
	 * <p>On success the shared {@link #stationList} is replaced with the freshly
	 * parsed names, so repeated calls no longer accumulate duplicates. If the
	 * page cannot be fetched, the stack trace is printed and the list is
	 * returned unchanged.
	 *
	 * @param url address of the line-detail page
	 * @return the shared {@link #stationList}
	 * @throws IllegalStateException if no station count can be found on the page
	 */
	public List<String> grabStation(String url) {
		try {
			Document doc = Jsoup.connect(url).get();

			// The element whose own text contains "共" carries the total number of stations.
			Elements countStationElements = doc
					.getElementsContainingOwnText("共");
			Element countStationElement = countStationElements.get(0);
			int countStation = parseStationCount(countStationElement
					.childNode(0).toString());

			// Locate the node whose text is "反向"; the station rows are the
			// siblings that follow its parent element.
			Elements elements = doc.getElementsContainingOwnText("反向");
			Element nextStation = elements.get(0).parent().nextElementSibling();

			// Parse into a local list first so a failure half-way through
			// leaves the shared list untouched.
			List<String> fetched = new ArrayList<String>();
			for (int i = 0; i < countStation && nextStation != null; i++) {
				Element child = nextStation.child(0);
				fetched.add(child.childNode(0).toString());
				nextStation = nextStation.nextElementSibling();
			}

			stationList.clear();
			stationList.addAll(fetched);
		} catch (IOException e) {
			// Best effort: report the failure and return whatever was collected before.
			e.printStackTrace();
		}
		return stationList;
	}

	/**
	 * Extracts the first run of digits that follows the "共" marker.
	 *
	 * <p>Replaces the former fixed {@code substring(3, 5)}, which only worked
	 * for two-digit counts at one exact offset.
	 *
	 * @param text raw text of the node that holds the station count
	 * @return the parsed station count
	 * @throws IllegalStateException if the text contains no digits after the marker
	 */
	private static int parseStationCount(String text) {
		int marker = text.indexOf("共");
		int begin = marker < 0 ? 0 : marker + 1;
		while (begin < text.length() && !Character.isDigit(text.charAt(begin))) {
			begin++;
		}
		int finish = begin;
		while (finish < text.length() && Character.isDigit(text.charAt(finish))) {
			finish++;
		}
		if (begin == finish) {
			throw new IllegalStateException(
					"No station count found in: " + text);
		}
		return Integer.parseInt(text.substring(begin, finish));
	}

	/**
	 * Times a bus on line 400 between stop ordinals 12 and 13 and prints the
	 * elapsed milliseconds.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		PaPA grab = new PaPA();

		// Line and stop range to observe.
		int lineId = 400;
		int start = 12;
		int end = 13;
		int direction = 1; // departing from Guanyinshan, per the original author's note

		try {
			System.out.println(grab.checkEachStation(lineId, start, end, direction));
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

	/**
	 * Polls the real-time page until the bus has been reported as arrived at
	 * every stop ordinal from {@code start} through {@code end}.
	 *
	 * @param lineId    bus line identifier
	 * @param start     ordinal of the first stop to watch
	 * @param end       ordinal of the last stop to watch
	 * @param direction travel direction of the line
	 * @return elapsed time in milliseconds between the first poll and the
	 *         arrival at the {@code end} stop
	 * @throws IOException if a page cannot be fetched, or (as
	 *                     {@link java.io.InterruptedIOException}) if the thread
	 *                     is interrupted while waiting between polls
	 */
	private long checkEachStation(int lineId, int start, int end, int direction)
			throws IOException {

		long sysTime1 = System.currentTimeMillis();

		for (;;) {

			// Fetch the real-time page and find the "nearest bus" element.
			int state = PaPA.STAT_NON;
			String url = generateUrl(lineId, start, end, direction);
			Document doc = Jsoup.connect(url).get();
			Element busNearBy = doc.getElementsContainingOwnText("最近公交").get(0);
			String sss = busNearBy.toString();

			// The page has a different layout when no data is available, so
			// skip parsing. Wait before retrying rather than hammering the server.
			if (sss.contains("暂无信息")) {
				System.out.println("暂无信息");
				pauseBeforeNextPoll();
				continue;
			}

			Element child = busNearBy.child(0);
			String result = child.toString();

			if (result.contains("到达") || result.contains("开往")) {
				state = PaPA.STAT_MOVING;
			} else if (result.contains("已经到站")) {
				state = PaPA.STAT_READY;
			}

			if (state == PaPA.STAT_READY) {
				System.out.println("车已经到站");

				// Move on to the next stop; once past `end` the trip is complete.
				start++;
				if (start > end) {
					long sysTime2 = System.currentTimeMillis();
					return sysTime2 - sysTime1;
				}
			} else {
				// Bus not at the stop yet (or state unknown): wait and poll again.
				pauseBeforeNextPoll();
			}

		}

	}

	/**
	 * Sleeps for one poll interval.
	 *
	 * @throws IOException an {@link java.io.InterruptedIOException} if the
	 *                     thread is interrupted; the interrupt flag is restored
	 *                     first so callers can still observe it
	 */
	private static void pauseBeforeNextPoll() throws IOException {
		try {
			Thread.sleep(POLL_INTERVAL_MS);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			java.io.InterruptedIOException wrapped = new java.io.InterruptedIOException(
					"Interrupted while waiting to poll the real-time page");
			wrapped.initCause(e);
			throw wrapped;
		}
	}

	/**
	 * Builds the real-time query URL for one stop of a line.
	 *
	 * <p>The {@code station} query parameter is a fixed, URL-encoded station
	 * name; only {@code ordinal} varies between calls. {@code end} is accepted
	 * but not used in the URL.
	 *
	 * @param lineId    bus line identifier
	 * @param start     ordinal of the stop to query
	 * @param end       unused
	 * @param direction travel direction of the line
	 * @return the real-time query URL
	 */
	private String generateUrl(int lineId, int start, int end, int direction) {
		return "http://mybus.xiamentd.com/RealtimeQuery?lineId=" + lineId
				+ "&direction=" + direction
				+ "&station=%E4%B8%8A%E6%9D%8E%E7%AB%99&ordinal=" + start;
	}
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics