Selenium 與PhantomJS
步驟

接下來安裝 Selenium.WebDriver, Selenium.PhantomJS.WebDriver, Selenium.WebDriver.PhantomJS.Xplatform, Selenium.Support 四個套件.
完成安裝四個套件後, 底下的爬蟲程式即可抓取台灣銀行黃金存簿的歷史金價, 並存入MySQL資料庫中
using MySql.Data.MySqlClient;
using OpenQA.Selenium;
using OpenQA.Selenium.PhantomJS;
using OpenQA.Selenium.Support.UI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApp1
{
class Program
{
static IWebDriver web;
static string connStr = "server=ip;uid=account;pwd=pwd;database=db";
static MySqlConnection mySqlConn = new MySqlConnection(connStr);
static void Main(string[] args)
{
Random r = new Random();
web = new PhantomJSDriver(PhantomJSDriverService.CreateDefaultService());
for (int y = 2018; y <= 2019; y++)
{
for (int m = 1; m <= 12; m++) { if (y == 2019 && m > 8) break;
WebCollect(y, m);
int delay = (int)(r.NextDouble() * 1000) + 1000;
Console.WriteLine("sleep : {0} ms", delay);
Thread.Sleep(delay);
}
}
web.Close();
}
static void WebCollect(int yy, int mm)
{
mySqlConn.Open();
StringBuilder sbInsert = new StringBuilder();
sbInsert.Append("insert into taiwan_bank_gold (gd_date, gd_buy, gd_sale) values ");
Dictionary<string, List<int>> wareHouse = new Dictionary<string, List<int>>();
web.Navigate().GoToUrl("https://rate.bot.com.tw/gold/passbook");
IWebElement radio = web.FindElement(By.Id("input_date"));
radio.Click();
SelectElement select_yy = new SelectElement(web.FindElement(By.Name("year")));
SelectElement select_mm = new SelectElement(web.FindElement(By.Name("month")));
select_yy.SelectByValue(string.Format("{0}", yy));
select_mm.SelectByValue(string.Format("{0:00}", mm));
IWebElement btn = web.FindElement(By.CssSelector("[class='btn btn-inverse noscript']"));
btn.Submit();
IWebElement tbody = web.FindElement(By.TagName("tbody"));
IReadOnlyCollection<IWebElemnet> trs = tbody.FindElements(By.TagName("tr"));
foreach (IWebElement tr in trs)
{
IReadOnlyCollection<IWebElement> tds = tr.FindElements(By.TagName("td"));
List<int> list = new List<int>();
list.Add(Int32.Parse(tds.ToList()[3].Text.Replace(",", "")));
list.Add(Int32.Parse(tds.ToList()[4].Text.Replace(",", "")));
wareHouse.Add(tds.ToList()[0].Text, list);
}
wareHouse = wareHouse.OrderBy(o => o.Key).ToDictionary(o => o.Key, p => p.Value);
foreach (string key in wareHouse.Keys)
{
List<int>list = wareHouse[key];
sbInsert.Append(string.Format("('{0}', {1}, {2}),", key, list[0], list[1]));
}
sbInsert.Remove(sbInsert.Length - 1, 1);
try
{
new MySqlCommand(sbInsert.ToString(), mySqlConn).ExecuteNonQuery();
Console.WriteLine("{0}/{1} : write to database successful", yy, mm);
}
catch(Exception e)
{
Console.WriteLine(e.Message);
}
finally
{
mySqlConn.Close();
}
}
}
}
MySQL資料庫結構
CREATE DEFINER=`account`@`%` PROCEDURE `Create_taiwan_bank_gold`() BEGIN drop table IF EXISTS taiwan_bank_gold; CREATE TABLE `taiwan_bank_gold` ( `gd_id` int(11) NOT NULL AUTO_INCREMENT, `gd_date` date NOT NULL, `gd_buy` double DEFAULT NULL, `gd_sale` double DEFAULT NULL, PRIMARY KEY (`gd_id`), UNIQUE KEY `gd_date_UNIQUE` (`gd_date`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; END
