CrawlerHandler.cs 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Net;
  6. using System.Web;
  /// <summary>
  /// Request handler that fetches a list of remote resources on behalf of the client.
  /// It reads the "source[]" form values, runs one <see cref="Crawler"/> per value and
  /// writes a JSON document describing the outcome of each fetch.
  /// </summary>
  public class CrawlerHandler : Handler
  {
      // Raw source URLs as posted in the "source[]" form field.
      private string[] Sources;

      // One completed crawler per entry in Sources, in the same order.
      private Crawler[] Crawlers;

      /// <summary>
      /// Creates the handler for the given HTTP context (forwarded to the base handler).
      /// </summary>
      /// <param name="context">The current HTTP context.</param>
      public CrawlerHandler(HttpContext context)
          : base(context)
      {
      }

      /// <summary>
      /// Fetches every posted source and writes the aggregated result as JSON.
      /// When no source was posted, a single error state is written instead.
      /// </summary>
      public override void Process()
      {
          Sources = Request.Form.GetValues("source[]");

          bool hasSources = Sources != null && Sources.Length > 0;
          if (!hasSources)
          {
              // Message text: "parameter error: no fetch source specified".
              WriteJson(new { state = "参数错误:没有指定抓取源" });
              return;
          }

          // Fetch eagerly and in order; the field is only assigned once every fetch has returned.
          var fetched = new Crawler[Sources.Length];
          for (int index = 0; index < Sources.Length; index++)
          {
              fetched[index] = new Crawler(Sources[index], Server).Fetch();
          }
          Crawlers = fetched;

          var entries =
              from crawler in Crawlers
              select new
              {
                  state = crawler.State,
                  source = crawler.SourceUrl,
                  url = crawler.ServerUrl
              };

          WriteJson(new
          {
              state = "SUCCESS",
              list = entries
          });
      }
  }
  /// <summary>
  /// Downloads a single remote resource and stores it under the web application.
  /// After <see cref="Fetch"/> returns, <see cref="State"/> holds "SUCCESS" or a
  /// description of the failure, and <see cref="ServerUrl"/> holds the formatted
  /// server-relative path when one was computed.
  /// </summary>
  public class Crawler
  {
      /// <summary>The remote URL to download, exactly as supplied by the caller.</summary>
      public string SourceUrl { get; set; }

      /// <summary>The path produced by PathFormatter for the stored file; null until computed.</summary>
      public string ServerUrl { get; set; }

      /// <summary>"SUCCESS" after a successful fetch, otherwise a failure message.</summary>
      public string State { get; set; }

      // Used to map ServerUrl to a physical path on disk.
      private HttpServerUtility Server { get; set; }

      /// <summary>
      /// Creates a crawler for one source URL.
      /// </summary>
      /// <param name="sourceUrl">The remote URL to download.</param>
      /// <param name="server">Server utility used to resolve the save path.</param>
      public Crawler(string sourceUrl, HttpServerUtility server)
      {
          this.SourceUrl = sourceUrl;
          this.Server = server;
      }

      /// <summary>
      /// Downloads <see cref="SourceUrl"/> and writes it to the path derived from the
      /// "catcherPathFormat" configuration value. Failures are reported through
      /// <see cref="State"/> rather than thrown, so one bad URL does not abort a batch.
      /// </summary>
      /// <returns>This instance, to allow call chaining.</returns>
      public Crawler Fetch()
      {
          try
          {
              // Only http/https are accepted: other schemes (file://, ftp://) do not yield an
              // HttpWebRequest and must not be readable through this endpoint.
              // NOTE(review): hosts resolving to internal/private addresses are NOT rejected
              // here (SSRF); add such a check if this endpoint is reachable by untrusted users.
              Uri sourceUri;
              bool isHttpUrl = Uri.TryCreate(this.SourceUrl, UriKind.Absolute, out sourceUri)
                  && (sourceUri.Scheme == Uri.UriSchemeHttp || sourceUri.Scheme == Uri.UriSchemeHttps);
              if (!isHttpUrl)
              {
                  State = "Url is not a valid http(s) address";
                  return this;
              }

              var request = (HttpWebRequest)WebRequest.Create(sourceUri);
              using (var response = (HttpWebResponse)request.GetResponse())
              {
                  if (response.StatusCode != HttpStatusCode.OK)
                  {
                      State = "Url returns " + response.StatusCode + ", " + response.StatusDescription;
                      return this;
                  }

                  // MIME types are case-insensitive, so compare ordinally ignoring case.
                  string contentType = response.ContentType ?? string.Empty;
                  if (contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) < 0)
                  {
                      State = "Url is not an image";
                      return this;
                  }

                  // NOTE(review): the extension of the stored file still comes from the remote
                  // URL path; consider validating it against an allow-list of image extensions.
                  ServerUrl = PathFormatter.Format(GetSourceFileName(this.SourceUrl), Config.GetString("catcherPathFormat"));
                  var savePath = Server.MapPath(ServerUrl);

                  // CreateDirectory does nothing when the directory already exists.
                  Directory.CreateDirectory(Path.GetDirectoryName(savePath));

                  // Buffer the whole body first so a failed download never leaves a partial file.
                  byte[] bytes;
                  using (var body = response.GetResponseStream())
                  using (var buffered = new MemoryStream())
                  {
                      var chunk = new byte[4096];
                      int count;
                      while ((count = body.Read(chunk, 0, chunk.Length)) > 0)
                      {
                          buffered.Write(chunk, 0, count);
                      }
                      bytes = buffered.ToArray();
                  }

                  File.WriteAllBytes(savePath, bytes);
                  State = "SUCCESS";
              }
          }
          catch (WebException e)
          {
              // GetResponse throws for non-success HTTP statuses; report them the same way as
              // the explicit status check above. Transport errors carry no HTTP response.
              using (var failed = e.Response as HttpWebResponse)
              {
                  if (failed != null)
                  {
                      State = "Url returns " + failed.StatusCode + ", " + failed.StatusDescription;
                  }
                  else
                  {
                      State = "抓取错误:" + e.Message;
                  }
              }
          }
          catch (Exception e)
          {
              // Message prefix: "fetch error:".
              State = "抓取错误:" + e.Message;
          }

          return this;
      }

      // Returns the last path segment of the URL with any query string or fragment removed,
      // so text after '?' or '#' cannot influence the stored file name or its extension.
      private static string GetSourceFileName(string url)
      {
          int cut = url.IndexOfAny(new[] { '?', '#' });
          string withoutQuery = cut >= 0 ? url.Substring(0, cut) : url;
          return Path.GetFileName(withoutQuery);
      }
  }