빅데이터/데이터분석 with C#

뉴스 크롤링 라이브러리 만들기2

언제나휴일 2020. 5. 4. 20:35
반응형

 

검색어로 뉴스 검색을 요청하면 실제 기사 목록을 얻어와서 출력하는 부분까지 구현합니다.

소스 코드

  • News.cs
using System;
using System.Xml;

namespace 네이버_뉴스_크롤링_라이브러리_제작
{
    /// <summary>
    /// One news article parsed from an <c>item</c> element of a news search
    /// XML response. Instances are read-only once constructed.
    /// </summary>
    public class News
    {
        /// <summary>Article title with markup tags removed (when built via <see cref="Make"/>).</summary>
        public string Title
        {
            get;
            private set;
        }

        /// <summary>Text of the <c>originallink</c> element.</summary>
        public string OriginalLink
        {
            get;
            private set;
        }

        /// <summary>Text of the <c>link</c> element.</summary>
        public string Link
        {
            get;
            private set;
        }

        /// <summary>Article summary with markup tags removed (when built via <see cref="Make"/>).</summary>
        public string Description
        {
            get;
            private set;
        }

        /// <summary>Publication date parsed from the <c>pubDate</c> element.</summary>
        public DateTime Pubdate
        {
            get;
            private set;
        }

        /// <summary>
        /// Creates a news article from already-extracted values. No stripping
        /// or validation is applied to the arguments.
        /// </summary>
        public News(string title, string olink, string link, string description, DateTime pubdate)
        {
            Title = title;
            OriginalLink = olink;
            Link = link;
            Description = description;
            Pubdate = pubdate;
        }

        /// <summary>
        /// Builds a <see cref="News"/> from an XML node that has <c>title</c>,
        /// <c>originallink</c>, <c>link</c>, <c>description</c> and <c>pubDate</c> children.
        /// </summary>
        /// <param name="xn">The node to read; may be null.</param>
        /// <returns>
        /// The parsed article, or <c>null</c> when <paramref name="xn"/> is null, a
        /// required child element is missing, or the publication date cannot be parsed.
        /// </returns>
        public static News Make(XmlNode xn)
        {
            if (xn == null)
            {
                return null;
            }

            XmlNode titleNode = xn.SelectSingleNode("title");
            XmlNode olinkNode = xn.SelectSingleNode("originallink");
            XmlNode linkNode = xn.SelectSingleNode("link");
            XmlNode descriptionNode = xn.SelectSingleNode("description");
            XmlNode pubdateNode = xn.SelectSingleNode("pubDate");
            if (titleNode == null || olinkNode == null || linkNode == null
                || descriptionNode == null || pubdateNode == null)
            {
                return null;
            }

            // Parse with the invariant culture so English day/month names in the
            // date text are understood regardless of the machine's current culture.
            DateTime pubdate;
            bool parsed = DateTime.TryParse(
                pubdateNode.InnerText,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out pubdate);
            if (!parsed)
            {
                return null;
            }

            return new News(
                Strip(titleNode.InnerText),
                olinkNode.InnerText,
                linkNode.InnerText,
                Strip(descriptionNode.InnerText),
                pubdate);
        }

        /// <summary>
        /// Removes every <c>&lt;...&gt;</c> span from the text.
        /// </summary>
        /// <param name="htmltext">Text that may contain markup tags; may be null or empty.</param>
        /// <returns>
        /// The text without tag spans. A <c>&lt;</c> that has no closing <c>&gt;</c> after
        /// it, and any <c>&gt;</c> that does not close a tag, are left untouched.
        /// Null or empty input is returned unchanged.
        /// </returns>
        public static string Strip(string htmltext)
        {
            if (string.IsNullOrEmpty(htmltext))
            {
                return htmltext;
            }

            int open = htmltext.IndexOf('<');
            while (open >= 0)
            {
                // Look for the closing bracket only AFTER the opening one, so a
                // stray '>' earlier in the text can neither stop the loop early
                // nor produce a negative substring index.
                int close = htmltext.IndexOf('>', open + 1);
                if (close < 0)
                {
                    break;
                }
                htmltext = htmltext.Remove(open, close - open + 1);
                // Everything before 'open' is already known to be free of '<'.
                open = htmltext.IndexOf('<', open);
            }
            return htmltext;
        }

        /// <summary>Returns the article title.</summary>
        public override string ToString()
        {
            return Title;
        }
    }
}
  • NaverNews.cs
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Xml;

namespace 네이버_뉴스_크롤링_라이브러리_제작
{
    /// <summary>
    /// Small client for the Naver news search XML endpoint. Call
    /// <see cref="Find"/> to set the search term and get the total hit count,
    /// then <see cref="FindNews"/> to fetch a page of articles for that term.
    /// </summary>
    public class NaverNews
    {
        private const string SearchEndpoint = "https://openapi.naver.com/v1/search/news.xml";

        /// <summary>The raw (unescaped) search term used by <see cref="FindNews"/>.</summary>
        public string SRC
        {
            get;
            set;
        }

        /// <summary>Value sent in the <c>X-Naver-Client-Id</c> request header.</summary>
        public string ID
        {
            get;
            set;
        }

        /// <summary>Value sent in the <c>X-Naver-Client-Secret</c> request header.</summary>
        public string Secret
        {
            get;
            set;
        }

        /// <summary>Creates a client that authenticates with the given id and secret.</summary>
        public NaverNews(string id, string secret)
        {
            ID = id;
            Secret = secret;
        }

        /// <summary>
        /// Stores <paramref name="src"/> as the current search term and returns the
        /// value of the <c>total</c> element reported for it.
        /// </summary>
        /// <param name="src">The search term.</param>
        /// <returns>The total number of results reported by the service.</returns>
        /// <exception cref="XmlException">The response has no <c>rss/channel/total</c> element.</exception>
        public int Find(string src)
        {
            SRC = src;
            string url = string.Format("{0}?query={1}&sort=date", SearchEndpoint, EscapeQuery(src));
            XmlDocument xdoc = MakeDocument(url);
            XmlNode totalNode = GetChannel(xdoc).SelectSingleNode("total");
            if (totalNode == null)
            {
                throw new XmlException("The search response does not contain an rss/channel/total element.");
            }
            return int.Parse(totalNode.InnerText);
        }

        /// <summary>
        /// Fetches one page of articles for the term stored in <see cref="SRC"/>.
        /// </summary>
        /// <param name="start">Value passed as the <c>start</c> query parameter.</param>
        /// <param name="display">Value passed as the <c>display</c> query parameter.</param>
        /// <returns>The articles that could be parsed; never null.</returns>
        /// <exception cref="XmlException">The response has no <c>rss/channel</c> element.</exception>
        public List<News> FindNews(int start, int display)
        {
            string url = string.Format("{0}?query={1}&sort=date&start={2}&display={3}",
                SearchEndpoint, EscapeQuery(SRC), start, display);
            XmlDocument xdoc = MakeDocument(url);
            XmlNodeList items = GetChannel(xdoc).SelectNodes("item");

            List<News> nc = new List<News>();
            foreach (XmlNode xn in items)
            {
                News news = News.Make(xn);
                if (news == null)
                {
                    // Skip just this item; one unparsable entry should not
                    // discard the valid entries that follow it.
                    continue;
                }
                nc.Add(news);
            }
            return nc;
        }

        /// <summary>
        /// Percent-encodes a search term for use as a query-string value so that
        /// characters such as '&amp;', '#', '+' or spaces cannot alter the request.
        /// A null term is treated as empty.
        /// </summary>
        private static string EscapeQuery(string term)
        {
            return System.Uri.EscapeDataString(term ?? string.Empty);
        }

        /// <summary>Returns the <c>rss/channel</c> node of a response or throws if it is absent.</summary>
        private static XmlNode GetChannel(XmlDocument xdoc)
        {
            XmlNode channel = xdoc.SelectSingleNode("rss/channel");
            if (channel == null)
            {
                throw new XmlException("The search response does not contain an rss/channel element.");
            }
            return channel;
        }

        /// <summary>
        /// Sends an authenticated GET request to <paramref name="url"/> and loads the
        /// response body into an <see cref="XmlDocument"/>. The response and its stream
        /// are disposed before returning, including when loading fails.
        /// </summary>
        private XmlDocument MakeDocument(string url)
        {
            WebRequest request = WebRequest.Create(url);
            request.Headers.Add("X-Naver-Client-Id", ID);
            request.Headers.Add("X-Naver-Client-Secret", Secret);

            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                // Load reads the whole stream, so the document stays usable
                // after the stream is closed.
                XmlDocument xdoc = new XmlDocument();
                xdoc.Load(stream);
                return xdoc;
            }
        }
    }
}
  • Program.cs
//                                                                                                                                           ehpub.co.kr
//                                                                                                                                데이터분석 with C#
//네이버 뉴스 크롤링 라이브러리 v0.1 제작하기

using System;
using System.Collections.Generic;

namespace 네이버_뉴스_크롤링_라이브러리_제작
{
    /// <summary>
    /// Console demo: searches for a fixed term, prints the total hit count,
    /// then prints the title and description of the first 20 articles.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Credentials come from MySecret, which is declared outside this file.
            NaverNews searcher = new NaverNews(MySecret.id, MySecret.secret);

            int hitCount = searcher.Find("코로나");
            Console.WriteLine(hitCount);

            List<News> articles = searcher.FindNews(1, 20);
            for (int i = 0; i < articles.Count; i++)
            {
                News article = articles[i];
                Console.WriteLine(article.Title);
                Console.WriteLine("==");
                Console.WriteLine(article.Description);
                Console.WriteLine("==================================================");
            }
        }
    }
}
반응형