DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

Screen Scrape Heise.de Newsticker (German)

11.12.2005
| 4254 views |
  • submit to reddit
        
#!/usr/bin/env python
# -*- encoding: latin1 -*-

import BeautifulSoup
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed
import urllib2
import re

# Set to a true value to print the title and link of every newly scraped
# article while the feed is being built.
debug = 0

def fetch(url):
    """Download *url* and return ``(body, info)``.

    ``body`` is the result of ``response.read()`` and ``info`` is the result
    of ``response.info()`` for the opened URL.  Any error raised by
    ``urllib2.urlopen`` or while reading propagates to the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read(), response.info()
    finally:
        # Release the connection even when reading the body fails; the
        # original never closed the response.
        response.close()

class HeiFeed(ScrapeNFeed.ScrapedFeed):
    """Scraped feed built from the article links on the heise.de newsticker page."""

    def HTML2RSS(self, headers, body):
        """Convert the newsticker index page into RSS items.

        Every anchor in *body* whose ``href`` starts with ``meldung`` is
        treated as an article link.  For each link that ``self.hasSeen``
        does not report as known, the article page is fetched and the
        prettified first ``div`` with class ``meldung_wrapper`` becomes the
        item description.  All new items are handed to ``self.addRSSItems``
        in a single call once the page has been processed.

        headers -- headers of the index page response (not used here)
        body    -- HTML of the index page
        """
        items = []
        # Links already queued during this run.  Items are only submitted
        # after the loop, so hasSeen() alone would not catch a link that
        # appears more than once on the same page.
        queued = set()
        soup = BeautifulSoup.BeautifulSoup(body)
        for anchor in soup('a', {'href': re.compile('^meldung.*')}):
            link = 'http://www.heise.de/newsticker/' + anchor['href']
            if link in queued or self.hasSeen(link):
                continue
            queued.add(link)

            title = anchor.contents[0].strip()
            if debug:
                # Single-argument print(...) behaves identically on Python 2.
                print("title: " + title)
                print("link : " + link)

            # Use separate locals so the `headers` parameter is not clobbered
            # by the article response's headers.
            page, _page_info = fetch(link)
            article = BeautifulSoup.BeautifulSoup(page)
            wrappers = article.fetch('div', {'class': 'meldung_wrapper'})
            if not wrappers:
                # No article container on this page: skip this one article
                # instead of aborting the whole run with an IndexError.
                continue
            desc = wrappers[0].prettify()
            items.append(RSSItem(title=title, description=desc, link=link))

        # Submit once, after the loop.  The original call sat inside the loop
        # and re-submitted the growing list on every anchor.
        self.addRSSItems(items)

# Build or refresh the feed.  NOTE(review): the positional arguments are
# presumably title, page URL, description, RSS output file and pickle state
# file, in that order -- confirm against ScrapeNFeed's load() signature.
HeiFeed.load(
    "heise.de newsticker",
    'http://www.heise.de/newsticker/',
    "heise.de newsticker",
    'heise_rss.xml',
    'heise_rss.pickle',
    managingEditor='tsch'
)