Module `event_processor.scrapers.formyblock`

Expand source code

from event_processor.base.custom_spiders import ScraperNoTransposeSpider

class ForMyBlockSpider(ScraperNoTransposeSpider):
    """Spider for the My Block, My Hood, My City events listing.

    Starts at the events page of www.formyblock.org and yields one plain
    dict per ``article`` found inside the ``.eventlist--upcoming`` container.
    """

    name = 'formyblock'
    allowed_domains = ['www.formyblock.org']
    start_urls = ["https://www.formyblock.org/events/"]

    def __init__(self, name=None, **kwargs):
        """Set up the base spider with the organization, base URL and date format.

        ``name`` is accepted but not forwarded by this method; any other
        keyword arguments are passed through to the base initializer.
        """
        # NOTE(review): ``self`` is passed explicitly on top of the implicit
        # binding done by ``super()`` -- confirm the base initializer expects
        # that before changing this call.
        super().__init__(
            self,
            'My Block, My Hood, My City',
            base_url='https://www.formyblock.org/',
            date_format='%Y-%m-%d',
            **kwargs
        )

    def parse(self, response):
        """Yield one event dict per article in the upcoming-events list.

        Each dict carries ``url``, ``title``, ``event_time`` (``date``,
        ``start_time``, ``end_time``), ``description``, ``address``,
        ``price`` and ``organization``. Fields whose selector matches
        nothing come back as ``None``, except ``address``, which falls back
        to the stripped ``.eventlist-meta-address`` text or ``""``.
        """
        upcoming_articles = response.css(".eventlist--upcoming").css("article")

        for article in upcoming_articles:
            link_target = article.css(".eventlist-title-link::attr(href)").get()
            schedule = {
                "date": article.css(".event-date::attr(datetime)").get(),
                "start_time": article.css(".event-time-12hr-start::text").get(),
                "end_time": article.css(".event-time-12hr-end::text").get(),
            }
            # Only the fallback text is stripped; a matching address line is
            # used exactly as extracted.
            whole_address = article.css(".eventlist-meta-address::text").get(default="").strip()
            address = article.css(".eventlist-meta-address-line::text").get(default=whole_address)

            yield {
                "url": response.urljoin(link_target),
                "title": article.css(".eventlist-title-link::text").get(),
                "event_time": schedule,
                # No ``::text`` here, so this is whatever the selector
                # returns for the element itself (presumably its markup).
                "description": article.css(".eventlist-description").get(),
                "address": address,
                "price": 0.0,
                # The site appears to list only the organization's own events.
                "organization": "My Block, My Hood, My City",
            }

Classes

class ForMyBlockSpider (name=None, **kwargs)

Base class for scrapy spiders. All spiders must inherit from this class.

Expand source code

class ForMyBlockSpider(ScraperNoTransposeSpider):
    """Spider for the My Block, My Hood, My City events listing.

    Starts at the events page of www.formyblock.org and yields one plain
    dict per ``article`` found inside the ``.eventlist--upcoming`` container.
    """

    name = 'formyblock'
    allowed_domains = ['www.formyblock.org']
    start_urls = ["https://www.formyblock.org/events/"]

    def __init__(self, name=None, **kwargs):
        """Set up the base spider with the organization, base URL and date format.

        ``name`` is accepted but not forwarded by this method; any other
        keyword arguments are passed through to the base initializer.
        """
        # NOTE(review): ``self`` is passed explicitly on top of the implicit
        # binding done by ``super()`` -- confirm the base initializer expects
        # that before changing this call.
        super().__init__(
            self,
            'My Block, My Hood, My City',
            base_url='https://www.formyblock.org/',
            date_format='%Y-%m-%d',
            **kwargs
        )

    def parse(self, response):
        """Yield one event dict per article in the upcoming-events list.

        Each dict carries ``url``, ``title``, ``event_time`` (``date``,
        ``start_time``, ``end_time``), ``description``, ``address``,
        ``price`` and ``organization``. Fields whose selector matches
        nothing come back as ``None``, except ``address``, which falls back
        to the stripped ``.eventlist-meta-address`` text or ``""``.
        """
        upcoming_articles = response.css(".eventlist--upcoming").css("article")

        for article in upcoming_articles:
            link_target = article.css(".eventlist-title-link::attr(href)").get()
            schedule = {
                "date": article.css(".event-date::attr(datetime)").get(),
                "start_time": article.css(".event-time-12hr-start::text").get(),
                "end_time": article.css(".event-time-12hr-end::text").get(),
            }
            # Only the fallback text is stripped; a matching address line is
            # used exactly as extracted.
            whole_address = article.css(".eventlist-meta-address::text").get(default="").strip()
            address = article.css(".eventlist-meta-address-line::text").get(default=whole_address)

            yield {
                "url": response.urljoin(link_target),
                "title": article.css(".eventlist-title-link::text").get(),
                "event_time": schedule,
                # No ``::text`` here, so this is whatever the selector
                # returns for the element itself (presumably its markup).
                "description": article.css(".eventlist-description").get(),
                "address": address,
                "price": 0.0,
                # The site appears to list only the organization's own events.
                "organization": "My Block, My Hood, My City",
            }

Ancestors

ScraperNoTransposeSpider
scrapy.spiders.Spider
scrapy.utils.trackref.object_ref
SpiderBase
AggregatorBase

Class variables

var allowed_domains: Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
var start_urls: Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

Methods

def parse(self, response)

Expand source code

def parse(self, response):
    """Yield one event dict per article in the upcoming-events list.

    Each dict carries ``url``, ``title``, ``event_time`` (``date``,
    ``start_time``, ``end_time``), ``description``, ``address``,
    ``price`` and ``organization``. Fields whose selector matches nothing
    come back as ``None``, except ``address``, which falls back to the
    stripped ``.eventlist-meta-address`` text or ``""``.
    """
    upcoming_articles = response.css(".eventlist--upcoming").css("article")

    for article in upcoming_articles:
        link_target = article.css(".eventlist-title-link::attr(href)").get()
        schedule = {
            "date": article.css(".event-date::attr(datetime)").get(),
            "start_time": article.css(".event-time-12hr-start::text").get(),
            "end_time": article.css(".event-time-12hr-end::text").get(),
        }
        # Only the fallback text is stripped; a matching address line is
        # used exactly as extracted.
        whole_address = article.css(".eventlist-meta-address::text").get(default="").strip()
        address = article.css(".eventlist-meta-address-line::text").get(default=whole_address)

        yield {
            "url": response.urljoin(link_target),
            "title": article.css(".eventlist-title-link::text").get(),
            "event_time": schedule,
            # No ``::text`` here, so this is whatever the selector returns
            # for the element itself (presumably its markup).
            "description": article.css(".eventlist-description").get(),
            "address": address,
            "price": 0.0,
            # The site appears to list only the organization's own events.
            "organization": "My Block, My Hood, My City",
        }

Inherited members

ScraperNoTransposeSpider:
- custom_settings
- empty_check_extract
- enabled
- get_request
- item_filter
- name