Module event_processor.scrapy_impl.middlewares
Expand source code
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals, Request
class SplitItemsMiddleware:
def get_event_count(self, item, spider):
count = None
for _, value in item.items():
if count == None:
count = len(value)
else:
if len(value) != count:
raise ValueError(f'{spider.organization}: Selectors returned data of differing lengths')
return count
def process_spider_output(self, response, result, spider):
for item in result:
if type(item) is Request:
yield item
continue
event_count = self.get_event_count(item, spider)
for processed_item in ({key: value[i] for key, value in item.items()} for i in range(event_count)):
yield processed_item
Classes
class SplitItemsMiddleware (*args, **kwargs)
-
Expand source code
class SplitItemsMiddleware: def get_event_count(self, item, spider): count = None for _, value in item.items(): if count == None: count = len(value) else: if len(value) != count: raise ValueError(f'{spider.organization}: Selectors returned data of differing lengths') return count def process_spider_output(self, response, result, spider): for item in result: if type(item) is Request: yield item continue event_count = self.get_event_count(item, spider) for processed_item in ({key: value[i] for key, value in item.items()} for i in range(event_count)): yield processed_item
Methods
def get_event_count(self, item, spider)
-
Expand source code
def get_event_count(self, item, spider): count = None for _, value in item.items(): if count == None: count = len(value) else: if len(value) != count: raise ValueError(f'{spider.organization}: Selectors returned data of differing lengths') return count
def process_spider_output(self, response, result, spider)
-
Expand source code
def process_spider_output(self, response, result, spider): for item in result: if type(item) is Request: yield item continue event_count = self.get_event_count(item, spider) for processed_item in ({key: value[i] for key, value in item.items()} for i in range(event_count)): yield processed_item