I am defining a spider class in Scrapy.
I want to configure the `custom_settings` of this class. If I assign `self.custom_settings = {...}` inside `__init__`, the settings are not applied. But if I assign `custom_settings = {...}` directly in the class body (i.e. outside any method), it works.
What's the difference between these two ways of initialization?
class GoodreadsSpider(scrapy.Spider):
    """Spider parameterised by a start URL and book/author counts.

    Args:
        start_url: Stored as ``self.start_url``; ``SAMPLE_URL`` is used when
            it is omitted (``None``).
        num_books: Converted with ``int()`` and stored as ``self.num_books``;
            ``SAMPLE_NUM_BOOKS`` is used when it is omitted (``None``).
        num_authors: Converted with ``int()`` and stored as
            ``self.num_authors``; ``SAMPLE_NUM_AUTHORS`` is used when it is
            omitted (``None``).
        *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
        **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.

    Raises:
        ValueError: If ``num_books`` or ``num_authors`` is given but cannot
            be parsed by ``int()``.
    """

    name = 'GoodreadsSpider'

    # Per-spider setting overrides. This must be a CLASS attribute: Scrapy
    # applies ``custom_settings`` from the spider class before any instance
    # is created, so this is the form that takes effect.
    custom_settings = {
        'ITEM_PIPELINES': {
            'crawler.pipelines.CrawlerPipeline': 300,
        },
        'LOG_LEVEL': 'DEBUG',
    }

    def __init__(self, start_url=None, num_books=None, num_authors=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Compare against None by identity so falsy-but-valid values
        # (e.g. 0 or "") are not mistaken for "not provided".
        self.start_url = start_url if start_url is not None else SAMPLE_URL
        self.num_books = int(num_books) if num_books is not None else SAMPLE_NUM_BOOKS
        self.num_authors = int(num_authors) if num_authors is not None else SAMPLE_NUM_AUTHORS
        # NOTE: do not assign ``self.custom_settings`` here. By the time
        # ``__init__`` runs, the crawler settings have already been updated
        # from the class-level ``custom_settings``; an instance attribute
        # set at this point is never consulted and therefore has no effect.