0

I am defining a spider class in Scrapy and want to configure its `custom_settings`. If I assign `self.custom_settings = {...}` inside `__init__`, it has no effect. But if I assign `custom_settings = {...}` directly in the class body (i.e. outside any method), it works. What is the difference between these two ways of initializing it?

class GoodreadsSpider(scrapy.Spider):
    """Spider whose per-spider settings are declared at class level.

    Scrapy applies ``custom_settings`` from the class before any spider
    instance exists, so only the class attribute below influences the crawl.
    """

    name = 'GoodreadsSpider'

    # This works: a class attribute is visible on the class itself, which is
    # where Scrapy looks when it builds the crawler settings.
    custom_settings = {
        'ITEM_PIPELINES': {
            'crawler.pipelines.CrawlerPipeline': 300,
        },
        'LOG_LEVEL': 'DEBUG'
    }

    def __init__(self, start_url=None, num_books=None, num_authors=None, *args, **kwargs):
        """Store the crawl parameters, falling back to the sample defaults.

        Args:
            start_url: URL to start from; ``SAMPLE_URL`` is used when omitted.
            num_books: Book limit, converted with ``int()``;
                ``SAMPLE_NUM_BOOKS`` is used when omitted.
            num_authors: Author limit, converted with ``int()``;
                ``SAMPLE_NUM_AUTHORS`` is used when omitted.
            *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        # Identity test against None so that falsy-but-valid values such as
        # "0" or an empty string are not mistaken for "argument not given".
        self.start_url = start_url if start_url is not None else SAMPLE_URL
        self.num_books = int(num_books) if num_books is not None else SAMPLE_NUM_BOOKS
        self.num_authors = int(num_authors) if num_authors is not None else SAMPLE_NUM_AUTHORS
        # This does not work: the assignment only creates an instance
        # attribute, and it runs after Scrapy has already read
        # ``custom_settings`` from the class, so the crawl never sees it.
        self.custom_settings = {
            'ITEM_PIPELINES': {
                'crawler.pipelines.CrawlerPipeline': 300,
            },
            'LOG_LEVEL': 'DEBUG'
        }
Cal_W
  • 19
  • 2

0 Answers0