Wednesday, December 30, 2015

Scrapy exports all results in one row in SQLite

I'm writing a basic spider with Scrapy and want to store the data in SQLite. The spider works fine and collects the data I want, but all of it ends up in the same row of the database.

Here's my spider:

    def parse(self, response):
        for sel in response.xpath('//*[@class="class"]'):
            item = ScrapedItem()
            item['Name'] = sel.xpath('*/header/div//h2/a/text()').extract()
            item['Site'] = sel.xpath('*/header/div/a[1]/text()').extract()
            item['Category'] = sel.xpath('*/header/div/h6[3]/text()').extract()

            yield item
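
(Side note: `sel.xpath(...).extract()` returns a list of every matching string, not a single value, so each field above actually holds a list. Recent Scrapy versions also offer `.extract_first()`, which returns just the first match or `None`. A minimal, hypothetical spider sketch showing the difference, reusing the class name and XPaths above:)

    import scrapy

    class ExampleSpider(scrapy.Spider):
        # Hypothetical spider, only to illustrate .extract() vs .extract_first()
        name = 'example'
        start_urls = ['http://example.com']  # placeholder URL

        def parse(self, response):
            for sel in response.xpath('//*[@class="class"]'):
                names = sel.xpath('*/header/div//h2/a/text()').extract()        # list of strings
                name = sel.xpath('*/header/div//h2/a/text()').extract_first()   # first string or None
                self.logger.info('extract(): %r  extract_first(): %r', names, name)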

And here is my pipeline:

    import sqlite3 as lite

    from xyz import settings
    from xyz import items

    con = None

    class Pipeline(object):

        def __init__(self):
            self.setupDBCon()
            self.createTables()

        def process_item(self, item, spider):
            self.storeInfoInDb(item)
            return item

        def storeInfoInDb(self, item):
            # Three columns, so three placeholders and three bound values
            self.cur.execute("INSERT INTO Table(Name, Site, Category) "
                             "VALUES (?, ?, ?)",
                             (str(item.get('Name', '')),
                              str(item.get('Site', '')),
                              str(item.get('Category', ''))))
            print item.get('Name', '')
            self.con.commit()

        def setupDBCon(self):
            self.con = lite.connect('test.db')
            self.cur = self.con.cursor()

        def __del__(self):
            self.closeDB()

        def createTables(self):
            self.dropTable()
            self.createTable()

        def createTable(self):
            self.cur.execute("CREATE TABLE IF NOT EXISTS Table(id INTEGER PRIMARY KEY NOT NULL, "
                             "Name TEXT, "
                             "Site TEXT, "
                             "Category TEXT)")

        def dropTable(self):
            self.cur.execute("DROP TABLE IF EXISTS Table")

        def closeDB(self):
            self.con.close()

How do I save the scraped data so that each scraped item gets its own row?
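
For reference, a common pattern is to extract single strings in the spider (for example with `.extract_first()`, as in the sketch above) and have the pipeline issue one parameterised INSERT per item, with exactly as many `?` placeholders as values. A minimal sketch of such a pipeline; the table name `scraped_items` is a placeholder, and `open_spider`/`close_spider` are the standard Scrapy pipeline hooks:

    import sqlite3 as lite


    class SQLitePipeline(object):
        """Hypothetical pipeline sketch: one INSERT, and therefore one row, per item."""

        def open_spider(self, spider):
            # Open the connection once when the spider starts
            self.con = lite.connect('test.db')
            self.cur = self.con.cursor()
            self.cur.execute("CREATE TABLE IF NOT EXISTS scraped_items ("
                             "id INTEGER PRIMARY KEY, "
                             "Name TEXT, Site TEXT, Category TEXT)")
            self.con.commit()

        def close_spider(self, spider):
            self.con.close()

        def process_item(self, item, spider):
            # Three columns -> three placeholders -> three values
            self.cur.execute("INSERT INTO scraped_items (Name, Site, Category) "
                             "VALUES (?, ?, ?)",
                             (item.get('Name'), item.get('Site'), item.get('Category')))
            self.con.commit()
            return item

The pipeline still has to be enabled via `ITEM_PIPELINES` in settings.py for Scrapy to call it.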
