I'm making a basic spider using Scrapy and want to store the data with SQLite. The spider is working fine and saves the data I want, but it writes all data on the same row in the database.
Here's my spider:
def parse(self, response):
    """Yield one ScrapedItem per element matching the class selector.

    Fix for the "everything on one row" symptom: Selector.extract()
    returns a *list* of strings, so each field was later stringified as
    a whole list. extract_first(default='') yields a single scalar
    string per field, so the pipeline stores one clean value per column.
    """
    for sel in response.xpath('//*[@class="class"]'):
        item = ScrapedItem()
        # extract_first() -> scalar string (or '' when nothing matched)
        item['Name'] = sel.xpath('*/header/div//h2/a/text()').extract_first(default='')
        item['Site'] = sel.xpath('*/header/div/a[1]/text()').extract_first(default='')
        item['Category'] = sel.xpath('*/header/div/h6[3]/text()').extract_first(default='')
        yield item
And here is my pipeline:
import sqlite3 as lite
from xyz import settings
from xyz import items
con = None  # NOTE(review): unused module-level placeholder (a C-style habit); the live connection is self.con — safe to remove
class Pipeline(object):
    """Item pipeline that stores each scraped item as one SQLite row.

    On startup the target table is dropped and re-created, so every crawl
    begins with an empty ``test.db``.

    Bug fixes versus the original:
    - the INSERT had four ``?`` placeholders for three values, which
      raises ``sqlite3.ProgrammingError`` on every item;
    - ``createTables`` called ``dropAgencyTable``/``createAgencyTable``,
      which were never defined (the methods are ``dropTable``/
      ``createTable``), and ``dropTable`` dropped ``Agency`` while
      ``createTable`` created ``Table`` — different tables;
    - ``Table`` is a reserved word in SQLite and must be double-quoted;
    - list values (as produced by ``Selector.extract()``) were passed
      through ``str()``, storing ``"[u'...']"`` — they are now collapsed
      to their first element, giving one clean value per column.
    """

    def __init__(self):
        self.setupDBCon()
        self.createTables()

    def process_item(self, item, spider):
        """Scrapy pipeline hook: persist one item, then pass it along."""
        self.storeInfoInDb(item)
        return item

    @staticmethod
    def _scalar(value):
        """Collapse a list/tuple field to its first element as a string.

        Selector.extract() returns a list; storing str(list) is what put
        bracketed junk in the database.  Non-list values pass through
        str() unchanged; an empty list becomes ''.
        """
        if isinstance(value, (list, tuple)):
            return str(value[0]) if value else ''
        return str(value)

    def storeInfoInDb(self, item):
        """INSERT one item as a new row and commit immediately."""
        self.cur.execute(
            'INSERT INTO "Table" (Name, Site, Category) VALUES (?, ?, ?)',
            (
                self._scalar(item.get('Name', '')),
                self._scalar(item.get('Site', '')),
                self._scalar(item.get('Category', '')),
            ))
        # print(x) with a single argument is valid in both Python 2 and 3
        print(item.get('Name', ''))
        self.con.commit()

    def setupDBCon(self):
        """Open (or create) test.db and keep a reusable cursor."""
        self.con = lite.connect('test.db')
        self.cur = self.con.cursor()

    def __del__(self):
        self.closeDB()

    def createTables(self):
        # Calls now match the methods actually defined on this class.
        self.dropTable()
        self.createTable()

    def createTable(self):
        # "Table" is quoted because TABLE is an SQLite keyword.
        self.cur.execute('CREATE TABLE IF NOT EXISTS "Table" ('
                         'id INTEGER PRIMARY KEY NOT NULL, '
                         'Name TEXT, '
                         'Site TEXT, '
                         'Category TEXT)')

    def dropTable(self):
        # Drop the same table we create (the original dropped "Agency").
        self.cur.execute('DROP TABLE IF EXISTS "Table"')

    def closeDB(self):
        # Guard: __del__ may run even if connect() failed in __init__.
        if getattr(self, 'con', None) is not None:
            self.con.close()
How do I save my scraped data in one separate row per scraped item?
Aucun commentaire:
Enregistrer un commentaire