I wrote the Python code below. I don't have any array or list that should need much memory, and I can't understand why I get a memory overflow.
import sqlite3 as db
import nltk
from nltk.corpus import sentiwordnet as swn

con = db.connect('SentiWords.db')
cur = con.cursor()
cur.execute("DROP TABLE IF EXISTS Words")
cur.execute('''CREATE TABLE Words(word Text, type Text)''')

infile = open("train_reviews.txt")
lines = infile.readlines()
stopwords = nltk.corpus.stopwords.words('english')
sentiment_words = dict()
counter = 0

for line in lines:
    words = nltk.word_tokenize(line.decode("UTF-8"))
    words = [w for w in words if w.lower() not in stopwords]
    for word in words:
        counter_sent = 0
        counter_obj = 0
        check = swn.senti_synsets(word)
        for i in range(0, len(check)):
            if check[i].pos_score() < check[i].obj_score() and check[i].neg_score() < check[i].obj_score():
                counter_obj += 1
            else:
                counter_sent += 1
        if counter_obj < counter_sent:
            cur.execute('''SELECT type FROM Words WHERE word=? ''', [word])
            data = cur.fetchall()  # extract frequency
            if len(data) == 0:  # if there isn't any frequency yet
                cur.execute("INSERT INTO Words VALUES(?,?)", (word, "no-obj"))
        if counter % 1000 == 0:
            con.commit()
            con.close()
            con = db.connect('SentiWords.db')
            cur = con.cursor()
            print counter
        counter += 1

con.commit()
con.close()
I want to find, from the reviews, all words that carry a sentiment meaning. So I decided to use SentiWordNet to check every word from the reviews and keep the words that have a sentiment meaning in a database. Can you explain to me what goes wrong?
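In case it helps, here is a minimal standalone snippet that isolates just the SentiWordNet check I am relying on (it assumes the sentiwordnet and wordnet NLTK corpora are downloaded, and "happy" is only an example word):

from nltk.corpus import sentiwordnet as swn

word = "happy"
counter_sent = 0
counter_obj = 0
for synset in swn.senti_synsets(word):
    # I count a synset as "objective" when its objectivity score
    # dominates both the positive and the negative score.
    if synset.pos_score() < synset.obj_score() and synset.neg_score() < synset.obj_score():
        counter_obj += 1
    else:
        counter_sent += 1
print("%s -> sentiment synsets: %d, objective synsets: %d" % (word, counter_sent, counter_obj))

This part on its own seems to behave as I expect; the problem only appears when I run the full loop over the review file.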