# -*- coding: UTF-8 -*-
from nltk.stem.isri import ISRIStemmer
def stemming_text_1():
with open('test.txt', 'r') as f:
for line in f:
print line
singles = []
stemmer = ISRIStemmer()
for plural in line.split():
singles.append(stemmer.stem(plural))
print ' '.join(singles)
stemming_text_1()
/home/waheeb/anaconda2/lib/python2.7/site-packages/nltk/stem/isri.py:154: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
if token in self.stop_words:
Traceback (most recent call last):
File "Arabic_stem.py", line 15, in <module>
stemming_text_1()
File "Arabic_stem.py", line 12, in stemming_text_1
singles.append(stemmer.stem(plural))
File "/home/waheeb/anaconda2/lib/python2.7/site-packages/nltk/stem /isri.py", line 156, in stem
token = self.pre32(token) # remove length three and length two prefixes in this order
File "/home/waheeb/anaconda2/lib/python2.7/site-packages/nltk/stem /isri.py", line 198, in pre32
if word.startswith(pre3):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd8 in position 0: ordinal not in range(128)