import nltk
from nltk.corpus import stopwords # stop words in sentences
from nltk.sentiment.vader import SentimentIntensityAnalyzer
def remove_stopwords(sentence):
    """Return *sentence* with English stop words removed.

    The text is split with ``nltk.word_tokenize``; every token found in
    NLTK's English stop-word list is dropped and the remaining tokens are
    joined back together with single spaces.

    Args:
        sentence: Text to filter. Membership is an exact string comparison,
            so callers that want case-insensitive filtering should
            lower-case the text first.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Build a set once: membership tests are O(1), whereas the list returned
    # by stopwords.words() would be scanned linearly for every token.
    stop_words = set(stopwords.words('english'))
    return ' '.join(
        w for w in nltk.word_tokenize(sentence) if w not in stop_words
    )
def sentence_tokenize(text):
    """Split *text* into sentences by delegating to ``nltk.sent_tokenize``."""
    sentences = nltk.sent_tokenize(text)
    return sentences
def word_tokenize(text):
    """Split *text* into word tokens by delegating to ``nltk.word_tokenize``."""
    tokens = nltk.word_tokenize(text)
    return tokens
# Load the article, flatten it onto a single line, and lower-case it.
# The context manager guarantees the file handle is closed after reading.
with open('1.0', encoding="utf-8") as article_file:
    tf = article_file.read().replace("\n", " ").lower()

# Drop English stop words, then derive the sentence list and the word list
# from the filtered text.
tf = remove_stopwords(tf)
st = sentence_tokenize(tf)
wt = word_tokenize(tf)
I have a file named '1.0' that contains an article. I used NLTK to split the text into sentences and words. What should I do to remove all punctuation from the sentence list `st` and the word list `wt`?