0
import nltk
from nltk.corpus import stopwords # stop words in sentences
from nltk.sentiment.vader import SentimentIntensityAnalyzer    

def remove_stopwords(sentence):
    """Return *sentence* with English stop words removed.

    The text is split with ``nltk.word_tokenize``; every token found in
    NLTK's English stop-word list is dropped and the remaining tokens are
    joined back together with single spaces.  Tokens are compared exactly
    as the tokenizer produced them (no case folding is applied here).

    Args:
        sentence: The text to filter.

    Returns:
        A string of the surviving tokens separated by single spaces.
    """
    # A set gives O(1) membership tests; the raw list would be scanned
    # from the start for every single token.
    stop_words = set(stopwords.words('english'))
    return ' '.join(
        w for w in nltk.word_tokenize(sentence) if w not in stop_words
    )

def sentence_tokenize(text):
    """Split *text* into sentences using ``nltk.sent_tokenize``."""
    sentences = nltk.sent_tokenize(text)
    return sentences

def word_tokenize(text):
    """Split *text* into word tokens using ``nltk.word_tokenize``."""
    tokens = nltk.word_tokenize(text)
    return tokens
        
# Read the whole article, flatten it onto one line and lower-case it.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until garbage collection.
with open('1.0', encoding="utf-8") as article:
    tf = article.read().replace("\n", " ").lower()
# NOTE(review): the text is lower-cased and stop-word-filtered *before*
# sentence splitting; confirm this order does not hurt sentence detection.
tf = remove_stopwords(tf)   # stop words dropped, tokens re-joined by spaces
st = sentence_tokenize(tf)  # sentences of the filtered text
wt = word_tokenize(tf)      # word tokens of the filtered text

I have a file named '1.0' that contains an article. I used NLTK to split it into words and sentences. What should I do to remove all punctuation from the sentence list `st` and the word list `wt`?

uozcan12
  • 127
  • 1
  • 11
  • 2
    Does this answer your question? [How to get rid of punctuation using NLTK tokenizer?](https://stackoverflow.com/questions/15547409/how-to-get-rid-of-punctuation-using-nltk-tokenizer) – Masoud Gheisari May 27 '22 at 12:22

1 Answer

-1

I hope this example helps:

import string
from nltk.tokenize import word_tokenize

# Set of single ASCII punctuation characters: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
s = set(string.punctuation)
sentence = "Hey guys !, How are 'you' ?"
tokens = word_tokenize(sentence)
# Drop every token that consists solely of punctuation characters.
# Checking character by character (rather than `token not in s`) also
# removes multi-character tokens such as "..." or "--", which can never
# equal one of the single-character entries in `s`.
filtered_word = [tok for tok in tokens if not all(ch in s for ch in tok)]
# Print the surviving tokens on one line, separated by spaces.
for word in filtered_word:
    print(word, end=" ")
Talha Tayyab
  • 2,102
  • 9
  • 14
  • 25