### Python 2.7
from __future__ import division
import nltk
from nltk.corpus import wordnet as wn
from pattern.en import tag
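# one-time setup (an assumption about the environment, not stated in the
# original script): Pattern is Python 2 only (pip install pattern), and
# the WordNet corpus must be available; run nltk.download('wordnet')
# once if it is not.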
# choose text source & open it
source = open("frankenstein_fragment.txt", 'r')
# tokenize source and get Part-of-Speech tags for each word
definitions = []
for line in source:
    # pattern's tag() returns a list of (word, POS-tag) pairs
    collection = tag(line, tokenize=True, encoding='utf-8')
    # make sure it is a plain list so it can be manipulated
    collection = list(collection)
    # for each pair:
    for element in collection:
        # look for nouns & replace them with their definition
        if element[1] == "NN":
            synset = wn.synsets(element[0])
            # words without a WordNet entry (e.g. names) return an empty
            # list; keep them as plain words instead of crashing
            if synset:
                definitions.append("<")
                definitions.append(synset[0].definition())
                definitions.append(">")
            else:
                definitions.append(element[0])
        else:
            # non-nouns are left as words
            definitions.append(element[0])
# write the transformed text
print(" ".join(definitions))
# close the text file
source.close()
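# illustration of one replacement step (hypothetical sentence; the
# actual gloss depends on the installed WordNet version):
#   tag("The monster spoke.", tokenize=True)
#     -> [('The', 'DT'), ('monster', 'NN'), ('spoke', 'VBD'), ('.', '.')]
#   wn.synsets('monster')[0].definition()
#     -> the gloss of the first 'monster' synset, wrapped in < >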
# ----------------------------------------------------------------
## alternative using nltk
## read the whole text, split it into sentences, then into words
#data = open("frankenstein_fragment.txt").read()
#tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
## tokenize the input text into sentences
#print('\n-----\n'.join(tokenizer.tokenize(data)))
## tokenize the sentences into words
#tokens = nltk.wordpunct_tokenize(data)
#text = nltk.Text(tokens)
#words = [w.lower() for w in text]
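#
## a runnable sketch of the nltk-only variant: the same noun-for-definition
## swap as above, but with nltk.pos_tag() instead of pattern's tag(); it
## assumes the 'punkt' and 'averaged_perceptron_tagger' NLTK data packages
## are installed (pos_tag and wordpunct_tokenize are NLTK's, the helper
## name define_nouns_nltk is ours). Uncomment to try it:
#def define_nouns_nltk(path):
#    definitions = []
#    data = open(path).read()
#    # tokenize into words, then POS-tag each token
#    for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(data)):
#        if pos == "NN":
#            synsets = wn.synsets(word)
#            if synsets:
#                definitions.append("< " + synsets[0].definition() + " >")
#                continue
#        definitions.append(word)
#    return " ".join(definitions)
#
#print(define_nouns_nltk("frankenstein_fragment.txt"))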