### Python 2.7

import nltk
from nltk.corpus import wordnet as wn
from pattern.en import tag

# choose text source & open it
source = open("frankenstein_fragment.txt", 'r')

# the transformed text is collected in this list
definitions = []

# tokenize the source & get a Part-of-Speech tag for each word
for line in source:
        # tag() returns a list of (word, POS-tag) tuples
        collection = tag(line, tokenize=True, encoding='utf-8')
        # for each pair:
        for element in collection:
                # look up nouns in WordNet & replace them with their definition
                if element[1] == "NN":
                        synsets = wn.synsets(element[0])
                        # guard against nouns WordNet does not know
                        if synsets:
                                definitions.append("<")
                                definitions.append(synsets[0].definition())
                                definitions.append(">")
                        else:
                                definitions.append(element[0])
                else:
                        # non-nouns are left as words
                        definitions.append(element[0])
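
## note: "NN" matches singular common nouns only; testing
## element[1].startswith("NN") instead would also catch plurals (NNS)
## & proper nouns (NNP/NNPS)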

# print the transformed text
print(" ".join(definitions))

# close the text file
source.close()
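
# ----------------------------------------------------------------
## the WordNet lookup in isolation, as a minimal sketch (assumes the
## wordnet corpus has been fetched once with nltk.download('wordnet')):
# synsets = wn.synsets("monster")
# if synsets:
#         # definition() returns the dictionary gloss of the first sense
#         print(synsets[0].definition())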



# ----------------------------------------------------------------
## alternative using nltk only
## here, data holds the raw text, e.g.:
# data = open("frankenstein_fragment.txt").read()

## load the punkt tokenizer, which splits a text into sentences
# tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

## print the text one sentence per block
# print('\n-----\n'.join(tokenizer.tokenize(data)))

## tokenize the text into word & punctuation tokens
# tokens = nltk.wordpunct_tokenize(data)
# text = nltk.Text(tokens)
# words = [w.lower() for w in text]
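
# ----------------------------------------------------------------
## a sketch of the full noun-replacement using nltk's own POS tagger
## instead of pattern.en (an assumption, not part of the original script;
## needs the 'punkt', 'averaged_perceptron_tagger' & 'wordnet' data
## packages, installable via nltk.download())
# output = []
# for word, pos in nltk.pos_tag(nltk.word_tokenize(data)):
#         synsets = wn.synsets(word) if pos == "NN" else []
#         if synsets:
#                 output.append("< " + synsets[0].definition() + " >")
#         else:
#                 output.append(word)
# print(" ".join(output))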