#! /usr/bin/env python
"""Count unigram frequencies in a text file.

Reads ``furniture.txt`` line by line, splits each line into tokens with
``useful.tokenize``, tallies how often each token occurs, and writes the
result to ``unigrams.txt`` as one ``<count>\\t<unigram>`` record per line.
"""

from collections import Counter

from useful import tokenize

# Declare file to be worked with
textfile = "furniture.txt"

# Maps each unigram to the number of times it was seen.  Counter is a dict
# subclass, so missing keys start at 0 and no membership test is needed.
Unigrams = Counter()

# The with-block guarantees the input file is closed even if tokenize raises.
with open(textfile, 'r') as input_file:
    for line in input_file:
        # Drop the trailing newline/whitespace before tokenizing.
        line = line.rstrip()
        # NOTE(review): tokenize is defined in the project-local `useful`
        # module; it is assumed to return an iterable of hashable tokens.
        for word in tokenize(line):
            Unigrams[word] += 1

# Write unigrams to output file, one "<count>\t<unigram>" record per line.
with open('unigrams.txt', 'w') as output_file:
    for unigram in Unigrams:
        count = Unigrams[unigram]
        output_file.write(str(count) + '\t' + unigram + '\n')