#! /usr/bin/env python
"""Count word bigrams in ``furniture.txt`` and write them to ``bigrams.txt``.

The input file is read line by line; each line is stripped of trailing
whitespace and passed to ``tokenize`` (imported from the project-local
``useful`` module).  Every token is paired with the token that preceded it
to form a bigram of the form ``"<previous> <current>"``.  The very first
token is paired with the sentinel ``"START"``.  The previous word is NOT
reset at line boundaries, so a bigram may span two consecutive lines.

The output file contains one line per distinct bigram in the form
``<count><TAB><bigram>``.
"""
from collections import Counter

from useful import tokenize

# Declare file to be worked with
textfile = "furniture.txt"

# Initialize variables: bigram -> number of occurrences, and the word that
# precedes the first token of the text.
Bigrams = Counter()
prev_word = "START"

# ``with`` guarantees the input file is closed even if tokenize() or a read
# raises part-way through.
with open(textfile, 'r') as input_file:
    for line in input_file:
        line = line.rstrip()
        # tokenize the text
        # NOTE(review): tokenize is assumed to yield strings, since each
        # token is concatenated with str below -- confirm against `useful`.
        tokens = tokenize(line)
        # loop over words in input:
        for word in tokens:
            # concatenate words to get bigram:
            bigram = prev_word + ' ' + word
            Bigrams[bigram] += 1
            # the current word becomes the left half of the next bigram
            prev_word = word

# Write bigrams to output file, one "<count>\t<bigram>" record per line.
with open('bigrams.txt', 'w') as output_file:
    for bigram, count in Bigrams.items():
        output_file.write(str(count) + '\t' + bigram + '\n')