# stoplist -
# loads a stop list of words into a dictionary,
# creates a test dictionary with a random selection of the stoplist words,
# and then applies the stop list to the test dictionary,
# i.e. eliminates all the words in the stoplist from the test dictionary

import random
import string  # no longer used below; kept in case other code relies on it

# File read by main() when the module is run as a script.
DEFAULT_STOPLIST_FILE = 'stoplist3.txt'


def load_stoplist(filename):
    """Read a stop list file (one word per line) into a dictionary.

    Each line is stripped of surrounding whitespace.  Lines that are
    empty after stripping are skipped so that the empty string never
    becomes a stop word.

    Args:
        filename: path of the text file to read.

    Returns:
        A dictionary mapping every stop word to 1.
    """
    stoplist = {}
    # 'with' guarantees the file is closed even if reading fails.
    with open(filename, 'r') as infile:
        for line in infile:
            word = line.strip()
            if word:
                stoplist[word] = 1
    return stoplist


def apply_stoplist(stoplist, dict):
    """Delete from ``dict``, in place, every key found in ``stoplist``.

    Args:
        stoplist: container of stop words (membership is tested with ``in``).
        dict: dictionary to filter; it is modified in place.

    Returns:
        None.
    """
    # Collect the doomed keys first: deleting while iterating over the
    # dictionary itself is an error.
    todelete = [key for key in dict if key in stoplist]
    for key in todelete:
        del dict[key]


def make_random_selection(dict, count=99):
    """Draw ``count`` keys from ``dict`` at random, with replacement.

    Every key has the same chance of being drawn on each pick, including
    the last one and including the only one of a single-entry dictionary.

    Args:
        dict: dictionary whose keys are sampled.
        count: number of draws; the default of 99 is the number the
            original hard-coded loop produced.

    Returns:
        A list of ``count`` keys (duplicates possible), or an empty list
        when ``dict`` has no keys.
    """
    keys = list(dict)  # explicit list: key views are not indexable on Python 3
    if not keys:
        return []
    return [random.choice(keys) for _ in range(count)]


def list_to_dict(list):
    """Return a dictionary mapping each item of ``list`` to 1.

    Duplicate items collapse into a single key.
    """
    return {item: 1 for item in list}


def main(filename=DEFAULT_STOPLIST_FILE):
    """Load the stop list, build a random test dictionary, and filter it.

    Prints the test dictionary before and after the stop list is applied.
    """
    stoplist = load_stoplist(filename)
    words = make_random_selection(stoplist)
    words_dict = list_to_dict(words)
    # Single pre-formatted argument keeps the output identical whether
    # print is a statement (Python 2) or a function (Python 3).
    print("words_dict:  %s" % (words_dict,))
    apply_stoplist(stoplist, words_dict)
    print("w/ stop words eliminated:  %s" % (words_dict,))


if __name__ == '__main__':
    main()