class TreeNode:
    """One node of an FP-tree.

    Args:
        nodeName: item label stored at this node.
        count: initial occurrence count for the path ending at this node.
        nodeParent: parent ``TreeNode``, or ``None`` for a root.
    """

    def __init__(self, nodeName, count, nodeParent):
        # Item label carried by this node.
        self.nodeName = nodeName
        # Number of (weighted) transactions that pass through this node.
        self.count = count
        # Parent node; None for the root.
        self.nodeParent = nodeParent
        # Next node in the per-item chain that starts at the header table;
        # stays None until some other code links a further node.
        self.nextSimilarItem = None
        # Child nodes keyed by their item label.
        self.children = {}

    def increaseC(self, count):
        """Add ``count`` to this node's occurrence counter.

        Tree insertion calls this when a transaction re-uses an existing node
        instead of creating a new child.
        """
        self.count += count
def createFPTree(frozenDataSet, minSupport):
    """Build an FP-tree and its header table from a weighted transaction set.

    Args:
        frozenDataSet: mapping whose keys are hashable iterables of items
            (one key per distinct transaction) and whose values are the
            occurrence counts of those transactions.
        minSupport: minimum accumulated count an item needs to be kept.

    Returns:
        ``(fptree, headPointTable)`` where ``fptree`` is the root ``TreeNode``
        and ``headPointTable`` maps each frequent item to
        ``[support, firstNodeOrNone]``; ``(None, None)`` when no item reaches
        ``minSupport``.
    """
    # First scan: accumulate the weighted support of every item.
    itemSupport = {}
    for items, count in frozenDataSet.items():
        for item in items:
            itemSupport[item] = itemSupport.get(item, 0) + count

    # Keep only frequent items; slot 1 will later hold the first tree node.
    headPointTable = {
        item: [support, None]
        for item, support in itemSupport.items()
        if support >= minSupport
    }
    if not headPointTable:
        return None, None

    # Rank the frequent items ONCE, by descending support. sorted() is
    # stable, so items with equal support keep one fixed relative order for
    # every record. Ordering ties per record instead would let two records
    # insert the same items in different orders, splitting one logical path
    # into several branches and corrupting the mined counts.
    rankedItems = sorted(
        headPointTable,
        key=lambda item: headPointTable[item][0],
        reverse=True,
    )
    itemRank = {item: rank for rank, item in enumerate(rankedItems)}

    fptree = TreeNode("null", 1, None)

    # Second scan: drop infrequent items from each record, order the rest by
    # the global rank and insert the record into the tree.
    for items, count in frozenDataSet.items():
        orderedFrequentItems = sorted(
            (item for item in items if item in itemRank),
            key=itemRank.__getitem__,
        )
        if orderedFrequentItems:
            updateFPTree(fptree, orderedFrequentItems, headPointTable, count)

    return fptree, headPointTable
def updateFPTree(fptree, orderedFrequentItems, headPointTable, count):
    """Insert one ordered transaction into the tree rooted at ``fptree``.

    Walks down from ``fptree`` one item at a time. An existing child has its
    counter increased by ``count``; a missing child is created and registered
    in ``headPointTable``.

    Args:
        fptree: node under which the items are inserted.
        orderedFrequentItems: items of the transaction, already ordered.
            An empty sequence is a no-op.
        headPointTable: mapping item -> ``[support, firstNodeOrNone]``;
            slot 1 is filled in here for the first node of each item.
        count: weight of the transaction.

    Returns:
        None. ``fptree`` and ``headPointTable`` are modified in place.
    """
    # Iterative descent: the recursive form needed one stack frame plus one
    # list copy per item, which fails on long transactions.
    currentNode = fptree
    for item in orderedFrequentItems:
        childNode = currentNode.children.get(item)
        if childNode is not None:
            childNode.increaseC(count)
        else:
            childNode = TreeNode(item, count, currentNode)
            currentNode.children[item] = childNode
            # Only newly created nodes are registered with the header table.
            headNode = headPointTable[item][1]
            if headNode is None:
                headPointTable[item][1] = childNode
            else:
                # Presumably appends childNode to the item's node chain;
                # the helper is defined outside this block.
                updateHeadPointTable(headNode, childNode)
        currentNode = childNode
def mineFPTree(headPointTable, prefix, frequentPatterns, minSupport):
    """Recursively collect frequent patterns from a header table.

    For every item in ``headPointTable`` (visited in ascending order of
    support) the pattern ``prefix + item`` is recorded with the item's
    support, then the item's prefix paths are turned into a conditional
    FP-tree which is mined in turn with the extended prefix.

    Args:
        headPointTable: mapping item -> ``[support, firstNode]``.
        prefix: set of items already fixed for this branch; not modified.
        frequentPatterns: output dict, frozenset(pattern) -> support.
        minSupport: threshold passed on to ``createFPTree``.

    Returns:
        None. Results are written into ``frequentPatterns``.
    """
    # Least frequent items first; an empty table simply skips the loop.
    itemsBySupport = sorted(
        headPointTable, key=lambda item: headPointTable[item][0]
    )
    for headPointItem in itemsBySupport:
        extendedPrefix = prefix.copy()
        extendedPrefix.add(headPointItem)
        frequentPatterns[frozenset(extendedPrefix)] = headPointTable[headPointItem][0]

        conditionalBase = getPrefixPath(headPointTable, headPointItem)
        if conditionalBase == {}:
            # No prefix paths: nothing further to mine for this item.
            continue

        _, conditionalHeadPointTable = createFPTree(conditionalBase, minSupport)
        if conditionalHeadPointTable is None:
            # No item in the conditional base reached minSupport.
            continue

        mineFPTree(conditionalHeadPointTable, extendedPrefix, frequentPatterns, minSupport)
def getPrefixPath(headPointTable, headPointItem):
    """Collect the conditional pattern base of ``headPointItem``.

    Follows the node chain that starts at ``headPointTable[headPointItem][1]``
    and continues through ``nextSimilarItem``. For each node, the result of
    ``ascendTree(node)`` is stored as a frozenset key whose value is the
    node's ``count``.

    Args:
        headPointTable: mapping item -> ``[support, firstNodeOrNone]``.
        headPointItem: item whose prefix paths are wanted.

    Returns:
        dict mapping frozenset(prefix items) -> accumulated count. Empty when
        the item has no nodes or every node has an empty prefix.
    """
    prefixPath = {}
    node = headPointTable[headPointItem][1]
    # One loop covers the first node and all linked nodes; a None head
    # (item never inserted) yields an empty result.
    while node is not None:
        # ascendTree is defined outside this block; presumably it returns the
        # item labels found on the way up to the root.
        prefixs = ascendTree(node)
        if prefixs:
            pathKey = frozenset(prefixs)
            # Accumulate rather than overwrite: two nodes can share the same
            # prefix set, and both counts belong to the pattern base.
            prefixPath[pathKey] = prefixPath.get(pathKey, 0) + node.count
        node = node.nextSimilarItem
    return prefixPath
def rulesGenerator(frequentPatterns, minConf, rules):
    """Derive association rules from every multi-item frequent pattern.

    Args:
        frequentPatterns: dict frozenset(pattern) -> support.
        minConf: minimum confidence a rule needs to be kept.
        rules: output list; ``(antecedent, consequent, confidence)`` tuples
            are appended in place.

    Returns:
        None.
    """
    for frequentset in frequentPatterns:
        # Single-item patterns cannot be split into antecedent/consequent.
        if len(frequentset) > 1:
            getRules(frequentset, frequentset, rules, frequentPatterns, minConf)


def getRules(frequentset, currentset, rules, frequentPatterns, minConf, visited=None):
    """Append the rules ``subSet -> frequentset - subSet`` reachable from ``currentset``.

    Each element of ``currentset`` is removed in turn to form a candidate
    antecedent ``subSet``. Confidence is
    ``support(frequentset) / support(subSet)``. Candidates that reach
    ``minConf`` are appended to ``rules`` (unless an equal rule is already
    there) and, when they still have at least two elements, explored further.

    Args:
        frequentset: the full pattern the rules are derived from.
        currentset: the antecedent candidate being shrunk at this level.
        rules: output list, modified in place.
        frequentPatterns: dict frozenset(pattern) -> support.
        minConf: minimum confidence.
        visited: internal memo of antecedents already examined for this
            ``frequentset``; callers normally leave it as ``None``.

    Returns:
        None.
    """
    if visited is None:
        visited = set()

    for frequentElem in currentset:
        # removeStr is defined outside this block; presumably it returns
        # currentset without frequentElem as a hashable set.
        subSet = removeStr(currentset, frequentElem)

        # The same antecedent is reachable through every removal order.
        # Examining it once avoids factorial re-exploration and cannot change
        # the outcome, because its confidence does not depend on the route.
        if subSet in visited:
            continue
        visited.add(subSet)

        confidence = frequentPatterns[frequentset] / frequentPatterns[subSet]
        if confidence >= minConf:
            consequent = frequentset - subSet
            # Still guard against rules that were in the list before the call.
            alreadyKnown = any(
                rule[0] == subSet and rule[1] == consequent for rule in rules
            )
            if not alreadyKnown:
                rules.append((subSet, consequent, confidence))

            # NOTE(review): the nesting of this call is ambiguous in the
            # collapsed source. It is placed under the confidence check here;
            # a smaller antecedent has support >= support(subSet), so its
            # confidence cannot exceed this one.
            if len(subSet) >= 2:
                getRules(frequentset, subSet, rules, frequentPatterns, minConf, visited)
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |