k-means:从 Python 2 迁移到 Python 3 时出错。TypeError: unorderable types: int() <= str()(类型错误:不可排序的类型)



我有一个基于k均值算法的程序。当我在python 2上运行程序时,没有任何问题。当我在python 3上运行它时,我会得到以下错误:

Traceback (most recent call last):
  File "kmeans.py", line 111, in <module>
    main()
  File "kmeans.py", line 13, in main
    clusters = kmeans(points, num_cluster, cutoff)
  File "kmeans.py", line 67, in kmeans
    initial = random.sample(points, k) # generating k random points for initial centroids
  File "/afs/cad/linux/anaconda3/anaconda/pkgs/python-3.5.2-1/lib/python3.5/random.py", line 314, in sample
    if not 0 <= k <= n:
TypeError: unorderable types: int() <= str()

这是我目前为止的代码,我似乎不知道如何修复。

import sys
import math
import random
def main():
    """Cluster the points stored in ``datafile.txt`` and print the result.

    Reads the data set, prompts on stdin for the number of clusters k, runs
    k-means with a convergence cutoff of 0.5 and prints every cluster.

    Raises:
        ValueError: If the text entered for k is not a valid integer.
    """
    points = readDataFile("datafile.txt")
    print('Original dataset: \n' + str(points) + '\n')
    # input() returns a str in Python 3 (Python 2 evaluated the typed text),
    # so convert explicitly: random.sample() needs an integer sample size.
    num_cluster = int(input('Please declare number of clusters k: '))
    cutoff = 0.5  # iteration cut off
    clusters = kmeans(points, num_cluster, cutoff)
    # Print our clusters
    for i, c in enumerate(clusters):
        print("Cluster " + str(i) + "\t" + str(c))
# reading in data points from file
def readDataFile(filename):
    """Read whitespace-separated 2-D integer points from a text file.

    Each non-blank line must hold at least two integers; the first two are
    used as the point's coordinates.

    Args:
        filename: Path of the data file to read.

    Returns:
        A list of Point objects, one per non-blank line, in file order.

    Raises:
        ValueError: If a coordinate is not a valid integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    points = []
    # The context manager closes the file even if parsing fails part-way.
    with open(filename) as data_file:
        for line in data_file:
            # split() with no argument drops the trailing newline and
            # tolerates repeated spaces or tabs between the fields.
            pieces = line.split()
            if not pieces:
                continue  # skip blank lines instead of failing on int('')
            points.append(Point([int(pieces[0]), int(pieces[1])]))
    return points
# point class to contain a set of 2d coordinates
class Point:
    """A point described by a list of coordinates and its dimension count."""

    def __init__(self, coords):
        # Store the coordinates together with how many of them there are.
        self.coords, self.n = coords, len(coords)

    def __repr__(self):
        # A point prints exactly like its coordinate list.
        text = str(self.coords)
        return text
# cluster class to define cluster functionality
class Cluster:
    """A group of points together with their centroid (coordinate-wise mean)."""

    def __init__(self, points):
        """Create a cluster from a non-empty list of points.

        Args:
            points: Non-empty list of Point objects; the dimension count is
                taken from the first one.

        Raises:
            IndexError: If points is empty.
        """
        self.points = points
        self.n = points[0].n
        self.centroid = self.calculateCentroid()

    def __repr__(self):
        """Return the string form of the cluster's point list."""
        return str(self.points)

    def update(self, points):
        """Replace the cluster's points and recompute its centroid.

        Args:
            points: The new list of Point objects assigned to this cluster.

        Returns:
            The distance the centroid moved because of the update.
        """
        old_centroid = self.centroid
        self.points = points
        if not points:
            # An empty cluster has no mean. Recomputing would yield a
            # zero-dimensional centroid and make getDistance() fail, so the
            # previous centroid is kept and no movement is reported.
            return 0.0
        self.centroid = self.calculateCentroid()
        return getDistance(old_centroid, self.centroid)

    def calculateCentroid(self):
        """Return a Point holding the mean of each coordinate of the points."""
        numPoints = len(self.points)
        coords = [p.coords for p in self.points]
        # zip(*coords) regroups the values per dimension, so each group can
        # be averaged on its own.
        centroid_coords = [math.fsum(dList) / numPoints for dList in zip(*coords)]
        return Point(centroid_coords)
# kmean algo to cluster data
def kmeans(points, k, cutoff):
    """Group points into k clusters with the k-means procedure.

    Args:
        points: List of Point objects to cluster.
        k: Number of clusters; k distinct entries of points are drawn at
            random as the starting centroids.
        cutoff: Iteration stops once no centroid moved this far or further
            during a pass.

    Returns:
        The list of Cluster objects after the final pass.
    """
    # Seed one single-point cluster per randomly drawn starting point.
    seeds = random.sample(points, k)
    clusters = [Cluster([seed]) for seed in seeds]
    while True:
        # One bucket of points per cluster for this pass.
        buckets = [[] for _ in clusters]
        for point in points:
            # Start with the first cluster as the best candidate, then look
            # for a strictly closer centroid among the remaining ones.
            nearest, best = 0, getDistance(point, clusters[0].centroid)
            for idx, candidate in enumerate(clusters[1:], start=1):
                dist = getDistance(point, candidate.centroid)
                if dist < best:
                    nearest, best = idx, dist
            buckets[nearest].append(point)
        # Hand each cluster its new members and track the largest centroid move.
        largest_shift = 0.0
        for cluster, members in zip(clusters, buckets):
            largest_shift = max(largest_shift, cluster.update(members))
        # Converged: no centroid moved by the cutoff or more.
        if largest_shift < cutoff:
            return clusters

# generates euclidean distance between two points
def getDistance(a, b):
    """Return the Euclidean distance between two points.

    Args:
        a: Object with ``coords`` (a sequence of numbers) and ``n`` (the
            number of dimensions to compare).
        b: Object with ``coords`` holding at least ``a.n`` numbers.

    Returns:
        The distance as a float.
    """
    # Python 3 no longer has reduce() as a builtin (it lives in functools),
    # so the squared differences are accumulated with the built-in sum().
    squared = sum(((a.coords[i] - b.coords[i]) ** 2 for i in range(a.n)), 0.0)
    return math.sqrt(squared)
# Entry point: run the clustering demo only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

此外,我的数据文件(代码中读取的 datafile.txt)如下所示:

0 0
0 1
1 0
10 10
10 11
11 10
11 11

如有任何帮助,我将不胜感激。

# Excerpt from main() that triggers the error: in Python 3, input() returns
# a str, so num_cluster reaches kmeans() as a string.
num_cluster = input ('Please declare number of clusters k: ') #k
cutoff = 0.5 # iter cut off
clusters = kmeans(points, num_cluster, cutoff)

在 Python 3 中,input() 返回的是字符串(Python 2 的 input() 会对输入内容求值,所以输入数字时得到的是 int,程序才能正常运行),因此需要将其显式转换为 int:

num_cluster = int(input ('Please declare number of clusters k: ')) # k, converted from the str that input() returns

相关内容

最新更新