class BisectingKMeans extends Logging
A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
by Steinbach, Karypis, and Kumar, with modification to fit Spark.
The algorithm starts from a single cluster that contains all points.
Iteratively it finds divisible clusters on the bottom level and bisects each of them using
k-means, until there are k leaf clusters in total or no leaf clusters are divisible.
The bisecting steps of clusters on the same level are grouped together to increase parallelism.
If bisecting all divisible clusters on the bottom level would result in more than k leaf clusters,
larger clusters get higher priority.
- Annotations
- @Since( "1.6.0" )
- Source
- BisectingKMeans.scala
- See also
- Alphabetic
- By Inheritance
- BisectingKMeans
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- 
      
      
      
        
      
    
      
        
        new
      
      
        BisectingKMeans()
      
      
      Constructs with the default configuration. - Annotations
- @Since( "1.6.0" )
 
Value Members
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        !=(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ##(): Int
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ==(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        asInstanceOf[T0]: T0
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        clone(): AnyRef
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        eq(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        equals(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        finalize(): Unit
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        getClass(): Class[_]
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        getDistanceMeasure: String
      
      
      The distance suite used by the algorithm. - Annotations
- @Since( "2.4.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        getK: Int
      
      
      Gets the desired number of leaf clusters. - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        getMaxIterations: Int
      
      
      Gets the max number of k-means iterations to split clusters. - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        getMinDivisibleClusterSize: Double
      
      
      Gets the minimum number of points (if greater than or equal to 1.0) or the minimum proportion of points (if less than 1.0) of a divisible cluster. - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        getSeed: Long
      
      
      Gets the random seed. - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        hashCode(): Int
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        isInstanceOf[T0]: Boolean
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        isTraceEnabled(): Boolean
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        log: Logger
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logName: String
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ne(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notify(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notifyAll(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        run(data: JavaRDD[Vector]): BisectingKMeansModel
      
      
      Java-friendly version of run().
- 
      
      
      
        
      
    
      
        
        def
      
      
        run(input: RDD[Vector]): BisectingKMeansModel
      
      
      Runs the bisecting k-means algorithm. - input
- RDD of vectors 
- returns
- model for the bisecting k-means
 - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        setDistanceMeasure(distanceMeasure: String): BisectingKMeans.this.type
      
      
      Set the distance suite used by the algorithm. - Annotations
- @Since( "2.4.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        setK(k: Int): BisectingKMeans.this.type
      
      
      Sets the desired number of leaf clusters (default: 4). The actual number could be smaller if there are no divisible leaf clusters. - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        setMaxIterations(maxIterations: Int): BisectingKMeans.this.type
      
      
      Sets the max number of k-means iterations to split clusters (default: 20). - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        setMinDivisibleClusterSize(minDivisibleClusterSize: Double): BisectingKMeans.this.type
      
      
      Sets the minimum number of points (if greater than or equal to 1.0) or the minimum proportion of points (if less than 1.0) of a divisible cluster (default: 1). - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        setSeed(seed: Long): BisectingKMeans.this.type
      
      
      Sets the random seed (default: hash value of the class name). - Annotations
- @Since( "1.6.0" )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        synchronized[T0](arg0: ⇒ T0): T0
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        toString(): String
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long, arg1: Int): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()