SMLHomework/Experiments/learningmethod/example.py

41 lines
998 B
Python

from sklearn.datasets import load_iris
from sklearn.feature_extraction.text import CountVectorizer
import pandas
# Example of dataset
iris = load_iris()
featureMatrix = iris.data
labelVector = iris.target
print(featureMatrix.shape)
print(labelVector.shape)
print(iris.feature_names)
print(featureMatrix[1])
# ----------------------#
# Text training example #
# ----------------------#
print("---")
dataset = ["call you tonight", "Call me a cab", "please call me... PLEASE!"]
vector = CountVectorizer()
# Learn the "vocabulary" of the training data (occurs in-place)
vector.fit(dataset)
featureNames = vector.get_feature_names()
print(featureNames)
# Transform training data into a "document-term matrix'
documentTermMatrix = vector.transform(dataset)
# convert sparse matrix to a dense matrix
documentTermMatrix.toarray()
# examine the vocabulary and document-term matrix together
df = pandas.DataFrame(documentTermMatrix.toarray(), columns=vector.get_feature_names())
print(df.head())