from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import pandas as pd

# Sample sentences to analyse. The spacing inside each literal is part of the
# input data and is kept exactly as given.
zdania = [
    "To jest pierwsze         zdanie.",
    "To jest         drugie   zdanie.",
    "A na deser jeszcze jedno zdanie.",
]


def _pokaz_macierz(naglowek, macierz, nazwy_cech):
    """Print a heading, the matrix as a labelled DataFrame, and its shape.

    Args:
        naglowek: Heading line printed before the table.
        macierz: Sparse matrix returned by a vectorizer's ``fit_transform``.
        nazwy_cech: Column labels, one per matrix column.
    """
    print(naglowek)
    # Densify so that pandas can render one row per sentence.
    tabela = pd.DataFrame(macierz.toarray(), columns=nazwy_cech)
    print(tabela)
    print(macierz.shape)


# Term counts: one row per sentence, one column per vocabulary term.
tf_vectorizer = CountVectorizer()
tf_matrix = tf_vectorizer.fit_transform(zdania)
tf_feature_names = tf_vectorizer.get_feature_names_out()

# TF-IDF weights computed over the same sentences.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(zdania)
tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()

_pokaz_macierz("Macierz TF:", tf_matrix, tf_feature_names)

_pokaz_macierz("Macierz TF-IDF:", tfidf_matrix, tfidf_feature_names)