# Source: Amazing-Python-Scripts (original listing: 63 lines, 1.9 KB)
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


def sentiment_analysis(data_path='data.csv'):
    """
    Perform sentiment analysis using an SVM classifier.

    Reads a CSV file, normalizes its 'text' column with ``preprocess_text``,
    converts the text to TF-IDF features and trains a linear-kernel SVM to
    predict the 'label' column. The data is split 80/20 into training and
    test sets (``random_state=42``); the accuracy and the classification
    report for the test set are printed.

    Args:
        data_path: Path of the CSV file to load. The file must provide a
            'text' column and a 'label' column. Defaults to 'data.csv'.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
        KeyError: If the 'text' or 'label' column is missing.
    """
    data = pd.read_csv(data_path)

    # A missing text cell is read as a float NaN, which has no string
    # methods, and a row without a label cannot be used for training;
    # drop both kinds of rows instead of crashing later on.
    data = data.dropna(subset=['text', 'label'])

    # Preprocess data (normalize whitespace-like separators, lowercase).
    texts = data['text'].astype(str).apply(preprocess_text)
    labels = data['label']

    # Split BEFORE vectorizing so the test documents cannot influence the
    # vocabulary or the IDF weights (avoids train/test leakage).
    train_texts, test_texts, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42)

    # Learn the TF-IDF vocabulary on the training texts only, then apply
    # the same fitted transformation to the test texts.
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(train_texts)
    X_test = vectorizer.transform(test_texts)

    # Train an SVM classifier
    svm_classifier = SVC(kernel='linear')
    svm_classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = svm_classifier.predict(X_test)

    # Calculate and print accuracy and classification report
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=1))


def preprocess_text(text):
    """
    Normalize a piece of raw text before vectorization.

    Every newline, tab and hyphen is replaced by a single space, and the
    result is converted to lowercase.

    Args:
        text: The string to normalize.

    Returns:
        The normalized, lowercased string.
    """
    # One translation table handles all three separator characters in a
    # single pass over the string.
    separators_to_space = str.maketrans({'\n': ' ', '\t': ' ', '-': ' '})
    return text.translate(separators_to_space).lower()


# Entry point: run the analysis only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    sentiment_analysis()