OpenAI Bot Engine
import os
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from cryptography.fernet import Fernet
class CollegeAIEngine:
    """Answer free-text questions about US colleges through an OpenAI chat model."""

    @staticmethod
    def get_openai_answer(user_input):
        """Return the chat model's answer to ``user_input``.

        The OpenAI API key is kept in this file as a Fernet-encrypted token and
        is decrypted with the key read from the ``CRYPTO_KEY`` environment
        variable. The question is placed into a prompt that asks the model to
        answer using information only about colleges in the United States.

        Args:
            user_input: Question text substituted into the prompt template.

        Returns:
            Whatever ``LLMChain.run`` returns for the filled-in prompt.

        Raises:
            ValueError: If ``CRYPTO_KEY`` is unset or empty.

        Errors raised while decrypting the token or calling the model are not
        caught here and propagate to the caller.
        """
        encrypted_app_token = 'gAAAAABlOLQoLplaL2-lfD1T4VkBXnkKxq1XK_VlVHiEm7MaftNJmZ4f-7rQlUws-NIMHjpWOMtevkwB5NX7f4kqknvrVtwH3ccAsOHB_Yg9dzksRxh5yVuuIXRD3hov8yU6BSXwd-HLTnBRLX5ARDOqzxJoK6M15A=='

        crypto_key = os.getenv("CRYPTO_KEY")
        # An empty value is as unusable as a missing one, so reject both here
        # with the same clear message rather than passing it on to Fernet.
        if not crypto_key:
            raise ValueError("CRYPTO_KEY environment variable is not set.")

        cipher_suite = Fernet(crypto_key)
        api_key = cipher_suite.decrypt(encrypted_app_token).decode()

        # The key is also exported process-wide; presumably other code reads
        # OPENAI_API_KEY from the environment -- confirm before removing.
        os.environ["OPENAI_API_KEY"] = api_key

        # Same text as the original triple-quoted template, including the
        # leading and trailing newline.
        summary_template = (
            "\n"
            "{user_input}, answer using information only about colleges in United States of America"
            "\n"
        )
        summary_prompt_template = PromptTemplate(
            input_variables=["user_input"], template=summary_template
        )

        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k", api_key=api_key)
        chain = LLMChain(llm=llm, prompt=summary_prompt_template)
        return chain.run(user_input=user_input)
# Example usage:
# result = CollegeAIEngine.get_openai_answer(user_input="Your question here.")
# print(result)
The code defines a CollegeAIEngine class with a static method get_openai_answer that uses the OpenAI API to answer queries related to colleges in the USA. It first decrypts an encrypted API token using a CRYPTO_KEY from the environment variables, then sets up a prompt template to include the user input into a predefined structure, ensuring the response is focused on US colleges. It utilizes the langchain library to integrate with a language model from OpenAI, sending the structured prompt and returning the generated response. An example usage is provided at the end, demonstrating how to call the method with a user question and print the answer.
Machine Learning Engine
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
class RecEngine:
    """Linear-regression predictor trained from ``MLData.csv`` on construction."""

    # Fitted estimator; remains None until buildModel() has run.
    regressor = None

    def __init__(self):
        """Train the model immediately so predict() can be called right away."""
        self.buildModel()

    def buildModel(self):
        """Load the training CSV, fit the regressor and print hold-out metrics.

        The first two CSV columns are used as features and the third as the
        target (presumably GPA and SAT, matching predict()'s arguments --
        confirm against the data file). 20% of the rows are held out with a
        fixed random seed, and the MAE and R2 score on that hold-out set are
        printed to stdout.
        """
        basedir = os.path.abspath(os.path.dirname(__file__))
        # The data file lives in ../static/data relative to this module.
        file_path = os.path.join(basedir, "..", "static", "data", "MLData.csv")
        dataset = pd.read_csv(file_path, header=0)

        array = dataset.values
        x = array[:, 0:2]
        y = array[:, 2]
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(x, y, test_size=0.2, random_state=0)

        # Ordinary least-squares regression; no regularization is applied.
        self.regressor = LinearRegression()
        self.regressor.fit(X_Train, Y_Train)

        Y_Prediction = self.regressor.predict(X_Test)
        df = pd.DataFrame({'Actual' : Y_Test, 'Predicted': Y_Prediction})
        print(df)

        mae = metrics.mean_absolute_error(Y_Test, Y_Prediction)
        r2 = metrics.r2_score(Y_Test, Y_Prediction)
        print("The model performance for testing set")
        print("-------------------------------------")
        print('MAE is {}'.format(mae))
        print('R2 score is {}'.format(r2))

    def predict(self, gpa, sat):
        """Return the model's prediction for a single (gpa, sat) pair.

        Args:
            gpa: First feature value passed to the regressor.
            sat: Second feature value passed to the regressor.

        Returns:
            The first (and only) element of the regressor's output.
        """
        prediction = self.regressor.predict([[gpa, sat]])
        return prediction[0]
# Module-level demo: constructing RecEngine trains the model, so this runs
# whenever the file is executed or imported, then prints one sample prediction.
model = RecEngine()
prediction = model.predict(gpa=4.0, sat=1600)
print(f"The predicted value is {prediction}")

# The string literal below is never executed. It holds the same evaluation
# statements that RecEngine.buildModel() already runs.
"""Y_Prediction = regressor.predict(X_Test)
df = pd.DataFrame({'Actual' : Y_Test, 'Predicted': Y_Prediction})
print(df)
mae = metrics.mean_absolute_error(Y_Test, Y_Prediction)
r2 = metrics.r2_score(Y_Test, Y_Prediction)
print("The model performance for testing set")
print("-------------------------------------")
print('MAE is {}'.format(mae))
print('R2 score is {}'.format(r2))
"""
The Python script sets up a RecEngine class that encapsulates the functionality of a linear regression-based recommendation engine. Upon instantiation, the class automatically builds a predictive model using data from ‘MLData.csv’ by splitting it into training and test sets, training a LinearRegression model, and evaluating its performance with metrics such as MAE and R-squared. The class also provides a predict method to make individual predictions based on GPA and SAT inputs. The model is instantiated at module level, so it is trained as soon as the script is run or imported, and it is then used to predict an output given a GPA of 4.0 and an SAT score of 1600, with the result printed to the console. The triple-quoted string at the end is never executed; it appears to be a previous version of the model evaluation step that has since been integrated into the buildModel method.
College Data Model
from __init__ import login_manager, app, db
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
import os
from sqlalchemy.orm import relationship
from sqlalchemy.exc import IntegrityError
import pandas as pd
class College(db.Model, UserMixin):
    """ORM model for one row of the ``colleges`` table."""

    __tablename__ = 'colleges'

    # Identity and location.
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), unique=True, index=True, nullable=True)
    state = db.Column(db.String(8), index=True, nullable=True)
    city = db.Column(db.String(64), index=True, nullable=True)
    zip = db.Column(db.Integer, index=True, nullable=True)

    # Classification and presentation.
    type = db.Column(db.String(64), index=True, nullable=True)
    image = db.Column(db.String(256), index=True, nullable=True)
    ranking = db.Column(db.Integer, index=True, nullable=True)

    # Admissions, cost and size figures.
    ACTAvg = db.Column(db.Integer, index=True, nullable=True)
    aidpercent = db.Column(db.Integer, index=True, nullable=True)
    acceptance = db.Column(db.Integer, index=True, nullable=True)
    fees = db.Column(db.Integer, index=True, nullable=True)
    GPAAvg = db.Column(db.Float, index=True, nullable=True)
    enrollment = db.Column(db.Integer, index=True, nullable=True)
    SATAvg = db.Column(db.Integer, index=True, nullable=True)
    SATRange = db.Column(db.String(64), index=True, nullable=True)
    ACTRange = db.Column(db.String(64), index=True, nullable=True)

    # General institutional facts.
    YearFounded = db.Column(db.Integer, index=True, nullable=True)
    AcademicCalendar = db.Column(db.String(128), index=True, nullable=True)
    setting = db.Column(db.String(64), index=True, nullable=True)
    SchoolWebsite = db.Column(db.String(128), index=True, nullable=True)

    # Favorite rows that reference this college; exposed on Favorite as `College`.
    favorites = db.relationship('Favorite', backref='College', uselist=True, lazy='dynamic')

    def __init__(self, name, state, city, zip, type, image, ranking, ACTAvg,
                 aidpercent, acceptance, fees, GPAAvg, enrollment, SATAvg,
                 SATRange, ACTRange, YearFounded, AcademicCalendar, setting,
                 SchoolWebsite):
        """Store every constructor argument on the column of the same name."""
        self.name, self.state, self.city, self.zip = name, state, city, zip
        self.type, self.image, self.ranking = type, image, ranking
        self.ACTAvg, self.aidpercent, self.acceptance = ACTAvg, aidpercent, acceptance
        self.fees, self.GPAAvg, self.enrollment = fees, GPAAvg, enrollment
        self.SATAvg, self.SATRange, self.ACTRange = SATAvg, SATRange, ACTRange
        self.YearFounded, self.AcademicCalendar = YearFounded, AcademicCalendar
        self.setting, self.SchoolWebsite = setting, SchoolWebsite

    def alldetails(self):
        """Return a dict with the id and every stored column of this college."""
        field_names = (
            "id", "name", "state", "city", "zip", "type", "image", "ranking",
            "ACTAvg", "aidpercent", "acceptance", "fees", "GPAAvg",
            "enrollment", "SATAvg", "SATRange", "ACTRange", "YearFounded",
            "AcademicCalendar", "setting", "SchoolWebsite",
        )
        return {field: getattr(self, field) for field in field_names}

    def fewdetails(self):
        """Return a short dict: id, name, state, city, image and ranking."""
        field_names = ("id", "name", "state", "city", "image", "ranking")
        return {field: getattr(self, field) for field in field_names}
# CSV header name -> College constructor argument it feeds.
_CSV_COLUMN_TO_FIELD = {
    "Name": "name",
    "State": "state",
    "City": "city",
    "Zip": "zip",
    "Type": "type",
    "Image": "image",
    "Ranking": "ranking",
    "ACT": "ACTAvg",
    "AidPercentage": "aidpercent",
    "AcceptanceRate": "acceptance",
    "Fees": "fees",
    "GPAAvg": "GPAAvg",
    "Enrollment": "enrollment",
    "SATAvg": "SATAvg",
    "SATRange": "SATRange",
    "ACTRange": "ACTRange",
    "YearFounded": "YearFounded",
    "AcademicCalendar": "AcademicCalendar",
    "Setting": "setting",
    "SchoolWebsite": "SchoolWebsite",
}


def _college_kwargs(row):
    """Map one CSV row to College keyword arguments, with missing cells as None."""
    return {
        field: (row[column] if pd.notna(row[column]) else None)
        for column, field in _CSV_COLUMN_TO_FIELD.items()
    }


def initColleges():
    """Create the database tables and seed ``colleges`` from CollegeData.csv.

    Seeding is skipped when the colleges table already contains rows.
    Each CSV row is committed on its own so that one bad or duplicate row
    does not stop the remaining rows from loading; failures are reported on
    stdout and the session is rolled back before moving on.
    """
    with app.app_context():
        print("Creating college tables")
        db.create_all()

        # Nothing to do when the table has already been populated.
        if db.session.query(College).count() > 0:
            return

        basedir = os.path.abspath(os.path.dirname(__file__))
        file_path = os.path.join(basedir, "..", "static", "data", "CollegeData.csv")
        df = pd.read_csv(file_path)

        for index, row in df.iterrows():
            name = None
            try:
                kwargs = _college_kwargs(row)
                name = kwargs["name"]
                db.session.add(College(**kwargs))
                db.session.commit()
            except IntegrityError:
                # Bad or duplicate data: discard the failed insert.
                db.session.rollback()
                print(f"Records exist, duplicate college, or error: {name}")
            except Exception as e_inner:
                # Without a rollback the session stays in its failed state and
                # every following row would fail as well.
                db.session.rollback()
                print(f"Error adding college at index {index}: {str(e_inner)}")
The provided Python code integrates with a Flask web application to define a database model for a College using SQLAlchemy. The College class includes a variety of attributes related to college information such as name, location, rankings, and admissions data, and provides methods to output detailed or brief college profiles. It also includes an initColleges function to populate the database with college information from a CSV file, handling duplicates and errors gracefully.