OpenAI Bot Engine

import os
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from cryptography.fernet import Fernet


class CollegeAIEngine:
    """Answers questions about US colleges through an OpenAI chat model."""

    @staticmethod
    def get_openai_answer(user_input):
        """Send ``user_input`` to the chat model and return its reply.

        The OpenAI API key is kept in this file as a Fernet-encrypted token
        and is decrypted with the key read from the ``CRYPTO_KEY`` environment
        variable. The decrypted key is also written to ``OPENAI_API_KEY`` in
        the process environment.

        Args:
            user_input: The question text inserted into the prompt.

        Returns:
            Whatever ``LLMChain.run`` returns for the filled-in prompt.

        Raises:
            ValueError: If ``CRYPTO_KEY`` is not set.
        """
        # Fernet token that wraps the OpenAI API key.
        sealed_key = 'gAAAAABlOLQoLplaL2-lfD1T4VkBXnkKxq1XK_VlVHiEm7MaftNJmZ4f-7rQlUws-NIMHjpWOMtevkwB5NX7f4kqknvrVtwH3ccAsOHB_Yg9dzksRxh5yVuuIXRD3hov8yU6BSXwd-HLTnBRLX5ARDOqzxJoK6M15A=='

        fernet_secret = os.getenv("CRYPTO_KEY")
        if fernet_secret is None:
            raise ValueError("CRYPTO_KEY environment variable is not set.")

        openai_key = Fernet(fernet_secret).decrypt(sealed_key).decode()
        os.environ["OPENAI_API_KEY"] = openai_key

        # The trailing instruction restricts answers to US colleges.
        prompt_text = """
            {user_input}, answer using information only about colleges in United States of America
        """
        question_prompt = PromptTemplate(
            input_variables=["user_input"],
            template=prompt_text,
        )

        # temperature=0 asks the model for its most deterministic output.
        chat_model = ChatOpenAI(
            temperature=0,
            model_name="gpt-3.5-turbo-16k",
            api_key=openai_key,
        )

        return LLMChain(llm=chat_model, prompt=question_prompt).run(
            user_input=user_input
        )

# Example usage:
# result = CollegeAIEngine.get_openai_answer(user_input="Your question here.")
# print(result)

The code defines a CollegeAIEngine class with a static method, get_openai_answer, that uses the OpenAI API to answer queries about colleges in the USA. It first decrypts an encrypted API token using the CRYPTO_KEY environment variable (raising an error if that variable is not set), then builds a prompt template that embeds the user input in a predefined structure so the response stays focused on US colleges. It uses the langchain library to send the structured prompt to an OpenAI chat model and returns the generated response. A commented-out usage example at the end shows how to call the method with a user question and print the answer.

Machine Learning Engine

import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

class RecEngine:
    """Linear-regression predictor trained on ``static/data/MLData.csv``.

    The model is fitted as soon as an instance is created; ``predict`` then
    returns the model's output for a single (gpa, sat) pair.
    """

    # Fitted estimator; remains None until buildModel() has run.
    regressor = None

    def __init__(self):
        """Train the model immediately so the instance is ready to predict."""
        self.buildModel()

    def buildModel(self):
        """Load the CSV, fit the regressor and print a test-set report.

        The first two CSV columns are used as features and the third as the
        target. 20% of the rows are held out (fixed ``random_state=0``) and
        used only for the printed MAE / R2 evaluation.
        """
        basedir = os.path.abspath(os.path.dirname(__file__))

        # Data file lives in ../static/data relative to this module.
        file_path = os.path.join(basedir, "..", "static", "data", "MLData.csv")

        dataset = pd.read_csv(file_path, header=0)
        array = dataset.values

        # predict() passes [gpa, sat] in this order, so the two feature
        # columns are presumably GPA and SAT -- verify against the CSV header.
        x = array[:, 0:2]
        y = array[:, 2]

        X_Train, X_Test, Y_Train, Y_Test = train_test_split(x, y, test_size=0.2, random_state=0)

        # Ordinary least-squares linear regression (no regularization).
        self.regressor = LinearRegression()
        self.regressor.fit(X_Train, Y_Train)

        Y_Prediction = self.regressor.predict(X_Test)

        df = pd.DataFrame({'Actual' : Y_Test, 'Predicted': Y_Prediction})

        print(df)

        mae = metrics.mean_absolute_error(Y_Test, Y_Prediction)
        r2 = metrics.r2_score(Y_Test, Y_Prediction)

        print("The model performance for testing set")
        print("-------------------------------------")
        print('MAE is {}'.format(mae))
        print('R2 score is {}'.format(r2))

    def predict(self, gpa, sat):
        """Return the model's prediction for one (gpa, sat) pair.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.regressor is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError("Model has not been trained; call buildModel() first.")
        prediction = self.regressor.predict([[gpa, sat]])
        return prediction[0]


# Module-level demo: constructing RecEngine trains the model (and prints its
# evaluation report), then one prediction is made for GPA 4.0 / SAT 1600.
# NOTE(review): this runs whenever the module is imported -- confirm that is intended.
model = RecEngine()
prediction = model.predict(4.0, 1600)
print(f"The predicted value is {prediction}")


# The string literal below is never assigned or executed. It holds an older
# copy of the evaluation code that now lives in RecEngine.buildModel.
"""Y_Prediction = regressor.predict(X_Test)

df = pd.DataFrame({'Actual' : Y_Test, 'Predicted': Y_Prediction})

print(df)

mae = metrics.mean_absolute_error(Y_Test, Y_Prediction)
r2 = metrics.r2_score(Y_Test, Y_Prediction)

print("The model performance for testing set")
print("-------------------------------------")
print('MAE is {}'.format(mae))
print('R2 score is {}'.format(r2))
"""

The Python script sets up a RecEngine class that encapsulates the functionality of a linear regression-based recommendation engine. Upon instantiation, the class automatically builds a predictive model using data from ‘MLData.csv’ by splitting it into training and test sets, training a LinearRegression model, and evaluating its performance with metrics such as MAE and R-squared. The class also provides a predict method to make individual predictions based on GPA and SAT inputs. The model is immediately instantiated and used to predict an output given a GPA of 4.0 and an SAT score of 1600, with the result printed to the console. The commented-out code at the end seems to be a previous version of the model evaluation step that has since been integrated into the buildModel method.

College Data Model

from __init__ import login_manager, app, db
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
import os
from sqlalchemy.orm import relationship
from sqlalchemy.exc import IntegrityError
import pandas as pd


class College(db.Model, UserMixin):
    """ORM model for one row of the ``colleges`` table."""

    __tablename__ = 'colleges'

    # Identity and location
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), unique=True, index=True, nullable=True)
    state = db.Column(db.String(8), index=True, nullable=True)
    city = db.Column(db.String(64), index=True, nullable=True)
    zip = db.Column(db.Integer, index=True, nullable=True)
    type = db.Column(db.String(64), index=True, nullable=True)
    image = db.Column(db.String(256), index=True, nullable=True)

    # Ranking, admissions and cost figures
    ranking = db.Column(db.Integer, index=True, nullable=True)
    ACTAvg = db.Column(db.Integer, index=True, nullable=True)
    aidpercent = db.Column(db.Integer, index=True, nullable=True)
    acceptance = db.Column(db.Integer, index=True, nullable=True)
    fees = db.Column(db.Integer, index=True, nullable=True)
    GPAAvg = db.Column(db.Float, index=True, nullable=True)
    enrollment = db.Column(db.Integer, index=True, nullable=True)
    SATAvg = db.Column(db.Integer, index=True, nullable=True)
    SATRange = db.Column(db.String(64), index=True, nullable=True)
    ACTRange = db.Column(db.String(64), index=True, nullable=True)

    # General profile
    YearFounded = db.Column(db.Integer, index=True, nullable=True)
    AcademicCalendar = db.Column(db.String(128), index=True, nullable=True)
    setting = db.Column(db.String(64), index=True, nullable=True)
    SchoolWebsite = db.Column(db.String(128), index=True, nullable=True)

    # Favorite rows that point at this college (dynamic query, not a list).
    favorites = db.relationship('Favorite', backref='College', uselist=True, lazy='dynamic')

    def __init__(self, name, state, city, zip, type, image, ranking, ACTAvg,
                 aidpercent, acceptance, fees, GPAAvg, enrollment, SATAvg,
                 SATRange, ACTRange, YearFounded, AcademicCalendar, setting,
                 SchoolWebsite):
        """Store every constructor argument on the column of the same name."""
        supplied = (
            ("name", name),
            ("state", state),
            ("city", city),
            ("zip", zip),
            ("type", type),
            ("image", image),
            ("ranking", ranking),
            ("ACTAvg", ACTAvg),
            ("aidpercent", aidpercent),
            ("acceptance", acceptance),
            ("fees", fees),
            ("GPAAvg", GPAAvg),
            ("enrollment", enrollment),
            ("SATAvg", SATAvg),
            ("SATRange", SATRange),
            ("ACTRange", ACTRange),
            ("YearFounded", YearFounded),
            ("AcademicCalendar", AcademicCalendar),
            ("setting", setting),
            ("SchoolWebsite", SchoolWebsite),
        )
        for attribute, value in supplied:
            setattr(self, attribute, value)

    def alldetails(self):
        """Return a dict with the id and every data column of this college."""
        exported = (
            "id", "name", "state", "city", "zip", "type", "image", "ranking",
            "ACTAvg", "aidpercent", "acceptance", "fees", "GPAAvg",
            "enrollment", "SATAvg", "SATRange", "ACTRange", "YearFounded",
            "AcademicCalendar", "setting", "SchoolWebsite",
        )
        return {field: getattr(self, field) for field in exported}

    def fewdetails(self):
        """Return a short summary dict: id, name, state, city, image, ranking."""
        exported = ("id", "name", "state", "city", "image", "ranking")
        return {field: getattr(self, field) for field in exported}

# Maps each College constructor argument to the CSV column that supplies it.
_COLLEGE_CSV_COLUMNS = {
    "name": "Name",
    "state": "State",
    "city": "City",
    "zip": "Zip",
    "type": "Type",
    "image": "Image",
    "ranking": "Ranking",
    "ACTAvg": "ACT",
    "aidpercent": "AidPercentage",
    "acceptance": "AcceptanceRate",
    "fees": "Fees",
    "GPAAvg": "GPAAvg",
    "enrollment": "Enrollment",
    "SATAvg": "SATAvg",
    "SATRange": "SATRange",
    "ACTRange": "ACTRange",
    "YearFounded": "YearFounded",
    "AcademicCalendar": "AcademicCalendar",
    "setting": "Setting",
    "SchoolWebsite": "SchoolWebsite",
}


def _college_fields_from_row(row):
    """Return College constructor kwargs for one CSV row; empty cells become None."""
    return {
        field: (row[column] if pd.notna(row[column]) else None)
        for field, column in _COLLEGE_CSV_COLUMNS.items()
    }


def initColleges():
    """Create the database tables and seed ``colleges`` from CollegeData.csv.

    Does nothing beyond table creation when the colleges table already holds
    at least one row. Rows are committed one at a time so that a bad or
    duplicate row is reported and skipped without aborting the import.
    """
    with app.app_context():
        print("Creating college tables")
        db.create_all()
        if db.session.query(College).count() > 0:
            return

        basedir = os.path.abspath(os.path.dirname(__file__))

        # Seed data lives in ../static/data relative to this module.
        file_path = os.path.join(basedir, "..", "static", "data", "CollegeData.csv")
        df = pd.read_csv(file_path)

        for index, row in df.iterrows():
            try:
                fields = _college_fields_from_row(row)
                db.session.add(College(**fields))
                db.session.commit()
            except IntegrityError:
                # Bad or duplicate data: discard the failed transaction so the
                # session stays usable for the next row.
                db.session.rollback()
                print(f"Records exist, duplicate college, or error: {fields['name']}")
            except Exception as e_inner:
                # Without a rollback a failed flush would leave the session
                # unusable and every following row would fail as well.
                db.session.rollback()
                print(f"Error adding college at index {index}: {str(e_inner)}")

The provided Python code integrates with a Flask web application to define a database model for a College using SQLAlchemy. The College class includes a variety of attributes related to college information such as name, location, rankings, and admissions data, and provides methods to output detailed or brief college profiles. It also includes an initColleges function to populate the database with college information from a CSV file, handling duplicates and errors gracefully.

Passion Project Code Contributions • 17 min read

Description

OpenAI Bot Engine

Machine Learning Engine

College Data Model