Code Monkey home page Code Monkey logo

chatgpt_summarizepdf's People

Contributors

davidlevy247 avatar

Stargazers

 avatar

Watchers

 avatar

chatgpt_summarizepdf's Issues

failed refactor

I tried to do a refactor and made a bigger mess of things. Here is the direction I tried to go:

import os
import openai
import re
import base64
import sys
import logging
import shutil
from PyPDF2 import PdfReader
from tkinter import Tk
from tkinter.filedialog import askopenfilename
from datetime import datetime
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
from cryptography.fernet import Fernet
from getpass import getpass
from openai.error import OpenAIError, RateLimitError
from typing import Optional, Tuple
from colorama import Fore, Style

# Set up logging
logging.basicConfig(filename='app.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s')

CONFIG_FILE = 'config.txt'
ENCRYPTION_SIGIL = "<<ENCRYPTED>>"

def log(msg, level="INFO"):
    """Helper function to log messages."""
    if level == "INFO":
        logging.info(msg)
    elif level == "ERROR":
        logging.error(msg)

def create_encryption_key(key: str) -> bytes:
    """Create a 32 byte encryption key from a password."""
    password = key.encode()  # Convert to bytes
    salt = b'\x00'*16  # NOTE: This is a placeholder, in a real-world case, you should use a random salt and save it.
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100_000,
        backend=default_backend()
    )
    return base64.urlsafe_b64encode(kdf.derive(password))


def encrypt_config(api_key: str, prompt: str, encryption_key: str) -> Tuple[str, str]:
    """Encrypts API key and prompt with given encryption key."""

    encryptor = Fernet(encryption_key)

    api_key_enc = encryptor.encrypt(api_key.encode())
    prompt_enc = encryptor.encrypt(prompt.encode())

    return api_key_enc, prompt_enc


def get_encryption_key():
    password = getpass.getpass("Enter your password: ").encode()  # Get the password for encryption
    salt = b'\x00'*16  # Just for simplicity we use static salt
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password))

def encrypt_data(data, key):
    f = Fernet(key)
    return f.encrypt(data)

def decrypt_data(data, key):
    f = Fernet(key)
    return f.decrypt(data)

def load_config(config_file: str) -> Tuple[str, str, bool]:
    global ENCRYPTION_SIGIL
    # password retry loop
    for _ in range(3):  # Give user 3 tries to enter password
        try:
            with open(config_file, 'r') as file:
                lines = file.readlines()
                if ENCRYPTION_SIGIL in lines[0]:
                    api_key = lines[1].strip()
                    prompt = lines[2].strip()
                    is_encrypted = True
                else:
                    api_key = lines[0].strip()
                    prompt = lines[1].strip()
                    is_encrypted = False
            return api_key, prompt, is_encrypted
        except FileNotFoundError:
            log("Config file not found.", "ERROR")
            return None, None, False
        except Exception as e:
            log(f"Failed to load config: {e}", "ERROR")
    else:
        remake_config = input("Failed to enter the correct password 3 times. Would you like to create a new config file? (y/n): ")
        if remake_config.lower() == 'y':
            return None, None, False
        else:
            sys.exit(0)


def save_config(config_file, api_key, prompt, encryption_key=None):
    # Backup existing config file
    if os.path.exists(config_file):
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_file = f"{config_file}_{timestamp}.bak"
        shutil.copy2(config_file, backup_file)
    # Continue with save logic
    data = f"{api_key}\n{prompt}"
    if encryption_key is not None:
        data = encrypt_data(data.encode(), encryption_key)
    else:
        data = data.encode()
    with open(config_file, 'wb') as f:
        f.write(data)

def modify_config(api_key: str, prompt: str, encryption_key: Optional[str] = None):
    """Modify configuration file with new API key and prompt"""
    
    change = input("Would you like to change any configurations? (1 = API key, 2 = prompt, 3 = encryption, n = no): ")
    if change.lower() == "n":
        return

    while True:
        option = change
        if option == "1":
            api_key = getpass("Enter the new API key: ")
        elif option == "2":
            prompt = input("Enter the new prompt: ")
        elif option == "3":
            encrypt_option = input("Would you like to encrypt the config file? (y/n): ")
            if encrypt_option.lower() == "y":
                encryption_key = get_encryption_key()
            else:
                encryption_key = None
        else:
            print("Invalid option. Please enter 1 for API key, 2 for prompt, or 3 for encryption.")

        change = input("Would you like to change any other configurations? (1 = API key, 2 = prompt, 3 = encryption, n = no): ")
        if change.lower() == "n":
            break

    if encryption_key:
        api_key, prompt = encrypt_config(api_key, prompt, encryption_key)
        
    save_config(CONFIG_FILE, api_key, prompt, encryption_key)



def get_user_input():
    api_key = input("Enter your OpenAI API key: ")
    sys.stdout.write("Enter your prompt to ask ChatGPT for your desired results per page. ")
    sys.stdout.write("For example, 'Please summarize the following single page from a PDF book into coherent easy to understand paragraphs without indentations or early line breaks; sometimes a single page may be impossible to summarize into one to three paragraphs, so when that happens report what the page is generally about and whether it was instructions, problems, examples, or something else: '\n")
    prompt = input()
    use_encryption = input("Would you like to use encryption for the config file? [y/N]: ").strip().lower() in ['y', 'yes']
    encryption_key = create_encryption_key(get_encryption_key()) if use_encryption else None
    return api_key, prompt, encryption_key

def summarize_text(text, prompt):
    try:
        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=prompt,
            max_tokens=100,
            n=1,
            stop=None,
            temperature=0.5
        )
        return response.choices[0].text.strip()
    except OpenAIError as e:
        logging.error(f"OpenAI error: {e}")
        return None
    except RateLimitError:
        logging.error("Rate limit exceeded.")
        return None

def main():
    # Load config
    api_key, prompt, is_encrypted = load_config(CONFIG_FILE)
    encryption_key = None

    if api_key is None or prompt is None:
        api_key, prompt, encryption_key = get_user_input()
        save_config(api_key, prompt, encryption_key)
        openai.api_key = api_key
    else:
        if is_encrypted:
            encryption_key = getpass("Enter the encryption key for the config file: ")
            api_key, prompt = decrypt_config(api_key, prompt, encryption_key)
        modify_config(api_key, prompt, encryption_key)
        openai.api_key = api_key

    # Ask for file
    Tk().withdraw()
    filename = askopenfilename()

    # Extract text from PDF
    try:
        with open(filename, 'rb') as f:
            pdf = PdfReader(f)
            total_pages = len(pdf.pages)
            print(f"Starting summarization process of {total_pages} pages...")
            for page_num, page in enumerate(pdf.pages, start=1):
                print(f"Processing page {page_num} of {total_pages}...")
                text = page.extract_text()
                if text is None:
                    print(f"Page {page_num}: Unable to extract text. Skipping.")
                    continue
                text = re.sub('\s+', ' ', text).strip()  # Clean up text
                summary = summarize_text(text, prompt)  # Summarize
                if summary is None:
                    print(f"Page {page_num}: Failed to get summary.")
                else:
                    print(summary)
    except Exception as e:
        logging.error(f"PDF read error: {e}")
        return
    except IOError as e:
        logging.error(f"File error: {e}")
        return


if __name__ == "__main__":
    main()

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    ๐Ÿ–– Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. ๐Ÿ“Š๐Ÿ“ˆ๐ŸŽ‰

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.