
Comments (4)

VinciGit00 commented on July 1, 2024

Can you provide the whole output and input?


HacanAle commented on July 1, 2024
```python
import argparse
import json
import os
import random
import re
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
from scrapegraphai.graphs import SmartScraperGraph

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

def get_graph_config():
    return {
        "llm": {
            "api_key": openai_api_key,
            "model": "gpt-4-turbo",
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",  
        },
    }

def scrape_url(url):
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
    }
    
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text(separator=' ', strip=True)
    except requests.RequestException as e:
        return str(e)

def is_subpage_or_subdomain(source_url, url):
    source_parsed = urlparse(source_url)
    url_parsed = urlparse(url)
    
    # str.strip('www.') removes any of the characters 'w' and '.', not the
    # prefix; removeprefix() drops only a leading "www.".
    source_netloc = source_parsed.netloc.removeprefix('www.')
    url_netloc = url_parsed.netloc.removeprefix('www.')
    
    return (url_parsed.path.startswith(source_parsed.path) or url_netloc.endswith(source_netloc)) and (url_netloc != '')

def extract_emails_from_text(text):
    email_pattern = r'\b(?:contact|info|sales|queries?|query)@[\w.-]+\b'
    emails = re.findall(email_pattern, text)
    return emails

client = OpenAI(api_key=openai_api_key)

def generate_email_text(scraped_data):
    response = client.chat.completions.create(
        model="gpt-4o",
        max_tokens=1500,
        messages=[
            {"role": "system", "content": "role"},
            {"role": "user", "content": role"}
        ]
    )
    
    email_text = response.choices[0].message.content

    print("Email Text:", email_text)
    return email_text

def send_email(subject, body, to_email):
    sender_email = "[email protected]"
    receiver_email = to_email
    password = "password"

    message = MIMEMultipart()
    message["From"] = sender_email
    message["To"] = receiver_email
    message["Subject"] = subject

    message.attach(MIMEText(body, "plain"))

    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.starttls()
        server.login(sender_email, password)
        server.sendmail(sender_email, receiver_email, message.as_string())

def process_url(source_url):
    graph_config = get_graph_config()

    smart_scraper_graph = SmartScraperGraph(
        prompt="prompt1.",
        source=source_url,
        config=graph_config
    )
    
    result = smart_scraper_graph.run()
    
    print(result, "\n")
    url_pattern = r'https://[^\s\'"\\]+'
    urls = re.findall(url_pattern, str(result))
    print(urls, "\n")

    email_pattern = r'\b[\w.-]+@[\w.-]+\.[\w.-]+\b'
    emails_list_1 = re.findall(email_pattern, str(result))

    scraped_data = {}
    if urls:
        for url in urls:
            if is_subpage_or_subdomain(source_url, url):
                print(f"Scraping URL: {url}")
                smart_scraper = SmartScraperGraph(
                    prompt="prompt2",
                    source=url,
                    config=graph_config
                )
                
                text_content = smart_scraper.run()
                emails_from_text = extract_emails_from_text(text_content)
                scraped_data[url] = {
                    'text_content': text_content,
                    'emails': emails_from_text
                }
            else:
                print(f"Skipping URL: {url} (not a subpage or subdomain of {source_url})")

    email = generate_email_text(scraped_data)

    subject_pattern = r'Subject:\s*(.*)'
    match = re.search(subject_pattern, email)
    if match:
        subject = match.group(1).strip()
        email_body = email[match.end():].strip()
    else:
        subject = None
        email_body = email.strip()

    send_email(subject, email_body, "[email protected]")

    name_pattern = r'www\.([a-zA-Z0-9-]+)\.[a-zA-Z]{2,3}(\.[a-zA-Z]{2})?'
    match = re.search(name_pattern, source_url)
    # Fall back to a default so file_name is always defined even when the
    # pattern does not match.
    filename = match.group(1) if match else "output"

    file_name = f"{filename}_{random.randint(1, 100)}.txt"
    with open(file_name, 'w', encoding='utf-8') as f:
        json.dump(email, f, indent=4, ensure_ascii=False)

    print(f"Output saved to {file_name}")

def main(input_file):
    with open(input_file, 'r') as file:
        urls = file.readlines()

    for url in urls:
        url = url.strip()
        if url:
            process_url(url)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Scrape websites.')
    parser.add_argument('--file', type=str, required=True, help='The text file containing URLs to scrape')

    args = parser.parse_args()
    main(args.file)
```

Error

```text
Traceback (most recent call last):
  File "/home/user/scraper/ai.py", line 186, in <module>
    main(args.file)
  File "/home/user/scraper/ai.py", line 178, in main
    process_url(url)
  File "/home/user/scraper/ai.py", line 128, in process_url
    smart_scraper = SmartScraperGraph(
                    ^^^^^^^^^^^^^^^^^^
  File "/home/user/miniconda3/envs/scrapeai/lib/python3.11/site-packages/scrapegraphai/graphs/smart_scraper_graph.py", line 52, in __init__
    super().__init__(prompt, config, source, schema)
  File "/home/user/miniconda3/envs/scrapeai/lib/python3.11/site-packages/scrapegraphai/graphs/abstract_graph.py", line 73, in __init__
    self.embedder_model = self._create_default_embedder(llm_config=config["llm"]) if "embeddings" not in config else self._create_embedder(
                          ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/miniconda3/envs/scrapeai/lib/python3.11/site-packages/scrapegraphai/graphs/abstract_graph.py", line 371, in _create_embedder
    raise ValueError("Model provided by the configuration not supported")
ValueError: Model provided by the configuration not supported
```
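The conditional quoted in the traceback shows how the embedder is selected: `abstract_graph.py` only calls `_create_embedder()` when an `"embeddings"` key is present in the config, and otherwise builds a default embedder from the `"llm"` entry. A minimal sketch of a config that sidesteps the rejected `ollama/nomic-embed-text` embedder by omitting that key entirely (assuming the default-embedder fallback works for an OpenAI `llm` entry):

```python
import os

from dotenv import load_dotenv

load_dotenv()

def get_graph_config():
    # Sketch only: with no "embeddings" key, abstract_graph.py falls back to
    # _create_default_embedder(llm_config=config["llm"]) per the traceback,
    # rather than rejecting "ollama/nomic-embed-text" in _create_embedder().
    return {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-4-turbo",
        },
    }
```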


HacanAle commented on July 1, 2024

Here is another example:

```python
import argparse
import os

from dotenv import load_dotenv
from openai import OpenAI
from scrapegraphai.graphs import SmartScraperGraph

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")



def get_graph_config():
    return {
        "llm": {
            "api_key": openai_api_key,
            "model": "gpt-3.5-turbo",
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",  
        },
    }
client = OpenAI(api_key=openai_api_key)
def rewrite_faqs(scraped_data):
    response = client.chat.completions.create(
        model="gpt-4o",
        max_tokens=1500,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that rewrites FAQs in a clear and concise manner."},
            {"role": "user", "content": f"Please rewrite the following FAQs:\n{scraped_data}"}
        ]
    )
    
    faqs = response.choices[0].message.content

    print("Faq Text:", faqs)
    return faqs


def scrape_and_save(urls):
    graph_config = get_graph_config()
    
    results = []
    for url in urls:
        try:
            smart_scraper_graph = SmartScraperGraph(
                prompt="Extract all FAQs from the provided URL.",
                source=url,
                config=graph_config
            )
            result = smart_scraper_graph.run()
            rewritten_faqs = rewrite_faqs(result)
            results.append(f"URL: {url}\nRewritten FAQs: {rewritten_faqs}\n")
        except Exception as e:
            results.append(f"URL: {url}\nError: {str(e)}\n")

    with open("faq_result.txt", "w") as file:
        file.writelines(results)

def main(input_file):
    with open(input_file, "r") as file:
        urls = [line.strip() for line in file.readlines()]
    scrape_and_save(urls)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Scrape multiple websites from a text file for useful links, sublinks, and email addresses, and save the results.')
    parser.add_argument('--file', type=str, required=True, help='The text file containing URLs to scrape')

    args = parser.parse_args()
    main(args.file)
```

### OUTPUT

```text
URL: https://www.hekis.com/support/faq/faq-gq
Rewritten FAQs: ### FAQs

  1. What are the rules/regulations for importing a vehicle?
    • Please consult with your country for export rules and pre-export inspections. You can find references to these authorities in our FAQ section “Basic Export Rules/Regulations per Country.” It's important to understand the requirements and responsibilities of importing a vehicle in your country before proceeding.
  2. Can you ship auto parts inside the vehicle I am buying?
```

PeriniM commented on July 1, 2024

Hey @HacanAle, I have tried the following with no errors; it seems like your issue is related to the embedder model:

```python
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph

# Assumes OPENAI_API_KEY is set in the environment / .env file.
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")

def get_graph_config():
    return {
        "llm": {
            "api_key": openai_key,
            "model": "gpt-3.5-turbo",
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",  
        },
        "verbose": True,
        "headless": False,
    }

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

config = get_graph_config()

smart_scraper_graph1 = SmartScraperGraph(
    prompt="List me all the projects with their description",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    config=config,
)

result = smart_scraper_graph1.run()

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    config=config,
)

result1 = smart_scraper_graph.run()
```

You can try installing the new version and see if you still get the problem. I have also tried it inside a for loop iterating over prompts and still got no errors.
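For reference, a minimal sketch of that loop-based check (the prompt strings and URL below are illustrative placeholders, not the original test set):

```python
# Iterate over several prompts against the same source, reusing the `config`
# from get_graph_config() above; each iteration builds a fresh graph.
prompts = [
    "List me all the projects with their description",
    "List me all the projects",
]

for prompt in prompts:
    graph = SmartScraperGraph(
        prompt=prompt,
        source="https://perinim.github.io/projects/",
        config=config,
    )
    print(graph.run())
```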

Hope it helps


