Added AI generated Summaries and images
This commit is contained in:
		
							parent
							
								
									0c86bd74ef
								
							
						
					
					
						commit
						105d14eebc
					
				| 
						 | 
				
			
			@ -16,5 +16,8 @@
 | 
			
		|||
        "country" : "$NEWSAPI_COUNTRY",
 | 
			
		||||
        "article_lifetime": "$NEWSAPI_ARTICLE_LIFETIME",
 | 
			
		||||
        "article_interval": "$NEWSAPI_ARTICLE_INTERVAL"
 | 
			
		||||
    },
 | 
			
		||||
    "pixabay" : {
 | 
			
		||||
        "api_key" : "$PIXABAY_API_KEY"
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -14,6 +14,7 @@ services:
 | 
			
		|||
      - NEWSAPI_COUNTRY=gb
 | 
			
		||||
      - ARTICLE_LIFETIME=6 # in hours
 | 
			
		||||
      - ARTICLE_INTERVAL=1 # in hours
 | 
			
		||||
      - PIXABAY_API_KEY=
 | 
			
		||||
  mongodb:
 | 
			
		||||
    container_name: notoric-snc-mongo
 | 
			
		||||
    image: mongo
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,7 +15,8 @@ config = config_template.replace('$MONGO_HOST', os.environ['MONGO_HOST']) \
 | 
			
		|||
    .replace('$NEWSAPI_API_KEY', os.environ['NEWSAPI_API_KEY']) \
 | 
			
		||||
    .replace('$NEWSAPI_COUNTRY', os.environ['NEWSAPI_COUNTRY']) \
 | 
			
		||||
    .replace('$NEWSAPI_ARTICLE_LIFETIME', os.environ['ARTICLE_LIFETIME']) \
 | 
			
		||||
    .replace('$NEWSAPI_ARTICLE_INTERVAL', os.environ['ARTICLE_INTERVAL'])
 | 
			
		||||
    .replace('$NEWSAPI_ARTICLE_INTERVAL', os.environ['ARTICLE_INTERVAL']) \
 | 
			
		||||
    .replace('$PIXABAY_API_KEY', os.environ['PIXABAY_API_KEY']) \
 | 
			
		||||
 | 
			
		||||
# Write the config to a file
 | 
			
		||||
with open('config.json', 'w') as config_file:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										247
									
								
								Docker/snc.py
								
								
								
								
							
							
						
						
									
										247
									
								
								Docker/snc.py
								
								
								
								
							| 
						 | 
				
			
			@ -35,6 +35,8 @@ news_url = f"http://newsapi.org/v2/top-headlines?country={config['news']['countr
 | 
			
		|||
 | 
			
		||||
groq_key = config['groq']['api_key']
 | 
			
		||||
 | 
			
		||||
pixabayApiKey = config['pixabay']['api_key']
 | 
			
		||||
 | 
			
		||||
# Connect to MongoDB
 | 
			
		||||
print("Connecting to MongoDB...")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -125,8 +127,15 @@ def get_newsfeed(category='general'):
 | 
			
		|||
        article_data['author'] = article['author']
 | 
			
		||||
        article_data['category'] = category
 | 
			
		||||
        article_data['timestamp'] = datetime.now()
 | 
			
		||||
        if (article['url'].contains("reuters.com") == False):
 | 
			
		||||
            articles.append(article_data)
 | 
			
		||||
 | 
			
		||||
        if (article['url'].find("news.google") != -1):
 | 
			
		||||
            response = requests.get(article['url'])
 | 
			
		||||
            soup = BeautifulSoup(response.text, 'html.parser')
 | 
			
		||||
            htmlarticle = soup.find('article')
 | 
			
		||||
            if htmlarticle != None:
 | 
			
		||||
                if len(htmlarticle.text.strip()) > 250:
 | 
			
		||||
                    article_data['content'] = htmlarticle.text.strip()
 | 
			
		||||
                    articles.append(article_data)
 | 
			
		||||
 | 
			
		||||
    print("Newsfeed data retrieved!")
 | 
			
		||||
    return articles
 | 
			
		||||
| 
						 | 
				
			
			@ -134,84 +143,170 @@ def get_newsfeed(category='general'):
 | 
			
		|||
# Get most interesting news articles with AI
 | 
			
		||||
 | 
			
		||||
def get_interesting_news(articles):
 | 
			
		||||
    print("Getting interesting news...")
 | 
			
		||||
    interesting_articles = []
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        client = Groq(api_key=groq_key)
 | 
			
		||||
        completion = client.chat.completions.create(
 | 
			
		||||
            model="gemma-7b-it",
 | 
			
		||||
            messages=[
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "system",
 | 
			
		||||
                    "content": "You will be given an array of json elements, please provide the 3 indexes of the most interesting, important and notable news headlines that a mid-twenties person would like to read in the following format: {\"most_interesting\": {\"index\": index,\"title\": title},\"second_most_interesting\": {\"index\": index,\"title\": title},\"third_most_interesting\": {\"index\": index,\"title\": title}}"
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "user",
 | 
			
		||||
                    "content": str(articles)
 | 
			
		||||
                }
 | 
			
		||||
            ],
 | 
			
		||||
            temperature=1.3,
 | 
			
		||||
            max_tokens=1024,
 | 
			
		||||
            top_p=1,
 | 
			
		||||
            stream=False,
 | 
			
		||||
            response_format={"type": "json_object"},
 | 
			
		||||
            stop=None,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        response = str(completion.choices[0].message.content)
 | 
			
		||||
        response = response.replace("\n", " ")
 | 
			
		||||
        response = json.loads(response)
 | 
			
		||||
    except Exception as e: # If ai doesnt return a valid response, check anyway, if not use the first 3 articles
 | 
			
		||||
        try:
 | 
			
		||||
            response = e
 | 
			
		||||
            response = response[18:]
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
            response = response['error']['failed_generation']
 | 
			
		||||
            response = response.replace("\n", " ")
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
        except:
 | 
			
		||||
            print("Error selecting articles! Using random selection...")
 | 
			
		||||
            response = {
 | 
			
		||||
                "most_interesting": {
 | 
			
		||||
                    "index": 0,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                },
 | 
			
		||||
                "second_most_interesting": {
 | 
			
		||||
                    "index": 1,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                },
 | 
			
		||||
                "third_most_interesting": {
 | 
			
		||||
                    "index": 2,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
    selected_articles = []
 | 
			
		||||
    
 | 
			
		||||
    if len(articles) <= 3:
 | 
			
		||||
        print("Not enough articles to select from! Using all articles...")
 | 
			
		||||
        selected_articles = articles
 | 
			
		||||
    else:
 | 
			
		||||
        print("Getting interesting news...")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given an array of json elements, please provide the 3 indexes of the most interesting, important and notable news headlines that a mid-twenties person would like to read in the following format: {\"most_interesting\": {\"index\": index,\"title\": title},\"second_most_interesting\": {\"index\": index,\"title\": title},\"third_most_interesting\": {\"index\": index,\"title\": title}}"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": str(articles)
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=1.3,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                response_format={"type": "json_object"},
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            response = str(completion.choices[0].message.content)
 | 
			
		||||
            response = response.replace("\n", " ")
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
        except Exception as e: # If ai doesnt return a valid response, check anyway, if not use the first 3 articles
 | 
			
		||||
            try:
 | 
			
		||||
                response = e
 | 
			
		||||
                response = response[18:]
 | 
			
		||||
                response = json.loads(response)
 | 
			
		||||
                response = response['error']['failed_generation']
 | 
			
		||||
                response = response.replace("\n", " ")
 | 
			
		||||
                response = json.loads(response)
 | 
			
		||||
            except:
 | 
			
		||||
                print("Error selecting articles! Using random selection...")
 | 
			
		||||
                response = {
 | 
			
		||||
                    "most_interesting": {
 | 
			
		||||
                        "index": 0,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    },
 | 
			
		||||
                    "second_most_interesting": {
 | 
			
		||||
                        "index": 1,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    },
 | 
			
		||||
                    "third_most_interesting": {
 | 
			
		||||
                        "index": 2,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
    article_index = [0, 1, 2]
 | 
			
		||||
    try:
 | 
			
		||||
        article_index[0] = response['most_interesting']['index']
 | 
			
		||||
        article_index[1] = response['second_most_interesting']['index']
 | 
			
		||||
        article_index[2] = response['third_most_interesting']['index']
 | 
			
		||||
        print("Selected articles:" + str(article_index))
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        print(e)
 | 
			
		||||
        article_index = [0, 1, 2]
 | 
			
		||||
        print("Using default article selection...")
 | 
			
		||||
        try:
 | 
			
		||||
            article_index[0] = response['most_interesting']['index']
 | 
			
		||||
            article_index[1] = response['second_most_interesting']['index']
 | 
			
		||||
            article_index[2] = response['third_most_interesting']['index']
 | 
			
		||||
            print("Selected articles:" + str(article_index))
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(e)
 | 
			
		||||
            article_index = [0, 1, 2]
 | 
			
		||||
            print("Using default article selection...")
 | 
			
		||||
 | 
			
		||||
        for i in article_index:
 | 
			
		||||
            article = articles[i]
 | 
			
		||||
            selected_article = {}
 | 
			
		||||
            selected_article['title'] = article['title']
 | 
			
		||||
            selected_article['author'] = article['author']
 | 
			
		||||
            selected_article['url'] = article['url']
 | 
			
		||||
            selected_article['category'] = article['category']
 | 
			
		||||
            selected_article['timestamp'] = datetime.now()
 | 
			
		||||
            selected_article['content'] = article['content']
 | 
			
		||||
            selected_articles.append(selected_article)
 | 
			
		||||
 | 
			
		||||
    for i in article_index:
 | 
			
		||||
        article = articles[i]
 | 
			
		||||
        selected_article = {}
 | 
			
		||||
        selected_article['title'] = article['title']
 | 
			
		||||
        selected_article['author'] = article['author']
 | 
			
		||||
        selected_article['url'] = article['url']
 | 
			
		||||
        selected_article['category'] = article['category']
 | 
			
		||||
        selected_article['timestamp'] = datetime.now()
 | 
			
		||||
        selected_articles.append(selected_article)
 | 
			
		||||
        print("Interesting news retrieved!")
 | 
			
		||||
 | 
			
		||||
    print("Interesting news retrieved!")
 | 
			
		||||
    # Get image & summary for all selected articles
 | 
			
		||||
 | 
			
		||||
    print("Getting images and summaries for selected articles...")
 | 
			
		||||
 | 
			
		||||
    for article in selected_articles:
 | 
			
		||||
        img_keywords = ""
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given a title for an article, provide a few keywords (around 3 maximum) (please only use short, vague and common words) for an image that would match the article (less than 50 characters) in the following format: keyword1 keyword2 keyword3"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": article['title']
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=0.5,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            img_keywords = str(completion.choices[0].message.content)
 | 
			
		||||
            img_keywords = img_keywords[:99]
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print("Could not get image keywords, using defaults...")
 | 
			
		||||
            img_keywords = article['category'] + " News article"
 | 
			
		||||
        
 | 
			
		||||
        try:
 | 
			
		||||
            image_response = requests.get(f"https://pixabay.com/api/?q={img_keywords}&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
            image_data = image_response.json()
 | 
			
		||||
            article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
            print("Image found!")
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            try:
 | 
			
		||||
                img_keywords = img_keywords.split(" ")[0]
 | 
			
		||||
                image_response = requests.get(f"https://pixabay.com/api/?q={img_keywords}&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
                image_data = image_response.json()
 | 
			
		||||
                article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
                print("Image found with shortened prompt!")
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                try:
 | 
			
		||||
                    image_response = requests.get(f"https://pixabay.com/api/?q={article['category']} news&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
                    image_data = image_response.json()
 | 
			
		||||
                    article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
                    print("Image found using category!")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    article['image'] = "https://picsum.photos/800/600"
 | 
			
		||||
 | 
			
		||||
        summary = ""
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given the source code for a webpage. Please respond with a descriptive summary (around 100 words) of the articles content as a radio announcer would read it out, assuming i know nothing about the subject of the article you will need to provide context and your summary should work as a standalone article. Make sure the article is using spoken language and is easy to read and understand for everyone"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": article['content']
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=1.4,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            summary = str(completion.choices[0].message.content)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(e)
 | 
			
		||||
            summary = "Read more about this article on the source website."
 | 
			
		||||
        article['summary'] = summary
 | 
			
		||||
 | 
			
		||||
    return selected_articles
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -238,9 +333,7 @@ def get_all_news():
 | 
			
		|||
def delete_old_news():
 | 
			
		||||
    print("Deleting old news articles...")
 | 
			
		||||
 | 
			
		||||
    hrs = int(config['news']['article_lifetime'])
 | 
			
		||||
 | 
			
		||||
    db.newsfeed.delete_many({'timestamp': {'$lt': datetime.now() - timedelta(hours=1) }})
 | 
			
		||||
    db.newsfeed.delete_many({'timestamp': {'$lt': datetime.now() - timedelta(hours=config['news']['article_lifetime']) }})
 | 
			
		||||
 | 
			
		||||
    print("Old news articles deleted!")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -249,7 +342,7 @@ def delete_old_news():
 | 
			
		|||
create_collections()
 | 
			
		||||
 | 
			
		||||
schedule.every(5).minutes.do(write_weather)
 | 
			
		||||
schedule.every(int(config['news']['article_interval'])).hours.do(get_all_news)
 | 
			
		||||
schedule.every(config['news']['article_interval']).hours.do(get_all_news)
 | 
			
		||||
schedule.every(1).hours.do(delete_old_news)
 | 
			
		||||
 | 
			
		||||
write_weather()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										
											BIN
										
									
								
								requirements.txt
								
								
								
								
							
							
						
						
									
										
											BIN
										
									
								
								requirements.txt
								
								
								
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										249
									
								
								snc.py
								
								
								
								
							
							
						
						
									
										249
									
								
								snc.py
								
								
								
								
							| 
						 | 
				
			
			@ -4,6 +4,9 @@ import json
 | 
			
		|||
import pymongo
 | 
			
		||||
import requests
 | 
			
		||||
import schedule
 | 
			
		||||
import re
 | 
			
		||||
import requests
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from groq import Groq
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -37,6 +40,9 @@ if os.path.exists('config.json') == False:
 | 
			
		|||
            "country" : "gb",
 | 
			
		||||
            "article_lifetime": 6,
 | 
			
		||||
            "article_interval": 1
 | 
			
		||||
        },
 | 
			
		||||
        "pixabay" : {
 | 
			
		||||
            "api_key" : ""
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -59,6 +65,8 @@ news_url = f"http://newsapi.org/v2/top-headlines?country={config['news']['countr
 | 
			
		|||
 | 
			
		||||
groq_key = config['groq']['api_key']
 | 
			
		||||
 | 
			
		||||
pixabayApiKey = config['pixabay']['api_key']
 | 
			
		||||
 | 
			
		||||
# Connect to MongoDB
 | 
			
		||||
print("Connecting to MongoDB...")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -149,10 +157,15 @@ def get_newsfeed(category='general'):
 | 
			
		|||
        article_data['author'] = article['author']
 | 
			
		||||
        article_data['category'] = category
 | 
			
		||||
        article_data['timestamp'] = datetime.now()
 | 
			
		||||
        if (article['url'].contains("reuters.com") == False):
 | 
			
		||||
            articles.append(article_data)
 | 
			
		||||
        
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        if (article['url'].find("news.google") != -1):
 | 
			
		||||
            response = requests.get(article['url'])
 | 
			
		||||
            soup = BeautifulSoup(response.text, 'html.parser')
 | 
			
		||||
            htmlarticle = soup.find('article')
 | 
			
		||||
            if htmlarticle != None:
 | 
			
		||||
                if len(htmlarticle.text.strip()) > 250:
 | 
			
		||||
                    article_data['content'] = htmlarticle.text.strip()
 | 
			
		||||
                    articles.append(article_data)
 | 
			
		||||
 | 
			
		||||
    print("Newsfeed data retrieved!")
 | 
			
		||||
    return articles
 | 
			
		||||
| 
						 | 
				
			
			@ -160,84 +173,170 @@ def get_newsfeed(category='general'):
 | 
			
		|||
# Get most interesting news articles with AI
 | 
			
		||||
 | 
			
		||||
def get_interesting_news(articles):
 | 
			
		||||
    print("Getting interesting news...")
 | 
			
		||||
    interesting_articles = []
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        client = Groq(api_key=groq_key)
 | 
			
		||||
        completion = client.chat.completions.create(
 | 
			
		||||
            model="gemma-7b-it",
 | 
			
		||||
            messages=[
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "system",
 | 
			
		||||
                    "content": "You will be given an array of json elements, please provide the 3 indexes of the most interesting, important and notable news headlines that a mid-twenties person would like to read in the following format: {\"most_interesting\": {\"index\": index,\"title\": title},\"second_most_interesting\": {\"index\": index,\"title\": title},\"third_most_interesting\": {\"index\": index,\"title\": title}}"
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    "role": "user",
 | 
			
		||||
                    "content": str(articles)
 | 
			
		||||
                }
 | 
			
		||||
            ],
 | 
			
		||||
            temperature=1.3,
 | 
			
		||||
            max_tokens=1024,
 | 
			
		||||
            top_p=1,
 | 
			
		||||
            stream=False,
 | 
			
		||||
            response_format={"type": "json_object"},
 | 
			
		||||
            stop=None,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        response = str(completion.choices[0].message.content)
 | 
			
		||||
        response = response.replace("\n", " ")
 | 
			
		||||
        response = json.loads(response)
 | 
			
		||||
    except Exception as e: # If ai doesnt return a valid response, check anyway, if not use the first 3 articles
 | 
			
		||||
        try:
 | 
			
		||||
            response = e
 | 
			
		||||
            response = response[18:]
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
            response = response['error']['failed_generation']
 | 
			
		||||
            response = response.replace("\n", " ")
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
        except:
 | 
			
		||||
            print("Error selecting articles! Using random selection...")
 | 
			
		||||
            response = {
 | 
			
		||||
                "most_interesting": {
 | 
			
		||||
                    "index": 0,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                },
 | 
			
		||||
                "second_most_interesting": {
 | 
			
		||||
                    "index": 1,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                },
 | 
			
		||||
                "third_most_interesting": {
 | 
			
		||||
                    "index": 2,
 | 
			
		||||
                    "title": "Interesting"
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
    selected_articles = []
 | 
			
		||||
    
 | 
			
		||||
    if len(articles) <= 3:
 | 
			
		||||
        print("Not enough articles to select from! Using all articles...")
 | 
			
		||||
        selected_articles = articles
 | 
			
		||||
    else:
 | 
			
		||||
        print("Getting interesting news...")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given an array of json elements, please provide the 3 indexes of the most interesting, important and notable news headlines that a mid-twenties person would like to read in the following format: {\"most_interesting\": {\"index\": index,\"title\": title},\"second_most_interesting\": {\"index\": index,\"title\": title},\"third_most_interesting\": {\"index\": index,\"title\": title}}"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": str(articles)
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=1.3,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                response_format={"type": "json_object"},
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            response = str(completion.choices[0].message.content)
 | 
			
		||||
            response = response.replace("\n", " ")
 | 
			
		||||
            response = json.loads(response)
 | 
			
		||||
        except Exception as e: # If ai doesnt return a valid response, check anyway, if not use the first 3 articles
 | 
			
		||||
            try:
 | 
			
		||||
                response = e
 | 
			
		||||
                response = response[18:]
 | 
			
		||||
                response = json.loads(response)
 | 
			
		||||
                response = response['error']['failed_generation']
 | 
			
		||||
                response = response.replace("\n", " ")
 | 
			
		||||
                response = json.loads(response)
 | 
			
		||||
            except:
 | 
			
		||||
                print("Error selecting articles! Using random selection...")
 | 
			
		||||
                response = {
 | 
			
		||||
                    "most_interesting": {
 | 
			
		||||
                        "index": 0,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    },
 | 
			
		||||
                    "second_most_interesting": {
 | 
			
		||||
                        "index": 1,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    },
 | 
			
		||||
                    "third_most_interesting": {
 | 
			
		||||
                        "index": 2,
 | 
			
		||||
                        "title": "Interesting"
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
    article_index = [0, 1, 2]
 | 
			
		||||
    try:
 | 
			
		||||
        article_index[0] = response['most_interesting']['index']
 | 
			
		||||
        article_index[1] = response['second_most_interesting']['index']
 | 
			
		||||
        article_index[2] = response['third_most_interesting']['index']
 | 
			
		||||
        print("Selected articles:" + str(article_index))
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        print(e)
 | 
			
		||||
        article_index = [0, 1, 2]
 | 
			
		||||
        print("Using default article selection...")
 | 
			
		||||
        try:
 | 
			
		||||
            article_index[0] = response['most_interesting']['index']
 | 
			
		||||
            article_index[1] = response['second_most_interesting']['index']
 | 
			
		||||
            article_index[2] = response['third_most_interesting']['index']
 | 
			
		||||
            print("Selected articles:" + str(article_index))
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(e)
 | 
			
		||||
            article_index = [0, 1, 2]
 | 
			
		||||
            print("Using default article selection...")
 | 
			
		||||
 | 
			
		||||
        for i in article_index:
 | 
			
		||||
            article = articles[i]
 | 
			
		||||
            selected_article = {}
 | 
			
		||||
            selected_article['title'] = article['title']
 | 
			
		||||
            selected_article['author'] = article['author']
 | 
			
		||||
            selected_article['url'] = article['url']
 | 
			
		||||
            selected_article['category'] = article['category']
 | 
			
		||||
            selected_article['timestamp'] = datetime.now()
 | 
			
		||||
            selected_article['content'] = article['content']
 | 
			
		||||
            selected_articles.append(selected_article)
 | 
			
		||||
 | 
			
		||||
    for i in article_index:
 | 
			
		||||
        article = articles[i]
 | 
			
		||||
        selected_article = {}
 | 
			
		||||
        selected_article['title'] = article['title']
 | 
			
		||||
        selected_article['author'] = article['author']
 | 
			
		||||
        selected_article['url'] = article['url']
 | 
			
		||||
        selected_article['category'] = article['category']
 | 
			
		||||
        selected_article['timestamp'] = datetime.now()
 | 
			
		||||
        selected_articles.append(selected_article)
 | 
			
		||||
        print("Interesting news retrieved!")
 | 
			
		||||
 | 
			
		||||
    print("Interesting news retrieved!")
 | 
			
		||||
    # Get image & summary for all selected articles
 | 
			
		||||
 | 
			
		||||
    print("Getting images and summaries for selected articles...")
 | 
			
		||||
 | 
			
		||||
    for article in selected_articles:
 | 
			
		||||
        img_keywords = ""
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given a title for an article, provide a few keywords (around 3 maximum) (please only use short, vague and common words) for an image that would match the article (less than 50 characters) in the following format: keyword1 keyword2 keyword3"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": article['title']
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=0.5,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            img_keywords = str(completion.choices[0].message.content)
 | 
			
		||||
            img_keywords = img_keywords[:99]
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print("Could not get image keywords, using defaults...")
 | 
			
		||||
            img_keywords = article['category'] + " News article"
 | 
			
		||||
        
 | 
			
		||||
        try:
 | 
			
		||||
            image_response = requests.get(f"https://pixabay.com/api/?q={img_keywords}&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
            image_data = image_response.json()
 | 
			
		||||
            article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
            print("Image found!")
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            try:
 | 
			
		||||
                img_keywords = img_keywords.split(" ")[0]
 | 
			
		||||
                image_response = requests.get(f"https://pixabay.com/api/?q={img_keywords}&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
                image_data = image_response.json()
 | 
			
		||||
                article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
                print("Image found with shortened prompt!")
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                try:
 | 
			
		||||
                    image_response = requests.get(f"https://pixabay.com/api/?q={article['category']} news&key={pixabayApiKey}&orientation=horizontal&per_page=3")
 | 
			
		||||
                    image_data = image_response.json()
 | 
			
		||||
                    article['image'] = image_data['hits'][0]['largeImageURL']
 | 
			
		||||
                    print("Image found using category!")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    article['image'] = "https://picsum.photos/800/600"
 | 
			
		||||
 | 
			
		||||
        summary = ""
 | 
			
		||||
        try:
 | 
			
		||||
            client = Groq(api_key=groq_key)
 | 
			
		||||
            completion = client.chat.completions.create(
 | 
			
		||||
                model="gemma-7b-it",
 | 
			
		||||
                messages=[
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "system",
 | 
			
		||||
                        "content": "You will be given the source code for a webpage. Please respond with a descriptive summary (around 100 words) of the articles content as a radio announcer would read it out, assuming i know nothing about the subject of the article you will need to provide context and your summary should work as a standalone article. Make sure the article is using spoken language and is easy to read and understand for everyone"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        "role": "user",
 | 
			
		||||
                        "content": article['content']
 | 
			
		||||
                    }
 | 
			
		||||
                ],
 | 
			
		||||
                temperature=1.4,
 | 
			
		||||
                max_tokens=1024,
 | 
			
		||||
                top_p=1,
 | 
			
		||||
                stream=False,
 | 
			
		||||
                stop=None,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            summary = str(completion.choices[0].message.content)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(e)
 | 
			
		||||
            summary = "Read more about this article on the source website."
 | 
			
		||||
        article['summary'] = summary
 | 
			
		||||
 | 
			
		||||
    return selected_articles
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue