Module top_github_scraper.scrape_user

Expand source code
import json
import os
from pathlib import Path

from dotenv import load_dotenv
from rich import print
from top_github_scraper.utils import ScrapeGithubUrl, UserProfileGetter


# GitHub token taken from the GITHUB_TOKEN environment variable; None when unset.
TOKEN = os.environ.get("GITHUB_TOKEN")

def get_top_user_urls(
    keyword: str,
    save_directory: str = ".",
    start_page: int = 1,
    stop_page: int = 50,
):
    """Get the URLs that pop up when searching for a specific keyword on
    GitHub and save them to a JSON file.

    The search is issued through ``ScrapeGithubUrl`` with the arguments
    ``'Users'`` and ``'followers'``. The result is written to
    ``{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json``;
    nothing is returned.

    Parameters
    ----------
    keyword : str
        Keyword to search for (e.g., machine learning)
    save_directory : str, optional
        directory to save the output file, by default "."
    start_page : int, optional
        page number to start scraping from, by default 1
    stop_page : int, optional
        page number of the last page to scrape, by default 50
    """
    # Create the output directory (and any missing parents) up front.
    Path(save_directory).mkdir(parents=True, exist_ok=True)
    save_path = f"{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json"
    # NOTE(review): the end of this call was truncated in the published
    # listing. scrape_top_repo_urls() is assumed to be the method that returns
    # the scraped URLs -- confirm against top_github_scraper.utils.
    repo_urls = ScrapeGithubUrl(
        keyword, 'Users', 'followers', start_page, stop_page
    ).scrape_top_repo_urls()
    with open(save_path, "w") as outfile:
        json.dump(repo_urls, outfile)
def get_top_users(
    keyword: str,
    start_page: int = 1,
    stop_page: int = 50,
    save_directory: str = ".",
):
    """Get the profile information for the URLs saved for a specific keyword
    search on GitHub.

    The URL list is read from
    ``{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json``.
    When that file does not exist yet, it is created first by calling
    ``get_top_user_urls`` with the same arguments.

    Parameters
    ----------
    keyword : str
        Keyword to search for (e.g., machine learning)
    start_page : int, optional
        page number to start scraping from, by default 1
    stop_page : int, optional
        page number of the last page to scrape, by default 50
    save_directory : str, optional
        directory to save the output file, by default "."

    Returns
    -------
    The value returned by ``UserProfileGetter(urls).get_all_user_profiles()``.
    """
    # Same path that get_top_user_urls writes its JSON output to.
    full_url_save_path = (
        f"{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json"
    )
    # NOTE(review): user_save_path is computed but never used in the published
    # listing; the statement that writes the profiles to it appears to have
    # been lost -- confirm against the package source.
    user_save_path = f"{save_directory}/top_user_info_{keyword}_{start_page}_{stop_page}.csv"
    if not Path(full_url_save_path).exists():
        # No cached URL list for these arguments yet: scrape it first.
        get_top_user_urls(
            keyword=keyword,
            save_directory=save_directory,
            start_page=start_page,
            stop_page=stop_page,
        )
    with open(full_url_save_path, "r") as infile:
        user_urls = json.load(infile)
    # NOTE(review): the prefix is empty in the published listing; a base URL
    # literal may have been stripped during extraction -- confirm.
    url = ''
    urls = [url + user for user in user_urls]
    top_users = UserProfileGetter(urls).get_all_user_profiles()
    return top_users


def get_top_user_urls(keyword: str, save_directory: str = '.', start_page: int = 1, stop_page: int = 50)

Get the URLs of the repositories that pop up when searching for a specific keyword on GitHub.


keyword : str
Keyword to search for (e.g., machine learning)
save_directory : str, optional
directory to save the output file, by default "."
start_page : int, optional
page number to start scraping from, by default 1
stop_page : int, optional
page number of the last page to scrape, by default 50
Expand source code
def get_top_user_urls(
    keyword: str,
    save_directory: str = ".",
    start_page: int = 1,
    stop_page: int = 50,
):
    """Get the URLs that pop up when searching for a specific keyword on
    GitHub and save them to a JSON file.

    The search is issued through ``ScrapeGithubUrl`` with the arguments
    ``'Users'`` and ``'followers'``. The result is written to
    ``{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json``;
    nothing is returned.

    Parameters
    ----------
    keyword : str
        Keyword to search for (e.g., machine learning)
    save_directory : str, optional
        directory to save the output file, by default "."
    start_page : int, optional
        page number to start scraping from, by default 1
    stop_page : int, optional
        page number of the last page to scrape, by default 50
    """
    # Create the output directory (and any missing parents) up front.
    Path(save_directory).mkdir(parents=True, exist_ok=True)
    save_path = f"{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json"
    # NOTE(review): the end of this call was truncated in the published
    # listing. scrape_top_repo_urls() is assumed to be the method that returns
    # the scraped URLs -- confirm against top_github_scraper.utils.
    repo_urls = ScrapeGithubUrl(
        keyword, 'Users', 'followers', start_page, stop_page
    ).scrape_top_repo_urls()
    with open(save_path, "w") as outfile:
        json.dump(repo_urls, outfile)
def get_top_users(keyword: int, start_page: int = 1, stop_page: int = 50, save_directory: str = '.')

Get the information of the owners and contributors of the repositories that pop up when searching for a specific keyword on GitHub.

Parameters

keyword : str
Keyword to search for (e.g., machine learning)
start_page : int, optional
page number to start scraping from, by default 1
stop_page : int, optional
page number of the last page to scrape, by default 50
save_directory : str, optional
directory to save the output file, by default "."
Expand source code
def get_top_users(
    keyword: str,
    start_page: int = 1,
    stop_page: int = 50,
    save_directory: str = ".",
):
    """Get the profile information for the URLs saved for a specific keyword
    search on GitHub.

    The URL list is read from
    ``{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json``.
    When that file does not exist yet, it is created first by calling
    ``get_top_user_urls`` with the same arguments.

    Parameters
    ----------
    keyword : str
        Keyword to search for (e.g., machine learning)
    start_page : int, optional
        page number to start scraping from, by default 1
    stop_page : int, optional
        page number of the last page to scrape, by default 50
    save_directory : str, optional
        directory to save the output file, by default "."

    Returns
    -------
    The value returned by ``UserProfileGetter(urls).get_all_user_profiles()``.
    """
    # Same path that get_top_user_urls writes its JSON output to.
    full_url_save_path = (
        f"{save_directory}/top_repo_urls_{keyword}_{start_page}_{stop_page}.json"
    )
    # NOTE(review): user_save_path is computed but never used in the published
    # listing; the statement that writes the profiles to it appears to have
    # been lost -- confirm against the package source.
    user_save_path = f"{save_directory}/top_user_info_{keyword}_{start_page}_{stop_page}.csv"
    if not Path(full_url_save_path).exists():
        # No cached URL list for these arguments yet: scrape it first.
        get_top_user_urls(
            keyword=keyword,
            save_directory=save_directory,
            start_page=start_page,
            stop_page=stop_page,
        )
    with open(full_url_save_path, "r") as infile:
        user_urls = json.load(infile)
    # NOTE(review): the prefix is empty in the published listing; a base URL
    # literal may have been stripped during extraction -- confirm.
    url = ''
    urls = [url + user for user in user_urls]
    top_users = UserProfileGetter(urls).get_all_user_profiles()
    return top_users