I have a problem with YouTube videos being deleted from my big playlist without my knowing which videos got removed. So I decided to write a Python script that saves certain information (title etc.) about the videos in a playlist into a text file. I particularly don't like the setChannelNames() function, but that's the best I could come up with.

import os
import argparse
import codecs
import datetime
import sys
from apiclient.discovery import build

DEVELOPER_KEY = "REPLACE_THIS_WITH_YOUR_OWN_API_KEY"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
RESULTS_PER_PAGE = 50 #1-50 as per Google's rules.
MAX_PLAYLIST_SIZE = 5000

def getArgs():
    """
    Process command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Retrieve a list of youtube videos in a playlist.')
    parser.add_argument("id", type=str, metavar="id", help="Youtube ID of the playlist to scrap.")
    parser.add_argument('-dt', '--date', action='store_true', help="Include the date when the video was added to the playlist?")
    parser.add_argument('-ds', '--description', action='store_true', help="Include the description of videos?")
    parser.add_argument('-un', '--uploadername', action='store_true', help="Include the channel name of the uploader of video?")

    args = parser.parse_args()
    args = vars(args) #Turn into dict-like view.
    return args

def getExtraFields(args):
    """
    Returns a string of what information to filter in the Google API based on args optional parameters.
    """
    extraFields = ""

    if args["description"]:
        extraFields += ",description"

    if args["date"]:
        extraFields += ",publishedAt"

    if args["uploadername"]:
        extraFields += ",resourceId(videoId)"

    return extraFields

def getExtraInfo(args, item):
    """
    Returns a string of information to save with the video name based on args optional parameters.
    """
    extraInfo = ""

    if args["uploadername"]:
        extraInfo += " |Uploader: {}|".format(item["snippet"]["resourceId"]["uploader"])

    if args["date"]:
        extraInfo += " |Date Added: {}|".format(item["snippet"]["publishedAt"][:-5]) #-5 to remove 000Z

    if args["description"]:
        extraInfo += " |Description: {}|".format(item["snippet"]["description"])

    return extraInfo

def save(pages, fileName, args):
    """
    Saves selected playlistItems' information in a txt. 
    """
    f = codecs.open(fileName, "wb", "utf-8")

    i = 0
    for playlistItems in pages:
        for items in playlistItems["items"]:
            i = i + 1
            f.write("{}.".format(i) + str(items["snippet"]["title"]) + getExtraInfo(args, items) + u'\r\n')

    f.close()


def setChannelNames(pages):
    """
    Retrieves all the videos in pages and gets the uploader's name.
    Sets the uploader's name to the playlistItem's ["snippet"]["resourceId"]["uploader"].
    """
    #videoIds = [item["snippet"]["resourceId"]["videoId"] for playlistItem in pages for item in playlistItem["items"]]
    videoIds = []
    for playlistItems in pages:
        for items in playlistItems["items"]:
            videoIds.append(items["snippet"]["resourceId"]["videoId"])

    for i in range(1, playlistItems["pageInfo"]["totalResults"]):
        videoIdsString = ""
        if i % RESULTS_PER_PAGE == 0 or i == playlistItems["pageInfo"]["totalResults"] - 1: #Every 50 or on the last iteration.
            for id in videoIds[:RESULTS_PER_PAGE]: #Generate the string of ids to put into the API request.
                videoIdsString += "{},".format(id)

            videoIdsString = videoIdsString[:-1] #Remove last ','
            videoIds = videoIds[RESULTS_PER_PAGE:]

            videos = youtube.videos().list(
                                        part="snippet",
                                        id=videoIdsString,
                                        fields="items(snippet(channelTitle))",
                                        maxResults=RESULTS_PER_PAGE
                                    ).execute()

            #Associate the channelTitles with their respective videos.
            j = 0
            for items in pages[int((i - 1) / 50)]["items"]:
                if j > len(videos["items"]) - 1:
                    print(j)
                    break

                items["snippet"]["resourceId"]["uploader"] = videos["items"][j]["snippet"]["channelTitle"]
                j+=1

if __name__ == "__main__":
    if DEVELOPER_KEY == "REPLACE_THIS_WITH_YOUR_OWN_API_KEY":
        print("You must first enter your own Youtube Data API developer key. Check for more info: https://github.com/Majiick/YoutubePlaylistSnapshot/blob/master/README.md#usage")
        sys.exit()

    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
    args = getArgs()
    extraFields = getExtraFields(args)


    pages = []
    nextPageToken = ""
    while True: #Get all the playListItems
        playlistItems = youtube.playlistItems().list(
            part="snippet", #What part to return.
            maxResults=RESULTS_PER_PAGE,
            playlistId=args["id"],
            pageToken=nextPageToken,
            fields="nextPageToken,pageInfo,items(snippet(title{0}))".format(extraFields) #Filters down returned information to only these fields.
        ).execute()

        if playlistItems["pageInfo"]["totalResults"] > MAX_PLAYLIST_SIZE:
            print("Playlist is too large. Edit MAX_PLAYLIST_SIZE to a higher value.")
            sys.exit()

        pages.append(playlistItems)

        if "nextPageToken" in playlistItems:
            nextPageToken = playlistItems["nextPageToken"]
        else:
            break

    if args["uploadername"]:
        setChannelNames(pages)


    playlistName = youtube.playlists().list(part="snippet", id=args["id"], fields="items(snippet(title))").execute()["items"][0]["snippet"]["title"]
    save(pages, "{} {}.txt".format(playlistName, datetime.datetime.today().strftime('%d-%m-%Y')), args)

1 Answer

OK, I'll give it a try, but please bear with my answer as I don't have an API key to test this.

I'll start, as I always do, with some style points, which you can read about on the official page: PEP8

1. Comments

When writing inline comments, separate them from the statement by at least two spaces, and put one space after the # (e.g. test_comment_string = 'some text'  # this is a test string comment).

2. Spacing and formatting

Between top-level functions you should have two blank lines, not just one:

def getArgs():
    ...


def getExtraFields(args):
    ...

Try to limit all lines to a maximum of 120 characters. The official documentation says the maximum length shouldn't be greater than 79, but I just like the IntelliJ (PyCharm) proposed style better (I guess this is a matter of preference, but anyhow, it helps readability).

Instead of i = i + 1 you can use an augmented assignment: i += 1. I saw you already used this form elsewhere, so try to be consistent when writing code!

3. Naming conventions

PEP8 also specifies the following about naming variables and methods:

Use the function naming rules: lowercase with words separated by underscores as necessary to improve readability.

That said, extraFields would become extra_fields

I've also seen many people use camelCase naming for variables, so as long as you're consistent within your code it's OK.

The same rule applies for your methods:

getArgs() -> get_args()

4. Imports

Don't import modules you're not using; it's confusing. For example, the os module is not used anywhere, so why import it?

With all the above modifications applied, we now have the following:

import argparse
import codecs
import datetime
import sys

from apiclient.discovery import build

DEVELOPER_KEY = "REPLACE_THIS_WITH_YOUR_OWN_API_KEY"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
RESULTS_PER_PAGE = 50  # 1-50 as per Google's rules.
MAX_PLAYLIST_SIZE = 5000


def get_args():
    """
    Process command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Retrieve a list of youtube videos in a playlist.')
    parser.add_argument("id", type=str, metavar="id", help="Youtube ID of the playlist to scrap.")
    parser.add_argument('-dt', '--date', action='store_true',
                        help="Include the date when the video was added to the playlist?")
    parser.add_argument('-ds', '--description', action='store_true', help="Include the description of videos?")
    parser.add_argument('-un', '--uploadername', action='store_true',
                        help="Include the channel name of the uploader of video?")

    args = parser.parse_args()
    args = vars(args)  # Turn into dict-like view.
    return args


def get_extra_fields(args):
    """
    Returns a string of what information to filter in the Google API based on args optional parameters.
    """
    extra_fields = ""

    if args["description"]:
        extra_fields += ",description"

    if args["date"]:
        extra_fields += ",publishedAt"

    if args["uploadername"]:
        extra_fields += ",resourceId(videoId)"

    return extra_fields


def get_extra_info(args, item):
    """
    Returns a string of information to save with the video name based on args optional parameters.
    """
    extra_info = ""

    if args["uploadername"]:
        extra_info += " |Uploader: {}|".format(item["snippet"]["resourceId"]["uploader"])

    if args["date"]:
        extra_info += " |Date Added: {}|".format(item["snippet"]["publishedAt"][:-5])  # -5 to remove 000Z

    if args["description"]:
        extra_info += " |Description: {}|".format(item["snippet"]["description"])

    return extra_info


def save(pages, file_name, args):
    """
    Saves selected playlistItems' information in a txt.
    """
    f = codecs.open(file_name, "wb", "utf-8")

    i = 0
    for playlist_items in pages:
        for items in playlist_items["items"]:
            i += 1
            f.write("{}.".format(i) + str(items["snippet"]["title"]) + get_extra_info(args, items) + u'\r\n')

    f.close()


def set_channel_names(pages):
    """
    Retrieves all the videos in pages and gets the uploader's name.
    Sets the uploader's name to the playlistItem's ["snippet"]["resourceId"]["uploader"].
    """
    # videoIds = [item["snippet"]["resourceId"]["videoId"] for playlistItem in pages for item in playlistItem["items"]]
    video_ids = []
    for playlist_items in pages:
        for items in playlist_items["items"]:
            video_ids.append(items["snippet"]["resourceId"]["videoId"])

    for i in range(1, playlist_items["pageInfo"]["totalResults"]):
        video_ids_string = ""
        if i % RESULTS_PER_PAGE == 0 or i == playlist_items["pageInfo"]["totalResults"] - 1:
            for id in video_ids[:RESULTS_PER_PAGE]:  # Generate the string of ids to put into the API request.
                video_ids_string += "{},".format(id)

            video_ids_string = video_ids_string[:-1]  # Remove last ','
            video_ids = video_ids[RESULTS_PER_PAGE:]

            videos = youtube.videos().list(
                part="snippet",
                id=video_ids_string,
                fields="items(snippet(channelTitle))",
                maxResults=RESULTS_PER_PAGE
            ).execute()

            # Associate the channelTitles with their respective videos.
            j = 0
            for items in pages[int((i - 1) / 50)]["items"]:
                if j > len(videos["items"]) - 1:
                    print(j)
                    break

                items["snippet"]["resourceId"]["uploader"] = videos["items"][j]["snippet"]["channelTitle"]
                j += 1


if __name__ == "__main__":
    if DEVELOPER_KEY == "REPLACE_THIS_WITH_YOUR_OWN_API_KEY":
        print("You must first enter your own Youtube Data API developer key.")
        sys.exit()

    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
    args = get_args()
    extraFields = get_extra_fields(args)

    pages = []
    nextPageToken = ""
    while True:  # Get all the playListItems
        playlistItems = youtube.playlistItems().list(
            part="snippet",  # What part to return.
            maxResults=RESULTS_PER_PAGE,
            playlistId=args["id"],
            pageToken=nextPageToken,
            fields="nextPageToken,pageInfo,items(snippet(title{0}))".format(extraFields)
            # Filters down returned information to only these fields.
        ).execute()

        if playlistItems["pageInfo"]["totalResults"] > MAX_PLAYLIST_SIZE:
            print("Playlist is too large. Edit MAX_PLAYLIST_SIZE to a higher value.")
            sys.exit()

        pages.append(playlistItems)

        if "nextPageToken" in playlistItems:
            nextPageToken = playlistItems["nextPageToken"]
        else:
            break

    if args["uploadername"]:
        set_channel_names(pages)

    playlistName = youtube.playlists().list(
        part="snippet", id=args["id"], fields="items(snippet(title))"
    ).execute()["items"][0]["snippet"]["title"]
    save(pages, "{} {}.txt".format(playlistName, datetime.datetime.today().strftime('%d-%m-%Y')), args)

5. A lil' bit more:

Here:

f.write("{}.".format(i) + str(items["snippet"]["title"]) + get_extra_info(args, items) + u'\r\n')

Using str() and then concatenating a unicode literal onto it does not make a lot of sense. You may just want to use a single u'...'.format() call instead.
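
For example (untested, since I don't have a key to run this), the whole line could be collapsed into one format call while keeping exactly the same output:

f.write(u"{}.{}{}\r\n".format(i, items["snippet"]["title"], get_extra_info(args, items)))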

This:

if "nextPageToken" in playlistItems:
   nextPageToken = playlistItems["nextPageToken"]
else:
   break

Could be rewritten as:

if not "nextPageToken" in playlistItems:
   break
nextPageToken = playlistItems["nextPageToken"]

That's what I've got so far. I really like how your code is structured. Your logic is well-defined, and regarding setChannelNames(), it looks pretty straightforward to me.

As general feedback: well done!

P.S.: I might create an API key to test your code later on when I have some more time. Then I'll try to modify/improve a bit of the logic.
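
In the meantime, here is a rough, untested sketch of the direction I'd probably take with set_channel_names: batch the IDs by plain slicing and map channelTitle back by videoId instead of by position. The uploader_by_id dict and the "[unavailable]" fallback are my own additions, so treat this as an idea rather than a drop-in replacement:

def set_channel_names(pages):
    """
    Fetch the uploader (channelTitle) for every video in pages, in batches of
    RESULTS_PER_PAGE, and store it under ["snippet"]["resourceId"]["uploader"].
    """
    # Flatten the pages once so we can iterate the items twice.
    all_items = [item for page in pages for item in page["items"]]
    video_ids = [item["snippet"]["resourceId"]["videoId"] for item in all_items]

    # Build a videoId -> channelTitle lookup, one API call per batch of IDs.
    uploader_by_id = {}
    for start in range(0, len(video_ids), RESULTS_PER_PAGE):
        chunk = video_ids[start:start + RESULTS_PER_PAGE]
        videos = youtube.videos().list(
            part="snippet",
            id=",".join(chunk),
            fields="items(id,snippet(channelTitle))",
            maxResults=RESULTS_PER_PAGE
        ).execute()
        for video in videos["items"]:
            uploader_by_id[video["id"]] = video["snippet"]["channelTitle"]

    # Deleted or private videos are simply missing from the response.
    for item in all_items:
        video_id = item["snippet"]["resourceId"]["videoId"]
        item["snippet"]["resourceId"]["uploader"] = uploader_by_id.get(video_id, "[unavailable]")

This keeps each uploader attached to the right video even when some of them no longer exist, and removes the manual index bookkeeping.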

  • Have mine, if you want - AIzaSyAny7lBhCSkwgUvCgyWkGKun30UPDse4To
    – Majiick
    Commented Jul 7, 2016 at 14:37
