Script to retrieve new YouTube videos

Question

I wrote a script which retrieves YouTube videos from your favourite channels - channels you specify in a .txt file. Let me know what you think about it.

Channel and Video classes

import urllib.request
import json
import re
from datetime import datetime, timedelta

#**************
apiKey = #Insert your Youtube API key.
#**************

youtubeApiUrl = "https://www.googleapis.com/youtube/v3"
youtubeChannelsApiUrl = youtubeApiUrl + "/channels?key={0}&".format(apiKey)
youtubeSearchApiUrl = youtubeApiUrl + "/search?key={0}&".format(apiKey)
youtubeVideoApi = youtubeApiUrl +"/videos?key={0}&".format(apiKey)

requestParametersChannelId = youtubeChannelsApiUrl + 'forUsername={0}&part=id' 
requestChannelVideosInfo = youtubeSearchApiUrl + 'channelId={0}&part=id&order=date&type=video&publishedBefore={1}&publishedAfter={2}&pageToken={3}&maxResults=50'
requestVideoInfo = youtubeVideoApi + "part=snippet&id={0}"
requestVideoTime = youtubeVideoApi +"part=contentDetails&id={0}"

class Channel:

    def __init__(self, channelName, channelAlias=None, channelId=None):

        self.channelName = channelName
        self.channelAlias = channelAlias
        self.channelId = channelId

    def __str__(self):
         return "Channel name:  {}\nChannel Alias:  {}\nChannelId:  {}".format(self.channelName,
                                                                             self.channelAlias,
                                                                             self.channelId)

    def __repr__(self):
         return "Channel name:  {}\nChannel Alias:  {}\nChannelId:  {}".format(self.channelName,
                                                                             self.channelAlias,
                                                                             self.channelId)


    def setChannelId(self):
        try:
            url = requestParametersChannelId.format(self.channelName)
            resp = urllib.request.urlopen(url).read().decode("utf8")

            jsonResp = json.loads(resp)
            self.channelId = jsonResp["items"][0]["id"]
        except IndexError:
            print("ERROR setting ID for channel: {}".format(self.channelName))
            self.channelId = "error"





    def _getVideosBetween(self,  sinceDate, toDate):
        """
        dates= datetime.datetime objects
        """
        #format dates
        sinceDate = sinceDate.strftime("%Y-%m-%dT%H:%M:%SZ")
        toDate = toDate.strftime("%Y-%m-%dT%H:%M:%SZ")

        nextPToken = ""
        foundAll = False

        retVal = []

        while not foundAll:
            try:
                url = requestChannelVideosInfo.format(self.channelId, toDate, sinceDate, nextPToken)
                resp = urllib.request.urlopen(url).read().decode("utf8")


                jsonResp = json.loads(resp)
                returnedVideos = jsonResp["items"]

                for video in returnedVideos:
                    retVal.append(video["id"]["videoId"])

                try:
                    nextPToken = jsonResp["nextPageToken"]

                except: #no nextPageToken
                    foundAll = True

            except IndexError: #error; no videos found. dont print anything
                foundAll = True

        return retVal

    def getLastXDaysVideos(self, last_x_days):
        todayDate = datetime.now()
        previousDate = datetime.now() - timedelta(days=last_x_days)

        return self._getVideosBetween(previousDate, todayDate)


    def getVideosSince(self, sinceDate):
        """
        sinceDate = datetime.datetime object
        """

        todayDate = datetime.now()
        return self._getVideosBetween(sinceDate, todayDate)

    def getAllVideos(self):
        firstDate = datetime(year=2005, month=4, day=22) #first youtube video -1
        todayDate = datetime.now()

        return self._getVideosBetween(firstDate, todayDate)



class Video:
    def __init__(self, videoId):
        self.videoId = videoId

    def getData(self, parseDuration=True, parseDate=True):
        try:
            results = {}
            url = requestVideoInfo.format(self.videoId)
            resp = urllib.request.urlopen(url).read().decode("utf8")


            jsonResp = json.loads(resp)
            results["title"] = jsonResp["items"][0]["snippet"]["title"]
            results["date"] = jsonResp["items"][0]["snippet"]["publishedAt"]
            results["description"] = jsonResp["items"][0]["snippet"]["description"]
            results["url"] = "https://www.youtube.com/watch?v={}".format(self.videoId)

            #need to create different request for duration
            url = requestVideoTime.format(self.videoId)
            resp = urllib.request.urlopen(url).read().decode("utf8")   
            jsonResp = json.loads(resp)
            duration = jsonResp["items"][0]["contentDetails"]["duration"]

            if parseDuration:
                #parses iso 8601 duration manually
                digits = re.findall(r"\d+", duration)
                times = ["seconds", "minutes", "hours"]
                res = []

                for digit, time in zip(digits[::-1], times):
                    res.append("{} {},".format(digit, time))

                res.reverse() #start with biggest unit
                parsedDuration = " ".join(res)[:-1] #omit last colon
                results["duration"] = parsedDuration
            else:
                results["duration"] = duration

            if parseDate:
                #2016-12-17T14:54:05.000Z --> 14:54  12.12.2016
                digits = re.findall(r"\d+", results["date"])
                parsedDate = "{hours}:{minutes}  {day}.{month}.{year}".format(
                    hours=digits[3], minutes=digits[4], day=digits[2], month=digits[1], year=digits[0]
                    )

                results["date"] = parsedDate

            #no need for else as unparsed date is already in results dict
            return results


        except IndexError:
            print("ERROR: Finding video data for video {}".format(self.videoId))
            return None

Channel Feed

from youtube_api import *


FILENAME = "channels.txt"

def process_file(filename):
    """
    returns tuple(
        datetime object,
        dictionary - channel:id
        )

    """
    channels = {}

    with open(FILENAME, "r+") as f:    
        #2016 12 17   19 29
        #tuple to create datetime object
        date = [int(i) for i in f.readline().strip().split()]
        _ = f.readline() #consume newline
        lines = f.readlines()

        for line in lines:
            try: #explicit channel Id
                channelName, channelId = line.strip().split(" ### ")
                channels[channelName] = channelId
            except:
                channelName = line.strip()
                channels[channelName] = 0

        currDate = datetime.now().strftime("%Y %m %d   %H %M")
        f.seek(0)        
        f.write(currDate)


    date_obj = datetime(*date) #unpack tuple with date
    return (date_obj, channels)


def main():
    date, channels = process_file(FILENAME)
    print("Last checked: {}".format(date.strftime("%H:%M  %d.%m.%Y")))
    for channelName, chId in sorted(channels.items()): #alphabetical order
        #create Channel objects for every channel, assign an ID unless explicitly specified in file
        if chId == 0:
            chan = Channel(channelName)
            chan.setChannelId()
        else:
            chan = Channel(channelName, channelId=chId)


        print("\n" + "*" * 40 +" \n{}\n".format(channelName) + "*" * 40)
        videos = chan.getVideosSince(date)
        for videoId in videos:
           vid = Video(videoId)
           data = vid.getData()
           try:
               print("   {}\n   {} ; {}\n   {}\n   {}\n\n".format(data["title"], data["date"],data["duration"], data["url"], data["description"].split("\n")[0]))
           except: #unicode error (not running in IDLE)
               print("   {}\n   {}\n   {}\n    \n    -Unable to display more informtaion\n\n".format(data["date"], data["duration"], data["url"]))
        if len(videos) == 0:
           print("   No videos found in this time period :(\n")


    i = input("\nPress enter to exit. ")

if __name__ == "__main__":
    main()

Jean-François Fabre · Accepted Answer · 2016-12-26 16:46:40Z

General remark: your try/except blocks without any arguments are bad: if anything happens (including syntax error, ...) your exception block is called. It would be better to narrow down the block to the exceptions you're expecting:

try:
   (some code)
except (ValueError, NameError):
     pass

Channel and video classes:

This is cumbersome and inefficient, you're accessing the same dict keys over and over:

jsonResp = json.loads(resp)
results["title"] = jsonResp["items"][0]["snippet"]["title"]
results["date"] = jsonResp["items"][0]["snippet"]["publishedAt"]
results["description"] = jsonResp["items"][0]["snippet"]["description"]

I'd do:

jsonResp = json.loads(resp)
response_items = jsonResp["items"][0]  # you can reuse it below
snippet = response_items["snippet"]
results["title"] = snippet["title"]
results["date"] = snippet["publishedAt"]
results["description"] = snippet

Also, this part of channel feed needs improving:

_ = f.readline() #consume newline
lines = f.readlines()

for line in lines:
    try: #explicit channel Id
        channelName, channelId = line.strip().split(" ### ")
        channels[channelName] = channelId
    except:
        channelName = line.strip()
        channels[channelName] = 0

_ = f.readline() #consume newline is better written as next(f). Then don't use readlines, just iterate on f. You'll save time storing data you don't need.

I'd rewrite that as:

next(f) #consume newline

for line in f:  # iterate on file lines
    try: #explicit channel Id
        channelName, channelId = line.strip().split(" ### ")
        channels[channelName] = channelId
    except ValueError: # too many values to unpack can happen
        channelName = line.strip()
        channels[channelName] = 0

Still, the seek in the end to write the new date works, although that's a really weird way to timestamp a file. I would just read the file, then open as append and close to update timestamp, and use os.path.getmtime() to get last modification time. That way you don't have to store the date in the file, and use read/write on a text file, which is risky.

Thanks for extensive analysis of my code! All of your remarks are absolutely on point, and I already updated my code to follow them. One more question though. — Jac08H, Commented Dec 27, 2016 at 10:34

Stack Exchange Network

Script to retrieve new YouTube videos

1 Answer 1

Your Answer

Not the answer you're looking for? Browse other questions tagged
python
youtube
or ask your own question.

Hot Network Questions

Script to retrieve new YouTube videos

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Not the answer you're looking for? Browse other questions tagged pythonyoutube or ask your own question.

Related

Hot Network Questions

Not the answer you're looking for? Browse other questions tagged
python
youtube
or ask your own question.