framepos.sh is a Bash script that searches for images in a video file,
using ffmpeg and findimagedupes.
The threshold variable controls how fuzzy the search is.
We have used the script successfully to remove intros and outros from TV show episodes.
For "audiodetect" we use dejavu; pyAudioAnalysis segmentation could possibly be used as well.
Relevant pseudocode follows.
Most of the complexity in the code comes from running "extract" and "compare" in parallel, to improve speed and efficiency:
#######################################
# Find the first video frame that findimagedupes considers visually similar
# to one of the reference images, and print its position in seconds.
#
# A frame extractor runs in the background and writes numbered BMP frames;
# meanwhile this function hands finished frames to findimagedupes in batches.
# The extractor is paused (SIGSTOP) while more than $frames_bufsize frames
# are waiting and resumed (SIGCONT) once they have been consumed.
#
# Globals (read):
#   ffmpeg_cmd      extractor command (expanded unquoted, may hold several words)
#   ff_args         extra extractor arguments (expanded unquoted)
#   V               input video file, passed to the extractor as -i
#   tmp_pre         path prefix of the temporary frame files
#   step_duration   polling interval handed to sleep
#   frames_bufsize  number of waiting frames above which the extractor is paused
#   I               array of reference image paths
#   threshold       value passed to findimagedupes -t
#   T1              offset added to the computed time
#   fps             frame rate used to convert a frame number into seconds
# Outputs:
#   stdout  T1 + (frame - 2) / fps formatted as "%.4f" for the first match,
#           i.e. the time of the frame just before the matching one
#           (frame numbers start at 1); nothing if no frame matches.
#   stderr  progress and debug messages.
# Returns:
#   0, whether or not a match was found.
#######################################
function framepos() {
  local extract_pid frame_count batch running
  local ref_image ref_path dupe_sets first_hit frame_no
  local -a frames=()

  # Output hook handed to findimagedupes via -i: prints every file of a
  # duplicate set on one line, each followed by four tabs.
  local -r view_script='VIEW () {
for f in "$@"
do
echo -n "$f"
echo -ne "\t\t\t\t"
done
echo
}'

  # Extract frames in the background.
  # bmp format is faster than png or jpg.
  # NOTE(review): frame names sort in frame order only while the counter
  # fits into the four digits of %04d.
  # shellcheck disable=SC2086  # command and arguments are split on purpose
  $ffmpeg_cmd \
    $ff_args -i "$V" \
    "${tmp_pre}frame-%04d.bmp" \
    2>/dev/null &
  extract_pid=$!

  while true; do
    # Check liveness BEFORE listing the frames: if the extractor is already
    # gone at this point, every frame it produced is on disk and complete.
    if kill -0 "$extract_pid" 2>/dev/null; then
      running=true
    else
      running=false
    fi

    # Snapshot of the frames on disk (glob result is sorted). An unmatched
    # glob stays literal, hence the existence check.
    frames=("${tmp_pre}"frame-*.bmp)
    [[ -e "${frames[0]-}" ]] || frames=()
    frame_count=${#frames[@]}

    if ! "$running"; then
      if (( frame_count == 0 )); then
        # extract done and nothing left to compare
        echo debug found no match >&2
        break
      fi
      batch=$frame_count
    elif (( frame_count > 1 && frame_count > frames_bufsize )); then
      # Buffer full: pause the extractor to save space (extract is faster
      # than compare). The newest frame may still be incomplete; skip it.
      kill -SIGSTOP "$extract_pid" 2>/dev/null || true
      batch=$(( frame_count - 1 ))
    else
      # Buffer not full yet (or only one, possibly incomplete, frame):
      # let the extractor run and poll again.
      kill -SIGCONT "$extract_pid" 2>/dev/null || true
      sleep "$step_duration"
      continue
    fi

    # Progress goes to stderr so that stdout carries only the result.
    echo "compare $batch frames" >&2

    for ref_image in "${I[@]}"; do
      # we need the "real path" for findimagedupes
      ref_path=$(readlink -f -- "$ref_image")

      # Ask findimagedupes for "visually similar images" and keep only the
      # duplicate sets that contain the reference image. The path is matched
      # as a fixed string, not as a regular expression.
      # No match makes grep exit non-zero; that is the normal case.
      dupe_sets=$(
        printf '%s\0' "${frames[@]:0:batch}" \
          | xargs -0 findimagedupes -t "$threshold" \
            -i "$view_script" "$ref_path" \
          | grep -aF -- "$ref_path"
      ) || true
      [[ -n "$dupe_sets" ]] || continue

      # One file per line, drop the reference image itself and empty lines,
      # then take the lowest-sorting (earliest) frame.
      first_hit=$(
        printf '%s\n' "${dupe_sets//$'\t\t\t\t'/$'\n'}" \
          | grep -vF -- "$ref_path" \
          | grep -v '^$' \
          | sort \
          | head -n 1
      ) || true

      # ".../frame-0012.bmp" -> "0012"
      frame_no=${first_hit##*frame-}
      frame_no=${frame_no%.bmp}
      if ! [[ "$frame_no" =~ ^[0-9]+$ ]]; then
        echo "debug cannot parse frame number from '$first_hit'" >&2
        continue
      fi
      frame_no=$(( 10#$frame_no )) # force base 10 despite leading zeros

      # Frame time. Frame numbers start with 1, so (frame - 1) / fps would be
      # the matching frame; (frame - 2) / fps is the frame before it.
      awk -v t1="$T1" -v frame="$frame_no" -v fps="$fps" \
        'BEGIN { printf "%.4f\n", t1 + (frame - 2) / fps }'

      # Stop extracting. SIGKILL is used because the extractor may currently
      # be stopped, and a stopped process only acts on SIGKILL/SIGCONT.
      # Reap it before cleaning up so it cannot write another frame.
      kill -9 "$extract_pid" 2>/dev/null || true
      wait "$extract_pid" 2>/dev/null || true

      # remove all temp files
      rm -f -- "${tmp_pre}"frame-*.bmp
      return 0
    done

    # remove processed temp files
    printf '%s\0' "${frames[@]:0:batch}" | xargs -0 rm -f --
  done

  return 0
}