使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字
生活随笔
收集整理的這篇文章主要介紹了
使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
1、首先需要下載FFmpeg;
2、Gradle依賴
// Root build script: shared configuration for all projects plus the packaging
// tasks (source zip, setup zip, MD5 checksum file).

// Pins every dependency of the given group to one version.
def void forceVersion(details, group, version) {
    if (details.requested.group == group) {
        details.useVersion version
    }
}

// Pins one specific module (group + name) to one version.
def void forceVersion(details, group, name, version) {
    if (details.requested.group == group && details.requested.name == name) {
        details.useVersion version
    }
}

allprojects { p ->
    group = 'com.my.spider'
    version = '1.0.0'

    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'

    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // The manifest is filled in lazily (doFirst) so that p.ext.Timestamp,
    // assigned further down in this closure, is already available.
    jar.doFirst {
        manifest {
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists()) {
                from (manifestFile)
            }
            attributes 'Implementation-Title': p.name
            if (p.version.endsWith('-SNAPSHOT')) {
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime': new Date()
        }
    }

    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            links 'http://docs.oracle.com/javase/8/docs/api/index.html'
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // Publish as part of the build only when the uploadArchives environment variable is set.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {
            classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE'
        }
    }

    afterEvaluate { Project project ->
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                resolutionStrategy.eachDependency { DependencyResolveDetails details ->
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }
                exclude module: 'slf4j-log4j12'
                exclude module: 'log4j'
            }
            dependencies {
                testCompile 'junit:junit:4.12'
            }
        }
    }

    repositories {
        mavenCentral()
    }

    // Timestamp: year, month, day, hour, minute
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build number, taken from the BUILD_NUMBER environment variable; 'x' when unset or empty
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }
}

// Zips the project sources, leaving out hidden files and build/runtime directories.
task zipSources(type: Zip) {
    description '壓縮源代碼'
    project.ext.zipSourcesFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip'
    archiveName = project.ext.zipSourcesFile
    includeEmptyDirs = false
    from project.projectDir
    exclude '**/.*'
    exclude 'build/*'
    allprojects.each { p ->
        exclude '**/' + p.name + '/bin/*'
        exclude '**/' + p.name + '/build/*'
        exclude '**/' + p.name + '/data/*'
        exclude '**/' + p.name + '/work/*'
        exclude '**/' + p.name + '/logs/*'
    }
}

// Describes what goes into the setup archive for one application project:
// its jar, example configs and start/conf scripts, all placed under `dstname`
// (defaults to the project name).
def CopySpec appCopySpec(Project prj, dstname = null) {
    if (!dstname) {
        dstname = prj.name
    }
    return copySpec {
        // Fat jar
        from (prj.buildDir.toString() + '/libs/' + prj.name + '-' + project.version + '.jar') {
            into dstname
        }
        // Configs
        from (prj.projectDir.toString() + '/config/examples') {
            into dstname + '/config'
        }
        // Windows start script
        from (prj.projectDir.toString() + '/' + prj.name + '.bat') {
            into dstname
        }
        // Unix conf script
        from (prj.projectDir.toString() + '/' + prj.name + '.conf') {
            into dstname
            rename prj.name, prj.name + '-' + project.version
        }
    }
}

// Builds the setup (installation) archive once all sub-projects are built.
task zipSetup(type: Zip, dependsOn: subprojects.build) {
    description '制作安裝包'
    project.ext.zipSetupFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip'
    archiveName = project.ext.zipSetupFile
    with appCopySpec(project(':spider-demo'))
}

import java.security.MessageDigest

// Returns the MD5 digest of `file` as a 32-character lower-case hex string.
def generateMD5(final file) {
    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream() { is ->
        byte[] buffer = new byte[8192]
        int read = 0
        while ((read = is.read(buffer)) > 0) {
            digest.update(buffer, 0, read)
        }
    }
    byte[] md5sum = digest.digest()
    BigInteger bigInt = new BigInteger(1, md5sum)
    // BigInteger.toString(16) drops leading zeros; pad back to the full 128 bits
    // so the value matches what md5sum-style tools print.
    return bigInt.toString(16).padLeft(32, '0')
}

// Prints the MD5 of both archives and writes them to a "-md5.txt" file next to them.
// Uses doLast {} instead of the `<<` operator, which newer Gradle versions removed.
task md5(dependsOn: [zipSetup, zipSources]) {
    doLast {
        String md5_setup = generateMD5(file("${projectDir}/build/distributions/" + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file("${projectDir}/build/distributions/" + project.ext.zipSourcesFile))
        println project.ext.zipSetupFile + '=' + md5_setup
        println project.ext.zipSourcesFile + '=' + md5_sources
        def newFile = new File("${projectDir}/build/distributions/" + project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even if a println fails.
        newFile.withPrintWriter { printWriter ->
            printWriter.println project.ext.zipSetupFile + '=' + md5_setup
            printWriter.println project.ext.zipSourcesFile + '=' + md5_sources
        }
    }
}

build.dependsOn subprojects.build, zipSetup, zipSources, md5

// Listing caption from the article: "build.gradle" (misspelled "bulid.gradle" in the original).
// Article text that followed this listing, kept verbatim:
//   工程組件gradle依賴: 語音識別使用 百度api;需引入 compile 'com.baidu.aip:java-sdk:3.2.1'
// Sub-project build script: a Spring Boot application packaged through the
// `application` plugin's distribution.
apply plugin: 'spring-boot'
apply plugin: 'application'

// Ship the example configuration files under config/ in the distribution.
distributions {
    main {
        contents {
            from ("${projectDir}/config/examples") {
                into "config"
            }
        }
    }
}

// Tar distribution is switched off.
distTar.enabled = false

springBoot {
    executable = true
    mainClass = 'com.my.ai.Application'
}

dependencies {
    compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
    compile 'dom4j:dom4j:1.6.1'
    compile 'commons-httpclient:commons-httpclient:3.1'
    compileOnly 'com.h2database:h2:1.4.191'
    compile 'javax.cache:cache-api:1.0.0'
    compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
    compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'

    // Axis
    compile 'axis:axis:1.4'

    compile 'org.jsoup:jsoup:1.10.1'
    compile 'com.alibaba:fastjson:1.2.21'

    // Baidu AIP SDK (speech recognition)
    compile 'com.baidu.aip:java-sdk:3.2.1'
}

// Article section heading that followed this listing, kept verbatim: 3、視頻抽取音頻服務
package com.my.ai.service;import java.io.BufferedReader; import java.io.File; import java.io.InputStreamReader; import java.util.List;import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service;//視頻抽取音頻 @Service public class ExtractAudioService {public static Logger logger = LoggerFactory.getLogger(ExtractAudioService.class);public static void main(String[] args) {new ExtractAudioService().getAudioFromVideo("E:\\QLDownload\\氧化還原反應中電子轉移的方向和數目的表示方法\\氧化還原反應中電子轉移的方向和數目的表示方法.mp4","D:\\ffmpeg4.2\\bin\\ffmpeg.exe");}public String getAudioFromVideo(String videoPath,String ffmpegPath) {File video = new File(videoPath);if(video.exists() && video.isFile()){String format = "wav";String outPath = videoPath.substring(0,videoPath.lastIndexOf(".")) + ".wav";processCmd(videoPath, ffmpegPath, format, outPath);return outPath;}return null;}//D:\ffmpeg4.2\bin\ffmpeg.exe -i 氧化還原反應中電子轉移的方向和數目的表示方法.mp4 -f wav -vn -y 3.wavpublic String processCmd(String inputPath,String ffmpegPath,String format,String outPath) {List<String> commend = new java.util.ArrayList<String>();commend.add(ffmpegPath);commend.add("-i");commend.add(inputPath);commend.add("-y");commend.add("-vn");commend.add("-f");commend.add(format);commend.add(outPath);try {ProcessBuilder builder = new ProcessBuilder();builder.command(commend);builder.redirectErrorStream(true);Process p = builder.start();// 1. startBufferedReader buf = null; // 保存ffmpeg的輸出結果流String line = null;// read the standard output buf = new BufferedReader(new InputStreamReader(p.getInputStream()));StringBuffer sb = new StringBuffer();while ((line = buf.readLine()) != null) {System.out.println(line);sb.append(line);continue;}p.waitFor();// 這里線程阻塞,將等待外部轉換進程運行成功運行結束后,才往下執行// 1. endreturn sb.toString();} catch (Exception e) { // System.out.println(e); return null;}}} ExtractAudioService4、音頻切段:
package com.my.ai.service;import java.io.BufferedReader; import java.io.File; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern;import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service;@Service public class CutService {public static Logger logger = LoggerFactory.getLogger(CutService.class);public List<String> cutFile(String media_path, String ffmpeg_path) {List<String> audios = new ArrayList<>();int mediaTime = getMediaTime(media_path, ffmpeg_path); int num = mediaTime / 59;int lastNum = mediaTime % 59;System.out.println(mediaTime +"|" + num + "|"+ lastNum);int length = 59;File file = new File(media_path);String filename = file.getName();for (int i = 0; i < num; i++) {String outputPath = file.getParent() + File.separator + i + "-"+filename;processCmd(media_path, ffmpeg_path, String.valueOf(length * i) , String.valueOf(length), outputPath);audios.add(outputPath);}if(lastNum > 0) {String outputPath = file.getParent() + File.separator + num + "-"+filename;processCmd(media_path, ffmpeg_path, String.valueOf(length * num) , String.valueOf(lastNum), outputPath);audios.add(outputPath);}return audios;}/*** 獲取視頻總時間* * @param viedo_path 視頻路徑* @param ffmpeg_path ffmpeg路徑* @return*/public int getMediaTime(String video_path, String ffmpeg_path) {List<String> commands = new java.util.ArrayList<String>();commands.add(ffmpeg_path);commands.add("-i");commands.add(video_path);try {ProcessBuilder builder = new ProcessBuilder();builder.command(commands);final Process p = builder.start();// 從輸入流中讀取視頻信息BufferedReader br = new BufferedReader(new InputStreamReader(p.getErrorStream()));StringBuffer sb = new StringBuffer();String line = "";while ((line = br.readLine()) != null) {sb.append(line);}System.out.println(sb.toString());br.close();// 從視頻信息中解析時長String regexDuration = "Duration: (.*?), bitrate: (\\d*) kb\\/s";Pattern pattern = 
Pattern.compile(regexDuration);Matcher m = pattern.matcher(sb.toString());if (m.find()) {int time = getTimelen(m.group(1));System.out.println(video_path + ",視頻時長:" + time + ",比特率:" + m.group(2) + "kb/s");return time;}} catch (Exception e) {e.printStackTrace();}return 0;}// 格式:"00:00:10.68"public int getTimelen(String timelen) {int min = 0;String strs[] = timelen.split(":");if (strs[0].compareTo("0") > 0) {min += Integer.valueOf(strs[0]) * 60 * 60;// 秒}if (strs[1].compareTo("0") > 0) {min += Integer.valueOf(strs[1]) * 60;}if (strs[2].compareTo("0") > 0) {min += Math.round(Float.valueOf(strs[2]));} return min;}//D:\ffmpeg4.2\bin\ffmpeg.exe -i 123.pcm -ss 0 -t 59 1-123.wavpublic String processCmd(String inputPath,String ffmpegPath,String startTime,String length,String outputPath) {List<String> commend = new java.util.ArrayList<String>();commend.add(ffmpegPath);commend.add("-i");commend.add(inputPath);commend.add("-ss");commend.add(startTime);commend.add("-t");commend.add(length);commend.add(outputPath);try {ProcessBuilder builder = new ProcessBuilder();builder.command(commend);builder.redirectErrorStream(true);Process p = builder.start();// 1. startBufferedReader buf = null; // 保存ffmpeg的輸出結果流String line = null;// read the standard outputbuf = new BufferedReader(new InputStreamReader(p.getInputStream()));StringBuffer sb = new StringBuffer();while ((line = buf.readLine()) != null) {System.out.println(line);sb.append(line);continue;}p.waitFor();// 這里線程阻塞,將等待外部轉換進程運行成功運行結束后,才往下執行// 1. 
endreturn sb.toString();} catch (Exception e) {System.out.println(e); return null;}}//ffmpeg -y -i 16k.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm public static String processWavToPcm(String inputPath,String ffmpegPath,String outputPath) {List<String> commend = new java.util.ArrayList<String>();commend.add(ffmpegPath);commend.add("-i");commend.add(inputPath);commend.add("-acodec");commend.add("pcm_s16le");commend.add("-f");commend.add("s16le");commend.add("-ac");commend.add("1");commend.add("-ar");commend.add("16000");commend.add(outputPath);try {ProcessBuilder builder = new ProcessBuilder();builder.command(commend);builder.redirectErrorStream(true);Process p = builder.start();// 1. startBufferedReader buf = null; // 保存ffmpeg的輸出結果流String line = null;// read the standard outputbuf = new BufferedReader(new InputStreamReader(p.getInputStream()));StringBuffer sb = new StringBuffer();while ((line = buf.readLine()) != null) {System.out.println(line);sb.append(line);continue;}p.waitFor();// 這里線程阻塞,將等待外部轉換進程運行成功運行結束后,才往下執行// 1. endreturn outputPath;//sb.toString();} catch (Exception e) {System.out.println(e); return null;}}public static void main(String[] args) {List<String> audios = new CutService().cutFile("E:\\QLDownload\\氧化還原反應中電子轉移的方向和數目的表示方法\\氧化還原反應中電子轉移的方向和數目的表示方法.wav","D:\\ffmpeg4.2\\bin\\ffmpeg.exe");System.out.println(audios.size());for (String wavPath : audios) {String out = wavPath.substring(0,wavPath.lastIndexOf(".")) + ".pcm";processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);}}}?
5、音頻格式轉換(WAV 轉為 16kHz 單聲道 PCM),便于進行語音識別,代碼見上面 CutService 中的 processWavToPcm 方法:
6、調用sdk,獲取識別結果:
package com.my.ai.service;import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service;import com.baidu.aip.speech.AipSpeech;@Service public class TokenService {public static Logger logger = LoggerFactory.getLogger(TokenService.class);//設置APPID/AK/SKpublic static final String APP_ID = "***";public static final String API_KEY = "***";public static final String SECRET_KEY = "***";static AipSpeech client = null;static {if(client == null) {client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);}}public static void main(String[] args) {getResult("E:\\QLDownload\\氧化還原反應中電子轉移的方向和數目的表示方法\\0-氧化還原反應中電子轉移的方向和數目的表示方法.pcm");}public static String getResult(String file) {// 可選:設置網絡連接參數client.setConnectionTimeoutInMillis(2000);client.setSocketTimeoutInMillis(60000);// 可選:設置代理服務器地址, http和socket二選一,或者均不設置//client.setHttpProxy("proxy_host", proxy_port); // 設置http代理//client.setSocketProxy("proxy_host", proxy_port); // 設置socket代理JSONObject res = client.asr(file, "pcm", 16000, null);//System.out.println(res.toString(2));System.out.println(res.get("result").toString());return res.get("result").toString();}}
7、結果寫入文件:
package com.my.ai.service;import java.io.BufferedOutputStream; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.RandomAccessFile;import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service;@Service public class FileService {public static Logger logger = LoggerFactory.getLogger(FileService.class);//最慢public static void writeFile1(String file,String content) throws IOException {FileOutputStream out = null;out = new FileOutputStream(new File(file));long begin = System.currentTimeMillis();out.write(content.getBytes());out.close();long end = System.currentTimeMillis();System.out.println("FileOutputStream執行耗時:" + (end - begin) + " 毫秒");}//中public static void writeFile2(String file,String content) throws IOException{FileWriter fw = null;fw = new FileWriter(file);long begin3 = System.currentTimeMillis();fw.write(content);fw.close();long end3 = System.currentTimeMillis();System.out.println("FileWriter執行耗時:" + (end3 - begin3) + " 毫秒");}//最快public static void writeFile3(String file,String content) throws IOException{FileOutputStream outSTr = null;BufferedOutputStream buff = null;outSTr = new FileOutputStream(new File(file));buff = new BufferedOutputStream(outSTr);long begin0 = System.currentTimeMillis();buff.write(content.getBytes());buff.flush();buff.close();long end0 = System.currentTimeMillis();System.out.println("BufferedOutputStream執行耗時:" + (end0 - begin0) + " 毫秒");}public static void main(String[] args) {for (int i = 0; i < 7; i++) {String result = TokenService.getResult("E:\\QLDownload\\氧化還原反應中電子轉移的方向和數目的表示方法\\" + i +"-氧化還原反應中電子轉移的方向和數目的表示方法.pcm");appendFile2("E:\\QLDownload\\氧化還原反應中電子轉移的方向和數目的表示方法\\氧化還原反應中電子轉移的方向和數目的表示方法.txt", result+"\r\n");}}public static void appendFile1(String file, String conent) {BufferedWriter out = null;try {out = new BufferedWriter(new OutputStreamWriter(new 
FileOutputStream(file, true)));out.write(conent);} catch (Exception e) {e.printStackTrace();} finally {try {if (out != null) {out.close();}} catch (IOException e) {e.printStackTrace();}}}/*** 追加文件:使用FileWriter* * @param fileName* @param content*/public static void appendFile2(String fileName, String content) {FileWriter writer = null;try {// 打開一個寫文件器,構造函數中的第二個參數true表示以追加形式寫文件writer = new FileWriter(fileName, true);writer.write(content);} catch (IOException e) {e.printStackTrace();} finally {try {if (writer != null) {writer.close();}} catch (IOException e) {e.printStackTrace();}}}/*** 追加文件:使用RandomAccessFile* * @param fileName 文件名* @param content 追加的內容*/public static void appendFile3(String fileName, String content) {RandomAccessFile randomFile = null;try {// 打開一個隨機訪問文件流,按讀寫方式randomFile = new RandomAccessFile(fileName, "rw");// 文件長度,字節數long fileLength = randomFile.length();// 將寫文件指針移到文件尾。randomFile.seek(fileLength);randomFile.writeBytes(content);} catch (IOException e) {e.printStackTrace();} finally {if (randomFile != null) {try {randomFile.close();} catch (IOException e) {e.printStackTrace();}}}}}
8、測試:
package com.my.ai.test;

import java.util.List;

import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;

/**
 * End-to-end run of the pipeline: video -> WAV -> WAV segments -> PCM
 * segments -> recognized text appended to a transcript file.
 */
public class TestService {

    private static final String FFMPEG_PATH = "D:\\ffmpeg4.2\\bin\\ffmpeg.exe";

    private static final String VIDEO_PATH =
            "G:\\Youku Files\\transcode\\化學高中必修1__第2章第3節·氧化還原反應_標清.mp4";

    /** Transcript file: the video path with "-字幕.txt" appended. */
    private static final String TRANSCRIPT_PATH = VIDEO_PATH + "-字幕.txt";

    public static void main(String[] args) {
        ExtractAudioService audioService = new ExtractAudioService();
        String outPath = audioService.getAudioFromVideo(VIDEO_PATH, FFMPEG_PATH);
        if (outPath == null) {
            // getAudioFromVideo returns null when the video file is missing.
            System.err.println("Audio extraction failed, nothing to recognize: " + VIDEO_PATH);
            return;
        }

        List<String> audios = new CutService().cutFile(outPath, FFMPEG_PATH);
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            String outPcm = CutService.processWavToPcm(wavPath, FFMPEG_PATH, out);
            if (outPcm == null) {
                // processWavToPcm returns null when ffmpeg could not be run;
                // skip the segment rather than sending a null path for recognition.
                System.err.println("PCM conversion failed, segment skipped: " + wavPath);
                continue;
            }
            String result = TokenService.getResult(outPcm);
            FileService.appendFile2(TRANSCRIPT_PATH, result + "\r\n");
        }
    }
}
?
轉載于:https://www.cnblogs.com/liangblog/p/9750306.html
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: CSS3变形属性
- 下一篇: 整数的无符号编码和有符号编码