Include whisper again

This commit is contained in:
Nehemiah of Zebulun 2023-12-04 11:56:42 -05:00
parent 23cccf2455
commit 4da4eabc19
12 changed files with 113 additions and 87 deletions

Binary file not shown.

View File

@ -31,8 +31,13 @@ interface IME {
fun onDeleteText(beforeCursor: Int = 0, afterCursor: Int = 0, finishComposing: Boolean = false) fun onDeleteText(beforeCursor: Int = 0, afterCursor: Int = 0, finishComposing: Boolean = false)
fun defaultView()
fun onSwitchInputHandler(inputMode: InputMode) fun onSwitchInputHandler(inputMode: InputMode)
fun onUpdateStatusIcon(icon: Int?) fun onUpdateStatusIcon(icon: Int?)
fun record()
fun transcribe()
} }

View File

@ -26,8 +26,11 @@ import net.mezimmah.wkt9.inputmode.InputModeManager
import net.mezimmah.wkt9.keypad.Event import net.mezimmah.wkt9.keypad.Event
import net.mezimmah.wkt9.keypad.Key import net.mezimmah.wkt9.keypad.Key
import net.mezimmah.wkt9.keypad.KeyEventStat import net.mezimmah.wkt9.keypad.KeyEventStat
import net.mezimmah.wkt9.layout.Words import net.mezimmah.wkt9.layout.LoadingLayout
import net.mezimmah.wkt9.layout.MessageLayout
import net.mezimmah.wkt9.layout.WordsLayout
import net.mezimmah.wkt9.t9.T9 import net.mezimmah.wkt9.t9.T9
import net.mezimmah.wkt9.voice.Whisper
import java.util.Locale import java.util.Locale
@ -35,11 +38,14 @@ class WKT9IME: IME, InputMethodService() {
private val tag = "WKT9" private val tag = "WKT9"
private val inputModeManager = InputModeManager(this) private val inputModeManager = InputModeManager(this)
private val whisper: Whisper = Whisper(this)
private lateinit var locale: Locale private lateinit var locale: Locale
private var inputHandler: InputHandler? = null private var inputHandler: InputHandler? = null
private var wordsView: Words? = null private var wordsLayoutView: WordsLayout? = null
private var loadingLayoutView: LoadingLayout? = null
private var messageLayoutView: MessageLayout? = null
private val keyDownStats = KeyEventStat(0, 0) private val keyDownStats = KeyEventStat(0, 0)
private val keyUpStats = KeyEventStat(0, 0) private val keyUpStats = KeyEventStat(0, 0)
@ -88,9 +94,11 @@ class WKT9IME: IME, InputMethodService() {
@SuppressLint("InflateParams") @SuppressLint("InflateParams")
override fun onCreateInputView(): View? { override fun onCreateInputView(): View? {
wordsView = layoutInflater.inflate(R.layout.words, null) as Words wordsLayoutView = layoutInflater.inflate(R.layout.words, null) as WordsLayout
loadingLayoutView = layoutInflater.inflate(R.layout.loading, null) as LoadingLayout
messageLayoutView = layoutInflater.inflate(R.layout.message, null) as MessageLayout
return wordsView return wordsLayoutView
} }
override fun onCurrentInputMethodSubtypeChanged(newSubtype: InputMethodSubtype?) { override fun onCurrentInputMethodSubtypeChanged(newSubtype: InputMethodSubtype?) {
@ -272,7 +280,7 @@ class WKT9IME: IME, InputMethodService() {
override fun onWords(words: List<Word>, capMode: Int?) { override fun onWords(words: List<Word>, capMode: Int?) {
this.capMode = capMode this.capMode = capMode
wordsView?.words = words wordsLayoutView?.words = words
} }
override fun onWordSelected(word: Word) { override fun onWordSelected(word: Word) {
@ -288,11 +296,11 @@ class WKT9IME: IME, InputMethodService() {
} }
override fun onNextWord() { override fun onNextWord() {
wordsView?.next() wordsLayoutView?.next()
} }
override fun onPreviousWord() { override fun onPreviousWord() {
wordsView?.previous() wordsLayoutView?.previous()
} }
private fun deleteText(beforeCursor: Int, afterCursor: Int) { private fun deleteText(beforeCursor: Int, afterCursor: Int) {
@ -301,8 +309,22 @@ class WKT9IME: IME, InputMethodService() {
} }
} }
/**
 * Switches the input view to the message layout and starts a recording via [whisper].
 *
 * [messageLayoutView] is only assigned in [onCreateInputView], so it is still null if
 * this is triggered before the input view has been created. `setInputView` adds the
 * given view to the input frame, which rejects a null child, so the view swap is
 * skipped in that case. The recording itself is started either way.
 */
override fun record() {
    messageLayoutView?.let { setInputView(it) }
    whisper.record()
}
/**
 * Switches the input view to the loading layout and hands over to [whisper] to
 * transcribe the current recording.
 *
 * [loadingLayoutView] is only assigned in [onCreateInputView]; when it has not been
 * inflated yet the view swap is skipped instead of passing null to `setInputView`,
 * which would fail when the framework tries to add a null child to the input frame.
 * Transcription is requested either way.
 */
override fun transcribe() {
    loadingLayoutView?.let { setInputView(it) }
    whisper.transcribe()
}
/**
 * Restores the word suggestion layout as the active input view.
 *
 * Does nothing when [wordsLayoutView] has not been inflated yet (it is only assigned
 * in [onCreateInputView]); passing null to `setInputView` would fail when the
 * framework tries to add a null child to the input frame.
 */
override fun defaultView() {
    wordsLayoutView?.let { setInputView(it) }
}
private fun finishComposing() { private fun finishComposing() {
wordsView?.clear() wordsLayoutView?.clear()
inputHandler?.onFinishComposing() inputHandler?.onFinishComposing()
} }

View File

@ -94,11 +94,26 @@ class WordInputHandler(
Command.INPUT_MODE -> inputMode(key) Command.INPUT_MODE -> inputMode(key)
Command.MOVE_CURSOR -> moveCursor(key) Command.MOVE_CURSOR -> moveCursor(key)
Command.NUMBER -> triggerOriginalKeyEvent(key) Command.NUMBER -> triggerOriginalKeyEvent(key)
Command.RECORD -> record()
Command.SPACE -> finalizeWordOrSentence(stats) Command.SPACE -> finalizeWordOrSentence(stats)
Command.TRANSCRIBE -> transcribe()
else -> Log.d(tag, "Command not implemented: $command") else -> Log.d(tag, "Command not implemented: $command")
} }
} }
/**
 * Handles the RECORD command: commits any pending codeword, then asks the IME to
 * start recording.
 */
private fun record() {
    // Flush the in-progress codeword first so it is not left dangling while recording.
    codeword.takeUnless { it.isEmpty() }?.also { pending ->
        wkt9.onCommit()
        pending.clear()
    }

    wkt9.record()
}
/** Handles the TRANSCRIBE command by delegating straight to the IME. */
private fun transcribe() = wkt9.transcribe()
override fun onWordSelected(word: Word) { override fun onWordSelected(word: Word) {
lastSelectedWord = word lastSelectedWord = word
} }

View File

@ -0,0 +1,9 @@
package net.mezimmah.wkt9.layout
import android.content.Context
import android.util.AttributeSet
import android.widget.LinearLayout
/**
 * Root view type of the loading layout.
 *
 * Adds no behaviour of its own on top of [LinearLayout]; it only gives the layout a
 * dedicated type that can be referenced by name from layout XML and cast to after
 * inflation.
 */
class LoadingLayout(
    context: Context,
    attributeSet: AttributeSet
) : LinearLayout(context, attributeSet)

View File

@ -0,0 +1,9 @@
package net.mezimmah.wkt9.layout
import android.content.Context
import android.util.AttributeSet
import android.widget.LinearLayout
/**
 * Root view type of the message layout.
 *
 * Adds no behaviour of its own on top of [LinearLayout]; it only gives the layout a
 * dedicated type that can be referenced by name from layout XML and cast to after
 * inflation.
 */
class MessageLayout(
    context: Context,
    attributeSet: AttributeSet
) : LinearLayout(context, attributeSet)

View File

@ -13,7 +13,7 @@ import net.mezimmah.wkt9.R
import net.mezimmah.wkt9.WKT9IME import net.mezimmah.wkt9.WKT9IME
import net.mezimmah.wkt9.entity.Word import net.mezimmah.wkt9.entity.Word
class Words(context: Context, attributeSet: AttributeSet): HorizontalScrollView(context, attributeSet), View.OnClickListener, View.OnLongClickListener { class WordsLayout(context: Context, attributeSet: AttributeSet): HorizontalScrollView(context, attributeSet), View.OnClickListener, View.OnLongClickListener {
private var wkt9: WKT9IME private var wkt9: WKT9IME
private var wordCount: Int = 0 private var wordCount: Int = 0
private var current: Int = 0 private var current: Int = 0

View File

@ -2,17 +2,12 @@ package net.mezimmah.wkt9.voice
import android.media.MediaRecorder import android.media.MediaRecorder
import android.util.Log import android.util.Log
import android.view.View
import android.widget.HorizontalScrollView
import android.widget.LinearLayout
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import net.mezimmah.wkt9.R
import net.mezimmah.wkt9.WKT9IME import net.mezimmah.wkt9.WKT9IME
import net.mezimmah.wkt9.inputhandler.InputHandler
import okhttp3.MediaType.Companion.toMediaType import okhttp3.MediaType.Companion.toMediaType
import okhttp3.MultipartBody import okhttp3.MultipartBody
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
@ -23,17 +18,11 @@ import java.io.IOException
import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit
class Whisper( class Whisper(
private val context: WKT9IME, private val wkt9: WKT9IME,
private val inputHandler: InputHandler?,
private val ui: View
) { ) {
private val tag = "WKT9" private val tag = "WKT9"
private val ioScope = CoroutineScope(Dispatchers.IO + SupervisorJob())
private val mainScope = CoroutineScope(Dispatchers.Main + SupervisorJob())
private var ioJob: Job? = null private var ioJob: Job? = null
private var recorder: MediaRecorder? = null private var recorder: MediaRecorder? = null
private var recording: File? = null private var recording: File? = null
@ -47,19 +36,18 @@ class Whisper(
stopRecording() stopRecording()
val recording = this.recording ?: return val recording = this.recording ?: return
val ioScope = CoroutineScope(Dispatchers.IO + SupervisorJob())
showTranscribing()
ioJob?.cancel() ioJob?.cancel()
ioJob = ioScope.launch { ioJob = ioScope.launch {
try { try {
val transcription = run(recording) val transcription = run(recording)
val mainScope = CoroutineScope(Dispatchers.Main + SupervisorJob())
mainScope.launch { mainScope.launch {
showCandidates() wkt9.onCommit(transcription)
wkt9.defaultView()
} }
// inputHandler?.onInsertText(transcription.plus(" "))
} catch (e: IOException) { } catch (e: IOException) {
Log.d(tag, "A failure occurred in the communication with the speech-to-text server", e) Log.d(tag, "A failure occurred in the communication with the speech-to-text server", e)
} }
@ -70,9 +58,7 @@ class Whisper(
fun record() { fun record() {
if (recorder != null) stopRecording() if (recorder != null) stopRecording()
showMessage() recording = File.createTempFile("recording.3gp", null, wkt9.cacheDir)
recording = File.createTempFile("recording.3gp", null, context.cacheDir)
recorder = MediaRecorder().also { recorder = MediaRecorder().also {
it.setAudioSource(MediaRecorder.AudioSource.MIC) it.setAudioSource(MediaRecorder.AudioSource.MIC)
it.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP) it.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP)
@ -88,36 +74,6 @@ class Whisper(
} }
} }
private fun showCandidates() {
// val candidatesView = ui.findViewById<HorizontalScrollView>(R.id.suggestion_container)
// val loadingView = ui.findViewById<LinearLayout>(R.id.loading_container)
// val messageView = ui.findViewById<LinearLayout>(R.id.message_container)
//
// candidatesView.visibility = View.VISIBLE
// loadingView.visibility = View.GONE
// messageView.visibility = View.GONE
}
private fun showMessage() {
// val candidatesView = ui.findViewById<HorizontalScrollView>(R.id.suggestion_container)
// val loadingView = ui.findViewById<LinearLayout>(R.id.loading_container)
// val messageView = ui.findViewById<LinearLayout>(R.id.message_container)
//
// candidatesView.visibility = View.GONE
// loadingView.visibility = View.GONE
// messageView.visibility = View.VISIBLE
}
private fun showTranscribing() {
// val candidatesView = ui.findViewById<HorizontalScrollView>(R.id.suggestion_container)
// val loadingView = ui.findViewById<LinearLayout>(R.id.loading_container)
// val messageView = ui.findViewById<LinearLayout>(R.id.message_container)
//
// candidatesView.visibility = View.GONE
// loadingView.visibility = View.VISIBLE
// messageView.visibility = View.GONE
}
private fun stopRecording() { private fun stopRecording() {
recorder?.run { recorder?.run {
stop() stop()

View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  Loading bar: a 44dp-high, horizontally oriented LoadingLayout on a black background
  holding a progress spinner followed by a "Transcribing..." label.
-->
<net.mezimmah.wkt9.layout.LoadingLayout
xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:layout_height="44dp"
android:layout_alignParentBottom="true"
android:layout_gravity="bottom"
android:theme="@style/Theme.AppCompat.DayNight"
android:gravity="bottom"
android:background="@color/black"
android:orientation="horizontal">
<!--
  Progress spinner.
  NOTE(review): the id "suggestions" looks carried over from another layout; confirm
  whether anything looks this view up by id before renaming or removing it.
-->
<ProgressBar
android:id="@+id/suggestions"
style="?android:attr/progressBarStyleLarge"
android:layout_width="wrap_content"
android:layout_height="40dp" />
<!-- Static status label shown next to the spinner. -->
<TextView
android:layout_width="wrap_content"
android:layout_height="match_parent"
android:textColor="@color/suggestion_text"
android:paddingVertical="5dp"
android:paddingHorizontal="8dp"
android:textSize="20sp"
android:textFontWeight="400"
android:text="Transcribing..." />
</net.mezimmah.wkt9.layout.LoadingLayout>

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<LinearLayout <net.mezimmah.wkt9.layout.MessageLayout
xmlns:android="http://schemas.android.com/apk/res/android" xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent" android:layout_width="match_parent"
android:layout_height="44dp" android:layout_height="44dp"
@ -10,11 +10,11 @@
android:background="@color/black" android:background="@color/black"
android:orientation="horizontal"> android:orientation="horizontal">
<ProgressBar <ImageView
android:id="@+id/suggestions" android:layout_height="40dp"
style="?android:attr/progressBarStyleLarge"
android:layout_width="wrap_content" android:layout_width="wrap_content"
android:layout_height="40dp" /> android:src="@drawable/mic"/>
<TextView <TextView
android:layout_width="wrap_content" android:layout_width="wrap_content"
@ -24,6 +24,6 @@
android:paddingHorizontal="8dp" android:paddingHorizontal="8dp"
android:textSize="20sp" android:textSize="20sp"
android:textFontWeight="400" android:textFontWeight="400"
android:text="Transcribing, please wait..." /> android:text="Recording..." />
</LinearLayout> </net.mezimmah.wkt9.layout.MessageLayout>

View File

@ -1,19 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<LinearLayout
xmlns:android="http://schemas.android.com/apk/res/android"
android:id="@+id/suggestion"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:padding="2dp">
<TextView
android:id="@+id/suggestion_text"
android:layout_width="wrap_content"
android:layout_height="match_parent"
android:textColor="@color/suggestion_text"
android:minWidth="40dp"
android:paddingVertical="5dp"
android:paddingHorizontal="8dp"
android:textSize="20sp"
android:textFontWeight="400" />
</LinearLayout>

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<net.mezimmah.wkt9.layout.Words xmlns:android="http://schemas.android.com/apk/res/android" <net.mezimmah.wkt9.layout.WordsLayout xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent" android:layout_width="match_parent"
android:layout_height="wrap_content" android:layout_height="wrap_content"
android:layout_alignParentBottom="true" android:layout_alignParentBottom="true"
@ -14,4 +14,4 @@
android:layout_height="44dp" android:layout_height="44dp"
android:orientation="horizontal" /> android:orientation="horizontal" />
</net.mezimmah.wkt9.layout.Words> </net.mezimmah.wkt9.layout.WordsLayout>